X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;ds=sidebyside;f=gcc%2Fexpmed.c;h=ae76695a133b7dc15e60bfccd5deb7bf014ae397;hb=a79b863a00888e242f9ceda4f1f70a8772ecf727;hp=d2edd813c964ea35f15119b4fd593ddcbc873185;hpb=2d232d05278c1a5e45612ed694993cf7e9e5f963;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/expmed.c b/gcc/expmed.c index d2edd813c96..ae76695a133 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -1,7 +1,7 @@ /* Medium-level subroutines: convert bit-field store and extract and shifts, multiplies and divides to rtl instructions. Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. This file is part of GCC. @@ -23,6 +23,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "config.h" #include "system.h" +#include "coretypes.h" +#include "tm.h" #include "toplev.h" #include "rtl.h" #include "tree.h" @@ -33,32 +35,30 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "optabs.h" #include "real.h" #include "recog.h" - -static void store_fixed_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, rtx)); -static void store_split_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, rtx)); -static rtx extract_fixed_bit_field PARAMS ((enum machine_mode, rtx, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - rtx, int)); -static rtx mask_rtx PARAMS ((enum machine_mode, int, - int, int)); -static rtx lshift_value PARAMS ((enum machine_mode, rtx, - int, int)); -static rtx extract_split_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, int)); -static void do_cmp_and_jump PARAMS ((rtx, rtx, enum rtx_code, - enum machine_mode, rtx)); - -/* Non-zero means divides or modulus operations are relatively cheap for +#include "langhooks.h" + +static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, rtx); +static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, rtx); +static rtx extract_fixed_bit_field (enum machine_mode, rtx, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, rtx, int); +static rtx mask_rtx (enum machine_mode, int, int, int); +static rtx lshift_value (enum machine_mode, rtx, int, int); +static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, int); +static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); + +/* Nonzero means divides or modulus operations are relatively cheap for powers of two, so don't use branches; emit the operation instead. Usually, this will mean that the MD file will emit non-branch sequences. */ -static int sdiv_pow2_cheap, smod_pow2_cheap; +static int sdiv_pow2_cheap[NUM_MACHINE_MODES]; +static int smod_pow2_cheap[NUM_MACHINE_MODES]; #ifndef SLOW_UNALIGNED_ACCESS #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT @@ -91,100 +91,66 @@ static int sdiv_pow2_cheap, smod_pow2_cheap; /* Cost of various pieces of RTL. Note that some of these are indexed by shift count and some by mode. 
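
   Illustration (editorial sketch, not part of the patch): the hunk above
   turns the single word_mode cost scalars into tables indexed by machine
   mode, and the shift tables by shift count as well. A minimal model of
   the lookup this enables, with entirely made-up cost numbers and a
   hypothetical two-mode enum:

     #include <assert.h>

     enum mini_mode { MM_SI, MM_DI, MM_NUM };

     // Made-up costs: shifts cheap in both modes, multiplies cheaper in
     // SImode than DImode, mirroring the shape of the real tables.
     static int mini_shift_cost[MM_NUM][8] = {
       { 0, 1, 1, 1, 1, 1, 1, 1 },   // SI
       { 0, 2, 2, 2, 2, 2, 2, 2 },   // DI
     };
     static int mini_mul_cost[MM_NUM] = { 4, 10 };

     // Per-mode answer to "is x << m cheaper than x * (1 << m)?".
     static int use_shift_p (enum mini_mode m, int count)
     {
       return mini_shift_cost[m][count] < mini_mul_cost[m];
     }

     int main (void)
     {
       assert (use_shift_p (MM_SI, 3) && use_shift_p (MM_DI, 3));
       return 0;
     }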
*/ -static int add_cost, negate_cost, zero_cost; -static int shift_cost[MAX_BITS_PER_WORD]; -static int shiftadd_cost[MAX_BITS_PER_WORD]; -static int shiftsub_cost[MAX_BITS_PER_WORD]; +static int zero_cost; +static int add_cost[NUM_MACHINE_MODES]; +static int neg_cost[NUM_MACHINE_MODES]; +static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; static int mul_cost[NUM_MACHINE_MODES]; static int div_cost[NUM_MACHINE_MODES]; static int mul_widen_cost[NUM_MACHINE_MODES]; static int mul_highpart_cost[NUM_MACHINE_MODES]; void -init_expmed () +init_expmed (void) { - /* This is "some random pseudo register" for purposes of calling recog - to see what insns exist. */ - rtx reg = gen_rtx_REG (word_mode, 10000); - rtx shift_insn, shiftadd_insn, shiftsub_insn; + rtx reg, shift_insn, shiftadd_insn, shiftsub_insn; + rtx shift_pat, shiftadd_pat, shiftsub_pat; + rtx pow2[MAX_BITS_PER_WORD]; + rtx cint[MAX_BITS_PER_WORD]; int dummy; - int m; + int m, n; enum machine_mode mode, wider_mode; start_sequence (); - reg = gen_rtx_REG (word_mode, 10000); - zero_cost = rtx_cost (const0_rtx, 0); - add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET); - - shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg, - gen_rtx_ASHIFT (word_mode, reg, - const0_rtx))); - - shiftadd_insn - = emit_insn (gen_rtx_SET (VOIDmode, reg, - gen_rtx_PLUS (word_mode, - gen_rtx_MULT (word_mode, - reg, const0_rtx), - reg))); - - shiftsub_insn - = emit_insn (gen_rtx_SET (VOIDmode, reg, - gen_rtx_MINUS (word_mode, - gen_rtx_MULT (word_mode, - reg, const0_rtx), - reg))); init_recog (); - shift_cost[0] = 0; - shiftadd_cost[0] = shiftsub_cost[0] = add_cost; - for (m = 1; m < MAX_BITS_PER_WORD; m++) { - shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000; - - XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m); - if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0) - shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET); - - XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) - = GEN_INT ((HOST_WIDE_INT) 1 << m); - if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0) - shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET); - - XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) - = GEN_INT ((HOST_WIDE_INT) 1 << m); - if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0) - shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET); + pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); + cint[m] = GEN_INT (m); } - negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET); - - sdiv_pow2_cheap - = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET) - <= 2 * add_cost); - smod_pow2_cheap - = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET) - <= 2 * add_cost); - for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) { reg = gen_rtx_REG (mode, 10000); - div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET); - mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET); + add_cost[mode] = rtx_cost (gen_rtx_PLUS (mode, reg, reg), SET); + neg_cost[mode] = rtx_cost (gen_rtx_NEG (mode, reg), SET); + div_cost[mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET); + mul_cost[mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET); + + sdiv_pow2_cheap[mode] + = (rtx_cost (gen_rtx_DIV (mode, reg, GEN_INT (32)), SET) + <= 2 * add_cost[mode]); + smod_pow2_cheap[mode] + = 
(rtx_cost (gen_rtx_MOD (mode, reg, GEN_INT (32)), SET) + <= 2 * add_cost[mode]); + wider_mode = GET_MODE_WIDER_MODE (mode); if (wider_mode != VOIDmode) { - mul_widen_cost[(int) wider_mode] + mul_widen_cost[wider_mode] = rtx_cost (gen_rtx_MULT (wider_mode, gen_rtx_ZERO_EXTEND (wider_mode, reg), gen_rtx_ZERO_EXTEND (wider_mode, reg)), SET); - mul_highpart_cost[(int) mode] + mul_highpart_cost[mode] = rtx_cost (gen_rtx_TRUNCATE (mode, gen_rtx_LSHIFTRT (wider_mode, @@ -196,6 +162,52 @@ init_expmed () GEN_INT (GET_MODE_BITSIZE (mode)))), SET); } + + shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_ASHIFT (mode, reg, + const0_rtx))); + + shiftadd_insn + = emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_PLUS (mode, + gen_rtx_MULT (mode, + reg, + const0_rtx), + reg))); + + shiftsub_insn + = emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_MINUS (mode, + gen_rtx_MULT (mode, + reg, + const0_rtx), + reg))); + + shift_pat = PATTERN (shift_insn); + shiftadd_pat = PATTERN (shiftadd_insn); + shiftsub_pat = PATTERN (shiftsub_insn); + + shift_cost[mode][0] = 0; + shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode]; + + n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); + for (m = 1; m < n; m++) + { + shift_cost[mode][m] = 32000; + XEXP (SET_SRC (shift_pat), 1) = cint[m]; + if (recog (shift_pat, shift_insn, &dummy) >= 0) + shift_cost[mode][m] = rtx_cost (SET_SRC (shift_pat), SET); + + shiftadd_cost[mode][m] = 32000; + XEXP (XEXP (SET_SRC (shiftadd_pat), 0), 1) = pow2[m]; + if (recog (shiftadd_pat, shiftadd_insn, &dummy) >= 0) + shiftadd_cost[mode][m] = rtx_cost (SET_SRC (shiftadd_pat), SET); + + shiftsub_cost[mode][m] = 32000; + XEXP (XEXP (SET_SRC (shiftsub_pat), 0), 1) = pow2[m]; + if (recog (shiftsub_pat, shiftsub_insn, &dummy) >= 0) + shiftsub_cost[mode][m] = rtx_cost (SET_SRC (shiftsub_pat), SET); + } } end_sequence (); @@ -206,9 +218,7 @@ init_expmed () useful if X is a CONST_INT. */ rtx -negate_rtx (mode, x) - enum machine_mode mode; - rtx x; +negate_rtx (enum machine_mode mode, rtx x) { rtx result = simplify_unary_operation (NEG, mode, x, mode); @@ -223,9 +233,7 @@ negate_rtx (mode, x) is false; else the mode of the specified operand. If OPNO is -1, all the caller cares about is whether the insn is available. */ enum machine_mode -mode_for_extraction (pattern, opno) - enum extraction_pattern pattern; - int opno; +mode_for_extraction (enum extraction_pattern pattern, int opno) { const struct insn_data *data; @@ -286,13 +294,9 @@ mode_for_extraction (pattern, opno) else, we use the mode of operand 3. */ rtx -store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) - rtx str_rtx; - unsigned HOST_WIDE_INT bitsize; - unsigned HOST_WIDE_INT bitnum; - enum machine_mode fieldmode; - rtx value; - HOST_WIDE_INT total_size; +store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, + rtx value, HOST_WIDE_INT total_size) { unsigned int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD; @@ -324,6 +328,53 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) value = protect_from_queue (value, 0); + /* Use vec_extract patterns for extracting parts of vectors whenever + available. 
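
   Illustration (editorial sketch, not part of the patch; note the store
   side below actually consults the vec_set optab, not vec_extract). The
   element index is the same bitnum / inner-size division tested in the
   condition below, here modelled on a hypothetical V4HI vector packed
   into a uint64_t, lane 0 in the low bits:

     #include <assert.h>
     #include <stdint.h>

     static uint64_t vec_set_lane (uint64_t vec, uint16_t val, unsigned bitnum)
     {
       unsigned inner_bits = 16;
       assert (bitnum % inner_bits == 0);    // same alignment check as the patch
       unsigned pos = bitnum / inner_bits;   // element number, as in the patch
       uint64_t mask = (uint64_t) 0xffff << (pos * inner_bits);
       return (vec & ~mask) | ((uint64_t) val << (pos * inner_bits));
     }

     int main (void)
     {
       uint64_t v = vec_set_lane (0, 0xabcd, 32);   // store into element 2
       assert (((v >> 32) & 0xffff) == 0xabcd);
       return 0;
     }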
*/ + if (VECTOR_MODE_P (GET_MODE (op0)) + && GET_CODE (op0) != MEM + && (vec_set_optab->handlers[GET_MODE (op0)].insn_code + != CODE_FOR_nothing) + && fieldmode == GET_MODE_INNER (GET_MODE (op0)) + && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) + && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) + { + enum machine_mode outermode = GET_MODE (op0); + enum machine_mode innermode = GET_MODE_INNER (outermode); + int icode = (int) vec_set_optab->handlers[outermode].insn_code; + int pos = bitnum / GET_MODE_BITSIZE (innermode); + rtx rtxpos = GEN_INT (pos); + rtx src = value; + rtx dest = op0; + rtx pat, seq; + enum machine_mode mode0 = insn_data[icode].operand[0].mode; + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + enum machine_mode mode2 = insn_data[icode].operand[2].mode; + + start_sequence (); + + if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) + src = copy_to_mode_reg (mode1, src); + + if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + rtxpos = copy_to_mode_reg (mode1, rtxpos); + + /* We could handle this, but we should always be called with a pseudo + for our targets and all insns should take them as outputs. */ + if (! (*insn_data[icode].operand[0].predicate) (dest, mode0) + || ! (*insn_data[icode].operand[1].predicate) (src, mode1) + || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + abort (); + pat = GEN_FCN (icode) (dest, src, rtxpos); + seq = get_insns (); + end_sequence (); + if (pat) + { + emit_insn (seq); + emit_insn (pat); + return dest; + } + } + if (flag_force_mem) { int old_generating_concat_p = generating_concat_p; @@ -347,7 +398,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) && (GET_CODE (op0) != MEM ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)) - && byte_offset % GET_MODE_SIZE (fieldmode) == 0) + && byte_offset % GET_MODE_SIZE (fieldmode) == 0) : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) || (offset * BITS_PER_UNIT % bitsize == 0 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) @@ -366,7 +417,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) subregs results in Severe Tire Damage. */ abort (); } - if (GET_CODE (op0) == REG) + if (REG_P (op0)) op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset); else op0 = adjust_address (op0, fieldmode, offset); @@ -415,13 +466,13 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) if (GET_CODE (op0) != MEM && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) && bitsize == GET_MODE_BITSIZE (fieldmode) - && (movstrict_optab->handlers[(int) fieldmode].insn_code + && (movstrict_optab->handlers[fieldmode].insn_code != CODE_FOR_nothing)) { - int icode = movstrict_optab->handlers[(int) fieldmode].insn_code; + int icode = movstrict_optab->handlers[fieldmode].insn_code; /* Get appropriate low part of the value being stored. */ - if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG) + if (GET_CODE (value) == CONST_INT || REG_P (value)) value = gen_lowpart (fieldmode, value); else if (!(GET_CODE (value) == SYMBOL_REF || GET_CODE (value) == LABEL_REF @@ -472,7 +523,9 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) VOIDmode, because that is what store_field uses to indicate that this is a bit field, but passing VOIDmode to operand_subword_force will result in an abort. 
*/ - fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); + fieldmode = GET_MODE (value); + if (fieldmode == VOIDmode) + fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); for (i = 0; i < nwords; i++) { @@ -488,10 +541,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) store_bit_field (op0, MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD), bitnum + bit_offset, word_mode, - operand_subword_force (value, wordnum, - (GET_MODE (value) == VOIDmode - ? fieldmode - : GET_MODE (value))), + operand_subword_force (value, wordnum, fieldmode), total_size); } return value; @@ -508,7 +558,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) if (offset != 0 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) { - if (GET_CODE (op0) != REG) + if (!REG_P (op0)) { /* Since this is a destination (lvalue), we can't copy it to a pseudo. We can trivially remove a SUBREG that does not @@ -535,7 +585,9 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) structure fields. */ if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) - value = gen_lowpart (word_mode, value); + value = gen_lowpart ((GET_MODE (value) == VOIDmode + ? word_mode : int_mode_for_mode (GET_MODE (value))), + value); /* Now OFFSET is nonzero only if OP0 is memory and is therefore always measured in bytes. */ @@ -545,7 +597,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) && !(bitsize == 1 && GET_CODE (value) == CONST_INT) /* Ensure insv's size is wide enough for this field. */ && (GET_MODE_BITSIZE (op_mode) >= bitsize) - && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))) { int xbitpos = bitpos; @@ -614,7 +666,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) /* We can't just change the mode, because this might clobber op0, and we will need the original value of op0 if insv fails. */ xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); - if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode) + if (REG_P (xop0) && GET_MODE (xop0) != maxmode) xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); /* On big-endian machines, we count bits from the most significant. @@ -675,7 +727,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) if (pat) emit_insn (pat); else - { + { delete_insns_since (last); store_fixed_bit_field (op0, offset, bitsize, bitpos, value); } @@ -699,10 +751,9 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) Note that protect_from_queue has already been done on OP0 and VALUE. */ static void -store_fixed_bit_field (op0, offset, bitsize, bitpos, value) - rtx op0; - unsigned HOST_WIDE_INT offset, bitsize, bitpos; - rtx value; +store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, + unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, rtx value) { enum machine_mode mode; unsigned int total_bits = BITS_PER_WORD; @@ -717,7 +768,7 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) and a field split across two bytes. Such cases are not supposed to be able to occur. 
*/ - if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + if (REG_P (op0) || GET_CODE (op0) == SUBREG) { if (offset != 0) abort (); @@ -737,8 +788,8 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) mode = GET_MODE (op0); if (GET_MODE_BITSIZE (mode) == 0 - || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) - mode = word_mode; + || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) + mode = word_mode; mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); @@ -815,7 +866,7 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) if (GET_MODE (value) != mode) { - if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG) + if ((REG_P (value) || GET_CODE (value) == SUBREG) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value))) value = gen_lowpart (mode, value); else @@ -834,7 +885,7 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) /* Now clear the chosen bits in OP0, except that if VALUE is -1 we need not bother. */ - subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0; + subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0; if (! all_one) { @@ -865,17 +916,15 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) This does not yet handle fields wider than BITS_PER_WORD. */ static void -store_split_bit_field (op0, bitsize, bitpos, value) - rtx op0; - unsigned HOST_WIDE_INT bitsize, bitpos; - rtx value; +store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, rtx value) { unsigned int unit; unsigned int bitsdone = 0; /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that much at a time. */ - if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + if (REG_P (op0) || GET_CODE (op0) == SUBREG) unit = BITS_PER_WORD; else unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); @@ -964,7 +1013,7 @@ store_split_bit_field (op0, bitsize, bitpos, value) GET_MODE (SUBREG_REG (op0))); offset = 0; } - else if (GET_CODE (op0) == REG) + else if (REG_P (op0)) { word = operand_subword_force (op0, offset, GET_MODE (op0)); offset = 0; @@ -1001,15 +1050,10 @@ store_split_bit_field (op0, bitsize, bitpos, value) if they are equally easy. */ rtx -extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, - target, mode, tmode, total_size) - rtx str_rtx; - unsigned HOST_WIDE_INT bitsize; - unsigned HOST_WIDE_INT bitnum; - int unsignedp; - rtx target; - enum machine_mode mode, tmode; - HOST_WIDE_INT total_size; +extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, + enum machine_mode mode, enum machine_mode tmode, + HOST_WIDE_INT total_size) { unsigned int unit = (GET_CODE (str_rtx) == MEM) ? 
BITS_PER_UNIT : BITS_PER_WORD; @@ -1032,29 +1076,19 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (tmode == VOIDmode) tmode = mode; + while (GET_CODE (op0) == SUBREG) { - int outer_size = GET_MODE_BITSIZE (GET_MODE (op0)); - int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0))); - - offset += SUBREG_BYTE (op0) / UNITS_PER_WORD; - - inner_size = MIN (inner_size, BITS_PER_WORD); - - if (BYTES_BIG_ENDIAN && (outer_size < inner_size)) + bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT; + if (bitpos > unit) { - bitpos += inner_size - outer_size; - if (bitpos > unit) - { - offset += (bitpos / unit); - bitpos %= unit; - } + offset += (bitpos / unit); + bitpos %= unit; } - op0 = SUBREG_REG (op0); } - if (GET_CODE (op0) == REG + if (REG_P (op0) && mode == GET_MODE (op0) && bitnum == 0 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) @@ -1063,6 +1097,61 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, return op0; } + /* Use vec_extract patterns for extracting parts of vectors whenever + available. */ + if (VECTOR_MODE_P (GET_MODE (op0)) + && GET_CODE (op0) != MEM + && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code + != CODE_FOR_nothing) + && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) + == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) + { + enum machine_mode outermode = GET_MODE (op0); + enum machine_mode innermode = GET_MODE_INNER (outermode); + int icode = (int) vec_extract_optab->handlers[outermode].insn_code; + int pos = bitnum / GET_MODE_BITSIZE (innermode); + rtx rtxpos = GEN_INT (pos); + rtx src = op0; + rtx dest = NULL, pat, seq; + enum machine_mode mode0 = insn_data[icode].operand[0].mode; + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + enum machine_mode mode2 = insn_data[icode].operand[2].mode; + + if (innermode == tmode || innermode == mode) + dest = target; + + if (!dest) + dest = gen_reg_rtx (innermode); + + start_sequence (); + + if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)) + dest = copy_to_mode_reg (mode0, dest); + + if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) + src = copy_to_mode_reg (mode1, src); + + if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + rtxpos = copy_to_mode_reg (mode1, rtxpos); + + /* We could handle this, but we should always be called with a pseudo + for our targets and all insns should take them as outputs. */ + if (! (*insn_data[icode].operand[0].predicate) (dest, mode0) + || ! (*insn_data[icode].operand[1].predicate) (src, mode1) + || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + abort (); + + pat = GEN_FCN (icode) (dest, src, rtxpos); + seq = get_insns (); + end_sequence (); + if (pat) + { + emit_insn (seq); + emit_insn (pat); + return dest; + } + } + /* Make sure we are playing with integral modes. Pun with subregs if we aren't. */ { @@ -1087,9 +1176,13 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, set_mem_expr (op0, 0); } - /* ??? We currently assume TARGET is at least as big as BITSIZE. - If that's wrong, the solution is to test for it and set TARGET to 0 - if needed. */ + /* Extraction of a full-word or multi-word value from a structure + in a register or aligned memory can be done with just a SUBREG. + A subword value in the least significant part of a register + can also be extracted with a SUBREG. For this, we need the + byte offset of the value in op0. */ + + byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD; /* If OP0 is a register, BITPOS must count within a word. 
But as we have it, it counts within whatever size OP0 now has. @@ -1099,38 +1192,38 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, && unit > GET_MODE_BITSIZE (GET_MODE (op0))) bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); - /* Extracting a full-word or multi-word value - from a structure in a register or aligned memory. - This can be done with just SUBREG. - So too extracting a subword value in - the least significant part of the register. */ - - byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT - + (offset * UNITS_PER_WORD); + /* ??? We currently assume TARGET is at least as big as BITSIZE. + If that's wrong, the solution is to test for it and set TARGET to 0 + if needed. */ - mode1 = (VECTOR_MODE_P (tmode) - ? mode - : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)); - - if (((GET_CODE (op0) != MEM - && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), - GET_MODE_BITSIZE (GET_MODE (op0))) - && GET_MODE_SIZE (mode1) != 0 - && byte_offset % GET_MODE_SIZE (mode1) == 0) - || (GET_CODE (op0) == MEM - && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) - || (offset * BITS_PER_UNIT % bitsize == 0 - && MEM_ALIGN (op0) % bitsize == 0)))) - && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) - && bitpos % BITS_PER_WORD == 0) - || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode - /* ??? The big endian test here is wrong. This is correct - if the value is in a register, and if mode_for_size is not - the same mode as op0. This causes us to get unnecessarily - inefficient code from the Thumb port when -mbig-endian. */ - && (BYTES_BIG_ENDIAN - ? bitpos + bitsize == BITS_PER_WORD - : bitpos == 0)))) + /* Only scalar integer modes can be converted via subregs. There is an + additional problem for FP modes here in that they can have a precision + which is different from the size. mode_for_size uses precision, but + we want a mode based on the size, so we must avoid calling it for FP + modes. */ + mode1 = (SCALAR_INT_MODE_P (tmode) + ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) + : mode); + + if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) + && bitpos % BITS_PER_WORD == 0) + || (mode1 != BLKmode + /* ??? The big endian test here is wrong. This is correct + if the value is in a register, and if mode_for_size is not + the same mode as op0. This causes us to get unnecessarily + inefficient code from the Thumb port when -mbig-endian. */ + && (BYTES_BIG_ENDIAN + ? bitpos + bitsize == BITS_PER_WORD + : bitpos == 0))) + && ((GET_CODE (op0) != MEM + && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), + GET_MODE_BITSIZE (GET_MODE (op0))) + && GET_MODE_SIZE (mode1) != 0 + && byte_offset % GET_MODE_SIZE (mode1) == 0) + || (GET_CODE (op0) == MEM + && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) + || (offset * BITS_PER_UNIT % bitsize == 0 + && MEM_ALIGN (op0) % bitsize == 0))))) { if (mode1 != GET_MODE (op0)) { @@ -1144,9 +1237,9 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, /* Else we've got some float mode source being extracted into a different float mode destination -- this combination of subregs results in Severe Tire Damage. */ - abort (); + goto no_subreg_mode_swap; } - if (GET_CODE (op0) == REG) + if (REG_P (op0)) op0 = gen_rtx_SUBREG (mode1, op0, byte_offset); else op0 = adjust_address (op0, mode1, offset); @@ -1155,6 +1248,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, return convert_to_mode (tmode, op0, unsignedp); return op0; } + no_subreg_mode_swap: /* Handle fields bigger than a word. 
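
   Illustration (editorial sketch, not part of the patch): a field bigger
   than a word is gathered one word at a time, as the loop below does with
   a subword extract_bit_field per iteration; here a word-aligned 64-bit
   field from 32-bit words, little-endian word order assumed:

     #include <assert.h>
     #include <stdint.h>

     static uint64_t extract_two_words (const uint32_t *src, unsigned bitnum)
     {
       unsigned first = bitnum / 32;
       assert (bitnum % 32 == 0);    // word-aligned field, for simplicity
       return (uint64_t) src[first] | ((uint64_t) src[first + 1] << 32);
     }

     int main (void)
     {
       uint32_t buf[3] = { 0xdeadbeefu, 0x55667788u, 0x11223344u };
       assert (extract_two_words (buf, 32) == 0x1122334455667788ull);
       return 0;
     }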
*/ @@ -1168,7 +1262,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; unsigned int i; - if (target == 0 || GET_CODE (target) != REG) + if (target == 0 || !REG_P (target)) target = gen_reg_rtx (mode); /* Indicate for flow that the entire target reg is being set. */ @@ -1252,7 +1346,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (offset != 0 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) { - if (GET_CODE (op0) != REG) + if (!REG_P (op0)) op0 = copy_to_reg (op0); op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), op0, (offset * UNITS_PER_WORD)); @@ -1268,7 +1362,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, { if (HAVE_extzv && (GET_MODE_BITSIZE (extzv_mode) >= bitsize) - && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode)))) { unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; @@ -1336,7 +1430,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, SImode). to make it acceptable to the format of extzv. */ if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode) goto extzv_loses; - if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode) + if (REG_P (xop0) && GET_MODE (xop0) != maxmode) xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); /* On big-endian machines, we count bits from the most significant. @@ -1356,7 +1450,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (GET_MODE (xtarget) != maxmode) { - if (GET_CODE (xtarget) == REG) + if (REG_P (xtarget)) { int wider = (GET_MODE_SIZE (maxmode) > GET_MODE_SIZE (GET_MODE (xtarget))); @@ -1402,7 +1496,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, { if (HAVE_extv && (GET_MODE_BITSIZE (extv_mode) >= bitsize) - && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode)))) { int xbitpos = bitpos, xoffset = offset; @@ -1464,7 +1558,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, SImode) to make it acceptable to the format of extv. */ if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode) goto extv_loses; - if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode) + if (REG_P (xop0) && GET_MODE (xop0) != maxmode) xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); /* On big-endian machines, we count bits from the most significant. @@ -1485,7 +1579,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (GET_MODE (xtarget) != maxmode) { - if (GET_CODE (xtarget) == REG) + if (REG_P (xtarget)) { int wider = (GET_MODE_SIZE (maxmode) > GET_MODE_SIZE (GET_MODE (xtarget))); @@ -1567,17 +1661,16 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ static rtx -extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, - target, unsignedp) - enum machine_mode tmode; - rtx op0, target; - unsigned HOST_WIDE_INT offset, bitsize, bitpos; - int unsignedp; +extract_fixed_bit_field (enum machine_mode tmode, rtx op0, + unsigned HOST_WIDE_INT offset, + unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, rtx target, + int unsignedp) { unsigned int total_bits = BITS_PER_WORD; enum machine_mode mode; - if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG) + if (GET_CODE (op0) == SUBREG || REG_P (op0)) { /* Special treatment for a bit field split across two registers. 
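
   Illustration (editorial sketch, not part of the patch): the split case
   combines low bits of the field from the first word with high bits from
   the second, as extract_split_bit_field does; unsigned (zero-extending)
   flavor on 32-bit words:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t extract_split_32 (uint32_t w0, uint32_t w1,
                                       unsigned bitpos, unsigned bitsize)
     {
       uint32_t mask = bitsize == 32 ? 0xffffffffu : (1u << bitsize) - 1;
       assert (bitpos < 32 && bitpos + bitsize > 32 && bitsize <= 32);
       return ((w0 >> bitpos) | (w1 << (32 - bitpos))) & mask;
     }

     int main (void)
     {
       // 8-bit field at bit 28: 4 bits from w0's top, 4 from w1's bottom.
       assert (extract_split_32 (0xf0000000u, 0x0000000au, 28, 8) == 0xaf);
       return 0;
     }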
*/ if (bitsize + bitpos > BITS_PER_WORD) @@ -1639,9 +1732,7 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, tree amount = build_int_2 (bitpos, 0); /* Maybe propagate the target for the shift. */ /* But not if we will return it--could confuse integrate.c. */ - rtx subtarget = (target != 0 && GET_CODE (target) == REG - && !REG_FUNCTION_VALUE_P (target) - ? target : 0); + rtx subtarget = (target != 0 && REG_P (target) ? target : 0); if (tmode != mode) subtarget = 0; op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1); } @@ -1680,10 +1771,7 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0); /* Maybe propagate the target for the shift. */ - /* But not if we will return the result--could confuse integrate.c. */ - rtx subtarget = (target != 0 && GET_CODE (target) == REG - && ! REG_FUNCTION_VALUE_P (target) - ? target : 0); + rtx subtarget = (target != 0 && REG_P (target) ? target : 0); op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); } @@ -1699,13 +1787,13 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, BITSIZE+BITPOS is too small for MODE. */ static rtx -mask_rtx (mode, bitpos, bitsize, complement) - enum machine_mode mode; - int bitpos, bitsize, complement; +mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) { HOST_WIDE_INT masklow, maskhigh; - if (bitpos < HOST_BITS_PER_WIDE_INT) + if (bitsize == 0) + masklow = 0; + else if (bitpos < HOST_BITS_PER_WIDE_INT) masklow = (HOST_WIDE_INT) -1 << bitpos; else masklow = 0; @@ -1719,7 +1807,9 @@ mask_rtx (mode, bitpos, bitsize, complement) else maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT); - if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) + if (bitsize == 0) + maskhigh = 0; + else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) maskhigh &= ((unsigned HOST_WIDE_INT) -1 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); else @@ -1738,10 +1828,7 @@ mask_rtx (mode, bitpos, bitsize, complement) VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */ static rtx -lshift_value (mode, value, bitpos, bitsize) - enum machine_mode mode; - rtx value; - int bitpos, bitsize; +lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) { unsigned HOST_WIDE_INT v = INTVAL (value); HOST_WIDE_INT low, high; @@ -1771,10 +1858,8 @@ lshift_value (mode, value, bitpos, bitsize) UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ static rtx -extract_split_bit_field (op0, bitsize, bitpos, unsignedp) - rtx op0; - unsigned HOST_WIDE_INT bitsize, bitpos; - int unsignedp; +extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, int unsignedp) { unsigned int unit; unsigned int bitsdone = 0; @@ -1783,7 +1868,7 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp) /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that much at a time. */ - if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + if (REG_P (op0) || GET_CODE (op0) == SUBREG) unit = BITS_PER_WORD; else unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); @@ -1817,7 +1902,7 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp) GET_MODE (SUBREG_REG (op0))); offset = 0; } - else if (GET_CODE (op0) == REG) + else if (REG_P (op0)) { word = operand_subword_force (op0, offset, GET_MODE (op0)); offset = 0; @@ -1873,8 +1958,7 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp) /* Add INC into TARGET. 
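
   Illustration (editorial sketch, not part of the patch): the mask_rtx
   hunk above adds the bitsize == 0 case, which must yield an all-zero
   mask rather than all ones. The same two-halves construction over a
   pair of 64-bit words (bitpos < 128 assumed):

     #include <assert.h>
     #include <stdint.h>

     static void mask_2x64 (int bitpos, int bitsize,
                            uint64_t *masklow, uint64_t *maskhigh)
     {
       // Low half: ones from bitpos up, trimmed at bitpos + bitsize.
       if (bitsize == 0)
         *masklow = 0;
       else if (bitpos < 64)
         *masklow = ~0ull << bitpos;
       else
         *masklow = 0;
       if (bitpos + bitsize < 64)
         *masklow &= ~(~0ull << (bitpos + bitsize));

       // High half, with the same bitsize == 0 guard the patch adds.
       *maskhigh = bitpos <= 64 ? ~0ull : ~0ull << (bitpos - 64);
       if (bitsize == 0)
         *maskhigh = 0;
       else if (bitpos + bitsize > 64 && bitpos + bitsize < 128)
         *maskhigh &= ~0ull >> (128 - bitpos - bitsize);
       else if (bitpos + bitsize <= 64)
         *maskhigh = 0;
     }

     int main (void)
     {
       uint64_t lo, hi;
       mask_2x64 (60, 8, &lo, &hi);   // a field straddling the halves
       assert (lo == 0xf000000000000000ull && hi == 0xfull);
       mask_2x64 (0, 0, &lo, &hi);    // the newly handled empty field
       assert (lo == 0 && hi == 0);
       return 0;
     }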
*/ void -expand_inc (target, inc) - rtx target, inc; +expand_inc (rtx target, rtx inc) { rtx value = expand_binop (GET_MODE (target), add_optab, target, inc, @@ -1886,8 +1970,7 @@ expand_inc (target, inc) /* Subtract DEC from TARGET. */ void -expand_dec (target, dec) - rtx target, dec; +expand_dec (rtx target, rtx dec) { rtx value = expand_binop (GET_MODE (target), sub_optab, target, dec, @@ -1904,13 +1987,8 @@ expand_dec (target, dec) Return the rtx for where the value is. */ rtx -expand_shift (code, mode, shifted, amount, target, unsignedp) - enum tree_code code; - enum machine_mode mode; - rtx shifted; - tree amount; - rtx target; - int unsignedp; +expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, + tree amount, rtx target, int unsignedp) { rtx op1, temp = 0; int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); @@ -1923,19 +2001,17 @@ expand_shift (code, mode, shifted, amount, target, unsignedp) op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0); -#ifdef SHIFT_COUNT_TRUNCATED if (SHIFT_COUNT_TRUNCATED) { if (GET_CODE (op1) == CONST_INT - && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= + && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode))) - op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) + op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) % GET_MODE_BITSIZE (mode)); else if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1)) op1 = SUBREG_REG (op1); } -#endif if (op1 == const0_rtx) return shifted; @@ -2082,30 +2158,40 @@ struct algorithm char log[MAX_BITS_PER_WORD]; }; -static void synth_mult PARAMS ((struct algorithm *, - unsigned HOST_WIDE_INT, - int)); -static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT, - int, int, - unsigned HOST_WIDE_INT *, - int *, int *)); -static unsigned HOST_WIDE_INT invert_mod2n PARAMS ((unsigned HOST_WIDE_INT, - int)); +/* Indicates the type of fixup needed after a constant multiplication. + BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that + the result should be negated, and ADD_VARIANT means that the + multiplicand should be added to the result. */ +enum mult_variant {basic_variant, negate_variant, add_variant}; + +static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, + int, enum machine_mode mode); +static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, + struct algorithm *, enum mult_variant *, int); +static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, + const struct algorithm *, enum mult_variant); +static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, + int, unsigned HOST_WIDE_INT *, + int *, int *); +static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); +static rtx extract_high_half (enum machine_mode, rtx); +static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, + int, int); /* Compute and return the best algorithm for multiplying by T. The algorithm must cost less than cost_limit If retval.cost >= COST_LIMIT, no algorithm was found and all - other field of the returned struct are undefined. */ + other field of the returned struct are undefined. + MODE is the machine mode of the multiplication. 
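
   Illustration (editorial sketch, not part of the patch): one sequence
   synth_mult can find for t = 10, strip the low zero bit and use 5 = 4 + 1:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t times10 (uint32_t x)
     {
       uint32_t accum = (x << 2) + x;   // alg_add_t_m2: x*5
       return accum << 1;               // alg_shift:    x*10
     }

     int main (void)
     {
       uint32_t x;
       for (x = 0; x < 1000; x++)
         assert (times10 (x) == x * 10);
       return 0;
     }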
*/ static void -synth_mult (alg_out, t, cost_limit) - struct algorithm *alg_out; - unsigned HOST_WIDE_INT t; - int cost_limit; +synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, + int cost_limit, enum machine_mode mode) { int m; struct algorithm *alg_in, *best_alg; int cost; unsigned HOST_WIDE_INT q; + int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); /* Indicate that no algorithm is yet found. If no algorithm is found, this value will be returned and indicate failure. */ @@ -2114,6 +2200,9 @@ synth_mult (alg_out, t, cost_limit) if (cost_limit <= 0) return; + /* Restrict the bits of "t" to the multiplication's mode. */ + t &= GET_MODE_MASK (mode); + /* t == 1 can be done in zero cost. */ if (t == 1) { @@ -2140,8 +2229,8 @@ synth_mult (alg_out, t, cost_limit) /* We'll be needing a couple extra algorithm structures now. */ - alg_in = (struct algorithm *)alloca (sizeof (struct algorithm)); - best_alg = (struct algorithm *)alloca (sizeof (struct algorithm)); + alg_in = alloca (sizeof (struct algorithm)); + best_alg = alloca (sizeof (struct algorithm)); /* If we have a group of zero bits at the low-order part of T, try multiplying by the remaining bits and then doing a shift. */ @@ -2149,11 +2238,11 @@ synth_mult (alg_out, t, cost_limit) if ((t & 1) == 0) { m = floor_log2 (t & -t); /* m = number of low zero bits */ - if (m < BITS_PER_WORD) + if (m < maxm) { q = t >> m; - cost = shift_cost[m]; - synth_mult (alg_in, q, cost_limit - cost); + cost = shift_cost[mode][m]; + synth_mult (alg_in, q, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2187,8 +2276,8 @@ synth_mult (alg_out, t, cost_limit) { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - cost = add_cost; - synth_mult (alg_in, t + 1, cost_limit - cost); + cost = add_cost[mode]; + synth_mult (alg_in, t + 1, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2204,8 +2293,8 @@ synth_mult (alg_out, t, cost_limit) { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. 
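
   Illustration (editorial sketch, not part of the patch): the two cases
   above, concretely, on 32-bit operands:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t times7 (uint32_t x) { return (x << 3) - x; }  // 7+1 = 8
     static uint32_t times9 (uint32_t x) { return (x << 3) + x; }  // 9-1 = 8

     int main (void)
     {
       uint32_t x;
       for (x = 0; x < 1000; x++)
         {
           assert (times7 (x) == x * 7);   // ...111 case: by 8, then subtract
           assert (times9 (x) == x * 9);   // ...001 case: by 8, then add
         }
       return 0;
     }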
*/ - cost = add_cost; - synth_mult (alg_in, t - 1, cost_limit - cost); + cost = add_cost[mode]; + synth_mult (alg_in, t - 1, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2234,10 +2323,12 @@ synth_mult (alg_out, t, cost_limit) unsigned HOST_WIDE_INT d; d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; - if (t % d == 0 && t > d && m < BITS_PER_WORD) + if (t % d == 0 && t > d && m < maxm) { - cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]); - synth_mult (alg_in, t / d, cost_limit - cost); + cost = add_cost[mode] + shift_cost[mode][m]; + if (shiftadd_cost[mode][m] < cost) + cost = shiftadd_cost[mode][m]; + synth_mult (alg_in, t / d, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2253,10 +2344,12 @@ synth_mult (alg_out, t, cost_limit) } d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; - if (t % d == 0 && t > d && m < BITS_PER_WORD) + if (t % d == 0 && t > d && m < maxm) { - cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]); - synth_mult (alg_in, t / d, cost_limit - cost); + cost = add_cost[mode] + shift_cost[mode][m]; + if (shiftsub_cost[mode][m] < cost) + cost = shiftsub_cost[mode][m]; + synth_mult (alg_in, t / d, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2278,10 +2371,10 @@ synth_mult (alg_out, t, cost_limit) q = t - 1; q = q & -q; m = exact_log2 (q); - if (m >= 0 && m < BITS_PER_WORD) + if (m >= 0 && m < maxm) { - cost = shiftadd_cost[m]; - synth_mult (alg_in, (t - 1) >> m, cost_limit - cost); + cost = shiftadd_cost[mode][m]; + synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2297,10 +2390,10 @@ synth_mult (alg_out, t, cost_limit) q = t + 1; q = q & -q; m = exact_log2 (q); - if (m >= 0 && m < BITS_PER_WORD) + if (m >= 0 && m < maxm) { - cost = shiftsub_cost[m]; - synth_mult (alg_in, (t + 1) >> m, cost_limit - cost); + cost = shiftsub_cost[mode][m]; + synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2335,6 +2428,198 @@ synth_mult (alg_out, t, cost_limit) alg_out->ops * sizeof *alg_out->log); } +/* Find the cheapest way of multiplying a value of mode MODE by VAL. + Try three variations: + + - a shift/add sequence based on VAL itself + - a shift/add sequence based on -VAL, followed by a negation + - a shift/add sequence based on VAL - 1, followed by an addition. + + Return true if the cheapest of these cost less than MULT_COST, + describing the algorithm in *ALG and final fixup in *VARIANT. */ + +static bool +choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, + struct algorithm *alg, enum mult_variant *variant, + int mult_cost) +{ + struct algorithm alg2; + + *variant = basic_variant; + synth_mult (alg, val, mult_cost, mode); + + /* This works only if the inverted value actually fits in an + `unsigned int' */ + if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) + { + synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode], + mode); + alg2.cost += neg_cost[mode]; + if (alg2.cost < alg->cost) + *alg = alg2, *variant = negate_variant; + } + + /* This proves very useful for division-by-constant. */ + synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode], + mode); + alg2.cost += add_cost[mode]; + if (alg2.cost < alg->cost) + *alg = alg2, *variant = add_variant; + + return alg->cost < mult_cost; +} + +/* A subroutine of expand_mult, used for constant multiplications. + Multiply OP0 by VAL in mode MODE, storing the result in TARGET if + convenient. 
Use the shift/add sequence described by ALG and apply + the final fixup specified by VARIANT. */ + +static rtx +expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, + rtx target, const struct algorithm *alg, + enum mult_variant variant) +{ + HOST_WIDE_INT val_so_far; + rtx insn, accum, tem; + int opno; + enum machine_mode nmode; + + /* op0 must be register to make mult_cost match the precomputed + shiftadd_cost array. */ + op0 = protect_from_queue (op0, 0); + + /* Avoid referencing memory over and over. + For speed, but also for correctness when mem is volatile. */ + if (GET_CODE (op0) == MEM) + op0 = force_reg (mode, op0); + + /* ACCUM starts out either as OP0 or as a zero, depending on + the first operation. */ + + if (alg->op[0] == alg_zero) + { + accum = copy_to_mode_reg (mode, const0_rtx); + val_so_far = 0; + } + else if (alg->op[0] == alg_m) + { + accum = copy_to_mode_reg (mode, op0); + val_so_far = 1; + } + else + abort (); + + for (opno = 1; opno < alg->ops; opno++) + { + int log = alg->log[opno]; + int preserve = preserve_subexpressions_p (); + rtx shift_subtarget = preserve ? 0 : accum; + rtx add_target + = (opno == alg->ops - 1 && target != 0 && variant != add_variant + && ! preserve) + ? target : 0; + rtx accum_target = preserve ? 0 : accum; + + switch (alg->op[opno]) + { + case alg_shift: + accum = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), NULL_RTX, 0); + val_so_far <<= log; + break; + + case alg_add_t_m2: + tem = expand_shift (LSHIFT_EXPR, mode, op0, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_PLUS (mode, accum, tem), + add_target ? add_target : accum_target); + val_so_far += (HOST_WIDE_INT) 1 << log; + break; + + case alg_sub_t_m2: + tem = expand_shift (LSHIFT_EXPR, mode, op0, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_MINUS (mode, accum, tem), + add_target ? add_target : accum_target); + val_so_far -= (HOST_WIDE_INT) 1 << log; + break; + + case alg_add_t2_m: + accum = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), shift_subtarget, + 0); + accum = force_operand (gen_rtx_PLUS (mode, accum, op0), + add_target ? add_target : accum_target); + val_so_far = (val_so_far << log) + 1; + break; + + case alg_sub_t2_m: + accum = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), shift_subtarget, 0); + accum = force_operand (gen_rtx_MINUS (mode, accum, op0), + add_target ? add_target : accum_target); + val_so_far = (val_so_far << log) - 1; + break; + + case alg_add_factor: + tem = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_PLUS (mode, accum, tem), + add_target ? add_target : accum_target); + val_so_far += val_so_far << log; + break; + + case alg_sub_factor: + tem = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_MINUS (mode, tem, accum), + (add_target ? add_target + : preserve ? 0 : tem)); + val_so_far = (val_so_far << log) - val_so_far; + break; + + default: + abort (); + } + + /* Write a REG_EQUAL note on the last insn so that we can cse + multiplication sequences. Note that if ACCUM is a SUBREG, + we've set the inner register and must properly indicate + that. 
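
   Illustration (editorial sketch, not part of the patch): the loop above
   shadows the constant in val_so_far precisely so the REG_EQUAL note, and
   the final val != val_so_far check, can be stated. Tracing t = 45 through
   one plausible sequence:

     #include <assert.h>
     #include <stdint.h>

     int main (void)
     {
       uint32_t x = 123456789u;              // arbitrary input
       uint32_t accum = x;                   // alg_m
       int64_t val_so_far = 1;

       accum = (accum << 2) + x;             // alg_add_t2_m, log = 2
       val_so_far = (val_so_far << 2) + 1;   // now 5

       accum += accum << 3;                  // alg_add_factor, log = 3
       val_so_far += val_so_far << 3;        // now 45

       assert (val_so_far == 45);            // mirrors the final sanity check
       assert (accum == x * 45u);
       return 0;
     }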
*/ + + tem = op0, nmode = mode; + if (GET_CODE (accum) == SUBREG) + { + nmode = GET_MODE (SUBREG_REG (accum)); + tem = gen_lowpart (nmode, op0); + } + + insn = get_last_insn (); + set_unique_reg_note (insn, REG_EQUAL, + gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far))); + } + + if (variant == negate_variant) + { + val_so_far = -val_so_far; + accum = expand_unop (mode, neg_optab, accum, target, 0); + } + else if (variant == add_variant) + { + val_so_far = val_so_far + 1; + accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); + } + + /* Compare only the bits of val and val_so_far that are significant + in the result mode, to avoid sign-/zero-extension confusion. */ + val &= GET_MODE_MASK (mode); + val_so_far &= GET_MODE_MASK (mode); + if (val != val_so_far) + abort (); + + return accum; +} + /* Perform a multiplication and return an rtx for the result. MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); TARGET is a suggestion for where to store the result (an rtx). @@ -2344,12 +2629,12 @@ synth_mult (alg_out, t, cost_limit) you should swap the two operands if OP0 would be constant. */ rtx -expand_mult (mode, op0, op1, target, unsignedp) - enum machine_mode mode; - rtx op0, op1, target; - int unsignedp; +expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, + int unsignedp) { rtx const_op1 = op1; + enum mult_variant variant; + struct algorithm algorithm; /* synth_mult does an `unsigned int' multiply. As long as the mode is less than or equal in size to `unsigned int' this doesn't matter. @@ -2376,188 +2661,36 @@ expand_mult (mode, op0, op1, target, unsignedp) that it seems better to use synth_mult always. */ if (const_op1 && GET_CODE (const_op1) == CONST_INT - && (unsignedp || ! flag_trapv)) + && (unsignedp || !flag_trapv)) { - struct algorithm alg; - struct algorithm alg2; - HOST_WIDE_INT val = INTVAL (op1); - HOST_WIDE_INT val_so_far; - rtx insn; - int mult_cost; - enum {basic_variant, negate_variant, add_variant} variant = basic_variant; - - /* op0 must be register to make mult_cost match the precomputed - shiftadd_cost array. */ - op0 = force_reg (mode, op0); - - /* Try to do the computation three ways: multiply by the negative of OP1 - and then negate, do the multiplication directly, or do multiplication - by OP1 - 1. */ - - mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET); - mult_cost = MIN (12 * add_cost, mult_cost); - - synth_mult (&alg, val, mult_cost); - - /* This works only if the inverted value actually fits in an - `unsigned int' */ - if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) - { - synth_mult (&alg2, - val, - (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost); - if (alg2.cost + negate_cost < alg.cost) - alg = alg2, variant = negate_variant; - } - - /* This proves very useful for division-by-constant. */ - synth_mult (&alg2, val - 1, - (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost); - if (alg2.cost + add_cost < alg.cost) - alg = alg2, variant = add_variant; - - if (alg.cost < mult_cost) - { - /* We found something cheaper than a multiply insn. */ - int opno; - rtx accum, tem; - enum machine_mode nmode; - - op0 = protect_from_queue (op0, 0); - - /* Avoid referencing memory over and over. - For speed, but also for correctness when mem is volatile. */ - if (GET_CODE (op0) == MEM) - op0 = force_reg (mode, op0); - - /* ACCUM starts out either as OP0 or as a zero, depending on - the first operation. 
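
   Illustration (editorial sketch, not part of the patch): the two fixups
   applied after the shift/add sequence in the new code above, negate_variant
   computes x * -9 as -(x * 9), and add_variant reaches x * 10 from the
   cheaper sequence for 9 plus one add:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t times_minus9 (uint32_t x) { return -((x << 3) + x); }
     static uint32_t times10_addv (uint32_t x) { return ((x << 3) + x) + x; }

     int main (void)
     {
       uint32_t x;
       for (x = 0; x < 1000; x++)
         {
           assert (times_minus9 (x) == x * (uint32_t) -9);  // wraps mod 2^32
           assert (times10_addv (x) == x * 10);
         }
       return 0;
     }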
*/ - - if (alg.op[0] == alg_zero) - { - accum = copy_to_mode_reg (mode, const0_rtx); - val_so_far = 0; - } - else if (alg.op[0] == alg_m) - { - accum = copy_to_mode_reg (mode, op0); - val_so_far = 1; - } - else - abort (); + int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET); + mult_cost = MIN (12 * add_cost[mode], mult_cost); - for (opno = 1; opno < alg.ops; opno++) - { - int log = alg.log[opno]; - int preserve = preserve_subexpressions_p (); - rtx shift_subtarget = preserve ? 0 : accum; - rtx add_target - = (opno == alg.ops - 1 && target != 0 && variant != add_variant - && ! preserve) - ? target : 0; - rtx accum_target = preserve ? 0 : accum; - - switch (alg.op[opno]) - { - case alg_shift: - accum = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), NULL_RTX, 0); - val_so_far <<= log; - break; - - case alg_add_t_m2: - tem = expand_shift (LSHIFT_EXPR, mode, op0, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_PLUS (mode, accum, tem), - add_target - ? add_target : accum_target); - val_so_far += (HOST_WIDE_INT) 1 << log; - break; - - case alg_sub_t_m2: - tem = expand_shift (LSHIFT_EXPR, mode, op0, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_MINUS (mode, accum, tem), - add_target - ? add_target : accum_target); - val_so_far -= (HOST_WIDE_INT) 1 << log; - break; - - case alg_add_t2_m: - accum = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), shift_subtarget, - 0); - accum = force_operand (gen_rtx_PLUS (mode, accum, op0), - add_target - ? add_target : accum_target); - val_so_far = (val_so_far << log) + 1; - break; - - case alg_sub_t2_m: - accum = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), shift_subtarget, - 0); - accum = force_operand (gen_rtx_MINUS (mode, accum, op0), - add_target - ? add_target : accum_target); - val_so_far = (val_so_far << log) - 1; - break; - - case alg_add_factor: - tem = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_PLUS (mode, accum, tem), - add_target - ? add_target : accum_target); - val_so_far += val_so_far << log; - break; - - case alg_sub_factor: - tem = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_MINUS (mode, tem, accum), - (add_target ? add_target - : preserve ? 0 : tem)); - val_so_far = (val_so_far << log) - val_so_far; - break; - - default: - abort (); - } - - /* Write a REG_EQUAL note on the last insn so that we can cse - multiplication sequences. Note that if ACCUM is a SUBREG, - we've set the inner register and must properly indicate - that. */ - - tem = op0, nmode = mode; - if (GET_CODE (accum) == SUBREG) - { - nmode = GET_MODE (SUBREG_REG (accum)); - tem = gen_lowpart (nmode, op0); - } - - insn = get_last_insn (); - set_unique_reg_note (insn, - REG_EQUAL, - gen_rtx_MULT (nmode, tem, - GEN_INT (val_so_far))); - } + if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant, + mult_cost)) + return expand_mult_const (mode, op0, INTVAL (const_op1), target, + &algorithm, variant); + } - if (variant == negate_variant) - { - val_so_far = - val_so_far; - accum = expand_unop (mode, neg_optab, accum, target, 0); - } - else if (variant == add_variant) - { - val_so_far = val_so_far + 1; - accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); - } + if (GET_CODE (op0) == CONST_DOUBLE) + { + rtx temp = op0; + op0 = op1; + op1 = temp; + } - if (val != val_so_far) - abort (); + /* Expand x*2.0 as x+x. 
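
   Illustration (editorial sketch, not part of the patch): doubling is exact
   in IEEE binary floating point, it only bumps the exponent, so the rewrite
   below is safe for every input, including zeros, subnormals and infinities:

     #include <assert.h>

     int main (void)
     {
       volatile double xs[] = { 0.0, -0.0, 1.5, -3.25, 1e-308, 1e308 };
       int i;
       for (i = 0; i < 6; i++)
         assert (xs[i] + xs[i] == xs[i] * 2.0);
       return 0;
     }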
*/ + if (GET_CODE (op1) == CONST_DOUBLE + && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, op1); - return accum; + if (REAL_VALUES_EQUAL (d, dconst2)) + { + op0 = force_reg (GET_MODE (op0), op0); + return expand_binop (mode, add_optab, op0, op0, + target, unsignedp, OPTAB_LIB_WIDEN); } } @@ -2565,8 +2698,8 @@ expand_mult (mode, op0, op1, target, unsignedp) there is no difference between signed and unsigned. */ op0 = expand_binop (mode, ! unsignedp - && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT) - ? smulv_optab : smul_optab, + && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT) + ? smulv_optab : smul_optab, op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); if (op0 == 0) abort (); @@ -2576,8 +2709,7 @@ expand_mult (mode, op0, op1, target, unsignedp) /* Return the smallest n such that 2**n >= X. */ int -ceil_log2 (x) - unsigned HOST_WIDE_INT x; +ceil_log2 (unsigned HOST_WIDE_INT x) { return floor_log2 (x - 1) + 1; } @@ -2600,13 +2732,9 @@ ceil_log2 (x) static unsigned HOST_WIDE_INT -choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr) - unsigned HOST_WIDE_INT d; - int n; - int precision; - unsigned HOST_WIDE_INT *multiplier_ptr; - int *post_shift_ptr; - int *lgup_ptr; +choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, + unsigned HOST_WIDE_INT *multiplier_ptr, + int *post_shift_ptr, int *lgup_ptr) { HOST_WIDE_INT mhigh_hi, mlow_hi; unsigned HOST_WIDE_INT mhigh_lo, mlow_lo; @@ -2657,14 +2785,14 @@ choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr) abort (); if (mhigh_hi > 1 || mlow_hi > 1) abort (); - /* assert that mlow < mhigh. */ + /* Assert that mlow < mhigh. */ if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo))) abort (); /* If precision == N, then mlow, mhigh exceed 2^N (but they do not exceed 2^(N+1)). */ - /* Reduce to lowest terms */ + /* Reduce to lowest terms. */ for (post_shift = lgup; post_shift > 0; post_shift--) { unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1); @@ -2697,9 +2825,7 @@ choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr) congruent to 1 (mod 2**N). */ static unsigned HOST_WIDE_INT -invert_mod2n (x, n) - unsigned HOST_WIDE_INT x; - int n; +invert_mod2n (unsigned HOST_WIDE_INT x, int n) { /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ @@ -2734,10 +2860,8 @@ invert_mod2n (x, n) MODE is the mode of operation. */ rtx -expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp) - enum machine_mode mode; - rtx adj_operand, op0, op1, target; - int unsignedp; +expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, + rtx op1, rtx target, int unsignedp) { rtx tem; enum rtx_code adj_code = unsignedp ? PLUS : MINUS; @@ -2760,151 +2884,177 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp) return target; } -/* Emit code to multiply OP0 and CNST1, putting the high half of the result - in TARGET if that is convenient, and return where the result is. If the - operation can not be performed, 0 is returned. +/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ - MODE is the mode of operation and result. +static rtx +extract_high_half (enum machine_mode mode, rtx op) +{ + enum machine_mode wider_mode; - UNSIGNEDP nonzero means unsigned multiply. + if (mode == word_mode) + return gen_highpart (mode, op); - MAX_COST is the total allowed cost for the expanded RTL. 
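
   Illustration (editorial sketch, not part of the patch): the main consumer
   of this function is division by a constant, where choose_multiplier above
   supplies the magic value. For unsigned 32-bit division by 5 it yields
   ceil(2^34 / 5) = 0xCCCCCCCD with a post-shift of 2, so the quotient is
   just the high product shifted:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t div5 (uint32_t x)
     {
       // mulhi(x, 0xCCCCCCCD) >> 2, folded into one 64-bit shift.
       return (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 34);
     }

     int main (void)
     {
       uint32_t samples[] = { 0, 1, 4, 5, 6, 123456789u, 0xffffffffu };
       int i;
       for (i = 0; i < 7; i++)
         assert (div5 (samples[i]) == samples[i] / 5);
       return 0;
     }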
*/ + wider_mode = GET_MODE_WIDER_MODE (mode); + op = expand_shift (RSHIFT_EXPR, wider_mode, op, + build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1); + return convert_modes (mode, wider_mode, op, 0); +} -rtx -expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) - enum machine_mode mode; - rtx op0, target; - unsigned HOST_WIDE_INT cnst1; - int unsignedp; - int max_cost; +/* Like expand_mult_highpart, but only consider using a multiplication + optab. OP1 is an rtx for the constant operand. */ + +static rtx +expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, + rtx target, int unsignedp, int max_cost) { - enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); - optab mul_highpart_optab; + rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); + enum machine_mode wider_mode; optab moptab; rtx tem; - int size = GET_MODE_BITSIZE (mode); - rtx op1, wide_op1; - - /* We can't support modes wider than HOST_BITS_PER_INT. */ - if (size > HOST_BITS_PER_WIDE_INT) - abort (); - - op1 = gen_int_mode (cnst1, mode); - - wide_op1 - = immed_double_const (cnst1, - (unsignedp - ? (HOST_WIDE_INT) 0 - : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))), - wider_mode); - - /* expand_mult handles constant multiplication of word_mode - or narrower. It does a poor job for large modes. */ - if (size < BITS_PER_WORD - && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost) - { - /* We have to do this, since expand_binop doesn't do conversion for - multiply. Maybe change expand_binop to handle widening multiply? */ - op0 = convert_to_mode (wider_mode, op0, unsignedp); - - /* We know that this can't have signed overflow, so pretend this is - an unsigned multiply. */ - tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0); - tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, - build_int_2 (size, 0), NULL_RTX, 1); - return convert_modes (mode, wider_mode, tem, unsignedp); - } + int size; - if (target == 0) - target = gen_reg_rtx (mode); + wider_mode = GET_MODE_WIDER_MODE (mode); + size = GET_MODE_BITSIZE (mode); /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[(int) mode] < max_cost) + if (mul_highpart_cost[mode] < max_cost) { - mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab; - target = expand_binop (mode, mul_highpart_optab, - op0, op1, target, unsignedp, OPTAB_DIRECT); - if (target) - return target; + moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; + tem = expand_binop (mode, moptab, op0, narrow_op1, target, + unsignedp, OPTAB_DIRECT); + if (tem) + return tem; } /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost - < max_cost)) + && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1] + + 4 * add_cost[mode] < max_cost)) { - mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab; - target = expand_binop (mode, mul_highpart_optab, - op0, op1, target, unsignedp, OPTAB_DIRECT); - if (target) + moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; + tem = expand_binop (mode, moptab, op0, narrow_op1, target, + unsignedp, OPTAB_DIRECT); + if (tem) /* We used the wrong signedness. Adjust the result. 
*/
- return expand_mult_highpart_adjust (mode, target, op0,
- op1, target, unsignedp);
+ return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
+ tem, unsignedp);
}

/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
- && mul_widen_cost[(int) wider_mode] < max_cost)
+ if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
+ && mul_widen_cost[wider_mode] < max_cost)
{
- op1 = force_reg (mode, op1);
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}

/* Try widening the mode and perform a non-widening multiplication. */
moptab = smul_optab;
- if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
+ && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
{
- op1 = wide_op1;
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}

/* Try widening multiplication of opposite signedness, and adjust. */
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && (mul_widen_cost[(int) wider_mode]
- + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
+ && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
+ + 4 * add_cost[mode] < max_cost))
{
- rtx regop1 = force_reg (mode, op1);
- tem = expand_binop (wider_mode, moptab, op0, regop1,
+ tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem != 0)
{
- /* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- tem = convert_modes (mode, wider_mode, tem, unsignedp);
+ tem = extract_high_half (mode, tem);
/* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
target, unsignedp);
}
}

return 0;
+}

- try:
- /* Pass NULL_RTX as target since TARGET has wrong mode. */
- tem = expand_binop (wider_mode, moptab, op0, op1,
- NULL_RTX, unsignedp, OPTAB_WIDEN);
- if (tem == 0)
- return 0;
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+ in TARGET if that is convenient, and return where the result is. If the
+ operation cannot be performed, 0 is returned.

- /* Extract the high half of the just generated product. */
- if (mode == word_mode)
+ MODE is the mode of operation and result.
+
+ UNSIGNEDP nonzero means unsigned multiply.
+
+ MAX_COST is the total allowed cost for the expanded RTL. */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+ unsigned HOST_WIDE_INT cnst1, rtx target,
+ int unsignedp, int max_cost)
+{
+ enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
+ int extra_cost;
+ bool sign_adjust = false;
+ enum mult_variant variant;
+ struct algorithm alg;
+ rtx op1, tem;
+
+ /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. 
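
Aside (illustrative, not part of the patch): expand_mult_highpart is the engine behind the divide-by-constant paths of expand_divmod below. Spelled out in plain C for one case: choose_multiplier, asked about a 32-bit unsigned division by 5, yields multiplier 0xCCCCCCCD (the rounded-up approximation of 2^34/5) and post_shift 2, so the quotient comes from one high-part multiply and one shift.

#include <stdint.h>
#include <assert.h>

/* Unsigned division by the constant 5 with no divide instruction:
   take the high word of the 64-bit product with the precomputed
   multiplier, then shift right by post_shift.  */
static uint32_t
udiv5 (uint32_t x)
{
  uint32_t hi = (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 32);

  return hi >> 2;
}

int
main (void)
{
  uint32_t x;

  assert (udiv5 (7) == 1);
  assert (udiv5 (0xFFFFFFFFu) == 0xFFFFFFFFu / 5);
  for (x = 0; x < 1000000; x++)
    assert (udiv5 (x) == x / 5);
  return 0;
}
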
*/ + if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT) + abort (); + + op1 = gen_int_mode (cnst1, wider_mode); + cnst1 &= GET_MODE_MASK (mode); + + /* We can't optimize modes wider than BITS_PER_WORD. + ??? We might be able to perform double-word arithmetic if + mode == word_mode, however all the cost calculations in + synth_mult etc. assume single-word operations. */ + if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) + return expand_mult_highpart_optab (mode, op0, op1, target, + unsignedp, max_cost); + + extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1]; + + /* Check whether we try to multiply by a negative constant. */ + if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { - return gen_highpart (mode, tem); + sign_adjust = true; + extra_cost += add_cost[mode]; } - else + + /* See whether shift/add multiplication is cheap enough. */ + if (choose_mult_variant (wider_mode, cnst1, &alg, &variant, + max_cost - extra_cost)) { - tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, - build_int_2 (size, 0), NULL_RTX, 1); - return convert_modes (mode, wider_mode, tem, unsignedp); + /* See whether the specialized multiplication optabs are + cheaper than the shift/add version. */ + tem = expand_mult_highpart_optab (mode, op0, op1, target, + unsignedp, alg.cost + extra_cost); + if (tem) + return tem; + + tem = convert_to_mode (wider_mode, op0, unsignedp); + tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant); + tem = extract_high_half (mode, tem); + + /* Adjust result for signedness. */ + if (sign_adjust) + tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); + + return tem; } + return expand_mult_highpart_optab (mode, op0, op1, target, + unsignedp, max_cost); } /* Emit the code to divide OP0 by OP1, putting the result in TARGET @@ -2931,7 +3081,7 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) the result is exact for inputs up to 0x1fffffff. The input range can be reduced by using cross-sum rules. For odd divisors >= 3, the following table gives right shift counts - so that if an number is shifted by an integer multiple of the given + so that if a number is shifted by an integer multiple of the given amount, the remainder stays the same: 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, @@ -2948,12 +3098,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) rtx -expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) - int rem_flag; - enum tree_code code; - enum machine_mode mode; - rtx op0, op1, target; - int unsignedp; +expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + rtx op0, rtx op1, rtx target, int unsignedp) { enum machine_mode compute_mode; rtx tquotient; @@ -2962,14 +3108,20 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) int size; rtx insn, set; optab optab1, optab2; - int op1_is_constant, op1_is_pow2; + int op1_is_constant, op1_is_pow2 = 0; int max_cost, extra_cost; static HOST_WIDE_INT last_div_const = 0; + static HOST_WIDE_INT ext_op1; op1_is_constant = GET_CODE (op1) == CONST_INT; - op1_is_pow2 = (op1_is_constant - && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) - || (! 
unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))); + if (op1_is_constant) + { + ext_op1 = INTVAL (op1); + if (unsignedp) + ext_op1 &= GET_MODE_MASK (mode); + op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) + || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); + } /* This is the structure of expand_divmod: @@ -3011,9 +3163,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (! unsignedp && op1 == constm1_rtx) { if (rem_flag) - return const0_rtx; + return const0_rtx; return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT - ? negv_optab : neg_optab, op0, target, 0); + ? negv_optab : neg_optab, op0, target, 0); } if (target @@ -3049,21 +3201,24 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) not straightforward to generalize this. Maybe we should make an array of possible modes in init_expmed? Save this for GCC 2.7. */ - optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab) + optab1 = ((op1_is_pow2 && op1 != const0_rtx) + ? (unsignedp ? lshr_optab : ashr_optab) : (unsignedp ? udiv_optab : sdiv_optab)); - optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab)); + optab2 = ((op1_is_pow2 && op1 != const0_rtx) + ? optab1 + : (unsignedp ? udivmod_optab : sdivmod_optab)); for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing - || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing) + if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing + || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) break; if (compute_mode == VOIDmode) for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[(int) compute_mode].libfunc - || optab2->handlers[(int) compute_mode].libfunc) + if (optab1->handlers[compute_mode].libfunc + || optab2->handlers[compute_mode].libfunc) break; /* If we still couldn't find a mode, use MODE, but we'll probably abort @@ -3087,10 +3242,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = div_cost[(int) compute_mode] + max_cost = div_cost[compute_mode] - (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const) - ? mul_cost[(int) compute_mode] + add_cost : 0); + ? mul_cost[compute_mode] + add_cost[compute_mode] + : 0); last_div_const = ! rem_flag && op1_is_constant ? 
INTVAL (op1) : 0; @@ -3146,7 +3302,8 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) unsigned HOST_WIDE_INT mh, ml; int pre_shift, post_shift; int dummy; - unsigned HOST_WIDE_INT d = INTVAL (op1); + unsigned HOST_WIDE_INT d = (INTVAL (op1) + & GET_MODE_MASK (compute_mode)); if (EXACT_POWER_OF_2_OR_ZERO_P (d)) { @@ -3206,8 +3363,10 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (post_shift - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[post_shift - 1] - + shift_cost[1] + 2 * add_cost); + extra_cost + = (shift_cost[compute_mode][post_shift - 1] + + shift_cost[compute_mode][1] + + 2 * add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3237,8 +3396,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, build_int_2 (pre_shift, 0), NULL_RTX, 1); - extra_cost = (shift_cost[pre_shift] - + shift_cost[post_shift]); + extra_cost + = (shift_cost[compute_mode][pre_shift] + + shift_cost[compute_mode][post_shift]); t2 = expand_mult_highpart (compute_mode, t1, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3259,7 +3419,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) && (set = single_set (insn)) != 0 && SET_DEST (set) == quotient) set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_UDIV (compute_mode, op0, op1)); } else /* TRUNC_DIV, signed */ @@ -3290,15 +3450,16 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) goto fail1; } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) - && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap) + && (rem_flag ? smod_pow2_cheap[compute_mode] + : sdiv_pow2_cheap[compute_mode]) /* ??? The cheap metric is computed only for word_mode. If this operation is wider, this may not be so. Assume true if the optab has an expander for this mode. */ && (((rem_flag ? 
smod_optab : sdiv_optab) - ->handlers[(int) compute_mode].insn_code + ->handlers[compute_mode].insn_code != CODE_FOR_nothing) - || (sdivmod_optab->handlers[(int) compute_mode] + || (sdivmod_optab->handlers[compute_mode] .insn_code != CODE_FOR_nothing))) ; else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) @@ -3347,7 +3508,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) && abs_d < ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))) set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_DIV (compute_mode, op0, GEN_INT @@ -3371,8 +3532,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) || size - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + add_cost); + extra_cost = (shift_cost[compute_mode][post_shift] + + shift_cost[compute_mode][size - 1] + + add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 0, max_cost - extra_cost); @@ -3402,8 +3564,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) goto fail1; ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); - extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + 2 * add_cost); + extra_cost = (shift_cost[compute_mode][post_shift] + + shift_cost[compute_mode][size - 1] + + 2 * add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 0, max_cost - extra_cost); @@ -3438,7 +3601,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) && (set = single_set (insn)) != 0 && SET_DEST (set) == quotient) set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_DIV (compute_mode, op0, op1)); } break; @@ -3492,8 +3655,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); - extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + 2 * add_cost); + extra_cost = (shift_cost[compute_mode][post_shift] + + shift_cost[compute_mode][size - 1] + + 2 * add_cost[compute_mode]); t3 = expand_mult_highpart (compute_mode, t2, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3549,13 +3713,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (rem_flag) { remainder - = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode); + = REG_P (target) ? target : gen_reg_rtx (compute_mode); quotient = gen_reg_rtx (compute_mode); } else { quotient - = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode); + = REG_P (target) ? target : gen_reg_rtx (compute_mode); remainder = gen_reg_rtx (compute_mode); } @@ -3665,13 +3829,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (rem_flag) { - remainder = (GET_CODE (target) == REG + remainder = (REG_P (target) ? target : gen_reg_rtx (compute_mode)); quotient = gen_reg_rtx (compute_mode); } else { - quotient = (GET_CODE (target) == REG + quotient = (REG_P (target) ? target : gen_reg_rtx (compute_mode)); remainder = gen_reg_rtx (compute_mode); } @@ -3762,13 +3926,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) target = gen_reg_rtx (compute_mode); if (rem_flag) { - remainder= (GET_CODE (target) == REG + remainder= (REG_P (target) ? target : gen_reg_rtx (compute_mode)); quotient = gen_reg_rtx (compute_mode); } else { - quotient = (GET_CODE (target) == REG + quotient = (REG_P (target) ? 
target : gen_reg_rtx (compute_mode)); remainder = gen_reg_rtx (compute_mode); } @@ -3854,11 +4018,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) build_int_2 (pre_shift, 0), NULL_RTX, unsignedp); quotient = expand_mult (compute_mode, t1, gen_int_mode (ml, compute_mode), - NULL_RTX, 0); + NULL_RTX, 1); insn = get_last_insn (); set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, compute_mode, op0, op1)); @@ -3943,7 +4107,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) { /* Try to produce the remainder without producing the quotient. If we seem to have a divmod pattern that does not require widening, - don't try widening here. We should really have an WIDEN argument + don't try widening here. We should really have a WIDEN argument to expand_twoval_binop, since what we'd really like to do here is 1) try a mod insn in compute_mode 2) try a divmod insn in compute_mode @@ -3954,7 +4118,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) = sign_expand_binop (compute_mode, umod_optab, smod_optab, op0, op1, target, unsignedp, - ((optab2->handlers[(int) compute_mode].insn_code + ((optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); if (remainder == 0) @@ -3982,7 +4146,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, op0, op1, rem_flag ? NULL_RTX : target, unsignedp, - ((optab2->handlers[(int) compute_mode].insn_code + ((optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); @@ -4037,9 +4201,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) generated by loop.c. */ tree -make_tree (type, x) - tree type; - rtx x; +make_tree (tree type, rtx x) { tree t; @@ -4047,8 +4209,9 @@ make_tree (type, x) { case CONST_INT: t = build_int_2 (INTVAL (x), - (TREE_UNSIGNED (type) - && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT)) + (TYPE_UNSIGNED (type) + && (GET_MODE_BITSIZE (TYPE_MODE (type)) + < HOST_BITS_PER_WIDE_INT)) || INTVAL (x) >= 0 ? 
0 : -1); TREE_TYPE (t) = type; return t; @@ -4107,21 +4270,22 @@ make_tree (type, x) make_tree (type, XEXP (x, 1)))); case LSHIFTRT: + t = lang_hooks.types.unsigned_type (type); return fold (convert (type, - build (RSHIFT_EXPR, unsigned_type (type), - make_tree (unsigned_type (type), - XEXP (x, 0)), + build (RSHIFT_EXPR, t, + make_tree (t, XEXP (x, 0)), make_tree (type, XEXP (x, 1))))); case ASHIFTRT: + t = lang_hooks.types.signed_type (type); return fold (convert (type, - build (RSHIFT_EXPR, signed_type (type), - make_tree (signed_type (type), XEXP (x, 0)), + build (RSHIFT_EXPR, t, + make_tree (t, XEXP (x, 0)), make_tree (type, XEXP (x, 1))))); case DIV: if (TREE_CODE (type) != REAL_TYPE) - t = signed_type (type); + t = lang_hooks.types.signed_type (type); else t = type; @@ -4130,21 +4294,26 @@ make_tree (type, x) make_tree (t, XEXP (x, 0)), make_tree (t, XEXP (x, 1))))); case UDIV: - t = unsigned_type (type); + t = lang_hooks.types.unsigned_type (type); return fold (convert (type, build (TRUNC_DIV_EXPR, t, make_tree (t, XEXP (x, 0)), make_tree (t, XEXP (x, 1))))); + + case SIGN_EXTEND: + case ZERO_EXTEND: + t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), + GET_CODE (x) == ZERO_EXTEND); + return fold (convert (type, make_tree (t, XEXP (x, 0)))); + default: t = make_node (RTL_EXPR); TREE_TYPE (t) = type; -#ifdef POINTERS_EXTEND_UNSIGNED /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being ptr_mode. So convert. */ - if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type)) + if (POINTER_TYPE_P (type)) x = convert_memory_address (TYPE_MODE (type), x); -#endif RTL_EXPR_RTL (t) = x; /* There are no insns to be output @@ -4154,23 +4323,57 @@ make_tree (type, x) } } +/* Check whether the multiplication X * MULT + ADD overflows. + X, MULT and ADD must be CONST_*. + MODE is the machine mode for the computation. + X and MULT must have mode MODE. ADD may have a different mode. + So can X (defaults to same as MODE). + UNSIGNEDP is nonzero to do unsigned multiplication. */ + +bool +const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp) +{ + tree type, mult_type, add_type, result; + + type = lang_hooks.types.type_for_mode (mode, unsignedp); + + /* In order to get a proper overflow indication from an unsigned + type, we have to pretend that it's a sizetype. */ + mult_type = type; + if (unsignedp) + { + mult_type = copy_node (type); + TYPE_IS_SIZETYPE (mult_type) = 1; + } + + add_type = (GET_MODE (add) == VOIDmode ? mult_type + : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp)); + + result = fold (build (PLUS_EXPR, mult_type, + fold (build (MULT_EXPR, mult_type, + make_tree (mult_type, x), + make_tree (mult_type, mult))), + make_tree (add_type, add))); + + return TREE_CONSTANT_OVERFLOW (result); +} + /* Return an rtx representing the value of X * MULT + ADD. TARGET is a suggestion for where to store the result (an rtx). MODE is the machine mode for the computation. X and MULT must have mode MODE. ADD may have a different mode. So can X (defaults to same as MODE). - UNSIGNEDP is non-zero to do unsigned multiplication. + UNSIGNEDP is nonzero to do unsigned multiplication. This may emit insns. 
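
Aside (illustrative, not part of the patch): const_mult_add_overflow_p above folds X * MULT + ADD on tree constants and then simply reads TREE_CONSTANT_OVERFLOW off the result. The division-based tests below answer the same question at run time for 64-bit signed values without ever evaluating an overflowing expression; the function name and the fixed width are invented for the example.

#include <stdint.h>
#include <assert.h>

/* Return 1 if X * MULT + ADD overflows the int64_t range.  */
static int
smul_add_overflows (int64_t x, int64_t mult, int64_t add)
{
  int64_t prod;

  /* Would X * MULT overflow?  Each test divides the threatened bound
     by one factor instead of computing the product.  */
  if (x > 0 && mult > 0 && x > INT64_MAX / mult)
    return 1;
  if (x > 0 && mult < 0 && mult < INT64_MIN / x)
    return 1;
  if (x < 0 && mult > 0 && x < INT64_MIN / mult)
    return 1;
  if (x < 0 && mult < 0 && mult < INT64_MAX / x)
    return 1;
  prod = x * mult;

  /* Would PROD + ADD overflow?  */
  if (add > 0 && prod > INT64_MAX - add)
    return 1;
  if (add < 0 && prod < INT64_MIN - add)
    return 1;
  return 0;
}

int
main (void)
{
  assert (smul_add_overflows (INT64_MAX / 2, 2, 2));
  assert (!smul_add_overflows (1000, 1000, 1000));
  return 0;
}
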
*/ rtx -expand_mult_add (x, target, mult, add, mode, unsignedp) - rtx x, target, mult, add; - enum machine_mode mode; - int unsignedp; +expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode, + int unsignedp) { - tree type = type_for_mode (mode, unsignedp); + tree type = lang_hooks.types.type_for_mode (mode, unsignedp); tree add_type = (GET_MODE (add) == VOIDmode - ? type : type_for_mode (GET_MODE (add), unsignedp)); + ? type: lang_hooks.types.type_for_mode (GET_MODE (add), + unsignedp)); tree result = fold (build (PLUS_EXPR, type, fold (build (MULT_EXPR, type, make_tree (type, x), @@ -4186,9 +4389,7 @@ expand_mult_add (x, target, mult, add, mode, unsignedp) If TARGET is 0, a pseudo-register or constant is returned. */ rtx -expand_and (mode, op0, op1, target) - enum machine_mode mode; - rtx op0, op1, target; +expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target) { rtx tem = 0; @@ -4220,13 +4421,8 @@ expand_and (mode, op0, op1, target) "raw" out of the scc insn. */ rtx -emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) - rtx target; - enum rtx_code code; - rtx op0, op1; - enum machine_mode mode; - int unsignedp; - int normalizep; +emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, + enum machine_mode mode, int unsignedp, int normalizep) { rtx subtarget; enum insn_code icode; @@ -4300,19 +4496,27 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) { if (code == EQ || code == NE) { + rtx op00, op01, op0both; + /* Do a logical OR of the two words and compare the result. */ - rtx op0h = gen_highpart (word_mode, op0); - rtx op0l = gen_lowpart (word_mode, op0); - rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l, - NULL_RTX, unsignedp, OPTAB_DIRECT); + op00 = simplify_gen_subreg (word_mode, op0, mode, 0); + op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD); + op0both = expand_binop (word_mode, ior_optab, op00, op01, + NULL_RTX, unsignedp, OPTAB_DIRECT); if (op0both != 0) return emit_store_flag (target, code, op0both, op1, word_mode, unsignedp, normalizep); } else if (code == LT || code == GE) - /* If testing the sign bit, can just test on high word. */ - return emit_store_flag (target, code, gen_highpart (word_mode, op0), - op1, word_mode, unsignedp, normalizep); + { + rtx op0h; + + /* If testing the sign bit, can just test on high word. */ + op0h = simplify_gen_subreg (word_mode, op0, mode, + subreg_highpart_offset (word_mode, mode)); + return emit_store_flag (target, code, op0h, op1, word_mode, + unsignedp, normalizep); + } } /* From now on, we won't change CODE, so set ICODE now. */ @@ -4325,7 +4529,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) && (normalizep || STORE_FLAG_VALUE == 1 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) - == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))) + == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))) { subtarget = target; @@ -4378,11 +4582,28 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) comparison = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX); - if (GET_CODE (comparison) == CONST_INT) - return (comparison == const0_rtx ? const0_rtx - : normalizep == 1 ? const1_rtx - : normalizep == -1 ? 
constm1_rtx - : const_true_rtx); + if (CONSTANT_P (comparison)) + { + if (GET_CODE (comparison) == CONST_INT) + { + if (comparison == const0_rtx) + return const0_rtx; + } +#ifdef FLOAT_STORE_FLAG_VALUE + else if (GET_CODE (comparison) == CONST_DOUBLE) + { + if (comparison == CONST0_RTX (GET_MODE (comparison))) + return const0_rtx; + } +#endif + else + abort (); + if (normalizep == 1) + return const1_rtx; + if (normalizep == -1) + return constm1_rtx; + return const_true_rtx; + } /* The code of COMPARISON may not match CODE if compare_from_rtx decided to swap its operands and reverse the original code. @@ -4572,7 +4793,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) if (code == EQ || code == NE) { /* For EQ or NE, one way to do the comparison is to apply an operation - that converts the operand into a positive number if it is non-zero + that converts the operand into a positive number if it is nonzero or zero if it was originally zero. Then, for EQ, we subtract 1 and for NE we negate. This puts the result in the sign bit. Then we normalize with a shift, if needed. @@ -4586,9 +4807,9 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) that is compensated by the subsequent overflow when subtracting one / negating. */ - if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) + if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing) tem = expand_unop (mode, abs_optab, op0, subtarget, 1); - else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) + else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing) tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) { @@ -4652,13 +4873,8 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) /* Like emit_store_flag, but always succeeds. */ rtx -emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) - rtx target; - enum rtx_code code; - rtx op0, op1; - enum machine_mode mode; - int unsignedp; - int normalizep; +emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, + enum machine_mode mode, int unsignedp, int normalizep) { rtx tem, label; @@ -4672,7 +4888,7 @@ emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) /* If this failed, we have to do this with set/compare/jump/set code. */ - if (GET_CODE (target) != REG + if (!REG_P (target) || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) target = gen_reg_rtx (GET_MODE (target)); @@ -4697,10 +4913,8 @@ emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) be handled if needed). */ static void -do_cmp_and_jump (arg1, arg2, op, mode, label) - rtx arg1, arg2, label; - enum rtx_code op; - enum machine_mode mode; +do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode, + rtx label) { /* If this mode is an integer too wide to compare properly, compare word by word. Rely on cse to optimize constant cases. */
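
Aside (illustrative, not part of the patch): the double-word strategies used above can be written out for a 64-bit value split into two 32-bit words (w[0] low, w[1] high; two's complement assumed). The first helper mirrors the OR-the-words trick emit_store_flag uses for double-word EQ/NE (valid when the comparison is against zero), the second its sign-bit test on the high word, and the third the high-word-then-low-word shape of a word-by-word comparison.

#include <stdint.h>
#include <assert.h>

/* EQ against zero: OR the two words together and test once.  */
static int
dword_is_zero (const uint32_t w[2])
{
  return (w[0] | w[1]) == 0;
}

/* Signed "< 0": only the sign bit of the high word matters.  */
static int
dword_is_negative (const uint32_t w[2])
{
  return (int32_t) w[1] < 0;
}

/* Signed A < B: decide on the high words (signed) and fall back to
   an unsigned comparison of the low words only when they tie.  */
static int
dword_less_than (const uint32_t a[2], const uint32_t b[2])
{
  if ((int32_t) a[1] != (int32_t) b[1])
    return (int32_t) a[1] < (int32_t) b[1];
  return a[0] < b[0];
}

int
main (void)
{
  uint32_t a[2] = { 5, 0xFFFFFFFFu };	/* -4294967291 */
  uint32_t b[2] = { 0, 0 };		/* 0 */

  assert (!dword_is_zero (a) && dword_is_zero (b));
  assert (dword_is_negative (a) && !dword_is_negative (b));
  assert (dword_less_than (a, b) && !dword_less_than (b, a));
  return 0;
}
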