X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fexpmed.c;h=65fb007da57ea969e872d1566f74535e483f15a4;hb=188c68d9c1d94d29fc5d73921b70d64a5163e247;hp=3802eec1391eee104d70b449f80b9823656f916c;hpb=0861b09e4706ebfe6497fe18944081e76a7a70ec;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/expmed.c b/gcc/expmed.c index 3802eec1391..65fb007da57 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -1,6 +1,6 @@ /* Medium-level subroutines: convert bit-field store and extract and shifts, multiplies and divides to rtl instructions. - Copyright (C) 1987, 88, 89, 92, 93, 1994 Free Software Foundation, Inc. + Copyright (C) 1987, 88, 89, 92-6, 1997 Free Software Foundation, Inc. This file is part of GNU CC. @@ -16,7 +16,8 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ #include "config.h" @@ -61,11 +62,16 @@ static int sdiv_pow2_cheap, smod_pow2_cheap; #define MAX_BITS_PER_WORD BITS_PER_WORD #endif -/* Cost of various pieces of RTL. */ +/* Cost of various pieces of RTL. Note that some of these are indexed by shift count, + and some by mode. */ static int add_cost, negate_cost, zero_cost; static int shift_cost[MAX_BITS_PER_WORD]; static int shiftadd_cost[MAX_BITS_PER_WORD]; static int shiftsub_cost[MAX_BITS_PER_WORD]; +static int mul_cost[NUM_MACHINE_MODES]; +static int div_cost[NUM_MACHINE_MODES]; +static int mul_widen_cost[NUM_MACHINE_MODES]; +static int mul_highpart_cost[NUM_MACHINE_MODES]; void init_expmed () @@ -77,6 +83,7 @@ init_expmed () rtx shift_insn, shiftadd_insn, shiftsub_insn; int dummy; int m; + enum machine_mode mode, wider_mode; start_sequence (); @@ -138,6 +145,32 @@ init_expmed () = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET) <= 2 * add_cost); + for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); + mode != VOIDmode; + mode = GET_MODE_WIDER_MODE (mode)) + { + reg = gen_rtx (REG, mode, 10000); + div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET); + mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET); + wider_mode = GET_MODE_WIDER_MODE (mode); + if (wider_mode != VOIDmode) + { + mul_widen_cost[(int) wider_mode] + = rtx_cost (gen_rtx (MULT, wider_mode, + gen_rtx (ZERO_EXTEND, wider_mode, reg), + gen_rtx (ZERO_EXTEND, wider_mode, reg)), + SET); + mul_highpart_cost[(int) mode] + = rtx_cost (gen_rtx (TRUNCATE, mode, + gen_rtx (LSHIFTRT, wider_mode, + gen_rtx (MULT, wider_mode, + gen_rtx (ZERO_EXTEND, wider_mode, reg), + gen_rtx (ZERO_EXTEND, wider_mode, reg)), + GEN_INT (GET_MODE_BITSIZE (mode)))), + SET); + } + } + /* Free the objects we just allocated. */ end_sequence (); obfree (free_point); @@ -152,21 +185,12 @@ negate_rtx (mode, x) enum machine_mode mode; rtx x; { - if (GET_CODE (x) == CONST_INT) - { - HOST_WIDE_INT val = - INTVAL (x); - if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT) - { - /* Sign extend the value from the bits that are significant. */ - if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))) - val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode); - else - val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1; - } - return GEN_INT (val); - } - else - return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0); + rtx result = simplify_unary_operation (NEG, mode, x, mode); + + if (result == 0) + result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); + + return result; } /* Generate code to store value from rtx VALUE @@ -365,6 +389,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size) #ifdef HAVE_insv if (HAVE_insv + && GET_MODE (value) != BLKmode && !(bitsize == 1 && GET_CODE (value) == CONST_INT) /* Ensure insv's size is wide enough for this field. */ && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]) @@ -384,13 +409,13 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size) int save_volatile_ok = volatile_ok; volatile_ok = 1; - /* If this machine's insv can only insert into a register, or if we - are to force MEMs into a register, copy OP0 into a register and - save it back later. */ + /* If this machine's insv can only insert into a register, copy OP0 + into a register and save it back later. */ + /* This used to check flag_force_mem, but that was a serious + de-optimization now that flag_force_mem is enabled by -O2. */ if (GET_CODE (op0) == MEM - && (flag_force_mem - || ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0]) - (op0, VOIDmode)))) + && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0]) + (op0, VOIDmode))) { rtx tempreg; enum machine_mode bestmode; @@ -409,7 +434,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size) bestmode = GET_MODE (op0); if (bestmode == VOIDmode - || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align)) + || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align)) goto insv_loses; /* Adjust address to point to the containing unit of that mode. */ @@ -532,6 +557,9 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align) int all_zero = 0; int all_one = 0; + if (! SLOW_UNALIGNED_ACCESS) + struct_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT; + /* There is a case not handled here: a structure with a known alignment of just a halfword and a field split across two aligned halfwords within the structure. @@ -715,7 +743,9 @@ store_split_bit_field (op0, bitsize, bitpos, value, align) value = word; else value = gen_lowpart_common (word_mode, - force_reg (GET_MODE (value), value)); + force_reg (GET_MODE (value) != VOIDmode + ? GET_MODE (value) + : word_mode, value)); } while (bitsdone < bitsize) @@ -736,6 +766,16 @@ store_split_bit_field (op0, bitsize, bitpos, value, align) if (BYTES_BIG_ENDIAN) { + int total_bits; + + /* We must do an endian conversion exactly the same way as it is + done in extract_bit_field, so that the two calls to + extract_fixed_bit_field will have comparable arguments. */ + if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode) + total_bits = BITS_PER_WORD; + else + total_bits = GET_MODE_BITSIZE (GET_MODE (value)); + /* Fetch successively less significant portions. */ if (GET_CODE (value) == CONST_INT) part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) @@ -744,11 +784,19 @@ store_split_bit_field (op0, bitsize, bitpos, value, align) else /* The args are chosen so that the last part includes the lsb. Give extract_bit_field the value it needs (with - endianness compensation) to fetch the piece we want. */ - part = extract_fixed_bit_field (word_mode, value, 0, thissize, - GET_MODE_BITSIZE (GET_MODE (value)) - - bitsize + bitsdone, - NULL_RTX, 1, align); + endianness compensation) to fetch the piece we want. + + ??? We have no idea what the alignment of VALUE is, so + we have to use a guess. */ + part + = extract_fixed_bit_field + (word_mode, value, 0, thissize, + total_bits - bitsize + bitsdone, NULL_RTX, 1, + GET_MODE (value) == VOIDmode + ? UNITS_PER_WORD + : (GET_MODE (value) == BLKmode + ? 1 + : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT)); } else { @@ -758,8 +806,14 @@ store_split_bit_field (op0, bitsize, bitpos, value, align) >> bitsdone) & (((HOST_WIDE_INT) 1 << thissize) - 1)); else - part = extract_fixed_bit_field (word_mode, value, 0, thissize, - bitsdone, NULL_RTX, 1, align); + part + = extract_fixed_bit_field + (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1, + GET_MODE (value) == VOIDmode + ? UNITS_PER_WORD + : (GET_MODE (value) == BLKmode + ? 1 + : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT)); } /* If OP0 is a register, then handle OFFSET here. @@ -831,9 +885,6 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, rtx spec_target = target; rtx spec_target_subreg = 0; - if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx)) - abort (); - /* Discount the part of the structure before the desired byte. We need to know how many bytes are safe to reference after it. */ if (total_size >= 0) @@ -844,9 +895,27 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, tmode = mode; while (GET_CODE (op0) == SUBREG) { + int outer_size = GET_MODE_BITSIZE (GET_MODE (op0)); + int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0))); + offset += SUBREG_WORD (op0); + + if (BYTES_BIG_ENDIAN && (outer_size < inner_size)) + { + bitpos += inner_size - outer_size; + if (bitpos > unit) + { + offset += (bitpos / unit); + bitpos %= unit; + } + } + op0 = SUBREG_REG (op0); } + + /* ??? We currently assume TARGET is at least as big as BITSIZE. + If that's wrong, the solution is to test for it and set TARGET to 0 + if needed. */ /* If OP0 is a register, BITPOS must count within a word. But as we have it, it counts within whatever size OP0 now has. @@ -862,7 +931,9 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, So too extracting a subword value in the least significant part of the register. */ - if ((GET_CODE (op0) == REG + if (((GET_CODE (op0) == REG + && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), + GET_MODE_BITSIZE (GET_MODE (op0)))) || (GET_CODE (op0) == MEM && (! SLOW_UNALIGNED_ACCESS || (offset * BITS_PER_UNIT % bitsize == 0 @@ -905,11 +976,18 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (target == 0 || GET_CODE (target) != REG) target = gen_reg_rtx (mode); + /* Indicate for flow that the entire target reg is being set. */ + emit_insn (gen_rtx (CLOBBER, VOIDmode, target)); + for (i = 0; i < nwords; i++) { /* If I is 0, use the low-order word in both field and target; if I is 1, use the next to lowest word; and so on. */ - int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i); + /* Word number in TARGET to use. */ + int wordnum = (WORDS_BIG_ENDIAN + ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 + : i); + /* Offset from start of field in OP0. */ int bit_offset = (WORDS_BIG_ENDIAN ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD) : i * BITS_PER_WORD); @@ -929,7 +1007,24 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, } if (unsignedp) - return target; + { + /* Unless we've filled TARGET, the upper regs in a multi-reg value + need to be zero'd out. */ + if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) + { + int i,total_words; + + total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; + for (i = nwords; i < total_words; i++) + { + int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i; + rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); + emit_move_insn (target_part, const0_rtx); + } + } + return target; + } + /* Signed bit field: sign-extend with two arithmetic shifts. */ target = expand_shift (LSHIFT_EXPR, mode, target, build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0), @@ -1012,7 +1107,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, bestmode = GET_MODE (xop0); if (bestmode == VOIDmode - || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align)) + || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align)) goto extzv_loses; /* Compute offset as multiple of this unit, @@ -1148,7 +1243,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, bestmode = GET_MODE (xop0); if (bestmode == VOIDmode - || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align)) + || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align)) goto extv_loses; /* Compute offset as multiple of this unit, @@ -1378,7 +1473,7 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, #ifdef SLOW_ZERO_EXTEND /* Always generate an `and' if we just zero-extended op0 and SLOW_ZERO_EXTEND, since it - will combine fruitfully with the zero-extend. */ + will combine fruitfully with the zero-extend. */ || tmode != mode #endif #endif @@ -1424,7 +1519,8 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value of mode MODE with BITSIZE ones followed by BITPOS zeros, or the complement of that if COMPLEMENT. The mask is truncated if - necessary to the width of mode MODE. */ + necessary to the width of mode MODE. The mask is zero-extended if + BITSIZE+BITPOS is too small for MODE. */ static rtx mask_rtx (mode, bitpos, bitsize, complement) @@ -1653,7 +1749,7 @@ expand_shift (code, mode, shifted, amount, target, unsignedp) op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0); -#if SHIFT_COUNT_TRUNCATED +#ifdef SHIFT_COUNT_TRUNCATED if (SHIFT_COUNT_TRUNCATED && GET_CODE (op1) == CONST_INT && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode)) @@ -1682,8 +1778,7 @@ expand_shift (code, mode, shifted, amount, target, unsignedp) continue; else if (methods == OPTAB_LIB_WIDEN) { - /* If we are rotating by a constant that is valid and - we have been unable to open-code this by a rotation, + /* If we have been unable to open-code this by a rotation, do it as the IOR of two shifts. I.e., to rotate A by N bits, compute (A << N) | ((unsigned) A >> (C - N)) where C is the bitsize of A. @@ -1695,25 +1790,25 @@ expand_shift (code, mode, shifted, amount, target, unsignedp) this extremely unlikely lossage to avoid complicating the code below. */ - if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0 - && INTVAL (op1) < GET_MODE_BITSIZE (mode)) - { - rtx subtarget = target == shifted ? 0 : target; - rtx temp1; - tree other_amount - = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0); - - shifted = force_reg (mode, shifted); - - temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR, - mode, shifted, amount, subtarget, 1); - temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR, - mode, shifted, other_amount, 0, 1); - return expand_binop (mode, ior_optab, temp, temp1, target, - unsignedp, methods); - } - else - methods = OPTAB_LIB; + rtx subtarget = target == shifted ? 0 : target; + rtx temp1; + tree type = TREE_TYPE (amount); + tree new_amount = make_tree (type, op1); + tree other_amount + = fold (build (MINUS_EXPR, type, + convert (type, + build_int_2 (GET_MODE_BITSIZE (mode), + 0)), + amount)); + + shifted = force_reg (mode, shifted); + + temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR, + mode, shifted, new_amount, subtarget, 1); + temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR, + mode, shifted, other_amount, 0, 1); + return expand_binop (mode, ior_optab, temp, temp1, target, + unsignedp, methods); } temp = expand_binop (mode, @@ -2382,7 +2477,7 @@ invert_mod2n (x, n) unsigned HOST_WIDE_INT x; int n; { - /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ + /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ /* The algorithm notes that the choice y = x satisfies x*y == 1 mod 2^3, since x is assumed odd. @@ -2445,14 +2540,17 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp) MODE is the mode of operation and result. - UNSIGNEDP nonzero means unsigned multiply. */ + UNSIGNEDP nonzero means unsigned multiply. + + MAX_COST is the total allowed cost for the expanded RTL. */ rtx -expand_mult_highpart (mode, op0, cnst1, target, unsignedp) +expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) enum machine_mode mode; register rtx op0, target; unsigned HOST_WIDE_INT cnst1; int unsignedp; + int max_cost; { enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); optab mul_highpart_optab; @@ -2479,7 +2577,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) /* expand_mult handles constant multiplication of word_mode or narrower. It does a poor job for large modes. */ - if (size < BITS_PER_WORD) + if (size < BITS_PER_WORD + && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost) { /* We have to do this, since expand_binop doesn't do conversion for multiply. Maybe change expand_binop to handle widening multiply? */ @@ -2488,7 +2587,7 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp); tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, build_int_2 (size, 0), NULL_RTX, 1); - return gen_lowpart (mode, tem); + return convert_modes (mode, wider_mode, tem, unsignedp); } if (target == 0) @@ -2496,65 +2595,87 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab; - target = expand_binop (mode, mul_highpart_optab, - op0, op1, target, unsignedp, OPTAB_DIRECT); - if (target) - return target; + if (mul_highpart_cost[(int) mode] < max_cost) + { + mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab; + target = expand_binop (mode, mul_highpart_optab, + op0, wide_op1, target, unsignedp, OPTAB_DIRECT); + if (target) + return target; + } /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ - mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab; - target = expand_binop (mode, mul_highpart_optab, - op0, op1, target, unsignedp, OPTAB_DIRECT); - if (target) - /* We used the wrong signedness. Adjust the result. */ - return expand_mult_highpart_adjust (mode, target, op0, - op1, target, unsignedp); - - /* Thirdly, we try to use a widening multiplication, or a wider mode - multiplication. */ + if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost) + { + mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab; + target = expand_binop (mode, mul_highpart_optab, + op0, wide_op1, target, unsignedp, OPTAB_DIRECT); + if (target) + /* We used the wrong signedness. Adjust the result. */ + return expand_mult_highpart_adjust (mode, target, op0, + op1, target, unsignedp); + } + /* Try widening multiplication. */ moptab = unsignedp ? umul_widen_optab : smul_widen_optab; - if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing) - ; - else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing) - moptab = smul_optab; - else + if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing + && mul_widen_cost[(int) wider_mode] < max_cost) + { + op1 = force_reg (mode, op1); + goto try; + } + + /* Try widening the mode and perform a non-widening multiplication. */ + moptab = smul_optab; + if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing + && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost) { - /* Try widening multiplication of opposite signedness, and adjust. */ - moptab = unsignedp ? smul_widen_optab : umul_widen_optab; - if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing) + op1 = wide_op1; + goto try; + } + + /* Try widening multiplication of opposite signedness, and adjust. */ + moptab = unsignedp ? smul_widen_optab : umul_widen_optab; + if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing + && (mul_widen_cost[(int) wider_mode] + + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)) + { + rtx regop1 = force_reg (mode, op1); + tem = expand_binop (wider_mode, moptab, op0, regop1, + NULL_RTX, ! unsignedp, OPTAB_WIDEN); + if (tem != 0) { - tem = expand_binop (wider_mode, moptab, op0, wide_op1, - NULL_RTX, ! unsignedp, OPTAB_WIDEN); - if (tem != 0) - { - /* Extract the high half of the just generated product. */ - tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, - build_int_2 (size, 0), NULL_RTX, 1); - tem = gen_lowpart (mode, tem); - /* We used the wrong signedness. Adjust the result. */ - return expand_mult_highpart_adjust (mode, tem, op0, op1, - target, unsignedp); - } + /* Extract the high half of the just generated product. */ + tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, + build_int_2 (size, 0), NULL_RTX, 1); + tem = convert_modes (mode, wider_mode, tem, unsignedp); + /* We used the wrong signedness. Adjust the result. */ + return expand_mult_highpart_adjust (mode, tem, op0, op1, + target, unsignedp); } - - /* As a last resort, try widening the mode and perform a - non-widening multiplication. */ - moptab = smul_optab; } + return 0; + + try: /* Pass NULL_RTX as target since TARGET has wrong mode. */ - tem = expand_binop (wider_mode, moptab, op0, wide_op1, + tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp, OPTAB_WIDEN); if (tem == 0) return 0; /* Extract the high half of the just generated product. */ - tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, - build_int_2 (size, 0), NULL_RTX, 1); - return gen_lowpart (mode, tem); + if (mode == word_mode) + { + return gen_highpart (mode, tem); + } + else + { + tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, + build_int_2 (size, 0), NULL_RTX, 1); + return convert_modes (mode, wider_mode, tem, unsignedp); + } } /* Emit the code to divide OP0 by OP1, putting the result in TARGET @@ -2592,6 +2713,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) rtx insn, set; optab optab1, optab2; int op1_is_constant, op1_is_pow2; + int max_cost, extra_cost; op1_is_constant = GET_CODE (op1) == CONST_INT; op1_is_pow2 = (op1_is_constant @@ -2696,11 +2818,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) size = GET_MODE_BITSIZE (compute_mode); #if 0 /* It should be possible to restrict the precision to GET_MODE_BITSIZE - (mode), and thereby get better code when OP1 is a constant. Do that for - GCC 2.7. It will require going over all usages of SIZE below. */ + (mode), and thereby get better code when OP1 is a constant. Do that + later. It will require going over all usages of SIZE below. */ size = GET_MODE_BITSIZE (mode); #endif + max_cost = div_cost[(int) compute_mode] + - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0); + /* Now convert to the best mode to use. */ if (compute_mode != mode) { @@ -2722,7 +2847,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) last = get_last_insn (); - /* Promote floor rouding to trunc rounding for unsigned operations. */ + /* Promote floor rounding to trunc rounding for unsigned operations. */ if (unsignedp) { if (code == FLOOR_DIV_EXPR) @@ -2736,11 +2861,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) { case TRUNC_MOD_EXPR: case TRUNC_DIV_EXPR: - if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) + if (op1_is_constant) { - if (unsignedp - || (INTVAL (op1) - == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (compute_mode) - 1))) + if (unsignedp) { unsigned HOST_WIDE_INT mh, ml; int pre_shift, post_shift; @@ -2752,10 +2875,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) pre_shift = floor_log2 (d); if (rem_flag) { - remainder = expand_binop (compute_mode, and_optab, op0, - GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), - remainder, 1, - OPTAB_LIB_WIDEN); + remainder = + expand_binop (compute_mode, and_optab, op0, + GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), + remainder, 1, + OPTAB_LIB_WIDEN); if (remainder) return gen_lowpart (mode, remainder); } @@ -2763,77 +2887,87 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) build_int_2 (pre_shift, 0), tquotient, 1); } - else if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) + else if (size <= HOST_BITS_PER_WIDE_INT) { - /* Most significant bit of divisor is set, emit a scc insn. - emit_store_flag needs to be passed a place for the - result. */ - quotient = emit_store_flag (tquotient, GEU, op0, op1, - compute_mode, 1, 1); - /* Can emit_store_flag have failed? */ - if (quotient == 0) - goto fail1; - } - else - { - /* Find a suitable multiplier and right shift count instead - of multiplying with D. */ - - mh = choose_multiplier (d, size, size, - &ml, &post_shift, &dummy); - - /* If the suggested multiplier is more than SIZE bits, we - can do better for even divisors, using an initial right - shift. */ - if (mh != 0 && (d & 1) == 0) - { - pre_shift = floor_log2 (d & -d); - mh = choose_multiplier (d >> pre_shift, size, - size - pre_shift, - &ml, &post_shift, &dummy); - if (mh) - abort (); - } - else - pre_shift = 0; - - if (mh != 0) + if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) { - rtx t1, t2, t3, t4; - - t1 = expand_mult_highpart (compute_mode, op0, ml, - NULL_RTX, 1); - if (t1 == 0) + /* Most significant bit of divisor is set; emit an scc + insn. */ + quotient = emit_store_flag (tquotient, GEU, op0, op1, + compute_mode, 1, 1); + if (quotient == 0) goto fail1; - t2 = force_operand (gen_rtx (MINUS, compute_mode, - op0, t1), - NULL_RTX); - t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2, - build_int_2 (1, 0), NULL_RTX, 1); - t4 = force_operand (gen_rtx (PLUS, compute_mode, - t1, t3), - NULL_RTX); - quotient = expand_shift (RSHIFT_EXPR, compute_mode, t4, - build_int_2 (post_shift - 1, - 0), - tquotient, 1); } else { - rtx t1, t2; + /* Find a suitable multiplier and right shift count + instead of multiplying with D. */ - t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, - build_int_2 (pre_shift, 0), - NULL_RTX, 1); - t2 = expand_mult_highpart (compute_mode, t1, ml, - NULL_RTX, 1); - if (t2 == 0) - goto fail1; - quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2, - build_int_2 (post_shift, 0), - tquotient, 1); + mh = choose_multiplier (d, size, size, + &ml, &post_shift, &dummy); + + /* If the suggested multiplier is more than SIZE bits, + we can do better for even divisors, using an + initial right shift. */ + if (mh != 0 && (d & 1) == 0) + { + pre_shift = floor_log2 (d & -d); + mh = choose_multiplier (d >> pre_shift, size, + size - pre_shift, + &ml, &post_shift, &dummy); + if (mh) + abort (); + } + else + pre_shift = 0; + + if (mh != 0) + { + rtx t1, t2, t3, t4; + + extra_cost = (shift_cost[post_shift - 1] + + shift_cost[1] + 2 * add_cost); + t1 = expand_mult_highpart (compute_mode, op0, ml, + NULL_RTX, 1, + max_cost - extra_cost); + if (t1 == 0) + goto fail1; + t2 = force_operand (gen_rtx (MINUS, compute_mode, + op0, t1), + NULL_RTX); + t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2, + build_int_2 (1, 0), NULL_RTX,1); + t4 = force_operand (gen_rtx (PLUS, compute_mode, + t1, t3), + NULL_RTX); + quotient = + expand_shift (RSHIFT_EXPR, compute_mode, t4, + build_int_2 (post_shift - 1, 0), + tquotient, 1); + } + else + { + rtx t1, t2; + + t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, + build_int_2 (pre_shift, 0), + NULL_RTX, 1); + extra_cost = (shift_cost[pre_shift] + + shift_cost[post_shift]); + t2 = expand_mult_highpart (compute_mode, t1, ml, + NULL_RTX, 1, + max_cost - extra_cost); + if (t2 == 0) + goto fail1; + quotient = + expand_shift (RSHIFT_EXPR, compute_mode, t2, + build_int_2 (post_shift, 0), + tquotient, 1); + } } } + else /* Too wide mode to use tricky code */ + break; insn = get_last_insn (); if (insn != last @@ -2863,6 +2997,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) else if (d == -1) quotient = expand_unop (compute_mode, neg_optab, op0, tquotient, 0); + else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) + { + /* This case is not handled correctly below. */ + quotient = emit_store_flag (tquotient, EQ, op0, op1, + compute_mode, 1, 1); + if (quotient == 0) + goto fail1; + } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)) ; @@ -2919,7 +3061,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) quotient, quotient, 0); } } - else + else if (size <= HOST_BITS_PER_WIDE_INT) { choose_multiplier (abs_d, size, size - 1, &ml, &post_shift, &lgup); @@ -2927,8 +3069,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) { rtx t1, t2, t3; + extra_cost = (shift_cost[post_shift] + + shift_cost[size - 1] + add_cost); t1 = expand_mult_highpart (compute_mode, op0, ml, - NULL_RTX, 0); + NULL_RTX, 0, + max_cost - extra_cost); if (t1 == 0) goto fail1; t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1, @@ -2947,8 +3092,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) rtx t1, t2, t3, t4; ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); + extra_cost = (shift_cost[post_shift] + + shift_cost[size - 1] + 2 * add_cost); t1 = expand_mult_highpart (compute_mode, op0, ml, - NULL_RTX, 0); + NULL_RTX, 0, + max_cost - extra_cost); if (t1 == 0) goto fail1; t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0), @@ -2965,6 +3113,8 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) tquotient); } } + else /* Too wide mode to use tricky code */ + break; insn = get_last_insn (); if (insn != last @@ -3022,8 +3172,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) build_int_2 (size - 1, 0), NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); + extra_cost = (shift_cost[post_shift] + + shift_cost[size - 1] + 2 * add_cost); t3 = expand_mult_highpart (compute_mode, t2, ml, - NULL_RTX, 1); + NULL_RTX, 1, + max_cost - extra_cost); if (t3 != 0) { t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3, @@ -3069,16 +3222,19 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) or remainder to get floor rounding, once we have the remainder. Notice that we compute also the final remainder value here, and return the result right away. */ - if (target == 0) + if (target == 0 || GET_MODE (target) != compute_mode) target = gen_reg_rtx (compute_mode); + if (rem_flag) { - remainder = target; + remainder + = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode); quotient = gen_reg_rtx (compute_mode); } else { - quotient = target; + quotient + = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode); remainder = gen_reg_rtx (compute_mode); } @@ -3193,16 +3349,19 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) quotient or remainder to get ceiling rounding, once we have the remainder. Notice that we compute also the final remainder value here, and return the result right away. */ - if (target == 0) + if (target == 0 || GET_MODE (target) != compute_mode) target = gen_reg_rtx (compute_mode); + if (rem_flag) { - remainder = target; + remainder = (GET_CODE (target) == REG + ? target : gen_reg_rtx (compute_mode)); quotient = gen_reg_rtx (compute_mode); } else { - quotient = target; + quotient = (GET_CODE (target) == REG + ? target : gen_reg_rtx (compute_mode)); remainder = gen_reg_rtx (compute_mode); } @@ -3292,16 +3451,18 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) quotient or remainder to get ceiling rounding, once we have the remainder. Notice that we compute also the final remainder value here, and return the result right away. */ - if (target == 0) + if (target == 0 || GET_MODE (target) != compute_mode) target = gen_reg_rtx (compute_mode); if (rem_flag) { - remainder = target; + remainder= (GET_CODE (target) == REG + ? target : gen_reg_rtx (compute_mode)); quotient = gen_reg_rtx (compute_mode); } else { - quotient = target; + quotient = (GET_CODE (target) == REG + ? target : gen_reg_rtx (compute_mode)); remainder = gen_reg_rtx (compute_mode); } @@ -3474,6 +3635,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (quotient == 0) { + if (target && GET_MODE (target) != compute_mode) + target = 0; + if (rem_flag) { /* Try to produce the remainder directly without a library call. */ @@ -3497,11 +3661,18 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) return gen_lowpart (mode, remainder); } - /* Produce the quotient. */ - /* Try a quotient insn, but not a library call. */ - quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, - op0, op1, rem_flag ? NULL_RTX : target, - unsignedp, OPTAB_WIDEN); + /* Produce the quotient. Try a quotient insn, but not a library call. + If we have a divmod in this mode, use it in preference to widening + the div (for this test we assume it will not fail). Note that optab2 + is set to the one of the two optabs that the call below will use. */ + quotient + = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, + op0, op1, rem_flag ? NULL_RTX : target, + unsignedp, + ((optab2->handlers[(int) compute_mode].insn_code + != CODE_FOR_nothing) + ? OPTAB_DIRECT : OPTAB_WIDEN)); + if (quotient == 0) { /* No luck there. Try a quotient-and-remainder insn, @@ -3525,6 +3696,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (rem_flag) { + if (target && GET_MODE (target) != compute_mode) + target = 0; + if (quotient == 0) /* No divide instruction either. Use library for remainder. */ remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, @@ -3707,7 +3881,7 @@ expand_and (op0, op1, target) to perform the operation. It says to use zero-extension. NORMALIZEP is 1 if we should convert the result to be either zero - or one one. Normalize is -1 if we should convert the result to be + or one. Normalize is -1 if we should convert the result to be either zero or -1. If NORMALIZEP is zero, the result will be left "raw" out of the scc insn. */ @@ -3725,12 +3899,9 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) enum machine_mode compare_mode; enum machine_mode target_mode = GET_MODE (target); rtx tem; - rtx last = 0; + rtx last = get_last_insn (); rtx pattern, comparison; - if (mode == VOIDmode) - mode = GET_MODE (op0); - /* If one operand is constant, make it the second one. Only do this if the other operand is not constant as well. */ @@ -3743,9 +3914,12 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) code = swap_condition (code); } + if (mode == VOIDmode) + mode = GET_MODE (op0); + /* For some comparisons with 1 and -1, we can convert this to comparisons with zero. This will often produce more opportunities for - store-flag insns. */ + store-flag insns. */ switch (code) { @@ -3784,7 +3958,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) && GET_MODE_CLASS (mode) == MODE_INT && (normalizep || STORE_FLAG_VALUE == 1 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT - && (STORE_FLAG_VALUE + && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))) { subtarget = target; @@ -3803,9 +3977,11 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) subtarget = 0; if (code == GE) - op0 = expand_unop (mode, one_cmpl_optab, op0, subtarget, 0); + op0 = expand_unop (mode, one_cmpl_optab, op0, + ((STORE_FLAG_VALUE == 1 || normalizep) + ? 0 : subtarget), 0); - if (normalizep || STORE_FLAG_VALUE == 1) + if (STORE_FLAG_VALUE == 1 || normalizep) /* If we are supposed to produce a 0/1 value, we want to do a logical shift from the sign bit to the low-order bit; for a -1/0 value, we do an arithmetic shift. */ @@ -3923,10 +4099,13 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) } } - if (last) - delete_insns_since (last); + delete_insns_since (last); - subtarget = target_mode == mode ? target : 0; + /* If expensive optimizations, use different pseudo registers for each + insn, instead of reusing the same pseudo. This leads to better CSE, + but slows down the compiler, since there are more pseudos */ + subtarget = (!flag_expensive_optimizations + && (target_mode == mode)) ? target : NULL_RTX; /* If we reached here, we can't do this with a scc insn. However, there are some comparisons that can be done directly. For example, if @@ -3973,7 +4152,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) normalizep = STORE_FLAG_VALUE; else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT - && (STORE_FLAG_VALUE + && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))) ; else @@ -4071,21 +4250,65 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) if (tem && normalizep) tem = expand_shift (RSHIFT_EXPR, mode, tem, size_int (GET_MODE_BITSIZE (mode) - 1), - tem, normalizep == 1); + subtarget, normalizep == 1); - if (tem && GET_MODE (tem) != target_mode) + if (tem) { - convert_move (target, tem, 0); - tem = target; + if (GET_MODE (tem) != target_mode) + { + convert_move (target, tem, 0); + tem = target; + } + else if (!subtarget) + { + emit_move_insn (target, tem); + tem = target; + } } - - if (tem == 0) + else delete_insns_since (last); return tem; } - emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label)); + +/* Like emit_store_flag, but always succeeds. */ + +rtx +emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) + rtx target; + enum rtx_code code; + rtx op0, op1; + enum machine_mode mode; + int unsignedp; + int normalizep; +{ + rtx tem, label; + + /* First see if emit_store_flag can do the job. */ + tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); + if (tem != 0) + return tem; + + if (normalizep == 0) + normalizep = 1; + + /* If this failed, we have to do this with set/compare/jump/set code. */ + + if (GET_CODE (target) != REG + || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) + target = gen_reg_rtx (GET_MODE (target)); + emit_move_insn (target, const1_rtx); + tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0); + if (GET_CODE (tem) == CONST_INT) + return tem; + + label = gen_label_rtx (); + if (bcc_gen_fctn[(int) code] == 0) + abort (); + + emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label)); + emit_move_insn (target, const0_rtx); emit_label (label); return target;