/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
- Copyright (C) 1987, 88, 89, 92, 93, 1994 Free Software Foundation, Inc.
+ Copyright (C) 1987, 88, 89, 92, 93, 94, 1995 Free Software Foundation, Inc.
This file is part of GNU CC.
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif
-/* Cost of various pieces of RTL. */
+/* Cost of various pieces of RTL.  Note that some of these are indexed by
+   shift count, and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
+static int mul_cost[NUM_MACHINE_MODES];
+static int div_cost[NUM_MACHINE_MODES];
+static int mul_widen_cost[NUM_MACHINE_MODES];
+static int mul_highpart_cost[NUM_MACHINE_MODES];
void
init_expmed ()
rtx shift_insn, shiftadd_insn, shiftsub_insn;
int dummy;
int m;
+ enum machine_mode mode, wider_mode;
start_sequence ();
= (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost);
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ mode != VOIDmode;
+ mode = GET_MODE_WIDER_MODE (mode))
+ {
+ reg = gen_rtx (REG, mode, 10000);
+ div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
+ mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ if (wider_mode != VOIDmode)
+ {
+ mul_widen_cost[(int) wider_mode]
+ = rtx_cost (gen_rtx (MULT, wider_mode,
+ gen_rtx (ZERO_EXTEND, wider_mode, reg),
+ gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+ SET);
+ mul_highpart_cost[(int) mode]
+ = rtx_cost (gen_rtx (TRUNCATE, mode,
+ gen_rtx (LSHIFTRT, wider_mode,
+ gen_rtx (MULT, wider_mode,
+ gen_rtx (ZERO_EXTEND, wider_mode, reg),
+ gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+ GEN_INT (GET_MODE_BITSIZE (mode)))),
+ SET);
+ }
+ }
+
/* Free the objects we just allocated. */
end_sequence ();
obfree (free_point);
op0 = SUBREG_REG (op0);
}
-#if BYTES_BIG_ENDIAN
/* If OP0 is a register, BITPOS must count within a word.
But as we have it, it counts within whatever size OP0 now has.
On a bigendian machine, these are not the same, so convert. */
- if (GET_CODE (op0) != MEM && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
+ if (BYTES_BIG_ENDIAN
+ && GET_CODE (op0) != MEM
+ && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
-#endif
value = protect_from_queue (value, 0);
can be done with a movestrict instruction. */
if (GET_CODE (op0) != MEM
-#if BYTES_BIG_ENDIAN
- && bitpos + bitsize == unit
-#else
- && bitpos == 0
-#endif
+ && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
&& bitsize == GET_MODE_BITSIZE (fieldmode)
&& (GET_MODE (op0) == fieldmode
|| (movstrict_optab->handlers[(int) fieldmode].insn_code
/* Here we transfer the words of the field
in the order least significant first.
This is because the most significant word is the one which may
- be less than full. */
+ be less than full.
+ However, only do that if the value is not BLKmode. */
+
+ int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
int i;
{
/* If I is 0, use the low-order word in both field and target;
if I is 1, use the next to lowest word; and so on. */
- int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
- int bit_offset = (WORDS_BIG_ENDIAN
+ int wordnum = (backwards ? nwords - i - 1 : i);
+ int bit_offset = (backwards
? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
: i * BITS_PER_WORD);
store_bit_field (op0, MIN (BITS_PER_WORD,
bestmode = GET_MODE (op0);
if (bestmode == VOIDmode
- || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+ || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
goto insv_loses;
/* Adjust address to point to the containing unit of that mode. */
/* On big-endian machines, we count bits from the most significant.
If the bit field insn does not, we must invert. */
-#if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
- xbitpos = unit - bitsize - xbitpos;
-#endif
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
/* We have been counting XBITPOS within UNIT.
Count instead within the size of the register. */
-#if BITS_BIG_ENDIAN
- if (GET_CODE (xop0) != MEM)
+ if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
-#endif
+
unit = GET_MODE_BITSIZE (maxmode);
/* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
BITPOS is the starting bit number within OP0.
(OP0's mode may actually be narrower than MODE.) */
-#if BYTES_BIG_ENDIAN
- /* BITPOS is the distance between our msb
- and that of the containing datum.
- Convert it to the distance from the lsb. */
+ if (BYTES_BIG_ENDIAN)
+ /* BITPOS is the distance between our msb
+ and that of the containing datum.
+ Convert it to the distance from the lsb. */
+ bitpos = total_bits - bitsize - bitpos;
- bitpos = total_bits - bitsize - bitpos;
-#endif
/* Now BITPOS is always the distance between our lsb
and that of OP0. */
value = word;
else
value = gen_lowpart_common (word_mode,
- force_reg (GET_MODE (value), value));
+ force_reg (GET_MODE (value) != VOIDmode
+ ? GET_MODE (value)
+ : word_mode, value));
}
while (bitsdone < bitsize)
thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
thissize = MIN (thissize, unit - thispos);
-#if BYTES_BIG_ENDIAN
- /* Fetch successively less significant portions. */
- if (GET_CODE (value) == CONST_INT)
- part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
- >> (bitsize - bitsdone - thissize))
- & (((HOST_WIDE_INT) 1 << thissize) - 1));
- else
+ if (BYTES_BIG_ENDIAN)
{
- /* The args are chosen so that the last part
- includes the lsb. */
- int bit_offset = 0;
- /* If the value isn't in memory, then it must be right aligned
- if a register, so skip past the padding on the left. If it
- is in memory, then there is no padding on the left. */
- if (GET_CODE (value) != MEM)
- bit_offset = BITS_PER_WORD - bitsize;
- part = extract_fixed_bit_field (word_mode, value, 0, thissize,
- bit_offset + bitsdone,
- NULL_RTX, 1, align);
+ /* Fetch successively less significant portions. */
+ if (GET_CODE (value) == CONST_INT)
+ part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
+ >> (bitsize - bitsdone - thissize))
+ & (((HOST_WIDE_INT) 1 << thissize) - 1));
+ else
+ /* The args are chosen so that the last part includes the
+ lsb. Give extract_bit_field the value it needs (with
+ endianness compensation) to fetch the piece we want. */
+ part = extract_fixed_bit_field (word_mode, value, 0, thissize,
+ GET_MODE_BITSIZE (GET_MODE (value))
+ - bitsize + bitsdone,
+ NULL_RTX, 1, align);
}
-#else
- /* Fetch successively more significant portions. */
- if (GET_CODE (value) == CONST_INT)
- part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsdone)
- & (((HOST_WIDE_INT) 1 << thissize) - 1));
else
- part = extract_fixed_bit_field (word_mode, value, 0, thissize,
- bitsdone, NULL_RTX, 1, align);
-#endif
+ {
+ /* Fetch successively more significant portions. */
+ if (GET_CODE (value) == CONST_INT)
+ part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
+ >> bitsdone)
+ & (((HOST_WIDE_INT) 1 << thissize) - 1));
+ else
+ part = extract_fixed_bit_field (word_mode, value, 0, thissize,
+ bitsdone, NULL_RTX, 1, align);
+ }
/* If OP0 is a register, then handle OFFSET here.
tmode = mode;
while (GET_CODE (op0) == SUBREG)
{
+ int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
+ int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
+
offset += SUBREG_WORD (op0);
+
+ if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
+ {
+ bitpos += inner_size - outer_size;
+ if (bitpos > unit)
+ {
+ offset += (bitpos / unit);
+ bitpos %= unit;
+ }
+ }
+
op0 = SUBREG_REG (op0);
}
+
+ /* ??? We currently assume TARGET is at least as big as BITSIZE.
+ If that's wrong, the solution is to test for it and set TARGET to 0
+ if needed. */
-#if BYTES_BIG_ENDIAN
/* If OP0 is a register, BITPOS must count within a word.
But as we have it, it counts within whatever size OP0 now has.
On a bigendian machine, these are not the same, so convert. */
- if (GET_CODE (op0) != MEM && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
+ if (BYTES_BIG_ENDIAN &&
+ GET_CODE (op0) != MEM
+ && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
-#endif
/* Extracting a full-word or multi-word value
from a structure in a register or aligned memory.
&& ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
&& bitpos % BITS_PER_WORD == 0)
|| (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
-#if BYTES_BIG_ENDIAN
- && bitpos + bitsize == BITS_PER_WORD
-#else
- && bitpos == 0
-#endif
- )))
+ && (BYTES_BIG_ENDIAN
+ ? bitpos + bitsize == BITS_PER_WORD
+ : bitpos == 0))))
{
enum machine_mode mode1
= mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
{
/* If I is 0, use the low-order word in both field and target;
if I is 1, use the next to lowest word; and so on. */
- int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
+ /* Word number in TARGET to use. */
+ int wordnum = (WORDS_BIG_ENDIAN
+ ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
+ : i);
+ /* Offset from start of field in OP0. */
int bit_offset = (WORDS_BIG_ENDIAN
? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
: i * BITS_PER_WORD);
}
if (unsignedp)
- return target;
+ {
+ /* Unless we've filled TARGET, the upper regs in a multi-reg value
+ need to be zero'd out. */
+ if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
+ {
+ int i,total_words;
+
+ total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
+ for (i = nwords; i < total_words; i++)
+ {
+ int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
+ rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
+ emit_move_insn (target_part, const0_rtx);
+ }
+ }
+ return target;
+ }
+
/* Signed bit field: sign-extend with two arithmetic shifts. */
target = expand_shift (LSHIFT_EXPR, mode, target,
build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
bestmode = GET_MODE (xop0);
if (bestmode == VOIDmode
- || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+ || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
goto extzv_loses;
/* Compute offset as multiple of this unit,
/* On big-endian machines, we count bits from the most significant.
If the bit field insn does not, we must invert. */
-#if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
- xbitpos = unit - bitsize - xbitpos;
-#endif
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
/* Now convert from counting within UNIT to counting in MAXMODE. */
-#if BITS_BIG_ENDIAN
- if (GET_CODE (xop0) != MEM)
+ if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
-#endif
+
unit = GET_MODE_BITSIZE (maxmode);
if (xtarget == 0
bestmode = GET_MODE (xop0);
if (bestmode == VOIDmode
- || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+ || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
goto extv_loses;
/* Compute offset as multiple of this unit,
/* On big-endian machines, we count bits from the most significant.
If the bit field insn does not, we must invert. */
-#if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
- xbitpos = unit - bitsize - xbitpos;
-#endif
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ xbitpos = unit - bitsize - xbitpos;
+
/* XBITPOS counts within a size of UNIT.
Adjust to count within a size of MAXMODE. */
-#if BITS_BIG_ENDIAN
- if (GET_CODE (xop0) != MEM)
+ if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
-#endif
+
unit = GET_MODE_BITSIZE (maxmode);
if (xtarget == 0
mode = GET_MODE (op0);
-#if BYTES_BIG_ENDIAN
- /* BITPOS is the distance between our msb and that of OP0.
- Convert it to the distance from the lsb. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* BITPOS is the distance between our msb and that of OP0.
+ Convert it to the distance from the lsb. */
+
+ bitpos = total_bits - bitsize - bitpos;
+ }
- bitpos = total_bits - bitsize - bitpos;
-#endif
/* Now BITPOS is always the distance between the field's lsb and that of OP0.
We have reduced the big-endian case to the little-endian case. */
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
complement of that if COMPLEMENT. The mask is truncated if
- necessary to the width of mode MODE. */
+ necessary to the width of mode MODE. The mask is zero-extended if
+ BITSIZE+BITPOS is too small for MODE. */
static rtx
mask_rtx (mode, bitpos, bitsize, complement)
bitsdone += thissize;
/* Shift this part into place for the result. */
-#if BYTES_BIG_ENDIAN
- if (bitsize != bitsdone)
- part = expand_shift (LSHIFT_EXPR, word_mode, part,
- build_int_2 (bitsize - bitsdone, 0), 0, 1);
-#else
- if (bitsdone != thissize)
- part = expand_shift (LSHIFT_EXPR, word_mode, part,
- build_int_2 (bitsdone - thissize, 0), 0, 1);
-#endif
+ if (BYTES_BIG_ENDIAN)
+ {
+ if (bitsize != bitsdone)
+ part = expand_shift (LSHIFT_EXPR, word_mode, part,
+ build_int_2 (bitsize - bitsdone, 0), 0, 1);
+ }
+ else
+ {
+ if (bitsdone != thissize)
+ part = expand_shift (LSHIFT_EXPR, word_mode, part,
+ build_int_2 (bitsdone - thissize, 0), 0, 1);
+ }
if (first)
result = part;
op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
-#if 0 && SHIFT_COUNT_TRUNCATED
+#if SHIFT_COUNT_TRUNCATED
if (SHIFT_COUNT_TRUNCATED
&& GET_CODE (op1) == CONST_INT
&& (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
continue;
else if (methods == OPTAB_LIB_WIDEN)
{
- /* If we are rotating by a constant that is valid and
- we have been unable to open-code this by a rotation,
+ /* If we have been unable to open-code this by a rotation,
do it as the IOR of two shifts. I.e., to rotate A
by N bits, compute (A << N) | ((unsigned) A >> (C - N))
where C is the bitsize of A.
this extremely unlikely lossage to avoid complicating the
code below. */
- if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0
- && INTVAL (op1) < GET_MODE_BITSIZE (mode))
- {
- rtx subtarget = target == shifted ? 0 : target;
- rtx temp1;
- tree other_amount
- = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0);
-
- shifted = force_reg (mode, shifted);
-
- temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
- mode, shifted, amount, subtarget, 1);
- temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
- mode, shifted, other_amount, 0, 1);
- return expand_binop (mode, ior_optab, temp, temp1, target,
- unsignedp, methods);
- }
- else
- methods = OPTAB_LIB;
+ rtx subtarget = target == shifted ? 0 : target;
+ rtx temp1;
+ tree type = TREE_TYPE (amount);
+ tree new_amount = make_tree (type, op1);
+ tree other_amount
+ = fold (build (MINUS_EXPR, type,
+ convert (type,
+ build_int_2 (GET_MODE_BITSIZE (mode),
+ 0)),
+ amount));
+
+ shifted = force_reg (mode, shifted);
+
+ temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
+ mode, shifted, new_amount, subtarget, 1);
+ temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
+ mode, shifted, other_amount, 0, 1);
+ return expand_binop (mode, ior_optab, temp, temp1, target,
+ unsignedp, methods);
}
temp = expand_binop (mode,
{
rtx const_op1 = op1;
+ /* synth_mult does an `unsigned int' multiply. As long as the mode is
+ less than or equal in size to `unsigned int' this doesn't matter.
+ If the mode is larger than `unsigned int', then synth_mult works only
+ if the constant value exactly fits in an `unsigned int' without any
+ truncation. This means that multiplying by negative values does
+ not work; results are off by 2^32 on a 32 bit machine. */
+
/* If we are multiplying in DImode, it may still be a win
to try to work with shifts and adds. */
if (GET_CODE (op1) == CONST_DOUBLE
&& GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
- && HOST_BITS_PER_INT <= BITS_PER_WORD)
- {
- if ((CONST_DOUBLE_HIGH (op1) == 0 && CONST_DOUBLE_LOW (op1) >= 0)
- || (CONST_DOUBLE_HIGH (op1) == -1 && CONST_DOUBLE_LOW (op1) < 0))
- const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
- }
+ && HOST_BITS_PER_INT >= BITS_PER_WORD
+ && CONST_DOUBLE_HIGH (op1) == 0)
+ const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
+ else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
+ && GET_CODE (op1) == CONST_INT
+ && INTVAL (op1) < 0)
+ const_op1 = 0;
/* We used to test optimize here, on the grounds that it's better to
produce a smaller program when -O is not used.
But this causes such a terrible slowdown sometimes
that it seems better to use synth_mult always. */
- if (GET_CODE (const_op1) == CONST_INT)
+ if (const_op1 && GET_CODE (const_op1) == CONST_INT)
{
struct algorithm alg;
struct algorithm alg2;
mult_cost = MIN (12 * add_cost, mult_cost);
synth_mult (&alg, val, mult_cost);
- synth_mult (&alg2, - val,
- (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
- if (alg2.cost + negate_cost < alg.cost)
- alg = alg2, variant = negate_variant;
+
+ /* This works only if the inverted value actually fits in an
+ `unsigned int' */
+ if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+ {
+ synth_mult (&alg2, - val,
+ (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
+ if (alg2.cost + negate_cost < alg.cost)
+ alg = alg2, variant = negate_variant;
+ }
/* This proves very useful for division-by-constant. */
synth_mult (&alg2, val - 1,
MODE is the mode of operation and result.
- UNSIGNEDP nonzero means unsigned multiply. */
+ UNSIGNEDP nonzero means unsigned multiply.
+
+ MAX_COST is the total allowed cost for the expanded RTL. */
rtx
-expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
+expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
enum machine_mode mode;
register rtx op0, target;
unsigned HOST_WIDE_INT cnst1;
int unsignedp;
+ int max_cost;
{
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
optab mul_highpart_optab;
/* expand_mult handles constant multiplication of word_mode
or narrower. It does a poor job for large modes. */
- if (size < BITS_PER_WORD)
+ if (size < BITS_PER_WORD
+ && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
/* We have to do this, since expand_binop doesn't do conversion for
multiply. Maybe change expand_binop to handle widening multiply? */
tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
build_int_2 (size, 0), NULL_RTX, 1);
- return gen_lowpart (mode, tem);
+ return convert_modes (mode, wider_mode, tem, unsignedp);
}
if (target == 0)
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
- mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
- return target;
+ if (mul_highpart_cost[(int) mode] < max_cost)
+ {
+ mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+ target = expand_binop (mode, mul_highpart_optab,
+ op0, op1, target, unsignedp, OPTAB_DIRECT);
+ if (target)
+ return target;
+ }
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
- mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
- /* We used the wrong signedness. Adjust the result. */
- return expand_mult_highpart_adjust (mode, target, op0,
- op1, target, unsignedp);
-
- /* Thirdly, we try to use a widening multiplication, or a wider mode
- multiplication. */
+ if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
+ {
+ mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+ target = expand_binop (mode, mul_highpart_optab,
+ op0, op1, target, unsignedp, OPTAB_DIRECT);
+ if (target)
+ /* We used the wrong signedness. Adjust the result. */
+ return expand_mult_highpart_adjust (mode, target, op0,
+ op1, target, unsignedp);
+ }
+ /* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
- ;
- else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
- moptab = smul_optab;
- else
+ if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ && mul_widen_cost[(int) wider_mode] < max_cost)
+ goto try;
+
+ /* Try widening the mode and perform a non-widening multiplication. */
+ moptab = smul_optab;
+ if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
+ goto try;
+
+ /* Try widening multiplication of opposite signedness, and adjust. */
+ moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
+ if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ && (mul_widen_cost[(int) wider_mode]
+ + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
{
- /* Try widening multiplication of opposite signedness, and adjust. */
- moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
+ tem = expand_binop (wider_mode, moptab, op0, wide_op1,
+ NULL_RTX, ! unsignedp, OPTAB_WIDEN);
+ if (tem != 0)
{
- tem = expand_binop (wider_mode, moptab, op0, wide_op1,
- NULL_RTX, ! unsignedp, OPTAB_WIDEN);
- if (tem != 0)
- {
- /* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- tem = gen_lowpart (mode, tem);
- /* We used the wrong signedness. Adjust the result. */
- return expand_mult_highpart_adjust (mode, tem, op0, op1,
- target, unsignedp);
- }
+ /* Extract the high half of the just generated product. */
+ tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
+ build_int_2 (size, 0), NULL_RTX, 1);
+ tem = convert_modes (mode, wider_mode, tem, unsignedp);
+ /* We used the wrong signedness. Adjust the result. */
+ return expand_mult_highpart_adjust (mode, tem, op0, op1,
+ target, unsignedp);
}
-
- /* As a last resort, try widening the mode and perform a
- non-widening multiplication. */
- moptab = smul_optab;
}
+ return 0;
+
+ try:
/* Pass NULL_RTX as target since TARGET has wrong mode. */
tem = expand_binop (wider_mode, moptab, op0, wide_op1,
NULL_RTX, unsignedp, OPTAB_WIDEN);
/* Extract the high half of the just generated product. */
tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
build_int_2 (size, 0), NULL_RTX, 1);
- return gen_lowpart (mode, tem);
+ return convert_modes (mode, wider_mode, tem, unsignedp);
}
\f
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
rtx insn, set;
optab optab1, optab2;
int op1_is_constant, op1_is_pow2;
+ int max_cost, extra_cost;
op1_is_constant = GET_CODE (op1) == CONST_INT;
op1_is_pow2 = (op1_is_constant
Second comes a switch statement with code specific for each rounding mode.
For some special operands this code emits all RTL for the desired
- operation, for other cases, it generates a quotient and stores it in
+ operation, for other cases, it generates only a quotient and stores it in
QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
to indicate that it has not done anything.
- Last comes code that finishes the operation. If QUOTIENT is set an
- REM_FLAG, the remainder is computed as OP0 - QUOTIENT * OP1. If QUOTIENT
- is not set, it is computed using trunc rounding.
+ Last comes code that finishes the operation. If QUOTIENT is set and
+ REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
+ QUOTIENT is not set, it is computed using trunc rounding.
We try to generate special code for division and remainder when OP1 is a
constant. If |OP1| = 2**n we can use shifts and some other fast
size = GET_MODE_BITSIZE (compute_mode);
#if 0
/* It should be possible to restrict the precision to GET_MODE_BITSIZE
- (mode), and thereby get better code when OP1 is a constant. Do that for
- GCC 2.7. It will require going over all usages of SIZE below. */
+ (mode), and thereby get better code when OP1 is a constant. Do that
+ later. It will require going over all usages of SIZE below. */
size = GET_MODE_BITSIZE (mode);
#endif
+ max_cost = div_cost[(int) compute_mode]
+ - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
+
/* Now convert to the best mode to use. */
if (compute_mode != mode)
{
case TRUNC_DIV_EXPR:
if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
{
- if (unsignedp)
+ if (unsignedp
+ || (INTVAL (op1)
+ == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (compute_mode) - 1)))
{
unsigned HOST_WIDE_INT mh, ml;
int pre_shift, post_shift;
{
rtx t1, t2, t3, t4;
+ extra_cost = (shift_cost[post_shift - 1]
+ + shift_cost[1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
- NULL_RTX, 1);
+ NULL_RTX, 1,
+ max_cost - extra_cost);
if (t1 == 0)
goto fail1;
t2 = force_operand (gen_rtx (MINUS, compute_mode,
t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
build_int_2 (pre_shift, 0),
NULL_RTX, 1);
+ extra_cost = (shift_cost[pre_shift]
+ + shift_cost[post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
- NULL_RTX, 1);
+ NULL_RTX, 1,
+ max_cost - extra_cost);
if (t2 == 0)
goto fail1;
quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
tquotient, 0);
}
+ /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
+ the quotient. */
if (d < 0)
{
insn = get_last_insn ();
{
rtx t1, t2, t3;
+ extra_cost = (shift_cost[post_shift]
+ + shift_cost[size - 1] + add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
- NULL_RTX, 0);
+ NULL_RTX, 0,
+ max_cost - extra_cost);
if (t1 == 0)
goto fail1;
t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
rtx t1, t2, t3, t4;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
+ extra_cost = (shift_cost[post_shift]
+ + shift_cost[size - 1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
- NULL_RTX, 0);
+ NULL_RTX, 0,
+ max_cost - extra_cost);
if (t1 == 0)
goto fail1;
t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
build_int_2 (size - 1, 0), NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
+ extra_cost = (shift_cost[post_shift]
+ + shift_cost[size - 1] + 2 * add_cost);
t3 = expand_mult_highpart (compute_mode, t2, ml,
- NULL_RTX, 1);
+ NULL_RTX, 1,
+ max_cost - extra_cost);
if (t3 != 0)
{
t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
and return the result right away. */
if (target == 0)
target = gen_reg_rtx (compute_mode);
+
if (rem_flag)
{
- remainder = target;
+ remainder
+ = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
quotient = gen_reg_rtx (compute_mode);
}
else
{
- quotient = target;
+ quotient
+ = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
remainder = gen_reg_rtx (compute_mode);
}
value here, and return the result right away. */
if (target == 0)
target = gen_reg_rtx (compute_mode);
+
if (rem_flag)
{
- remainder = target;
+ remainder = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
quotient = gen_reg_rtx (compute_mode);
}
else
{
- quotient = target;
+ quotient = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
remainder = gen_reg_rtx (compute_mode);
}
}
else /* signed */
{
+ if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
+ && INTVAL (op1) >= 0)
+ {
+	  /* This is extremely similar to the code for the unsigned case
+	     above.  For 2.7 we should merge these variants, but for
+	     2.6.1 I don't want to touch the code for unsigned since that
+	     gets used in C.  The signed case will only be used by other
+	     languages (Ada).  */
+
+ rtx t1, t2, t3;
+ unsigned HOST_WIDE_INT d = INTVAL (op1);
+ t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
+ build_int_2 (floor_log2 (d), 0),
+ tquotient, 0);
+ t2 = expand_binop (compute_mode, and_optab, op0,
+ GEN_INT (d - 1),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ t3 = gen_reg_rtx (compute_mode);
+ t3 = emit_store_flag (t3, NE, t2, const0_rtx,
+ compute_mode, 1, 1);
+ if (t3 == 0)
+ {
+ rtx lab;
+ lab = gen_label_rtx ();
+ emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
+ compute_mode, 0, 0);
+ emit_jump_insn (gen_beq (lab));
+ expand_inc (t1, const1_rtx);
+ emit_label (lab);
+ quotient = t1;
+ }
+ else
+ quotient = force_operand (gen_rtx (PLUS, compute_mode,
+ t1, t3),
+ tquotient);
+ break;
+ }
+
/* Try using an instruction that produces both the quotient and
remainder, using truncation. We can easily compensate the
quotient or remainder to get ceiling rounding, once we have the
target = gen_reg_rtx (compute_mode);
if (rem_flag)
{
- remainder = target;
+ remainder= (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
quotient = gen_reg_rtx (compute_mode);
}
else
{
- quotient = target;
+ quotient = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
remainder = gen_reg_rtx (compute_mode);
}
case ROUND_DIV_EXPR:
case ROUND_MOD_EXPR:
- /* The code that used to be here was wrong, and nothing really
- depends on it. */
- abort ();
- break;
+ if (unsignedp)
+ {
+ rtx tem;
+ rtx label;
+ label = gen_label_rtx ();
+ quotient = gen_reg_rtx (compute_mode);
+ remainder = gen_reg_rtx (compute_mode);
+ if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
+ {
+ rtx tem;
+ quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
+ quotient, 1, OPTAB_LIB_WIDEN);
+ tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
+ remainder = expand_binop (compute_mode, sub_optab, op0, tem,
+ remainder, 1, OPTAB_LIB_WIDEN);
+ }
+ tem = plus_constant (op1, -1);
+ tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
+ build_int_2 (1, 0), NULL_RTX, 1);
+ emit_cmp_insn (remainder, tem, LEU, NULL_RTX, compute_mode, 0, 0);
+ emit_jump_insn (gen_bleu (label));
+ expand_inc (quotient, const1_rtx);
+ expand_dec (remainder, op1);
+ emit_label (label);
+ }
+ else
+ {
+ rtx abs_rem, abs_op1, tem, mask;
+ rtx label;
+ label = gen_label_rtx ();
+ quotient = gen_reg_rtx (compute_mode);
+ remainder = gen_reg_rtx (compute_mode);
+ if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
+ {
+ rtx tem;
+ quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
+ quotient, 0, OPTAB_LIB_WIDEN);
+ tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
+ remainder = expand_binop (compute_mode, sub_optab, op0, tem,
+ remainder, 0, OPTAB_LIB_WIDEN);
+ }
+ abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
+ abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
+ tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
+ build_int_2 (1, 0), NULL_RTX, 1);
+ emit_cmp_insn (tem, abs_op1, LTU, NULL_RTX, compute_mode, 0, 0);
+ emit_jump_insn (gen_bltu (label));
+ tem = expand_binop (compute_mode, xor_optab, op0, op1,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
+ build_int_2 (size - 1, 0), NULL_RTX, 0);
+ tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ tem = expand_binop (compute_mode, sub_optab, tem, mask,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ expand_inc (quotient, tem);
+ tem = expand_binop (compute_mode, xor_optab, mask, op1,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ tem = expand_binop (compute_mode, sub_optab, tem, mask,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ expand_dec (remainder, tem);
+ emit_label (label);
+ }
+ return gen_lowpart (mode, rem_flag ? remainder : quotient);
}
if (quotient == 0)
rtx last = 0;
rtx pattern, comparison;
- if (mode == VOIDmode)
- mode = GET_MODE (op0);
-
/* If one operand is constant, make it the second one. Only do this
if the other operand is not constant as well. */
code = swap_condition (code);
}
+ if (mode == VOIDmode)
+ mode = GET_MODE (op0);
+
/* For some comparisons with 1 and -1, we can convert this to
comparisons with zero. This will often produce more opportunities for
store-flag insns. */