/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
- Copyright (C) 1987, 1988, 1989, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1987, 1988, 1989, 1992, 1993 Free Software Foundation, Inc.
This file is part of GNU CC.
static int sdiv_pow2_cheap, smod_pow2_cheap;
-/* Cost of various pieces of RTL. */
-static int add_cost, shift_cost, mult_cost, negate_cost, lea_cost;
+/* For compilers that support multiple targets with different word sizes,
+ MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
+ is the H8/300(H) compiler. */
-/* Max scale factor for scaled address in lea instruction. */
-static int lea_max_mul;
+#ifndef MAX_BITS_PER_WORD
+#define MAX_BITS_PER_WORD BITS_PER_WORD
+#endif
+
+/* Cost of various pieces of RTL. */
+static int add_cost, mult_cost, negate_cost, zero_cost;
+static int shift_cost[MAX_BITS_PER_WORD];
+static int shiftadd_cost[MAX_BITS_PER_WORD];
+static int shiftsub_cost[MAX_BITS_PER_WORD];
void
init_expmed ()
{
- char *free_point = (char *) oballoc (1);
+ char *free_point;
/* This is "some random pseudo register" for purposes of calling recog
to see what insns exist. */
rtx reg = gen_rtx (REG, word_mode, FIRST_PSEUDO_REGISTER);
- rtx pow2 = gen_rtx (CONST_INT, VOIDmode, 32);
- rtx lea;
- int i, dummy;
-
- add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg));
- shift_cost = rtx_cost (gen_rtx (LSHIFT, word_mode, reg,
- /* Using a constant gives better
- estimate of typical costs.
- 1 or 2 might have quirks. */
- gen_rtx (CONST_INT, VOIDmode, 3)));
- mult_cost = rtx_cost (gen_rtx (MULT, word_mode, reg, reg));
- negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg));
+ rtx shift_insn, shiftadd_insn, shiftsub_insn;
+ int dummy;
+ int m;
- /* 999999 is chosen to avoid any plausible faster special case. */
- mult_is_very_cheap
- = (rtx_cost (gen_rtx (MULT, word_mode, reg,
- gen_rtx (CONST_INT, VOIDmode, 999999)))
- < rtx_cost (gen_rtx (LSHIFT, word_mode, reg,
- gen_rtx (CONST_INT, VOIDmode, 7))));
+ start_sequence ();
- sdiv_pow2_cheap
- = rtx_cost (gen_rtx (DIV, word_mode, reg, pow2)) <= 2 * add_cost;
- smod_pow2_cheap
- = rtx_cost (gen_rtx (MOD, word_mode, reg, pow2)) <= 2 * add_cost;
+ /* Since we are on the permanent obstack, we must be sure we save this
+ spot AFTER we call start_sequence, since it will reuse the rtl it
+ makes. */
+
+ free_point = (char *) oballoc (0);
+
+ zero_cost = rtx_cost (const0_rtx, 0);
+ add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
+
+ shift_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
+ gen_rtx (ASHIFT, word_mode, reg,
+ const0_rtx)));
+
+ shiftadd_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
+ gen_rtx (PLUS, word_mode,
+ gen_rtx (MULT, word_mode,
+ reg, const0_rtx),
+ reg)));
+
+ shiftsub_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
+ gen_rtx (MINUS, word_mode,
+ gen_rtx (MULT, word_mode,
+ reg, const0_rtx),
+ reg)));
init_recog ();
- for (i = 2;; i <<= 1)
+
+ shift_cost[0] = 0;
+ shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
+
+ for (m = 1; m < BITS_PER_WORD; m++)
{
- lea = gen_rtx (SET, VOIDmode, reg,
- gen_rtx (PLUS, word_mode,
- gen_rtx (MULT, word_mode, reg,
- gen_rtx (CONST_INT, VOIDmode, i)),
- reg));
- /* Using 0 as second argument is not quite right,
- but what else is there to do? */
- if (recog (lea, 0, &dummy) < 0)
- break;
- lea_max_mul = i;
- lea_cost = rtx_cost (SET_SRC (lea));
+ shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
+
+ XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
+ if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
+ shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
+
+ XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
+ = GEN_INT ((HOST_WIDE_INT) 1 << m);
+ if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
+ shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
+
+ XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
+ = GEN_INT ((HOST_WIDE_INT) 1 << m);
+ if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
+ shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
}
+ mult_cost = rtx_cost (gen_rtx (MULT, word_mode, reg, reg), SET);
+ /* For gcc 2.4 keep MULT_COST small to avoid really slow searches
+ in synth_mult. */
+ mult_cost = MIN (12 * add_cost, mult_cost);
+ negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);
+
+ /* 999999 is chosen to avoid any plausible faster special case. */
+ mult_is_very_cheap
+ = (rtx_cost (gen_rtx (MULT, word_mode, reg, GEN_INT (999999)), SET)
+ < rtx_cost (gen_rtx (ASHIFT, word_mode, reg, GEN_INT (7)), SET));
+
+ sdiv_pow2_cheap
+ = (rtx_cost (gen_rtx (DIV, word_mode, reg, GEN_INT (32)), SET)
+ <= 2 * add_cost);
+ smod_pow2_cheap
+ = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
+ <= 2 * add_cost);
+
/* Free the objects we just allocated. */
+ end_sequence ();
obfree (free_point);
}
{
if (GET_CODE (x) == CONST_INT)
{
- int val = - INTVAL (x);
- if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_INT)
+ HOST_WIDE_INT val = - INTVAL (x);
+ if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT)
{
/* Sign extend the value from the bits that are significant. */
- if (val & (1 << (GET_MODE_BITSIZE (mode) - 1)))
- val |= (-1) << GET_MODE_BITSIZE (mode);
+ if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
+ val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode);
else
- val &= (1 << GET_MODE_BITSIZE (mode)) - 1;
+ val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1;
}
- return gen_rtx (CONST_INT, VOIDmode, val);
+ return GEN_INT (val);
}
else
- return expand_unop (GET_MODE (x), neg_optab, x, 0, 0);
+ return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0);
}
\f
/* Generate code to store value from rtx VALUE
/* Note that the adjustment of BITPOS above has no effect on whether
BITPOS is 0 in a REG bigger than a word. */
- if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD && GET_CODE (op0) != MEM
+ if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
+ && (! STRICT_ALIGNMENT || GET_CODE (op0) != MEM)
&& bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
{
/* Storing in a full-word or multi-word field in a register
can be done with just SUBREG. */
if (GET_MODE (op0) != fieldmode)
- op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
+ if (GET_CODE (op0) == REG)
+ op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
+ else
+ op0 = change_address (op0, fieldmode,
+ plus_constant (XEXP (op0, 0), offset));
emit_move_insn (op0, value);
return value;
}
if (GET_MODE (op0) == BLKmode
|| GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
bestmode
- = get_best_mode (bitsize, bitnum,
- align * BITS_PER_UNIT, maxmode,
- GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
+ = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
+ MEM_VOLATILE_P (op0));
else
bestmode = GET_MODE (op0);
if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
{
/* Optimization: Don't bother really extending VALUE
- if it has all the bits we will actually use. */
+ if it has all the bits we will actually use. However,
+ if we must narrow it, be sure we do it correctly. */
- /* Avoid making subreg of a subreg, or of a mem. */
- if (GET_CODE (value1) != REG)
+ if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
+ {
+ /* Avoid making subreg of a subreg, or of a mem. */
+ if (GET_CODE (value1) != REG)
value1 = copy_to_reg (value1);
- value1 = gen_rtx (SUBREG, maxmode, value1, 0);
+ value1 = gen_rtx (SUBREG, maxmode, value1, 0);
+ }
+ else
+ value1 = gen_lowpart (maxmode, value1);
}
else if (!CONSTANT_P (value))
/* Parse phase is supposed to make VALUE's data type
(value1, maxmode)))
value1 = force_reg (maxmode, value1);
- pat = gen_insv (xop0,
- gen_rtx (CONST_INT, VOIDmode, bitsize),
- gen_rtx (CONST_INT, VOIDmode, xbitpos),
- value1);
+ pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
if (pat)
emit_insn (pat);
else
if (GET_CODE (value) == CONST_INT)
{
- register int v = INTVAL (value);
+ register HOST_WIDE_INT v = INTVAL (value);
- if (bitsize < HOST_BITS_PER_INT)
- v &= (1 << bitsize) - 1;
+ if (bitsize < HOST_BITS_PER_WIDE_INT)
+ v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
if (v == 0)
all_zero = 1;
- else if ((bitsize < HOST_BITS_PER_INT && v == (1 << bitsize) - 1)
- || (bitsize == HOST_BITS_PER_INT && v == -1))
+ else if ((bitsize < HOST_BITS_PER_WIDE_INT
+ && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
+ || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
all_one = 1;
value = lshift_value (mode, value, bitpos, bitsize);
if (must_and)
value = expand_binop (mode, and_optab, value,
mask_rtx (mode, 0, bitsize, 0),
- 0, 1, OPTAB_LIB_WIDEN);
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
if (bitpos > 0)
value = expand_shift (LSHIFT_EXPR, mode, value,
- build_int_2 (bitpos, 0), 0, 1);
+ build_int_2 (bitpos, 0), NULL_RTX, 1);
}
/* Now clear the chosen bits in OP0,
if (GET_MODE (value) != VOIDmode)
value = convert_to_mode (word_mode, value, 1);
+
+ if (GET_CODE (value) == CONST_DOUBLE
+ && (part1 = gen_lowpart_common (word_mode, value)) != 0)
+ value = part1;
+
if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
- value = copy_to_reg (value);
+ value = copy_to_mode_reg (word_mode, value);
/* Split the value into two parts:
PART1 gets that which goes in the first word; PART2 the other. */
/* PART1 gets the more significant part. */
if (GET_CODE (value) == CONST_INT)
{
- part1 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) >> bitsize_2);
- part2 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) & ((1 << bitsize_2) - 1));
+ part1 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsize_2);
+ part2 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value))
+ & (((HOST_WIDE_INT) 1 << bitsize_2) - 1));
}
else
{
part1 = extract_fixed_bit_field (word_mode, value, 0, bitsize_1,
- BITS_PER_WORD - bitsize, 0, 1,
+ BITS_PER_WORD - bitsize, NULL_RTX, 1,
BITS_PER_WORD);
part2 = extract_fixed_bit_field (word_mode, value, 0, bitsize_2,
- BITS_PER_WORD - bitsize_2, 0, 1,
+ BITS_PER_WORD - bitsize_2, NULL_RTX, 1,
BITS_PER_WORD);
}
#else
/* PART1 gets the less significant part. */
if (GET_CODE (value) == CONST_INT)
{
- part1 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) & ((1 << bitsize_1) - 1));
- part2 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) >> bitsize_1);
+ part1 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value))
+ & (((HOST_WIDE_INT) 1 << bitsize_1) - 1));
+ part2 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsize_1);
}
else
{
part1 = extract_fixed_bit_field (word_mode, value, 0, bitsize_1, 0,
- 0, 1, BITS_PER_WORD);
+ NULL_RTX, 1, BITS_PER_WORD);
part2 = extract_fixed_bit_field (word_mode, value, 0, bitsize_2,
- bitsize_1, 0, 1, BITS_PER_WORD);
+ bitsize_1, NULL_RTX, 1, BITS_PER_WORD);
}
#endif
if (tmode == VOIDmode)
tmode = mode;
-
while (GET_CODE (op0) == SUBREG)
{
offset += SUBREG_WORD (op0);
> GET_MODE_SIZE (maxmode)))
bestmode = get_best_mode (bitsize, bitnum,
align * BITS_PER_UNIT, maxmode,
- (GET_CODE (xop0) == MEM
- && MEM_VOLATILE_P (xop0)));
+ MEM_VOLATILE_P (xop0));
else
bestmode = GET_MODE (xop0);
if (GET_MODE (xtarget) != maxmode)
{
if (GET_CODE (xtarget) == REG)
- xspec_target_subreg = xtarget = gen_lowpart (maxmode, xtarget);
+ {
+ int wider = (GET_MODE_SIZE (maxmode)
+ > GET_MODE_SIZE (GET_MODE (xtarget)));
+ xtarget = gen_lowpart (maxmode, xtarget);
+ if (wider)
+ xspec_target_subreg = xtarget;
+ }
else
xtarget = gen_reg_rtx (maxmode);
}
(xtarget, maxmode)))
xtarget = gen_reg_rtx (maxmode);
- bitsize_rtx = gen_rtx (CONST_INT, VOIDmode, bitsize);
- bitpos_rtx = gen_rtx (CONST_INT, VOIDmode, xbitpos);
+ bitsize_rtx = GEN_INT (bitsize);
+ bitpos_rtx = GEN_INT (xbitpos);
pat = gen_extzv (protect_from_queue (xtarget, 1),
xop0, bitsize_rtx, bitpos_rtx);
> GET_MODE_SIZE (maxmode)))
bestmode = get_best_mode (bitsize, bitnum,
align * BITS_PER_UNIT, maxmode,
- (GET_CODE (xop0) == MEM
- && MEM_VOLATILE_P (xop0)));
+ MEM_VOLATILE_P (xop0));
else
bestmode = GET_MODE (xop0);
if (GET_MODE (xtarget) != maxmode)
{
if (GET_CODE (xtarget) == REG)
- xspec_target_subreg = xtarget = gen_lowpart (maxmode, xtarget);
+ {
+ int wider = (GET_MODE_SIZE (maxmode)
+ > GET_MODE_SIZE (GET_MODE (xtarget)));
+ xtarget = gen_lowpart (maxmode, xtarget);
+ if (wider)
+ xspec_target_subreg = xtarget;
+ }
else
xtarget = gen_reg_rtx (maxmode);
}
(xtarget, maxmode)))
xtarget = gen_reg_rtx (maxmode);
- bitsize_rtx = gen_rtx (CONST_INT, VOIDmode, bitsize);
- bitpos_rtx = gen_rtx (CONST_INT, VOIDmode, xbitpos);
+ bitsize_rtx = GEN_INT (bitsize);
+ bitpos_rtx = GEN_INT (xbitpos);
pat = gen_extv (protect_from_queue (xtarget, 1),
xop0, bitsize_rtx, bitpos_rtx);
total_bits = GET_MODE_BITSIZE (mode);
+ /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
+     be in the range 0 to total_bits-1, and put any excess bytes in
+ OFFSET. */
+ if (bitpos >= total_bits)
+ {
+ offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
+ bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
+ * BITS_PER_UNIT);
+ }
+
/* Get ref to an aligned byte, halfword, or word containing the field.
Adjust BITPOS to be position within a word,
and OFFSET to be the offset of that word.
enum machine_mode mode;
int bitpos, bitsize, complement;
{
- int masklow, maskhigh;
+ HOST_WIDE_INT masklow, maskhigh;
- if (bitpos < HOST_BITS_PER_INT)
- masklow = -1 << bitpos;
+ if (bitpos < HOST_BITS_PER_WIDE_INT)
+ masklow = (HOST_WIDE_INT) -1 << bitpos;
else
masklow = 0;
- if (bitpos + bitsize < HOST_BITS_PER_INT)
- masklow &= (unsigned) -1 >> (HOST_BITS_PER_INT - bitpos - bitsize);
+ if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
+ masklow &= ((unsigned HOST_WIDE_INT) -1
+ >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
- if (bitpos <= HOST_BITS_PER_INT)
+ if (bitpos <= HOST_BITS_PER_WIDE_INT)
maskhigh = -1;
else
- maskhigh = -1 << (bitpos - HOST_BITS_PER_INT);
+ maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
- if (bitpos + bitsize > HOST_BITS_PER_INT)
- maskhigh &= (unsigned) -1 >> (2 * HOST_BITS_PER_INT - bitpos - bitsize);
+ if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
+ maskhigh &= ((unsigned HOST_WIDE_INT) -1
+ >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
else
maskhigh = 0;
rtx value;
int bitpos, bitsize;
{
- unsigned v = INTVAL (value);
- int low, high;
+ unsigned HOST_WIDE_INT v = INTVAL (value);
+ HOST_WIDE_INT low, high;
- if (bitsize < HOST_BITS_PER_INT)
- v &= ~(-1 << bitsize);
+ if (bitsize < HOST_BITS_PER_WIDE_INT)
+ v &= ~((HOST_WIDE_INT) -1 << bitsize);
- if (bitpos < HOST_BITS_PER_INT)
+ if (bitpos < HOST_BITS_PER_WIDE_INT)
{
low = v << bitpos;
- high = (bitpos > 0 ? (v >> (HOST_BITS_PER_INT - bitpos)) : 0);
+ high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
}
else
{
low = 0;
- high = v << (bitpos - HOST_BITS_PER_INT);
+ high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
}
return immed_double_const (low, high, mode);
: operand_subword_force (op0, offset, GET_MODE (op0)));
part1 = extract_fixed_bit_field (word_mode, word,
GET_CODE (op0) == MEM ? offset : 0,
- bitsize_1, bitpos % unit, 0, 1, align);
+ bitsize_1, bitpos % unit, NULL_RTX,
+ 1, align);
/* Offset op0 by 1 word to get to the following one. */
if (GET_CODE (op0) == SUBREG)
(GET_CODE (op0) == MEM
? CEIL (offset + 1, UNITS_PER_WORD) * UNITS_PER_WORD
: 0),
- bitsize_2, 0, 0, 1, align);
+ bitsize_2, 0, NULL_RTX, 1, align);
/* Shift the more significant part up to fit above the other part. */
#if BYTES_BIG_ENDIAN
/* Combine the two parts with bitwise or. This works
because we extracted both parts as unsigned bit fields. */
- result = expand_binop (word_mode, ior_optab, part1, part2, 0, 1,
+ result = expand_binop (word_mode, ior_optab, part1, part2, NULL_RTX, 1,
OPTAB_LIB_WIDEN);
/* Unsigned bit field: we are done. */
return result;
/* Signed bit field: sign-extend with two arithmetic shifts. */
result = expand_shift (LSHIFT_EXPR, word_mode, result,
- build_int_2 (BITS_PER_WORD - bitsize, 0), 0, 0);
+ build_int_2 (BITS_PER_WORD - bitsize, 0),
+ NULL_RTX, 0);
return expand_shift (RSHIFT_EXPR, word_mode, result,
- build_int_2 (BITS_PER_WORD - bitsize, 0), 0, 0);
+ build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
}
\f
/* Add INC into TARGET. */
and shifted in the other direction; but that does not work
on all machines. */
- op1 = expand_expr (amount, 0, VOIDmode, 0);
+ op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
if (op1 == const0_rtx)
return shifted;
if (methods == OPTAB_WIDEN)
continue;
else if (methods == OPTAB_LIB_WIDEN)
- methods = OPTAB_LIB;
+ {
+ /* If we are rotating by a constant that is valid and
+ we have been unable to open-code this by a rotation,
+ do it as the IOR of two shifts. I.e., to rotate A
+ by N bits, compute (A << N) | ((unsigned) A >> (C - N))
+ where C is the bitsize of A.
+
+ It is theoretically possible that the target machine might
+ not be able to perform either shift and hence we would
+ be making two libcalls rather than just the one for the
+ shift (similarly if IOR could not be done). We will allow
+ this extremely unlikely lossage to avoid complicating the
+ code below. */
+
+ if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0
+ && INTVAL (op1) < GET_MODE_BITSIZE (mode))
+ {
+ rtx subtarget = target == shifted ? 0 : target;
+ rtx temp1;
+ tree other_amount
+ = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0);
+
+ shifted = force_reg (mode, shifted);
+
+ temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
+ mode, shifted, amount, subtarget, 1);
+ temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
+ mode, shifted, other_amount, 0, 1);
+ return expand_binop (mode, ior_optab, temp, temp1, target,
+ unsignedp, methods);
+ }
+ else
+ methods = OPTAB_LIB;
+ }
temp = expand_binop (mode,
left ? rotl_optab : rotr_optab,
shifted, op1, target, unsignedp, methods);
+
+ /* If we don't have the rotate, but we are rotating by a constant
+ that is in range, try a rotate in the opposite direction. */
+
+ if (temp == 0 && GET_CODE (op1) == CONST_INT
+ && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
+ temp = expand_binop (mode,
+ left ? rotr_optab : rotl_optab,
+ shifted,
+ GEN_INT (GET_MODE_BITSIZE (mode)
+ - INTVAL (op1)),
+ target, unsignedp, methods);
}
else if (unsignedp)
{
|| (methods == OPTAB_WIDEN
&& GET_MODE_SIZE (mode) < GET_MODE_SIZE (output_mode)))
{
- /* Note convert_to_mode does protect_from_queue. */
- rtx shifted1 = convert_to_mode (output_mode, shifted, 1);
+ rtx shifted1 = convert_to_mode (output_mode,
+ protect_from_queue (shifted, 0),
+ 1);
enum machine_mode length_mode
= insn_operand_mode[(int) CODE_FOR_extzv][2];
enum machine_mode pos_mode
(target1, output_mode)))
target1 = gen_reg_rtx (output_mode);
+ xop1 = protect_from_queue (xop1, 0);
xop1 = convert_to_mode (pos_mode, xop1,
TREE_UNSIGNED (TREE_TYPE (amount)));
/* WIDTH gets the width of the bit field to extract:
wordsize minus # bits to shift by. */
if (GET_CODE (xop1) == CONST_INT)
- width = gen_rtx (CONST_INT, VOIDmode,
- (GET_MODE_BITSIZE (mode) - INTVAL (op1)));
+ width = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
else
{
/* Now get the width in the proper mode. */
+ op1 = protect_from_queue (op1, 0);
width = convert_to_mode (length_mode, op1,
TREE_UNSIGNED (TREE_TYPE (amount)));
width = expand_binop (length_mode, sub_optab,
- gen_rtx (CONST_INT, VOIDmode,
- GET_MODE_BITSIZE (mode)),
- width, 0, 0, OPTAB_LIB_WIDEN);
+ GEN_INT (GET_MODE_BITSIZE (mode)),
+ width, NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
/* If this machine's extzv insists on a register for
return temp;
}
\f
-enum alg_code { alg_add, alg_subtract, alg_compound };
+enum alg_code { alg_zero, alg_m, alg_shift,
+ alg_add_t_m2, alg_sub_t_m2,
+ alg_add_factor, alg_sub_factor,
+ alg_add_t2_m, alg_sub_t2_m,
+ alg_add, alg_subtract, alg_factor, alg_shiftop };
/* This structure records a sequence of operations.
`ops' is the number of operations recorded.
`cost' is their total cost.
The operations are stored in `op' and the corresponding
- integer coefficients in `coeff'.
- These are the operations:
- alg_add Add to the total the multiplicand times the coefficient.
- alg_subtract Subtract the multiplicand times the coefficient.
- alg_compound This coefficient plus or minus the following one
- is multiplied into the total. The following operation
- is alg_add or alg_subtract to indicate whether to add
- or subtract the two coefficients. */
+ logarithms of the integer coefficients in `log'.
-#ifndef MAX_BITS_PER_WORD
-#define MAX_BITS_PER_WORD BITS_PER_WORD
-#endif
+ These are the operations:
+ alg_zero total := 0;
+ alg_m total := multiplicand;
+ alg_shift total := total * coeff
+ alg_add_t_m2 total := total + multiplicand * coeff;
+ alg_sub_t_m2 total := total - multiplicand * coeff;
+ alg_add_factor total := total * coeff + total;
+ alg_sub_factor total := total * coeff - total;
+ alg_add_t2_m total := total * coeff + multiplicand;
+ alg_sub_t2_m total := total * coeff - multiplicand;
+
+ The first operand must be either alg_zero or alg_m. */
struct algorithm
{
- int cost;
- unsigned int ops;
+ short cost;
+ short ops;
+ /* The size of the OP and LOG fields are not directly related to the
+ word size, but the worst-case algorithms will be if we have few
+ consecutive ones or zeros, i.e., a multiplicand like 10101010101...
+ In that case we will generate shift-by-2, add, shift-by-2, add,...,
+ in total wordsize operations. */
enum alg_code op[MAX_BITS_PER_WORD];
- unsigned int coeff[MAX_BITS_PER_WORD];
+ char log[MAX_BITS_PER_WORD];
};
/* Compute and return the best algorithm for multiplying by T.
- Assume that add insns cost ADD_COST and shifts cost SHIFT_COST.
- Return cost -1 if would cost more than MAX_COST. */
+   The algorithm must cost less than cost_limit.
+   If retval.cost >= COST_LIMIT, no algorithm was found and all
+   other fields of the returned struct are undefined.  */
static struct algorithm
-synth_mult (t, add_cost, shift_cost, max_cost)
- unsigned int t;
- int add_cost, shift_cost;
- int max_cost;
+synth_mult (t, cost_limit)
+ unsigned HOST_WIDE_INT t;
+ int cost_limit;
{
- int m, n;
- struct algorithm *best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
- struct algorithm *alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
+ int m;
+ struct algorithm *best_alg
+ = (struct algorithm *)alloca (sizeof (struct algorithm));
+ struct algorithm *alg_in
+ = (struct algorithm *)alloca (sizeof (struct algorithm));
unsigned int cost;
+ unsigned HOST_WIDE_INT q;
- /* No matter what happens, we want to return a valid algorithm. */
- best_alg->cost = max_cost;
- best_alg->ops = 0;
+ /* Indicate that no algorithm is yet found. If no algorithm
+ is found, this value will be returned and indicate failure. */
+ best_alg->cost = cost_limit;
- /* Is t an exponent of 2, so we can just do a shift? */
+ if (cost_limit <= 0)
+ return *best_alg;
- if ((t & -t) == t)
+ /* t == 1 can be done in zero cost. */
+ if (t == 1)
{
- if (t > 1)
- {
- if (max_cost >= shift_cost)
- {
- best_alg->cost = shift_cost;
- best_alg->ops = 1;
- best_alg->op[0] = alg_add;
- best_alg->coeff[0] = t;
- }
- else
- best_alg->cost = -1;
- }
- else if (t == 1)
- {
- if (max_cost >= 0)
- best_alg->cost = 0;
- }
- else
- best_alg->cost = 0;
-
+ best_alg->ops = 1;
+ best_alg->cost = 0;
+ best_alg->op[0] = alg_m;
return *best_alg;
}
- /* If MAX_COST just permits as little as an addition (or less), we won't
- succeed in synthesizing an algorithm for t. Return immediately with
- an indication of failure. */
- if (max_cost <= add_cost)
- {
- best_alg->cost = -1;
- return *best_alg;
- }
+ /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
+ fail now. */
- /* Look for factors of t of the form
- t = q(2**m +- 1), 2 <= m <= floor(log2(t)) - 1.
- If we find such a factor, we can multiply by t using an algorithm that
- multiplies by q, shift the result by m and add/subtract it to itself. */
-
- for (m = floor_log2 (t) - 1; m >= 2; m--)
+ else if (t == 0)
{
- int m_exp_2 = 1 << m;
- int d;
-
- d = m_exp_2 + 1;
- if (t % d == 0)
+ if (zero_cost >= cost_limit)
+ return *best_alg;
+ else
{
- int q = t / d;
-
- cost = add_cost + shift_cost * 2;
-
- *alg_in = synth_mult (q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
-
- if (alg_in->cost >= 0)
- {
- cost += alg_in->cost;
-
- if (cost < best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = m_exp_2;
- best_alg->op[best_alg->ops++] = alg_compound;
- best_alg->coeff[best_alg->ops] = 1;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->cost = cost;
- }
- }
+ best_alg->ops = 1;
+ best_alg->cost = zero_cost;
+ best_alg->op[0] = alg_zero;
+ return *best_alg;
}
+ }
- d = m_exp_2 - 1;
- if (t % d == 0)
- {
- int q = t / d;
-
- cost = add_cost + shift_cost * 2;
+ /* If we have a group of zero bits at the low-order part of T, try
+ multiplying by the remaining bits and then doing a shift. */
- *alg_in = synth_mult (q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
+ if ((t & 1) == 0)
+ {
+ m = floor_log2 (t & -t); /* m = number of low zero bits */
+ q = t >> m;
+ cost = shift_cost[m];
+ if (cost < cost_limit)
+ {
+ *alg_in = synth_mult (q, cost_limit - cost);
- if (alg_in->cost >= 0)
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
{
- cost += alg_in->cost;
-
- if (cost < best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = m_exp_2;
- best_alg->op[best_alg->ops++] = alg_compound;
- best_alg->coeff[best_alg->ops] = 1;
- best_alg->op[best_alg->ops++] = alg_subtract;
- best_alg->cost = cost;
- }
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_shift;
+ best_alg->cost = cost_limit = cost;
}
}
}
- /* Try load effective address instructions, i.e. do a*3, a*5, a*9. */
-
- {
- int q;
- int w;
-
- q = t & -t; /* get out lsb */
- w = (t - q) & -(t - q); /* get out next lsb */
-
- if (w / q <= lea_max_mul)
- {
- cost = lea_cost + (q != 1 ? shift_cost : 0);
-
- *alg_in = synth_mult (t - q - w, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
-
- if (alg_in->cost >= 0)
- {
- cost += alg_in->cost;
-
- /* Use <= to prefer this method to the factoring method
- when the cost appears the same, because this method
- uses fewer temporary registers. */
- if (cost <= best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = w;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->coeff[best_alg->ops] = q;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->cost = cost;
- }
- }
- }
- }
-
- /* Now, use the good old method to add or subtract at the leftmost
- 1-bit. */
-
+ /* If we have an odd number, add or subtract one. */
+ if ((t & 1) != 0)
{
- int q;
- int w;
+ unsigned HOST_WIDE_INT w;
- q = t & -t; /* get out lsb */
- for (w = q; (w & t) != 0; w <<= 1)
+ for (w = 1; (w & t) != 0; w <<= 1)
;
- if ((w > q << 1)
- /* Reject the case where t has only two bits.
+ if (w > 2
+ /* Reject the case where t is 3.
Thus we prefer addition in that case. */
- && !(t < w && w == q << 2))
+ && t != 3)
{
- /* There are many bits in a row. Make 'em by subtraction. */
+ /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
cost = add_cost;
- if (q != 1)
- cost += shift_cost;
-
- *alg_in = synth_mult (t + q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
+ *alg_in = synth_mult (t + 1, cost_limit - cost);
- if (alg_in->cost >= 0)
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
{
- cost += alg_in->cost;
-
- /* Use <= to prefer this method to the factoring method
- when the cost appears the same, because this method
- uses fewer temporary registers. */
- if (cost <= best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = q;
- best_alg->op[best_alg->ops++] = alg_subtract;
- best_alg->cost = cost;
- }
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = 0;
+ best_alg->op[best_alg->ops++] = alg_sub_t_m2;
+ best_alg->cost = cost_limit = cost;
}
}
else
{
- /* There's only one bit at the left. Make it by addition. */
+ /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
cost = add_cost;
- if (q != 1)
- cost += shift_cost;
+ *alg_in = synth_mult (t - 1, cost_limit - cost);
- *alg_in = synth_mult (t - q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
-
- if (alg_in->cost >= 0)
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
{
- cost += alg_in->cost;
-
- if (cost <= best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = q;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->cost = cost;
- }
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = 0;
+ best_alg->op[best_alg->ops++] = alg_add_t_m2;
+ best_alg->cost = cost_limit = cost;
}
}
}
- if (best_alg->cost >= max_cost)
- best_alg->cost = -1;
+ /* Look for factors of t of the form
+ t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
+ If we find such a factor, we can multiply by t using an algorithm that
+ multiplies by q, shift the result by m and add/subtract it to itself.
+
+ We search for large factors first and loop down, even if large factors
+ are less probable than small; if we find a large factor we will find a
+ good sequence quickly, and therefore be able to prune (by decreasing
+ COST_LIMIT) the search. */
+
+ for (m = floor_log2 (t - 1); m >= 2; m--)
+ {
+ unsigned HOST_WIDE_INT d;
+
+ d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
+ if (t % d == 0 && t > d)
+ {
+ cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
+ *alg_in = synth_mult (t / d, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_add_factor;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+
+ d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
+ if (t % d == 0 && t > d)
+ {
+ cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
+ *alg_in = synth_mult (t / d, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_sub_factor;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+ }
+
+ /* Try shift-and-add (load effective address) instructions,
+ i.e. do a*3, a*5, a*9. */
+ if ((t & 1) != 0)
+ {
+ q = t - 1;
+ q = q & -q;
+ m = exact_log2 (q);
+ if (m >= 0)
+ {
+ cost = shiftadd_cost[m];
+ *alg_in = synth_mult ((t - 1) >> m, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_add_t2_m;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+
+ q = t + 1;
+ q = q & -q;
+ m = exact_log2 (q);
+ if (m >= 0)
+ {
+ cost = shiftsub_cost[m];
+ *alg_in = synth_mult ((t + 1) >> m, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_sub_t2_m;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+ }
+
+ /* If we are getting a too long sequence for `struct algorithm'
+ to record, store a fake cost to make this search fail. */
+ if (best_alg->ops == MAX_BITS_PER_WORD)
+ best_alg->cost = cost_limit;
+
return *best_alg;
}
\f
{
if ((CONST_DOUBLE_HIGH (op1) == 0 && CONST_DOUBLE_LOW (op1) >= 0)
|| (CONST_DOUBLE_HIGH (op1) == -1 && CONST_DOUBLE_LOW (op1) < 0))
- const_op1 = gen_rtx (CONST_INT, VOIDmode, CONST_DOUBLE_LOW (op1));
+ const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
}
- if (GET_CODE (const_op1) == CONST_INT && ! mult_is_very_cheap && optimize)
+ /* We used to test optimize here, on the grounds that it's better to
+ produce a smaller program when -O is not used.
+ But this causes such a terrible slowdown sometimes
+ that it seems better to use synth_mult always. */
+
+ if (GET_CODE (const_op1) == CONST_INT && ! mult_is_very_cheap)
{
struct algorithm alg;
struct algorithm neg_alg;
int negate = 0;
- int absval = INTVAL (op1);
- rtx last;
+ HOST_WIDE_INT val = INTVAL (op1);
+ HOST_WIDE_INT val_so_far;
+ rtx insn;
/* Try to do the computation two ways: multiply by the negative of OP1
and then negate, or do the multiplication directly. The latter is
has a factor of 2**m +/- 1, while the negated value does not or
vice versa. */
- alg = synth_mult (absval, add_cost, shift_cost, mult_cost);
- neg_alg = synth_mult (- absval, add_cost, shift_cost,
- mult_cost - negate_cost);
+ alg = synth_mult (val, mult_cost);
+ neg_alg = synth_mult (- val,
+ (alg.cost < mult_cost ? alg.cost : mult_cost)
+ - negate_cost);
- if (neg_alg.cost >= 0 && neg_alg.cost + negate_cost < alg.cost)
- alg = neg_alg, negate = 1, absval = - absval;
+ if (neg_alg.cost + negate_cost < alg.cost)
+ alg = neg_alg, negate = 1;
- if (alg.cost >= 0)
+ if (alg.cost < mult_cost)
{
- /* If we found something, it must be cheaper than multiply.
- So use it. */
- int opno = 0;
+ /* We found something cheaper than a multiply insn. */
+ int opno;
rtx accum, tem;
- int factors_seen = 0;
op0 = protect_from_queue (op0, 0);
if (GET_CODE (op0) == MEM)
op0 = force_reg (mode, op0);
- if (alg.ops == 0)
- accum = copy_to_mode_reg (mode, op0);
- else
+ /* ACCUM starts out either as OP0 or as a zero, depending on
+ the first operation. */
+
+ if (alg.op[0] == alg_zero)
{
- /* 1 if this is the last in a series of adds and subtracts. */
- int last = (1 == alg.ops || alg.op[1] == alg_compound);
- int log = floor_log2 (alg.coeff[0]);
- if (! factors_seen && ! last)
- log -= floor_log2 (alg.coeff[1]);
-
- if (alg.op[0] != alg_add)
- abort ();
- accum = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0),
- 0, 0);
+ accum = copy_to_mode_reg (mode, const0_rtx);
+ val_so_far = 0;
}
-
- while (++opno < alg.ops)
+ else if (alg.op[0] == alg_m)
{
- int log = floor_log2 (alg.coeff[opno]);
- /* 1 if this is the last in a series of adds and subtracts. */
- int last = (opno + 1 == alg.ops
- || alg.op[opno + 1] == alg_compound);
-
- /* If we have not yet seen any separate factors (alg_compound)
- then turn op0<<a1 + op0<<a2 + op0<<a3... into
- (op0<<(a1-a2) + op0)<<(a2-a3) + op0... */
+ accum = copy_to_mode_reg (mode, op0);
+ val_so_far = 1;
+ }
+ else
+ abort ();
+
+ for (opno = 1; opno < alg.ops; opno++)
+ {
+ int log = alg.log[opno];
+ rtx shift_subtarget = preserve_subexpressions_p () ? 0 : accum;
+ rtx add_target = opno == alg.ops - 1 && target != 0 ? target : 0;
+
switch (alg.op[opno])
{
- case alg_add:
- if (factors_seen)
- {
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), 0, 0);
- accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
- accum);
- }
- else
- {
- if (! last)
- log -= floor_log2 (alg.coeff[opno + 1]);
- accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
- accum);
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), accum, 0);
- }
+ case alg_shift:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ val_so_far <<= log;
break;
- case alg_subtract:
- if (factors_seen)
- {
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), 0, 0);
- accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
- accum);
- }
- else
- {
- if (! last)
- log -= floor_log2 (alg.coeff[opno + 1]);
- accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
- accum);
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), accum, 0);
- }
+ case alg_add_t_m2:
+ tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
+ add_target ? add_target : accum);
+ val_so_far += (HOST_WIDE_INT) 1 << log;
+ break;
+ case alg_sub_t_m2:
+ tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
+ add_target ? add_target : accum);
+ val_so_far -= (HOST_WIDE_INT) 1 << log;
break;
- case alg_compound:
- factors_seen = 1;
- tem = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), 0, 0);
+ case alg_add_t2_m:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), accum, 0);
+ accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
+ add_target ? add_target : accum);
+ val_so_far = (val_so_far << log) + 1;
+ break;
- log = floor_log2 (alg.coeff[opno + 1]);
+ case alg_sub_t2_m:
accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), 0, 0);
- opno++;
- if (alg.op[opno] == alg_add)
- accum = force_operand (gen_rtx (PLUS, mode, tem, accum),
- tem);
- else
- accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
- tem);
- }
- }
+ build_int_2 (log, 0), accum, 0);
+ accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
+ add_target ? add_target : accum);
+ val_so_far = (val_so_far << log) - 1;
+ break;
- /* Write a REG_EQUAL note on the last insn so that we can cse
- multiplication sequences. We need not do this if we were
- multiplying by a power of two, since only one insn would have
- been generated.
+ case alg_add_factor:
+ tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
+ add_target ? add_target : accum);
+ val_so_far += val_so_far << log;
+ break;
- ??? We could also write REG_EQUAL notes on the last insn of
- each sequence that uses a single temporary, but it is not
- clear how to calculate the partial product so far.
+ case alg_sub_factor:
+ tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
+ add_target ? add_target : tem);
+ val_so_far = (val_so_far << log) - val_so_far;
+ break;
- Torbjorn: Can you do this? */
+ default:
+ abort ();;
+ }
- if (exact_log2 (absval) < 0)
- {
- last = get_last_insn ();
- REG_NOTES (last)
+ /* Write a REG_EQUAL note on the last insn so that we can cse
+ multiplication sequences. */
+
+ insn = get_last_insn ();
+ REG_NOTES (insn)
= gen_rtx (EXPR_LIST, REG_EQUAL,
- gen_rtx (MULT, mode, op0,
- negate ? gen_rtx (CONST_INT,
- VOIDmode, absval)
- : op1),
- REG_NOTES (last));
+ gen_rtx (MULT, mode, op0, GEN_INT (val_so_far)),
+ REG_NOTES (insn));
}
- return (negate ? expand_unop (mode, neg_optab, accum, target, 0)
- : accum);
+ if (negate)
+ {
+ val_so_far = - val_so_far;
+ accum = expand_unop (mode, neg_optab, accum, target, 0);
+ }
+
+ if (val != val_so_far)
+ abort ();
+
+ return accum;
}
}
/* This used to use umul_optab if unsigned,
- but I think that for non-widening multiply there is no difference
+ but for non-widening multiply there is no difference
between signed and unsigned. */
op0 = expand_binop (mode, smul_optab,
op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
register rtx result = 0;
enum machine_mode compute_mode;
int log = -1;
+ int size;
int can_clobber_op0;
int mod_insn_no_good = 0;
rtx adjusted_op0 = op0;
optab optab1, optab2;
+ /* We shouldn't be called with op1 == const1_rtx, but some of the
+ code below will malfunction if we are, so check here and handle
+ the special case if so. */
+ if (op1 == const1_rtx)
+ return rem_flag ? const0_rtx : op0;
+
/* Don't use the function value register as a target
since we have to read it as well as write it,
and function-inlining gets confused by this. */
if (compute_mode == VOIDmode)
compute_mode = mode;
+ size = GET_MODE_BITSIZE (compute_mode);
+
/* Now convert to the best mode to use. Show we made a copy of OP0
and hence we can clobber it (we cannot use a SUBREG to widen
something. */
op1 = convert_to_mode (compute_mode, op1, unsignedp);
}
+ /* If we are computing the remainder and one of the operands is a volatile
+ MEM, copy it into a register. */
+
+ if (rem_flag && GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
+ adjusted_op0 = op0 = force_reg (compute_mode, op0), can_clobber_op0 = 1;
+ if (rem_flag && GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
+ op1 = force_reg (compute_mode, op1);
+
+ /* If we are computing the remainder, op0 will be needed later to calculate
+ X - Y * (X / Y), therefore cannot be clobbered. */
+ if (rem_flag)
+ can_clobber_op0 = 0;
+
if (target == 0 || GET_MODE (target) != compute_mode)
target = gen_reg_rtx (compute_mode);
case TRUNC_DIV_EXPR:
if (log >= 0 && ! unsignedp)
{
- rtx label = gen_label_rtx ();
- if (! can_clobber_op0)
+ /* Here we need to add OP1-1 if OP0 is negative, 0 otherwise.
+ This can be computed without jumps by arithmetically shifting
+ OP0 right LOG-1 places and then shifting right logically
+ SIZE-LOG bits. The resulting value is unconditionally added
+ to OP0. */
+ if (log == 1 || BRANCH_COST >= 3)
{
- adjusted_op0 = copy_to_suggested_reg (adjusted_op0, target,
- compute_mode);
- /* Copy op0 to a reg, since emit_cmp_insn will call emit_queue
- which will screw up mem refs for autoincrements. */
- op0 = force_reg (compute_mode, op0);
+ rtx temp = gen_reg_rtx (compute_mode);
+ if (! can_clobber_op0)
+ /* Copy op0 to a reg, to play safe,
+ since this is done in the other path. */
+ op0 = force_reg (compute_mode, op0);
+ temp = copy_to_suggested_reg (adjusted_op0, temp, compute_mode);
+ temp = expand_shift (RSHIFT_EXPR, compute_mode, temp,
+ build_int_2 (log - 1, 0), NULL_RTX, 0);
+ temp = expand_shift (RSHIFT_EXPR, compute_mode, temp,
+ build_int_2 (size - log, 0),
+ temp, 1);
+ /* We supply 0 as the target to make a new pseudo
+ for the value; that helps loop.c optimize the result. */
+ adjusted_op0 = expand_binop (compute_mode, add_optab,
+ adjusted_op0, temp,
+ 0, 0, OPTAB_LIB_WIDEN);
+ }
+ else
+ {
+ rtx label = gen_label_rtx ();
+ if (! can_clobber_op0)
+ {
+ adjusted_op0 = copy_to_suggested_reg (adjusted_op0, target,
+ compute_mode);
+ /* Copy op0 to a reg, since emit_cmp_insn will call emit_queue
+ which will screw up mem refs for autoincrements. */
+ op0 = force_reg (compute_mode, op0);
+ }
+ emit_cmp_insn (adjusted_op0, const0_rtx, GE,
+ NULL_RTX, compute_mode, 0, 0);
+ emit_jump_insn (gen_bge (label));
+ expand_inc (adjusted_op0, plus_constant (op1, -1));
+ emit_label (label);
}
- emit_cmp_insn (adjusted_op0, const0_rtx, GE, 0, compute_mode, 0, 0);
- emit_jump_insn (gen_bge (label));
- expand_inc (adjusted_op0, plus_constant (op1, -1));
- emit_label (label);
mod_insn_no_good = 1;
}
break;
which will screw up mem refs for autoincrements. */
op0 = force_reg (compute_mode, op0);
}
- emit_cmp_insn (adjusted_op0, const0_rtx, GE, 0, compute_mode, 0, 0);
+ emit_cmp_insn (adjusted_op0, const0_rtx, GE,
+ NULL_RTX, compute_mode, 0, 0);
emit_jump_insn (gen_bge (label));
expand_dec (adjusted_op0, op1);
expand_inc (adjusted_op0, const1_rtx);
if (! unsignedp)
{
label = gen_label_rtx ();
- emit_cmp_insn (adjusted_op0, const0_rtx, LE, 0, compute_mode, 0, 0);
+ emit_cmp_insn (adjusted_op0, const0_rtx, LE,
+ NULL_RTX, compute_mode, 0, 0);
emit_jump_insn (gen_ble (label));
}
expand_inc (adjusted_op0, op1);
{
adjusted_op0 = expand_binop (compute_mode, add_optab,
adjusted_op0, plus_constant (op1, -1),
- 0, 0, OPTAB_LIB_WIDEN);
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
mod_insn_no_good = 1;
break;
if (log < 0)
{
op1 = expand_shift (RSHIFT_EXPR, compute_mode, op1,
- integer_one_node, 0, 0);
+ integer_one_node, NULL_RTX, 0);
if (! unsignedp)
{
- rtx label = gen_label_rtx ();
- emit_cmp_insn (adjusted_op0, const0_rtx, GE, 0, compute_mode, 0, 0);
- emit_jump_insn (gen_bge (label));
- expand_unop (compute_mode, neg_optab, op1, op1, 0);
- emit_label (label);
+ if (BRANCH_COST >= 2)
+ {
+ /* Negate OP1 if OP0 < 0. Do this by computing a temporary
+ that has all bits equal to the sign bit and exclusive
+ or-ing it with OP1. */
+ rtx temp = gen_reg_rtx (compute_mode);
+ temp = copy_to_suggested_reg (adjusted_op0, temp, compute_mode);
+ temp = expand_shift (RSHIFT_EXPR, compute_mode, temp,
+ build_int_2 (size - 1, 0),
+ NULL_RTX, 0);
+ op1 = expand_binop (compute_mode, xor_optab, op1, temp, op1,
+ unsignedp, OPTAB_LIB_WIDEN);
+ }
+ else
+ {
+ rtx label = gen_label_rtx ();
+ emit_cmp_insn (adjusted_op0, const0_rtx, GE, NULL_RTX,
+ compute_mode, 0, 0);
+ emit_jump_insn (gen_bge (label));
+ expand_unop (compute_mode, neg_optab, op1, op1, 0);
+ emit_label (label);
+ }
}
expand_inc (adjusted_op0, op1);
}
else
{
- op1 = gen_rtx (CONST_INT, VOIDmode, (1 << log) / 2);
+ op1 = GEN_INT (((HOST_WIDE_INT) 1 << log) / 2);
expand_inc (adjusted_op0, op1);
}
mod_insn_no_good = 1;
/* Try to produce the remainder directly */
if (log >= 0)
result = expand_binop (compute_mode, and_optab, adjusted_op0,
- gen_rtx (CONST_INT, VOIDmode,
- (1 << log) - 1),
+ GEN_INT (((HOST_WIDE_INT) 1 << log) - 1),
target, 1, OPTAB_LIB_WIDEN);
else
{
if (! expand_twoval_binop (unsignedp
? udivmod_optab : sdivmod_optab,
adjusted_op0, op1,
- 0, result, unsignedp))
+ NULL_RTX, result, unsignedp))
result = 0;
}
}
and a remainder subroutine would be ok,
don't use a divide subroutine. */
result = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- adjusted_op0, op1, 0, unsignedp, OPTAB_WIDEN);
+ adjusted_op0, op1, NULL_RTX, unsignedp,
+ OPTAB_WIDEN);
else
{
/* Try a quotient insn, but not a library call. */
result = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- adjusted_op0, op1, rem_flag ? 0 : target,
+ adjusted_op0, op1,
+ rem_flag ? NULL_RTX : target,
unsignedp, OPTAB_WIDEN);
if (result == 0)
{
result = gen_reg_rtx (mode);
if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
adjusted_op0, op1,
- result, 0, unsignedp))
+ result, NULL_RTX, unsignedp))
result = 0;
}
/* If still no luck, use a library call. */
if (result == 0)
result = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- adjusted_op0, op1, rem_flag ? 0 : target,
+ adjusted_op0, op1,
+ rem_flag ? NULL_RTX : target,
unsignedp, OPTAB_LIB_WIDEN);
}
if (mode != VOIDmode)
tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
- tem = gen_rtx (CONST_INT, VOIDmode, INTVAL (op0) & INTVAL (op1));
+ tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
else
abort ();
if (mode == VOIDmode)
mode = GET_MODE (op0);
+ /* If one operand is constant, make it the second one. Only do this
+ if the other operand is not constant as well. */
+
+ if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
+ || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
+ {
+ tem = op0;
+ op0 = op1;
+ op1 = tem;
+ code = swap_condition (code);
+ }
+
/* For some comparisons with 1 and -1, we can convert this to
comparisons with zero. This will often produce more opportunities for
store-flag insns. */
if (op1 == const0_rtx && (code == LT || code == GE)
&& GET_MODE_CLASS (mode) == MODE_INT
&& (normalizep || STORE_FLAG_VALUE == 1
- || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_INT
- && STORE_FLAG_VALUE == 1 << (GET_MODE_BITSIZE (mode) - 1))))
+ || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+ && (STORE_FLAG_VALUE
+ == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
{
- rtx subtarget = target;
+ subtarget = target;
/* If the result is to be wider than OP0, it is best to convert it
first. If it is to be narrower, it is *incorrect* to convert it
first. */
if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
{
+ op0 = protect_from_queue (op0, 0);
op0 = convert_to_mode (target_mode, op0, 0);
mode = target_mode;
}
emit_queue ();
last = get_last_insn ();
- comparison = compare_from_rtx (op0, op1, code, unsignedp, mode, 0, 0);
+ comparison
+ = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
if (GET_CODE (comparison) == CONST_INT)
return (comparison == const0_rtx ? const0_rtx
: normalizep == 1 ? const1_rtx
: normalizep == -1 ? constm1_rtx
: const_true_rtx);
+ /* If the code of COMPARISON doesn't match CODE, something is
+ wrong; we can no longer be sure that we have the operation.
+ We could handle this case, but it should not happen. */
+
+ if (GET_CODE (comparison) != code)
+ abort ();
+
/* Get a reference to the target in the proper mode for this insn. */
compare_mode = insn_operand_mode[(int) icode][0];
subtarget = target;
{
convert_move (target, subtarget,
(GET_MODE_BITSIZE (compare_mode)
- <= HOST_BITS_PER_INT)
+ <= HOST_BITS_PER_WIDE_INT)
&& 0 == (STORE_FLAG_VALUE
- & (1 << (GET_MODE_BITSIZE (compare_mode) -1))));
+ & ((HOST_WIDE_INT) 1
+ << (GET_MODE_BITSIZE (compare_mode) -1))));
op0 = target;
compare_mode = target_mode;
}
else
op0 = subtarget;
+ /* If we want to keep subexpressions around, don't reuse our
+ last target. */
+
+ if (preserve_subexpressions_p ())
+ subtarget = 0;
+
/* Now normalize to the proper value in COMPARE_MODE. Sometimes
we don't have to do anything. */
if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
/* We don't want to use STORE_FLAG_VALUE < 0 below since this
makes it hard to use a value of just the sign bit due to
ANSI integer constant typing rules. */
- else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_INT
+ else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
&& (STORE_FLAG_VALUE
- & (1 << (GET_MODE_BITSIZE (compare_mode) - 1))))
+ & ((HOST_WIDE_INT) 1
+ << (GET_MODE_BITSIZE (compare_mode) - 1))))
op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
size_int (GET_MODE_BITSIZE (compare_mode) - 1),
subtarget, normalizep == 1);
conversion now. */
if (target_mode != compare_mode)
{
- convert_move (target, op0);
+ convert_move (target, op0, 0);
return target;
}
else
comparison with zero. Don't do any of these cases if branches are
very cheap. */
- if (BRANCH_COST >= 0
+ if (BRANCH_COST > 0
&& GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
&& op1 != const0_rtx)
{
if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
normalizep = STORE_FLAG_VALUE;
- else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_INT
- && STORE_FLAG_VALUE == 1 << (GET_MODE_BITSIZE (mode) - 1))
+ else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+ && (STORE_FLAG_VALUE
+ == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
;
else
return 0;
else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
{
mode = word_mode;
+ op0 = protect_from_queue (op0, 0);
tem = convert_to_mode (mode, op0, 1);
}