/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
This file is part of GCC.
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
-static int div_cost[NUM_MACHINE_MODES];
+static int sdiv_cost[NUM_MACHINE_MODES];
+static int udiv_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
struct rtx_def reg; rtunion reg_fld[2];
struct rtx_def plus; rtunion plus_fld1;
struct rtx_def neg;
- struct rtx_def udiv; rtunion udiv_fld1;
struct rtx_def mult; rtunion mult_fld1;
- struct rtx_def div; rtunion div_fld1;
- struct rtx_def mod; rtunion mod_fld1;
+ struct rtx_def sdiv; rtunion sdiv_fld1;
+ struct rtx_def udiv; rtunion udiv_fld1;
struct rtx_def zext;
+ struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
+ struct rtx_def smod_32; rtunion smod_32_fld1;
struct rtx_def wide_mult; rtunion wide_mult_fld1;
struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
struct rtx_def wide_trunc;
PUT_CODE (&all.neg, NEG);
XEXP (&all.neg, 0) = &all.reg;
- PUT_CODE (&all.udiv, UDIV);
- XEXP (&all.udiv, 0) = &all.reg;
- XEXP (&all.udiv, 1) = &all.reg;
-
PUT_CODE (&all.mult, MULT);
XEXP (&all.mult, 0) = &all.reg;
XEXP (&all.mult, 1) = &all.reg;
- PUT_CODE (&all.div, DIV);
- XEXP (&all.div, 0) = &all.reg;
- XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
+ PUT_CODE (&all.sdiv, DIV);
+ XEXP (&all.sdiv, 0) = &all.reg;
+ XEXP (&all.sdiv, 1) = &all.reg;
+
+ PUT_CODE (&all.udiv, UDIV);
+ XEXP (&all.udiv, 0) = &all.reg;
+ XEXP (&all.udiv, 1) = &all.reg;
+
+ PUT_CODE (&all.sdiv_32, DIV);
+ XEXP (&all.sdiv_32, 0) = &all.reg;
+ XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
- PUT_CODE (&all.mod, MOD);
- XEXP (&all.mod, 0) = &all.reg;
- XEXP (&all.mod, 1) = XEXP (&all.div, 1);
+ PUT_CODE (&all.smod_32, MOD);
+ XEXP (&all.smod_32, 0) = &all.reg;
+ XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
PUT_CODE (&all.zext, ZERO_EXTEND);
XEXP (&all.zext, 0) = &all.reg;
PUT_MODE (&all.reg, mode);
PUT_MODE (&all.plus, mode);
PUT_MODE (&all.neg, mode);
- PUT_MODE (&all.udiv, mode);
PUT_MODE (&all.mult, mode);
- PUT_MODE (&all.div, mode);
- PUT_MODE (&all.mod, mode);
+ PUT_MODE (&all.sdiv, mode);
+ PUT_MODE (&all.udiv, mode);
+ PUT_MODE (&all.sdiv_32, mode);
+ PUT_MODE (&all.smod_32, mode);
PUT_MODE (&all.wide_trunc, mode);
PUT_MODE (&all.shift, mode);
PUT_MODE (&all.shift_mult, mode);
add_cost[mode] = rtx_cost (&all.plus, SET);
neg_cost[mode] = rtx_cost (&all.neg, SET);
- div_cost[mode] = rtx_cost (&all.udiv, SET);
mul_cost[mode] = rtx_cost (&all.mult, SET);
+ sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
+ udiv_cost[mode] = rtx_cost (&all.udiv, SET);
- sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
- smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
+ sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
+ <= 2 * add_cost[mode]);
+ smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
+ <= 4 * add_cost[mode]);
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
meaningful at a much higher level; when structures are copied
between memory and regs, the higher-numbered regs
always get higher addresses. */
- bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
+ int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
+ int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
+
+ byte_offset = 0;
+
+ /* Paradoxical subregs need special handling on big endian machines. */
+ if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
+ {
+ int difference = inner_mode_size - outer_mode_size;
+
+ if (WORDS_BIG_ENDIAN)
+ byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
+ if (BYTES_BIG_ENDIAN)
+ byte_offset += difference % UNITS_PER_WORD;
+ }
+ else
+ byte_offset = SUBREG_BYTE (op0);
+
+ bitnum += byte_offset * BITS_PER_UNIT;
op0 = SUBREG_REG (op0);
}
if (HAVE_insv
&& GET_MODE (value) != BLKmode
- && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
&& bitsize > 0
&& GET_MODE_BITSIZE (op_mode) >= bitsize
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
- && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
+ && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
+ && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
+ VOIDmode))
{
int xbitpos = bitpos;
rtx value1;
if (! all_one)
{
- temp = expand_binop (mode, and_optab, op0,
+ /* Don't try and keep the intermediate in memory, if we need to
+ perform both a bit-wise AND and a bit-wise IOR (except when
+ we're optimizing for size). */
+ if (MEM_P (subtarget) && !all_zero && !optimize_size)
+ subtarget = force_reg (mode, subtarget);
+ temp = expand_binop (mode, and_optab, subtarget,
mask_rtx (mode, bitpos, bitsize, 1),
subtarget, 1, OPTAB_LIB_WIDEN);
subtarget = temp;
and shifted in the other direction; but that does not work
on all machines. */
- op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
+ op1 = expand_normal (amount);
if (SHIFT_COUNT_TRUNCATED)
{
struct mult_cost limit;
int op_cost;
+ /* Fail quickly for impossible bounds. */
+ if (mult_cost < 0)
+ return false;
+
+ /* Ensure that mult_cost provides a reasonable upper bound.
+ Any constant multiplication can be performed with less
+ than 2 * bits additions. */
+ op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
+ if (mult_cost > op_cost)
+ mult_cost = op_cost;
+
*variant = basic_variant;
limit.cost = mult_cost;
limit.latency = mult_cost;
if (mode == word_mode)
return gen_highpart (mode, op);
+ gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
+
wider_mode = GET_MODE_WIDER_MODE (mode);
op = expand_shift (RSHIFT_EXPR, wider_mode, op,
build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
rtx tem;
int size;
+ gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
+
wider_mode = GET_MODE_WIDER_MODE (mode);
size = GET_MODE_BITSIZE (mode);
struct algorithm alg;
rtx tem;
+ gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* We can't support modes wider than HOST_BITS_PER_INT. */
gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
- max_cost = div_cost[compute_mode]
- - (rem_flag && ! (last_div_const != 0 && op1_is_constant
- && INTVAL (op1) == last_div_const)
- ? mul_cost[compute_mode] + add_cost[compute_mode]
- : 0);
+ max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
+ if (rem_flag && ! (last_div_const != 0 && op1_is_constant
+ && INTVAL (op1) == last_div_const))
+ max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
return t;
}
}
-
-/* Check whether the multiplication X * MULT + ADD overflows.
- X, MULT and ADD must be CONST_*.
- MODE is the machine mode for the computation.
- X and MULT must have mode MODE. ADD may have a different mode.
- So can X (defaults to same as MODE).
- UNSIGNEDP is nonzero to do unsigned multiplication. */
-
-bool
-const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
- enum machine_mode mode, int unsignedp)
-{
- tree type, mult_type, add_type, result;
-
- type = lang_hooks.types.type_for_mode (mode, unsignedp);
-
- /* In order to get a proper overflow indication from an unsigned
- type, we have to pretend that it's a sizetype. */
- mult_type = type;
- if (unsignedp)
- {
- /* FIXME:It would be nice if we could step directly from this
- type to its sizetype equivalent. */
- mult_type = build_distinct_type_copy (type);
- TYPE_IS_SIZETYPE (mult_type) = 1;
- }
-
- add_type = (GET_MODE (add) == VOIDmode ? mult_type
- : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
-
- result = fold_build2 (PLUS_EXPR, mult_type,
- fold_build2 (MULT_EXPR, mult_type,
- make_tree (mult_type, x),
- make_tree (mult_type, mult)),
- make_tree (add_type, add));
-
- return TREE_CONSTANT_OVERFLOW (result);
-}
-
-/* Return an rtx representing the value of X * MULT + ADD.
- TARGET is a suggestion for where to store the result (an rtx).
- MODE is the machine mode for the computation.
- X and MULT must have mode MODE. ADD may have a different mode.
- So can X (defaults to same as MODE).
- UNSIGNEDP is nonzero to do unsigned multiplication.
- This may emit insns. */
-
-rtx
-expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
- int unsignedp)
-{
- tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
- tree add_type = (GET_MODE (add) == VOIDmode
- ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
- unsignedp));
- tree result = fold_build2 (PLUS_EXPR, type,
- fold_build2 (MULT_EXPR, type,
- make_tree (type, x),
- make_tree (type, mult)),
- make_tree (add_type, add));
-
- return expand_expr (result, target, VOIDmode, 0);
-}
\f
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
and returning TARGET.
}
\f
/* Perform possibly multi-word comparison and conditional jump to LABEL
-  if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
-
-  The algorithm is based on the code in expr.c:do_jump.
-
-  Note that this does not perform a general comparison.  Only
-  variants generated within expmed.c are correctly handled, others
-  could be handled if needed.  */
+  if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
+  now a thin wrapper around do_compare_rtx_and_jump.  */
static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
		 rtx label)
{
-  /* If this mode is an integer too wide to compare properly,
-     compare word by word.  Rely on cse to optimize constant cases.  */
-
-  if (GET_MODE_CLASS (mode) == MODE_INT
-      && ! can_compare_p (op, mode, ccp_jump))
-    {
-      rtx label2 = gen_label_rtx ();
-
-      switch (op)
-	{
-	case LTU:
-	  do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
-	  break;
-
-	case LEU:
-	  do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
-	  break;
-
-	case LT:
-	  do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
-	  break;
-
-	case GT:
-	  do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
-	  break;
-
-	case GE:
-	  do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
-	  break;
-
-	  /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
-	     that's the only equality operations we do */
-	case EQ:
-	  gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
-	  do_jump_by_parts_equality_rtx (arg1, label2, label);
-	  break;
-
-	case NE:
-	  gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
-	  do_jump_by_parts_equality_rtx (arg1, label, label2);
-	  break;
-
-	default:
-	  gcc_unreachable ();
-	}
-
-      emit_label (label2);
-    }
-  else
-    emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
+  /* LTU, LEU, GTU and GEU are the unsigned rtx comparison codes; the
+     remaining codes generated within expmed.c (LT, GT, GE, EQ, NE --
+     see the comparisons the old switch handled above) compare signed
+     or test equality, so UNSIGNEDP is derived purely from OP.  */
+  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
+  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
+			   NULL_RTX, NULL_RTX, label);
}