static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
-static int div_cost[NUM_MACHINE_MODES];
+static int sdiv_cost[NUM_MACHINE_MODES];
+static int udiv_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
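/* These per-mode tables are filled in once by init_expmed below, so
   that the expanders can compare strategies without recomputing
   rtx_cost on every query.  Splitting div_cost into sdiv_cost and
   udiv_cost lets signed and unsigned division be costed separately;
   expand_divmod picks between them further down.  */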
struct rtx_def reg; rtunion reg_fld[2];
struct rtx_def plus; rtunion plus_fld1;
struct rtx_def neg;
- struct rtx_def udiv; rtunion udiv_fld1;
struct rtx_def mult; rtunion mult_fld1;
- struct rtx_def div; rtunion div_fld1;
- struct rtx_def mod; rtunion mod_fld1;
+ struct rtx_def sdiv; rtunion sdiv_fld1;
+ struct rtx_def udiv; rtunion udiv_fld1;
struct rtx_def zext;
+ struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
+ struct rtx_def smod_32; rtunion smod_32_fld1;
struct rtx_def wide_mult; rtunion wide_mult_fld1;
struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
struct rtx_def wide_trunc;
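/* The fields above belong to a scratch structure of preallocated rtx
   skeletons that init_expmed reuses for every machine mode when
   querying rtx_cost; the trailing rtunion members supply the operand
   slots of the binary rtxes declared just before them.  */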
PUT_CODE (&all.neg, NEG);
XEXP (&all.neg, 0) = &all.reg;
- PUT_CODE (&all.udiv, UDIV);
- XEXP (&all.udiv, 0) = &all.reg;
- XEXP (&all.udiv, 1) = &all.reg;
-
PUT_CODE (&all.mult, MULT);
XEXP (&all.mult, 0) = &all.reg;
XEXP (&all.mult, 1) = &all.reg;
- PUT_CODE (&all.div, DIV);
- XEXP (&all.div, 0) = &all.reg;
- XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
+ PUT_CODE (&all.sdiv, DIV);
+ XEXP (&all.sdiv, 0) = &all.reg;
+ XEXP (&all.sdiv, 1) = &all.reg;
+
+ PUT_CODE (&all.udiv, UDIV);
+ XEXP (&all.udiv, 0) = &all.reg;
+ XEXP (&all.udiv, 1) = &all.reg;
+
+ PUT_CODE (&all.sdiv_32, DIV);
+ XEXP (&all.sdiv_32, 0) = &all.reg;
+ XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

- PUT_CODE (&all.mod, MOD);
- XEXP (&all.mod, 0) = &all.reg;
- XEXP (&all.mod, 1) = XEXP (&all.div, 1);
+ PUT_CODE (&all.smod_32, MOD);
+ XEXP (&all.smod_32, 0) = &all.reg;
+ XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
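/* sdiv_32 and smod_32 model signed division and remainder by the
   constant 32, a representative power of two; their costs feed the
   sdiv_pow2_cheap and smod_pow2_cheap flags computed below.  */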
PUT_CODE (&all.zext, ZERO_EXTEND);
XEXP (&all.zext, 0) = &all.reg;
PUT_MODE (&all.reg, mode);
PUT_MODE (&all.plus, mode);
PUT_MODE (&all.neg, mode);
- PUT_MODE (&all.udiv, mode);
PUT_MODE (&all.mult, mode);
- PUT_MODE (&all.div, mode);
- PUT_MODE (&all.mod, mode);
+ PUT_MODE (&all.sdiv, mode);
+ PUT_MODE (&all.udiv, mode);
+ PUT_MODE (&all.sdiv_32, mode);
+ PUT_MODE (&all.smod_32, mode);
PUT_MODE (&all.wide_trunc, mode);
PUT_MODE (&all.shift, mode);
PUT_MODE (&all.shift_mult, mode);
add_cost[mode] = rtx_cost (&all.plus, SET);
neg_cost[mode] = rtx_cost (&all.neg, SET);
- div_cost[mode] = rtx_cost (&all.udiv, SET);
mul_cost[mode] = rtx_cost (&all.mult, SET);
+ sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
+ udiv_cost[mode] = rtx_cost (&all.udiv, SET);
- sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
- smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
+ sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
+ <= 2 * add_cost[mode]);
+ smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
+ <= 4 * add_cost[mode]);
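/* The thresholds reflect what the open-coded fallbacks cost: a signed
   division by a power of two can be synthesized with a shift plus a
   small correction, so the division instruction is presumably only
   worth using when it costs no more than about two (division) or four
   (remainder) additions.  */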
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
|| (offset * BITS_PER_UNIT % bitsize == 0
&& MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
{
- if (GET_MODE (op0) != fieldmode)
- {
- if (MEM_P (op0))
- op0 = adjust_address (op0, fieldmode, offset);
- else
- op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
- byte_offset);
- }
+ if (MEM_P (op0))
+ op0 = adjust_address (op0, fieldmode, offset);
+ else if (GET_MODE (op0) != fieldmode)
+ op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
+ byte_offset);
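/* Testing MEM_P first fixes the case of a memory reference that
   already has the right mode: the offset must still be applied through
   adjust_address before the store.  */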
emit_move_insn (op0, value);
return value;
}
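/* Past this point the code tries the target's insv pattern.  The new
   bitsize > 0 test rejects degenerate zero-width fields before the
   pattern is consulted; the same guard is added to the extzv and extv
   paths below.  */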
if (HAVE_insv
&& GET_MODE (value) != BLKmode
&& !(bitsize == 1 && GET_CODE (value) == CONST_INT)
- /* Ensure insv's size is wide enough for this field. */
- && (GET_MODE_BITSIZE (op_mode) >= bitsize)
+ && bitsize > 0
+ && GET_MODE_BITSIZE (op_mode) >= bitsize
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
&& (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
{
bestmode = GET_MODE (op0);
if (bestmode == VOIDmode
+ || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
|| (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
&& GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
goto insv_loses;
if (unsignedp)
{
if (HAVE_extzv
- && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
+ && bitsize > 0
+ && GET_MODE_BITSIZE (extzv_mode) >= bitsize
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
{
xbitpos = bitnum % unit;
xop0 = adjust_address (xop0, bestmode, xoffset);
+ /* Make sure register is big enough for the whole field. */
+ if (xoffset * BITS_PER_UNIT + unit
+ < offset * BITS_PER_UNIT + bitsize)
+ goto extzv_loses;
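/* UNIT is the width in bits of the chunk loaded at XOFFSET; if the
   field runs past its end, extracting from the loaded register would
   drop bits, so fall back to the generic code.  The extv path below
   gets the same check.  */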
+
/* Fetch it to a register in that size. */
xop0 = force_reg (bestmode, xop0);
else
{
if (HAVE_extv
- && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
+ && bitsize > 0
+ && GET_MODE_BITSIZE (extv_mode) >= bitsize
&& ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
{
xbitpos = bitnum % unit;
xop0 = adjust_address (xop0, bestmode, xoffset);
+ /* Make sure register is big enough for the whole field. */
+ if (xoffset * BITS_PER_UNIT + unit
+ < offset * BITS_PER_UNIT + bitsize)
+ goto extv_loses;
+
/* Fetch it to a register in that size. */
xop0 = force_reg (bestmode, xop0);
shifted = force_reg (mode, shifted);
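/* Passing SUBTARGET to the second expand_shift instead of the first
   presumably keeps the first shift from clobbering a register that the
   second one still reads; only the last of the two shifts can safely
   reuse it.  */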
temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
- mode, shifted, new_amount, subtarget, 1);
+ mode, shifted, new_amount, 0, 1);
temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
- mode, shifted, other_amount, 0, 1);
+ mode, shifted, other_amount, subtarget, 1);
return expand_binop (mode, ior_optab, temp, temp1, target,
unsignedp, methods);
}
temp = expand_binop (mode,
left ? rotl_optab : rotr_optab,
shifted, op1, target, unsignedp, methods);
-
- /* If we don't have the rotate, but we are rotating by a constant
- that is in range, try a rotate in the opposite direction. */
-
- if (temp == 0 && GET_CODE (op1) == CONST_INT
- && INTVAL (op1) > 0
- && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
- temp = expand_binop (mode,
- left ? rotr_optab : rotl_optab,
- shifted,
- GEN_INT (GET_MODE_BITSIZE (mode)
- - INTVAL (op1)),
- target, unsignedp, methods);
}
else if (unsignedp)
temp = expand_binop (mode,
return temp;
}
\f
-enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
- alg_add_t_m2, alg_sub_t_m2,
- alg_add_factor, alg_sub_factor,
- alg_add_t2_m, alg_sub_t2_m };
+enum alg_code {
+ alg_unknown,
+ alg_zero,
+ alg_m, alg_shift,
+ alg_add_t_m2,
+ alg_sub_t_m2,
+ alg_add_factor,
+ alg_sub_factor,
+ alg_add_t2_m,
+ alg_sub_t2_m,
+ alg_impossible
+};
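/* Roughly, each alg_code names one step of a synthesized multiply; for
   instance x * 9 is alg_m (total = x) followed by alg_add_factor with
   a shift count of 3 (total = (total << 3) + total).  */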
/* This structure holds the "cost" of a multiply sequence. The
   "cost" field holds the total rtx_cost of every operator in the
   synthetic multiplication sequence, hence cost(a op b) is defined
   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero. */
/* The best multiplication algorithm for t. */
enum alg_code alg;
+
+ /* The cost of multiplication if ALG_CODE is not alg_impossible.
+ Otherwise, the cost within which multiplication by T is
+ impossible. */
+ struct mult_cost cost;
};
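/* With alg_impossible, a hash entry can cache failures as well as
   successes: the stored cost is then the limit under which the search
   already failed, so a repeated query with an equal or tighter limit
   can return immediately.  */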
/* The number of cache/hash entries. */
&& alg_hash[hash_index].mode == mode
&& alg_hash[hash_index].alg != alg_unknown)
{
- cache_hit = true;
cache_alg = alg_hash[hash_index].alg;
- switch (cache_alg)
+
+ if (cache_alg == alg_impossible)
{
- case alg_shift:
- goto do_alg_shift;
+ /* The cache tells us that it's impossible to synthesize
+ multiplication by T within alg_hash[hash_index].cost. */
+ if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
+ /* COST_LIMIT is at least as restrictive as the one
+ recorded in the hash table, in which case we have no
+ hope of synthesizing a multiplication. Just
+ return. */
+ return;
+
+ /* If we get here, COST_LIMIT is less restrictive than the
+ one recorded in the hash table, so we may be able to
+ synthesize a multiplication. Proceed as if we didn't
+ have the cache entry. */
+ }
+ else
+ {
+ if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
+ /* The cached algorithm shows that this multiplication
+ costs more than COST_LIMIT. Just return. This
+ way, we don't clobber this cache entry with
+ alg_impossible but retain useful information. */
+ return;
- case alg_add_t_m2:
- case alg_sub_t_m2:
- goto do_alg_addsub_t_m2;
+ cache_hit = true;

- case alg_add_factor:
- case alg_sub_factor:
- goto do_alg_addsub_factor;
+ switch (cache_alg)
+ {
+ case alg_shift:
+ goto do_alg_shift;

- case alg_add_t2_m:
- goto do_alg_add_t2_m;
+ case alg_add_t_m2:
+ case alg_sub_t_m2:
+ goto do_alg_addsub_t_m2;

- case alg_sub_t2_m:
- goto do_alg_sub_t2_m;
+ case alg_add_factor:
+ case alg_sub_factor:
+ goto do_alg_addsub_factor;

- default:
- gcc_unreachable ();
+ case alg_add_t2_m:
+ goto do_alg_add_t2_m;
+
+ case alg_sub_t2_m:
+ goto do_alg_sub_t2_m;
+
+ default:
+ gcc_unreachable ();
+ }
}
}
done:
/* If best_cost has not decreased, we have not found any algorithm. */
if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
- return;
+ {
+ /* We failed to find an algorithm. Record alg_impossible for
+ this case (that is, <T, MODE, COST_LIMIT>) so that next time
+ we are asked to find an algorithm for T within the same or
+ lower COST_LIMIT, we can immediately return to the
+ caller. */
+ alg_hash[hash_index].t = t;
+ alg_hash[hash_index].mode = mode;
+ alg_hash[hash_index].alg = alg_impossible;
+ alg_hash[hash_index].cost = *cost_limit;
+ return;
+ }
/* Cache the result. */
if (!cache_hit)
  {
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
+ alg_hash[hash_index].cost.cost = best_cost.cost;
+ alg_hash[hash_index].cost.latency = best_cost.latency;
}
/* If we are getting too long a sequence for `struct algorithm'
   to record, make this search fail. */
struct mult_cost limit;
int op_cost;
+ /* Fail quickly for impossible bounds. */
+ if (mult_cost < 0)
+ return false;
+
+ /* Ensure that mult_cost provides a reasonable upper bound.
+ Any constant multiplication can be performed in fewer
+ than 2 * bits additions. */
+ op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
+ if (mult_cost > op_cost)
+ mult_cost = op_cost;
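/* The 2 * bits bound holds because a doubling is itself an addition
   (x + x): processing the bits of T Horner-style takes at most
   GET_MODE_BITSIZE (mode) doublings plus as many additions for the
   one bits, hence fewer than 2 * bits additions in total.  */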
+
*variant = basic_variant;
limit.cost = mult_cost;
limit.latency = mult_cost;
&& (unsignedp || !flag_trapv))
{
HOST_WIDE_INT coeff = 0;
+ rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
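/* FAKE_REG is a throwaway pseudo (gen_raw_REG builds a bare REG with a
   dummy register number) used only inside rtx_cost queries: costing a
   MULT whose operand is a plain register keeps op0's own cost out of
   max_cost, matching synth_mult, which never counts the multiplicand
   either.  */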
/* synth_mult does an `unsigned int' multiply. As long as the mode is
less than or equal in size to `unsigned int' this doesn't matter.
&& GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
{
/* It's safe to use -INTVAL (op1) even for INT_MIN, as the
- result is interpreted as an unsigned coefficient. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET)
+ result is interpreted as an unsigned coefficient.
+ Exclude cost of op0 from max_cost to match the cost
+ calculation of synth_mult. */
+ max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
- neg_cost[mode];
if (max_cost > 0
&& choose_mult_variant (mode, -INTVAL (op1), &algorithm,
build_int_cst (NULL_TREE, floor_log2 (coeff)),
target, unsignedp);
- max_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
+ /* Exclude cost of op0 from max_cost to match the cost
+ calculation of synth_mult. */
+ max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
if (choose_mult_variant (mode, coeff, &algorithm, &variant,
max_cost))
return expand_mult_const (mode, op0, coeff, target,
/* Expand x*2.0 as x+x. */
if (GET_CODE (op1) == CONST_DOUBLE
- && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ && SCALAR_FLOAT_MODE_P (mode))
{
REAL_VALUE_TYPE d;
REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
if (mode == word_mode)
return gen_highpart (mode, op);
+ gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
+
wider_mode = GET_MODE_WIDER_MODE (mode);
op = expand_shift (RSHIFT_EXPR, wider_mode, op,
build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
rtx tem;
int size;
+ gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
+
wider_mode = GET_MODE_WIDER_MODE (mode);
size = GET_MODE_BITSIZE (mode);
struct algorithm alg;
rtx tem;
+ gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* We can't support modes wider than HOST_BITS_PER_INT. */
gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
- max_cost = div_cost[compute_mode]
- - (rem_flag && ! (last_div_const != 0 && op1_is_constant
- && INTVAL (op1) == last_div_const)
- ? mul_cost[compute_mode] + add_cost[compute_mode]
- : 0);
+ max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
+ if (rem_flag && ! (last_div_const != 0 && op1_is_constant
+ && INTVAL (op1) == last_div_const))
+ max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
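/* The starting budget now depends on the signedness of the division,
   drawing on the udiv_cost/sdiv_cost tables initialized earlier; the
   multiply-plus-add deduction applied when the remainder is derived
   from the quotient is unchanged.  */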
/* Note that we do *not* use SET_DECL_RTL here, because we do not
want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
- t->decl.rtl = x;
+ t->decl_with_rtl.rtl = x;
return t;
}