/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of GCC.
value = protect_from_queue (value, 0);
+  /* Use vec_set patterns for inserting parts of vectors whenever
+     available.  The field must be exactly one (aligned) element of the
+     vector for the pattern to apply.  */
+  if (VECTOR_MODE_P (GET_MODE (op0))
+      && GET_CODE (op0) != MEM
+      && (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code
+	  != CODE_FOR_nothing)
+      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
+      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
+      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
+    {
+      enum machine_mode outermode = GET_MODE (op0);
+      enum machine_mode innermode = GET_MODE_INNER (outermode);
+      int icode = (int) vec_set_optab->handlers[(int) outermode].insn_code;
+      /* Element index within the vector.  */
+      int pos = bitnum / GET_MODE_BITSIZE (innermode);
+      rtx rtxpos = GEN_INT (pos);
+      rtx src = value;
+      rtx dest = op0;
+      rtx pat, seq;
+      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+
+      start_sequence ();
+
+      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
+	src = copy_to_mode_reg (mode1, src);
+
+      /* The position operand must be forced into its own mode (mode2,
+	 not mode1) or the re-check below would abort.  */
+      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	rtxpos = copy_to_mode_reg (mode2, rtxpos);
+
+      /* We could handle this, but we should always be called with a pseudo
+	 for our targets and all insns should take them as outputs.  */
+      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
+	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
+	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	abort ();
+      pat = GEN_FCN (icode) (dest, src, rtxpos);
+      seq = get_insns ();
+      end_sequence ();
+      if (pat)
+	{
+	  emit_insn (seq);
+	  emit_insn (pat);
+	  return dest;
+	}
+    }
+
if (flag_force_mem)
{
int old_generating_concat_p = generating_concat_p;
VOIDmode, because that is what store_field uses to indicate that this
is a bit field, but passing VOIDmode to operand_subword_force will
result in an abort. */
- fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
+ fieldmode = GET_MODE (value);
+ if (fieldmode == VOIDmode)
+ fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
for (i = 0; i < nwords; i++)
{
store_bit_field (op0, MIN (BITS_PER_WORD,
bitsize - i * BITS_PER_WORD),
bitnum + bit_offset, word_mode,
- operand_subword_force (value, wordnum,
- (GET_MODE (value) == VOIDmode
- ? fieldmode
- : GET_MODE (value))),
+ operand_subword_force (value, wordnum, fieldmode),
total_size);
}
return value;
return op0;
}
+  /* Use vec_extract patterns for extracting parts of vectors whenever
+     available.  The field must not straddle a vector-element boundary.  */
+  if (VECTOR_MODE_P (GET_MODE (op0))
+      && GET_CODE (op0) != MEM
+      && (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code
+	  != CODE_FOR_nothing)
+      && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
+	  == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
+    {
+      enum machine_mode outermode = GET_MODE (op0);
+      enum machine_mode innermode = GET_MODE_INNER (outermode);
+      int icode = (int) vec_extract_optab->handlers[(int) outermode].insn_code;
+      /* Element index within the vector.  */
+      int pos = bitnum / GET_MODE_BITSIZE (innermode);
+      rtx rtxpos = GEN_INT (pos);
+      rtx src = op0;
+      rtx dest = NULL, pat, seq;
+      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+
+      if (innermode == tmode || innermode == mode)
+	dest = target;
+
+      if (!dest)
+	dest = gen_reg_rtx (innermode);
+
+      start_sequence ();
+
+      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
+	dest = copy_to_mode_reg (mode0, dest);
+
+      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
+	src = copy_to_mode_reg (mode1, src);
+
+      /* The position operand must be forced into its own mode (mode2,
+	 not mode1) or the re-check below would abort.  */
+      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	rtxpos = copy_to_mode_reg (mode2, rtxpos);
+
+      /* We could handle this, but we should always be called with a pseudo
+	 for our targets and all insns should take them as outputs.  */
+      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
+	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
+	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	abort ();
+      pat = GEN_FCN (icode) (dest, src, rtxpos);
+      seq = get_insns ();
+      end_sequence ();
+      if (pat)
+	{
+	  emit_insn (seq);
+	  emit_insn (pat);
+	  /* Recurse to pull the requested bits out of the extracted
+	     element; BITNUM is rebased to the start of that element.  */
+	  return extract_bit_field (dest, bitsize,
+				    bitnum - pos * GET_MODE_BITSIZE (innermode),
+				    unsignedp, target, mode, tmode, total_size);
+	}
+    }
+
/* Make sure we are playing with integral modes. Pun with subregs
if we aren't. */
{
If that's wrong, the solution is to test for it and set TARGET to 0
if needed. */
- mode1 = (VECTOR_MODE_P (tmode)
- ? mode
- : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
+ /* Only scalar integer modes can be converted via subregs. There is an
+ additional problem for FP modes here in that they can have a precision
+ which is different from the size. mode_for_size uses precision, but
+ we want a mode based on the size, so we must avoid calling it for FP
+ modes. */
+ mode1 = (SCALAR_INT_MODE_P (tmode)
+ ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
+ : mode);
if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
&& bitpos % BITS_PER_WORD == 0)
- || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
+ || (mode1 != BLKmode
/* ??? The big endian test here is wrong. This is correct
if the value is in a register, and if mode_for_size is not
the same mode as op0. This causes us to get unnecessarily
op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
-#ifdef SHIFT_COUNT_TRUNCATED
if (SHIFT_COUNT_TRUNCATED)
{
if (GET_CODE (op1) == CONST_INT
&& subreg_lowpart_p (op1))
op1 = SUBREG_REG (op1);
}
-#endif
if (op1 == const0_rtx)
return shifted;
char log[MAX_BITS_PER_WORD];
};
+/* Indicates the type of fixup needed after a constant multiplication.
+ BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+ the result should be negated, and ADD_VARIANT means that the
+ multiplicand should be added to the result. */
+enum mult_variant {basic_variant, negate_variant, add_variant};
+
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+ struct algorithm *, enum mult_variant *, int);
+static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+ const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, unsigned HOST_WIDE_INT *,
int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+static rtx extract_high_half (enum machine_mode, rtx);
+static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+ int, int);
/* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit
If retval.cost >= COST_LIMIT, no algorithm was found and all
/* We'll be needing a couple extra algorithm structures now. */
- alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
- best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
+ alg_in = alloca (sizeof (struct algorithm));
+ best_alg = alloca (sizeof (struct algorithm));
/* If we have a group of zero bits at the low-order part of T, try
multiplying by the remaining bits and then doing a shift. */
alg_out->ops * sizeof *alg_out->log);
}
\f
+/* Find the cheapest way of multiplying a value of mode MODE by VAL.
+   Try three variations:
+
+       - a shift/add sequence based on VAL itself
+       - a shift/add sequence based on -VAL, followed by a negation
+       - a shift/add sequence based on VAL - 1, followed by an addition.
+
+   Return true if the cheapest of these cost less than MULT_COST,
+   describing the algorithm in *ALG and final fixup in *VARIANT.  */
+
+static bool
+choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+		     struct algorithm *alg, enum mult_variant *variant,
+		     int mult_cost)
+{
+  struct algorithm alg2;
+
+  /* Baseline: synthesize VAL directly; later candidates must beat
+     whichever of ALG / MULT_COST is currently cheapest.  */
+  *variant = basic_variant;
+  synth_mult (alg, val, mult_cost);
+
+  /* This works only if the inverted value actually fits in an
+     `unsigned int' */
+  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+    {
+      /* Budget for -VAL is the current best minus the cost of the
+	 final negation that this variant requires.  */
+      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+      alg2.cost += negate_cost;
+      if (alg2.cost < alg->cost)
+	*alg = alg2, *variant = negate_variant;
+    }
+
+  /* This proves very useful for division-by-constant.  */
+  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+  alg2.cost += add_cost;
+  if (alg2.cost < alg->cost)
+    *alg = alg2, *variant = add_variant;
+
+  return alg->cost < mult_cost;
+}
+
+/* A subroutine of expand_mult, used for constant multiplications.
+   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+   convenient.  Use the shift/add sequence described by ALG and apply
+   the final fixup specified by VARIANT.  */
+
+static rtx
+expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+		   rtx target, const struct algorithm *alg,
+		   enum mult_variant variant)
+{
+  /* Running value of the multiplier synthesized so far; used at the
+     end to cross-check the emitted sequence against VAL.  */
+  HOST_WIDE_INT val_so_far;
+  rtx insn, accum, tem;
+  int opno;
+  enum machine_mode nmode;
+
+  /* op0 must be register to make mult_cost match the precomputed
+     shiftadd_cost array.  */
+  op0 = protect_from_queue (op0, 0);
+
+  /* Avoid referencing memory over and over.
+     For speed, but also for correctness when mem is volatile.  */
+  if (GET_CODE (op0) == MEM)
+    op0 = force_reg (mode, op0);
+
+  /* ACCUM starts out either as OP0 or as a zero, depending on
+     the first operation.  */
+
+  if (alg->op[0] == alg_zero)
+    {
+      accum = copy_to_mode_reg (mode, const0_rtx);
+      val_so_far = 0;
+    }
+  else if (alg->op[0] == alg_m)
+    {
+      accum = copy_to_mode_reg (mode, op0);
+      val_so_far = 1;
+    }
+  else
+    abort ();
+
+  /* Replay the remaining shift/add steps recorded by synth_mult.  */
+  for (opno = 1; opno < alg->ops; opno++)
+    {
+      int log = alg->log[opno];
+      int preserve = preserve_subexpressions_p ();
+      rtx shift_subtarget = preserve ? 0 : accum;
+      /* TARGET may only be used for the final step, and only when no
+	 add_variant fixup will follow.  */
+      rtx add_target
+	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
+	   && ! preserve)
+	  ? target : 0;
+      rtx accum_target = preserve ? 0 : accum;
+
+      switch (alg->op[opno])
+	{
+	case alg_shift:
+	  /* t = t << log.  */
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), NULL_RTX, 0);
+	  val_so_far <<= log;
+	  break;
+
+	case alg_add_t_m2:
+	  /* t = t + (m << log).  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_sub_t_m2:
+	  /* t = t - (m << log).  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far -= (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_add_t2_m:
+	  /* t = (t << log) + m.  */
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget,
+				0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) + 1;
+	  break;
+
+	case alg_sub_t2_m:
+	  /* t = (t << log) - m.  */
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) - 1;
+	  break;
+
+	case alg_add_factor:
+	  /* t = t + (t << log), i.e. t *= (1 << log) + 1.  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += val_so_far << log;
+	  break;
+
+	case alg_sub_factor:
+	  /* t = (t << log) - t, i.e. t *= (1 << log) - 1.  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
+				 (add_target ? add_target
+				  : preserve ? 0 : tem));
+	  val_so_far = (val_so_far << log) - val_so_far;
+	  break;
+
+	default:
+	  abort ();
+	}
+
+      /* Write a REG_EQUAL note on the last insn so that we can cse
+	 multiplication sequences.  Note that if ACCUM is a SUBREG,
+	 we've set the inner register and must properly indicate
+	 that.  */
+
+      tem = op0, nmode = mode;
+      if (GET_CODE (accum) == SUBREG)
+	{
+	  nmode = GET_MODE (SUBREG_REG (accum));
+	  tem = gen_lowpart (nmode, op0);
+	}
+
+      insn = get_last_insn ();
+      set_unique_reg_note (insn, REG_EQUAL,
+			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+    }
+
+  /* Apply the final fixup chosen by choose_mult_variant.  */
+  if (variant == negate_variant)
+    {
+      val_so_far = -val_so_far;
+      accum = expand_unop (mode, neg_optab, accum, target, 0);
+    }
+  else if (variant == add_variant)
+    {
+      val_so_far = val_so_far + 1;
+      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
+    }
+
+  /* Sanity check: the synthesized sequence must compute exactly VAL.  */
+  if (val != val_so_far)
+    abort ();
+
+  return accum;
+}
+
/* Perform a multiplication and return an rtx for the result.
MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
TARGET is a suggestion for where to store the result (an rtx).
you should swap the two operands if OP0 would be constant. */
rtx
-expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp)
+expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+ int unsignedp)
{
rtx const_op1 = op1;
+ enum mult_variant variant;
+ struct algorithm algorithm;
/* synth_mult does an `unsigned int' multiply. As long as the mode is
less than or equal in size to `unsigned int' this doesn't matter.
that it seems better to use synth_mult always. */
if (const_op1 && GET_CODE (const_op1) == CONST_INT
- && (unsignedp || ! flag_trapv))
+ && (unsignedp || !flag_trapv))
{
- struct algorithm alg;
- struct algorithm alg2;
- HOST_WIDE_INT val = INTVAL (op1);
- HOST_WIDE_INT val_so_far;
- rtx insn;
- int mult_cost;
- enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
-
- /* op0 must be register to make mult_cost match the precomputed
- shiftadd_cost array. */
- op0 = force_reg (mode, op0);
-
- /* Try to do the computation three ways: multiply by the negative of OP1
- and then negate, do the multiplication directly, or do multiplication
- by OP1 - 1. */
-
- mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
+ int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
mult_cost = MIN (12 * add_cost, mult_cost);
- synth_mult (&alg, val, mult_cost);
+ if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
+ mult_cost))
+ return expand_mult_const (mode, op0, INTVAL (const_op1), target,
+ &algorithm, variant);
+ }
- /* This works only if the inverted value actually fits in an
- `unsigned int' */
- if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
- {
- synth_mult (&alg2, - val,
- (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
- if (alg2.cost + negate_cost < alg.cost)
- alg = alg2, variant = negate_variant;
- }
+ if (GET_CODE (op0) == CONST_DOUBLE)
+ {
+ rtx temp = op0;
+ op0 = op1;
+ op1 = temp;
+ }
- /* This proves very useful for division-by-constant. */
- synth_mult (&alg2, val - 1,
- (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
- if (alg2.cost + add_cost < alg.cost)
- alg = alg2, variant = add_variant;
+ /* Expand x*2.0 as x+x. */
+ if (GET_CODE (op1) == CONST_DOUBLE
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
- if (alg.cost < mult_cost)
+ if (REAL_VALUES_EQUAL (d, dconst2))
{
- /* We found something cheaper than a multiply insn. */
- int opno;
- rtx accum, tem;
- enum machine_mode nmode;
-
- op0 = protect_from_queue (op0, 0);
-
- /* Avoid referencing memory over and over.
- For speed, but also for correctness when mem is volatile. */
- if (GET_CODE (op0) == MEM)
- op0 = force_reg (mode, op0);
-
- /* ACCUM starts out either as OP0 or as a zero, depending on
- the first operation. */
-
- if (alg.op[0] == alg_zero)
- {
- accum = copy_to_mode_reg (mode, const0_rtx);
- val_so_far = 0;
- }
- else if (alg.op[0] == alg_m)
- {
- accum = copy_to_mode_reg (mode, op0);
- val_so_far = 1;
- }
- else
- abort ();
-
- for (opno = 1; opno < alg.ops; opno++)
- {
- int log = alg.log[opno];
- int preserve = preserve_subexpressions_p ();
- rtx shift_subtarget = preserve ? 0 : accum;
- rtx add_target
- = (opno == alg.ops - 1 && target != 0 && variant != add_variant
- && ! preserve)
- ? target : 0;
- rtx accum_target = preserve ? 0 : accum;
-
- switch (alg.op[opno])
- {
- case alg_shift:
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), NULL_RTX, 0);
- val_so_far <<= log;
- break;
-
- case alg_add_t_m2:
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
- add_target
- ? add_target : accum_target);
- val_so_far += (HOST_WIDE_INT) 1 << log;
- break;
-
- case alg_sub_t_m2:
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
- add_target
- ? add_target : accum_target);
- val_so_far -= (HOST_WIDE_INT) 1 << log;
- break;
-
- case alg_add_t2_m:
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), shift_subtarget,
- 0);
- accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
- add_target
- ? add_target : accum_target);
- val_so_far = (val_so_far << log) + 1;
- break;
-
- case alg_sub_t2_m:
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), shift_subtarget,
- 0);
- accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
- add_target
- ? add_target : accum_target);
- val_so_far = (val_so_far << log) - 1;
- break;
-
- case alg_add_factor:
- tem = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
- add_target
- ? add_target : accum_target);
- val_so_far += val_so_far << log;
- break;
-
- case alg_sub_factor:
- tem = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
- (add_target ? add_target
- : preserve ? 0 : tem));
- val_so_far = (val_so_far << log) - val_so_far;
- break;
-
- default:
- abort ();
- }
-
- /* Write a REG_EQUAL note on the last insn so that we can cse
- multiplication sequences. Note that if ACCUM is a SUBREG,
- we've set the inner register and must properly indicate
- that. */
-
- tem = op0, nmode = mode;
- if (GET_CODE (accum) == SUBREG)
- {
- nmode = GET_MODE (SUBREG_REG (accum));
- tem = gen_lowpart (nmode, op0);
- }
-
- insn = get_last_insn ();
- set_unique_reg_note (insn,
- REG_EQUAL,
- gen_rtx_MULT (nmode, tem,
- GEN_INT (val_so_far)));
- }
-
- if (variant == negate_variant)
- {
- val_so_far = - val_so_far;
- accum = expand_unop (mode, neg_optab, accum, target, 0);
- }
- else if (variant == add_variant)
- {
- val_so_far = val_so_far + 1;
- accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
- }
-
- if (val != val_so_far)
- abort ();
-
- return accum;
+ op0 = force_reg (GET_MODE (op0), op0);
+ return expand_binop (mode, add_optab, op0, op0,
+ target, unsignedp, OPTAB_LIB_WIDEN);
}
}
abort ();
if (mhigh_hi > 1 || mlow_hi > 1)
abort ();
- /* assert that mlow < mhigh. */
+ /* Assert that mlow < mhigh. */
if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
abort ();
return target;
}
-/* Emit code to multiply OP0 and CNST1, putting the high half of the result
- in TARGET if that is convenient, and return where the result is. If the
- operation can not be performed, 0 is returned.
+/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
- MODE is the mode of operation and result.
+static rtx
+extract_high_half (enum machine_mode mode, rtx op)
+{
+ enum machine_mode wider_mode;
- UNSIGNEDP nonzero means unsigned multiply.
+ if (mode == word_mode)
+ return gen_highpart (mode, op);
- MAX_COST is the total allowed cost for the expanded RTL. */
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ op = expand_shift (RSHIFT_EXPR, wider_mode, op,
+ build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
+ return convert_modes (mode, wider_mode, op, 0);
+}
-rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0,
- unsigned HOST_WIDE_INT cnst1, rtx target,
- int unsignedp, int max_cost)
+/* Like expand_mult_highpart, but only consider using a multiplication
+ optab. OP1 is an rtx for the constant operand. */
+
+static rtx
+expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+ rtx target, int unsignedp, int max_cost)
{
- enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
- optab mul_highpart_optab;
+ enum machine_mode wider_mode;
optab moptab;
rtx tem;
- int size = GET_MODE_BITSIZE (mode);
- rtx op1, wide_op1;
-
- /* We can't support modes wider than HOST_BITS_PER_INT. */
- if (size > HOST_BITS_PER_WIDE_INT)
- abort ();
-
- op1 = gen_int_mode (cnst1, mode);
-
- wide_op1
- = immed_double_const (cnst1,
- (unsignedp
- ? (HOST_WIDE_INT) 0
- : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
- wider_mode);
-
- /* expand_mult handles constant multiplication of word_mode
- or narrower. It does a poor job for large modes. */
- if (size < BITS_PER_WORD
- && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
- {
- /* We have to do this, since expand_binop doesn't do conversion for
- multiply. Maybe change expand_binop to handle widening multiply? */
- op0 = convert_to_mode (wider_mode, op0, unsignedp);
-
- /* We know that this can't have signed overflow, so pretend this is
- an unsigned multiply. */
- tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- return convert_modes (mode, wider_mode, tem, unsignedp);
- }
+ int size;
- if (target == 0)
- target = gen_reg_rtx (mode);
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ size = GET_MODE_BITSIZE (mode);
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[(int) mode] < max_cost)
{
- mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
- return target;
+ moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+ tem = expand_binop (mode, moptab, op0, op1, target,
+ unsignedp, OPTAB_DIRECT);
+ if (tem)
+ return tem;
}
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
&& (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
< max_cost))
{
- mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
+ moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+ tem = expand_binop (mode, moptab, op0, op1, target,
+ unsignedp, OPTAB_DIRECT);
+ if (tem)
/* We used the wrong signedness. Adjust the result. */
- return expand_mult_highpart_adjust (mode, target, op0,
- op1, target, unsignedp);
+ return expand_mult_highpart_adjust (mode, tem, op0, op1,
+ tem, unsignedp);
}
/* Try widening multiplication. */
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& mul_widen_cost[(int) wider_mode] < max_cost)
{
- op1 = force_reg (mode, op1);
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}
/* Try widening the mode and perform a non-widening multiplication. */
&& size - 1 < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
- op1 = wide_op1;
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}
/* Try widening multiplication of opposite signedness, and adjust. */
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem != 0)
{
- /* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- tem = convert_modes (mode, wider_mode, tem, unsignedp);
+ tem = extract_high_half (mode, tem);
/* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, op1,
target, unsignedp);
}
return 0;
+}
- try:
- /* Pass NULL_RTX as target since TARGET has wrong mode. */
- tem = expand_binop (wider_mode, moptab, op0, op1,
- NULL_RTX, unsignedp, OPTAB_WIDEN);
- if (tem == 0)
- return 0;
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+ in TARGET if that is convenient, and return where the result is. If the
+ operation can not be performed, 0 is returned.
- /* Extract the high half of the just generated product. */
- if (mode == word_mode)
+ MODE is the mode of operation and result.
+
+ UNSIGNEDP nonzero means unsigned multiply.
+
+ MAX_COST is the total allowed cost for the expanded RTL. */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+ unsigned HOST_WIDE_INT cnst1, rtx target,
+ int unsignedp, int max_cost)
+{
+ enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
+ int extra_cost;
+ bool sign_adjust = false;
+ enum mult_variant variant;
+ struct algorithm alg;
+ rtx op1, tem;
+
+ /* We can't support modes wider than HOST_BITS_PER_INT. */
+ if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
+ abort ();
+
+ op1 = gen_int_mode (cnst1, mode);
+ cnst1 &= GET_MODE_MASK (mode);
+
+ /* We can't optimize modes wider than BITS_PER_WORD.
+ ??? We might be able to perform double-word arithmetic if
+ mode == word_mode, however all the cost calculations in
+ synth_mult etc. assume single-word operations. */
+ if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
+ return expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, max_cost);
+
+ extra_cost = shift_cost[GET_MODE_BITSIZE (mode) - 1];
+
+ /* Check whether we try to multiply by a negative constant. */
+ if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
- return gen_highpart (mode, tem);
+ sign_adjust = true;
+ extra_cost += add_cost;
}
- else
+
+ /* See whether shift/add multiplication is cheap enough. */
+ if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
+ max_cost - extra_cost))
{
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- return convert_modes (mode, wider_mode, tem, unsignedp);
+ /* See whether the specialized multiplication optabs are
+ cheaper than the shift/add version. */
+ tem = expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, alg.cost + extra_cost);
+ if (tem)
+ return tem;
+
+ tem = convert_to_mode (wider_mode, op0, unsignedp);
+ tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
+ tem = extract_high_half (mode, tem);
+
+ /* Adjust result for signedness. */
+ if (sign_adjust)
+ tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
+
+ return tem;
}
+ return expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, max_cost);
}
\f
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
quotient = expand_mult (compute_mode, t1,
gen_int_mode (ml, compute_mode),
- NULL_RTX, 0);
+ NULL_RTX, 1);
insn = get_last_insn ();
set_unique_reg_note (insn,
{
case CONST_INT:
t = build_int_2 (INTVAL (x),
- (TREE_UNSIGNED (type)
- && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
+ (TYPE_UNSIGNED (type)
+ && (GET_MODE_BITSIZE (TYPE_MODE (type))
+ < HOST_BITS_PER_WIDE_INT))
|| INTVAL (x) >= 0 ? 0 : -1);
TREE_TYPE (t) = type;
return t;
make_tree (type, XEXP (x, 1))));
case LSHIFTRT:
- t = (*lang_hooks.types.unsigned_type) (type);
+ t = lang_hooks.types.unsigned_type (type);
return fold (convert (type,
build (RSHIFT_EXPR, t,
make_tree (t, XEXP (x, 0)),
make_tree (type, XEXP (x, 1)))));
case ASHIFTRT:
- t = (*lang_hooks.types.signed_type) (type);
+ t = lang_hooks.types.signed_type (type);
return fold (convert (type,
build (RSHIFT_EXPR, t,
make_tree (t, XEXP (x, 0)),
case DIV:
if (TREE_CODE (type) != REAL_TYPE)
- t = (*lang_hooks.types.signed_type) (type);
+ t = lang_hooks.types.signed_type (type);
else
t = type;
make_tree (t, XEXP (x, 0)),
make_tree (t, XEXP (x, 1)))));
case UDIV:
- t = (*lang_hooks.types.unsigned_type) (type);
+ t = lang_hooks.types.unsigned_type (type);
return fold (convert (type,
build (TRUNC_DIV_EXPR, t,
make_tree (t, XEXP (x, 0)),
case SIGN_EXTEND:
case ZERO_EXTEND:
- t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
- GET_CODE (x) == ZERO_EXTEND);
+ t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
+ GET_CODE (x) == ZERO_EXTEND);
return fold (convert (type, make_tree (t, XEXP (x, 0))));
default:
t = make_node (RTL_EXPR);
TREE_TYPE (t) = type;
-#ifdef POINTERS_EXTEND_UNSIGNED
/* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
ptr_mode. So convert. */
- if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
+ if (POINTER_TYPE_P (type))
x = convert_memory_address (TYPE_MODE (type), x);
-#endif
RTL_EXPR_RTL (t) = x;
/* There are no insns to be output
{
tree type, mult_type, add_type, result;
- type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
+ type = lang_hooks.types.type_for_mode (mode, unsignedp);
/* In order to get a proper overflow indication from an unsigned
type, we have to pretend that it's a sizetype. */
}
add_type = (GET_MODE (add) == VOIDmode ? mult_type
- : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
+ : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
result = fold (build (PLUS_EXPR, mult_type,
fold (build (MULT_EXPR, mult_type,
expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
int unsignedp)
{
- tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
+ tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
tree add_type = (GET_MODE (add) == VOIDmode
- ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
- unsignedp));
+ ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
+ unsignedp));
tree result = fold (build (PLUS_EXPR, type,
fold (build (MULT_EXPR, type,
make_tree (type, x),