+2010-04-22 Bernd Schmidt <bernds@codesourcery.com>
+
+ PR middle-end/29274
+ * optabs.h (expand_widening_mult): Declare.
+ * tree-pass.h (pass_optimize_widening_mul): Declare.
+ * tree-ssa-math-opts.c (execute_optimize_widening_mul,
+ gate_optimize_widening_mul): New static functions.
+ (pass_optimize_widening_mul): New.
+ * expr.c (expand_expr_real_2) <case WIDEN_MULT_EXPR>: New
+ case.
+ <case MULT_EXPR>: Remove support for widening multiplies.
+ * tree.def (WIDEN_MULT_EXPR): Tweak comment.
+ * cfgexpand.c (expand_debug_expr) <case WIDEN_MULT_EXPR>: Use
+ simplify_gen_unary rather than directly building extensions.
+ * tree-cfg.c (verify_gimple_assign_binary): Add tests for
+ WIDEN_MULT_EXPR.
+ * expmed.c (expand_widening_mult): New function.
+ * passes.c (init_optimization_passes): Add pass_optimize_widening_mul.
+
2010-04-21 Jan Hubicka <jh@suse.cz>
* timevar.def (TV_WHOPR_WPA_FIXUP): Remove.
if (SCALAR_INT_MODE_P (GET_MODE (op0))
&& SCALAR_INT_MODE_P (mode))
{
+ enum machine_mode inner_mode = GET_MODE (op0);
if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 0))))
- op0 = gen_rtx_ZERO_EXTEND (mode, op0);
+ op0 = simplify_gen_unary (ZERO_EXTEND, mode, op0, inner_mode);
else
- op0 = gen_rtx_SIGN_EXTEND (mode, op0);
+ op0 = simplify_gen_unary (SIGN_EXTEND, mode, op0, inner_mode);
if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 1))))
- op1 = gen_rtx_ZERO_EXTEND (mode, op1);
+ op1 = simplify_gen_unary (ZERO_EXTEND, mode, op1, inner_mode);
else
- op1 = gen_rtx_SIGN_EXTEND (mode, op1);
+ op1 = simplify_gen_unary (SIGN_EXTEND, mode, op1, inner_mode);
return gen_rtx_MULT (mode, op0, op1);
}
return NULL;
gcc_assert (op0);
return op0;
}
+
+/* Perform a widening multiplication and return an rtx for the result.
+ MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
+ TARGET is a suggestion for where to store the result (an rtx).
+ THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
+ or smul_widen_optab.
+
+ We check specially for a constant integer as OP1, comparing the
+ cost of a widening multiply against the cost of a sequence of shifts
+ and adds. */
+
+rtx
+expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+ int unsignedp, optab this_optab)
+{
+ bool speed = optimize_insn_for_speed_p ();
+
+ if (CONST_INT_P (op1)
+ && (INTVAL (op1) >= 0
+ || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
+ {
+ HOST_WIDE_INT coeff = INTVAL (op1);
+ int max_cost;
+ enum mult_variant variant;
+ struct algorithm algorithm;
+
+ /* Special case powers of two. */
+ if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+ {
+ op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+ return expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_cst (NULL_TREE, floor_log2 (coeff)),
+ target, unsignedp);
+ }
+
+ /* Exclude cost of op0 from max_cost to match the cost
+ calculation of the synth_mult. */
+ max_cost = mul_widen_cost[speed][mode];
+ if (choose_mult_variant (mode, coeff, &algorithm, &variant,
+ max_cost))
+ {
+ op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+ return expand_mult_const (mode, op0, coeff, target,
+ &algorithm, variant);
+ }
+ }
+ return expand_binop (mode, this_optab, op0, op1, target,
+ unsignedp, OPTAB_LIB_WIDEN);
+}
\f
/* Return the smallest n such that 2**n >= X. */
optab this_optab;
rtx subtarget, original_target;
int ignore;
- tree subexp0, subexp1;
bool reduce_bit_field;
gimple subexp0_def, subexp1_def;
tree top0, top1;
goto binop2;
- case MULT_EXPR:
- /* If this is a fixed-point operation, then we cannot use the code
- below because "expand_mult" doesn't support sat/no-sat fixed-point
- multiplications. */
- if (ALL_FIXED_POINT_MODE_P (mode))
- goto binop;
-
+ case WIDEN_MULT_EXPR:
/* If first operand is constant, swap them.
Thus the following special case checks need only
check the second operand. */
treeop1 = t1;
}
- /* Attempt to return something suitable for generating an
- indexed address, for machines that support that. */
-
- if (modifier == EXPAND_SUM && mode == ptr_mode
- && host_integerp (treeop1, 0))
- {
- tree exp1 = treeop1;
-
- op0 = expand_expr (treeop0, subtarget, VOIDmode,
- EXPAND_SUM);
-
- if (!REG_P (op0))
- op0 = force_operand (op0, NULL_RTX);
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (mode, op0);
-
- return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
- gen_int_mode (tree_low_cst (exp1, 0),
- TYPE_MODE (TREE_TYPE (exp1)))));
- }
-
- if (modifier == EXPAND_STACK_PARM)
- target = 0;
-
- /* Check for multiplying things that have been extended
- from a narrower type. If this machine supports multiplying
- in that narrower type with a result in the desired type,
- do it that way, and avoid the explicit type-conversion. */
-
- subexp0 = treeop0;
- subexp1 = treeop1;
- subexp0_def = get_def_for_expr (subexp0, NOP_EXPR);
- subexp1_def = get_def_for_expr (subexp1, NOP_EXPR);
- top0 = top1 = NULL_TREE;
-
/* First, check if we have a multiplication of one signed and one
unsigned operand. */
- if (subexp0_def
- && (top0 = gimple_assign_rhs1 (subexp0_def))
- && subexp1_def
- && (top1 = gimple_assign_rhs1 (subexp1_def))
- && TREE_CODE (type) == INTEGER_TYPE
- && (TYPE_PRECISION (TREE_TYPE (top0))
- < TYPE_PRECISION (TREE_TYPE (subexp0)))
- && (TYPE_PRECISION (TREE_TYPE (top0))
- == TYPE_PRECISION (TREE_TYPE (top1)))
- && (TYPE_UNSIGNED (TREE_TYPE (top0))
- != TYPE_UNSIGNED (TREE_TYPE (top1))))
+ if (TREE_CODE (treeop1) != INTEGER_CST
+ && (TYPE_UNSIGNED (TREE_TYPE (treeop0))
+ != TYPE_UNSIGNED (TREE_TYPE (treeop1))))
{
- enum machine_mode innermode
- = TYPE_MODE (TREE_TYPE (top0));
+ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
this_optab = usmul_widen_optab;
- if (mode == GET_MODE_WIDER_MODE (innermode))
+ if (mode == GET_MODE_2XWIDER_MODE (innermode))
{
if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
{
- if (TYPE_UNSIGNED (TREE_TYPE (top0)))
- expand_operands (top0, top1, NULL_RTX, &op0, &op1,
+ if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
EXPAND_NORMAL);
else
- expand_operands (top0, top1, NULL_RTX, &op1, &op0,
+ expand_operands (treeop0, treeop1, subtarget, &op1, &op0,
EXPAND_NORMAL);
-
goto binop3;
}
}
}
- /* Check for a multiplication with matching signedness. If
- valid, TOP0 and TOP1 were set in the previous if
- condition. */
- else if (top0
- && TREE_CODE (type) == INTEGER_TYPE
- && (TYPE_PRECISION (TREE_TYPE (top0))
- < TYPE_PRECISION (TREE_TYPE (subexp0)))
- && ((TREE_CODE (subexp1) == INTEGER_CST
- && int_fits_type_p (subexp1, TREE_TYPE (top0))
- /* Don't use a widening multiply if a shift will do. */
- && ((GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (subexp1)))
- > HOST_BITS_PER_WIDE_INT)
- || exact_log2 (TREE_INT_CST_LOW (subexp1)) < 0))
- ||
- (top1
- && (TYPE_PRECISION (TREE_TYPE (top1))
- == TYPE_PRECISION (TREE_TYPE (top0))
- /* If both operands are extended, they must either both
- be zero-extended or both be sign-extended. */
- && (TYPE_UNSIGNED (TREE_TYPE (top1))
- == TYPE_UNSIGNED (TREE_TYPE (top0)))))))
+ /* Check for a multiplication with matching signedness. */
+ else if ((TREE_CODE (treeop1) == INTEGER_CST
+ && int_fits_type_p (treeop1, TREE_TYPE (treeop0)))
+ || (TYPE_UNSIGNED (TREE_TYPE (treeop1))
+ == TYPE_UNSIGNED (TREE_TYPE (treeop0))))
{
- tree op0type = TREE_TYPE (top0);
+ tree op0type = TREE_TYPE (treeop0);
enum machine_mode innermode = TYPE_MODE (op0type);
bool zextend_p = TYPE_UNSIGNED (op0type);
optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
{
if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
{
- if (TREE_CODE (subexp1) == INTEGER_CST)
- expand_operands (top0, subexp1, NULL_RTX, &op0, &op1,
- EXPAND_NORMAL);
- else
- expand_operands (top0, top1, NULL_RTX, &op0, &op1,
- EXPAND_NORMAL);
- goto binop3;
+ expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+ EXPAND_NORMAL);
+ temp = expand_widening_mult (mode, op0, op1, target,
+ unsignedp, this_optab);
+ return REDUCE_BIT_FIELD (temp);
}
- else if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
- && innermode == word_mode)
+ if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
+ && innermode == word_mode)
{
rtx htem, hipart;
- op0 = expand_normal (top0);
- if (TREE_CODE (subexp1) == INTEGER_CST)
+ op0 = expand_normal (treeop0);
+ if (TREE_CODE (treeop1) == INTEGER_CST)
op1 = convert_modes (innermode, mode,
- expand_normal (subexp1), unsignedp);
+ expand_normal (treeop1), unsignedp);
else
- op1 = expand_normal (top1);
+ op1 = expand_normal (treeop1);
temp = expand_binop (mode, other_optab, op0, op1, target,
unsignedp, OPTAB_LIB_WIDEN);
hipart = gen_highpart (innermode, temp);
}
}
}
- expand_operands (subexp0, subexp1, subtarget, &op0, &op1, EXPAND_NORMAL);
+ treeop0 = fold_build1 (CONVERT_EXPR, type, treeop0);
+ treeop1 = fold_build1 (CONVERT_EXPR, type, treeop1);
+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+ return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+
+ case MULT_EXPR:
+ /* If this is a fixed-point operation, then we cannot use the code
+ below because "expand_mult" doesn't support sat/no-sat fixed-point
+ multiplications. */
+ if (ALL_FIXED_POINT_MODE_P (mode))
+ goto binop;
+
+ /* If first operand is constant, swap them.
+ Thus the following special case checks need only
+ check the second operand. */
+ if (TREE_CODE (treeop0) == INTEGER_CST)
+ {
+ tree t1 = treeop0;
+ treeop0 = treeop1;
+ treeop1 = t1;
+ }
+
+ /* Attempt to return something suitable for generating an
+ indexed address, for machines that support that. */
+
+ if (modifier == EXPAND_SUM && mode == ptr_mode
+ && host_integerp (treeop1, 0))
+ {
+ tree exp1 = treeop1;
+
+ op0 = expand_expr (treeop0, subtarget, VOIDmode,
+ EXPAND_SUM);
+
+ if (!REG_P (op0))
+ op0 = force_operand (op0, NULL_RTX);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (mode, op0);
+
+ return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
+ gen_int_mode (tree_low_cst (exp1, 0),
+ TYPE_MODE (TREE_TYPE (exp1)))));
+ }
+
+ if (modifier == EXPAND_STACK_PARM)
+ target = 0;
+
+ expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
case TRUNC_DIV_EXPR:
NEXT_PASS (pass_forwprop);
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_fold_builtins);
+ NEXT_PASS (pass_optimize_widening_mul);
NEXT_PASS (pass_tail_calls);
NEXT_PASS (pass_rename_ssa_copies);
NEXT_PASS (pass_uncprop);
+2010-04-22 Bernd Schmidt <bernds@codesourcery.com>
+
+ PR middle-end/29274
+ * gcc.target/i386/wmul-1.c: New test.
+ * gcc.target/i386/wmul-2.c: New test.
+ * gcc.target/bfin/wmul-1.c: New test.
+ * gcc.target/bfin/wmul-2.c: New test.
+
2010-04-22 Richard Guenther <rguenther@suse.de>
PR fortran/43829
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int mac(const short *a, const short *b, int sqr, int *sum)
+{
+ int i;
+ int dotp = *sum;
+
+ for (i = 0; i < 150; i++) {
+ dotp += b[i] * a[i];
+ sqr += b[i] * b[i];
+ }
+
+ *sum = dotp;
+ return sqr;
+}
+
+/* { dg-final { scan-assembler-times "\\(IS\\)" 2 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void vec_mpy(int y[], const short x[], short scaler)
+{
+ int i;
+
+ for (i = 0; i < 150; i++)
+ y[i] += ((scaler * x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-times "\\(IS\\)" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long mac(const int *a, const int *b, long long sqr, long long *sum)
+{
+ int i;
+ long long dotp = *sum;
+
+ for (i = 0; i < 150; i++) {
+ dotp += (long long)b[i] * a[i];
+ sqr += (long long)b[i] * b[i];
+ }
+
+ *sum = dotp;
+ return sqr;
+}
+
+/* { dg-final { scan-assembler-times "imull" 2 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void vec_mpy(int y[], const int x[], int scaler)
+{
+ int i;
+
+ for (i = 0; i < 150; i++)
+ y[i] += (((long long)scaler * x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-times "imull" 1 } } */
connected to the operand types. */
return verify_gimple_comparison (lhs_type, rhs1, rhs2);
- case WIDEN_SUM_EXPR:
case WIDEN_MULT_EXPR:
+ if (TREE_CODE (lhs_type) != INTEGER_TYPE)
+ return true;
+ return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
+ || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
+
+ case WIDEN_SUM_EXPR:
case VEC_WIDEN_MULT_HI_EXPR:
case VEC_WIDEN_MULT_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
extern struct gimple_opt_pass pass_cse_reciprocals;
extern struct gimple_opt_pass pass_cse_sincos;
extern struct gimple_opt_pass pass_optimize_bswap;
+extern struct gimple_opt_pass pass_optimize_widening_mul;
extern struct gimple_opt_pass pass_warn_function_return;
extern struct gimple_opt_pass pass_warn_function_noreturn;
extern struct gimple_opt_pass pass_cselim;
0 /* todo_flags_finish */
}
};
+
+/* Find integer multiplications where the operands are extended from
+ smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
+ where appropriate. */
+
+static unsigned int
+execute_optimize_widening_mul (void)
+{
+ bool changed = false;
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ {
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
+ tree type, type1 = NULL, type2 = NULL;
+ tree rhs1, rhs2, rhs1_convop = NULL, rhs2_convop = NULL;
+ enum tree_code rhs1_code, rhs2_code;
+
+ if (!is_gimple_assign (stmt)
+ || gimple_assign_rhs_code (stmt) != MULT_EXPR)
+ continue;
+
+ type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+ if (TREE_CODE (type) != INTEGER_TYPE)
+ continue;
+
+ rhs1 = gimple_assign_rhs1 (stmt);
+ rhs2 = gimple_assign_rhs2 (stmt);
+
+ if (TREE_CODE (rhs1) == SSA_NAME)
+ {
+ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+ if (!is_gimple_assign (rhs1_stmt))
+ continue;
+ rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
+ if (!CONVERT_EXPR_CODE_P (rhs1_code))
+ continue;
+ rhs1_convop = gimple_assign_rhs1 (rhs1_stmt);
+ type1 = TREE_TYPE (rhs1_convop);
+ if (TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+ continue;
+ }
+ else if (TREE_CODE (rhs1) != INTEGER_CST)
+ continue;
+
+ if (TREE_CODE (rhs2) == SSA_NAME)
+ {
+ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+ if (!is_gimple_assign (rhs2_stmt))
+ continue;
+ rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
+ if (!CONVERT_EXPR_CODE_P (rhs2_code))
+ continue;
+ rhs2_convop = gimple_assign_rhs1 (rhs2_stmt);
+ type2 = TREE_TYPE (rhs2_convop);
+ if (TYPE_PRECISION (type2) * 2 != TYPE_PRECISION (type))
+ continue;
+ }
+ else if (TREE_CODE (rhs2) != INTEGER_CST)
+ continue;
+
+ if (rhs1_stmt == NULL && rhs2_stmt == NULL)
+ continue;
+
+ /* Verify that the machine can perform a widening multiply in this
+ mode/signedness combination, otherwise this transformation is
+ likely to pessimize code. */
+ if ((rhs1_stmt == NULL || TYPE_UNSIGNED (type1))
+ && (rhs2_stmt == NULL || TYPE_UNSIGNED (type2))
+ && (optab_handler (umul_widen_optab, TYPE_MODE (type))
+ ->insn_code == CODE_FOR_nothing))
+ continue;
+ else if ((rhs1_stmt == NULL || !TYPE_UNSIGNED (type1))
+ && (rhs2_stmt == NULL || !TYPE_UNSIGNED (type2))
+ && (optab_handler (smul_widen_optab, TYPE_MODE (type))
+ ->insn_code == CODE_FOR_nothing))
+ continue;
+ else if (rhs1_stmt != NULL && rhs2_stmt != 0
+ && (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+ && (optab_handler (usmul_widen_optab, TYPE_MODE (type))
+ ->insn_code == CODE_FOR_nothing))
+ continue;
+
+ if ((rhs1_stmt == NULL && !int_fits_type_p (rhs1, type2))
+ || (rhs2_stmt == NULL && !int_fits_type_p (rhs2, type1)))
+ continue;
+
+ if (rhs1_stmt == NULL)
+ gimple_assign_set_rhs1 (stmt, fold_convert (type2, rhs1));
+ else
+ gimple_assign_set_rhs1 (stmt, rhs1_convop);
+ if (rhs2_stmt == NULL)
+ gimple_assign_set_rhs2 (stmt, fold_convert (type1, rhs2));
+ else
+ gimple_assign_set_rhs2 (stmt, rhs2_convop);
+ gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
+ update_stmt (stmt);
+ changed = true;
+ }
+ }
+ return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
+ | TODO_verify_stmts : 0);
+}
+
+static bool
+gate_optimize_widening_mul (void)
+{
+ return flag_expensive_optimizations && optimize;
+}
+
+struct gimple_opt_pass pass_optimize_widening_mul =
+{
+ {
+ GIMPLE_PASS,
+ "widening_mul", /* name */
+ gate_optimize_widening_mul, /* gate */
+ execute_optimize_widening_mul, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0 /* todo_flags_finish */
+ }
+};