--- /dev/null
+/* Test that the compiler properly optimizes floating point multiply
+ and add instructions into the fused vfmaddss, vfmsubss and vfnmaddss
+ forms (and their double counterparts vfmaddsd, vfmsubsd and
+ vfnmaddsd) on FMA4 systems. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -funsafe-math-optimizations -mfma4" } */
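+/* Fusing the multiply and add skips the intermediate rounding of the
+ product, hence -funsafe-math-optimizations in the options above.  */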
+
+extern void exit (int);
+
+float
+flt_mul_add (float a, float c)
+{
+ return (a * a) + c;
+}
+
+double
+dbl_mul_add (double a, double c)
+{
+ return (a * a) + c;
+}
+
+float
+flt_mul_sub (float a, float c)
+{
+ return (a * a) - c;
+}
+
+double
+dbl_mul_sub (double a, double c)
+{
+ return (a * a) - c;
+}
+
+float
+flt_neg_mul_add (float a, float c)
+{
+ return (-(a * a)) + c;
+}
+
+double
+dbl_neg_mul_add (double a, double c)
+{
+ return (-(a * a)) + c;
+}
+
+float f[10] = { 2, 3, 4 };
+double d[10] = { 2, 3, 4 };
+
+int
+main ()
+{
+ f[3] = flt_mul_add (f[0], f[2]);
+ f[4] = flt_mul_sub (f[0], f[2]);
+ f[5] = flt_neg_mul_add (f[0], f[2]);
+
+ d[3] = dbl_mul_add (d[0], d[2]);
+ d[4] = dbl_mul_sub (d[0], d[2]);
+ d[5] = dbl_neg_mul_add (d[0], d[2]);
+ exit (0);
+}
+
+/* { dg-final { scan-assembler "vfmaddss" } } */
+/* { dg-final { scan-assembler "vfmaddsd" } } */
+/* { dg-final { scan-assembler "vfmsubss" } } */
+/* { dg-final { scan-assembler "vfmsubsd" } } */
+/* { dg-final { scan-assembler "vfnmaddss" } } */
+/* { dg-final { scan-assembler "vfnmaddsd" } } */
return true;
}
-/* Combine the multiplication at MUL_STMT with uses in additions and
- subtractions to form fused multiply-add operations. Returns true
- if successful and MUL_STMT should be removed. */
+/* Combine the multiplication at MUL_STMT with operands OP1 and OP2
+ with uses in additions and subtractions to form fused multiply-add
+ operations. Returns true if successful and MUL_STMT should be removed. */
static bool
-convert_mult_to_fma (gimple mul_stmt)
+convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
{
- tree mul_result = gimple_assign_lhs (mul_stmt);
+ tree mul_result = gimple_get_lhs (mul_stmt);
tree type = TREE_TYPE (mul_result);
gimple use_stmt, neguse_stmt, fma_stmt;
use_operand_p use_p;
{
gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
enum tree_code use_code;
- tree addop, mulop1, result = mul_result;
+ tree addop, mulop1 = op1, result = mul_result;
bool negate_p = false;
if (is_gimple_debug (use_stmt))
negate_p = !negate_p;
}
- mulop1 = gimple_assign_rhs1 (mul_stmt);
if (negate_p)
mulop1 = force_gimple_operand_gsi (&gsi,
build1 (NEGATE_EXPR,
fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
gimple_assign_lhs (use_stmt),
- mulop1,
- gimple_assign_rhs2 (mul_stmt),
+ mulop1, op2,
addop);
gsi_replace (&gsi, fma_stmt, true);
}
execute_optimize_widening_mul (void)
{
basic_block bb;
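+ /* Set when removing a statement leaves dead EH edges behind;
+ reported to the pass manager via TODO_cleanup_cfg at the end.  */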
+ bool cfg_changed = false;
FOR_EACH_BB (bb)
{
{
case MULT_EXPR:
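+ /* The multiplication operands are passed explicitly so the same
+ routine can also fuse pow (x, 2.0) calls below, where both
+ "operands" are the single pow argument.  */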
if (!convert_mult_to_widen (stmt)
- && convert_mult_to_fma (stmt))
+ && convert_mult_to_fma (stmt,
+ gimple_assign_rhs1 (stmt),
+ gimple_assign_rhs2 (stmt)))
{
gsi_remove (&gsi, true);
release_defs (stmt);
default:;
}
}
+ else if (is_gimple_call (stmt))
+ {
+ tree fndecl = gimple_call_fndecl (stmt);
+ if (fndecl
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case BUILT_IN_POWF:
+ case BUILT_IN_POW:
+ case BUILT_IN_POWL:
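+ /* pow (x, 2.0) is x * x, so such a call can feed an FMA just
+ like an explicit multiplication; only a REAL_CST exponent
+ equal to 2.0 is recognized here.  */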
+ if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
+ && REAL_VALUES_EQUAL
+ (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+ dconst2)
+ && convert_mult_to_fma (stmt,
+ gimple_call_arg (stmt, 0),
+ gimple_call_arg (stmt, 0)))
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
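+ /* The pow call could throw; removing it may leave dead EH
+ edges in this block, and purging them changes the CFG.  */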
+ if (gimple_purge_dead_eh_edges (bb))
+ cfg_changed = true;
+ continue;
+ }
+ break;
+
+ default:;
+ }
+ }
+ }
gsi_next (&gsi);
}
}
- return 0;
+ return cfg_changed ? TODO_cleanup_cfg : 0;
}
static bool