From 15dbdc8f645ffd08ce1ac17fedf27556a0b70260 Mon Sep 17 00:00:00 2001 From: rguenth Date: Tue, 30 Nov 2010 16:55:35 +0000 Subject: [PATCH] 2010-11-30 Richard Guenther PR tree-optimization/46722 * tree-ssa-math-opts.c (convert_mult_to_fma): Get multiplication operands as arguments. (execute_optimize_widening_mul): Also handle power of two as multiplication. * gcc.target/i386/fma4-fma-2.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@167304 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 8 ++++ gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.target/i386/fma4-fma-2.c | 67 ++++++++++++++++++++++++++++++ gcc/tree-ssa-math-opts.c | 54 +++++++++++++++++++----- 4 files changed, 123 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/fma4-fma-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 82fffd03b24..b4fe46515f7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2010-11-30 Richard Guenther + + PR tree-optimization/46722 + * tree-ssa-math-opts.c (convert_mult_to_fma): Get multiplication + operands as arguments. + (execute_optimize_widening_mul): Also handle power of two as + multiplication. + 2010-11-30 Dave Korn PR middle-end/46709 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5792e66d131..ba34ecdbe0f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2010-11-30 Richard Guenther + + PR tree-optimization/46722 + * gcc.target/i386/fma4-fma-2.c: New testcase. + 2010-11-29 Nicola Pero * objc.dg/duplicate-class-1.m: New. diff --git a/gcc/testsuite/gcc.target/i386/fma4-fma-2.c b/gcc/testsuite/gcc.target/i386/fma4-fma-2.c new file mode 100644 index 00000000000..23f6ec167e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma4-fma-2.c @@ -0,0 +1,67 @@ +/* Test that the compiler properly optimizes floating point multiply + and add instructions into vfmaddss, vfmsubss, vfnmaddss, + vfnmsubss on FMA4 systems. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -funsafe-math-optimizations -mfma4" } */ + +extern void exit (int); + +float +flt_mul_add (float a, float c) +{ + return (a * a) + c; +} + +double +dbl_mul_add (double a, double c) +{ + return (a * a) + c; +} + +float +flt_mul_sub (float a, float c) +{ + return (a * a) - c; +} + +double +dbl_mul_sub (double a, double c) +{ + return (a * a) - c; +} + +float +flt_neg_mul_add (float a, float c) +{ + return (-(a * a)) + c; +} + +double +dbl_neg_mul_add (double a, double c) +{ + return (-(a * a)) + c; +} + +float f[10] = { 2, 3, 4 }; +double d[10] = { 2, 3, 4 }; + +int main () +{ + f[3] = flt_mul_add (f[0], f[2]); + f[4] = flt_mul_sub (f[0], f[2]); + f[5] = flt_neg_mul_add (f[0], f[2]); + + d[3] = dbl_mul_add (d[0], d[2]); + d[4] = dbl_mul_sub (d[0], d[2]); + d[5] = dbl_neg_mul_add (d[0], d[2]); + exit (0); +} + +/* { dg-final { scan-assembler "vfmaddss" } } */ +/* { dg-final { scan-assembler "vfmaddsd" } } */ +/* { dg-final { scan-assembler "vfmsubss" } } */ +/* { dg-final { scan-assembler "vfmsubsd" } } */ +/* { dg-final { scan-assembler "vfnmaddss" } } */ +/* { dg-final { scan-assembler "vfnmaddsd" } } */ diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 7837161e440..435c014ea7c 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -1494,14 +1494,14 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, return true; } -/* Combine the multiplication at MUL_STMT with uses in additions and - subtractions to form fused multiply-add operations. Returns true - if successful and MUL_STMT should be removed. */ +/* Combine the multiplication at MUL_STMT with operands OP1 and OP2 + with uses in additions and subtractions to form fused multiply-add + operations. Returns true if successful and MUL_STMT should be removed. 
*/ static bool -convert_mult_to_fma (gimple mul_stmt) +convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2) { - tree mul_result = gimple_assign_lhs (mul_stmt); + tree mul_result = gimple_get_lhs (mul_stmt); tree type = TREE_TYPE (mul_result); gimple use_stmt, neguse_stmt, fma_stmt; use_operand_p use_p; @@ -1607,7 +1607,7 @@ convert_mult_to_fma (gimple mul_stmt) { gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); enum tree_code use_code; - tree addop, mulop1, result = mul_result; + tree addop, mulop1 = op1, result = mul_result; bool negate_p = false; if (is_gimple_debug (use_stmt)) @@ -1646,7 +1646,6 @@ convert_mult_to_fma (gimple mul_stmt) negate_p = !negate_p; } - mulop1 = gimple_assign_rhs1 (mul_stmt); if (negate_p) mulop1 = force_gimple_operand_gsi (&gsi, build1 (NEGATE_EXPR, @@ -1656,8 +1655,7 @@ convert_mult_to_fma (gimple mul_stmt) fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR, gimple_assign_lhs (use_stmt), - mulop1, - gimple_assign_rhs2 (mul_stmt), + mulop1, op2, addop); gsi_replace (&gsi, fma_stmt, true); } @@ -1673,6 +1671,7 @@ static unsigned int execute_optimize_widening_mul (void) { basic_block bb; + bool cfg_changed = false; FOR_EACH_BB (bb) { @@ -1690,7 +1689,9 @@ execute_optimize_widening_mul (void) { case MULT_EXPR: if (!convert_mult_to_widen (stmt) - && convert_mult_to_fma (stmt)) + && convert_mult_to_fma (stmt, + gimple_assign_rhs1 (stmt), + gimple_assign_rhs2 (stmt))) { gsi_remove (&gsi, true); release_defs (stmt); @@ -1706,11 +1707,42 @@ execute_optimize_widening_mul (void) default:; } } + else if (is_gimple_call (stmt)) + { + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl + && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + switch (DECL_FUNCTION_CODE (fndecl)) + { + case BUILT_IN_POWF: + case BUILT_IN_POW: + case BUILT_IN_POWL: + if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST + && REAL_VALUES_EQUAL + (TREE_REAL_CST (gimple_call_arg (stmt, 1)), + dconst2) + && convert_mult_to_fma (stmt, + gimple_call_arg (stmt, 
0), + gimple_call_arg (stmt, 0))) + { + gsi_remove (&gsi, true); + release_defs (stmt); + if (gimple_purge_dead_eh_edges (bb)) + cfg_changed = true; + continue; + } + break; + + default:; + } + } + } gsi_next (&gsi); } } - return 0; + return cfg_changed ? TODO_cleanup_cfg : 0; } static bool -- 2.11.0