Daily bump.

[pf3gnuchains/gcc-fork.git] / gcc / tree-ssa-math-opts.c
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c

index a814f6f..6e2213c 100644 (file)
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1494,6 +1494,183 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
    return true;
  }
  
+/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
+   with uses in additions and subtractions to form fused multiply-add
+   operations.  Returns true if successful and MUL_STMT should be removed.  */
+
+static bool
+convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
+{
+  tree mul_result = gimple_get_lhs (mul_stmt);
+  tree type = TREE_TYPE (mul_result);
+  gimple use_stmt, neguse_stmt, fma_stmt;
+  use_operand_p use_p;
+  imm_use_iterator imm_iter;
+
+  if (FLOAT_TYPE_P (type)
+      && flag_fp_contract_mode == FP_CONTRACT_OFF)
+    return false;
+
+  /* We don't want to do bitfield reduction ops.  */
+  if (INTEGRAL_TYPE_P (type)
+      && (TYPE_PRECISION (type)
+         != GET_MODE_PRECISION (TYPE_MODE (type))))
+    return false;
+
+  /* If the target doesn't support it, don't generate it.  We assume that
+     if fma isn't available then fms, fnma or fnms are not either.  */
+  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+    return false;
+
+  /* Make sure that the multiplication statement becomes dead after
+     the transformation, thus that all uses are transformed to FMAs.
+     This means we assume that an FMA operation has the same cost
+     as an addition.  */
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
+    {
+      enum tree_code use_code;
+      tree result = mul_result;
+      bool negate_p = false;
+
+      use_stmt = USE_STMT (use_p);
+
+      if (is_gimple_debug (use_stmt))
+       continue;
+
+      /* For now restrict this operations to single basic blocks.  In theory
+        we would want to support sinking the multiplication in
+        m = a*b;
+        if ()
+          ma = m + c;
+        else
+          d = m;
+        to form a fma in the then block and sink the multiplication to the
+        else block.  */
+      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+       return false;
+
+      if (!is_gimple_assign (use_stmt))
+       return false;
+
+      use_code = gimple_assign_rhs_code (use_stmt);
+
+      /* A negate on the multiplication leads to FNMA.  */
+      if (use_code == NEGATE_EXPR)
+       {
+         ssa_op_iter iter;
+         tree use;
+
+         result = gimple_assign_lhs (use_stmt);
+
+         /* Make sure the negate statement becomes dead with this
+            single transformation.  */
+         if (!single_imm_use (gimple_assign_lhs (use_stmt),
+                              &use_p, &neguse_stmt))
+           return false;
+
+         /* Make sure the multiplication isn't also used on that stmt.  */
+         FOR_EACH_SSA_TREE_OPERAND (use, neguse_stmt, iter, SSA_OP_USE)
+           if (use == mul_result)
+             return false;
+
+         /* Re-validate.  */
+         use_stmt = neguse_stmt;
+         if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+           return false;
+         if (!is_gimple_assign (use_stmt))
+           return false;
+
+         use_code = gimple_assign_rhs_code (use_stmt);
+         negate_p = true;
+       }
+
+      switch (use_code)
+       {
+       case MINUS_EXPR:
+         if (gimple_assign_rhs2 (use_stmt) == result)
+           negate_p = !negate_p;
+         break;
+       case PLUS_EXPR:
+         break;
+       default:
+         /* FMA can only be formed from PLUS and MINUS.  */
+         return false;
+       }
+
+      /* We can't handle a * b + a * b.  */
+      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+       return false;
+
+      /* While it is possible to validate whether or not the exact form
+        that we've recognized is available in the backend, the assumption
+        is that the transformation is never a loss.  For instance, suppose
+        the target only has the plain FMA pattern available.  Consider
+        a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
+        is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
+        still have 3 operations, but in the FMA form the two NEGs are
+        independant and could be run in parallel.  */
+    }
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
+    {
+      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+      enum tree_code use_code;
+      tree addop, mulop1 = op1, result = mul_result;
+      bool negate_p = false;
+
+      if (is_gimple_debug (use_stmt))
+       continue;
+
+      use_code = gimple_assign_rhs_code (use_stmt);
+      if (use_code == NEGATE_EXPR)
+       {
+         result = gimple_assign_lhs (use_stmt);
+         single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
+         gsi_remove (&gsi, true);
+         release_defs (use_stmt);
+
+         use_stmt = neguse_stmt;
+         gsi = gsi_for_stmt (use_stmt);
+         use_code = gimple_assign_rhs_code (use_stmt);
+         negate_p = true;
+       }
+
+      if (gimple_assign_rhs1 (use_stmt) == result)
+       {
+         addop = gimple_assign_rhs2 (use_stmt);
+         /* a * b - c -> a * b + (-c)  */
+         if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+           addop = force_gimple_operand_gsi (&gsi,
+                                             build1 (NEGATE_EXPR,
+                                                     type, addop),
+                                             true, NULL_TREE, true,
+                                             GSI_SAME_STMT);
+       }
+      else
+       {
+         addop = gimple_assign_rhs1 (use_stmt);
+         /* a - b * c -> (-b) * c + a */
+         if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+           negate_p = !negate_p;
+       }
+
+      if (negate_p)
+       mulop1 = force_gimple_operand_gsi (&gsi,
+                                          build1 (NEGATE_EXPR,
+                                                  type, mulop1),
+                                          true, NULL_TREE, true,
+                                          GSI_SAME_STMT);
+
+      fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
+                                               gimple_assign_lhs (use_stmt),
+                                               mulop1, op2,
+                                               addop);
+      gsi_replace (&gsi, fma_stmt, true);
+    }
+
+  return true;
+}
+
  /* Find integer multiplications where the operands are extended from
     smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
     where appropriate.  */
@@ -1501,31 +1678,81 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
  static unsigned int
  execute_optimize_widening_mul (void)
  {
-  bool changed = false;
    basic_block bb;
+  bool cfg_changed = false;
  
    FOR_EACH_BB (bb)
      {
        gimple_stmt_iterator gsi;
  
-      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
          {
           gimple stmt = gsi_stmt (gsi);
           enum tree_code code;
  
-         if (!is_gimple_assign (stmt))
-           continue;
+         if (is_gimple_assign (stmt))
+           {
+             code = gimple_assign_rhs_code (stmt);
+             switch (code)
+               {
+               case MULT_EXPR:
+                 if (!convert_mult_to_widen (stmt)
+                     && convert_mult_to_fma (stmt,
+                                             gimple_assign_rhs1 (stmt),
+                                             gimple_assign_rhs2 (stmt)))
+                   {
+                     gsi_remove (&gsi, true);
+                     release_defs (stmt);
+                     continue;
+                   }
+                 break;
+
+               case PLUS_EXPR:
+               case MINUS_EXPR:
+                 convert_plusminus_to_widen (&gsi, stmt, code);
+                 break;
+
+               default:;
+               }
+           }
+         else if (is_gimple_call (stmt)
+                  && gimple_call_lhs (stmt))
+           {
+             tree fndecl = gimple_call_fndecl (stmt);
+             if (fndecl
+                 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+               {
+                 switch (DECL_FUNCTION_CODE (fndecl))
+                   {
+                     case BUILT_IN_POWF:
+                     case BUILT_IN_POW:
+                     case BUILT_IN_POWL:
+                       if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
+                           && REAL_VALUES_EQUAL
+                                (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+                                 dconst2)
+                           && convert_mult_to_fma (stmt,
+                                                   gimple_call_arg (stmt, 0),
+                                                   gimple_call_arg (stmt, 0)))
+                         {
+                           unlink_stmt_vdef (stmt);
+                           gsi_remove (&gsi, true);
+                           release_defs (stmt);
+                           if (gimple_purge_dead_eh_edges (bb))
+                             cfg_changed = true;
+                           continue;
+                         }
+                         break;
  
-         code = gimple_assign_rhs_code (stmt);
-         if (code == MULT_EXPR)
-           changed |= convert_mult_to_widen (stmt);
-         else if (code == PLUS_EXPR || code == MINUS_EXPR)
-           changed |= convert_plusminus_to_widen (&gsi, stmt, code);
+                     default:;
+                   }
+               }
+           }
+         gsi_next (&gsi);
         }
      }
  
-  return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
-         | TODO_verify_stmts : 0);
+  return cfg_changed ? TODO_cleanup_cfg : 0;
  }
  
  static bool
@@ -1549,6 +1776,9 @@ struct gimple_opt_pass pass_optimize_widening_mul =
    0,                                   /* properties_provided */
    0,                                   /* properties_destroyed */
    0,                                   /* todo_flags_start */
-  0                                     /* todo_flags_finish */
+  TODO_verify_ssa
+  | TODO_verify_stmts
+  | TODO_dump_func
+  | TODO_update_ssa                     /* todo_flags_finish */
   }
  };