return true;
}
+/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
+ with uses in additions and subtractions to form fused multiply-add
+ operations. Returns true if successful and MUL_STMT should be removed. */
+
+static bool
+convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
+{
+ tree mul_result = gimple_get_lhs (mul_stmt);
+ tree type = TREE_TYPE (mul_result);
+ gimple use_stmt, neguse_stmt, fma_stmt;
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+
+ if (FLOAT_TYPE_P (type)
+ && flag_fp_contract_mode == FP_CONTRACT_OFF)
+ return false;
+
+ /* We don't want to do bitfield reduction ops. */
+ if (INTEGRAL_TYPE_P (type)
+ && (TYPE_PRECISION (type)
+ != GET_MODE_PRECISION (TYPE_MODE (type))))
+ return false;
+
+ /* If the target doesn't support it, don't generate it. We assume that
+ if fma isn't available then fms, fnma or fnms are not either. */
+ if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* Make sure that the multiplication statement becomes dead after
+ the transformation, thus that all uses are transformed to FMAs.
+ This means we assume that an FMA operation has the same cost
+ as an addition. */
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
+ {
+ enum tree_code use_code;
+ tree result = mul_result;
+ bool negate_p = false;
+
+ use_stmt = USE_STMT (use_p);
+
+ if (is_gimple_debug (use_stmt))
+ continue;
+
+ /* For now restrict this operations to single basic blocks. In theory
+ we would want to support sinking the multiplication in
+ m = a*b;
+ if ()
+ ma = m + c;
+ else
+ d = m;
+ to form a fma in the then block and sink the multiplication to the
+ else block. */
+ if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+ return false;
+
+ if (!is_gimple_assign (use_stmt))
+ return false;
+
+ use_code = gimple_assign_rhs_code (use_stmt);
+
+ /* A negate on the multiplication leads to FNMA. */
+ if (use_code == NEGATE_EXPR)
+ {
+ ssa_op_iter iter;
+ tree use;
+
+ result = gimple_assign_lhs (use_stmt);
+
+ /* Make sure the negate statement becomes dead with this
+ single transformation. */
+ if (!single_imm_use (gimple_assign_lhs (use_stmt),
+ &use_p, &neguse_stmt))
+ return false;
+
+ /* Make sure the multiplication isn't also used on that stmt. */
+ FOR_EACH_SSA_TREE_OPERAND (use, neguse_stmt, iter, SSA_OP_USE)
+ if (use == mul_result)
+ return false;
+
+ /* Re-validate. */
+ use_stmt = neguse_stmt;
+ if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+ return false;
+ if (!is_gimple_assign (use_stmt))
+ return false;
+
+ use_code = gimple_assign_rhs_code (use_stmt);
+ negate_p = true;
+ }
+
+ switch (use_code)
+ {
+ case MINUS_EXPR:
+ if (gimple_assign_rhs2 (use_stmt) == result)
+ negate_p = !negate_p;
+ break;
+ case PLUS_EXPR:
+ break;
+ default:
+ /* FMA can only be formed from PLUS and MINUS. */
+ return false;
+ }
+
+ /* We can't handle a * b + a * b. */
+ if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+ return false;
+
+ /* While it is possible to validate whether or not the exact form
+ that we've recognized is available in the backend, the assumption
+ is that the transformation is never a loss. For instance, suppose
+ the target only has the plain FMA pattern available. Consider
+ a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
+ is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
+ still have 3 operations, but in the FMA form the two NEGs are
+ independant and could be run in parallel. */
+ }
+
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+ enum tree_code use_code;
+ tree addop, mulop1 = op1, result = mul_result;
+ bool negate_p = false;
+
+ if (is_gimple_debug (use_stmt))
+ continue;
+
+ use_code = gimple_assign_rhs_code (use_stmt);
+ if (use_code == NEGATE_EXPR)
+ {
+ result = gimple_assign_lhs (use_stmt);
+ single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
+ gsi_remove (&gsi, true);
+ release_defs (use_stmt);
+
+ use_stmt = neguse_stmt;
+ gsi = gsi_for_stmt (use_stmt);
+ use_code = gimple_assign_rhs_code (use_stmt);
+ negate_p = true;
+ }
+
+ if (gimple_assign_rhs1 (use_stmt) == result)
+ {
+ addop = gimple_assign_rhs2 (use_stmt);
+ /* a * b - c -> a * b + (-c) */
+ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ addop = force_gimple_operand_gsi (&gsi,
+ build1 (NEGATE_EXPR,
+ type, addop),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ }
+ else
+ {
+ addop = gimple_assign_rhs1 (use_stmt);
+ /* a - b * c -> (-b) * c + a */
+ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ negate_p = !negate_p;
+ }
+
+ if (negate_p)
+ mulop1 = force_gimple_operand_gsi (&gsi,
+ build1 (NEGATE_EXPR,
+ type, mulop1),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+
+ fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
+ gimple_assign_lhs (use_stmt),
+ mulop1, op2,
+ addop);
+ gsi_replace (&gsi, fma_stmt, true);
+ }
+
+ return true;
+}
+
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
where appropriate. */
static unsigned int
execute_optimize_widening_mul (void)
{
- bool changed = false;
basic_block bb;
+ bool cfg_changed = false;
FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple stmt = gsi_stmt (gsi);
enum tree_code code;
- if (!is_gimple_assign (stmt))
- continue;
+ if (is_gimple_assign (stmt))
+ {
+ code = gimple_assign_rhs_code (stmt);
+ switch (code)
+ {
+ case MULT_EXPR:
+ if (!convert_mult_to_widen (stmt)
+ && convert_mult_to_fma (stmt,
+ gimple_assign_rhs1 (stmt),
+ gimple_assign_rhs2 (stmt)))
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
+ }
+ break;
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ convert_plusminus_to_widen (&gsi, stmt, code);
+ break;
+
+ default:;
+ }
+ }
+ else if (is_gimple_call (stmt)
+ && gimple_call_lhs (stmt))
+ {
+ tree fndecl = gimple_call_fndecl (stmt);
+ if (fndecl
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case BUILT_IN_POWF:
+ case BUILT_IN_POW:
+ case BUILT_IN_POWL:
+ if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
+ && REAL_VALUES_EQUAL
+ (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+ dconst2)
+ && convert_mult_to_fma (stmt,
+ gimple_call_arg (stmt, 0),
+ gimple_call_arg (stmt, 0)))
+ {
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ if (gimple_purge_dead_eh_edges (bb))
+ cfg_changed = true;
+ continue;
+ }
+ break;
- code = gimple_assign_rhs_code (stmt);
- if (code == MULT_EXPR)
- changed |= convert_mult_to_widen (stmt);
- else if (code == PLUS_EXPR || code == MINUS_EXPR)
- changed |= convert_plusminus_to_widen (&gsi, stmt, code);
+ default:;
+ }
+ }
+ }
+ gsi_next (&gsi);
}
}
- return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
- | TODO_verify_stmts : 0);
+ return cfg_changed ? TODO_cleanup_cfg : 0;
}
static bool
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- 0 /* todo_flags_finish */
+ TODO_verify_ssa
+ | TODO_verify_stmts
+ | TODO_dump_func
+ | TODO_update_ssa /* todo_flags_finish */
}
};