#include "expr.h"
#include "hashtab.h"
#include "recog.h"
+#include "target.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
}
/* Remove the exit edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
+ FOR_EACH_VEC_ELT (edge, remove_edges, i, ein)
remove_path (ein);
VEC_free (edge, heap, remove_edges);
}
desc->niter_expr = GEN_INT (desc->niter);
/* Remove the edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
+ FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
remove_path (e);
VEC_free (edge, heap, remove_edges);
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
and call break_superblocks when going out of cfglayout mode. But it
turns out that this never happens; and that if it does ever happen,
- the verify_flow_info call in loop_optimizer_finalize would fail.
+ the TODO_verify_flow at the end of the RTL loop passes would fail.
There are two reasons why we expected we could have control flow insns
in INSNS. The first is when a comparison has to be done in parts, and
basic_block bb;
ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
- for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
+ FOR_EACH_VEC_ELT (basic_block, ldom, j, bb)
if (!flow_bb_inside_loop_p (loop, bb))
VEC_safe_push (basic_block, heap, dom_bbs, bb);
}
/* Remove the edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
+ FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
remove_path (e);
VEC_free (edge, heap, remove_edges);
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{
static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
- rtx set, dest, src, op1, op2, something;
+ rtx set, dest, src;
struct var_to_expand *ves;
- enum machine_mode mode1, mode2;
unsigned accum_pos;
+ enum rtx_code code;
int debug_uses = 0;
set = single_set (insn);
dest = SET_DEST (set);
src = SET_SRC (set);
+ code = GET_CODE (src);
- if (GET_CODE (src) != PLUS
- && GET_CODE (src) != MINUS
- && GET_CODE (src) != MULT)
+ if (code != PLUS && code != MINUS && code != MULT && code != FMA)
return NULL;
+ if (FLOAT_MODE_P (GET_MODE (dest)))
+ {
+ if (!flag_associative_math)
+ return NULL;
+ /* In the case of FMA, we're also changing the rounding. */
+ if (code == FMA && !flag_unsafe_math_optimizations)
+ return NULL;
+ }
+
/* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
in MD. But if there is no optab to generate the insn, we can not
perform the variable expansion. This can happen if an MD provides
So we check have_insn_for which looks for an optab for the operation
in SRC. If it doesn't exist, we can't perform the expansion even
though INSN is valid. */
- if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
+ if (!have_insn_for (code, GET_MODE (src)))
return NULL;
- op1 = XEXP (src, 0);
- op2 = XEXP (src, 1);
-
if (!REG_P (dest)
&& !(GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest))))
return NULL;
- if (rtx_equal_p (dest, op1))
+ /* Find the accumulator use within the operation. */
+ if (code == FMA)
+ {
+ /* We only support accumulation via FMA in the ADD position. */
+ if (!rtx_equal_p (dest, XEXP (src, 2)))
+ return NULL;
+ accum_pos = 2;
+ }
+ else if (rtx_equal_p (dest, XEXP (src, 0)))
accum_pos = 0;
- else if (rtx_equal_p (dest, op2))
- accum_pos = 1;
+ else if (rtx_equal_p (dest, XEXP (src, 1)))
+ {
+ /* The method of expansion that we are using, which includes the
+ initialization of the expansions with zero and the summation of
+ the expansions at the end of the computation, will yield wrong
+ results for (x = something - x); thus avoid using it in that case. */
+ if (code == MINUS)
+ return NULL;
+ accum_pos = 1;
+ }
else
return NULL;
- /* The method of expansion that we are using; which includes
- the initialization of the expansions with zero and the summation of
- the expansions at the end of the computation will yield wrong results
- for (x = something - x) thus avoid using it in that case. */
- if (accum_pos == 1
- && GET_CODE (src) == MINUS)
- return NULL;
-
- something = (accum_pos == 0) ? op2 : op1;
-
- if (rtx_referenced_p (dest, something))
+ /* The accumulator must not be referenced by the other operands. */
+ if (code == FMA)
+ {
+ if (rtx_referenced_p (dest, XEXP (src, 0))
+ || rtx_referenced_p (dest, XEXP (src, 1)))
+ return NULL;
+ }
+ else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
return NULL;
+ /* The accumulator must be referenced in exactly one insn in the loop. */
if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
return NULL;
- mode1 = GET_MODE (dest);
- mode2 = GET_MODE (something);
- if ((FLOAT_MODE_P (mode1)
- || FLOAT_MODE_P (mode2))
- && !flag_associative_math)
- return NULL;
-
if (dump_file)
- {
- fprintf (dump_file,
- "\n;; Expanding Accumulator ");
- print_rtl (dump_file, dest);
- fprintf (dump_file, "\n");
- }
+ {
+ fprintf (dump_file, "\n;; Expanding Accumulator ");
+ print_rtl (dump_file, dest);
+ fprintf (dump_file, "\n");
+ }
if (debug_uses)
/* Instead of resetting the debug insns, we could replace each
return;
start_sequence ();
- if (ve->op == PLUS || ve->op == MINUS)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- if (honor_signed_zero_p)
- zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
- else
- zero_init = CONST0_RTX (mode);
+ switch (ve->op)
+ {
+ case FMA:
+ /* Note that we only accumulate FMA via the ADD operand. */
+ case PLUS:
+ case MINUS:
+ FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+ {
+ if (honor_signed_zero_p)
+ zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
+ else
+ zero_init = CONST0_RTX (mode);
+ emit_move_insn (var, zero_init);
+ }
+ break;
- emit_move_insn (var, zero_init);
- }
- else if (ve->op == MULT)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- zero_init = CONST1_RTX (GET_MODE (var));
- emit_move_insn (var, zero_init);
- }
+ case MULT:
+ FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+ {
+ zero_init = CONST1_RTX (GET_MODE (var));
+ emit_move_insn (var, zero_init);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
seq = get_insns ();
end_sequence ();
return;
start_sequence ();
- if (ve->op == PLUS || ve->op == MINUS)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
- var, sum);
- }
- else if (ve->op == MULT)
- for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
- {
- sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
- var, sum);
- }
+ switch (ve->op)
+ {
+ case FMA:
+ /* Note that we only accumulate FMA via the ADD operand. */
+ case PLUS:
+ case MINUS:
+ FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+ sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
+ break;
+
+ case MULT:
+ FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+ sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
expr = force_operand (sum, ve->reg);
if (expr != ve->reg)