/* High-level loop manipulation functions.
- Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
+Free Software Foundation; either version 3, or (at your option) any
later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to the Free
-Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "cfglayout.h"
#include "tree-scalar-evolution.h"
#include "params.h"
+#include "tree-inline.h"
/* Creates an induction variable with value BASE + STEP * iteration in LOOP.
It is expected that neither BASE nor STEP are shared with other expressions
}
else
{
- if (!tree_expr_nonnegative_p (step)
+ bool ovf;
+
+ if (!tree_expr_nonnegative_warnv_p (step, &ovf)
&& may_negate_without_overflow_p (step))
{
incr_op = MINUS_EXPR;
}
}
}
-
+ if (POINTER_TYPE_P (TREE_TYPE (base)))
+ {
+ step = fold_convert (sizetype, step);
+ if (incr_op == MINUS_EXPR)
+ step = fold_build1 (NEGATE_EXPR, sizetype, step);
+ incr_op = POINTER_PLUS_EXPR;
+ }
/* Gimplify the step if necessary. We put the computations in front of the
loop (i.e. the step should be loop invariant). */
step = force_gimple_operand (step, &stmts, true, var);
if (stmts)
bsi_insert_on_edge_immediate (pe, stmts);
- stmt = build2 (MODIFY_EXPR, void_type_node, va,
- build2 (incr_op, TREE_TYPE (base),
- vb, step));
+ stmt = build_gimple_modify_stmt (va,
+ build2 (incr_op, TREE_TYPE (base),
+ vb, step));
SSA_NAME_DEF_STMT (va) = stmt;
if (after)
bsi_insert_after (incr_pos, stmt, BSI_NEW_STMT);
def_loop = def_bb->loop_father;
/* If the definition is not inside loop, it is not interesting. */
- if (!def_loop->outer)
+ if (!loop_outer (def_loop))
return;
if (!use_blocks[ver])
tree var;
basic_block bb = bb_for_stmt (stmt);
- FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_USES | SSA_OP_ALL_KILLS)
+ FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_USES)
find_uses_to_rename_use (bb, var, use_blocks, need_phis);
}
void
rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
{
- bitmap loop_exits = get_loops_exits ();
+ bitmap loop_exits;
bitmap *use_blocks;
unsigned i, old_num_ssa_names;
- bitmap names_to_rename = BITMAP_ALLOC (NULL);
+ bitmap names_to_rename;
+
+ loops_state_set (LOOP_CLOSED_SSA);
+ if (number_of_loops () <= 1)
+ return;
+
+ loop_exits = get_loops_exits ();
+ names_to_rename = BITMAP_ALLOC (NULL);
/* If the pass has caused the SSA form to be out-of-date, update it
now. */
ssa_op_iter iter;
tree var;
- FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_USES | SSA_OP_ALL_KILLS)
+ FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_USES)
check_loop_closed_ssa_use (bb, var);
}
tree phi;
unsigned i;
- if (current_loops == NULL)
+ if (number_of_loops () <= 1)
return;
verify_ssa (false);
}
/* Split loop exit edge EXIT. The things are a bit complicated by a need to
- preserve the loop closed ssa form. */
+ preserve the loop closed ssa form. The newly created block is returned. */
-void
+basic_block
split_loop_exit_edge (edge exit)
{
basic_block dest = exit->dest;
name = USE_FROM_PTR (op_p);
- /* If the argument of the phi node is a constant, we do not need
+ /* If the argument of the PHI node is a constant, we do not need
to keep it inside loop. */
if (TREE_CODE (name) != SSA_NAME)
continue;
/* Otherwise create an auxiliary phi node that will copy the value
- of the ssa name out of the loop. */
+ of the SSA name out of the loop. */
new_name = duplicate_ssa_name (name, NULL);
new_phi = create_phi_node (new_name, bb);
SSA_NAME_DEF_STMT (new_name) = new_phi;
add_phi_arg (new_phi, name, exit);
SET_USE (op_p, new_name);
}
+
+ return bb;
}
/* Returns the basic block in that statements should be emitted for induction
bb = single_pred (loop->latch);
last = last_stmt (bb);
- if (TREE_CODE (last) != COND_EXPR)
+ if (!last
+ || TREE_CODE (last) != COND_EXPR)
return NULL;
exit = EDGE_SUCC (bb, 0);
bool
tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
unsigned int ndupl, sbitmap wont_exit,
- edge orig, edge *to_remove,
- unsigned int *n_to_remove, int flags)
+ edge orig, VEC (edge, heap) **to_remove,
+ int flags)
{
unsigned first_new_block;
- if (!(current_loops->state & LOOPS_HAVE_SIMPLE_LATCHES))
+ if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
return false;
- if (!(current_loops->state & LOOPS_HAVE_PREHEADERS))
+ if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
return false;
#ifdef ENABLE_CHECKING
first_new_block = last_basic_block;
if (!duplicate_loop_to_header_edge (loop, e, ndupl, wont_exit,
- orig, to_remove, n_to_remove, flags))
+ orig, to_remove, flags))
return false;
/* Readd the removed phi args for e. */
return true;
}
-/* Build if (COND) goto THEN_LABEL; else goto ELSE_LABEL; */
-
-static tree
-build_if_stmt (tree cond, tree then_label, tree else_label)
-{
- return build3 (COND_EXPR, void_type_node,
- cond,
- build1 (GOTO_EXPR, void_type_node, then_label),
- build1 (GOTO_EXPR, void_type_node, else_label));
-}
-
/* Returns true if we can unroll LOOP FACTOR times. Number
of iterations of the loop is returned in NITER. */
return false;
/* The final loop should be small enough. */
- if (tree_num_loop_insns (loop) * factor
+ if (tree_num_loop_insns (loop, &eni_size_weights) * factor
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
return false;
tree base = desc->control.base;
tree step = desc->control.step;
tree bound = desc->bound;
- tree type = TREE_TYPE (base);
+ tree type = TREE_TYPE (step);
tree bigstep, delta;
tree min = lower_bound_in_type (type, type);
tree max = upper_bound_in_type (type, type);
enum tree_code cmp = desc->cmp;
tree cond = boolean_true_node, assum;
+ /* For pointers, do the arithmetic in the type of step (sizetype). */
+ base = fold_convert (type, base);
+ bound = fold_convert (type, bound);
+
*enter_cond = boolean_false_node;
*exit_base = NULL_TREE;
*exit_step = NULL_TREE;
of the loop, i.e., BOUND - step * FACTOR does not overflow.
3) # of iterations is at least FACTOR */
- if (!zero_p (desc->may_be_zero))
+ if (!integer_zerop (desc->may_be_zero))
cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
invert_truthvalue (desc->may_be_zero),
cond);
*exit_bound = bound;
}
+/* Scales the frequencies of all basic blocks in LOOP that are strictly
+ dominated by BB by NUM/DEN.
+
+ The walk goes over the sons of BB in the dominator tree (CDI_DOMINATORS)
+ and recurses into every son that lies inside LOOP, so BB itself is never
+ scaled. A son that is outside LOOP is skipped together with everything
+ below it in the dominator tree. Each visited block is scaled on its own
+ through scale_bbs_frequencies_int.
+
+ If DEN is zero, nothing is scaled and the function returns immediately
+ (presumably to avoid a division by zero in the scaling; the visible caller
+ passes REG_BR_PROB_BASE - exit->probability as DEN, which is zero when
+ exit->probability equals REG_BR_PROB_BASE). */
+
+static void
+scale_dominated_blocks_in_loop (struct loop *loop, basic_block bb,
+ int num, int den)
+{
+ basic_block son;
+
+ if (den == 0)
+ return;
+
+ for (son = first_dom_son (CDI_DOMINATORS, bb);
+ son;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ {
+ if (!flow_bb_inside_loop_p (loop, son))
+ continue;
+ scale_bbs_frequencies_int (&son, 1, num, den);
+ scale_dominated_blocks_in_loop (loop, son, num, den);
+ }
+}
+
/* Unroll LOOP FACTOR times. DESC describes number of iterations of LOOP.
EXIT is the exit of the loop to that DESC corresponds.
if (st)
break;
post;
- } */
+ }
+
+ Before the loop is unrolled, TRANSFORM is called for it (only for the
+ unrolled loop, but not for its versioned copy). DATA is passed to
+ TRANSFORM. */
+
+/* Probability in % that the unrolled loop is entered. Just a guess. */
+#define PROB_UNROLLED_LOOP_ENTERED 90
void
-tree_unroll_loop (struct loop *loop, unsigned factor,
- edge exit, struct tree_niter_desc *desc)
+tree_transform_and_unroll_loop (struct loop *loop, unsigned factor,
+ edge exit, struct tree_niter_desc *desc,
+ transform_callback transform,
+ void *data)
{
- tree dont_exit, exit_if, ctr_before, ctr_after;
+ tree exit_if, ctr_before, ctr_after;
tree enter_main_cond, exit_base, exit_step, exit_bound;
enum tree_code exit_cmp;
tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
struct loop *new_loop;
basic_block rest, exit_bb;
edge old_entry, new_entry, old_latch, precond_edge, new_exit;
- edge nonexit, new_nonexit;
+ edge new_nonexit, e;
block_stmt_iterator bsi;
use_operand_p op;
bool ok;
- unsigned est_niter;
+ unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
+ unsigned new_est_niter, i, prob;
unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
sbitmap wont_exit;
+ VEC (edge, heap) *to_remove = NULL;
est_niter = expected_loop_iterations (loop);
determine_exit_conditions (loop, desc, factor,
&enter_main_cond, &exit_base, &exit_step,
&exit_cmp, &exit_bound);
- new_loop = loop_version (loop, enter_main_cond, NULL, true);
+ /* Let us assume that the unrolled loop is quite likely to be entered. */
+ if (integer_nonzerop (enter_main_cond))
+ prob_entry = REG_BR_PROB_BASE;
+ else
+ prob_entry = PROB_UNROLLED_LOOP_ENTERED * REG_BR_PROB_BASE / 100;
+
+ /* The values for scales should keep profile consistent, and somewhat close
+ to correct.
+
+ TODO: The current value of SCALE_REST makes it appear that the loop that
+ is created by splitting the remaining iterations of the unrolled loop is
+ executed the same number of times as the original loop, and with the same
+ frequencies, which is obviously wrong. This does not appear to cause
+ problems, so we do not bother with fixing it for now. To make the profile
+ correct, we would need to change the probability of the exit edge of the
+ loop, and recompute the distribution of frequencies in its body because
+ of this change (scale the frequencies of blocks before and after the exit
+ by appropriate factors). */
+ scale_unrolled = prob_entry;
+ scale_rest = REG_BR_PROB_BASE;
+
+ new_loop = loop_version (loop, enter_main_cond, NULL,
+ prob_entry, scale_unrolled, scale_rest, true);
gcc_assert (new_loop != NULL);
update_ssa (TODO_update_ssa);
- /* Unroll the loop and remove the old exits. */
- dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
- ? boolean_false_node
- : boolean_true_node);
- if (exit == EDGE_SUCC (exit->src, 0))
- nonexit = EDGE_SUCC (exit->src, 1);
- else
- nonexit = EDGE_SUCC (exit->src, 0);
- nonexit->probability = REG_BR_PROB_BASE;
- exit->probability = 0;
- nonexit->count += exit->count;
- exit->count = 0;
- exit_if = last_stmt (exit->src);
- COND_EXPR_COND (exit_if) = dont_exit;
- update_stmt (exit_if);
-
- wont_exit = sbitmap_alloc (factor);
- sbitmap_ones (wont_exit);
- ok = tree_duplicate_loop_to_header_edge
- (loop, loop_latch_edge (loop), factor - 1,
- wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
- free (wont_exit);
- gcc_assert (ok);
- update_ssa (TODO_update_ssa);
+ /* Determine the probability of the exit edge of the unrolled loop. */
+ new_est_niter = est_niter / factor;
+
+ /* Without profile feedback, loops for which we do not know a better estimate
+ are assumed to roll 10 times. When we unroll such a loop, it appears to
+ roll too little, and it may even seem to be cold. To avoid this, we
+ ensure that the created loop appears to roll at least 5 times (but at
+ most as many times as before unrolling). */
+ if (new_est_niter < 5)
+ {
+ if (est_niter < 5)
+ new_est_niter = est_niter;
+ else
+ new_est_niter = 5;
+ }
- /* Prepare the cfg and update the phi nodes. */
+ /* Prepare the cfg and update the phi nodes. Move the loop exit to the
+ loop latch (and make its condition dummy, for the moment). */
rest = loop_preheader_edge (new_loop)->src;
precond_edge = single_pred_edge (rest);
split_edge (loop_latch_edge (loop));
exit_bb = single_pred (loop->latch);
+ /* Since the exit edge will be removed, the frequency of all the blocks
+ in the loop that are dominated by it must be scaled by
+ 1 / (1 - exit->probability). */
+ scale_dominated_blocks_in_loop (loop, exit->src,
+ REG_BR_PROB_BASE,
+ REG_BR_PROB_BASE - exit->probability);
+
+ bsi = bsi_last (exit_bb);
+ exit_if = build3 (COND_EXPR, void_type_node, boolean_true_node,
+ NULL_TREE, NULL_TREE);
+
+ bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
- new_exit->count = loop_preheader_edge (loop)->count;
- est_niter = est_niter / factor + 1;
- new_exit->probability = REG_BR_PROB_BASE / est_niter;
+ rescan_loop_exit (new_exit, true, false);
+ /* Set the probability of the new exit to that of the old one. Fix
+ the frequency of the latch block by scaling it back by
+ 1 - exit->probability. */
+ new_exit->count = exit->count;
+ new_exit->probability = exit->probability;
new_nonexit = single_pred_edge (loop->latch);
+ new_nonexit->probability = REG_BR_PROB_BASE - exit->probability;
new_nonexit->flags = EDGE_TRUE_VALUE;
- new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
+ new_nonexit->count -= exit->count;
+ if (new_nonexit->count < 0)
+ new_nonexit->count = 0;
+ scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
+ REG_BR_PROB_BASE);
old_entry = loop_preheader_edge (loop);
new_entry = loop_preheader_edge (new_loop);
SET_USE (op, new_init);
}
+ remove_path (exit);
+
+ /* Transform the loop. */
+ if (transform)
+ (*transform) (loop, data);
+
+ /* Unroll the loop and remove the exits in all iterations except for the
+ last one. */
+ wont_exit = sbitmap_alloc (factor);
+ sbitmap_ones (wont_exit);
+ RESET_BIT (wont_exit, factor - 1);
+
+ ok = tree_duplicate_loop_to_header_edge
+ (loop, loop_latch_edge (loop), factor - 1,
+ wont_exit, new_exit, &to_remove, DLTHE_FLAG_UPDATE_FREQ);
+ free (wont_exit);
+ gcc_assert (ok);
+
+ for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
+ {
+ ok = remove_path (e);
+ gcc_assert (ok);
+ }
+ VEC_free (edge, heap, to_remove);
+ update_ssa (TODO_update_ssa);
+
+ /* Ensure that the frequencies in the loop match the new estimated
+ number of iterations, and change the probability of the new
+ exit edge. */
+ freq_h = loop->header->frequency;
+ freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
+ if (freq_h != 0)
+ scale_loop_frequencies (loop, freq_e * (new_est_niter + 1), freq_h);
+
+ exit_bb = single_pred (loop->latch);
+ new_exit = find_edge (exit_bb, rest);
+ new_exit->count = loop_preheader_edge (loop)->count;
+ new_exit->probability = REG_BR_PROB_BASE / (new_est_niter + 1);
+
+ rest->count += new_exit->count;
+ rest->frequency += EDGE_FREQUENCY (new_exit);
+
+ new_nonexit = single_pred_edge (loop->latch);
+ prob = new_nonexit->probability;
+ new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
+ new_nonexit->count = exit_bb->count - new_exit->count;
+ if (new_nonexit->count < 0)
+ new_nonexit->count = 0;
+ if (prob > 0)
+ scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
+ prob);
+
/* Finally create the new counter for number of iterations and add the new
exit instruction. */
bsi = bsi_last (exit_bb);
+ exit_if = bsi_stmt (bsi);
create_iv (exit_base, exit_step, NULL_TREE, loop,
- &bsi, true, &ctr_before, &ctr_after);
- exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
- exit_bound),
- tree_block_label (loop->latch),
- tree_block_label (rest));
- bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
+ &bsi, false, &ctr_before, &ctr_after);
+ COND_EXPR_COND (exit_if) = build2 (exit_cmp, boolean_type_node, ctr_after,
+ exit_bound);
+ update_stmt (exit_if);
#ifdef ENABLE_CHECKING
verify_flow_info ();
verify_loop_closed_ssa ();
#endif
}
+
+/* Wrapper over tree_transform_and_unroll_loop for the case when we do not
+ want to transform the loop before unrolling. The meaning of LOOP, FACTOR,
+ EXIT and DESC is the same as for tree_transform_and_unroll_loop; NULL is
+ passed for both its TRANSFORM callback and the DATA handed to it, so no
+ callback is invoked. */
+
+void
+tree_unroll_loop (struct loop *loop, unsigned factor,
+ edge exit, struct tree_niter_desc *desc)
+{
+ tree_transform_and_unroll_loop (loop, factor, exit, desc,
+ NULL, NULL);
+}