+2006-02-14 Zdenek Dvorak <dvorakz@suse.cz>
+
+ * doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
+ Document.
+ * tree-ssa-loop-niter.c (number_of_iterations_ne,
+ number_of_iterations_lt, number_of_iterations_cond): Remember the shape
+ of the ending condition.
+ * tree-ssa-loop-manip.c: Include params.h.
+ (build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
+ tree_unroll_loop): New functions.
+ * tree-pass.h (pass_loop_prefetch): Declare.
+ * loop.c (rest_of_handle_loop_optimize): Test for
+ -fprefetch-loop-arrays-rtl.
+ * tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
+ * timevar.def (TV_TREE_PREFETCH): New timevar.
+ * tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
+ pass_loop_prefetch): New.
+ * tree-cfgcleanup.c: Include tree-scalar-evolution.h.
+ (cleanup_tree_cfg_loop): Call scev_reset.
+ * common.opt (fprefetch-loop-arrays-rtl): Add.
+ * tree-ssa-loop-prefetch.c: New file.
+ * tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
+ (new_temp_expr_table): Initialize expr_vars.
+ (free_temp_expr_table): Cleanup expr_vars.
+ (check_replaceable, find_replaceable_in_bb): Prevent accumulating
+ expressions from being merged into one.
+ * tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
+ (struct tree_niter_desc): Add control, bound and cmp fields.
+ (tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
+ Declare.
+ * Makefile.in (tree-ssa-loop-prefetch.o): Add.
+ (tree-cfgcleanup.o): Add SCEV_H dependency.
+ (tree-ssa-loop-manip.o): Add PARAMS_H dependency.
+ * passes.c (init_optimization_passes): Add pass_loop_prefetch.
+
2006-02-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/26258
tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \
tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
- tree-vect-patterns.o \
+ tree-vect-patterns.o tree-ssa-loop-prefetch.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
+tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
+ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
+ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
+ tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
+ $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
+ tree-chrec.h toplev.h langhooks.h
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
- tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h
+ tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
+ $(PARAMS_H)
tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
Generate position-independent code for executables if possible (small mode)
fprefetch-loop-arrays
-Common Report Var(flag_prefetch_loop_arrays)
+Common Report Var(flag_prefetch_loop_arrays,1)
+Generate prefetch instructions, if available, for arrays in loops
+
+fprefetch-loop-arrays-rtl
+Common Report Var(flag_prefetch_loop_arrays,2)
Generate prefetch instructions, if available, for arrays in loops
fprofile
-funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol
--foptimize-sibling-calls -fprefetch-loop-arrays @gol
+-foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
-fprofile-generate -fprofile-use @gol
-fregmove -frename-registers @gol
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
local variables when unrolling a loop which can result in superior code.
@item -fprefetch-loop-arrays
+@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays
+@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.
of the loop on both branches (modified according to result of the condition).
@item -fprefetch-loop-arrays
+@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays
+@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.
free_bb_for_insn ();
profile_status = PROFILE_ABSENT;
- do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
+ do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
if (flag_rerun_loop_opt)
{
vectorizer creates alias relations that are not supported by
pass_may_alias. */
NEXT_PASS (pass_complete_unroll);
+ NEXT_PASS (pass_loop_prefetch);
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done);
*p = NULL;
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
#include "cfglayout.h"
#include "hashtab.h"
#include "tree-ssa-propagate.h"
+#include "tree-scalar-evolution.h"
/* Remove any fallthru edge from EV. Return true if an edge was removed. */
void
cleanup_tree_cfg_loop (void)
{
- bitmap changed_bbs = BITMAP_ALLOC (NULL);
+ bool changed = cleanup_tree_cfg ();
- cleanup_tree_cfg ();
-
- fix_loop_structure (current_loops, changed_bbs);
- calculate_dominance_info (CDI_DOMINATORS);
+ /* Loop structures, dominators and loop-closed SSA form only need
+    repairing when the cleanup actually changed the CFG.  */
+ if (changed)
+ {
+ bitmap changed_bbs = BITMAP_ALLOC (NULL);
+ fix_loop_structure (current_loops, changed_bbs);
+ calculate_dominance_info (CDI_DOMINATORS);
- /* This usually does nothing. But sometimes parts of cfg that originally
- were inside a loop get out of it due to edge removal (since they
- become unreachable by back edges from latch). */
- rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
+ /* This usually does nothing. But sometimes parts of cfg that originally
+ were inside a loop get out of it due to edge removal (since they
+ become unreachable by back edges from latch). */
+ rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
- BITMAP_FREE (changed_bbs);
+ BITMAP_FREE (changed_bbs);
#ifdef ENABLE_CHECKING
- verify_loop_structure (current_loops);
+ verify_loop_structure (current_loops);
#endif
+ /* The CFG changes may have invalidated cached scalar evolutions;
+    flush them so later passes recompute from the new CFG.  */
+ scev_reset ();
+ }
}
/* Merge the PHI nodes at BB into those at BB's sole successor. */
extern bool may_propagate_copy (tree, tree);
extern bool may_propagate_copy_into_asm (tree);
+/* Affine induction variable: in iteration I of the containing loop the
+   variable has the value BASE + STEP * I.  */
+
+typedef struct
+{
+ /* Iv = BASE + STEP * i. */
+ tree base, step;
+
+ /* True if this iv does not overflow (i.e., the value above is valid in
+    the precision of its type for all iterations of the loop). */
+ bool no_overflow;
+} affine_iv;
+
/* Description of number of iterations of a loop. All the expressions inside
the structure can be evaluated at the end of the loop's preheader
(and due to ssa form, also anywhere inside the body of the loop). */
MAX_SIGNED_INT. However if the (n <= 0) assumption
is eliminated (by looking at the guard on entry of
the loop), then the information would be lost. */
+
+ /* The simplified shape of the exit condition. The loop exits if
+ CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
+ LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
+ LT_EXPR and negative if CMP is GT_EXPR. This information is used
+ by loop unrolling. */
+ affine_iv control;
+ tree bound;
+ enum tree_code cmp;
};
/* In tree-vectorizer.c */
void tree_ssa_unswitch_loops (struct loops *);
void canonicalize_induction_variables (struct loops *);
void tree_unroll_loops_completely (struct loops *, bool);
+void tree_ssa_prefetch_arrays (struct loops *);
void remove_empty_loops (struct loops *);
void tree_ssa_iv_optimize (struct loops *);
tree expand_simple_operations (tree);
void substitute_in_loop_info (struct loop *, tree, tree);
edge single_dom_exit (struct loop *);
+bool can_unroll_loop_p (struct loop *loop, unsigned factor,
+ struct tree_niter_desc *niter);
+void tree_unroll_loop (struct loops *, struct loop *, unsigned,
+ edge, struct tree_niter_desc *);
/* In tree-ssa-threadedge.c */
extern bool potentially_threadable_block (basic_block);
typedef struct temp_expr_table_d
{
var_map map;
- void **version_info;
+ void **version_info;
+ bitmap *expr_vars;
value_expr_p *partition_dep_list;
bitmap replaceable;
bool saw_replaceable;
t->map = map;
t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
+ t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
t->partition_dep_list = XCNEWVEC (value_expr_p,
num_var_partitions (map) + 1);
{
value_expr_p p;
tree *ret = NULL;
+ unsigned i;
#ifdef ENABLE_CHECKING
unsigned x;
BITMAP_FREE (t->partition_in_use);
BITMAP_FREE (t->replaceable);
+ for (i = 0; i <= num_ssa_names; i++)
+ if (t->expr_vars[i])
+ BITMAP_FREE (t->expr_vars[i]);
+ free (t->expr_vars);
+
free (t->partition_dep_list);
if (t->saw_replaceable)
ret = (tree *)t->version_info;
static bool
check_replaceable (temp_expr_table_p tab, tree stmt)
{
- tree var, def;
+ tree var, def, basevar;
int version;
var_map map = tab->map;
ssa_op_iter iter;
tree call_expr;
+ bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
if (TREE_CODE (stmt) != MODIFY_EXPR)
return false;
}
version = SSA_NAME_VERSION (def);
+ basevar = SSA_NAME_VAR (def);
+ bitmap_set_bit (def_vars, DECL_UID (basevar));
/* Add this expression to the dependency list for each use partition. */
FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
{
add_dependance (tab, version, var);
+
+ use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
+ if (use_vars)
+ bitmap_ior_into (def_vars, use_vars);
}
+ tab->expr_vars[version] = def_vars;
/* If there are VUSES, add a dependence on virtual defs. */
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
{
block_stmt_iterator bsi;
- tree stmt, def;
+ tree stmt, def, use;
stmt_ann_t ann;
int partition;
var_map map = tab->map;
ann = stmt_ann (stmt);
/* Determine if this stmt finishes an existing expression. */
- FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE)
+ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{
- if (tab->version_info[SSA_NAME_VERSION (def)])
+ unsigned ver = SSA_NAME_VERSION (use);
+
+ if (tab->version_info[ver])
{
bool same_root_var = false;
- tree def2;
ssa_op_iter iter2;
+ bitmap vars = tab->expr_vars[ver];
/* See if the root variables are the same. If they are, we
do not want to do the replacement to avoid problems with
code size, see PR tree-optimization/17549. */
- FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF)
- if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2))
- {
- same_root_var = true;
- break;
- }
+ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
+ {
+ if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
+ {
+ same_root_var = true;
+ break;
+ }
+ }
/* Mark expression as replaceable unless stmt is volatile
or DEF sets the same root variable as STMT. */
if (!ann->has_volatile_ops && !same_root_var)
- mark_replaceable (tab, def);
+ mark_replaceable (tab, use);
else
- finish_expr (tab, SSA_NAME_VERSION (def), false);
+ finish_expr (tab, ver, false);
}
}
extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize;
extern struct tree_opt_pass pass_complete_unroll;
+extern struct tree_opt_pass pass_loop_prefetch;
extern struct tree_opt_pass pass_iv_optimize;
extern struct tree_opt_pass pass_tree_loop_done;
extern struct tree_opt_pass pass_ch;
extern void scev_analysis (void);
void scev_const_prop (void);
-/* Affine iv. */
-
-typedef struct
-{
- /* Iv = BASE + STEP * i. */
- tree base, step;
-
- /* True if this iv does not overflow. */
- bool no_overflow;
-} affine_iv;
-
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
#endif /* GCC_TREE_SCALAR_EVOLUTION_H */
#include "tree-pass.h"
#include "cfglayout.h"
#include "tree-scalar-evolution.h"
+#include "params.h"
/* Creates an induction variable with value BASE + STEP * iteration in LOOP.
It is expected that neither BASE nor STEP are shared with other expressions
return true;
}
+
+/* Build the statement `if (COND) goto THEN_LABEL; else goto ELSE_LABEL;',
+   represented as a COND_EXPR whose arms are GOTO_EXPRs.  */
+
+static tree
+build_if_stmt (tree cond, tree then_label, tree else_label)
+{
+ return build3 (COND_EXPR, void_type_node,
+ cond,
+ build1 (GOTO_EXPR, void_type_node, then_label),
+ build1 (GOTO_EXPR, void_type_node, else_label));
+}
+
+/* Returns true if we can unroll LOOP FACTOR times.  The number
+   of iterations of the loop (as computed by number_of_iterations_exit)
+   is returned in NITER.  */
+
+bool
+can_unroll_loop_p (struct loop *loop, unsigned factor,
+ struct tree_niter_desc *niter)
+{
+ edge exit;
+
+ /* Check whether unrolling is possible.  We only want to unroll loops
+    for which we are able to determine the number of iterations.  We also
+    want to split the extra iterations of the loop from its end,
+    therefore we require that the loop has precisely one
+    exit.  */
+
+ exit = single_dom_exit (loop);
+ if (!exit)
+ return false;
+
+ /* NITER->cmp == ERROR_MARK means the shape of the exit condition was
+    not recognized, so we would be unable to build the new exit test
+    (see determine_exit_conditions).  */
+ if (!number_of_iterations_exit (loop, exit, niter, false)
+ || niter->cmp == ERROR_MARK)
+ return false;
+
+ /* And of course, we must be able to duplicate the loop. */
+ if (!can_duplicate_loop_p (loop))
+ return false;
+
+ /* The final loop should be small enough. */
+ if (tree_num_loop_insns (loop) * factor
+ > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
+ return false;
+
+ return true;
+}
+
+/* Determines the conditions that control execution of LOOP unrolled FACTOR
+   times.  DESC is the number of iterations of LOOP.  ENTER_COND is set to
+   the condition that must be true if the main (unrolled) loop can be
+   entered.  EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values
+   describing how the exit from the unrolled loop should be controlled.
+   Any statements needed to compute these values are emitted on the
+   preheader edge of LOOP.  */
+
+static void
+determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
+ unsigned factor, tree *enter_cond,
+ tree *exit_base, tree *exit_step,
+ enum tree_code *exit_cmp, tree *exit_bound)
+{
+ tree stmts;
+ tree base = desc->control.base;
+ tree step = desc->control.step;
+ tree bound = desc->bound;
+ tree type = TREE_TYPE (base);
+ tree bigstep, delta;
+ /* Extreme values of TYPE, used below to check that adjusting BOUND by
+    DELTA cannot overflow.  */
+ tree min = lower_bound_in_type (type, type);
+ tree max = upper_bound_in_type (type, type);
+ enum tree_code cmp = desc->cmp;
+ tree cond = boolean_true_node, assum;
+
+ /* Conservative defaults; all of these are overwritten below.  */
+ *enter_cond = boolean_false_node;
+ *exit_base = NULL_TREE;
+ *exit_step = NULL_TREE;
+ *exit_cmp = ERROR_MARK;
+ *exit_bound = NULL_TREE;
+ gcc_assert (cmp != ERROR_MARK);
+
+ /* We only need to be correct when we answer question
+    "Do at least FACTOR more iterations remain?" in the unrolled loop.
+    Thus, transforming BASE + STEP * i <> BOUND to
+    BASE + STEP * i < BOUND is ok.  */
+ if (cmp == NE_EXPR)
+ {
+ if (tree_int_cst_sign_bit (step))
+ cmp = GT_EXPR;
+ else
+ cmp = LT_EXPR;
+ }
+ else if (cmp == LT_EXPR)
+ {
+ gcc_assert (!tree_int_cst_sign_bit (step));
+ }
+ else if (cmp == GT_EXPR)
+ {
+ gcc_assert (tree_int_cst_sign_bit (step));
+ }
+ else
+ gcc_unreachable ();
+
+ /* The main body of the loop may be entered iff:
+
+    1) desc->may_be_zero is false.
+    2) it is possible to check that there are at least FACTOR iterations
+       of the loop, i.e., BOUND - step * FACTOR does not overflow.
+    3) # of iterations is at least FACTOR  */
+
+ if (!zero_p (desc->may_be_zero))
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ invert_truthvalue (desc->may_be_zero),
+ cond);
+
+ /* BIGSTEP is the amount by which the control iv advances in one
+    iteration of the unrolled loop; DELTA = BIGSTEP - STEP.  */
+ bigstep = fold_build2 (MULT_EXPR, type, step,
+ build_int_cst_type (type, factor));
+ delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
+ /* Condition 2 above: BOUND -/+ DELTA must stay within [MIN, MAX].  */
+ if (cmp == LT_EXPR)
+ assum = fold_build2 (GE_EXPR, boolean_type_node,
+ bound,
+ fold_build2 (PLUS_EXPR, type, min, delta));
+ else
+ assum = fold_build2 (LE_EXPR, boolean_type_node,
+ bound,
+ fold_build2 (PLUS_EXPR, type, max, delta));
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
+
+ /* Condition 3: BASE CMP (BOUND - DELTA), i.e., at least FACTOR
+    iterations remain on entry.  */
+ bound = fold_build2 (MINUS_EXPR, type, bound, delta);
+ assum = fold_build2 (cmp, boolean_type_node, base, bound);
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
+
+ cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ /* cond now may be a gimple comparison, which would be OK, but also any
+    other gimple rhs (say a && b).  In this case we need to force it to
+    operand.  */
+ if (!is_gimple_condexpr (cond))
+ {
+ cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ }
+ *enter_cond = cond;
+
+ /* Gimplify BASE and the adjusted BOUND in the preheader, for use in the
+    new exit test of the unrolled loop.  */
+ base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+
+ *exit_base = base;
+ *exit_step = bigstep;
+ *exit_cmp = cmp;
+ *exit_bound = bound;
+}
+
+/* Unroll LOOP FACTOR times.  LOOPS is the loops tree.  DESC describes
+   number of iterations of LOOP.  EXIT is the exit of the loop to which
+   DESC corresponds.
+
+   If N is number of iterations of the loop and MAY_BE_ZERO is the condition
+   under which the loop exits in the first iteration even if N != 0,
+
+   while (1)
+     {
+       x = phi (init, next);
+
+       pre;
+       if (st)
+         break;
+       post;
+     }
+
+   becomes (with possibly the exit conditions formulated a bit differently,
+   avoiding the need to create a new iv):
+
+   if (MAY_BE_ZERO || N < FACTOR)
+     goto rest;
+
+   do
+     {
+       x = phi (init, next);
+
+       pre;
+       post;
+       pre;
+       post;
+       ...
+       pre;
+       post;
+       N -= FACTOR;
+
+     } while (N >= FACTOR);
+
+   rest:
+     init' = phi (init, x);
+
+   while (1)
+     {
+       x = phi (init', next);
+
+       pre;
+       if (st)
+         break;
+       post;
+     }  */
+
+void
+tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor,
+ edge exit, struct tree_niter_desc *desc)
+{
+ tree dont_exit, exit_if, ctr_before, ctr_after;
+ tree enter_main_cond, exit_base, exit_step, exit_bound;
+ enum tree_code exit_cmp;
+ tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
+ struct loop *new_loop;
+ basic_block rest, exit_bb;
+ edge old_entry, new_entry, old_latch, precond_edge, new_exit;
+ edge nonexit, new_nonexit;
+ block_stmt_iterator bsi;
+ use_operand_p op;
+ bool ok;
+ unsigned est_niter;
+ sbitmap wont_exit;
+
+ est_niter = expected_loop_iterations (loop);
+ determine_exit_conditions (loop, desc, factor,
+ &enter_main_cond, &exit_base, &exit_step,
+ &exit_cmp, &exit_bound);
+
+ /* Create the versioned copy of LOOP (NEW_LOOP); it handles the
+    remaining iterations when ENTER_MAIN_COND is false.  */
+ new_loop = loop_version (loops, loop, enter_main_cond, NULL, true);
+ gcc_assert (new_loop != NULL);
+ update_ssa (TODO_update_ssa);
+
+ /* Unroll the loop and remove the old exits.  The old exit condition is
+    replaced by a constant so the exit edge becomes dead.  */
+ dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
+ ? boolean_false_node
+ : boolean_true_node);
+ if (exit == EDGE_SUCC (exit->src, 0))
+ nonexit = EDGE_SUCC (exit->src, 1);
+ else
+ nonexit = EDGE_SUCC (exit->src, 0);
+ nonexit->probability = REG_BR_PROB_BASE;
+ exit->probability = 0;
+ nonexit->count += exit->count;
+ exit->count = 0;
+ exit_if = last_stmt (exit->src);
+ COND_EXPR_COND (exit_if) = dont_exit;
+ update_stmt (exit_if);
+
+ /* Duplicate the body FACTOR - 1 times; no copy may exit.  */
+ wont_exit = sbitmap_alloc (factor);
+ sbitmap_ones (wont_exit);
+ ok = tree_duplicate_loop_to_header_edge
+ (loop, loop_latch_edge (loop), loops, factor - 1,
+ wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
+ free (wont_exit);
+ gcc_assert (ok);
+ update_ssa (TODO_update_ssa);
+
+ /* Prepare the cfg and update the phi nodes.  */
+ rest = loop_preheader_edge (new_loop)->src;
+ precond_edge = single_pred_edge (rest);
+ loop_split_edge_with (loop_latch_edge (loop), NULL);
+ exit_bb = single_pred (loop->latch);
+
+ new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE);
+ new_exit->count = loop_preheader_edge (loop)->count;
+ est_niter = est_niter / factor + 1;
+ new_exit->probability = REG_BR_PROB_BASE / est_niter;
+
+ new_nonexit = single_pred_edge (loop->latch);
+ new_nonexit->flags = EDGE_TRUE_VALUE;
+ new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
+
+ /* For each phi of the original loop header, create the phi INIT' in REST
+    (see the diagram above) merging the preheader value with the value
+    live on the new exit, and make the new loop's copy use it.  */
+ old_entry = loop_preheader_edge (loop);
+ new_entry = loop_preheader_edge (new_loop);
+ old_latch = loop_latch_edge (loop);
+ for (phi_old_loop = phi_nodes (loop->header),
+ phi_new_loop = phi_nodes (new_loop->header);
+ phi_old_loop;
+ phi_old_loop = PHI_CHAIN (phi_old_loop),
+ phi_new_loop = PHI_CHAIN (phi_new_loop))
+ {
+ init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
+ op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
+ gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
+ next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
+
+ /* Prefer using original variable as a base for the new ssa name.
+    This is necessary for virtual ops, and useful in order to avoid
+    losing debug info for real ops.  */
+ if (TREE_CODE (next) == SSA_NAME)
+ var = SSA_NAME_VAR (next);
+ else if (TREE_CODE (init) == SSA_NAME)
+ var = SSA_NAME_VAR (init);
+ else
+ {
+ var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
+ add_referenced_tmp_var (var);
+ }
+
+ new_init = make_ssa_name (var, NULL_TREE);
+ phi_rest = create_phi_node (new_init, rest);
+ SSA_NAME_DEF_STMT (new_init) = phi_rest;
+
+ add_phi_arg (phi_rest, init, precond_edge);
+ add_phi_arg (phi_rest, next, new_exit);
+ SET_USE (op, new_init);
+ }
+
+ /* Finally create the new counter for number of iterations and add the new
+    exit instruction.  */
+ bsi = bsi_last (exit_bb);
+ create_iv (exit_base, exit_step, NULL_TREE, loop,
+ &bsi, true, &ctr_before, &ctr_after);
+ exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
+ exit_bound),
+ tree_block_label (loop->latch),
+ tree_block_label (rest));
+ bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
+
+ /* Verify the CFG, dominators and loop structure after the
+    transformation.  */
+ verify_flow_info ();
+ verify_dominators (CDI_DOMINATORS);
+ verify_loop_structure (loops);
+ verify_loop_closed_ssa ();
+}
tree niter_type = unsigned_type_for (type);
tree s, c, d, bits, assumption, tmp, bound;
+ niter->control = *iv;
+ niter->bound = final;
+ niter->cmp = NE_EXPR;
+
/* Rearrange the terms so that we get inequality s * i <> c, with s
positive. Also cast everything to the unsigned type. */
if (tree_int_cst_sign_bit (iv->step))
tree niter_type = unsigned_type_for (type);
tree delta, step, s;
+ if (nonzero_p (iv0->step))
+ {
+ niter->control = *iv0;
+ niter->cmp = LT_EXPR;
+ niter->bound = iv1->base;
+ }
+ else
+ {
+ niter->control = *iv1;
+ niter->cmp = GT_EXPR;
+ niter->bound = iv0->base;
+ }
+
delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base));
niter->niter = NULL_TREE;
niter->additional_info = boolean_true_node;
+ niter->bound = NULL_TREE;
+ niter->cmp = ERROR_MARK;
+
/* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
the control variable is on lhs. */
if (code == GE_EXPR || code == GT_EXPR
0 /* letter */
};
+/* Prefetching.  Entry point of the tree-level pass that inserts prefetch
+   instructions for array references in loops (tree-ssa-loop-prefetch.c).  */
+
+static void
+tree_ssa_loop_prefetch (void)
+{
+ /* Nothing to do if the function contains no natural loops.  */
+ if (!current_loops)
+ return;
+
+ tree_ssa_prefetch_arrays (current_loops);
+}
+
+/* Gate for the tree-level prefetching pass: run only for
+   -fprefetch-loop-arrays (flag value 1); -fprefetch-loop-arrays-rtl sets
+   the shared flag to 2 and is handled by the RTL loop optimizer instead
+   (see rest_of_handle_loop_optimize).  */
+
+static bool
+gate_tree_ssa_loop_prefetch (void)
+{
+ return flag_prefetch_loop_arrays == 1;
+}
+
+/* Pass descriptor for the tree-level loop prefetching pass.  */
+
+struct tree_opt_pass pass_loop_prefetch =
+{
+ "prefetch", /* name */
+ gate_tree_ssa_loop_prefetch, /* gate */
+ tree_ssa_loop_prefetch, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_PREFETCH, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
+ 0 /* letter */
+};
+
/* Induction variable optimizations. */
static void