#include "tree-pass.h"
/*************************************************************************
- Simple Loop Peeling Utilities
- *************************************************************************/
-static void slpeel_update_phis_for_duplicate_loop
- (struct loop *, struct loop *, bool after);
-static void slpeel_update_phi_nodes_for_guard1
- (edge, struct loop *, bool, basic_block *, bitmap *);
-static void slpeel_update_phi_nodes_for_guard2
- (edge, struct loop *, bool, basic_block *);
-static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
-
-static void rename_use_op (use_operand_p);
-static void rename_variables_in_bb (basic_block);
-static void rename_variables_in_loop (struct loop *);
-
-/*************************************************************************
General Vectorization Utilities
*************************************************************************/
-static void vect_set_dump_settings (void);
/* vect_dump will be set to stderr or dump_file if exist. */
FILE *vect_dump;
/* Renames variables in new generated LOOP. */
-static void
+void
rename_variables_in_loop (struct loop *loop)
{
unsigned i;
/* Given LOOP this function generates a new copy of it and puts it
on E which is either the entry or exit of LOOP. */
-static struct loop *
+struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
struct loop *new_loop;
if (at_exit) /* Add the loop copy at exit. */
{
redirect_edge_and_branch_force (e, new_loop->header);
+ PENDING_STMT (e) = NULL;
set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
if (was_imm_dom)
set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
new_exit_e = EDGE_SUCC (new_loop->header, 1);
redirect_edge_and_branch_force (new_exit_e, loop->header);
+ PENDING_STMT (new_exit_e) = NULL;
set_immediate_dominator (CDI_DOMINATORS, loop->header,
new_exit_e->src);
}
redirect_edge_and_branch_force (entry_e, new_loop->header);
+ PENDING_STMT (entry_e) = NULL;
set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
}
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
- basic_block dom_bb)
+ basic_block dom_bb)
{
block_stmt_iterator bsi;
edge new_e, enter_e;
tree cond_stmt;
+ tree gimplify_stmt_list;
enter_e = EDGE_SUCC (guard_bb, 0);
enter_e->flags &= ~EDGE_FALLTHRU;
enter_e->flags |= EDGE_FALSE_VALUE;
bsi = bsi_last (guard_bb);
+ cond =
+ force_gimple_operand (cond, &gimplify_stmt_list, true,
+ NULL_TREE);
cond_stmt = build3 (COND_EXPR, void_type_node, cond,
NULL_TREE, NULL_TREE);
+ if (gimplify_stmt_list)
+ bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+ bsi = bsi_last (guard_bb);
bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
/* Add new edge to connect guard block to the merge/loop-exit block. */
new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
}
#endif
+/* If the run time cost model check determines that vectorization is
+ not profitable and hence scalar loop should be generated then set
+ FIRST_NITERS to prologue peeled iterations. This will allow all the
+ iterations to be executed in the prologue peeled scalar loop. */
+
+/* Parameters:
+ BB_BEFORE_FIRST_LOOP - preheader block of the (peeled) first loop.
+ FIRST_NITERS - iteration count of the prologue loop.
+ LOOP - the loop being vectorized; its unchanged scalar iteration
+ count is read from LOOP_VINFO_NITERS_UNCHANGED.
+ TH - cost model profitability threshold of iterations.
+
+ NOTE(review): FIRST_NITERS is passed by value, so the final
+ assignment "first_niters = PHI_RESULT (newphi)" is not visible to
+ the caller -- confirm whether this should take "tree *first_niters". */
+
+void
+set_prologue_iterations (basic_block bb_before_first_loop,
+ tree first_niters,
+ struct loop *loop,
+ unsigned int th)
+{
+ edge e;
+ basic_block cond_bb, then_bb;
+ tree var, prologue_after_cost_adjust_name, stmt;
+ block_stmt_iterator bsi;
+ tree newphi;
+ edge e_true, e_false, e_fallthru;
+ tree cond_stmt;
+ tree gimplify_stmt_list;
+ tree cost_pre_condition = NULL_TREE;
+ tree scalar_loop_iters =
+ unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
+
+ /* Split the incoming edge twice: the first new block (COND_BB) will
+ hold the run-time profitability check, the second (THEN_BB) will
+ hold the computation of the adjusted prologue iteration count. */
+ e = single_pred_edge (bb_before_first_loop);
+ cond_bb = split_edge(e);
+
+ e = single_pred_edge (bb_before_first_loop);
+ then_bb = split_edge(e);
+ set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+
+ /* False edge of the check bypasses THEN_BB and goes straight to the
+ first loop's preheader; fix up dominators and edge flags. */
+ e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
+ EDGE_FALSE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
+
+ e_true = EDGE_PRED (then_bb, 0);
+ e_true->flags &= ~EDGE_FALLTHRU;
+ e_true->flags |= EDGE_TRUE_VALUE;
+
+ e_fallthru = EDGE_SUCC (then_bb, 0);
+
+ /* Build and gimplify the run-time check
+ "scalar_loop_iters <= th" and insert it (plus any statements
+ produced by gimplification) at the end of COND_BB. */
+ cost_pre_condition =
+ build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
+ build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+ cost_pre_condition =
+ force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
+ true, NULL_TREE);
+ cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition,
+ NULL_TREE, NULL_TREE);
+
+ bsi = bsi_last (cond_bb);
+ if (gimplify_stmt_list)
+ bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+ /* Re-fetch the iterator: the insertion above may have moved it. */
+ bsi = bsi_last (cond_bb);
+ bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
+ /* In THEN_BB compute the full scalar iteration count into a fresh
+ temporary; this becomes FIRST_NITERS when the check fires. */
+ var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
+ "prologue_after_cost_adjust");
+ add_referenced_var (var);
+ prologue_after_cost_adjust_name =
+ force_gimple_operand (scalar_loop_iters, &stmt, false, var);
+
+ bsi = bsi_last (then_bb);
+ if (stmt)
+ bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
+
+ /* Merge the two incoming values with a PHI in the loop preheader:
+ the adjusted count on the then-path, the original FIRST_NITERS on
+ the bypass (false) path. */
+ newphi = create_phi_node (var, bb_before_first_loop);
+ add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
+ add_phi_arg (newphi, first_niters, e_false);
+
+ first_niters = PHI_RESULT (newphi);
+}
+
+
/* Function slpeel_tree_peel_loop_to_edge.
Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
that is placed on the entry (exit) edge E of LOOP. After this transformation
we have two loops one after the other - first-loop iterates FIRST_NITERS
times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
+ If the cost model indicates that it is profitable to emit a scalar
+ loop instead of the vector one, then the prolog (epilog) loop will iterate
+ for the entire unchanged scalar iterations of the loop.
Input:
- LOOP: the loop to be peeled.
for updating the loop bound of the first-loop to FIRST_NITERS. If it
is false, the caller of this function may want to take care of this
(this can be useful if we don't want new stmts added to first-loop).
+ - TH: cost model profitability threshold of iterations for vectorization.
+ - CHECK_PROFITABILITY: specify whether cost model check has not occurred
+ during versioning and hence needs to occur during
+ prologue generation or whether cost model check
+ has not occurred during prologue generation and hence
+ needs to occur during epilogue generation.
+
Output:
The function returns a pointer to the new loop-copy, or NULL if it failed
slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge e, tree first_niters,
tree niters, bool update_first_loop_count,
- unsigned int th)
+ unsigned int th, bool check_profitability)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
- tree pre_condition;
+ tree pre_condition = NULL_TREE;
bitmap definitions;
basic_block bb_before_second_loop, bb_after_second_loop;
basic_block bb_before_first_loop;
basic_block new_exit_bb;
edge exit_e = single_exit (loop);
LOC loop_loc;
+ tree cost_pre_condition = NULL_TREE;
if (!slpeel_can_duplicate_loop_p (loop, e))
return NULL;
rename_variables_in_loop (new_loop);
- /* 2. Add the guard that controls whether the first loop is executed.
- Resulting CFG would be:
+ /* 2. Add the guard code in one of the following ways:
- bb_before_first_loop:
- if (FIRST_NITERS == 0) GOTO bb_before_second_loop
- GOTO first-loop
+ 2.a Add the guard that controls whether the first loop is executed.
+ This occurs when this function is invoked for prologue or epilogue
+ generation and when the cost model check can be done at compile time.
- first_loop:
- do {
- } while ...
+ Resulting CFG would be:
- bb_before_second_loop:
+ bb_before_first_loop:
+ if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+ GOTO first-loop
- second_loop:
- do {
- } while ...
+ first_loop:
+ do {
+ } while ...
- orig_exit_bb:
- */
+ bb_before_second_loop:
+
+ second_loop:
+ do {
+ } while ...
+
+ orig_exit_bb:
+
+ 2.b Add the cost model check that allows the prologue
+ to iterate for the entire unchanged scalar
+ iterations of the loop in the event that the cost
+ model indicates that the scalar loop is more
+ profitable than the vector one. This occurs when
+ this function is invoked for prologue generation
+ and the cost model check needs to be done at run
+ time.
+
+ Resulting CFG after prologue peeling would be:
+
+ if (scalar_loop_iterations <= th)
+ FIRST_NITERS = scalar_loop_iterations
+
+ bb_before_first_loop:
+ if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+ GOTO first-loop
+
+ first_loop:
+ do {
+ } while ...
+
+ bb_before_second_loop:
+
+ second_loop:
+ do {
+ } while ...
+
+ orig_exit_bb:
+
+ 2.c Add the cost model check that allows the epilogue
+ to iterate for the entire unchanged scalar
+ iterations of the loop in the event that the cost
+ model indicates that the scalar loop is more
+ profitable than the vector one. This occurs when
+ this function is invoked for epilogue generation
+ and the cost model check needs to be done at run
+ time.
+
+ Resulting CFG after epilogue peeling would be:
+
+ bb_before_first_loop:
+ if ((scalar_loop_iterations <= th)
+ ||
+ FIRST_NITERS == 0) GOTO bb_before_second_loop
+ GOTO first-loop
+
+ first_loop:
+ do {
+ } while ...
+
+ bb_before_second_loop:
+
+ second_loop:
+ do {
+ } while ...
+
+ orig_exit_bb:
+ */
bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
bb_before_second_loop = split_edge (single_exit (first_loop));
- pre_condition =
- fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), th));
+ /* Epilogue peeling. */
+ if (!update_first_loop_count)
+ {
+ pre_condition =
+ fold_build2 (LE_EXPR, boolean_type_node, first_niters,
+ build_int_cst (TREE_TYPE (first_niters), 0));
+ if (check_profitability)
+ {
+ tree scalar_loop_iters
+ = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
+ (loop_vec_info_for_loop (loop)));
+ cost_pre_condition =
+ build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
+ build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+
+ pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+ cost_pre_condition, pre_condition);
+ }
+ }
+
+ /* Prologue peeling. */
+ else
+ {
+ if (check_profitability)
+ set_prologue_iterations (bb_before_first_loop, first_niters,
+ loop, th);
+
+ pre_condition =
+ fold_build2 (LE_EXPR, boolean_type_node, first_niters,
+ build_int_cst (TREE_TYPE (first_niters), 0));
+ }
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_before_second_loop, bb_before_first_loop);
STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
+ STMT_SLP_TYPE (res) = 0;
DR_GROUP_FIRST_DR (res) = NULL_TREE;
DR_GROUP_NEXT_DR (res) = NULL_TREE;
DR_GROUP_SIZE (res) = 0;
}
+/* Free stmt vectorization related info. */
+
+/* STMT is the statement whose stmt_vec_info (if any) is released.
+ Safe to call on statements that never had vectorization info. */
+
+void
+free_stmt_vec_info (tree stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ /* No vec_info was ever attached to this statement; nothing to do. */
+ if (!stmt_info)
+ return;
+
+ /* Release the vector owned by the info, free the info struct itself,
+ then clear the annotation's back-pointer so stale info cannot be
+ reached through the statement afterwards. */
+ VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
+ free (stmt_info);
+ set_stmt_info (stmt_ann (stmt), NULL);
+}
+
+
/* Function bb_in_loop_p
Used as predicate for dfs order traversal of the loop bbs. */
LOOP_VINFO_BBS (res) = bbs;
LOOP_VINFO_NITERS (res) = NULL;
+ LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
LOOP_VINFO_MAY_ALIAS_DDRS (res) =
VEC_alloc (ddr_p, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
-
+ LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (tree, heap, 10);
+ LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
return res;
}
int nbbs;
block_stmt_iterator si;
int j;
+ VEC (slp_instance, heap) *slp_instances;
+ slp_instance instance;
if (!loop_vinfo)
return;
{
basic_block bb = bbs[j];
tree phi;
- stmt_vec_info stmt_info;
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
- {
- stmt_ann_t ann = stmt_ann (phi);
-
- stmt_info = vinfo_for_stmt (phi);
- free (stmt_info);
- set_stmt_info (ann, NULL);
- }
+ free_stmt_vec_info (phi);
for (si = bsi_start (bb); !bsi_end_p (si); )
{
tree stmt = bsi_stmt (si);
- stmt_ann_t ann = stmt_ann (stmt);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
if (stmt_info)
}
/* Free stmt_vec_info. */
- VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
- free (stmt_info);
- set_stmt_info (ann, NULL);
+ free_stmt_vec_info (stmt);
/* Remove dead "pattern stmts". */
if (remove_stmt_p)
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
+ slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
+ vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
+ VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+ VEC_free (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
free (loop_vinfo);
loop->aux = NULL;
on ALIGNMENT bit boundary. */
bool
-vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
+vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
if (TREE_CODE (decl) != VAR_DECL)
return false;
if (TREE_STATIC (decl))
return (alignment <= MAX_OFILE_ALIGNMENT);
else
- /* This is not 100% correct. The absolute correct stack alignment
- is STACK_BOUNDARY. We're supposed to hope, but not assume, that
- PREFERRED_STACK_BOUNDARY is honored by all translation units.
- However, until someone implements forced stack alignment, SSE
- isn't really usable without this. */
- return (alignment <= PREFERRED_STACK_BOUNDARY);
+ /* This used to be PREFERRED_STACK_BOUNDARY, however, that is not 100%
+ correct until someone implements forced stack alignment. */
+ return (alignment <= STACK_BOUNDARY);
}
iterations, it is *not* guaranteed that is will remain the same throughout
the execution of the inner-loop. This is because the inner-loop advances
with the original scalar step (and not in steps of VS). If the inner-loop
- step happens to be a multiple of VS, then the misalignment remaines fixed
+ step happens to be a multiple of VS, then the misalignment remains fixed
and we can use the optimized realignment scheme. For example:
for (i=0; i<N; i++)
*dt = vect_invariant_def;
return true;
}
-
+
+ if (TREE_CODE (operand) == PAREN_EXPR)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "non-associatable copy.");
+ operand = TREE_OPERAND (operand, 0);
+ }
if (TREE_CODE (operand) != SSA_NAME)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
break;
- case NOP_EXPR:
- case CONVERT_EXPR:
+ CASE_CONVERT:
if (BYTES_BIG_ENDIAN)
{
c1 = VEC_UNPACK_HI_EXPR;
if (code == FIX_TRUNC_EXPR)
{
/* The signedness is determined from output operand. */
- optab1 = optab_for_tree_code (c1, type);
- optab2 = optab_for_tree_code (c2, type);
+ optab1 = optab_for_tree_code (c1, type, optab_default);
+ optab2 = optab_for_tree_code (c2, type, optab_default);
}
else
{
- optab1 = optab_for_tree_code (c1, vectype);
- optab2 = optab_for_tree_code (c2, vectype);
+ optab1 = optab_for_tree_code (c1, vectype, optab_default);
+ optab2 = optab_for_tree_code (c2, vectype, optab_default);
}
if (!optab1 || !optab2)
bool
supportable_narrowing_operation (enum tree_code code,
- tree stmt, tree vectype,
+ const_tree stmt, const_tree vectype,
enum tree_code *code1)
{
enum machine_mode vec_mode;
switch (code)
{
- case NOP_EXPR:
- case CONVERT_EXPR:
+ CASE_CONVERT:
c1 = VEC_PACK_TRUNC_EXPR;
break;
if (code == FIX_TRUNC_EXPR)
/* The signedness is determined from output operand. */
- optab1 = optab_for_tree_code (c1, type);
+ optab1 = optab_for_tree_code (c1, type, optab_default);
else
- optab1 = optab_for_tree_code (c1, vectype);
+ optab1 = optab_for_tree_code (c1, vectype, optab_default);
if (!optab1)
return false;
/* Function vect_is_simple_reduction
- Detect a cross-iteration def-use cucle that represents a simple
+ Detect a cross-iteration def-use cycle that represents a simple
reduction computation. We look for the following pattern:
loop_header:
outer-loop vectorization is safe. */
/* CHECKME: check for !flag_finite_math_only too? */
- if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations
+ if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
&& !nested_in_vect_loop_p (vect_loop, def_stmt))
{
/* Changing the order of operations changes the semantics. */
return flag_section_anchors && flag_tree_vectorize;
}
-struct tree_opt_pass pass_ipa_increase_alignment =
+struct simple_ipa_opt_pass pass_ipa_increase_alignment =
{
+ {
+ SIMPLE_IPA_PASS,
"increase_alignment", /* name */
gate_increase_alignment, /* gate */
increase_alignment, /* execute */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- 0, /* todo_flags_finish */
- 0 /* letter */
+ 0 /* todo_flags_finish */
+ }
};