/* Vectorizer Specific Loop Manipulations
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
- Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012
+ Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
-#include "expr.h"
-#include "toplev.h"
+#include "diagnostic-core.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
free (bbs);
}
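+/* A pending substitution in debug stmts: replace uses of the SSA name
+ FROM with the loop-closed name TO, where dominated by BB. */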
+typedef struct
+{
+ tree from, to;
+ basic_block bb;
+} adjust_info;
+
+DEF_VEC_O(adjust_info);
+DEF_VEC_ALLOC_O_STACK(adjust_info);
+#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
+
+/* A stack of values to be adjusted in debug stmts. We have to
+ process them LIFO, so that the closest substitution applies. If we
+ processed them FIFO, without the stack, we might substitute uses
+ with a PHI DEF that would soon become non-dominant, and when we got
+ to the suitable one, it wouldn't have anything to substitute any
+ more. */
+static VEC(adjust_info, stack) *adjust_vec;
+
+/* Adjust any debug stmts that referenced AI->from values to use the
+ loop-closed AI->to, if the references are dominated by AI->bb and
+ not by the definition of AI->from. */
+
+static void
+adjust_debug_stmts_now (adjust_info *ai)
+{
+ basic_block bbphi = ai->bb;
+ tree orig_def = ai->from;
+ tree new_def = ai->to;
+ imm_use_iterator imm_iter;
+ gimple stmt;
+ basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
+
+ gcc_assert (dom_info_available_p (CDI_DOMINATORS));
+
+ /* Adjust any debug stmts that held onto non-loop-closed
+ references. */
+ FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
+ {
+ use_operand_p use_p;
+ basic_block bbuse;
+
+ if (!is_gimple_debug (stmt))
+ continue;
+
+ gcc_assert (gimple_debug_bind_p (stmt));
+
+ bbuse = gimple_bb (stmt);
+
+ if ((bbuse == bbphi
+ || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
+ && !(bbuse == bbdef
+ || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
+ {
+ if (new_def)
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ SET_USE (use_p, new_def);
+ else
+ {
+ gimple_debug_bind_reset_value (stmt);
+ update_stmt (stmt);
+ }
+ }
+ }
+}
+
+/* Adjust debug stmts as scheduled before. */
+
+static void
+adjust_vec_debug_stmts (void)
+{
+ if (!MAY_HAVE_DEBUG_STMTS)
+ return;
+
+ gcc_assert (adjust_vec);
+
+ while (!VEC_empty (adjust_info, adjust_vec))
+ {
+ adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
+ VEC_pop (adjust_info, adjust_vec);
+ }
+
+ VEC_free (adjust_info, stack, adjust_vec);
+}
+
+/* Adjust any debug stmts that referenced FROM values to use the
+ loop-closed TO, if the references are dominated by BB and not by
+ the definition of FROM. If adjust_vec is non-NULL, adjustments
+ will be postponed until adjust_vec_debug_stmts is called. */
+
+static void
+adjust_debug_stmts (tree from, tree to, basic_block bb)
+{
+ adjust_info ai;
+
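+ /* Only real SSA names appear in debug stmts; the virtual operand
+ never does, so there is nothing to adjust for it. */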
+ if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
+ && SSA_NAME_VAR (from) != gimple_vop (cfun))
+ {
+ ai.from = from;
+ ai.to = to;
+ ai.bb = bb;
+
+ if (adjust_vec)
+ VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
+ else
+ adjust_debug_stmts_now (&ai);
+ }
+}
+
+/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
+ to adjust any debug stmts that referenced the old phi arg,
+ presumably non-loop-closed references left over from other
+ transformations. */
+
+static void
+adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
+{
+ tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
+
+ SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
+
+ if (MAY_HAVE_DEBUG_STMTS)
+ adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
+ gimple_bb (update_phi));
+}
+
/* Update the PHI nodes of NEW_LOOP.
/* An ordinary ssa name defined in the loop. */
add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
+ /* Drop any debug references outside the loop, if they would
+ become ill-formed SSA. */
+ adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
+
/* step 3 (case 1). */
if (!after)
{
gcc_assert (new_loop_exit_e == orig_entry_e);
- SET_PHI_ARG_DEF (phi_orig,
- new_loop_exit_e->dest_idx,
- new_ssa_name);
+ adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
}
}
}
!gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
gsi_next (&gsi_orig), gsi_next (&gsi_update))
{
- source_location loop_locus, guard_locus;;
+ source_location loop_locus, guard_locus;
orig_phi = gsi_stmt (gsi_orig);
update_phi = gsi_stmt (gsi_update);
/* 1.3. Update phi in successor block. */
gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
|| PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
- SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
+ adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
update_phi2 = new_phi;
/* 2.3. Update phi in successor of NEW_EXIT_BB: */
gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
- SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
+ adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
+ PHI_RESULT (new_phi));
/* 2.4. Record the newly created name with set_current_def.
We want to find a name such that
/* 1.3. Update phi in successor block. */
gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
- SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
+ adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
update_phi2 = new_phi;
/* 2.3. Update phi in successor of NEW_EXIT_BB: */
gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
- SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
+ adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
+ PHI_RESULT (new_phi));
/** 3. Handle loop-closed-ssa-form phis for first loop **/
/* 3.4. Update phi in successor of GUARD_BB: */
gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
== guard_arg);
- SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
+ adjust_phi_and_debug_stmts (update_phi2, guard_edge,
+ PHI_RESULT (new_phi));
}
}
static void
set_prologue_iterations (basic_block bb_before_first_loop,
- tree first_niters,
+ tree *first_niters,
struct loop *loop,
unsigned int th)
{
newphi = create_phi_node (var, bb_before_first_loop);
add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
UNKNOWN_LOCATION);
- add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
+ add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
- first_niters = PHI_RESULT (newphi);
+ *first_niters = PHI_RESULT (newphi);
}
-
/* Function slpeel_tree_peel_loop_to_edge.
Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
static struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
- edge e, tree first_niters,
+ edge e, tree *first_niters,
tree niters, bool update_first_loop_count,
unsigned int th, bool check_profitability,
tree cond_expr, gimple_seq cond_expr_stmt_list)
basic_block bb_before_first_loop;
basic_block bb_between_loops;
basic_block new_exit_bb;
+ gimple_stmt_iterator gsi;
edge exit_e = single_exit (loop);
LOC loop_loc;
tree cost_pre_condition = NULL_TREE;
the function tree_duplicate_bb is called. */
gimple_register_cfg_hooks ();
+ /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
+ in the exit bb and rename all the uses after the loop. This simplifies
+ the *guard[12] routines, which assume loop closed SSA form for all PHIs
+ (but normally loop closed SSA form doesn't require virtual PHIs to be
+ in the same form). Doing this early simplifies checking which
+ uses need to be renamed. */
+ for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
+ if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+ {
+ gimple phi = gsi_stmt (gsi);
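+ /* Check whether the exit bb already has a virtual PHI. */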
+ for (gsi = gsi_start_phis (exit_e->dest);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+ break;
+ if (gsi_end_p (gsi))
+ {
+ gimple new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+ exit_e->dest);
+ tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
+ imm_use_iterator imm_iter;
+ gimple stmt;
+ tree new_vop = make_ssa_name (SSA_NAME_VAR (PHI_RESULT (phi)),
+ new_phi);
+ use_operand_p use_p;
+
+ add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
+ gimple_phi_set_result (new_phi, new_vop);
+ FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
+ if (stmt != new_phi && gimple_bb (stmt) != loop->header)
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ SET_USE (use_p, new_vop);
+ }
+ break;
+ }
/* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
Resulting CFG would be:
return NULL;
}
+ if (MAY_HAVE_DEBUG_STMTS)
+ {
+ gcc_assert (!adjust_vec);
+ adjust_vec = VEC_alloc (adjust_info, stack, 32);
+ }
+
if (e == exit_e)
{
/* NEW_LOOP was placed after LOOP. */
if (!update_first_loop_count)
{
pre_condition =
- fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), 0));
+ fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+ build_int_cst (TREE_TYPE (*first_niters), 0));
if (check_profitability)
{
tree scalar_loop_iters
loop, th);
pre_condition =
- fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), 0));
+ fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+ build_int_cst (TREE_TYPE (*first_niters), 0));
}
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_after_second_loop = split_edge (single_exit (second_loop));
pre_condition =
- fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
+ fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
/* 4. Make first-loop iterate FIRST_NITERS times, if requested.
*/
if (update_first_loop_count)
- slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
+ slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
BITMAP_FREE (definitions);
delete_update_ssa ();
+ adjust_vec_debug_stmts ();
+
return new_loop;
}
edge pe;
basic_block new_bb;
gimple_seq stmts;
- tree ni_name;
+ tree ni_name, ni_minus_gap_name;
tree var;
tree ratio_name;
tree ratio_mult_vf_name;
ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
+ /* If an epilogue loop is required because of data accesses with gaps, we
+ subtract one iteration from the total number of iterations here for
+ correct calculation of RATIO. */
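+ /* E.g., with VF = 4 and NI = 8, RATIO becomes (8 - 1) >> 2 = 1 rather
+ than 2, so the final group of accesses is handled by the epilogue
+ loop. */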
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ {
+ ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+ ni_name,
+ build_one_cst (TREE_TYPE (ni_name)));
+ if (!is_gimple_val (ni_minus_gap_name))
+ {
+ var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+ add_referenced_var (var);
+
+ stmts = NULL;
+ ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+ true, var);
+ if (cond_expr_stmt_list)
+ gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+ else
+ {
+ pe = loop_preheader_edge (loop);
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+ }
+ }
+ else
+ ni_minus_gap_name = ni_name;
+
- /* Create: ratio = ni >> log2(vf) */
+ /* Create: ratio = ni_minus_gap >> log2(vf) */
- ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+ ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+ ni_minus_gap_name, log_vf);
if (!is_gimple_val (ratio_name))
{
var = create_tmp_var (TREE_TYPE (ni), "bnd");
!gsi_end_p (gsi) && !gsi_end_p (gsi1);
gsi_next (&gsi), gsi_next (&gsi1))
{
- tree access_fn = NULL;
- tree evolution_part;
tree init_expr;
tree step_expr, off;
tree type;
tree var, ni, ni_name;
gimple_stmt_iterator last_gsi;
+ stmt_vec_info stmt_info;
phi = gsi_stmt (gsi);
phi1 = gsi_stmt (gsi1);
}
/* Skip reduction phis. */
- if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+ stmt_info = vinfo_for_stmt (phi);
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "reduc phi. skip.");
continue;
}
- access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
- gcc_assert (access_fn);
- /* We can end up with an access_fn like
- (short int) {(short unsigned int) i_49, +, 1}_1
- for further analysis we need to strip the outer cast but we
- need to preserve the original type. */
- type = TREE_TYPE (access_fn);
- STRIP_NOPS (access_fn);
- evolution_part =
- unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
- gcc_assert (evolution_part != NULL_TREE);
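+ /* The evolution part was cached during loop analysis; reuse it rather
+ than re-running scalar evolution analysis on the PHI here. */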
+ type = TREE_TYPE (gimple_phi_result (phi));
+ step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+ step_expr = unshare_expr (step_expr);
/* FORNOW: We do not support IVs whose evolution function is a polynomial
of degree >= 2 or exponential. */
- gcc_assert (!tree_is_chrec (evolution_part));
+ gcc_assert (!tree_is_chrec (step_expr));
- step_expr = evolution_part;
- init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
- loop->num));
- init_expr = fold_convert (type, init_expr);
+ init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
fold_convert (TREE_TYPE (step_expr), niters),
step_expr);
- if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
- ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
- init_expr,
- fold_convert (sizetype, off));
+ if (POINTER_TYPE_P (type))
+ ni = fold_build_pointer_plus (init_expr, off);
else
- ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
- init_expr,
- fold_convert (TREE_TYPE (init_expr), off));
+ ni = fold_build2 (PLUS_EXPR, type,
+ init_expr, fold_convert (type, off));
- var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
+ var = create_tmp_var (type, "tmp");
add_referenced_var (var);
last_gsi = gsi_last_bb (exit_bb);
true, GSI_SAME_STMT);
/* Fix phi expressions in the successor bb. */
- SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
+ adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
}
}
}
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
- ratio_mult_vf_name, ni_name, false,
+ &ratio_mult_vf_name, ni_name, false,
th, check_profitability,
cond_expr, cond_expr_stmt_list);
gcc_assert (new_loop);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
tree niters_type = TREE_TYPE (loop_niters);
- int step = 1;
- int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
- if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
- step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
-
pe = loop_preheader_edge (loop);
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
- int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
- int elem_misalign = byte_misalign / element_size;
+ int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "known alignment = %d.", byte_misalign);
+ fprintf (vect_dump, "known peeling = %d.", npeel);
- iters = build_int_cst (niters_type,
- (((nelements - elem_misalign) & (nelements - 1)) / step));
+ iters = build_int_cst (niters_type, npeel);
}
else
{
gimple_seq new_stmts = NULL;
+ bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+ tree offset = negative
+ ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
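+ /* For a negative step the first vector access ends at the base
+ address, so it starts TYPE_VECTOR_SUBPARTS - 1 elements earlier. */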
tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
- &new_stmts, NULL_TREE, loop);
+ &new_stmts, offset, loop);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign =
- fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
+ fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
+ vectype_size_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
/* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
- iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
+ if (negative)
+ iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
+ else
+ iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters);
}
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
- for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+ FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
vect_update_init_of_dr (dr, niters);
}
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop, ni_name;
tree n_iters;
+ tree wide_prolog_niters;
struct loop *new_loop;
unsigned int th = 0;
int min_profitable_iters;
initialize_original_copy_tables ();
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
- niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
-
+ niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
+ ni_name);
/* Get profitability threshold for vectorized loop. */
min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
- niters_of_prolog_loop, ni_name, true,
+ &niters_of_prolog_loop, ni_name, true,
th, true, NULL_TREE, NULL);
gcc_assert (new_loop);
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
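+ /* The prologue iteration count is used below to update the data
+ reference offsets, which are computed in sizetype, so make the
+ count available in that type. */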
+ if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
+ wide_prolog_niters = niters_of_prolog_loop;
+ else
+ {
+ gimple_seq seq = NULL;
+ edge pe = loop_preheader_edge (loop);
+ tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
+ tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
+ add_referenced_var (var);
+ wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
+ var);
+ if (seq)
+ {
+ /* Insert stmt on loop preheader edge. */
+ basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
+ }
+
/* Update the init conditions of the access functions of all data refs. */
- vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
+ vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
/* After peeling we have to reset scalar evolution analyzer. */
scev_reset ();
/* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
of the first vector of the i'th data reference. */
- for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
{
gimple_seq new_stmt_list = NULL;
tree addr_base;
tree addr_tmp, addr_tmp_name;
tree or_tmp, new_or_tmp_name;
gimple addr_stmt, or_stmt;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ bool negative = tree_int_cst_compare
+ (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
+ tree offset = negative
+ ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
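+ /* As for the prologue: with a negative step the first vector access
+ starts TYPE_VECTOR_SUBPARTS - 1 elements before the base address. */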
/* create: addr_tmp = (int)(address_of_first_vector) */
addr_base =
vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
- NULL_TREE, loop);
+ offset, loop);
if (new_stmt_list != NULL)
gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
Input:
DR: The data reference.
- VECT_FACTOR: vectorization factor.
+ LENGTH_FACTOR: segment length to consider.
Return an expression whose value is the size of segment which will be
accessed by DR. */
static tree
-vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
+vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
{
- tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
- DR_STEP (dr), vect_factor);
+ tree segment_length;
+
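+ /* A zero step means DR stays at a single element, so the segment it
+ touches is just the size of that element. */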
+ if (!compare_tree_int (DR_STEP (dr), 0))
+ segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
+ else
+ segment_length = size_binop (MULT_EXPR,
+ fold_convert (sizetype, DR_STEP (dr)),
+ fold_convert (sizetype, length_factor));
- if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
+ if (vect_supportable_dr_alignment (dr, false)
+ == dr_explicit_realign_optimized)
{
tree vector_size = TYPE_SIZE_UNIT
(STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
- segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
- segment_length, vector_size);
+ segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
}
- return fold_convert (sizetype, segment_length);
+ return segment_length;
}
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
VEC (ddr_p, heap) * may_alias_ddrs =
LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
- tree vect_factor =
- build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
ddr_p ddr;
unsigned int i;
- tree part_cond_expr;
+ tree part_cond_expr, length_factor;
/* Create expression
- ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
- || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
+ (((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
+ || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0))
&&
...
&&
- ((store_ptr_n + store_segment_length_n) < load_ptr_n)
- || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
+ (((store_ptr_n + store_segment_length_n) <= load_ptr_n)
+ || ((load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
if (VEC_empty (ddr_p, may_alias_ddrs))
return;
- for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
{
struct data_reference *dr_a, *dr_b;
gimple dr_group_first_a, dr_group_first_b;
tree addr_base_a, addr_base_b;
tree segment_length_a, segment_length_b;
gimple stmt_a, stmt_b;
+ tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
dr_a = DDR_A (ddr);
stmt_a = DR_STMT (DDR_A (ddr));
- dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
+ dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
if (dr_group_first_a)
{
stmt_a = dr_group_first_a;
dr_b = DDR_B (ddr);
stmt_b = DR_STMT (DDR_B (ddr));
- dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
+ dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
if (dr_group_first_b)
{
stmt_b = dr_group_first_b;
vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
NULL_TREE, loop);
- segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
- segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
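+ /* If the two references advance with different steps, checking VF
+ iterations is not enough to rule out overlap; conservatively size
+ the segments using the full scalar iteration count. */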
+ if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
+ length_factor = scalar_loop_iters;
+ else
+ length_factor = size_int (vect_factor);
+ segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
+ segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
if (vect_print_dump_info (REPORT_DR_DETAILS))
{
print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
}
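+ /* For a reference with negative step the segment extends below the
+ base address, so the base is the segment's maximum rather than its
+ minimum. */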
+ seg_a_min = addr_base_a;
+ seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
+ if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
+ seg_a_min = seg_a_max, seg_a_max = addr_base_a;
+
+ seg_b_min = addr_base_b;
+ seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
+ if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
+ seg_b_min = seg_b_max, seg_b_max = addr_base_b;
part_cond_expr =
fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LT_EXPR, boolean_type_node,
- fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
- addr_base_a,
- segment_length_a),
- addr_base_b),
- fold_build2 (LT_EXPR, boolean_type_node,
- fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
- addr_base_b,
- segment_length_b),
- addr_base_a));
+ fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
+ fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
tree *cond_expr, gimple_seq *cond_expr_stmt_list)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- struct loop *nloop;
basic_block condition_bb;
gimple_stmt_iterator gsi, cond_exp_gsi;
basic_block merge_bb;
return;
initialize_original_copy_tables ();
- nloop = loop_version (loop, *cond_expr, &condition_bb,
- prob, prob, REG_BR_PROB_BASE - prob, true);
+ loop_version (loop, *cond_expr, &condition_bb,
+ prob, prob, REG_BR_PROB_BASE - prob, true);
free_original_copy_tables ();
/* Loop versioning violates an assumption we try to maintain during
arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
add_phi_arg (new_phi, arg, new_exit_e,
gimple_phi_arg_location_from_edge (orig_phi, e));
- SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+ adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
}
/* End loop-exit-fixes after versioning. */