/* Vectorizer Specific Loop Manipulations
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012
Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
!gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
gsi_next (&gsi_orig), gsi_next (&gsi_update))
{
- source_location loop_locus, guard_locus;;
+ source_location loop_locus, guard_locus;
orig_phi = gsi_stmt (gsi_orig);
update_phi = gsi_stmt (gsi_update);
static void
set_prologue_iterations (basic_block bb_before_first_loop,
- tree first_niters,
+ tree *first_niters,
struct loop *loop,
unsigned int th)
{
newphi = create_phi_node (var, bb_before_first_loop);
add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
UNKNOWN_LOCATION);
- add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
+ add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
- first_niters = PHI_RESULT (newphi);
+ *first_niters = PHI_RESULT (newphi);
}
/* Function slpeel_tree_peel_loop_to_edge.
static struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
- edge e, tree first_niters,
+ edge e, tree *first_niters,
tree niters, bool update_first_loop_count,
unsigned int th, bool check_profitability,
tree cond_expr, gimple_seq cond_expr_stmt_list)
basic_block bb_before_first_loop;
basic_block bb_between_loops;
basic_block new_exit_bb;
+ gimple_stmt_iterator gsi;
edge exit_e = single_exit (loop);
LOC loop_loc;
tree cost_pre_condition = NULL_TREE;
the function tree_duplicate_bb is called. */
gimple_register_cfg_hooks ();
+ /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
+ in the exit bb and rename all the uses after the loop. This simplifies
+ the *guard[12] routines, which assume loop closed SSA form for all PHIs
+ (but normally loop closed SSA form doesn't require virtual PHIs to be
+ in the same form). Doing this early simplifies checking which
+ uses should be renamed. */
+ for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
+ if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+ {
+ gimple phi = gsi_stmt (gsi);
+ for (gsi = gsi_start_phis (exit_e->dest);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+ break;
+ if (gsi_end_p (gsi))
+ {
+ gimple new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+ exit_e->dest);
+ tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
+ imm_use_iterator imm_iter;
+ gimple stmt;
+ tree new_vop = make_ssa_name (SSA_NAME_VAR (PHI_RESULT (phi)),
+ new_phi);
+ use_operand_p use_p;
+
+ add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
+ gimple_phi_set_result (new_phi, new_vop);
+ FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
+ if (stmt != new_phi && gimple_bb (stmt) != loop->header)
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+ SET_USE (use_p, new_vop);
+ }
+ break;
+ }
/* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
Resulting CFG would be:
if (!update_first_loop_count)
{
pre_condition =
- fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), 0));
+ fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+ build_int_cst (TREE_TYPE (*first_niters), 0));
if (check_profitability)
{
tree scalar_loop_iters
loop, th);
pre_condition =
- fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), 0));
+ fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+ build_int_cst (TREE_TYPE (*first_niters), 0));
}
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_after_second_loop = split_edge (single_exit (second_loop));
pre_condition =
- fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
+ fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
/* 4. Make first-loop iterate FIRST_NITERS times, if requested.
*/
if (update_first_loop_count)
- slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
+ slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
BITMAP_FREE (definitions);
delete_update_ssa ();
!gsi_end_p (gsi) && !gsi_end_p (gsi1);
gsi_next (&gsi), gsi_next (&gsi1))
{
- tree access_fn = NULL;
- tree evolution_part;
tree init_expr;
tree step_expr, off;
tree type;
tree var, ni, ni_name;
gimple_stmt_iterator last_gsi;
+ stmt_vec_info stmt_info;
phi = gsi_stmt (gsi);
phi1 = gsi_stmt (gsi1);
}
/* Skip reduction phis. */
- if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+ stmt_info = vinfo_for_stmt (phi);
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "reduc phi. skip.");
continue;
}
- access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
- gcc_assert (access_fn);
- /* We can end up with an access_fn like
- (short int) {(short unsigned int) i_49, +, 1}_1
- for further analysis we need to strip the outer cast but we
- need to preserve the original type. */
- type = TREE_TYPE (access_fn);
- STRIP_NOPS (access_fn);
- evolution_part =
- unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
- gcc_assert (evolution_part != NULL_TREE);
+ type = TREE_TYPE (gimple_phi_result (phi));
+ step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+ step_expr = unshare_expr (step_expr);
/* FORNOW: We do not support IVs whose evolution function is a polynomial
of degree >= 2 or exponential. */
- gcc_assert (!tree_is_chrec (evolution_part));
+ gcc_assert (!tree_is_chrec (step_expr));
- step_expr = evolution_part;
- init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
- loop->num));
- init_expr = fold_convert (type, init_expr);
+ init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
fold_convert (TREE_TYPE (step_expr), niters),
step_expr);
- if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
+ if (POINTER_TYPE_P (type))
ni = fold_build_pointer_plus (init_expr, off);
else
- ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
- init_expr,
- fold_convert (TREE_TYPE (init_expr), off));
+ ni = fold_build2 (PLUS_EXPR, type,
+ init_expr, fold_convert (type, off));
- var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
+ var = create_tmp_var (type, "tmp");
add_referenced_var (var);
last_gsi = gsi_last_bb (exit_bb);
}
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
- ratio_mult_vf_name, ni_name, false,
+ &ratio_mult_vf_name, ni_name, false,
th, check_profitability,
cond_expr, cond_expr_stmt_list);
gcc_assert (new_loop);
If the misalignment of DR is known at compile time:
addr_mis = int mis = DR_MISALIGNMENT (dr);
Else, compute address misalignment in bytes:
- addr_mis = addr & (vectype_size - 1)
+ addr_mis = addr & (vectype_align - 1)
prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
use TYPE_VECTOR_SUBPARTS. */
static tree
-vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
- tree *wide_prolog_niters)
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
- tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
- tree elem_size_log =
- build_int_cst (type, exact_log2 (vectype_align/nelements));
+ tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
+ HOST_WIDE_INT elem_size =
+ int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+ tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
tree nelements_minus_1 = build_int_cst (type, nelements - 1);
tree nelements_tree = build_int_cst (type, nelements);
tree byte_misalign;
new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
- /* Create: byte_misalign = addr & (vectype_size - 1) */
+ /* Create: byte_misalign = addr & (vectype_align - 1) */
byte_misalign =
fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
- vectype_size_minus_1);
+ vectype_align_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
add_referenced_var (var);
stmts = NULL;
iters_name = force_gimple_operand (iters, &stmts, false, var);
- if (types_compatible_p (sizetype, niters_type))
- *wide_prolog_niters = iters_name;
- else
- {
- gimple_seq seq = NULL;
- tree wide_iters = fold_convert (sizetype, iters);
- var = create_tmp_var (sizetype, "prolog_loop_niters");
- add_referenced_var (var);
- *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
- var);
- if (seq)
- gimple_seq_add_seq (&stmts, seq);
- }
/* Insert stmt on loop preheader edge. */
if (stmts)
initialize_original_copy_tables ();
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
- niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
- &wide_prolog_niters);
-
+ niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
+ ni_name);
/* Get profitability threshold for vectorized loop. */
min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
- niters_of_prolog_loop, ni_name, true,
+ &niters_of_prolog_loop, ni_name, true,
th, true, NULL_TREE, NULL);
gcc_assert (new_loop);
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
+ if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
+ wide_prolog_niters = niters_of_prolog_loop;
+ else
+ {
+ gimple_seq seq = NULL;
+ edge pe = loop_preheader_edge (loop);
+ tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
+ tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
+ add_referenced_var (var);
+ wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
+ var);
+ if (seq)
+ {
+ /* Insert stmt on loop preheader edge. */
+ basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
+ }
+
/* Update the init conditions of the access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);