/* Loop Vectorization
- Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
This file is part of GCC.
/*************************************************************************
Simple Loop Peeling Utilities
*************************************************************************/
-static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
- (struct loop *, struct loops *, edge);
static void slpeel_update_phis_for_duplicate_loop
(struct loop *, struct loop *, bool after);
static void slpeel_update_phi_nodes_for_guard1
to mark that it's uninitialized. */
enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
-/* Number of loops, at the beginning of vectorization. */
-unsigned int vect_loops_num;
-
/* Loop location. */
static LOC vect_loop_location;
/* Bitmap of virtual variables to be renamed. */
-bitmap vect_vnames_to_rename;
+bitmap vect_memsyms_to_rename;
\f
/*************************************************************************
Simple Loop Peeling Utilities
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
stmt = bsi_stmt (bsi);
- FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
- (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
rename_use_op (use_p);
}
renaming later. */
name = PHI_RESULT (orig_phi);
if (!is_gimple_reg (SSA_NAME_VAR (name)))
- bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
+ bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
/** 1. Handle new-merge-point phis **/
/** 2. Handle loop-closed-ssa-form phis **/
+ if (!is_gimple_reg (PHI_RESULT (orig_phi)))
+ continue;
+
/* 2.1. Generate new phi node in NEW_EXIT_BB: */
new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
*new_exit_bb);
on E which is either the entry or exit of LOOP. */
static struct loop *
-slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
- edge e)
+slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
struct loop *new_loop;
basic_block *new_bbs, *bbs;
}
/* Generate new loop structure. */
- new_loop = duplicate_loop (loops, loop, loop->outer);
+ new_loop = duplicate_loop (loop, loop->outer);
if (!new_loop)
{
free (bbs);
copy_bbs (bbs, loop->num_nodes, new_bbs,
&exit, 1, &new_exit, NULL,
e->src);
- set_single_exit (new_loop, new_exit);
/* Duplicate the phi args at the exit bbs, marking them as coming
   also from the exit of the duplicated loop.  */
*/
struct loop*
-slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
+slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge e, tree first_niters,
- tree niters, bool update_first_loop_count)
+ tree niters, bool update_first_loop_count,
+ unsigned int th)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
orig_exit_bb:
*/
- if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
+ if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
{
loop_loc = find_loop_location (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
pre_condition =
fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), 0));
+ build_int_cst (TREE_TYPE (first_niters), th));
+
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_before_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
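
A sketch of the control flow the guard builds, for orientation only:
PRE_CONDITION now compares FIRST_NITERS against the cost-model
threshold TH instead of zero, so the first (e.g. vectorized) loop is
skipped when too few iterations remain to make it profitable.  Labels
below are illustrative, not names from the patch:

  if (first_niters <= th)
    goto second_loop;   /* too few iterations: skip the first loop */
  first_loop:           /* runs FIRST_NITERS iterations */
    ...
  second_loop:          /* runs the remaining NITERS - FIRST_NITERS */
    ...
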
node = get_loop_exit_condition (loop);
- if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
+ if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node)
&& EXPR_FILENAME (node) && EXPR_LINENO (node))
return EXPR_LOC (node);
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
node = bsi_stmt (si);
- if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
+ if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node))
return EXPR_LOC (node);
}
else
STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
+ DR_GROUP_FIRST_DR (res) = NULL_TREE;
+ DR_GROUP_NEXT_DR (res) = NULL_TREE;
+ DR_GROUP_SIZE (res) = 0;
+ DR_GROUP_STORE_COUNT (res) = 0;
+ DR_GROUP_GAP (res) = 0;
+ DR_GROUP_SAME_DR_STMT (res) = NULL_TREE;
+ DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
return res;
}
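
The DR_GROUP_* fields initialized above describe a group of interleaved
data references.  A hedged sketch of walking such a group from its
first member (STMT_INFO and MEMBER are assumed, illustrative names):

  tree member;

  /* Visit each scalar stmt in the interleaving group; DR_GROUP_SIZE
     gives the member count, DR_GROUP_GAP the skipped elements.  */
  for (member = DR_GROUP_FIRST_DR (stmt_info);
       member;
       member = DR_GROUP_NEXT_DR (vinfo_for_stmt (member)))
    {
      /* ... process MEMBER ...  */
    }
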
}
/* An empty stmt is expected only in the case of a function argument.
- (Otherwise - we expect a phi_node or a modify_expr). */
+ (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT). */
if (IS_EMPTY_STMT (*def_stmt))
{
tree arg = TREE_OPERAND (*def_stmt, 0);
|| *dt == vect_invariant_def);
break;
- case MODIFY_EXPR:
- *def = TREE_OPERAND (*def_stmt, 0);
+ case GIMPLE_MODIFY_STMT:
+ *def = GIMPLE_STMT_OPERAND (*def_stmt, 0);
gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
break;
return false;
}
- if (*dt == vect_induction_def)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "induction not supported.");
- return false;
- }
-
return true;
}
vector form (i.e., when operating on arguments of type VECTYPE).
The two kinds of widening operations we currently support are
- NOP and WIDEN_MULT. This function checks if these oprations
+ NOP and WIDEN_MULT. This function checks if these operations
are supported by the target platform either directly (via vector
tree-codes), or via target builtins.
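
As a hedged source-level example (function and parameter names are
illustrative), a loop whose multiply widens char operands to int is the
kind of computation the WIDEN_MULT support covers:

  void
  widen_mult_example (int *res, unsigned char *a, unsigned char *b, int n)
  {
    int i;

    /* Each multiply widens its operands from char to int, so
       vectorizing it needs the widening support checked here.  */
    for (i = 0; i < n; i++)
      res[i] = (int) a[i] * (int) b[i];
  }
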
enum machine_mode vec_mode;
enum insn_code icode1, icode2;
optab optab1, optab2;
- tree expr = TREE_OPERAND (stmt, 1);
+ tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
tree type = TREE_TYPE (expr);
tree wide_vectype = get_vectype_for_scalar_type (type);
enum tree_code c1, c2;
vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
However, in the special case that the result of the widening operation is
- used in a reduction copmutation only, the order doesn't matter (because
+ used in a reduction computation only, the order doesn't matter (because
when vectorizing a reduction we change the order of the computation).
- Some targets can take advatage of this and generate more efficient code.
+ Some targets can take advantage of this and generate more efficient code.
For example, targets like Altivec, that support widen_mult using a sequence
of {mult_even,mult_odd} generate the following vectors:
vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */
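
A hedged scalar illustration of the order-invariance claim: for an
associative, commutative reduction such as integer addition, the
in-order and even/odd-interleaved combinations agree.

  static int
  reduce_any_order (int r1, int r2, int r3, int r4,
                    int r5, int r6, int r7, int r8)
  {
    /* In-order lanes: [r1,r2,r3,r4] and [r5,r6,r7,r8].  */
    int in_order = ((r1 + r2) + (r3 + r4)) + ((r5 + r6) + (r7 + r8));
    /* Even/odd lanes: [r1,r3,r5,r7] and [r2,r4,r6,r8].  */
    int even_odd = ((r1 + r3) + (r5 + r7)) + ((r2 + r4) + (r6 + r8));

    /* Equal for any associative, commutative operation.  */
    gcc_assert (in_order == even_odd);
    return in_order;
  }
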
int op_type;
tree operation, op1, op2;
tree type;
+ int nloop_uses;
+ tree name;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
- if (TREE_CODE (loop_arg) != SSA_NAME)
+ name = PHI_RESULT (phi);
+ nloop_uses = 0;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
- if (vect_print_dump_info (REPORT_DETAILS))
+ tree use_stmt = USE_STMT (use_p);
+ if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
+ && vinfo_for_stmt (use_stmt)
+ && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+ nloop_uses++;
+ if (nloop_uses > 1)
{
- fprintf (vect_dump, "reduction: not ssa_name: ");
- print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "reduction used in loop.");
+ return NULL_TREE;
}
+ }
+
+ if (TREE_CODE (loop_arg) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "reduction: not ssa_name: ");
+ print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
+ }
return NULL_TREE;
}
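
A hedged source-level example of what the new use count rejects (names
illustrative): SUM is a reduction candidate, but its value is also
stored inside the loop, so it has more than one in-loop use and
nloop_uses exceeds one.

  int
  partial_sums (int *a, int *b, int n)
  {
    int i, sum = 0;

    for (i = 0; i < n; i++)
      {
        sum += a[i];
        b[i] = sum;   /* second in-loop use: not a simple reduction */
      }
    return sum;
  }
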
if (!def_stmt)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "reduction: no def_stmt.");
+ fprintf (vect_dump, "reduction: no def_stmt.");
return NULL_TREE;
}
- if (TREE_CODE (def_stmt) != MODIFY_EXPR)
+ if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
{
if (vect_print_dump_info (REPORT_DETAILS))
- {
- print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
- }
+ print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
return NULL_TREE;
}
- operation = TREE_OPERAND (def_stmt, 1);
+ name = GIMPLE_STMT_OPERAND (def_stmt, 0);
+ nloop_uses = 0;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
+ {
+ tree use_stmt = USE_STMT (use_p);
+ if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
+ && vinfo_for_stmt (use_stmt)
+ && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+ nloop_uses++;
+ if (nloop_uses > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "reduction used in loop.");
+ return NULL_TREE;
+ }
+ }
+
+ operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
code = TREE_CODE (operation);
if (!commutative_tree_code (code) || !associative_tree_code (code))
{
return NULL_TREE;
}
- op_type = TREE_CODE_LENGTH (code);
+ op_type = TREE_OPERAND_LENGTH (operation);
if (op_type != binary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
return NULL_TREE;
}
- else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
+ else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
{
/* Changing the order of operations changes the semantics. */
if (vect_print_dump_info (REPORT_DETAILS))
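
A hedged illustration of why the reduction is rejected when signed
overflow traps (now keyed off TYPE_OVERFLOW_TRAPS rather than
flag_trapv directly): with a == INT_MAX, b == 1, c == -1, one
association traps and the other does not, so reordering the reduction
changes the semantics.

  int
  reassoc_traps (int a, int b, int c)
  {
    int t1 = (a + b) + c;   /* a + b overflows: traps under -ftrapv */
    int t2 = a + (b + c);   /* b + c == 0: never overflows */
    return t1 == t2;
  }
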
*/
def1 = SSA_NAME_DEF_STMT (op1);
def2 = SSA_NAME_DEF_STMT (op2);
- if (!def1 || !def2)
+ if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
return NULL_TREE;
}
- if (TREE_CODE (def1) == MODIFY_EXPR
+
+ /* Check that one def is the reduction def, defined by PHI,
+ the other def is either defined in the loop by a GIMPLE_MODIFY_STMT,
+ or it's an induction (defined by some phi node). */
+
+ if (def2 == phi
&& flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
- && def2 == phi)
+ && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
}
return def_stmt;
}
- else if (TREE_CODE (def2) == MODIFY_EXPR
- && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
- && def1 == phi)
+ else if (def1 == phi
+ && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
+ && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def))
{
/* Swap operands (just for simplicity - so that the rest of the code
can assume that the reduction variable is always the last (second)
{
tree init_expr;
tree step_expr;
-
tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
/* When there is no evolution in this loop, the evolution function
return false;
step_expr = evolution_part;
- init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
- loop_nb));
+ init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
if (vect_print_dump_info (REPORT_DETAILS))
{
*step = step_expr;
if (TREE_CODE (step_expr) != INTEGER_CST)
- {
+ {
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "step unknown.");
return false;
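
For reference, a hedged example of the chrec decomposition used here:
for the loop below (assumed to be loop number 1), the access function
of I is {3, +, 2}_1, so INIT_EXPR is 3 and STEP_EXPR is the INTEGER_CST
2, which passes the check above.

  /* Illustrative:

       for (i = 3; i < n; i += 2)
         ...

     access_fn (i) = {3, +, 2}_1
     init_expr     = 3   (initial_condition_in_loop_num)
     step_expr     = 2   (evolution_part_in_loop_num)

     A step that varies inside the loop would not be an INTEGER_CST
     and would be rejected as "step unknown".  */
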
Entry Point to loop vectorization phase. */
unsigned
-vectorize_loops (struct loops *loops)
+vectorize_loops (void)
{
unsigned int i;
unsigned int num_vectorized_loops = 0;
+ unsigned int vect_loops_num;
+ loop_iterator li;
+ struct loop *loop;
/* Fix the verbosity level if not defined explicitly by the user. */
vect_set_dump_settings ();
/* Allocate the bitmap that records which virtual variables
   need to be renamed. */
- vect_vnames_to_rename = BITMAP_ALLOC (NULL);
+ vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
/* ----------- Analyze loops. ----------- */
/* If some loop was duplicated, it gets a bigger number
   than all previously defined loops.  This fact allows us to run
   only over the initial loops, skipping newly generated ones. */
- vect_loops_num = loops->num;
- for (i = 1; i < vect_loops_num; i++)
+ vect_loops_num = number_of_loops ();
+ FOR_EACH_LOOP (li, loop, 0)
{
loop_vec_info loop_vinfo;
- struct loop *loop = loops->parray[i];
-
- if (!loop)
- continue;
vect_loop_location = find_loop_location (loop);
loop_vinfo = vect_analyze_loop (loop);
if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
continue;
- vect_transform_loop (loop_vinfo, loops);
+ vect_transform_loop (loop_vinfo);
num_vectorized_loops++;
}
vect_loop_location = UNKNOWN_LOC;
/* ----------- Finalize. ----------- */
- BITMAP_FREE (vect_vnames_to_rename);
+ BITMAP_FREE (vect_memsyms_to_rename);
for (i = 1; i < vect_loops_num; i++)
{
- struct loop *loop = loops->parray[i];
loop_vec_info loop_vinfo;
+ loop = get_loop (i);
if (!loop)
continue;
loop_vinfo = loop->aux;
return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
}
+
+/* Increase alignment of global arrays to improve vectorization potential.
+ TODO:
+ - Consider also structs that have an array field.
+ - Use ipa analysis to prune arrays that can't be vectorized?
+ This should involve global alignment analysis and in the future also
+ array padding. */
+
+static unsigned int
+increase_alignment (void)
+{
+ struct varpool_node *vnode;
+
+ /* Increase the alignment of all global arrays for vectorization. */
+ for (vnode = varpool_nodes_queue;
+ vnode;
+ vnode = vnode->next_needed)
+ {
+ tree vectype, decl = vnode->decl;
+ unsigned int alignment;
+
+ if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
+ continue;
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
+ if (!vectype)
+ continue;
+ alignment = TYPE_ALIGN (vectype);
+ if (DECL_ALIGN (decl) >= alignment)
+ continue;
+
+ if (vect_can_force_dr_alignment_p (decl, alignment))
+ {
+ DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
+ DECL_USER_ALIGN (decl) = 1;
+ if (dump_file)
+ {
+ fprintf (dump_file, "Increasing alignment of decl: ");
+ print_generic_expr (dump_file, decl, TDF_SLIM);
+ }
+ }
+ }
+ return 0;
+}
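
A hedged illustration of the pass's effect, assuming a target whose
vector type for float is the 16-byte V4SF:

  /* Illustrative only.  Given a global array

       float data[1024];

     whose element type maps to a 16-byte V4SF vectype, the pass raises
     DECL_ALIGN (data) to 128 bits and sets DECL_USER_ALIGN, as if the
     user had written:

       float data[1024] __attribute__ ((aligned (16)));  */
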
+
+static bool
+gate_increase_alignment (void)
+{
+ return flag_section_anchors && flag_tree_vectorize;
+}
+
+struct tree_opt_pass pass_ipa_increase_alignment =
+{
+ "increase_alignment", /* name */
+ gate_increase_alignment, /* gate */
+ increase_alignment, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+ 0 /* letter */
+};