/* Loop Vectorization
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
- Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com> and
Ira Rosen <irar@il.ibm.com>
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
+ tree vf_vectype;
gimple stmt = gsi_stmt (si);
stmt_info = vinfo_for_stmt (stmt);
gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
&& !is_pattern_stmt_p (stmt_info));
- scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
- &dummy);
+ scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "get vectype for scalar type: ");
print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
}
-
vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
{
}
return false;
}
+
STMT_VINFO_VECTYPE (stmt_info) = vectype;
}
+ /* The vectorization factor is determined by the smallest
+ scalar type (or the largest vector size, but we only
+ support one vector size per loop). */
+ scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
+ &dummy);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "get vectype for scalar type: ");
+ print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+ }
+ vf_vectype = get_vectype_for_scalar_type (scalar_type);
+ if (!vf_vectype)
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ {
+ fprintf (vect_dump,
+ "not vectorized: unsupported data-type ");
+ print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+ }
+ return false;
+ }
+
+ if ((GET_MODE_SIZE (TYPE_MODE (vectype))
+ != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ {
+ fprintf (vect_dump,
+ "not vectorized: different sized vector "
+ "types in statement, ");
+ print_generic_expr (vect_dump, vectype, TDF_SLIM);
+ fprintf (vect_dump, " and ");
+ print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
+ }
+ return false;
+ }
+
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "vectype: ");
- print_generic_expr (vect_dump, vectype, TDF_SLIM);
+ print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
}
- nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "nunits = %d", nunits);
if (!vectorization_factor
|| (nunits > vectorization_factor))
vectorization_factor = nunits;
-
}
}
before the loop if needed), where the loop header contains all the
executable statements, and the latch is empty. */
if (!empty_block_p (loop->latch)
- || phi_nodes (loop->latch))
+ || !gimple_seq_empty_p (phi_nodes (loop->latch)))
{
if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
fprintf (vect_dump, "not vectorized: unexpected loop form.");
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
- if (STMT_VINFO_RELEVANT_P (stmt_info) && !PURE_SLP_STMT (stmt_info))
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+
/* STMT needs both SLP and loop-based vectorization. */
only_slp_in_loop = false;
}
{
bool ok;
loop_vec_info loop_vinfo;
+ int max_vf = MAX_VECTORIZATION_FACTOR;
+ int min_vf = 2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "===== analyze_loop_nest =====");
}
/* Find all data references in the loop (which correspond to vdefs/vuses)
- and analyze their evolution in the loop.
+ and analyze their evolution in the loop. Also adjust the minimal
+ vectorization factor according to the loads and stores.
FORNOW: Handle only simple, array references, which
alignment can be forced, and aligned pointer-references. */
- ok = vect_analyze_data_refs (loop_vinfo, NULL);
+ ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
return NULL;
}
- /* Analyze the alignment of the data-refs in the loop.
- Fail if a data reference is found that cannot be vectorized. */
+ /* Analyze data dependences between the data-refs in the loop
+ and adjust the maximum vectorization factor according to
+ the dependences.
+ FORNOW: fail at the first data dependence that we encounter. */
- ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
- if (!ok)
+ ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
+ if (!ok
+ || max_vf < min_vf)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad data alignment.");
+ fprintf (vect_dump, "bad data dependence.");
destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
+ if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "bad data dependence.");
+ destroy_loop_vec_info (loop_vinfo, true);
+ return NULL;
+ }
- /* Analyze data dependences between the data-refs in the loop.
- FORNOW: fail at the first data dependence that we encounter. */
+ /* Analyze the alignment of the data-refs in the loop.
+ Fail if a data reference is found that cannot be vectorized. */
- ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL);
+ ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad data dependence.");
+ fprintf (vect_dump, "bad data alignment.");
destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
else
{
if (vect_print_dump_info (REPORT_COST))
- fprintf (vect_dump, "cost model: vector iteration cost = %d "
- "is divisible by scalar iteration cost = %d by a factor "
- "greater than or equal to the vectorization factor = %d .",
+ fprintf (vect_dump, "cost model: the vector iteration cost = %d "
+ "divided by the scalar iteration cost = %d "
+ "is greater or equal to the vectorization factor = %d.",
vec_inside_cost, scalar_single_iter_cost, vf);
return -1;
}
tree scalar_dest;
tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_in = NULL_TREE;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, orig_code, epilog_reduc_code;
stmt_vec_info orig_stmt_info;
tree expr = NULL_TREE;
int i;
- int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int epilog_copies;
stmt_vec_info prev_stmt_info, prev_phi_info;
gimple first_phi = NULL;
nested_cycle = true;
}
- gcc_assert (ncopies >= 1);
-
/* FORNOW: SLP not supported. */
if (STMT_SLP_TYPE (stmt_info))
return false;
reduction variable. */
for (i = 0; i < op_type-1; i++)
{
+ tree tem;
+
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
- is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
- &def, &dt);
+ is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
+ &def_stmt, &def, &dt, &tem);
+ if (!vectype_in)
+ vectype_in = tem;
gcc_assert (is_simple_use);
if (dt != vect_internal_def
&& dt != vect_external_def
}
is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
- &def, &dt);
+ &def, &dt);
gcc_assert (is_simple_use);
gcc_assert (dt == vect_reduction_def
|| dt == vect_nested_cycle
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- vec_mode = TYPE_MODE (vectype);
+
+ ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ / TYPE_VECTOR_SUBPARTS (vectype_in));
+ gcc_assert (ncopies >= 1);
+
+ vec_mode = TYPE_MODE (vectype_in);
if (code == COND_EXPR)
{
/* 4. Supportable by target? */
/* 4.1. check support for the operation in the loop */
- optab = optab_for_tree_code (code, vectype, optab_default);
+ optab = optab_for_tree_code (code, vectype_in, optab_default);
if (!optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
/* Worthwhile without SIMD support? */
- if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+ if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
&& LOOP_VINFO_VECT_FACTOR (loop_vinfo)
< vect_min_worthwhile_factor (code))
{
/* This is a reduction pattern: get the vectype from the type of the
reduction variable, and get the tree-code from orig_stmt. */
orig_code = gimple_assign_rhs_code (orig_stmt);
- vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
- if (!vectype)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "unsupported data-type ");
- print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
- }
- return false;
- }
-
- vec_mode = TYPE_MODE (vectype);
+ gcc_assert (vectype_out);
+ vec_mode = TYPE_MODE (vectype_out);
}
else
{
epilog_reduc_code = ERROR_MARK;
if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
{
- reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
+ reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
optab_default);
if (!reduc_optab)
{
gcc_assert (ncopies == 1);
/* Create the destination vector */
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
if (op_type == binary_op)
{
if (reduc_index == 0)
- expr = build2 (code, vectype, reduc_def, loop_vec_def0);
+ expr = build2 (code, vectype_out, reduc_def, loop_vec_def0);
else
- expr = build2 (code, vectype, loop_vec_def0, reduc_def);
+ expr = build2 (code, vectype_out, loop_vec_def0, reduc_def);
}
else
{
if (reduc_index == 0)
- expr = build3 (code, vectype, reduc_def, loop_vec_def0,
+ expr = build3 (code, vectype_out, reduc_def, loop_vec_def0,
loop_vec_def1);
else
{
if (reduc_index == 1)
- expr = build3 (code, vectype, loop_vec_def0, reduc_def,
+ expr = build3 (code, vectype_out, loop_vec_def0, reduc_def,
loop_vec_def1);
else
- expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
+ expr = build3 (code, vectype_out, loop_vec_def0, loop_vec_def1,
reduc_def);
}
}
if (!stmt_info)
continue;
+ if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
+ vect_loop_kill_debug_uses (loop, phi);
+
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
- {
- if (MAY_HAVE_DEBUG_STMTS)
- vect_loop_kill_debug_uses (loop, phi);
- continue;
- }
+ continue;
if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
!= (unsigned HOST_WIDE_INT) vectorization_factor)
continue;
}
+ if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
+ vect_loop_kill_debug_uses (loop, stmt);
+
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
{
- if (MAY_HAVE_DEBUG_STMTS)
- vect_loop_kill_debug_uses (loop, stmt);
gsi_next (&si);
continue;
}