LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
+ LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
LOOP_VINFO_UNALIGNED_DR (res) = NULL;
free (LOOP_VINFO_BBS (loop_vinfo));
free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+ VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+ VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
free (loop_vinfo);
loop->aux = NULL;
free (LOOP_VINFO_BBS (loop_vinfo));
free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+ VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
Scan the loop stmts and make sure they are all vectorizable. */
static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (slp)
+ {
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ the vectorization factor of the loop is the unrolling factor required
+ by the SLP instances. If that unrolling factor is 1, we say that we
+ perform pure SLP on the loop - cross-iteration parallelism is not
+ exploited. */
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Updating vectorization factor to %d ",
+ vectorization_factor);
+ }
for (i = 0; i < nbbs; i++)
{
print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
}
+ /* Inner-loop loop-closed exit phi in outer-loop vectorization
+ (i.e., a phi in the tail of the outer-loop). */
if (! is_loop_header_bb_p (bb))
{
- /* inner-loop loop-closed exit phi in outer-loop vectorization
- (i.e. a phi in the tail of the outer-loop).
- FORNOW: we currently don't support the case that these phis
+ /* FORNOW: we currently don't support the case that these phis
are not used in the outerloop (unless it is double reduction,
i.e., this phi is vect_reduction_def), cause this case
requires to actually do something here. */
"Unsupported loop-closed phi in outer-loop.");
return false;
}
+
+ /* If PHI is used in the outer loop, we check that its operand
+ is defined in the inner loop. */
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
+ {
+ tree phi_op;
+ gimple op_def_stmt;
+
+ if (gimple_phi_num_args (phi) != 1)
+ return false;
+
+ phi_op = PHI_ARG_DEF (phi, 0);
+ if (TREE_CODE (phi_op) != SSA_NAME)
+ return false;
+
+ op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
+ if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
+ return false;
+
+ if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+ != vect_used_in_outer
+ && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+ != vect_used_in_outer_by_reduction)
+ return false;
+ }
+
continue;
}
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- gcc_assert (stmt_info);
-
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
-
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
}
} /* bbs */
return false;
}
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by the
- SLP instances. If that unrolling factor is 1, we say, that we perform
- pure SLP on loop - cross iteration parallelism is not exploited. */
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
- bool ok, dummy;
+ bool ok, dummy, slp = false;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
if (ok)
{
/* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
+ slp = vect_make_slp_decision (loop_vinfo);
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
/* Scan all the operations in the loop and make sure they are
vectorizable. */
- ok = vect_analyze_loop_operations (loop_vinfo);
+ ok = vect_analyze_loop_operations (loop_vinfo, slp);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
1. operation is commutative and associative and it is safe to
change the order of the computation (if CHECK_REDUCTION is true)
2. no uses for a2 in the loop (a2 is used out of the loop)
- 3. no uses of a1 in the loop besides the reduction operation.
+ 3. no uses of a1 in the loop besides the reduction operation
+ 4. no uses of a1 outside the loop.
- Condition 1 is tested here.
+ Conditions 1,4 are tested here.
Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
(2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
gimple use_stmt = USE_STMT (use_p);
if (is_gimple_debug (use_stmt))
continue;
- if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
- && vinfo_for_stmt (use_stmt)
+
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "intermediate value used outside loop.");
+
+ return NULL;
+ }
+
+ if (vinfo_for_stmt (use_stmt)
&& !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
nloop_uses++;
if (nloop_uses > 1)
&& (code == COND_EXPR
|| (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
&& (is_gimple_assign (def1)
+ || is_gimple_call (def1)
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
== vect_induction_def
|| (gimple_code (def1) == GIMPLE_PHI
&& (code == COND_EXPR
|| (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
&& (is_gimple_assign (def2)
+ || is_gimple_call (def2)
|| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
== vect_induction_def
|| (gimple_code (def2) == GIMPLE_PHI
case GIMPLE_BINARY_RHS:
reduction_op = gimple_assign_rhs2 (stmt);
break;
+ case GIMPLE_TERNARY_RHS:
+ reduction_op = gimple_assign_rhs3 (stmt);
+ break;
default:
gcc_unreachable ();
}
gimple_assign_set_lhs (new_stmt, induc_def);
si = gsi_start_bb (bb);
gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
+ set_vinfo_for_stmt (new_stmt,
+ new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
}
return induc_def;
{
case GIMPLE_SINGLE_RHS:
gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
- == ternary_op);
+ == ternary_op);
reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
break;
case GIMPLE_UNARY_RHS:
reduction_op = reduc_index ?
gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
break;
+ case GIMPLE_TERNARY_RHS:
+ reduction_op = gimple_op (stmt, reduc_index + 1);
+ break;
default:
gcc_unreachable ();
}
ops[1] = gimple_assign_rhs2 (stmt);
break;
+ case GIMPLE_TERNARY_RHS:
+ code = gimple_assign_rhs_code (stmt);
+ op_type = TREE_CODE_LENGTH (code);
+ gcc_assert (op_type == ternary_op);
+ ops[0] = gimple_assign_rhs1 (stmt);
+ ops[1] = gimple_assign_rhs2 (stmt);
+ ops[2] = gimple_assign_rhs3 (stmt);
+ break;
+
case GIMPLE_UNARY_RHS:
return false;
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)