/* Loop Vectorization
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com> and
Ira Rosen <irar@il.ibm.com>
stmt_vec_info stmt_info;
int i;
HOST_WIDE_INT dummy;
+ gimple stmt, pattern_stmt = NULL;
+ gimple_seq pattern_def_seq = NULL;
+ gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
+ bool analyze_pattern_stmt = false;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
}
}
- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
{
- tree vf_vectype;
- gimple stmt = gsi_stmt (si);
- stmt_info = vinfo_for_stmt (stmt);
+ tree vf_vectype;
+
+ if (analyze_pattern_stmt)
+ stmt = pattern_stmt;
+ else
+ stmt = gsi_stmt (si);
+
+ stmt_info = vinfo_for_stmt (stmt);
if (vect_print_dump_info (REPORT_DETAILS))
{
gcc_assert (stmt_info);
- /* skip stmts which do not need to be vectorized. */
+ /* Skip stmts which do not need to be vectorized. */
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
+ {
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+ {
+ stmt = pattern_stmt;
+ stmt_info = vinfo_for_stmt (pattern_stmt);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "==> examining pattern statement: ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+ }
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "skip.");
+ gsi_next (&si);
+ continue;
+ }
+ }
+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+ analyze_pattern_stmt = true;
+
+ /* If a pattern statement has def stmts, analyze them too. */
+ if (is_pattern_stmt_p (stmt_info))
{
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "skip.");
- continue;
+ if (pattern_def_seq == NULL)
+ {
+ pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+ pattern_def_si = gsi_start (pattern_def_seq);
+ }
+ else if (!gsi_end_p (pattern_def_si))
+ gsi_next (&pattern_def_si);
+ if (pattern_def_seq != NULL)
+ {
+ gimple pattern_def_stmt = NULL;
+ stmt_vec_info pattern_def_stmt_info = NULL;
+
+ while (!gsi_end_p (pattern_def_si))
+ {
+ pattern_def_stmt = gsi_stmt (pattern_def_si);
+ pattern_def_stmt_info
+ = vinfo_for_stmt (pattern_def_stmt);
+ if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+ || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+ break;
+ gsi_next (&pattern_def_si);
+ }
+
+ if (!gsi_end_p (pattern_def_si))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump,
+ "==> examining pattern def stmt: ");
+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+ TDF_SLIM);
+ }
+
+ stmt = pattern_def_stmt;
+ stmt_info = pattern_def_stmt_info;
+ }
+ else
+ {
+ pattern_def_si = gsi_start (NULL);
+ analyze_pattern_stmt = false;
+ }
+ }
+ else
+ analyze_pattern_stmt = false;
}
if (gimple_get_lhs (stmt) == NULL_TREE)
if (STMT_VINFO_VECTYPE (stmt_info))
{
/* The only case when a vectype had been already set is for stmts
- that contain a dataref, or for "pattern-stmts" (stmts generated
- by the vectorizer to represent/replace a certain idiom). */
+ that contain a dataref, or for "pattern-stmts" (stmts
+ generated by the vectorizer to represent/replace a certain
+ idiom). */
gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
- || is_pattern_stmt_p (stmt_info));
+ || is_pattern_stmt_p (stmt_info)
+ || !gsi_end_p (pattern_def_si));
vectype = STMT_VINFO_VECTYPE (stmt_info);
}
else
{
- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
- && !is_pattern_stmt_p (stmt_info));
-
+ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
if (vect_print_dump_info (REPORT_DETAILS))
{
if (!vectorization_factor
|| (nunits > vectorization_factor))
vectorization_factor = nunits;
+
+ if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
+ {
+ pattern_def_seq = NULL;
+ gsi_next (&si);
+ }
}
}
/* Analyze the evolution function. */
access_fn = analyze_scalar_evolution (loop, def);
if (access_fn)
- STRIP_NOPS (access_fn);
- if (access_fn && vect_print_dump_info (REPORT_DETAILS))
{
- fprintf (vect_dump, "Access function of PHI: ");
- print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+ STRIP_NOPS (access_fn);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "Access function of PHI: ");
+ print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+ }
+ STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
+ = evolution_part_in_loop_num (access_fn, loop->num);
}
if (!access_fn
continue;
}
+ gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
+
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Detected induction.");
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
+ LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
LOOP_VINFO_UNALIGNED_DR (res) = NULL;
PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
+ LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
LOOP_VINFO_PEELING_HTAB (res) = NULL;
+ LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
return res;
}
free (LOOP_VINFO_BBS (loop_vinfo));
free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+ VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+ VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
free (loop_vinfo);
loop->aux = NULL;
for (si = gsi_start_bb (bb); !gsi_end_p (si); )
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- if (stmt_info)
- {
- /* Check if this is a "pattern stmt" (introduced by the
- vectorizer during the pattern recognition pass). */
- bool remove_stmt_p = false;
- gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
- if (orig_stmt)
- {
- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
- if (orig_stmt_info
- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
- remove_stmt_p = true;
- }
-
- /* Free stmt_vec_info. */
- free_stmt_vec_info (stmt);
-
- /* Remove dead "pattern stmts". */
- if (remove_stmt_p)
- gsi_remove (&si, true);
- }
+ /* Free stmt_vec_info. */
+ free_stmt_vec_info (stmt);
gsi_next (&si);
}
}
free (LOOP_VINFO_BBS (loop_vinfo));
free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+ VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
+ VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
Scan the loop stmts and make sure they are all vectorizable. */
static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (slp)
+ {
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by
+ the SLP instances. If that unrolling factor is 1, we say, that we
+ perform pure SLP on loop - cross iteration parallelism is not
+ exploited. */
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Updating vectorization factor to %d ",
+ vectorization_factor);
+ }
for (i = 0; i < nbbs; i++)
{
print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
}
+ /* Inner-loop loop-closed exit phi in outer-loop vectorization
+ (i.e., a phi in the tail of the outer-loop). */
if (! is_loop_header_bb_p (bb))
{
- /* inner-loop loop-closed exit phi in outer-loop vectorization
- (i.e. a phi in the tail of the outer-loop).
- FORNOW: we currently don't support the case that these phis
+ /* FORNOW: we currently don't support the case that these phis
are not used in the outerloop (unless it is double reduction,
i.e., this phi is vect_reduction_def), cause this case
requires to actually do something here. */
"Unsupported loop-closed phi in outer-loop.");
return false;
}
+
+ /* If PHI is used in the outer loop, we check that its operand
+ is defined in the inner loop. */
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
+ {
+ tree phi_op;
+ gimple op_def_stmt;
+
+ if (gimple_phi_num_args (phi) != 1)
+ return false;
+
+ phi_op = PHI_ARG_DEF (phi, 0);
+ if (TREE_CODE (phi_op) != SSA_NAME)
+ return false;
+
+ op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
+ if (!op_def_stmt
+ || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
+ || !vinfo_for_stmt (op_def_stmt))
+ return false;
+
+ if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+ != vect_used_in_outer
+ && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+ != vect_used_in_outer_by_reduction)
+ return false;
+ }
+
continue;
}
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- gcc_assert (stmt_info);
-
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
-
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
}
} /* bbs */
return false;
}
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by the
- SLP instances. If that unrolling factor is 1, we say, that we perform
- pure SLP on loop - cross iteration parallelism is not exploited. */
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
- bool ok, dummy;
+ bool ok, slp = false;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
the dependences.
FORNOW: fail at the first data dependence that we encounter. */
- ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
+ ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
if (!ok
|| max_vf < min_vf)
{
if (ok)
{
/* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
+ slp = vect_make_slp_decision (loop_vinfo);
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
}
+ else
+ return false;
/* Scan all the operations in the loop and make sure they are
vectorizable. */
- ok = vect_analyze_loop_operations (loop_vinfo);
+ ok = vect_analyze_loop_operations (loop_vinfo, slp);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
+/* Detect SLP reduction of the form:
+
+ #a1 = phi <a5, a0>
+ a2 = operation (a1)
+ a3 = operation (a2)
+ a4 = operation (a3)
+ a5 = operation (a4)
+
+ #a = phi <a5>
+
+ PHI is the reduction phi node (#a1 = phi <a5, a0> above)
+ FIRST_STMT is the first reduction stmt in the chain
+ (a2 = operation (a1)).
+ LOOP_INFO is the loop_vec_info whose loop (LOOP_VINFO_LOOP) must be
+ the loop containing PHI; otherwise FALSE is returned right away.
+
+ The chain is followed from the result of PHI through the uses of
+ each stmt until PHI is reached again. Every stmt on the way must be
+ an assignment inside the loop with the same rhs code as FIRST_STMT,
+ and the chain must contain at least two stmts.
+
+ Side effects: GROUP_FIRST_ELEMENT and GROUP_NEXT_ELEMENT of the
+ visited stmts are set while the chain is followed, and operands of
+ chain stmts may be swapped; both can already have happened when
+ FALSE is returned later on. On success the first stmt of the chain
+ is pushed to LOOP_VINFO_REDUCTION_CHAINS (LOOP_INFO) and its
+ GROUP_SIZE is set to the number of stmts in the chain.
+
+ Return TRUE if a reduction chain was detected. */
+
+static bool
+vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
+{
+ struct loop *loop = (gimple_bb (phi))->loop_father;
+ struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+ enum tree_code code;
+ gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
+ stmt_vec_info use_stmt_info, current_stmt_info;
+ tree lhs;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ int nloop_uses, size = 0, n_out_of_loop_uses;
+ bool found = false;
+
+ if (loop != vect_loop)
+ return false;
+
+ lhs = PHI_RESULT (phi);
+ code = gimple_assign_rhs_code (first_stmt);
+ while (1)
+ {
+ nloop_uses = 0;
+ n_out_of_loop_uses = 0;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ gimple use_stmt = USE_STMT (use_p);
+ if (is_gimple_debug (use_stmt))
+ continue;
+
+ use_stmt = USE_STMT (use_p); /* Repeats the initialization of USE_STMT above. */
+
+ /* Check if we got back to the reduction phi. If so, the walk over
+ the uses of LHS stops here and the chain is closed. */
+ if (use_stmt == phi)
+ {
+ loop_use_stmt = use_stmt;
+ found = true;
+ break;
+ }
+
+ if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+ {
+ if (vinfo_for_stmt (use_stmt)
+ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+ {
+ loop_use_stmt = use_stmt;
+ nloop_uses++;
+ }
+ }
+ else
+ n_out_of_loop_uses++;
+
+ /* There can be either a single use in the loop or two uses in
+ phi nodes. Hence give up as soon as more than one in-loop use
+ has been counted, or when LHS has both a counted in-loop use
+ and a use outside the loop. Only in-loop uses that have a
+ stmt_vec_info and are not STMT_VINFO_IN_PATTERN_P are counted. */
+ if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
+ return false;
+ }
+
+ if (found)
+ break;
+
+ /* We reached a statement with no loop uses. */
+ if (nloop_uses == 0)
+ return false;
+
+ /* This is a loop exit phi, and we haven't reached the reduction phi.
+ NOTE(review): LOOP_USE_STMT is only set for uses inside LOOP, so
+ this looks like any in-loop phi other than PHI - confirm. */
+ if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
+ return false;
+
+ if (!is_gimple_assign (loop_use_stmt)
+ || code != gimple_assign_rhs_code (loop_use_stmt)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
+ return false;
+
+ /* Insert LOOP_USE_STMT into the reduction chain: link it after
+ CURRENT_STMT and let it share that stmt's first element, or make
+ it its own first element if it is the first stmt found. */
+ use_stmt_info = vinfo_for_stmt (loop_use_stmt);
+ if (current_stmt)
+ {
+ current_stmt_info = vinfo_for_stmt (current_stmt);
+ GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
+ GROUP_FIRST_ELEMENT (use_stmt_info)
+ = GROUP_FIRST_ELEMENT (current_stmt_info);
+ }
+ else
+ GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
+
+ lhs = gimple_assign_lhs (loop_use_stmt);
+ current_stmt = loop_use_stmt;
+ size++;
+ }
+
+ if (!found || loop_use_stmt != phi || size < 2) /* The loop above is only
+ left with FOUND set and LOOP_USE_STMT equal to PHI, so in effect this
+ rejects chains of fewer than two stmts. */
+ return false;
+
+ /* Swap the operands, if needed, to make the reduction operand be the second
+ operand. LHS tracks the value produced by the previous chain element
+ (initially the result of PHI); NEXT_STMT walks the chain through
+ GROUP_NEXT_ELEMENT, starting at the first element. */
+ lhs = PHI_RESULT (phi);
+ next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
+ while (next_stmt)
+ {
+ if (gimple_assign_rhs2 (next_stmt) == lhs)
+ {
+ tree op = gimple_assign_rhs1 (next_stmt);
+ gimple def_stmt = NULL;
+
+ if (TREE_CODE (op) == SSA_NAME)
+ def_stmt = SSA_NAME_DEF_STMT (op);
+
+ /* Check that the other def is either defined in the loop
+ ("vect_internal_def"), or it's an induction (defined by a
+ loop-header phi-node). The chain value already is the second
+ operand here, so no swap is needed; if the first operand
+ passes the check we just advance to the next chain element. */
+ if (def_stmt
+ && gimple_bb (def_stmt)
+ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+ && (is_gimple_assign (def_stmt)
+ || is_gimple_call (def_stmt)
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+ == vect_induction_def
+ || (gimple_code (def_stmt) == GIMPLE_PHI
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+ == vect_internal_def
+ && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
+ {
+ lhs = gimple_assign_lhs (next_stmt);
+ next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+ continue;
+ }
+
+ return false;
+ }
+ else
+ {
+ tree op = gimple_assign_rhs2 (next_stmt);
+ gimple def_stmt = NULL;
+
+ if (TREE_CODE (op) == SSA_NAME)
+ def_stmt = SSA_NAME_DEF_STMT (op);
+
+ /* Check that the other def is either defined in the loop
+ ("vect_internal_def"), or it's an induction (defined by a
+ loop-header phi-node). The second operand is not LHS here;
+ presumably LHS is the first operand, since NEXT_STMT was
+ reached as a use of LHS (this branch does not re-check it).
+ If the second operand passes the check, the two operands are
+ swapped in place; stmts swapped earlier stay swapped even if
+ a later chain element makes us return FALSE. */
+ if (def_stmt
+ && gimple_bb (def_stmt)
+ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+ && (is_gimple_assign (def_stmt)
+ || is_gimple_call (def_stmt)
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+ == vect_induction_def
+ || (gimple_code (def_stmt) == GIMPLE_PHI
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+ == vect_internal_def
+ && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "swapping oprnds: ");
+ print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
+ }
+
+ swap_tree_operands (next_stmt,
+ gimple_assign_rhs1_ptr (next_stmt),
+ gimple_assign_rhs2_ptr (next_stmt));
+ mark_symbols_for_renaming (next_stmt);
+ }
+ else
+ return false;
+ }
+
+ lhs = gimple_assign_lhs (next_stmt);
+ next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+ }
+
+ /* Save the chain for further analysis in SLP detection. The chain is
+ represented by its first stmt, whose GROUP_SIZE records the number of
+ stmts that were linked above. */
+ first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
+ VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
+ GROUP_SIZE (vinfo_for_stmt (first)) = size;
+
+ return true;
+}
+
+
/* Function vect_is_simple_reduction_1
(1) Detect a cross-iteration def-use cycle that represents a simple
1. operation is commutative and associative and it is safe to
change the order of the computation (if CHECK_REDUCTION is true)
2. no uses for a2 in the loop (a2 is used out of the loop)
- 3. no uses of a1 in the loop besides the reduction operation.
+ 3. no uses of a1 in the loop besides the reduction operation
+ 4. no uses of a1 outside the loop.
- Condition 1 is tested here.
+ Conditions 1,4 are tested here.
Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
(2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
gimple use_stmt = USE_STMT (use_p);
if (is_gimple_debug (use_stmt))
continue;
- if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
- && vinfo_for_stmt (use_stmt)
+
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "intermediate value used outside loop.");
+
+ return NULL;
+ }
+
+ if (vinfo_for_stmt (use_stmt)
&& !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
nloop_uses++;
if (nloop_uses > 1)
return NULL;
}
- op3 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+ op3 = gimple_assign_rhs1 (def_stmt);
if (COMPARISON_CLASS_P (op3))
{
op4 = TREE_OPERAND (op3, 1);
op3 = TREE_OPERAND (op3, 0);
}
- op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
- op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
+ op1 = gimple_assign_rhs2 (def_stmt);
+ op2 = gimple_assign_rhs3 (def_stmt);
if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
{
op1 = gimple_assign_rhs1 (def_stmt);
op2 = gimple_assign_rhs2 (def_stmt);
- if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
{
if (vect_print_dump_info (REPORT_DETAILS))
report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
if (orig_code == MINUS_EXPR)
{
tree rhs = gimple_assign_rhs2 (def_stmt);
- tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+ tree var = TREE_CODE (rhs) == SSA_NAME
+ ? SSA_NAME_VAR (rhs)
+ : create_tmp_reg (TREE_TYPE (rhs), NULL);
+ tree negrhs = make_ssa_name (var, NULL);
gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
rhs, NULL);
gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
def2 = SSA_NAME_DEF_STMT (op2);
if (code != COND_EXPR
- && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
+ && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
{
if (vect_print_dump_info (REPORT_DETAILS))
report_vect_op (def_stmt, "reduction: no defs for operands: ");
if (def2 && def2 == phi
&& (code == COND_EXPR
+ || !def1 || gimple_nop_p (def1)
|| (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
&& (is_gimple_assign (def1)
|| is_gimple_call (def1)
report_vect_op (def_stmt, "detected reduction: ");
return def_stmt;
}
- else if (def1 && def1 == phi
- && (code == COND_EXPR
- || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
- && (is_gimple_assign (def2)
- || is_gimple_call (def2)
- || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
- == vect_induction_def
- || (gimple_code (def2) == GIMPLE_PHI
- && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
- == vect_internal_def
- && !is_loop_header_bb_p (gimple_bb (def2)))))))
+
+ if (def1 && def1 == phi
+ && (code == COND_EXPR
+ || !def2 || gimple_nop_p (def2)
+ || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
+ && (is_gimple_assign (def2)
+ || is_gimple_call (def2)
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+ == vect_induction_def
+ || (gimple_code (def2) == GIMPLE_PHI
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+ == vect_internal_def
+ && !is_loop_header_bb_p (gimple_bb (def2)))))))
{
if (check_reduction)
{
return def_stmt;
}
- else
+
+ /* Try to find SLP reduction chain. */
+ if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
{
if (vect_print_dump_info (REPORT_DETAILS))
- report_vect_op (def_stmt, "reduction: unknown pattern: ");
+ report_vect_op (def_stmt, "reduction: detected reduction chain: ");
- return NULL;
+ return def_stmt;
}
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ report_vect_op (def_stmt, "reduction: unknown pattern: ");
+
+ return NULL;
}
/* Wrapper around vect_is_simple_reduction_1, that won't modify code
/* Calculate the cost of one scalar iteration of the loop. */
int
-vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
+vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
if (stmt_info
&& !STMT_VINFO_RELEVANT_P (stmt_info)
&& (!STMT_VINFO_LIVE_P (stmt_info)
- || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+ || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !STMT_VINFO_IN_PATTERN_P (stmt_info))
continue;
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
peel_iters_prologue = niters < peel_iters_prologue ?
niters : peel_iters_prologue;
*peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ /* If we need to peel for gaps, but no peeling is required, we have to
+ peel VF iterations. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
+ *peel_iters_epilogue = vf;
}
return (peel_iters_prologue * scalar_single_iter_cost)
{
gimple stmt = gsi_stmt (si);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+ {
+ stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ stmt_info = vinfo_for_stmt (stmt);
+ }
+
/* Skip stmts that are not vectorized inside the loop. */
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& (!STMT_VINFO_LIVE_P (stmt_info)
- || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+ || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
continue;
+
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
some of the "outside" costs are generated inside the outer-loop. */
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+ if (is_pattern_stmt_p (stmt_info)
+ && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
+ {
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple pattern_def_stmt = gsi_stmt (gsi);
+ stmt_vec_info pattern_def_stmt_info
+ = vinfo_for_stmt (pattern_def_stmt);
+ if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+ || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+ {
+ vec_inside_cost
+ += STMT_VINFO_INSIDE_OF_LOOP_COST
+ (pattern_def_stmt_info) * factor;
+ vec_outside_cost
+ += STMT_VINFO_OUTSIDE_OF_LOOP_COST
+ (pattern_def_stmt_info);
+ }
+ }
+ }
}
}
- scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
+ scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
/* Add additional cost for the peeled instructions in prologue and epilogue
loop.
case GIMPLE_BINARY_RHS:
reduction_op = gimple_assign_rhs2 (stmt);
break;
+ case GIMPLE_TERNARY_RHS:
+ reduction_op = gimple_assign_rhs3 (stmt);
+ break;
default:
gcc_unreachable ();
}
vec_def, vec_step);
vec_def = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, vec_def);
-
+
gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
if (!useless_type_conversion_p (resvectype, vectype))
{
gimple_assign_set_lhs (new_stmt, induc_def);
si = gsi_start_bb (bb);
gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
+ set_vinfo_for_stmt (new_stmt,
+ new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
}
return induc_def;
gimple use_stmt, orig_stmt, reduction_phi = NULL;
bool nested_in_vect_loop = false;
VEC (gimple, heap) *new_phis = NULL;
+ VEC (gimple, heap) *inner_phis = NULL;
enum vect_def_type dt = vect_unknown_def_type;
int j, i;
VEC (tree, heap) *scalar_results = NULL;
unsigned int group_size = 1, k, ratio;
VEC (tree, heap) *vec_initial_defs = NULL;
VEC (gimple, heap) *phis;
+ bool slp_reduc = false;
+ tree new_phi_result;
+ gimple inner_phi = NULL;
if (slp_node)
group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
{
case GIMPLE_SINGLE_RHS:
gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
- == ternary_op);
+ == ternary_op);
reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
break;
case GIMPLE_UNARY_RHS:
reduction_op = reduc_index ?
gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
break;
+ case GIMPLE_TERNARY_RHS:
+ reduction_op = gimple_op (stmt, reduc_index + 1);
+ break;
default:
gcc_unreachable ();
}
/* Get the loop-entry arguments. */
if (slp_node)
- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
- NULL, reduc_index);
+ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
+ NULL, slp_node, reduc_index);
else
{
vec_initial_defs = VEC_alloc (tree, heap, 1);
}
/* The epilogue is created for the outer-loop, i.e., for the loop being
- vectorized. */
+ vectorized. Create exit phis for the outer loop. */
if (double_reduc)
{
loop = outer_loop;
exit_bb = single_exit (loop)->dest;
+ inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
+ FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
+ {
+ gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+ exit_bb);
+ SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+ PHI_RESULT (phi));
+ set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+ loop_vinfo, NULL));
+ VEC_quick_push (gimple, inner_phis, phi);
+ VEC_replace (gimple, new_phis, i, outer_phi);
+ prev_phi_info = vinfo_for_stmt (outer_phi);
+ while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
+ {
+ phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+ outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+ exit_bb);
+ SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+ PHI_RESULT (phi));
+ set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+ loop_vinfo, NULL));
+ STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
+ prev_phi_info = vinfo_for_stmt (outer_phi);
+ }
+ }
}
exit_gsi = gsi_after_labels (exit_bb);
if (nested_in_vect_loop && !double_reduc)
goto vect_finalize_reduction;
+ /* SLP reduction without reduction chain, e.g.,
+ # a1 = phi <a2, a0>
+ # b1 = phi <b2, b0>
+ a2 = operation (a1)
+ b2 = operation (b1) */
+ slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+
+ /* In case of reduction chain, e.g.,
+ # a1 = phi <a3, a0>
+ a2 = operation (a1)
+ a3 = operation (a2),
+
+ we may end up with more than one vector result. Here we reduce them to
+ one vector. */
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+ {
+ tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
+ tree tmp;
+ gimple new_vec_stmt = NULL;
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ for (k = 1; k < VEC_length (gimple, new_phis); k++)
+ {
+ gimple next_phi = VEC_index (gimple, new_phis, k);
+ tree second_vect = PHI_RESULT (next_phi);
+
+ tmp = build2 (code, vectype, first_vect, second_vect);
+ new_vec_stmt = gimple_build_assign (vec_dest, tmp);
+ first_vect = make_ssa_name (vec_dest, new_vec_stmt);
+ gimple_assign_set_lhs (new_vec_stmt, first_vect);
+ gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
+ }
+
+ new_phi_result = first_vect;
+ if (new_vec_stmt)
+ {
+ VEC_truncate (gimple, new_phis, 0);
+ VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
+ }
+ }
+ else
+ new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
+
/* 2.3 Create the reduction code, using one of the three schemes described
above. In SLP we simply need to extract all the elements from the
vector (without reducing them), so we use scalar shifts. */
- if (reduc_code != ERROR_MARK && !slp_node)
+ if (reduc_code != ERROR_MARK && !slp_reduc)
{
tree tmp;
fprintf (vect_dump, "Reduce using direct vector reduction.");
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- new_phi = VEC_index (gimple, new_phis, 0);
- tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
+ tmp = build1 (reduc_code, vectype, new_phi_result);
epilog_stmt = gimple_build_assign (vec_dest, tmp);
new_temp = make_ssa_name (vec_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
have_whole_vector_shift = false;
}
- if (have_whole_vector_shift && !slp_node)
+ if (have_whole_vector_shift && !slp_reduc)
{
/*** Case 2: Create:
for (offset = VS/2; offset >= element_size; offset/=2)
fprintf (vect_dump, "Reduce using vector shifts");
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- new_phi = VEC_index (gimple, new_phis, 0);
- new_temp = PHI_RESULT (new_phi);
+ new_temp = new_phi_result;
for (bit_offset = vec_size_in_bits/2;
bit_offset >= element_bitsize;
bit_offset /= 2)
vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
{
- vec_temp = PHI_RESULT (new_phi);
+ if (gimple_code (new_phi) == GIMPLE_PHI)
+ vec_temp = PHI_RESULT (new_phi);
+ else
+ vec_temp = gimple_assign_lhs (new_phi);
rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
bitsize_zero_node);
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
/* In SLP we don't need to apply reduction operation, so we just
collect s' values in SCALAR_RESULTS. */
- if (slp_node)
+ if (slp_reduc)
VEC_safe_push (tree, heap, scalar_results, new_temp);
for (bit_offset = element_bitsize;
gimple_assign_set_lhs (epilog_stmt, new_name);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- if (slp_node)
+ if (slp_reduc)
{
/* In SLP we don't need to apply reduction operation, so
we just collect s' values in SCALAR_RESULTS. */
unrolling. If the size of SCALAR_RESULTS is greater than
GROUP_SIZE, we reduce them combining elements modulo
GROUP_SIZE. */
- if (slp_node)
+ if (slp_reduc)
{
tree res, first_res, new_res;
gimple new_stmt;
if (adjustment_def)
{
- gcc_assert (!slp_node);
+ gcc_assert (!slp_reduc);
if (nested_in_vect_loop)
{
new_phi = VEC_index (gimple, new_phis, 0);
use <s_out4>
use <s_out4> */
+
+ /* In an SLP reduction chain we reduce the vector results into one vector
+ if necessary, hence we set GROUP_SIZE to 1 here. SCALAR_DEST is the LHS
+ of the last stmt in the reduction chain, since we are looking for the
+ loop exit phi node. */
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+ {
+ scalar_dest = gimple_assign_lhs (VEC_index (gimple,
+ SLP_TREE_SCALAR_STMTS (slp_node),
+ group_size - 1));
+ group_size = 1;
+ }
+
/* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
case that GROUP_SIZE is greater than vectorization factor). Therefore, we
need to match SCALAR_RESULTS with corresponding statements. The first
{
epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
+ if (double_reduc)
+ inner_phi = VEC_index (gimple, inner_phis, k / ratio);
}
- if (slp_node)
+ if (slp_reduc)
{
gimple current_stmt = VEC_index (gimple,
SLP_TREE_SCALAR_STMTS (slp_node), k);
vs1 was created previously in this function by a call to
vect_get_vec_def_for_operand and is stored in
vec_initial_def;
- vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
+ vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
vs0 is created here. */
/* Create vector phi node. */
add_phi_arg (vect_phi, vect_phi_init,
loop_preheader_edge (outer_loop),
UNKNOWN_LOCATION);
- add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
+ add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
if (vect_print_dump_info (REPORT_DETAILS))
{
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
VEC (gimple, heap) *phis = NULL;
int vec_num;
- tree def0, def1, tem;
+ tree def0, def1, tem, op0, op1 = NULL_TREE;
+
+ /* In case of reduction chain we switch to the first stmt in the chain, but
+ we don't update STMT_INFO, since only the last stmt is marked as reduction
+ and has reduction properties. */
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+ stmt = GROUP_FIRST_ELEMENT (stmt_info);
if (nested_in_vect_loop_p (loop, stmt))
{
}
/* 1. Is vectorizable reduction? */
- /* Not supportable if the reduction variable is used in the loop. */
- if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
+ /* Not supportable if the reduction variable is used in the loop, unless
+ it's a reduction chain. */
+ if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
+ && !GROUP_FIRST_ELEMENT (stmt_info))
return false;
/* Reductions that are not used even in an enclosing outer-loop,
if (orig_stmt)
{
orig_stmt_info = vinfo_for_stmt (orig_stmt);
- gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
}
ops[1] = gimple_assign_rhs2 (stmt);
break;
+ case GIMPLE_TERNARY_RHS:
+ code = gimple_assign_rhs_code (stmt);
+ op_type = TREE_CODE_LENGTH (code);
+ gcc_assert (op_type == ternary_op);
+ ops[0] = gimple_assign_rhs1 (stmt);
+ ops[1] = gimple_assign_rhs2 (stmt);
+ ops[2] = gimple_assign_rhs3 (stmt);
+ break;
+
case GIMPLE_UNARY_RHS:
return false;
gcc_unreachable ();
}
+ if (code == COND_EXPR && slp_node)
+ return false;
+
scalar_dest = gimple_assign_lhs (stmt);
scalar_type = TREE_TYPE (scalar_dest);
if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
&& !SCALAR_FLOAT_TYPE_P (scalar_type))
return false;
+ /* Do not try to vectorize bit-precision reductions. */
+ if ((TYPE_PRECISION (scalar_type)
+ != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
+ return false;
+
/* All uses but the last are expected to be defined in the loop.
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
- for (i = 0; i < op_type-1; i++)
+ for (i = 0; i < op_type - 1; i++)
{
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
if (i == 0 && code == COND_EXPR)
continue;
- is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
+ is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
&def_stmt, &def, &dt, &tem);
if (!vectype_in)
vectype_in = tem;
gcc_assert (is_simple_use);
+
if (dt != vect_internal_def
&& dt != vect_external_def
&& dt != vect_constant_def
}
}
- is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL, &def_stmt,
- &def, &dt, &tem);
+ is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
+ &def_stmt, &def, &dt, &tem);
if (!vectype_in)
vectype_in = tem;
gcc_assert (is_simple_use);
- gcc_assert (dt == vect_reduction_def
- || dt == vect_nested_cycle
- || ((dt == vect_internal_def || dt == vect_external_def
- || dt == vect_constant_def || dt == vect_induction_def)
- && nested_cycle && found_nested_cycle_def));
+ if (!(dt == vect_reduction_def
+ || dt == vect_nested_cycle
+ || ((dt == vect_internal_def || dt == vect_external_def
+ || dt == vect_constant_def || dt == vect_induction_def)
+ && nested_cycle && found_nested_cycle_def)))
+ {
+ /* For pattern recognized stmts, orig_stmt might be a reduction,
+ but some helper statements for the pattern might not, or
+ might be COND_EXPRs with reduction uses in the condition. */
+ gcc_assert (orig_stmt);
+ return false;
+ }
if (!found_nested_cycle_def)
reduc_def_stmt = def_stmt;
!nested_cycle,
&dummy));
else
- gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
- !nested_cycle, &dummy));
+ {
+ gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
+ !nested_cycle, &dummy);
+ /* We changed STMT to be the first stmt in reduction chain, hence we
+ check that in this case the first element in the chain is STMT. */
+ gcc_assert (stmt == tmp
+ || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
+ }
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
if (code == COND_EXPR)
{
- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
+ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported condition in reduction");
return false;
}
+ /* In case of widening multiplication by a constant, we update the type
+ of the constant to be the type of the other operand. We check that the
+ constant fits the type in the pattern recognition pass. */
+ if (code == DOT_PROD_EXPR
+ && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
+ {
+ if (TREE_CODE (ops[0]) == INTEGER_CST)
+ ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
+ else if (TREE_CODE (ops[1]) == INTEGER_CST)
+ ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "invalid types in dot-prod");
+
+ return false;
+ }
+ }
+
if (!vec_stmt) /* transformation not required. */
{
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
return false;
+ STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
return true;
}
gcc_assert (!slp_node);
vectorizable_condition (stmt, gsi, vec_stmt,
PHI_RESULT (VEC_index (gimple, phis, 0)),
- reduc_index);
+ reduc_index, NULL);
/* Multiple types are not supported for condition. */
break;
}
/* Handle uses. */
if (j == 0)
{
- tree op0, op1 = NULL_TREE;
-
op0 = ops[!reduc_index];
if (op_type == ternary_op)
{
}
if (slp_node)
- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
- -1);
+ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
+ slp_node, -1);
else
{
loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
{
if (!slp_node)
{
- enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
- loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
+ enum vect_def_type dt;
+ gimple dummy_stmt;
+ tree dummy;
+
+ vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
+ &dummy_stmt, &dummy, &dt);
+ loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
+ loop_vec_def0);
VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
if (op_type == ternary_op)
{
+ vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
+ &dummy, &dt);
loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
loop_vec_def1);
VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
if (slp_node)
{
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
tree vec_def;
gcc_assert (ncopies >= 1);
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
+ /* FORNOW. These restrictions should be relaxed. */
+ if (nested_in_vect_loop_p (loop, phi))
{
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ gimple exit_phi;
+ edge latch_e;
+ tree loop_arg;
+
+ if (ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
+
+ exit_phi = NULL;
+ latch_e = loop_latch_edge (loop->inner);
+ loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
+ {
+ if (!flow_bb_inside_loop_p (loop->inner,
+ gimple_bb (USE_STMT (use_p))))
+ {
+ exit_phi = USE_STMT (use_p);
+ break;
+ }
+ }
+ if (exit_phi)
+ {
+ stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
+ if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
+ && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "inner-loop induction only used outside "
+ "of the outer vectorized loop.");
+ return false;
+ }
+ }
}
if (!STMT_VINFO_RELEVANT_P (stmt_info))
else
op = gimple_op (stmt, i + 1);
if (op
- && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
+ && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
+ &dt))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
tree cond_expr = NULL_TREE;
gimple_seq cond_expr_stmt_list = NULL;
bool do_peeling_for_loop_bound;
+ gimple stmt, pattern_stmt;
+ gimple_seq pattern_def_seq = NULL;
+ gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
+ bool transform_pattern_stmt = false;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
do_peeling_for_loop_bound
= (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+ && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
}
}
- for (si = gsi_start_bb (bb); !gsi_end_p (si);)
+ pattern_stmt = NULL;
+ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
{
- gimple stmt = gsi_stmt (si);
bool is_store;
+ if (transform_pattern_stmt)
+ stmt = pattern_stmt;
+ else
+ stmt = gsi_stmt (si);
+
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "------>vectorizing statement: ");
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
- {
- gsi_next (&si);
- continue;
+ {
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+ {
+ stmt = pattern_stmt;
+ stmt_info = vinfo_for_stmt (stmt);
+ }
+ else
+ {
+ gsi_next (&si);
+ continue;
+ }
}
+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+ transform_pattern_stmt = true;
+
+ /* If pattern statement has def stmts, vectorize them too. */
+ if (is_pattern_stmt_p (stmt_info))
+ {
+ if (pattern_def_seq == NULL)
+ {
+ pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+ pattern_def_si = gsi_start (pattern_def_seq);
+ }
+ else if (!gsi_end_p (pattern_def_si))
+ gsi_next (&pattern_def_si);
+ if (pattern_def_seq != NULL)
+ {
+ gimple pattern_def_stmt = NULL;
+ stmt_vec_info pattern_def_stmt_info = NULL;
+
+ while (!gsi_end_p (pattern_def_si))
+ {
+ pattern_def_stmt = gsi_stmt (pattern_def_si);
+ pattern_def_stmt_info
+ = vinfo_for_stmt (pattern_def_stmt);
+ if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+ || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+ break;
+ gsi_next (&pattern_def_si);
+ }
+
+ if (!gsi_end_p (pattern_def_si))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "==> vectorizing pattern def"
+ " stmt: ");
+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+ TDF_SLIM);
+ }
+
+ stmt = pattern_def_stmt;
+ stmt_info = pattern_def_stmt_info;
+ }
+ else
+ {
+ pattern_def_si = gsi_start (NULL);
+ transform_pattern_stmt = false;
+ }
+ }
+ else
+ transform_pattern_stmt = false;
+ }
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
- nunits =
- (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
+ STMT_VINFO_VECTYPE (stmt_info));
if (!STMT_SLP_TYPE (stmt_info)
&& nunits != (unsigned int) vectorization_factor
&& vect_print_dump_info (REPORT_DETAILS))
/* Hybrid SLP stmts must be vectorized in addition to SLP. */
if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
{
- gsi_next (&si);
+ if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+ {
+ pattern_def_seq = NULL;
+ gsi_next (&si);
+ }
continue;
}
}
/* Interleaving. If IS_STORE is TRUE, the vectorization of the
interleaving chain was completed - free all the stores in
the chain. */
- vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
- gsi_remove (&si, true);
- continue;
+ gsi_next (&si);
+ vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
+ continue;
}
else
{
/* Free the attached stmt_vec_info and remove the stmt. */
- free_stmt_vec_info (stmt);
+ free_stmt_vec_info (gsi_stmt (si));
gsi_remove (&si, true);
continue;
}
}
- gsi_next (&si);
+
+ if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+ {
+ pattern_def_seq = NULL;
+ gsi_next (&si);
+ }
} /* stmts in BB */
} /* BBs in loop */