#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
+#include "recog.h"
/* Main analysis functions. */
static loop_vec_info vect_analyze_loop_form (struct loop *);
}
+/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
+ the number of created vector stmts depends on the unrolling factor). However,
+ the actual number of vector stmts for every SLP node depends on VF which is
+ set later in vect_analyze_operations(). Hence, SLP costs should be updated.
+ In this function we assume that the inside costs calculated in
+ vect_model_xxx_cost are linear in ncopies. */
+
+static void
+vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
+{
+ unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ slp_instance instance;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ /* We assume that costs are linear in ncopies. */
+ SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
+ / SLP_INSTANCE_UNROLLING_FACTOR (instance);
+}
+
+
/* Function vect_analyze_operations.
Scan the loop stmts and make sure they are all vectorizable. */
int min_profitable_iters;
int min_scalar_loop_bound;
unsigned int th;
+ bool only_slp_in_loop = true;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_operations ===");
ok = (vectorizable_type_promotion (stmt, NULL, NULL)
|| vectorizable_type_demotion (stmt, NULL, NULL)
- || vectorizable_conversion (stmt, NULL, NULL)
- || vectorizable_operation (stmt, NULL, NULL)
- || vectorizable_assignment (stmt, NULL, NULL)
- || vectorizable_load (stmt, NULL, NULL)
+ || vectorizable_conversion (stmt, NULL, NULL, NULL)
+ || vectorizable_operation (stmt, NULL, NULL, NULL)
+ || vectorizable_assignment (stmt, NULL, NULL, NULL)
+ || vectorizable_load (stmt, NULL, NULL, NULL)
|| vectorizable_call (stmt, NULL, NULL)
- || vectorizable_store (stmt, NULL, NULL)
+ || vectorizable_store (stmt, NULL, NULL, NULL)
|| vectorizable_condition (stmt, NULL, NULL)
|| vectorizable_reduction (stmt, NULL, NULL));
}
return false;
}
+
+ if (!PURE_SLP_STMT (stmt_info))
+ {
+ /* STMT needs loop-based vectorization. */
+ only_slp_in_loop = false;
+
+ /* Groups of strided accesses whose size is not a power of 2 are
+ not vectorizable yet using loop-vectorization. Therefore, if
+ this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
+ both SLPed and loop-based vectorzed), the loop cannot be
+ vectorized. */
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
+ && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
+ DR_GROUP_FIRST_DR (stmt_info)))) == -1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "not vectorized: the size of group "
+ "of strided accesses is not a power of 2");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+ }
} /* stmts in bb */
} /* bbs */
return false;
}
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by the
+ SLP instances. If that unrolling factor is 1, we say, that we perform
+ pure SLP on loop - cross iteration parallelism is not exploited. */
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
/* Analyze cost. Decide if worth while to vectorize. */
+ /* Once VF is set, SLP costs should be updated since the number of created
+ vector stmts depends on VF. */
+ vect_update_slp_costs_according_to_vf (loop_vinfo);
+
min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
if (min_profitable_iters < 0)
/* For interleaved data accesses the step in the loop must be multiplied by
the size of the interleaving group. */
- if (DR_GROUP_FIRST_DR (stmt_info))
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
- if (DR_GROUP_FIRST_DR (peel_stmt_info))
+ if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
/* It can be assumed that the data refs with the same alignment as dr_peel
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* For interleaving, only the alignment of the first access matters. */
- if (DR_GROUP_FIRST_DR (stmt_info)
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
&& DR_GROUP_FIRST_DR (stmt_info) != stmt)
continue;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- if (DR_GROUP_FIRST_DR (stmt_info))
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
{
/* For interleaved access we peel only if number of iterations in
the prolog loop ({VF - misalignment}), is a multiple of the
/* For interleaving, only the alignment of the first access
matters. */
- if (DR_GROUP_FIRST_DR (stmt_info)
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
&& DR_GROUP_FIRST_DR (stmt_info) != stmt)
continue;
members of the group, therefore we divide the number of iterations
by the group size. */
stmt_info = vinfo_for_stmt (DR_STMT (dr0));
- if (DR_GROUP_FIRST_DR (stmt_info))
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
npeel /= DR_GROUP_SIZE (stmt_info);
if (vect_print_dump_info (REPORT_DETAILS))
stmt_info = vinfo_for_stmt (stmt);
/* For interleaving, only the alignment of the first access
matters. */
- if (DR_GROUP_FIRST_DR (stmt_info)
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
&& DR_GROUP_FIRST_DR (stmt_info) != stmt)
continue;
/* For interleaving, only the alignment of the first access
matters. */
if (aligned_access_p (dr)
- || (DR_GROUP_FIRST_DR (stmt_info)
+ || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
&& DR_GROUP_FIRST_DR (stmt_info) != stmt))
continue;
}
-/* Function vect_analyze_data_ref_access.
-
- Analyze the access pattern of the data-reference DR. For now, a data access
- has to be consecutive to be considered vectorizable. */
+/* Analyze groups of strided accesses: check that DR belongs to a group of
+ strided accesses of legal size, step, etc. Detect gaps, single element
+ interleaving, and other special cases. Set strided access info.
+ Collect groups of strided stores for further use in SLP analysis. */
static bool
-vect_analyze_data_ref_access (struct data_reference *dr)
+vect_analyze_group_access (struct data_reference *dr)
{
tree step = DR_STEP (dr);
tree scalar_type = TREE_TYPE (DR_REF (dr));
tree stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
HOST_WIDE_INT stride;
+ bool slp_impossible = false;
- /* Don't allow invariant accesses. */
- if (dr_step == 0)
- return false;
-
- if (nested_in_vect_loop_p (loop, stmt))
- {
- /* For the rest of the analysis we use the outer-loop step. */
- step = STMT_VINFO_DR_STEP (stmt_info);
- dr_step = TREE_INT_CST_LOW (step);
-
- if (dr_step == 0)
- {
- if (vect_print_dump_info (REPORT_ALIGNMENT))
- fprintf (vect_dump, "zero step in outer loop.");
- if (DR_IS_READ (dr))
- return true;
- else
- return false;
- }
- }
-
/* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
interleaving group (including gaps). */
stride = dr_step / type_size;
- /* Consecutive? */
- if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
- {
- /* Mark that it is not interleaving. */
- DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
- return true;
- }
-
- if (nested_in_vect_loop_p (loop, stmt))
- {
- if (vect_print_dump_info (REPORT_ALIGNMENT))
- fprintf (vect_dump, "strided access in outer loop.");
- return false;
- }
-
/* Not consecutive access is possible only if it is a part of interleaving. */
if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
{
HOST_WIDE_INT diff, count_in_bytes;
while (next)
- {
- /* Skip same data-refs. In case that two or more stmts share data-ref
- (supported only for loads), we vectorize only the first stmt, and
- the rest get their vectorized loads from the first one. */
- if (!tree_int_cst_compare (DR_INIT (data_ref),
- DR_INIT (STMT_VINFO_DATA_REF (
- vinfo_for_stmt (next)))))
- {
+ {
+ /* Skip same data-refs. In case that two or more stmts share data-ref
+ (supported only for loads), we vectorize only the first stmt, and
+ the rest get their vectorized loads from the first one. */
+ if (!tree_int_cst_compare (DR_INIT (data_ref),
+ DR_INIT (STMT_VINFO_DATA_REF (
+ vinfo_for_stmt (next)))))
+ {
if (!DR_IS_READ (data_ref))
- {
+ {
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Two store stmts share the same dr.");
- return false;
+ return false;
}
- /* Check that there is no load-store dependencies for this loads
+ /* Check that there is no load-store dependencies for this loads
to prevent a case of load-store-load to the same location. */
if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
|| DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump,
+ fprintf (vect_dump,
"READ_WRITE dependence in interleaving.");
return false;
}
- /* For load use the same data-ref load. */
- DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
+ /* For load use the same data-ref load. */
+ DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
- prev = next;
- next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
- continue;
- }
- prev = next;
+ prev = next;
+ next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ continue;
+ }
+ prev = next;
- /* Check that all the accesses have the same STEP. */
- next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
- if (tree_int_cst_compare (step, next_step))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "not consecutive access in interleaving");
- return false;
- }
+ /* Check that all the accesses have the same STEP. */
+ next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
+ if (tree_int_cst_compare (step, next_step))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "not consecutive access in interleaving");
+ return false;
+ }
- data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
- /* Check that the distance between two accesses is equal to the type
- size. Otherwise, we have gaps. */
- diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
- - TREE_INT_CST_LOW (prev_init)) / type_size;
- if (!DR_IS_READ (data_ref) && diff != 1)
+ data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
+ /* Check that the distance between two accesses is equal to the type
+ size. Otherwise, we have gaps. */
+ diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
+ - TREE_INT_CST_LOW (prev_init)) / type_size;
+ if (diff != 1)
{
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "interleaved store with gaps");
- return false;
+ /* FORNOW: SLP of accesses with gaps is not supported. */
+ slp_impossible = true;
+ if (!DR_IS_READ (data_ref))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "interleaved store with gaps");
+ return false;
+ }
}
- /* Store the gap from the previous member of the group. If there is no
+
+ /* Store the gap from the previous member of the group. If there is no
gap in the access, DR_GROUP_GAP is always 1. */
- DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
+ DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
- prev_init = DR_INIT (data_ref);
- next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
- /* Count the number of data-refs in the chain. */
- count++;
- }
+ prev_init = DR_INIT (data_ref);
+ next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ /* Count the number of data-refs in the chain. */
+ count++;
+ }
- /* COUNT is the number of accesses found, we multiply it by the size of
- the type to get COUNT_IN_BYTES. */
+ /* COUNT is the number of accesses found, we multiply it by the size of
+ the type to get COUNT_IN_BYTES. */
count_in_bytes = type_size * count;
/* Check that the size of the interleaving is not greater than STEP. */
- if (dr_step < count_in_bytes)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "interleaving size is greater than step for ");
- print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
- }
- return false;
- }
+ if (dr_step < count_in_bytes)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "interleaving size is greater than step for ");
+ print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
+ }
+ return false;
+ }
- /* Check that the size of the interleaving is equal to STEP for stores,
- i.e., that there are no gaps. */
- if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "interleaved store with gaps");
- return false;
- }
+ /* Check that the size of the interleaving is equal to STEP for stores,
+ i.e., that there are no gaps. */
+ if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "interleaved store with gaps");
+ return false;
+ }
/* Check that STEP is a multiple of type size. */
if ((dr_step % type_size) != 0)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "step is not a multiple of type size: step ");
print_generic_expr (vect_dump, step, TDF_SLIM);
print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
TDF_SLIM);
}
- return false;
- }
+ return false;
+ }
- /* FORNOW: we handle only interleaving that is a power of 2. */
+ /* FORNOW: we handle only interleaving that is a power of 2.
+ We don't fail here if it may be still possible to vectorize the
+ group using SLP. If not, the size of the group will be checked in
+ vect_analyze_operations, and the vectorization will fail. */
if (exact_log2 (stride) == -1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "interleaving is not a power of 2");
- return false;
+
+ if (slp_impossible)
+ return false;
}
DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
+
+ /* SLP: create an SLP data structure for every interleaving group of
+ stores for further analysis in vect_analyse_slp. */
+ if (!DR_IS_READ (dr) && !slp_impossible)
+ VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
}
+
return true;
}
+/* Analyze the access pattern of the data-reference DR.
+ In case of non-consecutive accesse call vect_analyze_group_access() to
+ analyze groups of strided accesses. */
+
+static bool
+vect_analyze_data_ref_access (struct data_reference *dr)
+{
+ tree step = DR_STEP (dr);
+ tree scalar_type = TREE_TYPE (DR_REF (dr));
+ tree stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+
+ if (!step)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "bad data-ref access");
+ return false;
+ }
+
+ /* Don't allow invariant accesses. */
+ if (dr_step == 0)
+ return false;
+
+ if (nested_in_vect_loop_p (loop, stmt))
+ {
+ /* For the rest of the analysis we use the outer-loop step. */
+ step = STMT_VINFO_DR_STEP (stmt_info);
+ dr_step = TREE_INT_CST_LOW (step);
+
+ if (dr_step == 0)
+ {
+ if (vect_print_dump_info (REPORT_ALIGNMENT))
+ fprintf (vect_dump, "zero step in outer loop.");
+ if (DR_IS_READ (dr))
+ return true;
+ else
+ return false;
+ }
+ }
+
+ /* Consecutive? */
+ if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+ {
+ /* Mark that it is not interleaving. */
+ DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
+ return true;
+ }
+
+ if (nested_in_vect_loop_p (loop, stmt))
+ {
+ if (vect_print_dump_info (REPORT_ALIGNMENT))
+ fprintf (vect_dump, "strided access in outer loop.");
+ return false;
+ }
+
+ /* Not consecutive access - check if it's a part of interleaving group. */
+ return vect_analyze_group_access (dr);
+}
+
+
/* Function vect_analyze_data_ref_accesses.
Analyze the access pattern of all the data references in the loop.
}
+/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
+
+void
+vect_free_slp_tree (slp_tree node)
+{
+ if (!node)
+ return;
+
+ if (SLP_TREE_LEFT (node))
+ vect_free_slp_tree (SLP_TREE_LEFT (node));
+
+ if (SLP_TREE_RIGHT (node))
+ vect_free_slp_tree (SLP_TREE_RIGHT (node));
+
+ VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
+
+ if (SLP_TREE_VEC_STMTS (node))
+ VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
+
+ free (node);
+}
+
+
+/* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
+ of a legal type and that they match the defs of the first stmt of the SLP
+ group (stored in FIRST_STMT_...). */
+
+static bool
+vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
+ tree rhs, VEC (tree, heap) **def_stmts0,
+ VEC (tree, heap) **def_stmts1,
+ enum vect_def_type *first_stmt_dt0,
+ enum vect_def_type *first_stmt_dt1,
+ tree *first_stmt_def0_type,
+ tree *first_stmt_def1_type,
+ tree *first_stmt_const_oprnd,
+ int ncopies_for_cost)
+{
+ tree oprnd;
+ enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
+ unsigned int i, number_of_oprnds = op_type;
+ tree def, def_stmt;
+ enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
+ stmt_vec_info stmt_info =
+ vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
+
+ /* Store. */
+ if (!op_type)
+ number_of_oprnds = 1;
+ else
+ gcc_assert (op_type == unary_op || op_type == binary_op);
+
+ for (i = 0; i < number_of_oprnds; i++)
+ {
+ if (op_type)
+ oprnd = TREE_OPERAND (rhs, i);
+ else
+ oprnd = rhs;
+
+ if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
+ || (!def_stmt && dt[i] != vect_constant_def))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: can't find def for ");
+ print_generic_expr (vect_dump, oprnd, TDF_SLIM);
+ }
+
+ return false;
+ }
+
+ if (!*first_stmt_dt0)
+ {
+ /* op0 of the first stmt of the group - store its info. */
+ *first_stmt_dt0 = dt[i];
+ if (def)
+ *first_stmt_def0_type = TREE_TYPE (def);
+ else
+ *first_stmt_const_oprnd = oprnd;
+
+ /* Analyze costs (for the first stmt of the group only). */
+ if (op_type)
+ /* Not memory operation (we don't call this functions for loads). */
+ vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
+ else
+ /* Store. */
+ vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+ }
+
+ else
+ {
+ if (!*first_stmt_dt1 && i == 1)
+ {
+ /* op1 of the first stmt of the group - store its info. */
+ *first_stmt_dt1 = dt[i];
+ if (def)
+ *first_stmt_def1_type = TREE_TYPE (def);
+ else
+ {
+ /* We assume that the stmt contains only one constant
+ operand. We fail otherwise, to be on the safe side. */
+ if (*first_stmt_const_oprnd)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: two constant "
+ "oprnds in stmt");
+ return false;
+ }
+ *first_stmt_const_oprnd = oprnd;
+ }
+ }
+ else
+ {
+ /* Not first stmt of the group, check that the def-stmt/s match
+ the def-stmt/s of the first stmt. */
+ if ((i == 0
+ && (*first_stmt_dt0 != dt[i]
+ || (*first_stmt_def0_type && def
+ && *first_stmt_def0_type != TREE_TYPE (def))))
+ || (i == 1
+ && (*first_stmt_dt1 != dt[i]
+ || (*first_stmt_def1_type && def
+ && *first_stmt_def1_type != TREE_TYPE (def))))
+ || (!def
+ && TREE_TYPE (*first_stmt_const_oprnd)
+ != TREE_TYPE (oprnd)))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: different types ");
+
+ return false;
+ }
+ }
+ }
+
+ /* Check the types of the definitions. */
+ switch (dt[i])
+ {
+ case vect_constant_def:
+ case vect_invariant_def:
+ break;
+
+ case vect_loop_def:
+ if (i == 0)
+ VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
+ else
+ VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
+ break;
+
+ default:
+ /* FORNOW: Not supported. */
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: illegal type of def ");
+ print_generic_expr (vect_dump, def, TDF_SLIM);
+ }
+
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+/* Recursively build an SLP tree starting from NODE.
+ Fail (and return FALSE) if def-stmts are not isomorphic, require data
+ permutation or are of unsupported types of operation. Otherwise, return
+ TRUE.
+ SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
+ in the case of multiple types for now. */
+
+static bool
+vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
+ unsigned int group_size, bool *slp_impossible,
+ int *inside_cost, int *outside_cost,
+ int ncopies_for_cost)
+{
+ VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
+ VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
+ unsigned int i;
+ VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
+ tree stmt = VEC_index (tree, stmts, 0);
+ enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
+ enum tree_code first_stmt_code = 0;
+ tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
+ tree lhs, rhs, prev_stmt = NULL_TREE;
+ bool stop_recursion = false, need_same_oprnds = false;
+ tree vectype, scalar_type, first_op1 = NULL_TREE;
+ unsigned int vectorization_factor = 0, ncopies;
+ optab optab;
+ int icode;
+ enum machine_mode optab_op2_mode;
+ enum machine_mode vec_mode;
+ tree first_stmt_const_oprnd = NULL_TREE;
+ struct data_reference *first_dr;
+
+ /* For every stmt in NODE find its def stmt/s. */
+ for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP for ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ return false;
+ }
+
+ scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
+ if (ncopies > 1)
+ {
+ /* FORNOW. */
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "SLP failed - multiple types ");
+
+ *slp_impossible = true;
+ return false;
+ }
+
+ lhs = GIMPLE_STMT_OPERAND (stmt, 0);
+ rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+
+ /* Check the operation. */
+ if (i == 0)
+ {
+ first_stmt_code = TREE_CODE (rhs);
+
+ /* Shift arguments should be equal in all the packed stmts for a
+ vector shift with scalar shift operand. */
+ if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
+ {
+ vec_mode = TYPE_MODE (vectype);
+ optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
+ if (!optab)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: no optab.");
+ return false;
+ }
+ icode = (int) optab->handlers[(int) vec_mode].insn_code;
+ optab_op2_mode = insn_data[icode].operand[2].mode;
+ if (!VECTOR_MODE_P (optab_op2_mode))
+ {
+ need_same_oprnds = true;
+ first_op1 = TREE_OPERAND (rhs, 1);
+ }
+ }
+ }
+ else
+ {
+ if (first_stmt_code != TREE_CODE (rhs))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump,
+ "Build SLP failed: different operation in stmt ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ return false;
+ }
+
+ if (need_same_oprnds
+ && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump,
+ "Build SLP failed: different shift arguments in ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ return false;
+ }
+ }
+
+ /* Strided store or load. */
+ if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
+ {
+ if (REFERENCE_CLASS_P (lhs))
+ {
+ /* Store. */
+ if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
+ &def_stmts0, &def_stmts1,
+ &first_stmt_dt0,
+ &first_stmt_dt1,
+ &first_stmt_def0_type,
+ &first_stmt_def1_type,
+ &first_stmt_const_oprnd,
+ ncopies_for_cost))
+ return false;
+ }
+ else
+ {
+ /* Load. */
+ if (i == 0)
+ {
+ /* First stmt of the SLP group should be the first load of
+ the interleaving loop if data permutation is not
+ allowed. */
+ if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
+ {
+ /* FORNOW: data permutations are not supported. */
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: strided "
+ " loads need permutation ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ return false;
+ }
+
+ first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ if (vect_supportable_dr_alignment (first_dr)
+ == dr_unaligned_unsupported)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: unsupported "
+ " unaligned load ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ return false;
+ }
+
+ /* Analyze costs (for the first stmt in the group). */
+ vect_model_load_cost (vinfo_for_stmt (stmt),
+ ncopies_for_cost, *node);
+ }
+ else
+ {
+ if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
+ {
+ /* FORNOW: data permutations are not supported. */
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: strided "
+ " loads need permutation ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+ return false;
+ }
+ }
+
+ prev_stmt = stmt;
+
+ /* We stop the tree when we reach a group of loads. */
+ stop_recursion = true;
+ continue;
+ }
+ } /* Strided access. */
+ else
+ {
+ if (REFERENCE_CLASS_P (rhs))
+ {
+ /* Not strided load. */
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: not strided load ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ /* FORNOW: Not strided loads are not supported. */
+ return false;
+ }
+
+ /* Not memory operation. */
+ if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: operation");
+ fprintf (vect_dump, " unsupported ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ return false;
+ }
+
+ /* Find the def-stmts. */
+ if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
+ &def_stmts1, &first_stmt_dt0,
+ &first_stmt_dt1,
+ &first_stmt_def0_type,
+ &first_stmt_def1_type,
+ &first_stmt_const_oprnd,
+ ncopies_for_cost))
+ return false;
+ }
+ }
+
+ /* Add the costs of the node to the overall instance costs. */
+ *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
+ *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
+
+ /* Strided loads were reached - stop the recursion. */
+ if (stop_recursion)
+ return true;
+
+ /* Create SLP_TREE nodes for the definition node/s. */
+ if (first_stmt_dt0 == vect_loop_def)
+ {
+ slp_tree left_node = XNEW (struct _slp_tree);
+ SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
+ SLP_TREE_VEC_STMTS (left_node) = NULL;
+ SLP_TREE_LEFT (left_node) = NULL;
+ SLP_TREE_RIGHT (left_node) = NULL;
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
+ SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
+ if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
+ slp_impossible, inside_cost, outside_cost,
+ ncopies_for_cost))
+ return false;
+
+ SLP_TREE_LEFT (*node) = left_node;
+ }
+
+ if (first_stmt_dt1 == vect_loop_def)
+ {
+ slp_tree right_node = XNEW (struct _slp_tree);
+ SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
+ SLP_TREE_VEC_STMTS (right_node) = NULL;
+ SLP_TREE_LEFT (right_node) = NULL;
+ SLP_TREE_RIGHT (right_node) = NULL;
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
+ SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
+ if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
+ slp_impossible, inside_cost, outside_cost,
+ ncopies_for_cost))
+ return false;
+
+ SLP_TREE_RIGHT (*node) = right_node;
+ }
+
+ return true;
+}
+
+
+static void
+vect_print_slp_tree (slp_tree node)
+{
+ int i;
+ tree stmt;
+
+ if (!node)
+ return;
+
+ fprintf (vect_dump, "node ");
+ for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ {
+ fprintf (vect_dump, "\n\tstmt %d ", i);
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+ fprintf (vect_dump, "\n");
+
+ vect_print_slp_tree (SLP_TREE_LEFT (node));
+ vect_print_slp_tree (SLP_TREE_RIGHT (node));
+}
+
+
+/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
+ If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
+ J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
+ stmts in NODE are to be marked. */
+
+static void
+vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
+{
+ int i;
+ tree stmt;
+
+ if (!node)
+ return;
+
+ for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ if (j < 0 || i == j)
+ STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
+
+ vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
+ vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
+}
+
+
+/* Analyze an SLP instance starting from a group of strided stores. Call
+ vect_build_slp_tree to build a tree of packed stmts if possible.
+ Return FALSE if it's impossible to SLP any stmt in the loop. */
+
+static bool
+vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
+{
+ slp_instance new_instance;
+ slp_tree node = XNEW (struct _slp_tree);
+ unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ unsigned int unrolling_factor = 1, nunits;
+ tree vectype, scalar_type, next;
+ unsigned int vectorization_factor = 0, ncopies;
+ bool slp_impossible = false;
+ int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
+
+ /* FORNOW: multiple types are not supported. */
+ scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ ncopies = vectorization_factor / nunits;
+ if (ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "SLP failed - multiple types ");
+
+ return false;
+ }
+
+ /* Create a node (a root of the SLP tree) for the packed strided stores. */
+ SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
+ next = stmt;
+ /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
+ while (next)
+ {
+ VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
+ next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ }
+
+ SLP_TREE_VEC_STMTS (node) = NULL;
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
+ SLP_TREE_LEFT (node) = NULL;
+ SLP_TREE_RIGHT (node) = NULL;
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
+ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
+
+ /* Calculate the unrolling factor. */
+ unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
+
+ /* Calculate the number of vector stmts to create based on the unrolling
+ factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
+ GROUP_SIZE / NUNITS otherwise. */
+ ncopies_for_cost = unrolling_factor * group_size / nunits;
+
+ /* Build the tree for the SLP instance. */
+ if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
+ &inside_cost, &outside_cost, ncopies_for_cost))
+ {
+ /* Create a new SLP instance. */
+ new_instance = XNEW (struct _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+ SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+ SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
+ SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
+ VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+ new_instance);
+ if (vect_print_dump_info (REPORT_SLP))
+ vect_print_slp_tree (node);
+
+ return true;
+ }
+
+ /* Failed to SLP. */
+ /* Free the allocated memory. */
+ vect_free_slp_tree (node);
+
+ if (slp_impossible)
+ return false;
+
+ /* SLP failed for this instance, but it is still possible to SLP other stmts
+ in the loop. */
+ return true;
+}
+
+
+/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
+ trees of packed scalar stmts if SLP is possible. */
+
+static bool
+vect_analyze_slp (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ tree store;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_analyze_slp ===");
+
+ for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
+ if (!vect_analyze_slp_instance (loop_vinfo, store))
+ {
+ /* SLP failed. No instance can be SLPed in the loop. */
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+ fprintf (vect_dump, "SLP failed.");
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/* For each possible SLP instance decide whether to SLP it and calculate overall
+ unrolling factor needed to SLP the loop. */
+
+static void
+vect_make_slp_decision (loop_vec_info loop_vinfo)
+{
+ unsigned int i, unrolling_factor = 1;
+ VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ slp_instance instance;
+ int decided_to_slp = 0;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_make_slp_decision ===");
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ {
+ /* FORNOW: SLP if you can. */
+ if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
+ unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
+
+ /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
+ call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
+ loop-based vectorization. Such stmts will be marked as HYBRID. */
+ vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
+ decided_to_slp++;
+ }
+
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
+
+ if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
+ decided_to_slp, unrolling_factor);
+}
+
+
+/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
+ can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
+
+static void
+vect_detect_hybrid_slp_stmts (slp_tree node)
+{
+ int i;
+ tree stmt;
+ imm_use_iterator imm_iter;
+ tree use_stmt;
+
+ if (!node)
+ return;
+
+ for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
+ && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
+ if (vinfo_for_stmt (use_stmt)
+ && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
+ vect_mark_slp_stmts (node, hybrid, i);
+
+ vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
+ vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
+}
+
+
+/* Find stmts that must be both vectorized and SLPed. */
+
+static void
+vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ slp_instance instance;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
+}
+
+
/* Function vect_analyze_data_refs.
Find all the data references in the loop.
return NULL;
}
+ /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
+ ok = vect_analyze_slp (loop_vinfo);
+ if (ok)
+ {
+ /* Decide which possible SLP instances to SLP. */
+ vect_make_slp_decision (loop_vinfo);
+
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+ }
+
/* This pass will decide on using loop versioning and/or loop peeling in
order to enhance the alignment of data references in the loop. */