/* SLP - Basic Block Vectorization
- Copyright (C) 2007, 2008, 2009, 2010
+ Copyright (C) 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
+#include "langhooks.h"
/* Extract the location of the basic block in the source code.
Return the basic block location if succeed and NULL if not. */
{
tree oprnd;
unsigned int i, number_of_oprnds;
- tree def;
+ tree def[2];
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
stmt_vec_info stmt_info =
vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
enum gimple_rhs_class rhs_class;
struct loop *loop = NULL;
+ enum tree_code rhs_code;
+ bool different_types = false;
if (loop_vinfo)
loop = LOOP_VINFO_LOOP (loop_vinfo);
{
oprnd = gimple_op (stmt, i + 1);
- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
&dt[i])
|| (!def_stmt && dt[i] != vect_constant_def))
{
if (loop && def_stmt && gimple_bb (def_stmt)
&& flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
&& vinfo_for_stmt (def_stmt)
- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
+ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
+ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
{
if (!*first_stmt_dt0)
*pattern0 = true;
switch (gimple_code (def_stmt))
{
case GIMPLE_PHI:
- def = gimple_phi_result (def_stmt);
+ def[i] = gimple_phi_result (def_stmt);
break;
case GIMPLE_ASSIGN:
- def = gimple_assign_lhs (def_stmt);
+ def[i] = gimple_assign_lhs (def_stmt);
break;
default:
{
/* op0 of the first stmt of the group - store its info. */
*first_stmt_dt0 = dt[i];
- if (def)
- *first_stmt_def0_type = TREE_TYPE (def);
+ if (def[i])
+ *first_stmt_def0_type = TREE_TYPE (def[i]);
else
*first_stmt_const_oprnd = oprnd;
vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
else
/* Store. */
- vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+ vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+ dt[0], slp_node);
}
else
{
/* op1 of the first stmt of the group - store its info. */
*first_stmt_dt1 = dt[i];
- if (def)
- *first_stmt_def1_type = TREE_TYPE (def);
+ if (def[i])
+ *first_stmt_def1_type = TREE_TYPE (def[i]);
else
{
/* We assume that the stmt contains only one constant
else
{
/* Not first stmt of the group, check that the def-stmt/s match
- the def-stmt/s of the first stmt. */
+ the def-stmt/s of the first stmt. Allow different definition
+ types for reduction chains: the first stmt must be a
+ vect_reduction_def (a phi node), and the rest
+ vect_internal_def. */
if ((i == 0
- && (*first_stmt_dt0 != dt[i]
- || (*first_stmt_def0_type && def
+ && ((*first_stmt_dt0 != dt[i]
+ && !(*first_stmt_dt0 == vect_reduction_def
+ && dt[i] == vect_internal_def))
+ || (*first_stmt_def0_type && def[0]
&& !types_compatible_p (*first_stmt_def0_type,
- TREE_TYPE (def)))))
+ TREE_TYPE (def[0])))))
|| (i == 1
- && (*first_stmt_dt1 != dt[i]
- || (*first_stmt_def1_type && def
+ && ((*first_stmt_dt1 != dt[i]
+ && !(*first_stmt_dt1 == vect_reduction_def
+ && dt[i] == vect_internal_def))
+ || (*first_stmt_def1_type && def[1]
&& !types_compatible_p (*first_stmt_def1_type,
- TREE_TYPE (def)))))
- || (!def
+ TREE_TYPE (def[1])))))
+ || (!def[i]
&& !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
- TREE_TYPE (oprnd))))
+ TREE_TYPE (oprnd)))
+ || different_types)
{
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "Build SLP failed: different types ");
+ if (i != number_of_oprnds - 1)
+ different_types = true;
+ else
+ {
+ if (is_gimple_assign (stmt)
+ && (rhs_code = gimple_assign_rhs_code (stmt))
+ && TREE_CODE_CLASS (rhs_code) == tcc_binary
+ && commutative_tree_code (rhs_code)
+ && *first_stmt_dt0 == dt[1]
+ && *first_stmt_dt1 == dt[0]
+ && def[0] && def[1]
+ && !(*first_stmt_def0_type
+ && !types_compatible_p (*first_stmt_def0_type,
+ TREE_TYPE (def[1])))
+ && !(*first_stmt_def1_type
+ && !types_compatible_p (*first_stmt_def1_type,
+ TREE_TYPE (def[0]))))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Swapping operands of ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+
+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+ gimple_assign_rhs2_ptr (stmt));
+ }
+ else
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: different types ");
- return false;
+ return false;
+ }
+ }
}
}
}
case vect_internal_def:
case vect_reduction_def:
- if (i == 0)
+ if ((i == 0 && !different_types) || (i == 1 && different_types))
VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
else
- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
+ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
break;
default:
if (vect_print_dump_info (REPORT_SLP))
{
fprintf (vect_dump, "Build SLP failed: illegal type of def ");
- print_generic_expr (vect_dump, def, TDF_SLIM);
+ print_generic_expr (vect_dump, def[i], TDF_SLIM);
}
return false;
int ncopies_for_cost, unsigned int *max_nunits,
VEC (int, heap) **load_permutation,
VEC (slp_tree, heap) **loads,
- unsigned int vectorization_factor)
+ unsigned int vectorization_factor, bool *loads_permuted)
{
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
return false;
}
- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (ncopies != 1)
+ /* In case of multiple types we need to detect the smallest type. */
+ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
{
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP with multiple types ");
-
- /* FORNOW: multiple types are unsupported in BB SLP. */
- if (bb_vinfo)
- return false;
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (bb_vinfo)
+ vectorization_factor = *max_nunits;
}
- /* In case of multiple types we need to detect the smallest type. */
- if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
if (is_gimple_call (stmt))
rhs_code = CALL_EXPR;
}
}
}
+ else if (rhs_code == WIDEN_LSHIFT_EXPR)
+ {
+ need_same_oprnds = true;
+ first_op1 = gimple_assign_rhs2 (stmt);
+ }
}
else
{
{
/* Load. */
/* FORNOW: Check that there is no gap between the loads. */
- if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
- && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
- || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
- && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
+ if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
+ && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+ || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
+ && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
{
if (vect_print_dump_info (REPORT_SLP))
{
/* Check that the size of interleaved loads group is not
greater than the SLP group size. */
- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+ if (loop_vinfo
+ && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
{
if (vect_print_dump_info (REPORT_SLP))
{
return false;
}
- first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
if (prev_first_load)
{
/* Check that there are no loads from different interleaving
/* Analyze costs (for the first stmt in the group). */
vect_model_load_cost (vinfo_for_stmt (stmt),
- ncopies_for_cost, *node);
+ ncopies_for_cost, false, *node);
}
/* Store the place of this load in the interleaving chain. In
/* Strided loads were reached - stop the recursion. */
if (stop_recursion)
{
+ VEC_safe_push (slp_tree, heap, *loads, *node);
if (permutation)
{
- VEC_safe_push (slp_tree, heap, *loads, *node);
+
+ *loads_permuted = true;
*inside_cost
+= targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
* group_size;
}
else
- {
- /* We don't check here complex numbers chains, so we keep them in
- LOADS for further check in vect_supported_load_permutation_p. */
+ {
+ /* We don't check here complex numbers chains, so we set
+ LOADS_PERMUTED for further check in
+ vect_supported_load_permutation_p. */
if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
- VEC_safe_push (slp_tree, heap, *loads, *node);
+ *loads_permuted = true;
}
return true;
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
max_nunits, load_permutation, loads,
- vectorization_factor))
+ vectorization_factor, loads_permuted))
return false;
SLP_TREE_LEFT (*node) = left_node;
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
max_nunits, load_permutation, loads,
- vectorization_factor))
+ vectorization_factor, loads_permuted))
return false;
SLP_TREE_RIGHT (*node) = right_node;
{
slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
- gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
VEC (slp_tree, heap) *sorted_loads = NULL;
int index;
slp_tree *tmp_loads = NULL;
{
gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
/* Check that the loads are all in the same interleaving chain. */
- if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
{
if (vect_print_dump_info (REPORT_DETAILS))
{
bool supported, bad_permutation = false;
sbitmap load_index;
slp_tree node, other_complex_node;
- gimple stmt, first = NULL, other_node_first;
+ gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
unsigned complex_numbers = 0;
+ struct data_reference *dr;
+ bb_vec_info bb_vinfo;
/* FORNOW: permutations are only supported in SLP. */
if (!slp_instn)
first = stmt;
else
{
- if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
{
if (complex_numbers != 2)
return false;
other_node_first = VEC_index (gimple,
SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
- if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
!= other_node_first)
return false;
}
GROUP_SIZE. */
number_of_groups = VEC_length (int, load_permutation) / group_size;
- /* Reduction (there are no data-refs in the root). */
- if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ /* Reduction (there are no data-refs in the root).
+ In reduction chain the order of the loads is important. */
+ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
+ && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
int first_group_load_index;
if (!bad_permutation)
{
- /* This permutaion is valid for reduction. Since the order of the
+ /* Check that the loads in the first sequence are different and there
+ are no gaps between them. */
+ load_index = sbitmap_alloc (group_size);
+ sbitmap_zero (load_index);
+ for (k = 0; k < group_size; k++)
+ {
+ first_group_load_index = VEC_index (int, load_permutation, k);
+ if (TEST_BIT (load_index, first_group_load_index))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ SET_BIT (load_index, first_group_load_index);
+ }
+
+ if (!bad_permutation)
+ for (k = 0; k < group_size; k++)
+ if (!TEST_BIT (load_index, k))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ sbitmap_free (load_index);
+ }
+
+ if (!bad_permutation)
+ {
+ /* This permutation is valid for reduction. Since the order of the
statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */
}
}
+ /* In basic block vectorization we allow any subchain of an interleaving
+ chain.
+ FORNOW: not supported in loop SLP because of realignment complications. */
+ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
+ bad_permutation = false;
+ /* Check that for every node in the instance the loads form a subchain. */
+ if (bb_vinfo)
+ {
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+ {
+ next_load = NULL;
+ first_load = NULL;
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
+ {
+ if (!first_load)
+ first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
+ else if (first_load
+ != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ if (j != 0 && next_load != load)
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
+ }
+
+ if (bad_permutation)
+ break;
+ }
+
+ /* Check that the alignment of the first load in every subchain, i.e.,
+ the first statement in every load node, is supported. */
+ if (!bad_permutation)
+ {
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+ {
+ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ if (first_load
+ != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
+ {
+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
+ if (vect_supportable_dr_alignment (dr, false)
+ == dr_unaligned_unsupported)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "unsupported unaligned load ");
+ print_gimple_stmt (vect_dump, first_load, 0,
+ TDF_SLIM);
+ }
+ bad_permutation = true;
+ break;
+ }
+ }
+ }
+
+ if (!bad_permutation)
+ {
+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
+ return true;
+ }
+ }
+ }
+
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
well (unless it's reduction). */
{
slp_instance new_instance;
slp_tree node = XNEW (struct _slp_tree);
- unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
unsigned int unrolling_factor = 1, nunits;
tree vectype, scalar_type = NULL_TREE;
gimple next;
VEC (int, heap) *load_permutation;
VEC (slp_tree, heap) *loads;
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ bool loads_permuted = false;
- if (dr)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
- scalar_type = TREE_TYPE (DR_REF (dr));
- vectype = get_vectype_for_scalar_type (scalar_type);
- group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ if (dr)
+ {
+ scalar_type = TREE_TYPE (DR_REF (dr));
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ }
+ else
+ {
+ gcc_assert (loop_vinfo);
+ vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
+ }
+
+ group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
}
else
{
if (loop_vinfo)
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
else
- /* No multitypes in BB SLP. */
vectorization_factor = nunits;
/* Calculate the unrolling factor. */
/* Create a node (a root of the SLP tree) for the packed strided stores. */
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
next = stmt;
- if (dr)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
while (next)
{
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
- next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
}
}
else
for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
next);
i++)
- {
- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "pushing reduction into node: ");
- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
- }
- }
+ VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
}
SLP_TREE_VEC_STMTS (node) = NULL;
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
&inside_cost, &outside_cost, ncopies_for_cost,
&max_nunits, &load_permutation, &loads,
- vectorization_factor))
+ vectorization_factor, &loads_permuted))
{
- /* Create a new SLP instance. */
- new_instance = XNEW (struct _slp_instance);
- SLP_INSTANCE_TREE (new_instance) = node;
- SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
- /* Calculate the unrolling factor based on the smallest type in the
- loop. */
+ /* Calculate the unrolling factor based on the smallest type. */
if (max_nunits > nunits)
unrolling_factor = least_common_multiple (max_nunits, group_size)
/ group_size;
+ if (unrolling_factor != 1 && !loop_vinfo)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
+ " block SLP");
+ return false;
+ }
+
+ /* Create a new SLP instance. */
+ new_instance = XNEW (struct _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
SLP_INSTANCE_LOADS (new_instance) = loads;
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
- if (VEC_length (slp_tree, loads))
+
+ if (loads_permuted)
{
if (!vect_supported_load_permutation_p (new_instance, group_size,
load_permutation))
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
unsigned int i;
- VEC (gimple, heap) *strided_stores, *reductions = NULL;
- gimple store;
+ VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
+ gimple first_element;
bool ok = false;
if (vect_print_dump_info (REPORT_SLP))
if (loop_vinfo)
{
strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
}
else
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Find SLP sequences starting from groups of strided stores. */
- FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
- if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
+ FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
+ if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
ok = true;
if (bb_vinfo && !ok)
return false;
}
+ if (loop_vinfo
+ && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
+ {
+ /* Find SLP sequences starting from reduction chains. */
+ FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
+ if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
+ ok = true;
+ else
+ return false;
+
+ /* Don't try to vectorize SLP reductions if reduction chain was
+ detected. */
+ return ok;
+ }
+
/* Find SLP sequences starting from groups of reductions. */
if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
&& vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
/* For each possible SLP instance decide whether to SLP it and calculate overall
- unrolling factor needed to SLP the loop. */
+ unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
+ least one instance. */
-void
+bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
unsigned int i, unrolling_factor = 1;
if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
decided_to_slp, unrolling_factor);
+
+ return (decided_to_slp > 0);
}
free_stmt_vec_info (stmt);
}
+ free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
+ free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
free (bb_vinfo);
return true;
}
-/* Check if loads and stores are mixed in the basic block (in that
- case if we are not sure that the accesses differ, we can't vectorize the
- basic block). Also return FALSE in case that there is statement marked as
- not vectorizable. */
-
-static bool
-vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
-{
- basic_block bb = BB_VINFO_BB (bb_vinfo);
- gimple_stmt_iterator si;
- bool detected_store = false;
- gimple stmt;
- struct data_reference *dr;
-
- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
- {
- stmt = gsi_stmt (si);
-
- /* We can't allow not analyzed statements, since they may contain data
- accesses. */
- if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
- return false;
-
- if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
- continue;
-
- dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (DR_IS_READ (dr) && detected_store)
- return false;
-
- if (!DR_IS_READ (dr))
- detected_store = true;
- }
-
- return true;
-}
-
/* Check if vectorization of the basic block is profitable. */
static bool
/* Check if the basic block can be vectorized. */
-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
{
bb_vec_info bb_vinfo;
VEC (ddr_p, heap) *ddrs;
VEC (slp_instance, heap) *slp_instances;
slp_instance instance;
- int i, insns = 0;
- gimple_stmt_iterator gsi;
+ int i;
int min_vf = 2;
int max_vf = MAX_VECTORIZATION_FACTOR;
- bool data_dependence_in_bb = false;
-
- current_vector_size = 0;
-
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!is_gimple_debug (stmt)
- && !gimple_nop_p (stmt)
- && gimple_code (stmt) != GIMPLE_LABEL)
- insns++;
- }
-
- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
- {
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
- fprintf (vect_dump, "not vectorized: too many instructions in basic "
- "block.\n");
-
- return NULL;
- }
bb_vinfo = new_bb_vec_info (bb);
if (!bb_vinfo)
return NULL;
}
- if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
- &data_dependence_in_bb)
- || min_vf > max_vf
- || (data_dependence_in_bb
- && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
+ if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
+ || min_vf > max_vf)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump, "not vectorized: unhandled data dependence "
}
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+ bb_vec_info bb_vinfo;
+ int insns = 0;
+ gimple_stmt_iterator gsi;
+ unsigned int vector_sizes;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!is_gimple_debug (stmt)
+ && !gimple_nop_p (stmt)
+ && gimple_code (stmt) != GIMPLE_LABEL)
+ insns++;
+ }
+
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
+ "block.\n");
+
+ return NULL;
+ }
+
+ /* Autodetect first vector size we try. */
+ current_vector_size = 0;
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+ while (1)
+ {
+ bb_vinfo = vect_slp_analyze_bb_1 (bb);
+ if (bb_vinfo)
+ return bb_vinfo;
+
+ destroy_bb_vec_info (bb_vinfo);
+
+ vector_sizes &= ~current_vector_size;
+ if (vector_sizes == 0
+ || current_vector_size == 0)
+ return NULL;
+
+ /* Try the next biggest vector size. */
+ current_vector_size = 1 << floor_log2 (vector_sizes);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "***** Re-trying analysis with "
+ "vector size %d\n", current_vector_size);
+ }
+}
+
+
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor).
However, the actual number of vector stmts for every SLP node depends on
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
- OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
- stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
+ OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
+ scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
REDUC_INDEX is the index of the reduction operand in the statements, unless
it is -1. */
static void
-vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
+vect_get_constant_vectors (tree op, slp_tree slp_node,
+ VEC (tree, heap) **vec_oprnds,
unsigned int op_num, unsigned int number_of_vectors,
int reduc_index)
{
tree t = NULL_TREE;
int j, number_of_places_left_in_vector;
tree vector_type;
- tree op, vop;
+ tree vop;
int group_size = VEC_length (gimple, stmts);
unsigned int vec_num, i;
int number_of_copies = 1;
VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
bool constant_p, is_store;
tree neutral_op = NULL;
+ enum tree_code code = gimple_assign_rhs_code (stmt);
+ gimple def_stmt;
+ struct loop *loop;
- if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+ && reduc_index != -1)
{
- enum tree_code code = gimple_assign_rhs_code (stmt);
- if (reduc_index == -1)
- {
- VEC_free (tree, heap, *vec_oprnds);
- return;
- }
-
op_num = reduc_index - 1;
- op = gimple_op (stmt, op_num + 1);
+ op = gimple_op (stmt, reduc_index);
/* For additional copies (see the explanation of NUMBER_OF_COPIES below)
we need either neutral operands or the original operands. See
get_initial_def_for_reduction() for details. */
neutral_op = build_int_cst (TREE_TYPE (op), -1);
break;
+ case MAX_EXPR:
+ case MIN_EXPR:
+ def_stmt = SSA_NAME_DEF_STMT (op);
+ loop = (gimple_bb (stmt))->loop_father;
+ neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
+ break;
+
default:
- neutral_op = NULL;
+ neutral_op = NULL;
}
}
op = gimple_assign_rhs1 (stmt);
}
else
- {
- is_store = false;
- op = gimple_op (stmt, op_num + 1);
- }
+ is_store = false;
+
+ gcc_assert (op);
if (CONSTANT_CLASS_P (op))
constant_p = true;
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
gcc_assert (vector_type);
-
nunits = TYPE_VECTOR_SUBPARTS (vector_type);
/* NUMBER_OF_COPIES is the number of times we need to use the same values in
if (reduc_index != -1)
{
- struct loop *loop = (gimple_bb (stmt))->loop_father;
- gimple def_stmt = SSA_NAME_DEF_STMT (op);
+ loop = (gimple_bb (stmt))->loop_father;
+ def_stmt = SSA_NAME_DEF_STMT (op);
gcc_assert (loop);
- /* Get the def before the loop. */
- op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
- loop_preheader_edge (loop));
- if (j != (number_of_copies - 1) && neutral_op)
+
+ /* Get the def before the loop. In reduction chain we have only
+ one initial value. */
+ if ((j != (number_of_copies - 1)
+ || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
+ && i != 0))
+ && neutral_op)
op = neutral_op;
+ else
+ op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
}
/* Create 'vect_ = {op0,op1,...,opn}'. */
if (neutral_op)
{
if (!neutral_vec)
- {
- t = NULL;
- for (i = 0; i < (unsigned) nunits; i++)
- t = tree_cons (NULL_TREE, neutral_op, t);
- neutral_vec = build_vector (vector_type, t);
- }
+ neutral_vec = build_vector_from_val (vector_type, neutral_op);
VEC_quick_push (tree, *vec_oprnds, neutral_vec);
}
the right node. This is used when the second operand must remain scalar. */
void
-vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
+vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
+ VEC (tree,heap) **vec_oprnds0,
VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
gimple first_stmt;
vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
else
/* Build vectors from scalar defs. */
- vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
+ vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
reduc_index);
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
return;
code = gimple_assign_rhs_code (first_stmt);
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
+ if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
return;
/* The number of vector defs is determined by the number of vector statements
vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
else
/* Build vectors from scalar defs. */
- vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
+ vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
+ -1);
}
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
tree mask, int first_vec_indx, int second_vec_indx,
gimple_stmt_iterator *gsi, slp_tree node,
- tree builtin_decl, tree vectype,
- VEC(tree,heap) *dr_chain,
+ tree vectype, VEC(tree,heap) *dr_chain,
int ncopies, int vect_stmts_counter)
{
tree perm_dest;
second_vec = VEC_index (tree, dr_chain, second_vec_indx);
/* Generate the permute statement. */
- perm_stmt = gimple_build_call (builtin_decl,
- 3, first_vec, second_vec, mask);
+ perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
+ first_vec, second_vec, mask);
data_ref = make_ssa_name (perm_dest, perm_stmt);
- gimple_call_set_lhs (perm_stmt, data_ref);
+ gimple_set_lhs (perm_stmt, data_ref);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
/* Store the vector statement in NODE. */
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree mask_element_type = NULL_TREE, mask_type;
- int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
+ int i, j, k, nunits, vec_index = 0, scalar_index;
slp_tree node;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
gimple next_scalar_stmt;
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
int first_mask_element;
bool mask_fixed = false;
bool needs_first_vector = false;
- if (!targetm.vectorize.builtin_vec_perm)
+ if (!can_vec_perm_expr_p (vectype, NULL_TREE))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
- fprintf (vect_dump, "no builtin for vect permute for ");
+ fprintf (vect_dump, "no vect permute for ");
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
-
- return false;
- }
-
- builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
- &mask_element_type);
- if (!builtin_decl || !mask_element_type)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "no builtin for vect permute for ");
- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
- }
-
- return false;
+ return false;
}
+ /* The generic VEC_PERM_EXPR code always uses an integral type of the
+ same size as the vector element being permuted. */
+ mask_element_type
+ = lang_hooks.types.type_for_size
+ (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
mask_type = get_vectype_for_scalar_type (mask_element_type);
- mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
- mask = (int *) xmalloc (sizeof (int) * mask_nunits);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- scale = mask_nunits / nunits;
+ mask = (int *) xmalloc (sizeof (int) * nunits);
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
...
- The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
- scpecific type, e.g., in bytes for Altivec.
+ The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
The last mask is illegal since we assume two operands for permute
operation, and the mask element values can't be outside that range.
Hence, the last mask must be converted into {2,5,5,5}.
{
for (k = 0; k < group_size; k++)
{
- first_mask_element = (i + j * group_size) * scale;
- for (m = 0; m < scale; m++)
- {
- if (!vect_get_mask_element (stmt, first_mask_element, m,
- mask_nunits, only_one_vec, index, mask,
- ¤t_mask_element, &need_next_vector,
- &number_of_mask_fixes, &mask_fixed,
- &needs_first_vector))
- return false;
-
- mask[index++] = current_mask_element;
- }
+ first_mask_element = i + j * group_size;
+ if (!vect_get_mask_element (stmt, first_mask_element, 0,
+ nunits, only_one_vec, index,
+ mask, ¤t_mask_element,
+ &need_next_vector,
+ &number_of_mask_fixes, &mask_fixed,
+ &needs_first_vector))
+ return false;
+ mask[index++] = current_mask_element;
- if (index == mask_nunits)
+ if (index == nunits)
{
tree mask_vec = NULL;
mask_vec = build_vector (mask_type, mask_vec);
index = 0;
- if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
- mask_vec))
+ if (!can_vec_perm_expr_p (vectype, mask_vec))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
vect_create_mask_and_perm (stmt, next_scalar_stmt,
mask_vec, first_vec_index, second_vec_index,
- gsi, node, builtin_decl, vectype, dr_chain,
+ gsi, node, vectype, dr_chain,
ncopies, vect_stmts_counter++);
}
}
/* Loads should be inserted before the first load. */
if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
&& STMT_VINFO_STRIDED_ACCESS (stmt_info)
- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
+ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
+ && SLP_INSTANCE_LOAD_PERMUTATION (instance))
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
+ else if (is_pattern_stmt_p (stmt_info))
+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
else
si = gsi_for_stmt (stmt);
si = gsi_for_stmt (last_store);
}
+ /* Mark the first element of the reduction chain as reduction to properly
+ transform the node. In the analysis phase only the last element of the
+ chain is marked as reduction. */
+ if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
+ && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
+ {
+ STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
+ STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ }
+
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
return is_store;
}