/* SLP - Basic Block Vectorization
- Copyright (C) 2007, 2008, 2009, 2010
+ Copyright (C) 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
and Ira Rosen <irar@il.ibm.com>
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
+#include "langhooks.h"
/* Extract the location of the basic block in the source code.
Return the basic block location if succeed and NULL if not. */
{
tree oprnd;
unsigned int i, number_of_oprnds;
- tree def;
+ tree def[2];
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
stmt_vec_info stmt_info =
vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
enum gimple_rhs_class rhs_class;
struct loop *loop = NULL;
+ enum tree_code rhs_code;
+ bool different_types = false;
if (loop_vinfo)
loop = LOOP_VINFO_LOOP (loop_vinfo);
{
oprnd = gimple_op (stmt, i + 1);
- if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
&dt[i])
|| (!def_stmt && dt[i] != vect_constant_def))
{
if (loop && def_stmt && gimple_bb (def_stmt)
&& flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
&& vinfo_for_stmt (def_stmt)
- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
+ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
+ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
{
if (!*first_stmt_dt0)
*pattern0 = true;
switch (gimple_code (def_stmt))
{
case GIMPLE_PHI:
- def = gimple_phi_result (def_stmt);
+ def[i] = gimple_phi_result (def_stmt);
break;
case GIMPLE_ASSIGN:
- def = gimple_assign_lhs (def_stmt);
+ def[i] = gimple_assign_lhs (def_stmt);
break;
default:
{
/* op0 of the first stmt of the group - store its info. */
*first_stmt_dt0 = dt[i];
- if (def)
- *first_stmt_def0_type = TREE_TYPE (def);
+ if (def[i])
+ *first_stmt_def0_type = TREE_TYPE (def[i]);
else
*first_stmt_const_oprnd = oprnd;
vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
else
/* Store. */
- vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+ vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+ dt[0], slp_node);
}
else
{
/* op1 of the first stmt of the group - store its info. */
*first_stmt_dt1 = dt[i];
- if (def)
- *first_stmt_def1_type = TREE_TYPE (def);
+ if (def[i])
+ *first_stmt_def1_type = TREE_TYPE (def[i]);
else
{
/* We assume that the stmt contains only one constant
else
{
/* Not first stmt of the group, check that the def-stmt/s match
- the def-stmt/s of the first stmt. */
+ the def-stmt/s of the first stmt. Allow different definition
+ types for reduction chains: the first stmt must be a
+ vect_reduction_def (a phi node), and the rest
+ vect_internal_def. */
if ((i == 0
- && (*first_stmt_dt0 != dt[i]
- || (*first_stmt_def0_type && def
+ && ((*first_stmt_dt0 != dt[i]
+ && !(*first_stmt_dt0 == vect_reduction_def
+ && dt[i] == vect_internal_def))
+ || (*first_stmt_def0_type && def[0]
&& !types_compatible_p (*first_stmt_def0_type,
- TREE_TYPE (def)))))
+ TREE_TYPE (def[0])))))
|| (i == 1
- && (*first_stmt_dt1 != dt[i]
- || (*first_stmt_def1_type && def
+ && ((*first_stmt_dt1 != dt[i]
+ && !(*first_stmt_dt1 == vect_reduction_def
+ && dt[i] == vect_internal_def))
+ || (*first_stmt_def1_type && def[1]
&& !types_compatible_p (*first_stmt_def1_type,
- TREE_TYPE (def)))))
- || (!def
+ TREE_TYPE (def[1])))))
+ || (!def[i]
&& !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
- TREE_TYPE (oprnd))))
+ TREE_TYPE (oprnd)))
+ || different_types)
{
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "Build SLP failed: different types ");
+ if (i != number_of_oprnds - 1)
+ different_types = true;
+ else
+ {
+ if (is_gimple_assign (stmt)
+ && (rhs_code = gimple_assign_rhs_code (stmt))
+ && TREE_CODE_CLASS (rhs_code) == tcc_binary
+ && commutative_tree_code (rhs_code)
+ && *first_stmt_dt0 == dt[1]
+ && *first_stmt_dt1 == dt[0]
+ && def[0] && def[1]
+ && !(*first_stmt_def0_type
+ && !types_compatible_p (*first_stmt_def0_type,
+ TREE_TYPE (def[1])))
+ && !(*first_stmt_def1_type
+ && !types_compatible_p (*first_stmt_def1_type,
+ TREE_TYPE (def[0]))))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Swapping operands of ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+
+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+ gimple_assign_rhs2_ptr (stmt));
+ }
+ else
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: different types ");
- return false;
+ return false;
+ }
+ }
}
}
}
case vect_internal_def:
case vect_reduction_def:
- if (i == 0)
+ if ((i == 0 && !different_types) || (i == 1 && different_types))
VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
else
- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
+ VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
break;
default:
if (vect_print_dump_info (REPORT_SLP))
{
fprintf (vect_dump, "Build SLP failed: illegal type of def ");
- print_generic_expr (vect_dump, def, TDF_SLIM);
+ print_generic_expr (vect_dump, def[i], TDF_SLIM);
}
return false;
int ncopies_for_cost, unsigned int *max_nunits,
VEC (int, heap) **load_permutation,
VEC (slp_tree, heap) **loads,
- unsigned int vectorization_factor)
+ unsigned int vectorization_factor, bool *loads_permuted)
{
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
return false;
}
- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (ncopies != 1)
+ /* In case of multiple types we need to detect the smallest type. */
+ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
{
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP with multiple types ");
-
- /* FORNOW: multiple types are unsupported in BB SLP. */
- if (bb_vinfo)
- return false;
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (bb_vinfo)
+ vectorization_factor = *max_nunits;
}
- /* In case of multiple types we need to detect the smallest type. */
- if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
if (is_gimple_call (stmt))
rhs_code = CALL_EXPR;
}
}
}
+ else if (rhs_code == WIDEN_LSHIFT_EXPR)
+ {
+ need_same_oprnds = true;
+ first_op1 = gimple_assign_rhs2 (stmt);
+ }
}
else
{
{
/* Load. */
/* FORNOW: Check that there is no gap between the loads. */
- if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
- && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
- || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
- && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
+ if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
+ && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+ || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
+ && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
{
if (vect_print_dump_info (REPORT_SLP))
{
/* Check that the size of interleaved loads group is not
greater than the SLP group size. */
- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+ if (loop_vinfo
+ && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
{
if (vect_print_dump_info (REPORT_SLP))
{
return false;
}
- first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
if (prev_first_load)
{
/* Check that there are no loads from different interleaving
/* Analyze costs (for the first stmt in the group). */
vect_model_load_cost (vinfo_for_stmt (stmt),
- ncopies_for_cost, *node);
+ ncopies_for_cost, false, *node);
}
/* Store the place of this load in the interleaving chain. In
/* Strided loads were reached - stop the recursion. */
if (stop_recursion)
{
+ VEC_safe_push (slp_tree, heap, *loads, *node);
if (permutation)
{
- VEC_safe_push (slp_tree, heap, *loads, *node);
+
+ *loads_permuted = true;
*inside_cost
+= targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
* group_size;
}
else
- {
- /* We don't check here complex numbers chains, so we keep them in
- LOADS for further check in vect_supported_load_permutation_p. */
+ {
+ /* We don't check here complex numbers chains, so we set
+ LOADS_PERMUTED for further check in
+ vect_supported_load_permutation_p. */
if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
- VEC_safe_push (slp_tree, heap, *loads, *node);
+ *loads_permuted = true;
}
return true;
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
max_nunits, load_permutation, loads,
- vectorization_factor))
+ vectorization_factor, loads_permuted))
return false;
SLP_TREE_LEFT (*node) = left_node;
if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
inside_cost, outside_cost, ncopies_for_cost,
max_nunits, load_permutation, loads,
- vectorization_factor))
+ vectorization_factor, loads_permuted))
return false;
SLP_TREE_RIGHT (*node) = right_node;
{
slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
- gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
VEC (slp_tree, heap) *sorted_loads = NULL;
int index;
slp_tree *tmp_loads = NULL;
{
gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
/* Check that the loads are all in the same interleaving chain. */
- if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
{
if (vect_print_dump_info (REPORT_DETAILS))
{
bool supported, bad_permutation = false;
sbitmap load_index;
slp_tree node, other_complex_node;
- gimple stmt, first = NULL, other_node_first;
+ gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
unsigned complex_numbers = 0;
+ struct data_reference *dr;
+ bb_vec_info bb_vinfo;
/* FORNOW: permutations are only supported in SLP. */
if (!slp_instn)
first = stmt;
else
{
- if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
{
if (complex_numbers != 2)
return false;
other_node_first = VEC_index (gimple,
SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
- if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
!= other_node_first)
return false;
}
GROUP_SIZE. */
number_of_groups = VEC_length (int, load_permutation) / group_size;
- /* Reduction (there are no data-refs in the root). */
- if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ /* Reduction (there are no data-refs in the root).
+ In reduction chain the order of the loads is important. */
+ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
+ && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
int first_group_load_index;
}
}
+ /* In basic block vectorization we allow any subchain of an interleaving
+ chain.
+ FORNOW: not supported in loop SLP because of realignment compications. */
+ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
+ bad_permutation = false;
+ /* Check that for every node in the instance teh loads form a subchain. */
+ if (bb_vinfo)
+ {
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+ {
+ next_load = NULL;
+ first_load = NULL;
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
+ {
+ if (!first_load)
+ first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
+ else if (first_load
+ != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ if (j != 0 && next_load != load)
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
+ }
+
+ if (bad_permutation)
+ break;
+ }
+
+ /* Check that the alignment of the first load in every subchain, i.e.,
+ the first statement in every load node, is supported. */
+ if (!bad_permutation)
+ {
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+ {
+ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ if (first_load
+ != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
+ {
+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
+ if (vect_supportable_dr_alignment (dr, false)
+ == dr_unaligned_unsupported)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "unsupported unaligned load ");
+ print_gimple_stmt (vect_dump, first_load, 0,
+ TDF_SLIM);
+ }
+ bad_permutation = true;
+ break;
+ }
+ }
+ }
+
+ if (!bad_permutation)
+ {
+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
+ return true;
+ }
+ }
+ }
+
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
well (unless it's reduction). */
{
slp_instance new_instance;
slp_tree node = XNEW (struct _slp_tree);
- unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
unsigned int unrolling_factor = 1, nunits;
tree vectype, scalar_type = NULL_TREE;
gimple next;
VEC (int, heap) *load_permutation;
VEC (slp_tree, heap) *loads;
struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ bool loads_permuted = false;
- if (dr)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
- scalar_type = TREE_TYPE (DR_REF (dr));
- vectype = get_vectype_for_scalar_type (scalar_type);
- group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ if (dr)
+ {
+ scalar_type = TREE_TYPE (DR_REF (dr));
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ }
+ else
+ {
+ gcc_assert (loop_vinfo);
+ vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
+ }
+
+ group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
}
else
{
if (loop_vinfo)
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
else
- /* No multitypes in BB SLP. */
vectorization_factor = nunits;
/* Calculate the unrolling factor. */
/* Create a node (a root of the SLP tree) for the packed strided stores. */
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
next = stmt;
- if (dr)
+ if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
while (next)
{
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
- next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
}
}
else
for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
next);
i++)
- {
- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "pushing reduction into node: ");
- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
- }
- }
+ VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
}
SLP_TREE_VEC_STMTS (node) = NULL;
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
&inside_cost, &outside_cost, ncopies_for_cost,
&max_nunits, &load_permutation, &loads,
- vectorization_factor))
+ vectorization_factor, &loads_permuted))
{
- /* Create a new SLP instance. */
- new_instance = XNEW (struct _slp_instance);
- SLP_INSTANCE_TREE (new_instance) = node;
- SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
- /* Calculate the unrolling factor based on the smallest type in the
- loop. */
+ /* Calculate the unrolling factor based on the smallest type. */
if (max_nunits > nunits)
unrolling_factor = least_common_multiple (max_nunits, group_size)
/ group_size;
+ if (unrolling_factor != 1 && !loop_vinfo)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
+ " block SLP");
+ return false;
+ }
+
+ /* Create a new SLP instance. */
+ new_instance = XNEW (struct _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
SLP_INSTANCE_LOADS (new_instance) = loads;
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
- if (VEC_length (slp_tree, loads))
+
+ if (loads_permuted)
{
if (!vect_supported_load_permutation_p (new_instance, group_size,
load_permutation))
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
unsigned int i;
- VEC (gimple, heap) *strided_stores, *reductions = NULL;
- gimple store;
+ VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
+ gimple first_element;
bool ok = false;
if (vect_print_dump_info (REPORT_SLP))
if (loop_vinfo)
{
strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
}
else
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Find SLP sequences starting from groups of strided stores. */
- FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
- if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
+ FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
+ if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
ok = true;
if (bb_vinfo && !ok)
return false;
}
+ if (loop_vinfo
+ && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
+ {
+ /* Find SLP sequences starting from reduction chains. */
+ FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
+ if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
+ ok = true;
+ else
+ return false;
+
+ /* Don't try to vectorize SLP reductions if reduction chain was
+ detected. */
+ return ok;
+ }
+
/* Find SLP sequences starting from groups of reductions. */
if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
&& vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
return true;
}
-/* Check if loads and stores are mixed in the basic block (in that
- case if we are not sure that the accesses differ, we can't vectorize the
- basic block). Also return FALSE in case that there is statement marked as
- not vectorizable. */
-
-static bool
-vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
-{
- basic_block bb = BB_VINFO_BB (bb_vinfo);
- gimple_stmt_iterator si;
- bool detected_store = false;
- gimple stmt;
- struct data_reference *dr;
-
- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
- {
- stmt = gsi_stmt (si);
-
- /* We can't allow not analyzed statements, since they may contain data
- accesses. */
- if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
- return false;
-
- if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
- continue;
-
- dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (DR_IS_READ (dr) && detected_store)
- return false;
-
- if (!DR_IS_READ (dr))
- detected_store = true;
- }
-
- return true;
-}
-
/* Check if vectorization of the basic block is profitable. */
static bool
/* Check if the basic block can be vectorized. */
-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
{
bb_vec_info bb_vinfo;
VEC (ddr_p, heap) *ddrs;
VEC (slp_instance, heap) *slp_instances;
slp_instance instance;
- int i, insns = 0;
- gimple_stmt_iterator gsi;
+ int i;
int min_vf = 2;
int max_vf = MAX_VECTORIZATION_FACTOR;
- bool data_dependence_in_bb = false;
-
- current_vector_size = 0;
-
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!is_gimple_debug (stmt)
- && !gimple_nop_p (stmt)
- && gimple_code (stmt) != GIMPLE_LABEL)
- insns++;
- }
-
- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
- {
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
- fprintf (vect_dump, "not vectorized: too many instructions in basic "
- "block.\n");
-
- return NULL;
- }
bb_vinfo = new_bb_vec_info (bb);
if (!bb_vinfo)
return NULL;
}
- if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
- &data_dependence_in_bb)
- || min_vf > max_vf
- || (data_dependence_in_bb
- && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
+ if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
+ || min_vf > max_vf)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump, "not vectorized: unhandled data dependence "
}
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+ bb_vec_info bb_vinfo;
+ int insns = 0;
+ gimple_stmt_iterator gsi;
+ unsigned int vector_sizes;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!is_gimple_debug (stmt)
+ && !gimple_nop_p (stmt)
+ && gimple_code (stmt) != GIMPLE_LABEL)
+ insns++;
+ }
+
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: too many instructions in basic "
+ "block.\n");
+
+ return NULL;
+ }
+
+ /* Autodetect first vector size we try. */
+ current_vector_size = 0;
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+ while (1)
+ {
+ bb_vinfo = vect_slp_analyze_bb_1 (bb);
+ if (bb_vinfo)
+ return bb_vinfo;
+
+ destroy_bb_vec_info (bb_vinfo);
+
+ vector_sizes &= ~current_vector_size;
+ if (vector_sizes == 0
+ || current_vector_size == 0)
+ return NULL;
+
+ /* Try the next biggest vector size. */
+ current_vector_size = 1 << floor_log2 (vector_sizes);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "***** Re-trying analysis with "
+ "vector size %d\n", current_vector_size);
+ }
+}
+
+
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor).
However, the actual number of vector stmts for every SLP node depends on
bool constant_p, is_store;
tree neutral_op = NULL;
enum tree_code code = gimple_assign_rhs_code (stmt);
+ gimple def_stmt;
+ struct loop *loop;
- if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+ && reduc_index != -1)
{
- if (reduc_index == -1)
- {
- VEC_free (tree, heap, *vec_oprnds);
- return;
- }
-
op_num = reduc_index - 1;
op = gimple_op (stmt, reduc_index);
/* For additional copies (see the explanation of NUMBER_OF_COPIES below)
neutral_op = build_int_cst (TREE_TYPE (op), -1);
break;
+ case MAX_EXPR:
+ case MIN_EXPR:
+ def_stmt = SSA_NAME_DEF_STMT (op);
+ loop = (gimple_bb (stmt))->loop_father;
+ neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
+ break;
+
default:
- neutral_op = NULL;
+ neutral_op = NULL;
}
}
if (reduc_index != -1)
{
- struct loop *loop = (gimple_bb (stmt))->loop_father;
- gimple def_stmt = SSA_NAME_DEF_STMT (op);
+ loop = (gimple_bb (stmt))->loop_father;
+ def_stmt = SSA_NAME_DEF_STMT (op);
gcc_assert (loop);
- /* Get the def before the loop. */
- op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
- loop_preheader_edge (loop));
- if (j != (number_of_copies - 1) && neutral_op)
+
+ /* Get the def before the loop. In reduction chain we have only
+ one initial value. */
+ if ((j != (number_of_copies - 1)
+ || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
+ && i != 0))
+ && neutral_op)
op = neutral_op;
+ else
+ op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
}
/* Create 'vect_ = {op0,op1,...,opn}'. */
return;
code = gimple_assign_rhs_code (first_stmt);
- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
+ if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
return;
/* The number of vector defs is determined by the number of vector statements
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
tree mask, int first_vec_indx, int second_vec_indx,
gimple_stmt_iterator *gsi, slp_tree node,
- tree builtin_decl, tree vectype,
- VEC(tree,heap) *dr_chain,
+ tree vectype, VEC(tree,heap) *dr_chain,
int ncopies, int vect_stmts_counter)
{
tree perm_dest;
second_vec = VEC_index (tree, dr_chain, second_vec_indx);
/* Generate the permute statement. */
- perm_stmt = gimple_build_call (builtin_decl,
- 3, first_vec, second_vec, mask);
+ perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
+ first_vec, second_vec, mask);
data_ref = make_ssa_name (perm_dest, perm_stmt);
- gimple_call_set_lhs (perm_stmt, data_ref);
+ gimple_set_lhs (perm_stmt, data_ref);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
/* Store the vector statement in NODE. */
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree mask_element_type = NULL_TREE, mask_type;
- int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
+ int i, j, k, nunits, vec_index = 0, scalar_index;
slp_tree node;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
gimple next_scalar_stmt;
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
int first_mask_element;
bool mask_fixed = false;
bool needs_first_vector = false;
- if (!targetm.vectorize.builtin_vec_perm)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "no builtin for vect permute for ");
- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
- }
-
- return false;
- }
-
- builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
- &mask_element_type);
- if (!builtin_decl || !mask_element_type)
+ if (!can_vec_perm_expr_p (vectype, NULL_TREE))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
- fprintf (vect_dump, "no builtin for vect permute for ");
+ fprintf (vect_dump, "no vect permute for ");
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
-
- return false;
+ return false;
}
+ /* The generic VEC_PERM_EXPR code always uses an integral type of the
+ same size as the vector element being permuted. */
+ mask_element_type
+ = lang_hooks.types.type_for_size
+ (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
mask_type = get_vectype_for_scalar_type (mask_element_type);
- mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
- mask = (int *) xmalloc (sizeof (int) * mask_nunits);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- scale = mask_nunits / nunits;
+ mask = (int *) xmalloc (sizeof (int) * nunits);
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
...
- The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
- scpecific type, e.g., in bytes for Altivec.
+ The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
The last mask is illegal since we assume two operands for permute
operation, and the mask element values can't be outside that range.
Hence, the last mask must be converted into {2,5,5,5}.
{
for (k = 0; k < group_size; k++)
{
- first_mask_element = (i + j * group_size) * scale;
- for (m = 0; m < scale; m++)
- {
- if (!vect_get_mask_element (stmt, first_mask_element, m,
- mask_nunits, only_one_vec, index, mask,
- ¤t_mask_element, &need_next_vector,
- &number_of_mask_fixes, &mask_fixed,
- &needs_first_vector))
- return false;
-
- mask[index++] = current_mask_element;
- }
+ first_mask_element = i + j * group_size;
+ if (!vect_get_mask_element (stmt, first_mask_element, 0,
+ nunits, only_one_vec, index,
+ mask, ¤t_mask_element,
+ &need_next_vector,
+ &number_of_mask_fixes, &mask_fixed,
+ &needs_first_vector))
+ return false;
+ mask[index++] = current_mask_element;
- if (index == mask_nunits)
+ if (index == nunits)
{
tree mask_vec = NULL;
mask_vec = build_vector (mask_type, mask_vec);
index = 0;
- if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
- mask_vec))
+ if (!can_vec_perm_expr_p (vectype, mask_vec))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
vect_create_mask_and_perm (stmt, next_scalar_stmt,
mask_vec, first_vec_index, second_vec_index,
- gsi, node, builtin_decl, vectype, dr_chain,
+ gsi, node, vectype, dr_chain,
ncopies, vect_stmts_counter++);
}
}
/* Loads should be inserted before the first load. */
if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
&& STMT_VINFO_STRIDED_ACCESS (stmt_info)
- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
+ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
+ && SLP_INSTANCE_LOAD_PERMUTATION (instance))
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
+ else if (is_pattern_stmt_p (stmt_info))
+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
else
si = gsi_for_stmt (stmt);
si = gsi_for_stmt (last_store);
}
+ /* Mark the first element of the reduction chain as reduction to properly
+ transform the node. In the analysis phase only the last element of the
+ chain is marked as reduction. */
+ if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
+ && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
+ {
+ STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
+ STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ }
+
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
return is_store;
}