static void vect_finish_stmt_generation
(gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
static bool vect_is_simple_cond (tree, loop_vec_info);
-static void vect_create_epilog_for_reduction (tree, gimple, enum tree_code,
- gimple);
+static void vect_create_epilog_for_reduction
+ (tree, gimple, int, enum tree_code, gimple);
static tree get_initial_def_for_reduction (gimple, tree, tree *);
/* Utility function dealing with loop peeling (not peeling itself). */
int i;
int inside_cost = 0, outside_cost = 0;
+ /* The SLP costs were already calculated during SLP tree build. */
+ if (PURE_SLP_STMT (stmt_info))
+ return;
+
inside_cost = ncopies * TARG_VEC_STMT_COST;
/* FORNOW: Assuming maximum 2 args per stmts. */
int group_size;
int inside_cost = 0, outside_cost = 0;
+ /* The SLP costs were already calculated during SLP tree build. */
+ if (PURE_SLP_STMT (stmt_info))
+ return;
+
if (dt == vect_constant_def || dt == vect_invariant_def)
outside_cost = TARG_SCALAR_TO_VEC_COST;
/* Strided access? */
- if (DR_GROUP_FIRST_DR (stmt_info))
+ if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
group_size = vect_cost_strided_group_size (stmt_info);
/* Not a strided access. */
else
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
int inside_cost = 0, outside_cost = 0;
+ /* The SLP costs were already calculated during SLP tree build. */
+ if (PURE_SLP_STMT (stmt_info))
+ return;
+
/* Strided accesses? */
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
if (first_stmt && !slp_node)
/* Get vectorized definitions from SLP_NODE that contains corresponding
vectorized def-stmts. */
-
+
static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
- for (i = 0;
+ for (i = 0;
VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
i++)
{
must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
vect_get_slp_vect_defs() to retrieve them.
If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
- the right node. This is used when the second operand must remain scalar. */
+ the right node). This is used when the second operand must remain scalar. */
static void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
{
gimple first_stmt;
enum tree_code code;
+ int number_of_vects;
+
+ /* The number of vector defs is determined by the number of vector statements
+ in the node from which we get those statements. */
+ if (SLP_TREE_LEFT (slp_node))
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
+ else
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* Allocate memory for vectorized defs. */
- *vec_oprnds0 = VEC_alloc (tree, heap,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
- /* SLP_NODE corresponds either to a group of stores or to a group of
+ /* SLP_NODE corresponds either to a group of stores or to a group of
unary/binary operations. We don't call this function for loads. */
- if (SLP_TREE_LEFT (slp_node))
- /* The defs are already vectorized. */
+ if (SLP_TREE_LEFT (slp_node))
+ /* The defs are already vectorized. */
vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
else
/* Build vectors from scalar defs. */
first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
- /* Since we don't call this function with loads, this is a group of
+ /* Since we don't call this function with loads, this is a group of
stores. */
return;
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
return;
- *vec_oprnds1 = VEC_alloc (tree, heap,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ /* The number of vector defs is determined by the number of vector statements
+ in the node from which we get those statements. */
+ if (SLP_TREE_RIGHT (slp_node))
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
+ else
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+
+ *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
if (SLP_TREE_RIGHT (slp_node))
- /* The defs are already vectorized. */
+ /* The defs are already vectorized. */
vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
else
/* Build vectors from scalar defs. */
if (vect_print_dump_info (REPORT_DETAILS))
{
- fprintf (vect_dump, "transform induction: created def-use cycle:");
+ fprintf (vect_dump, "transform induction: created def-use cycle: ");
print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
fprintf (vect_dump, "\n");
print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
gcc_assert (vec_stmt_for_operand);
vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
+ if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
+ vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
+ else
+ vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
return vec_oprnd;
}
VECT_DEF is a vector of partial results.
REDUC_CODE is the tree-code for the epilog reduction.
+ NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
+ number of elements that we can fit in a vectype (nunits). In this case
+ we have to generate more than one vector stmt - i.e - we need to "unroll"
+ the vector stmt by a factor VF/nunits. For more details see documentation
+ in vectorizable_operation.
STMT is the scalar reduction stmt that is being vectorized.
REDUCTION_PHI is the phi-node that carries the reduction computation.
static void
vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
+ int ncopies,
enum tree_code reduc_code,
gimple reduction_phi)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ stmt_vec_info prev_phi_info;
tree vectype;
enum machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
basic_block exit_bb;
tree scalar_dest;
tree scalar_type;
- gimple new_phi;
+ gimple new_phi = NULL, phi;
gimple_stmt_iterator exit_gsi;
tree vec_dest;
tree new_temp = NULL_TREE;
tree bitsize, bitpos, bytesize;
enum tree_code code = gimple_assign_rhs_code (stmt);
tree adjustment_def;
- tree vec_initial_def;
+ tree vec_initial_def, def;
tree orig_name;
imm_use_iterator imm_iter;
use_operand_p use_p;
gimple use_stmt;
bool nested_in_vect_loop = false;
VEC(gimple,heap) *phis = NULL;
- int i;
+ enum vect_def_type dt = vect_unknown_def_type;
+ int j, i;
if (nested_in_vect_loop_p (loop, stmt))
{
/*** 1. Create the reduction def-use cycle ***/
- /* 1.1 set the loop-entry arg of the reduction-phi: */
/* For the case of reduction, vect_get_vec_def_for_operand returns
the scalar def before the loop, that defines the initial value
of the reduction variable. */
vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
&adjustment_def);
- add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
-
- /* 1.2 set the loop-latch arg for the reduction-phi: */
- add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
- if (vect_print_dump_info (REPORT_DETAILS))
+ phi = reduction_phi;
+ def = vect_def;
+ for (j = 0; j < ncopies; j++)
{
- fprintf (vect_dump, "transform reduction: created def-use cycle:");
- print_gimple_stmt (vect_dump, reduction_phi, 0, TDF_SLIM);
- fprintf (vect_dump, "\n");
- print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vect_def), 0, TDF_SLIM);
- }
+ /* 1.1 set the loop-entry arg of the reduction-phi: */
+ add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop));
+
+ /* 1.2 set the loop-latch arg for the reduction-phi: */
+ if (j > 0)
+ def = vect_get_vec_def_for_stmt_copy (dt, def);
+ add_phi_arg (phi, def, loop_latch_edge (loop));
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "transform reduction: created def-use cycle: ");
+ print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
+ fprintf (vect_dump, "\n");
+ print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
+ }
+
+ phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+ }
/*** 2. Create epilog code
The reduction epilog code operates across the elements of the vector
s_out3 = extract_field <v_out2, 0> # step 2
s_out4 = adjust_result <s_out3> # step 3
- (step 3 is optional, and step2 1 and 2 may be combined).
+ (step 3 is optional, and steps 1 and 2 may be combined).
Lastly, the uses of s_out0 are replaced by s_out4.
***/
v_out1 = phi <v_loop> */
exit_bb = single_exit (loop)->dest;
- new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
- SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
+ def = vect_def;
+ prev_phi_info = NULL;
+ for (j = 0; j < ncopies; j++)
+ {
+ phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
+ set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
+ if (j == 0)
+ new_phi = phi;
+ else
+ {
+ def = vect_get_vec_def_for_stmt_copy (dt, def);
+ STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
+ }
+ SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
+ prev_phi_info = vinfo_for_stmt (phi);
+ }
exit_gsi = gsi_after_labels (exit_bb);
/* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
if (nested_in_vect_loop)
goto vect_finalize_reduction;
+ /* FORNOW */
+ gcc_assert (ncopies == 1);
+
/* 2.3 Create the reduction code, using one of the three schemes described
above. */
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
- /* FORNOW. Currently not supporting the case that an inner-loop reduction
- is not used in the outer-loop (but only outside the outer-loop). */
+ /* FORNOW. Currently not supporting the case that an inner-loop
+ reduction is not used in the outer-loop (but only outside the
+ outer-loop). */
gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
&& !STMT_VINFO_LIVE_P (stmt_vinfo));
- epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
+ epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
- set_vinfo_for_stmt (epilog_stmt,
+ set_vinfo_for_stmt (epilog_stmt,
new_stmt_vec_info (epilog_stmt, loop_vinfo));
+ if (adjustment_def)
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
continue;
}
tree def;
gimple def_stmt;
enum vect_def_type dt;
- gimple new_phi;
+ gimple new_phi = NULL;
tree scalar_type;
bool is_simple_use;
gimple orig_stmt;
int i;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
- stmt_vec_info prev_stmt_info;
+ int epilog_copies;
+ stmt_vec_info prev_stmt_info, prev_phi_info;
+ gimple first_phi = NULL;
+ bool single_defuse_cycle = false;
tree reduc_def;
gimple new_stmt = NULL;
int j;
tree ops[3];
if (nested_in_vect_loop_p (loop, stmt))
- {
- loop = loop->inner;
- /* FORNOW. This restriction should be relaxed. */
- if (ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
- }
+ loop = loop->inner;
gcc_assert (ncopies >= 1);
/* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- /* Create the reduction-phi that defines the reduction-operand. */
- new_phi = create_phi_node (vec_dest, loop->header);
-
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. For more details see documentation
in vectorizable_operation. */
+ /* If the reduction is used in an outer loop we need to generate
+ VF intermediate results, like so (e.g. for ncopies=2):
+ r0 = phi (init, r0)
+ r1 = phi (init, r1)
+ r0 = x0 + r0;
+ r1 = x1 + r1;
+ (i.e. we generate VF results in 2 registers).
+ In this case we have a separate def-use cycle for each copy, and therefore
+ for each copy we get the vector def for the reduction variable from the
+ respective phi node created for this copy.
+
+ Otherwise (the reduction is unused in the loop nest), we can combine
+ together intermediate results, like so (e.g. for ncopies=2):
+ r = phi (init, r)
+ r = x0 + r;
+ r = x1 + r;
+ (i.e. we generate VF/2 results in a single register).
+ In this case for each copy we get the vector def for the reduction variable
+ from the vectorized reduction operation generated in the previous iteration.
+ */
+
+ if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
+ {
+ single_defuse_cycle = true;
+ epilog_copies = 1;
+ }
+ else
+ epilog_copies = ncopies;
+
prev_stmt_info = NULL;
+ prev_phi_info = NULL;
for (j = 0; j < ncopies; j++)
{
+ if (j == 0 || !single_defuse_cycle)
+ {
+ /* Create the reduction-phi that defines the reduction-operand. */
+ new_phi = create_phi_node (vec_dest, loop->header);
+ set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
+ }
+
/* Handle uses. */
if (j == 0)
{
/* Get the vector def for the reduction variable from the phi node */
reduc_def = PHI_RESULT (new_phi);
+ first_phi = new_phi;
}
else
{
if (op_type == ternary_op)
loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
- /* Get the vector def for the reduction variable from the vectorized
- reduction operation generated in the previous iteration (j-1) */
- reduc_def = gimple_assign_lhs (new_stmt);
+ if (single_defuse_cycle)
+ reduc_def = gimple_assign_lhs (new_stmt);
+ else
+ reduc_def = PHI_RESULT (new_phi);
+
+ STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
}
/* Arguments are ready. create the new vector stmt. */
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
+ prev_phi_info = vinfo_for_stmt (new_phi);
}
- /* Finalize the reduction-phi (set it's arguments) and create the
+ /* Finalize the reduction-phi (set its arguments) and create the
epilog reduction code. */
- vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
+ if (!single_defuse_cycle)
+ new_temp = gimple_assign_lhs (*vec_stmt);
+ vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
+ epilog_reduc_code, first_phi);
return true;
}
int nunits_in;
int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree fndecl, new_temp, def, rhs_type, lhs_type;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
needs to be generated. */
gcc_assert (ncopies >= 1);
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
-
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
-
/* Handle def. */
scalar_dest = gimple_call_lhs (stmt);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
/* Function vect_gen_widened_results_half
Create a vector stmt whose code, type, number of arguments, and result
- variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
+ variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
In the case that CODE is a CALL_EXPR, this means that a call to DECL
needs to be created (DECL is a function-decl of a target-builtin).
static gimple
vect_gen_widened_results_half (enum tree_code code,
- tree vectype ATTRIBUTE_UNUSED,
tree decl,
tree vec_oprnd0, tree vec_oprnd1, int op_type,
tree vec_dest, gimple_stmt_iterator *gsi,
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE;
tree new_temp;
int i;
VEC(tree,heap) *vec_oprnds0 = NULL;
tree vop0;
+ tree integral_type;
+ VEC(tree,heap) *dummy = NULL;
+ int dummy_int;
/* Is STMT a vectorizable conversion? */
|| (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
return false;
+ integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
+
if (modifier == NARROW)
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
else
needs to be generated. */
gcc_assert (ncopies >= 1);
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
-
/* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
{
/* Supportable by target? */
if ((modifier == NONE
- && !targetm.vectorize.builtin_conversion (code, vectype_in))
+ && !targetm.vectorize.builtin_conversion (code, integral_type))
|| (modifier == WIDEN
&& !supportable_widening_operation (code, stmt, vectype_in,
&decl1, &decl2,
- &code1, &code2))
+ &code1, &code2,
+ &dummy_int, &dummy))
|| (modifier == NARROW
&& !supportable_narrowing_operation (code, stmt, vectype_in,
- &code1)))
+ &code1, &dummy_int, &dummy)))
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "op not supported by target.");
+ fprintf (vect_dump, "conversion not supported by target.");
return false;
}
ssa_op_iter iter;
if (j == 0)
- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
+ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
builtin_decl =
- targetm.vectorize.builtin_conversion (code, vectype_in);
+ targetm.vectorize.builtin_conversion (code, integral_type);
for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
{
/* Arguments are ready. create the new vector stmt. */
/* Generate first half of the widened result: */
new_stmt
- = vect_gen_widened_results_half (code1, vectype_out, decl1,
+ = vect_gen_widened_results_half (code1, decl1,
vec_oprnd0, vec_oprnd1,
unary_op, vec_dest, gsi, stmt);
if (j == 0)
/* Generate second half of the widened result: */
new_stmt
- = vect_gen_widened_results_half (code2, vectype_out, decl2,
+ = vect_gen_widened_results_half (code2, decl2,
vec_oprnd0, vec_oprnd1,
unary_op, vec_dest, gsi, stmt);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int i;
VEC(tree,heap) *vec_oprnds = NULL;
tree vop;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis
- verifies this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp_node)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
gcc_assert (ncopies >= 1);
if (ncopies > 1)
stmt_vec_info stmt_info = vinfo_for_stmt (phi);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
tree vec_def;
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code;
enum machine_mode vec_mode;
tree new_temp;
int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
int nunits_out;
tree vectype_out;
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+ int ncopies;
int j, i;
VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
tree vop0, vop1;
bool shift_p = false;
bool scalar_shift_arg = false;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
- this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp_node)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+
gcc_assert (ncopies >= 1);
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
}
+ if (slp_node)
+ continue;
+
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
}
+/* Collect into VEC_OPRNDS the vector definitions needed for loop-based
+   vectorization of STMT.  On entry *OPRND holds either the scalar operand
+   (on the very first request) or the last vector definition obtained so
+   far.  In the former case the first def is created with
+   vect_get_vec_def_for_operand (); in the latter it is a stmt copy obtained
+   with vect_get_vec_def_for_stmt_copy () (see that function for details).
+   Every invocation pushes two defs onto VEC_OPRNDS and leaves the last one
+   in *OPRND.  MULTI_STEP_CVT is the number of further pairs that are
+   fetched recursively.  */
+
+static void
+vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
+                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
+{
+  tree first_def, second_def;
+
+  /* Only the initial scalar operand lacks a vector type; anything else is
+     a previously obtained vector def of which we take the next copy.  */
+  if (TREE_CODE (TREE_TYPE (*oprnd)) == VECTOR_TYPE)
+    first_def = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
+  else
+    first_def = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
+  VEC_quick_push (tree, *vec_oprnds, first_def);
+
+  /* The second def of the pair is always a copy of the first one.  */
+  second_def = vect_get_vec_def_for_stmt_copy (dt, first_def);
+  VEC_quick_push (tree, *vec_oprnds, second_def);
+
+  /* Remember where we stopped so that the next request continues the
+     chain of copies from here.  */
+  *oprnd = second_def;
+
+  if (multi_step_cvt == 0)
+    return;
+
+  /* Conversion in multiple steps: keep fetching operand pairs.  */
+  vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
+}
+
+
+/* Create vectorized demotion statements for the vector operands in
+   VEC_OPRNDS.  Operands are consumed in consecutive pairs; each pair is
+   combined by one CODE statement whose destination is the variable popped
+   from VEC_DSTS.  The statements are inserted via
+   vect_finish_stmt_generation () for STMT at GSI.
+
+   When MULTI_STEP_CVT is nonzero this is an intermediate step: the results
+   are written back to the front of VEC_OPRNDS and the function calls itself
+   for the next step.  Otherwise this is the last step and the statements
+   are recorded in SLP_NODE (if given) or chained through
+   STMT_VINFO_RELATED_STMT, starting from *PREV_STMT_INFO.  */
+
+static void
+vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
+                                       int multi_step_cvt, gimple stmt,
+                                       VEC (tree, heap) *vec_dsts,
+                                       gimple_stmt_iterator *gsi,
+                                       slp_tree slp_node, enum tree_code code,
+                                       stmt_vec_info *prev_stmt_info)
+{
+  stmt_vec_info scalar_info = vinfo_for_stmt (stmt);
+  tree dest_var = VEC_pop (tree, vec_dsts);
+  unsigned int idx = 0;
+
+  while (idx < VEC_length (tree, *vec_oprnds))
+    {
+      /* Build one demotion operation out of the next two operands.  */
+      tree first_vec = VEC_index (tree, *vec_oprnds, idx);
+      tree second_vec = VEC_index (tree, *vec_oprnds, idx + 1);
+      gimple pack_stmt = gimple_build_assign_with_ops (code, dest_var,
+                                                       first_vec, second_vec);
+      tree packed = make_ssa_name (dest_var, pack_stmt);
+
+      gimple_assign_set_lhs (pack_stmt, packed);
+      vect_finish_stmt_generation (stmt, pack_stmt, gsi);
+
+      if (multi_step_cvt)
+        /* Intermediate step: keep the result as an operand of the next
+           recursive call.  */
+        VEC_replace (tree, *vec_oprnds, idx / 2, packed);
+      else if (slp_node)
+        /* Last step, SLP: the vector stmts live in the SLP node.  */
+        VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), pack_stmt);
+      else
+        {
+          /* Last step, loop-based: the first stmt goes into the vector info
+             of the scalar stmt, later ones extend the RELATED_STMT chain.  */
+          if (*prev_stmt_info)
+            STMT_VINFO_RELATED_STMT (*prev_stmt_info) = pack_stmt;
+          else
+            STMT_VINFO_VEC_STMT (scalar_info) = pack_stmt;
+
+          *prev_stmt_info = vinfo_for_stmt (pack_stmt);
+        }
+
+      idx += 2;
+    }
+
+  if (!multi_step_cvt)
+    return;
+
+  /* For multi-step demotion we first generate demotion operations from the
+     source type to the intermediate types, and then combine the results
+     (stored in VEC_OPRNDS) in the demotion operation to the destination
+     type.  At each level of recursion we have half of the operands we had
+     at the previous level.  */
+  VEC_truncate (tree, *vec_oprnds, (idx + 1) / 2);
+  vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
+                                         stmt, vec_dsts, gsi, slp_node,
+                                         code, prev_stmt_info);
+}
+
+
/* Function vectorizable_type_demotion
Check if STMT performs a binary or unary operation that involves
bool
vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
- gimple *vec_stmt)
+ gimple *vec_stmt, slp_tree slp_node)
{
tree vec_dest;
tree scalar_dest;
tree op0;
- tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK;
- tree new_temp;
tree def;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
- gimple new_stmt;
stmt_vec_info prev_stmt_info;
int nunits_in;
int nunits_out;
tree vectype_out;
int ncopies;
- int j;
+ int j, i;
tree vectype_in;
+ int multi_step_cvt = 0;
+ VEC (tree, heap) *vec_oprnds0 = NULL;
+ VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
+ tree last_oprnd, intermediate_type;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
return false;
code = gimple_assign_rhs_code (stmt);
- if (code != NOP_EXPR && code != CONVERT_EXPR)
+ if (!CONVERT_EXPR_CODE_P (code))
return false;
op0 = gimple_assign_rhs1 (stmt);
if (!vectype_out)
return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- if (nunits_in != nunits_out / 2) /* FORNOW */
+ if (nunits_in >= nunits_out)
return false;
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
+ if (slp_node)
+ ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
+
gcc_assert (ncopies >= 1);
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0)))
|| (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
- && (code == NOP_EXPR || code == CONVERT_EXPR))))
+ && CONVERT_EXPR_CODE_P (code))))
return false;
/* Check the operands of the operation. */
}
/* Supportable by target? */
- if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
+ if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
+ &multi_step_cvt, &interm_types))
return false;
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
ncopies);
- /* Handle def. */
+ /* In case of multi-step demotion, we first generate demotion operations to
+ the intermediate types, and then from those types to the final one.
+ We create vector destinations for the intermediate type (TYPES) received
+ from supportable_narrowing_operation, and store them in the correct order
+ for future use in vect_create_vectorized_demotion_stmts(). */
+ if (multi_step_cvt)
+ vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
+ else
+ vec_dsts = VEC_alloc (tree, heap, 1);
+
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
-
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+
+ if (multi_step_cvt)
+ {
+ for (i = VEC_length (tree, interm_types) - 1;
+ VEC_iterate (tree, interm_types, i, intermediate_type); i--)
+ {
+ vec_dest = vect_create_destination_var (scalar_dest,
+ intermediate_type);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+ }
+ }
+
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. */
+ last_oprnd = op0;
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
/* Handle uses. */
- if (j == 0)
- {
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
- }
+ if (slp_node)
+ vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
else
- {
- vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
- }
+ {
+ VEC_free (tree, heap, vec_oprnds0);
+ vec_oprnds0 = VEC_alloc (tree, heap,
+ (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
+ vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
+ vect_pow2 (multi_step_cvt) - 1);
+ }
- /* Arguments are ready. Create the new vector stmt. */
- new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
- vec_oprnd1);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ /* Arguments are ready. Create the new vector stmts. */
+ tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
+ vect_create_vectorized_demotion_stmts (&vec_oprnds0,
+ multi_step_cvt, stmt, tmp_vec_dsts,
+ gsi, slp_node, code1,
+ &prev_stmt_info);
+ }
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
+ VEC_free (tree, heap, vec_oprnds0);
+ VEC_free (tree, heap, vec_dsts);
+ VEC_free (tree, heap, tmp_vec_dsts);
+ VEC_free (tree, heap, interm_types);
+
+ *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ return true;
+}
+
+
+/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
+ and VEC_OPRNDS1 (for binary operations). For every operand two statements
+ are generated with vect_gen_widened_results_half (), one per half of the
+ widened result: the first uses CODE1/DECL1, the second CODE2/DECL2; both
+ use the destination popped from VEC_DSTS.
+ For multi-step conversions (MULTI_STEP_CVT != 0) the results replace the
+ contents of VEC_OPRNDS0 and the function is called recursively for the
+ next step. On the last step the statements are recorded in SLP_NODE (if
+ given) or chained through STMT_VINFO_RELATED_STMT starting from
+ *PREV_STMT_INFO. */
+
+static void
+vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
+ VEC (tree, heap) **vec_oprnds1,
+ int multi_step_cvt, gimple stmt,
+ VEC (tree, heap) *vec_dsts,
+ gimple_stmt_iterator *gsi,
+ slp_tree slp_node, enum tree_code code1,
+ enum tree_code code2, tree decl1,
+ tree decl2, int op_type,
+ stmt_vec_info *prev_stmt_info)
+{
+ int i;
+ tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
+ gimple new_stmt1, new_stmt2;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ VEC (tree, heap) *vec_tmp;
+
+ vec_dest = VEC_pop (tree, vec_dsts);
+ /* Every operand yields two results, hence twice the room. */
+ vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
+
+ for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
+ {
+ if (op_type == binary_op)
+ vop1 = VEC_index (tree, *vec_oprnds1, i);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+ vop1 = NULL_TREE;
+
+ /* Generate the two halves of promotion operation. */
+ new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
+ op_type, vec_dest, gsi, stmt);
+ new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
+ op_type, vec_dest, gsi, stmt);
+ if (is_gimple_call (new_stmt1))
+ {
+ new_tmp1 = gimple_call_lhs (new_stmt1);
+ new_tmp2 = gimple_call_lhs (new_stmt2);
+ }
+ else
+ {
+ new_tmp1 = gimple_assign_lhs (new_stmt1);
+ new_tmp2 = gimple_assign_lhs (new_stmt2);
+ }
- prev_stmt_info = vinfo_for_stmt (new_stmt);
+ if (multi_step_cvt)
+ {
+ /* Store the results for the recursive call. */
+ VEC_quick_push (tree, vec_tmp, new_tmp1);
+ VEC_quick_push (tree, vec_tmp, new_tmp2);
+ }
+ else
+ {
+ /* Last step of promotion sequence - store the results. */
+ if (slp_node)
+ {
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
+ }
+ else
+ {
+ if (!*prev_stmt_info)
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
+ else
+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
+
+ *prev_stmt_info = vinfo_for_stmt (new_stmt1);
+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
+ *prev_stmt_info = vinfo_for_stmt (new_stmt2);
+ }
+ }
}
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
- return true;
+ if (multi_step_cvt)
+ {
+ /* For a multi-step promotion operation we call the function
+ recursively for every stage. We start from the input type,
+ create promotion operations to the intermediate types, and then
+ create promotions to the output type. */
+ *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
+ VEC_free (tree, heap, vec_tmp);
+ /* Pass DECL1 for the first half and DECL2 for the second one, matching
+ the parameter order; passing DECL2 twice would build both halves
+ with the second-half builtin. */
+ vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
+ multi_step_cvt - 1, stmt,
+ vec_dsts, gsi, slp_node, code1,
+ code2, decl1, decl2, op_type,
+ prev_stmt_info);
+ }
+ else
+ /* Nothing was pushed to VEC_TMP on the last step; release it so it does
+ not leak. */
+ VEC_free (tree, heap, vec_tmp);
}
bool
vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
- gimple *vec_stmt)
+ gimple *vec_stmt, slp_tree slp_node)
{
tree vec_dest;
tree scalar_dest;
tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE;
int op_type;
tree def;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
- gimple new_stmt;
stmt_vec_info prev_stmt_info;
int nunits_in;
int nunits_out;
tree vectype_out;
int ncopies;
- int j;
+ int j, i;
tree vectype_in;
+ tree intermediate_type = NULL_TREE;
+ int multi_step_cvt = 0;
+ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
+ VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
return false;
code = gimple_assign_rhs_code (stmt);
- if (code != NOP_EXPR && code != CONVERT_EXPR
+ if (!CONVERT_EXPR_CODE_P (code)
&& code != WIDEN_MULT_EXPR)
return false;
if (!vectype_out)
return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- if (nunits_out != nunits_in / 2) /* FORNOW */
+ if (nunits_in <= nunits_out)
return false;
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
+ if (slp_node)
+ ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+
gcc_assert (ncopies >= 1);
- /* FORNOW. This restriction should be relaxed. */
- if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "multiple types in nested loop.");
- return false;
- }
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0)))
|| (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
- && (code == CONVERT_EXPR || code == NOP_EXPR))))
+ && CONVERT_EXPR_CODE_P (code))))
return false;
/* Check the operands of the operation. */
/* Supportable by target? */
if (!supportable_widening_operation (code, stmt, vectype_in,
- &decl1, &decl2, &code1, &code2))
+ &decl1, &decl2, &code1, &code2,
+ &multi_step_cvt, &interm_types))
return false;
+ /* Binary widening operation can only be supported directly by the
+ architecture. */
+ gcc_assert (!(multi_step_cvt && op_type == binary_op));
+
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
if (!vec_stmt) /* transformation not required. */
ncopies);
/* Handle def. */
+ /* In case of multi-step promotion, we first generate promotion operations
+ to the intermediate types, and then from those types to the final one.
+ We store vector destinations in VEC_DSTS in the correct order for
+ recursive creation of promotion operations in
+ vect_create_vectorized_promotion_stmts(). Vector destinations are created
+ according to TYPES received from supportable_widening_operation(). */
+ if (multi_step_cvt)
+ vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
+ else
+ vec_dsts = VEC_alloc (tree, heap, 1);
+
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+
+ if (multi_step_cvt)
+ {
+ for (i = VEC_length (tree, interm_types) - 1;
+ VEC_iterate (tree, interm_types, i, intermediate_type); i--)
+ {
+ vec_dest = vect_create_destination_var (scalar_dest,
+ intermediate_type);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+ }
+ }
+
+ if (!slp_node)
+ {
+ vec_oprnds0 = VEC_alloc (tree, heap,
+ (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
+ if (op_type == binary_op)
+ vec_oprnds1 = VEC_alloc (tree, heap, 1);
+ }
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
/* Handle uses. */
if (j == 0)
{
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
- if (op_type == binary_op)
- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
+ if (slp_node)
+ vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
+ else
+ {
+ vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
+ VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
+ if (op_type == binary_op)
+ {
+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
+ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
+ }
+ }
}
else
{
- vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
- if (op_type == binary_op)
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
+ vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
+ VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
+ if (op_type == binary_op)
+ {
+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
+ VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
+ }
}
- /* Arguments are ready. Create the new vector stmt. We are creating
- two vector defs because the widened result does not fit in one vector.
- The vectorized stmt can be expressed as a call to a target builtin,
- or a using a tree-code. */
- /* Generate first half of the widened result: */
- new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
- vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
-
- /* Generate second half of the widened result: */
- new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
- vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
-
+ /* Arguments are ready. Create the new vector stmts. */
+ tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
+ vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
+ multi_step_cvt, stmt,
+ tmp_vec_dsts,
+ gsi, slp_node, code1, code2,
+ decl1, decl2, op_type,
+ &prev_stmt_info);
}
+ VEC_free (tree, heap, vec_dsts);
+ VEC_free (tree, heap, tmp_vec_dsts);
+ VEC_free (tree, heap, interm_types);
+ VEC_free (tree, heap, vec_oprnds0);
+ VEC_free (tree, heap, vec_oprnds1);
+
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
return true;
}
stmt_vec_info prev_stmt_info = NULL;
tree dataref_ptr = NULL_TREE;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int j;
gimple next_stmt, first_stmt = NULL;
bool strided_store = false;
stmt_vec_info first_stmt_vinfo;
unsigned int vec_num;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
- this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
gcc_assert (ncopies >= 1);
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
- if (!PURE_SLP_STMT (stmt_info))
- vect_model_store_cost (stmt_info, ncopies, dt, NULL);
+ vect_model_store_cost (stmt_info, ncopies, dt, NULL);
return true;
}
strided_store = false;
/* VEC_NUM is the number of vect stmts to be created for this group. */
- if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
+ if (slp)
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else
vec_num = group_size;
}
else
{
- /* FORNOW SLP doesn't work for multiple types. */
- gcc_assert (!slp);
-
/* For interleaved stores we created vectorized defs for all the
defs stored in OPRNDS in the previous iteration (previous copy).
DR_CHAIN is then used as an input to vect_permute_store_chain(),
new_stmt = gimple_build_assign (data_ref, vec_oprnd);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
mark_symbols_for_renaming (new_stmt);
+
+ if (slp)
+ continue;
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
tree dataref_ptr = NULL_TREE;
gimple ptr_incr;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int i, j, group_size;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
bool slp = (slp_node != NULL);
enum tree_code code;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
- this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
gcc_assert (ncopies >= 1);
}
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
- dr_chain = VEC_alloc (tree, heap, group_size);
/* VEC_NUM is the number of vect stmts to be created for this group. */
if (slp)
}
else
vec_num = group_size;
+
+ dr_chain = VEC_alloc (tree, heap, vec_num);
}
else
{
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
}
- /* FORNOW: SLP with multiple types is unsupported. */
if (slp)
- return true;
+ continue;
if (strided_load)
{
switch (STMT_VINFO_TYPE (stmt_info))
{
case type_demotion_vec_info_type:
- gcc_assert (!slp_node);
- done = vectorizable_type_demotion (stmt, gsi, &vec_stmt);
+ done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);
break;
case type_promotion_vec_info_type:
- gcc_assert (!slp_node);
- done = vectorizable_type_promotion (stmt, gsi, &vec_stmt);
+ done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);
break;
case store_vec_info_type:
done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);
- if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
{
/* In case of interleaving, the whole chain is vectorized when the
last store in the chain is reached. Store stmts before the last
addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
add_referenced_var (addr_tmp);
addr_tmp_name = make_ssa_name (addr_tmp, NULL);
- addr_stmt = gimple_build_assign (addr_tmp_name, addr_base);
+ addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
+ addr_base, NULL_TREE);
SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
/* Vectorize SLP instance tree in postorder. */
static bool
-vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
+vect_schedule_slp_instance (slp_tree node, slp_instance instance,
+ unsigned int vectorization_factor)
{
gimple stmt;
bool strided_store, is_store;
gimple_stmt_iterator si;
stmt_vec_info stmt_info;
+ unsigned int vec_stmts_size, nunits, group_size;
+ tree vectype;
if (!node)
return false;
- vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
+ vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
+ vectorization_factor);
+ vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
+ vectorization_factor);
- stmt = VEC_index(gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
stmt_info = vinfo_for_stmt (stmt);
+ /* VECTYPE is the type of the destination. */
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
+ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
+ group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+
+ /* For each SLP instance calculate number of vector stmts to be created
+ for the scalar stmts in each node of the SLP tree. Number of vector
+ elements in one vector iteration is the number of scalar elements in
+ one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
+ size. */
+ vec_stmts_size = (vectorization_factor * group_size) / nunits;
+
SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
static bool
-vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
+vect_schedule_slp (loop_vec_info loop_vinfo)
{
VEC (slp_instance, heap) *slp_instances =
LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
slp_instance instance;
- unsigned int vec_stmts_size;
- unsigned int group_size, i;
- unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned int i;
bool is_store = false;
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
{
- group_size = SLP_INSTANCE_GROUP_SIZE (instance);
- /* For each SLP instance calculate number of vector stmts to be created
- for the scalar stmts in each node of the SLP tree. Number of vector
- elements in one vector iteration is the number of scalar elements in
- one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
- size. */
- vec_stmts_size = vectorization_factor * group_size / nunits;
-
/* Schedule the tree of INSTANCE. */
- is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
- vec_stmts_size);
-
+ is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
+ instance,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+
if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
|| vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
fprintf (vect_dump, "vectorizing stmts using SLP.");
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== scheduling SLP instances ===");
- is_store = vect_schedule_slp (loop_vinfo, nunits);
+ is_store = vect_schedule_slp (loop_vinfo);
/* IS_STORE is true if STMT is a store. Stores cannot be of
hybrid SLP type. They are removed in