gimple first_load, prev_first_load = NULL;
/* For every stmt in NODE find its def stmt/s. */
- for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
{
if (vect_print_dump_info (REPORT_SLP))
{
&& (first_stmt_code != IMAGPART_EXPR
|| rhs_code != REALPART_EXPR)
&& (first_stmt_code != REALPART_EXPR
- || rhs_code != IMAGPART_EXPR))
+ || rhs_code != IMAGPART_EXPR)
+ && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
+ && (first_stmt_code == ARRAY_REF
+ || first_stmt_code == INDIRECT_REF
+ || first_stmt_code == COMPONENT_REF
+ || first_stmt_code == MEM_REF)))
{
if (vect_print_dump_info (REPORT_SLP))
{
return;
fprintf (vect_dump, "node ");
- for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
fprintf (vect_dump, "\n\tstmt %d ", i);
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
if (!node)
return;
- for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
if (j < 0 || i == j)
STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
if (!node)
return;
- for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
stmt_info = vinfo_for_stmt (stmt);
gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
for (i = 0; i < group_size; i++)
VEC_safe_push (gimple, heap, tmp_stmts, NULL);
- for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
index = VEC_index (int, permutation, i);
VEC_replace (gimple, tmp_stmts, index, stmt);
if (vect_print_dump_info (REPORT_SLP))
{
fprintf (vect_dump, "Load permutation ");
- for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
+ FOR_EACH_VEC_ELT (int, load_permutation, i, next)
fprintf (vect_dump, "%d ", next);
}
permutation). */
/* Check that all the load nodes are of the same size. */
- for (i = 0;
- VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
- i++)
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
{
if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
!= (unsigned) group_size)
chains are mixed, they match the above pattern. */
if (complex_numbers)
{
- for (i = 0;
- VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
- i++)
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
{
- for (j = 0;
- VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt);
- j++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
{
if (j == 0)
first = stmt;
slp_tree load_node;
gimple first_load = NULL, load;
- for (i = 0;
- VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
- i++)
- for (j = 0;
- VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
- j++)
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
first_load = get_earlier_stmt (load, first_load);
return first_load;
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
/* Find SLP sequences starting from groups of strided stores. */
- for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
+ FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
ok = true;
if (vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "=== vect_make_slp_decision ===");
- for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
{
/* FORNOW: SLP if you can. */
if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
if (!node)
return;
- for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
if (vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
- for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
|| !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
return false;
- for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
}
-/* Cheick if the basic block can be vectorized. */
+/* Check if vectorization of the basic block is profitable.
+
+   Sums the inside- and outside-of-block vector costs of all SLP
+   instances recorded in BB_VINFO and compares the total against the
+   scalar cost of the statements that vectorization would replace.
+   Returns true iff the vector cost is strictly smaller than the scalar
+   cost; break-even is treated as not profitable.  */
+
+static bool
+vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
+{
+ VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+ slp_instance instance;
+ int i;
+ unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
+ unsigned int stmt_cost;
+ gimple stmt;
+ gimple_stmt_iterator si;
+ basic_block bb = BB_VINFO_BB (bb_vinfo);
+ stmt_vec_info stmt_info = NULL;
+ /* Placeholder arguments for the target cost hook: only the cost kind
+    (scalar_load/scalar_store/scalar_stmt) is meaningful here.  */
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ /* Calculate vector costs.  */
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
+ {
+ vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
+ vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
+ }
+
+ /* Calculate scalar cost.  Only vectorizable pure-SLP statements are
+    counted -- those are the ones the vector code replaces; everything
+    else remains scalar either way and cancels out of the comparison.  */
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ stmt = gsi_stmt (si);
+ stmt_info = vinfo_for_stmt (stmt);
+
+ if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
+ || !PURE_SLP_STMT (stmt_info))
+ continue;
+
+ /* Charge data references as loads/stores, everything else as a
+    plain scalar statement, using the target's per-kind costs.  */
+ if (STMT_VINFO_DATA_REF (stmt_info))
+ {
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+ stmt_cost = targetm.vectorize.builtin_vectorization_cost
+ (scalar_load, dummy_type, dummy);
+ else
+ stmt_cost = targetm.vectorize.builtin_vectorization_cost
+ (scalar_store, dummy_type, dummy);
+ }
+ else
+ stmt_cost = targetm.vectorize.builtin_vectorization_cost
+ (scalar_stmt, dummy_type, dummy);
+
+ scalar_cost += stmt_cost;
+ }
+
+ if (vect_print_dump_info (REPORT_COST))
+ {
+ fprintf (vect_dump, "Cost model analysis: \n");
+ fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
+ vec_inside_cost);
+ fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
+ vec_outside_cost);
+ fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
+ }
+
+ /* Vectorization is profitable if its cost is less than the cost of scalar
+ version.  */
+ if (vec_outside_cost + vec_inside_cost >= scalar_cost)
+ return false;
+
+ return true;
+}
+
+/* Check if the basic block can be vectorized. */
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
/* Mark all the statements that we want to vectorize as pure SLP and
relevant. */
- for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
{
vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
return NULL;
}
+ /* Cost model: check if the vectorization is worthwhile. */
+ if (flag_vect_cost_model
+ && !vect_bb_vectorization_profitable_p (bb_vinfo))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ fprintf (vect_dump, "not vectorized: vectorization is not "
+ "profitable.\n");
+
+ destroy_bb_vec_info (bb_vinfo);
+ return NULL;
+ }
+
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
if (vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
- for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
/* We assume that costs are linear in ncopies. */
SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (instance);
gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
- for (i = 0;
- VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
- i++)
+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
{
gcc_assert (vec_def_stmt);
vec_oprnd = gimple_get_lhs (vec_def_stmt);
we need the second and the third vectors: {b1,c1,a2,b2} and
{c2,a3,b3,c3}. */
- for (i = 0;
- VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
- i, node);
- i++)
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
{
scalar_index = 0;
index = 0;
all the nodes that participate in that permutation. */
if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
{
- for (i = 0;
- VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
- i++)
+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
{
if (!SLP_TREE_VEC_STMTS (loads_node))
{
vf = 1;
}
- for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
{
/* Schedule the tree of INSTANCE. */
is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
fprintf (vect_dump, "vectorizing stmts using SLP.");
}
- for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
{
slp_tree root = SLP_INSTANCE_TREE (instance);
gimple store;