+ /* Strided loads were reached - stop the recursion. */
+ if (stop_recursion)
+ {
+ if (permutation)
+ {
+ VEC_safe_push (slp_tree, heap, *loads, *node);
+ *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
+ }
+
+ return true;
+ }
+
+ /* Create SLP_TREE nodes for the definition node/s. */
+ if (first_stmt_dt0 == vect_loop_def)
+ {
+ slp_tree left_node = XNEW (struct _slp_tree);
+ SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
+ SLP_TREE_VEC_STMTS (left_node) = NULL;
+ SLP_TREE_LEFT (left_node) = NULL;
+ SLP_TREE_RIGHT (left_node) = NULL;
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
+ SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
+ if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
+ inside_cost, outside_cost, ncopies_for_cost,
+ max_nunits, load_permutation, loads))
+ return false;
+
+ SLP_TREE_LEFT (*node) = left_node;
+ }
+
+ if (first_stmt_dt1 == vect_loop_def)
+ {
+ slp_tree right_node = XNEW (struct _slp_tree);
+ SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
+ SLP_TREE_VEC_STMTS (right_node) = NULL;
+ SLP_TREE_LEFT (right_node) = NULL;
+ SLP_TREE_RIGHT (right_node) = NULL;
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
+ SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
+ if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
+ inside_cost, outside_cost, ncopies_for_cost,
+ max_nunits, load_permutation, loads))
+ return false;
+
+ SLP_TREE_RIGHT (*node) = right_node;
+ }
+
+ return true;
+}
+
+
+static void
+vect_print_slp_tree (slp_tree node)
+{
+ int i;
+ gimple stmt;
+
+ if (!node)
+ return;
+
+ fprintf (vect_dump, "node ");
+ for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ {
+ fprintf (vect_dump, "\n\tstmt %d ", i);
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+ fprintf (vect_dump, "\n");
+
+ vect_print_slp_tree (SLP_TREE_LEFT (node));
+ vect_print_slp_tree (SLP_TREE_RIGHT (node));
+}
+
+
+/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
+ If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
+ J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
+ stmts in NODE are to be marked. */
+
+static void
+vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
+{
+ int i;
+ gimple stmt;
+
+ if (!node)
+ return;
+
+ for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ if (j < 0 || i == j)
+ STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
+
+ vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
+ vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
+}
+
+
+/* Check if the permutation required by the SLP INSTANCE is supported.
+ Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
+
+static bool
+vect_supported_slp_permutation_p (slp_instance instance)
+{
+ slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
+ gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ VEC (slp_tree, heap) *sorted_loads = NULL;
+ int index;
+ slp_tree *tmp_loads = NULL;
+ int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
+ slp_tree load;
+
+ /* FORNOW: The only supported loads permutation is loads from the same
+ location in all the loads in the node, when the data-refs in
+ nodes of LOADS constitute an interleaving chain.
+ Sort the nodes according to the order of accesses in the chain. */
+ tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
+ for (i = 0, j = 0;
+ VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
+ && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
+ i += group_size, j++)
+ {
+ gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
+ /* Check that the loads are all in the same interleaving chain. */
+ if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "Build SLP failed: unsupported data "
+ "permutation ");
+ print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
+ }
+
+ free (tmp_loads);
+ return false;
+ }
+
+ tmp_loads[index] = load;
+ }
+
+ sorted_loads = VEC_alloc (slp_tree, heap, group_size);
+ for (i = 0; i < group_size; i++)
+ VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
+
+ VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
+ SLP_INSTANCE_LOADS (instance) = sorted_loads;
+ free (tmp_loads);
+
+ if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
+ SLP_INSTANCE_UNROLLING_FACTOR (instance),
+ instance, true))
+ return false;
+
+ return true;
+}
+
+
+/* Check if the required load permutation is supported.
+ LOAD_PERMUTATION contains a list of indices of the loads.
+ In SLP this permutation is relative to the order of strided stores that are
+ the base of the SLP instance. */
+
+static bool
+vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
+ VEC (int, heap) *load_permutation)
+{
+ int i = 0, j, prev = -1, next, k;
+ bool supported;
+
+ /* FORNOW: permutations are only supported for loop-aware SLP. */
+ if (!slp_instn)
+ return false;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Load permutation ");
+ for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
+ fprintf (vect_dump, "%d ", next);
+ }
+
+ /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
+ GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
+ well. */
+ if (VEC_length (int, load_permutation)
+ != (unsigned int) (group_size * group_size))
+ return false;
+
+ supported = true;
+ for (j = 0; j < group_size; j++)
+ {
+ for (i = j * group_size, k = 0;
+ VEC_iterate (int, load_permutation, i, next) && k < group_size;
+ i++, k++)
+ {
+ if (i != j * group_size && next != prev)
+ {
+ supported = false;
+ break;
+ }
+
+ prev = next;
+ }
+ }
+
+ if (supported && i == group_size * group_size
+ && vect_supported_slp_permutation_p (slp_instn))
+ return true;
+
+ return false;
+}
+
+
+/* Find the first load in the loop that belongs to INSTANCE.
+ When loads are in several SLP nodes, there can be a case in which the first
+ load does not appear in the first SLP node to be transformed, causing
+ incorrect order of statements. Since we generate all the loads together,
+ they must be inserted before the first load of the SLP instance and not
+ before the first load of the first node of the instance. */
+static gimple
+vect_find_first_load_in_slp_instance (slp_instance instance)
+{
+ int i, j;
+ slp_tree load_node;
+ gimple first_load = NULL, load;
+
+ for (i = 0;
+ VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
+ i++)
+ for (j = 0;
+ VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
+ j++)
+ first_load = get_earlier_stmt (load, first_load);
+
+ return first_load;
+}
+
+
+/* Analyze an SLP instance starting from a group of strided stores. Call
+ vect_build_slp_tree to build a tree of packed stmts if possible.
+ Return FALSE if it's impossible to SLP any stmt in the loop. */
+
+static bool
+vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
+{
+ slp_instance new_instance;
+ slp_tree node = XNEW (struct _slp_tree);
+ unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ unsigned int unrolling_factor = 1, nunits;
+ tree vectype, scalar_type;
+ gimple next;
+ unsigned int vectorization_factor = 0, ncopies;
+ bool slp_impossible = false;
+ int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
+ unsigned int max_nunits = 0;
+ VEC (int, heap) *load_permutation;
+ VEC (slp_tree, heap) *loads;
+
+ scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
+ vinfo_for_stmt (stmt))));
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ if (!vectype)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
+ print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+ }
+ return false;
+ }
+
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ ncopies = vectorization_factor / nunits;
+
+ /* Create a node (a root of the SLP tree) for the packed strided stores. */
+ SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
+ next = stmt;
+ /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
+ while (next)
+ {
+ VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
+ next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ }
+
+ SLP_TREE_VEC_STMTS (node) = NULL;
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
+ SLP_TREE_LEFT (node) = NULL;
+ SLP_TREE_RIGHT (node) = NULL;
+ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
+ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
+
+ /* Calculate the unrolling factor. */
+ unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
+
+ /* Calculate the number of vector stmts to create based on the unrolling
+ factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
+ GROUP_SIZE / NUNITS otherwise. */
+ ncopies_for_cost = unrolling_factor * group_size / nunits;
+
+ load_permutation = VEC_alloc (int, heap, group_size * group_size);
+ loads = VEC_alloc (slp_tree, heap, group_size);
+
+ /* Build the tree for the SLP instance. */
+ if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
+ &outside_cost, ncopies_for_cost, &max_nunits,
+ &load_permutation, &loads))
+ {
+ /* Create a new SLP instance. */
+ new_instance = XNEW (struct _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+ /* Calculate the unrolling factor based on the smallest type in the
+ loop. */
+ if (max_nunits > nunits)
+ unrolling_factor = least_common_multiple (max_nunits, group_size)
+ / group_size;
+
+ SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+ SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
+ SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
+ SLP_INSTANCE_LOADS (new_instance) = loads;
+ SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
+ SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
+ if (VEC_length (slp_tree, loads))
+ {
+ if (!vect_supported_load_permutation_p (new_instance, group_size,
+ load_permutation))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: unsupported load "
+ "permutation ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+
+ vect_free_slp_instance (new_instance);
+ return false;
+ }
+
+ SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
+ = vect_find_first_load_in_slp_instance (new_instance);
+ }
+ else
+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
+
+ VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+ new_instance);
+ if (vect_print_dump_info (REPORT_SLP))
+ vect_print_slp_tree (node);
+
+ return true;
+ }
+
+ /* Failed to SLP. */
+ /* Free the allocated memory. */
+ vect_free_slp_tree (node);
+ VEC_free (int, heap, load_permutation);
+ VEC_free (slp_tree, heap, loads);
+
+ if (slp_impossible)
+ return false;
+
+ /* SLP failed for this instance, but it is still possible to SLP other stmts
+ in the loop. */
+ return true;
+}
+
+
+/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
+ trees of packed scalar stmts if SLP is possible. */
+
+static bool
+vect_analyze_slp (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ gimple store;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_analyze_slp ===");
+
+ for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
+ if (!vect_analyze_slp_instance (loop_vinfo, store))
+ {
+ /* SLP failed. No instance can be SLPed in the loop. */
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+ fprintf (vect_dump, "SLP failed.");
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/* For each possible SLP instance decide whether to SLP it and calculate overall
+ unrolling factor needed to SLP the loop. */
+
+static void
+vect_make_slp_decision (loop_vec_info loop_vinfo)
+{
+ unsigned int i, unrolling_factor = 1;
+ VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ slp_instance instance;
+ int decided_to_slp = 0;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_make_slp_decision ===");
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ {
+ /* FORNOW: SLP if you can. */
+ if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
+ unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
+
+ /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
+ call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
+ loop-based vectorization. Such stmts will be marked as HYBRID. */
+ vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
+ decided_to_slp++;
+ }
+
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
+
+ if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
+ decided_to_slp, unrolling_factor);
+}
+
+
+/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
+ can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
+
+static void
+vect_detect_hybrid_slp_stmts (slp_tree node)
+{
+ int i;
+ gimple stmt;
+ imm_use_iterator imm_iter;
+ gimple use_stmt;
+
+ if (!node)
+ return;
+
+ for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
+ && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
+ if (vinfo_for_stmt (use_stmt)
+ && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
+ && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
+ vect_mark_slp_stmts (node, hybrid, i);
+
+ vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
+ vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
+}
+
+
+/* Find stmts that must be both vectorized and SLPed. */
+
+static void
+vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ slp_instance instance;
+
+ if (vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));