+ if (loop_vinfo)
+ VEC_safe_push (slp_instance, heap,
+ LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+ new_instance);
+ else
+ VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
+ new_instance);
+
+ if (vect_print_dump_info (REPORT_SLP))
+ vect_print_slp_tree (node);
+
+ return true;
+ }
+
+ /* Failed to SLP. */
+ /* Free the allocated memory. */
+ vect_free_slp_tree (node);
+ VEC_free (int, heap, load_permutation);
+ VEC_free (slp_tree, heap, loads);
+
+ return false;
+}
+
+
+/* Check if there are stmts in the loop that can be vectorized using SLP.
+   Build SLP trees of packed scalar stmts if SLP is possible.
+   Return FALSE only when this is a basic-block analysis that found no SLP
+   instance at all, or when a detected reduction chain failed to SLP;
+   otherwise return TRUE (loop analysis proceeds even without SLP).  */
+
+bool
+vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+{
+  unsigned int i;
+  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
+  gimple first_element;
+  bool ok = false;
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_analyze_slp ===");
+
+  /* Only loop vectorization has reductions and reduction chains to
+     consider; basic-block vectorization starts from strided stores only.  */
+  if (loop_vinfo)
+    {
+      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
+      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
+    }
+  else
+    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
+
+  /* Find SLP sequences starting from groups of strided stores.  */
+  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
+    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
+      ok = true;
+
+  /* For a basic block at least one SLP instance is required, otherwise
+     there is nothing to vectorize.  */
+  if (bb_vinfo && !ok)
+    {
+      if (vect_print_dump_info (REPORT_SLP))
+        fprintf (vect_dump, "Failed to SLP the basic block.");
+
+      return false;
+    }
+
+  /* Use the cached REDUC_CHAINS vector rather than re-expanding
+     LOOP_VINFO_REDUCTION_CHAINS; it was loaded above when LOOP_VINFO
+     is non-NULL.  */
+  if (loop_vinfo && VEC_length (gimple, reduc_chains) > 0)
+    {
+      /* Find SLP sequences starting from reduction chains.  */
+      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
+        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
+          ok = true;
+        else
+          return false;
+
+      /* Don't try to vectorize SLP reductions if reduction chain was
+         detected.  */
+      return ok;
+    }
+
+  /* Find SLP sequences starting from groups of reductions.  */
+  if (loop_vinfo && VEC_length (gimple, reductions) > 1
+      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
+                                    VEC_index (gimple, reductions, 0)))
+    ok = true;
+
+  return true;
+}
+
+
+/* For each possible SLP instance decide whether to SLP it and calculate overall
+   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
+   least one instance.  */
+
+bool
+vect_make_slp_decision (loop_vec_info loop_vinfo)
+{
+  unsigned int ix, unrolling_factor = 1;
+  VEC (slp_instance, heap) *slp_instances
+    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+  slp_instance instance;
+  int decided_to_slp = 0;
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_make_slp_decision ===");
+
+  for (ix = 0; VEC_iterate (slp_instance, slp_instances, ix, instance); ix++)
+    {
+      /* FORNOW: SLP if you can.  Track the maximum unrolling factor over
+         all the instances we decide to SLP.  */
+      if (SLP_INSTANCE_UNROLLING_FACTOR (instance) > unrolling_factor)
+        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
+
+      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
+         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
+         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
+      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
+      decided_to_slp++;
+    }
+
+  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
+
+  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
+             decided_to_slp, unrolling_factor);
+
+  return (decided_to_slp > 0);
+}
+
+
+/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
+   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
+
+static void
+vect_detect_hybrid_slp_stmts (slp_tree node)
+{
+  int i;
+  gimple stmt;
+  imm_use_iterator imm_iter;
+  gimple use_stmt;
+  stmt_vec_info stmt_vinfo;
+  slp_void_p child;
+  loop_vec_info loop_vinfo;
+  struct loop *loop = NULL;
+  bb_vec_info bb_vinfo;
+  basic_block bb = NULL;
+
+  /* NODE may be NULL (the recursion below walks SLP_TREE_CHILDREN, which
+     presumably can contain NULL entries - the guard existed before), so
+     test it BEFORE any dereference.  Previously the scalar stmts were
+     fetched first, which would crash on a NULL node.  */
+  if (!node)
+    return;
+
+  /* Use the first scalar stmt to find the enclosing loop or basic block.  */
+  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+  stmt_vinfo = vinfo_for_stmt (stmt);
+  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+
+  if (loop_vinfo)
+    loop = LOOP_VINFO_LOOP (loop_vinfo);
+  else
+    bb = BB_VINFO_BB (bb_vinfo);
+
+  /* A pure-SLP stmt whose SSA result is also used by a relevant non-SLP
+     stmt inside the same loop/basic block needs loop-based vectorization
+     too - mark the stmts of NODE as hybrid.  Uses by reduction PHIs are
+     excluded.  Note STMT_VINFO is reused as a scratch variable inside the
+     condition (assigned from the use stmt).  */
+  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
+    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
+        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
+      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
+        if (gimple_bb (use_stmt)
+            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+                || bb == gimple_bb (use_stmt))
+            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
+            && !STMT_SLP_TYPE (stmt_vinfo)
+            && (STMT_VINFO_RELEVANT (stmt_vinfo)
+                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
+            && !(gimple_code (use_stmt) == GIMPLE_PHI
+                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
+                    == vect_reduction_def))
+          vect_mark_slp_stmts (node, hybrid, i);
+
+  /* Recurse into the children of NODE.  */
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_detect_hybrid_slp_stmts ((slp_tree) child);
+}
+
+
+/* Find stmts that must be both vectorized and SLPed.  */
+
+void
+vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
+{
+  unsigned int ix;
+  slp_instance instance;
+  VEC (slp_instance, heap) *slp_instances
+    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
+
+  /* Walk each SLP instance and mark hybrid stmts in its tree.  */
+  for (ix = 0; VEC_iterate (slp_instance, slp_instances, ix, instance); ix++)
+    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
+}
+
+
+/* Create and initialize a new bb_vec_info struct for BB, as well as
+   stmt_vec_info structs for all the stmts in it.  */
+
+static bb_vec_info
+new_bb_vec_info (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  bb_vec_info res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
+
+  BB_VINFO_BB (res) = bb;
+
+  /* Attach a fresh stmt_vec_info to every stmt in the block and reset
+     its uid.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+
+      gimple_set_uid (stmt, 0);
+      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
+    }
+
+  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
+  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
+
+  bb->aux = res;
+  return res;
+}
+
+
+/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
+   stmts in the basic block.  */
+
+static void
+destroy_bb_vec_info (bb_vec_info bb_vinfo)
+{
+  VEC (slp_instance, heap) *slp_instances;
+  slp_instance instance;
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+  unsigned ix;
+
+  if (!bb_vinfo)
+    return;
+
+  bb = BB_VINFO_BB (bb_vinfo);
+
+  /* Release the stmt_vec_info of every stmt in the block.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (gsi));
+
+      if (stmt_info)
+        free_stmt_vec_info (gsi_stmt (gsi));
+    }
+
+  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
+  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
+  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
+
+  /* Free each SLP instance before releasing the vector that holds them.  */
+  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+  for (ix = 0; VEC_iterate (slp_instance, slp_instances, ix, instance); ix++)
+    vect_free_slp_instance (instance);
+  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
+
+  free (bb_vinfo);
+  bb->aux = NULL;
+}
+
+
+/* Analyze statements contained in SLP tree node after recursively analyzing
+   the subtree.  Return TRUE if the operations are supported.  */
+
+static bool
+vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
+{
+  bool dummy;
+  int ix;
+  gimple stmt;
+  slp_void_p child;
+
+  /* An empty subtree is trivially supported.  */
+  if (!node)
+    return true;
+
+  /* First require every child subtree to be supported.  */
+  for (ix = 0;
+       VEC_iterate (slp_void_p, SLP_TREE_CHILDREN (node), ix, child);
+       ix++)
+    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
+      return false;
+
+  /* Then analyze the scalar stmts of NODE itself.  */
+  for (ix = 0;
+       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), ix, stmt);
+       ix++)
+    {
+      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+      gcc_assert (stmt_info);
+      gcc_assert (PURE_SLP_STMT (stmt_info));
+
+      if (!vect_analyze_stmt (stmt, &dummy, node))
+        return false;
+    }
+
+  return true;
+}
+
+
+/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
+   operations are supported.  */
+
+static bool
+vect_slp_analyze_operations (bb_vec_info bb_vinfo)
+{
+  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+  slp_instance instance;
+  int ix = 0;
+
+  /* Drop every instance whose tree is not supported.  IX is only advanced
+     when the current instance is kept, because VEC_ordered_remove shifts
+     the remaining elements down.  */
+  while (VEC_iterate (slp_instance, slp_instances, ix, instance))
+    {
+      if (vect_slp_analyze_node_operations (bb_vinfo,
+                                            SLP_INSTANCE_TREE (instance)))
+        ix++;
+      else
+        {
+          vect_free_slp_instance (instance);
+          VEC_ordered_remove (slp_instance, slp_instances, ix);
+        }
+    }
+
+  /* Succeed iff at least one instance survived.  */
+  return VEC_length (slp_instance, slp_instances) != 0;
+}
+
+/* Check if vectorization of the basic block is profitable. */
+
+static bool
+vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
+{
+ VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+ slp_instance instance;
+ int i;
+ unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
+ unsigned int stmt_cost;
+ gimple stmt;
+ gimple_stmt_iterator si;
+ basic_block bb = BB_VINFO_BB (bb_vinfo);
+ stmt_vec_info stmt_info = NULL;
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ /* Calculate vector costs. */
+ FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
+ {
+ vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
+ vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
+ }
+
+ /* Calculate scalar cost. */
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ stmt = gsi_stmt (si);
+ stmt_info = vinfo_for_stmt (stmt);
+
+ if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
+ || !PURE_SLP_STMT (stmt_info))
+ continue;
+
+ if (STMT_VINFO_DATA_REF (stmt_info))
+ {
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+ stmt_cost = targetm.vectorize.builtin_vectorization_cost
+ (scalar_load, dummy_type, dummy);
+ else
+ stmt_cost = targetm.vectorize.builtin_vectorization_cost
+ (scalar_store, dummy_type, dummy);
+ }
+ else
+ stmt_cost = targetm.vectorize.builtin_vectorization_cost
+ (scalar_stmt, dummy_type, dummy);
+
+ scalar_cost += stmt_cost;
+ }