OSDN Git Service

* tree-vectorizer.h (enum vect_def_type): Start enumeration from 1.
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-analyze.c
index a37fcf4..684d12d 100644 (file)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
 #include "toplev.h"
+#include "recog.h"
 
 /* Main analysis functions.  */
 static loop_vec_info vect_analyze_loop_form (struct loop *);
@@ -300,6 +301,30 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 }
 
 
+/* SLP costs are calculated according to SLP instance unrolling factor (i.e., 
+   the number of created vector stmts depends on the unrolling factor). However,
+   the actual number of vector stmts for every SLP node depends on VF which is
+   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
+   In this function we assume that the inside costs calculated in 
+   vect_model_xxx_cost are linear in ncopies.  */
+
+static void
+vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
+{
+  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+  slp_instance instance;
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
+
+  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+    /* We assume that costs are linear in ncopies.  */
+    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf 
+      / SLP_INSTANCE_UNROLLING_FACTOR (instance);        
+}
+
+
 /* Function vect_analyze_operations.
 
    Scan the loop stmts and make sure they are all vectorizable.  */
@@ -320,6 +345,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
   int min_profitable_iters;
   int min_scalar_loop_bound;
   unsigned int th;
+  bool only_slp_in_loop = true;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_analyze_operations ===");
@@ -456,12 +482,12 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
 
          ok = (vectorizable_type_promotion (stmt, NULL, NULL)
                || vectorizable_type_demotion (stmt, NULL, NULL)
-               || vectorizable_conversion (stmt, NULL, NULL)
-               || vectorizable_operation (stmt, NULL, NULL)
-               || vectorizable_assignment (stmt, NULL, NULL)
-               || vectorizable_load (stmt, NULL, NULL)
+               || vectorizable_conversion (stmt, NULL, NULL, NULL)
+               || vectorizable_operation (stmt, NULL, NULL, NULL)
+               || vectorizable_assignment (stmt, NULL, NULL, NULL)
+               || vectorizable_load (stmt, NULL, NULL, NULL)
                || vectorizable_call (stmt, NULL, NULL)
-               || vectorizable_store (stmt, NULL, NULL)
+               || vectorizable_store (stmt, NULL, NULL, NULL)
                || vectorizable_condition (stmt, NULL, NULL)
                || vectorizable_reduction (stmt, NULL, NULL));
 
@@ -480,6 +506,30 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
                }
              return false;
            }   
+
+         if (!PURE_SLP_STMT (stmt_info))
+           {
+             /* STMT needs loop-based vectorization.  */
+             only_slp_in_loop = false;
+
+             /* Groups of strided accesses whose size is not a power of 2 are 
+                not vectorizable yet using loop-vectorization. Therefore, if 
+                this stmt feeds non-SLP-able stmts (i.e., this stmt has to be 
+                both SLPed and loop-based vectorzed), the loop cannot be 
+                vectorized.  */
+             if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
+                 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
+                                 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
+               {
+                 if (vect_print_dump_info (REPORT_DETAILS))
+                   {
+                     fprintf (vect_dump, "not vectorized: the size of group "
+                              "of strided accesses is not a power of 2");
+                     print_generic_expr (vect_dump, stmt, TDF_SLIM);
+                   }
+                 return false;
+               }
+           }
        } /* stmts in bb */
     } /* bbs */
 
@@ -499,6 +549,18 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
       return false;
     }
 
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+     vectorization factor of the loop is the unrolling factor required by the
+     SLP instances.  If that unrolling factor is 1, we say, that we perform
+     pure SLP on loop - cross iteration parallelism is not exploited.  */
+  if (only_slp_in_loop)
+    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+    vectorization_factor = least_common_multiple (vectorization_factor,
+                               LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+  
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -518,6 +580,10 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
 
   /* Analyze cost. Decide if worth while to vectorize.  */
 
+  /* Once VF is set, SLP costs should be updated since the number of created
+     vector stmts depends on VF.  */
+  vect_update_slp_costs_according_to_vf (loop_vinfo);
+
   min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
   if (min_profitable_iters < 0)
@@ -1462,9 +1528,9 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
 
  /* For interleaved data accesses the step in the loop must be multiplied by
      the size of the interleaving group.  */
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
-  if (DR_GROUP_FIRST_DR (peel_stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
     dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
 
   /* It can be assumed that the data refs with the same alignment as dr_peel
@@ -1516,7 +1582,7 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
       /* For interleaving, only the alignment of the first access matters.  */
-      if (DR_GROUP_FIRST_DR (stmt_info)
+      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
         continue;
 
@@ -1554,7 +1620,7 @@ vector_alignment_reachable_p (struct data_reference *dr)
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
@@ -1768,7 +1834,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
       /* For interleaving, only the alignment of the first access
          matters.  */
-      if (DR_GROUP_FIRST_DR (stmt_info)
+      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
         continue;
 
@@ -1818,7 +1884,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));     
-         if (DR_GROUP_FIRST_DR (stmt_info))
+         if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);
 
           if (vect_print_dump_info (REPORT_DETAILS))
@@ -1837,7 +1903,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
-         if (DR_GROUP_FIRST_DR (stmt_info)
+         if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;
 
@@ -1907,7 +1973,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
-             || (DR_GROUP_FIRST_DR (stmt_info)
+             || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;
 
@@ -2019,13 +2085,13 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
 }
 
 
-/* Function vect_analyze_data_ref_access.
-
-   Analyze the access pattern of the data-reference DR. For now, a data access
-   has to be consecutive to be considered vectorizable.  */
+/* Analyze groups of strided accesses: check that DR belongs to a group of
+   strided accesses of legal size, step, etc. Detect gaps, single element
+   interleaving, and other special cases. Set strided access info.
+   Collect groups of strided stores for further use in SLP analysis.  */
 
 static bool
-vect_analyze_data_ref_access (struct data_reference *dr)
+vect_analyze_group_access (struct data_reference *dr)
 {
   tree step = DR_STEP (dr);
   tree scalar_type = TREE_TYPE (DR_REF (dr));
@@ -2033,50 +2099,14 @@ vect_analyze_data_ref_access (struct data_reference *dr)
   tree stmt = DR_STMT (dr);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
   HOST_WIDE_INT stride;
+  bool slp_impossible = false;
 
-  /* Don't allow invariant accesses.  */
-  if (dr_step == 0)
-    return false; 
-
-  if (nested_in_vect_loop_p (loop, stmt))
-    {
-      /* For the rest of the analysis we use the outer-loop step.  */
-      step = STMT_VINFO_DR_STEP (stmt_info);
-      dr_step = TREE_INT_CST_LOW (step);
-      
-      if (dr_step == 0)
-       {
-         if (vect_print_dump_info (REPORT_ALIGNMENT))
-           fprintf (vect_dump, "zero step in outer loop.");
-         if (DR_IS_READ (dr))
-           return true; 
-         else
-           return false;
-       }
-    }
-    
   /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the 
      interleaving group (including gaps).  */
   stride = dr_step / type_size; 
 
-  /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
-    {
-      /* Mark that it is not interleaving.  */
-      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
-      return true;
-    }
-
-  if (nested_in_vect_loop_p (loop, stmt))
-    {
-      if (vect_print_dump_info (REPORT_ALIGNMENT))
-       fprintf (vect_dump, "strided access in outer loop.");
-      return false;
-    }
-
   /* Not consecutive access is possible only if it is a part of interleaving.  */
   if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
     {
@@ -2119,99 +2149,105 @@ vect_analyze_data_ref_access (struct data_reference *dr)
       HOST_WIDE_INT diff, count_in_bytes;
 
       while (next)
-       {
-         /* Skip same data-refs. In case that two or more stmts share data-ref
-            (supported only for loads), we vectorize only the first stmt, and
-            the rest get their vectorized loads from the first one.  */
-         if (!tree_int_cst_compare (DR_INIT (data_ref),
-                                    DR_INIT (STMT_VINFO_DATA_REF (
-                                                     vinfo_for_stmt (next)))))
-           {
+        {
+          /* Skip same data-refs. In case that two or more stmts share data-ref
+             (supported only for loads), we vectorize only the first stmt, and
+             the rest get their vectorized loads from the first one.  */
+          if (!tree_int_cst_compare (DR_INIT (data_ref),
+                                     DR_INIT (STMT_VINFO_DATA_REF (
+                                                  vinfo_for_stmt (next)))))
+            {
               if (!DR_IS_READ (data_ref))
-                { 
+                {
                   if (vect_print_dump_info (REPORT_DETAILS))
                     fprintf (vect_dump, "Two store stmts share the same dr.");
-                  return false; 
+                  return false;
                 }
 
-              /* Check that there is no load-store dependencies for this loads 
+              /* Check that there is no load-store dependencies for this loads
                  to prevent a case of load-store-load to the same location.  */
               if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                   || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                 {
                   if (vect_print_dump_info (REPORT_DETAILS))
-                    fprintf (vect_dump, 
+                    fprintf (vect_dump,
                              "READ_WRITE dependence in interleaving.");
                   return false;
                 }
 
-             /* For load use the same data-ref load.  */
-             DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
+              /* For load use the same data-ref load.  */
+              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
 
-             prev = next;
-             next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
-             continue;
-           }
-         prev = next;
+              prev = next;
+              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+              continue;
+            }
+          prev = next;
 
-         /* Check that all the accesses have the same STEP.  */
-         next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
-         if (tree_int_cst_compare (step, next_step))
-           {
-             if (vect_print_dump_info (REPORT_DETAILS))
-               fprintf (vect_dump, "not consecutive access in interleaving");
-             return false;
-           }
+          /* Check that all the accesses have the same STEP.  */
+          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
+          if (tree_int_cst_compare (step, next_step))
+            {
+              if (vect_print_dump_info (REPORT_DETAILS))
+                fprintf (vect_dump, "not consecutive access in interleaving");
+              return false;
+            }
 
-         data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
-         /* Check that the distance between two accesses is equal to the type
-            size. Otherwise, we have gaps.  */
-         diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) 
-                 - TREE_INT_CST_LOW (prev_init)) / type_size;
-         if (!DR_IS_READ (data_ref) && diff != 1)
+          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
+          /* Check that the distance between two accesses is equal to the type
+             size. Otherwise, we have gaps.  */
+          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
+                  - TREE_INT_CST_LOW (prev_init)) / type_size;
+         if (diff != 1)
            {
-             if (vect_print_dump_info (REPORT_DETAILS))
-               fprintf (vect_dump, "interleaved store with gaps");
-             return false;
+             /* FORNOW: SLP of accesses with gaps is not supported.  */
+             slp_impossible = true;
+             if (!DR_IS_READ (data_ref))
+               {
+                 if (vect_print_dump_info (REPORT_DETAILS))
+                   fprintf (vect_dump, "interleaved store with gaps");
+                 return false;
+               }
            }
-         /* Store the gap from the previous member of the group. If there is no
+
+          /* Store the gap from the previous member of the group. If there is no
              gap in the access, DR_GROUP_GAP is always 1.  */
-         DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
+          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
 
-         prev_init = DR_INIT (data_ref);
-         next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
-         /* Count the number of data-refs in the chain.  */
-         count++;
-       }
+          prev_init = DR_INIT (data_ref);
+          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+          /* Count the number of data-refs in the chain.  */
+          count++;
+        }
 
-      /* COUNT is the number of accesses found, we multiply it by the size of 
-        the type to get COUNT_IN_BYTES.  */
+      /* COUNT is the number of accesses found, we multiply it by the size of
+         the type to get COUNT_IN_BYTES.  */
       count_in_bytes = type_size * count;
 
       /* Check that the size of the interleaving is not greater than STEP.  */
-      if (dr_step < count_in_bytes) 
-       {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           {
-             fprintf (vect_dump, "interleaving size is greater than step for ");
-             print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 
-           }
-         return false;
-       }
+      if (dr_step < count_in_bytes)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "interleaving size is greater than step for ");
+              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
+            }
+          return false;
+        }
 
-      /* Check that the size of the interleaving is equal to STEP for stores, 
-         i.e., that there are no gaps.  */ 
-      if (!DR_IS_READ (dr) && dr_step != count_in_bytes) 
-       {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           fprintf (vect_dump, "interleaved store with gaps");
-         return false;
-       }
+      /* Check that the size of the interleaving is equal to STEP for stores,
+         i.e., that there are no gaps.  */
+      if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "interleaved store with gaps");
+          return false;
+        }
 
       /* Check that STEP is a multiple of type size.  */
       if ((dr_step % type_size) != 0)
-       {
-         if (vect_print_dump_info (REPORT_DETAILS)) 
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
             {
               fprintf (vect_dump, "step is not a multiple of type size: step ");
               print_generic_expr (vect_dump, step, TDF_SLIM);
@@ -2219,22 +2255,98 @@ vect_analyze_data_ref_access (struct data_reference *dr)
               print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                   TDF_SLIM);
             }
-         return false;
-       }
+          return false;
+        }
 
-      /* FORNOW: we handle only interleaving that is a power of 2.  */
+      /* FORNOW: we handle only interleaving that is a power of 2.  
+         We don't fail here if it may be still possible to vectorize the
+         group using SLP. If not, the size of the group will be checked in
+         vect_analyze_operations, and the vectorization will fail.  */
       if (exact_log2 (stride) == -1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "interleaving is not a power of 2");
-         return false;
+
+         if (slp_impossible)
+           return false;
        }
       DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
+
+      /* SLP: create an SLP data structure for every interleaving group of 
+        stores for further analysis in vect_analyse_slp.  */
+      if (!DR_IS_READ (dr) && !slp_impossible)
+       VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
     }
+
   return true;
 }
 
 
+/* Analyze the access pattern of the data-reference DR.
+   In case of non-consecutive accesse call vect_analyze_group_access() to
+   analyze groups of strided accesses.  */
+
+static bool
+vect_analyze_data_ref_access (struct data_reference *dr)
+{
+  tree step = DR_STEP (dr);
+  tree scalar_type = TREE_TYPE (DR_REF (dr));
+  tree stmt = DR_STMT (dr);
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+
+  if (!step)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "bad data-ref access");
+      return false;
+    }
+
+  /* Don't allow invariant accesses.  */
+  if (dr_step == 0)
+    return false; 
+
+  if (nested_in_vect_loop_p (loop, stmt))
+    {
+      /* For the rest of the analysis we use the outer-loop step.  */
+      step = STMT_VINFO_DR_STEP (stmt_info);
+      dr_step = TREE_INT_CST_LOW (step);
+      
+      if (dr_step == 0)
+       {
+         if (vect_print_dump_info (REPORT_ALIGNMENT))
+           fprintf (vect_dump, "zero step in outer loop.");
+         if (DR_IS_READ (dr))
+           return true; 
+         else
+           return false;
+       }
+    }
+
+  /* Consecutive?  */
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+    {
+      /* Mark that it is not interleaving.  */
+      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
+      return true;
+    }
+
+  if (nested_in_vect_loop_p (loop, stmt))
+    {
+      if (vect_print_dump_info (REPORT_ALIGNMENT))
+       fprintf (vect_dump, "strided access in outer loop.");
+      return false;
+    }
+
+  /* Not consecutive access - check if it's a part of interleaving group.  */
+  return vect_analyze_group_access (dr);
+}
+
+
 /* Function vect_analyze_data_ref_accesses.
 
    Analyze the access pattern of all the data references in the loop.
@@ -2266,6 +2378,697 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
 }
 
 
+/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
+
+void
+vect_free_slp_tree (slp_tree node)
+{
+  if (!node)
+    return;
+
+  if (SLP_TREE_LEFT (node))
+    vect_free_slp_tree (SLP_TREE_LEFT (node));
+   
+  if (SLP_TREE_RIGHT (node))
+    vect_free_slp_tree (SLP_TREE_RIGHT (node));
+   
+  VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
+  
+  if (SLP_TREE_VEC_STMTS (node))
+    VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
+
+  free (node);
+}
+
+
+/* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are 
+   of a legal type and that they match the defs of the first stmt of the SLP 
+   group (stored in FIRST_STMT_...).  */
+
+static bool
+vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
+                            tree rhs, VEC (tree, heap) **def_stmts0,
+                            VEC (tree, heap) **def_stmts1,
+                            enum vect_def_type *first_stmt_dt0,
+                            enum vect_def_type *first_stmt_dt1,
+                            tree *first_stmt_def0_type, 
+                            tree *first_stmt_def1_type,
+                            tree *first_stmt_const_oprnd,
+                            int ncopies_for_cost)
+{
+  tree oprnd;
+  enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
+  unsigned int i, number_of_oprnds = op_type;
+  tree def, def_stmt;
+  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
+  stmt_vec_info stmt_info = 
+    vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
+
+  /* Store.  */
+  if (!op_type)
+    number_of_oprnds = 1;
+  else
+    gcc_assert (op_type == unary_op || op_type == binary_op);
+
+  for (i = 0; i < number_of_oprnds; i++)
+    {
+      if (op_type)
+       oprnd = TREE_OPERAND (rhs, i);
+      else
+       oprnd = rhs;
+
+      if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
+         || (!def_stmt && dt[i] != vect_constant_def))
+       {
+         if (vect_print_dump_info (REPORT_SLP)) 
+           {
+             fprintf (vect_dump, "Build SLP failed: can't find def for ");
+             print_generic_expr (vect_dump, oprnd, TDF_SLIM);
+           }
+
+         return false;
+       }
+
+      if (!*first_stmt_dt0)
+       {
+         /* op0 of the first stmt of the group - store its info.  */
+         *first_stmt_dt0 = dt[i];
+         if (def)
+           *first_stmt_def0_type = TREE_TYPE (def);
+         else
+           *first_stmt_const_oprnd = oprnd;
+
+         /* Analyze costs (for the first stmt of the group only).  */
+         if (op_type)
+           /* Not memory operation (we don't call this functions for loads).  */
+           vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
+         else
+           /* Store.  */
+           vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+       }
+      
+      else
+       {
+         if (!*first_stmt_dt1 && i == 1)
+           {
+             /* op1 of the first stmt of the group - store its info.  */
+             *first_stmt_dt1 = dt[i];
+             if (def)
+               *first_stmt_def1_type = TREE_TYPE (def);
+             else
+               {
+                 /* We assume that the stmt contains only one constant 
+                    operand. We fail otherwise, to be on the safe side.  */
+                 if (*first_stmt_const_oprnd)
+                   {
+                     if (vect_print_dump_info (REPORT_SLP)) 
+                       fprintf (vect_dump, "Build SLP failed: two constant "
+                                "oprnds in stmt");                 
+                     return false;
+                   }
+                 *first_stmt_const_oprnd = oprnd;
+               }
+           }
+         else
+           {
+             /* Not first stmt of the group, check that the def-stmt/s match 
+                the def-stmt/s of the first stmt.  */
+             if ((i == 0 
+                  && (*first_stmt_dt0 != dt[i]
+                      || (*first_stmt_def0_type && def
+                          && *first_stmt_def0_type != TREE_TYPE (def))))
+                 || (i == 1 
+                     && (*first_stmt_dt1 != dt[i]
+                         || (*first_stmt_def1_type && def
+                             && *first_stmt_def1_type != TREE_TYPE (def))))              
+                 || (!def 
+                     && TREE_TYPE (*first_stmt_const_oprnd) 
+                     != TREE_TYPE (oprnd)))
+               { 
+                 if (vect_print_dump_info (REPORT_SLP)) 
+                   fprintf (vect_dump, "Build SLP failed: different types ");
+                 
+                 return false;
+               }
+           }
+       }
+
+      /* Check the types of the definitions.  */
+      switch (dt[i])
+       {
+       case vect_constant_def:
+       case vect_invariant_def:
+         break;
+         
+       case vect_loop_def:
+         if (i == 0)
+           VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
+         else
+           VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
+         break;
+
+       default:
+         /* FORNOW: Not supported.  */
+         if (vect_print_dump_info (REPORT_SLP)) 
+           {
+             fprintf (vect_dump, "Build SLP failed: illegal type of def ");
+             print_generic_expr (vect_dump, def, TDF_SLIM);
+           }
+
+         return false;
+       }
+    }
+
+  return true;
+}
+
+
+/* Recursively build an SLP tree starting from NODE.
+   Fail (and return FALSE) if def-stmts are not isomorphic, require data 
+   permutation or are of unsupported types of operation. Otherwise, return 
+   TRUE.
+   SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
+   in the case of multiple types for now.  */
+
+static bool
+vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, 
+                    unsigned int group_size, bool *slp_impossible,
+                    int *inside_cost, int *outside_cost,
+                    int ncopies_for_cost)
+{
+  VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
+  VEC (tree, heap) *def_stmts1 =  VEC_alloc (tree, heap, group_size);
+  unsigned int i;
+  VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
+  tree stmt = VEC_index (tree, stmts, 0);
+  enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
+  enum tree_code first_stmt_code = 0;
+  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
+  tree lhs, rhs, prev_stmt = NULL_TREE;
+  bool stop_recursion = false, need_same_oprnds = false;
+  tree vectype, scalar_type, first_op1 = NULL_TREE;
+  unsigned int vectorization_factor = 0, ncopies;
+  optab optab;
+  int icode;
+  enum machine_mode optab_op2_mode;
+  enum machine_mode vec_mode;
+  tree first_stmt_const_oprnd = NULL_TREE;
+  struct data_reference *first_dr;
+  /* For every stmt in NODE find its def stmt/s.  */
+  for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
+    {
+      if (vect_print_dump_info (REPORT_SLP)) 
+       {
+         fprintf (vect_dump, "Build SLP for ");
+         print_generic_expr (vect_dump, stmt, TDF_SLIM);
+       }
+
+      if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
+       {
+         if (vect_print_dump_info (REPORT_SLP)) 
+           {
+             fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
+             print_generic_expr (vect_dump, stmt, TDF_SLIM);
+           }
+         
+         return false;
+       }
+
+      scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
+      vectype = get_vectype_for_scalar_type (scalar_type);
+      gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
+      if (ncopies > 1)
+       {
+         /* FORNOW.  */
+         if (vect_print_dump_info (REPORT_SLP)) 
+           fprintf (vect_dump, "SLP failed - multiple types ");
+         
+         *slp_impossible = true;
+         return false;
+       }
+
+      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
+      rhs = GIMPLE_STMT_OPERAND (stmt, 1);
+
+      /* Check the operation.  */
+      if (i == 0)
+       {
+         first_stmt_code = TREE_CODE (rhs);
+
+         /* Shift arguments should be equal in all the packed stmts for a 
+            vector shift with scalar shift operand.  */
+         if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
+           {
+             vec_mode = TYPE_MODE (vectype);
+             optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
+             if (!optab)
+               {
+                 if (vect_print_dump_info (REPORT_SLP))
+                   fprintf (vect_dump, "Build SLP failed: no optab.");
+                 return false;
+               }
+             icode = (int) optab->handlers[(int) vec_mode].insn_code;
+             optab_op2_mode = insn_data[icode].operand[2].mode;
+             if (!VECTOR_MODE_P (optab_op2_mode))
+               {
+                 need_same_oprnds = true;
+                 first_op1 = TREE_OPERAND (rhs, 1);
+               }
+           }
+       }
+      else
+       {
+         if (first_stmt_code != TREE_CODE (rhs))
+           {
+             if (vect_print_dump_info (REPORT_SLP)) 
+               {
+                 fprintf (vect_dump, 
+                          "Build SLP failed: different operation in stmt ");
+                 print_generic_expr (vect_dump, stmt, TDF_SLIM);
+               }
+             
+             return false;
+           }
+         
+         if (need_same_oprnds 
+             && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
+           {
+             if (vect_print_dump_info (REPORT_SLP)) 
+               {
+                 fprintf (vect_dump, 
+                          "Build SLP failed: different shift arguments in ");
+                 print_generic_expr (vect_dump, stmt, TDF_SLIM);
+               }
+             
+             return false;
+           }
+       }
+
+      /* Strided store or load.  */
+      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
+       {
+         if (REFERENCE_CLASS_P (lhs))
+           {
+             /* Store.  */
+             if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, 
+                                               &def_stmts0, &def_stmts1, 
+                                               &first_stmt_dt0, 
+                                               &first_stmt_dt1, 
+                                               &first_stmt_def0_type, 
+                                               &first_stmt_def1_type,
+                                               &first_stmt_const_oprnd,
+                                               ncopies_for_cost))
+               return false;
+           }
+           else
+             {
+               /* Load.  */
+               if (i == 0)
+                 {
+                   /* First stmt of the SLP group should be the first load of 
+                      the interleaving loop if data permutation is not 
+                      allowed.  */
+                   if  (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt) 
+                     {
+                       /* FORNOW: data permutations are not supported.  */
+                       if (vect_print_dump_info (REPORT_SLP)) 
+                         {
+                           fprintf (vect_dump, "Build SLP failed: strided "
+                                    " loads need permutation ");
+                           print_generic_expr (vect_dump, stmt, TDF_SLIM);
+                         }
+
+                       return false;
+                     }
+
+                   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+                   if (vect_supportable_dr_alignment (first_dr)
+                       == dr_unaligned_unsupported)
+                     {
+                       if (vect_print_dump_info (REPORT_SLP)) 
+                         {
+                           fprintf (vect_dump, "Build SLP failed: unsupported "
+                                    " unaligned load ");
+                           print_generic_expr (vect_dump, stmt, TDF_SLIM);
+                         }
+
+                       return false;
+                     }
+
+                   /* Analyze costs (for the first stmt in the group).  */
+                   vect_model_load_cost (vinfo_for_stmt (stmt), 
+                                         ncopies_for_cost, *node);
+                 }
+               else
+                 {
+                   if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
+                     {
+                       /* FORNOW: data permutations are not supported.  */
+                       if (vect_print_dump_info (REPORT_SLP)) 
+                         {
+                           fprintf (vect_dump, "Build SLP failed: strided "
+                                    " loads need permutation ");
+                           print_generic_expr (vect_dump, stmt, TDF_SLIM);
+                         }
+                       return false;
+                     }
+                 }
+
+               prev_stmt = stmt;
+
+               /* We stop the tree when we reach a group of loads.  */
+               stop_recursion = true;
+               continue;
+             }
+       } /* Strided access.  */
+      else
+       {
+         if (REFERENCE_CLASS_P (rhs))
+           {
+             /* Not strided load. */
+             if (vect_print_dump_info (REPORT_SLP)) 
+               {
+                 fprintf (vect_dump, "Build SLP failed: not strided load ");
+                 print_generic_expr (vect_dump, stmt, TDF_SLIM);
+               }
+
+             /* FORNOW: Not strided loads are not supported.  */
+             return false;
+           }
+
+         /* Not memory operation.  */
+         if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
+           {
+             if (vect_print_dump_info (REPORT_SLP)) 
+               {
+                 fprintf (vect_dump, "Build SLP failed: operation");
+                 fprintf (vect_dump, " unsupported ");
+                 print_generic_expr (vect_dump, stmt, TDF_SLIM);
+               }
+
+             return false;
+           }
+
+         /* Find the def-stmts.  */ 
+         if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0, 
+                                           &def_stmts1, &first_stmt_dt0, 
+                                           &first_stmt_dt1, 
+                                           &first_stmt_def0_type, 
+                                           &first_stmt_def1_type,
+                                           &first_stmt_const_oprnd,
+                                           ncopies_for_cost))
+           return false;
+       }
+    }
+
+  /* Add the costs of the node to the overall instance costs.  */
+  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node); 
+  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
+
+  /* Strided loads were reached - stop the recursion.  */
+  if (stop_recursion)
+    return true;
+
+  /* Create SLP_TREE nodes for the definition node/s.  */ 
+  if (first_stmt_dt0 == vect_loop_def)
+    {
+      slp_tree left_node = XNEW (struct _slp_tree);
+      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
+      SLP_TREE_VEC_STMTS (left_node) = NULL;
+      SLP_TREE_LEFT (left_node) = NULL;
+      SLP_TREE_RIGHT (left_node) = NULL;
+      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
+      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
+      if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size, 
+                               slp_impossible, inside_cost, outside_cost,
+                               ncopies_for_cost))
+       return false;
+      
+      SLP_TREE_LEFT (*node) = left_node;
+    }
+
+  if (first_stmt_dt1 == vect_loop_def)
+    {
+      slp_tree right_node = XNEW (struct _slp_tree);
+      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
+      SLP_TREE_VEC_STMTS (right_node) = NULL;
+      SLP_TREE_LEFT (right_node) = NULL;
+      SLP_TREE_RIGHT (right_node) = NULL;
+      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
+      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
+      if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
+                               slp_impossible, inside_cost, outside_cost,
+                               ncopies_for_cost))
+       return false;
+      
+      SLP_TREE_RIGHT (*node) = right_node;
+    }
+
+  return true;
+}
+
+
+static void
+vect_print_slp_tree (slp_tree node)
+{
+  int i;
+  tree stmt;
+
+  if (!node)
+    return;
+
+  fprintf (vect_dump, "node ");
+  for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+    {
+      fprintf (vect_dump, "\n\tstmt %d ", i);
+      print_generic_expr (vect_dump, stmt, TDF_SLIM);  
+    }
+  fprintf (vect_dump, "\n");
+
+  vect_print_slp_tree (SLP_TREE_LEFT (node));
+  vect_print_slp_tree (SLP_TREE_RIGHT (node));
+}
+
+
+/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID). 
+   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index 
+   J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the 
+   stmts in NODE are to be marked.  */
+
+static void
+vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
+{
+  int i;
+  tree stmt;
+
+  if (!node)
+    return;
+
+  for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+    if (j < 0 || i == j)
+      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
+
+  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
+  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
+}
+
+
+/* Analyze an SLP instance starting from a group of strided stores. Call
+   vect_build_slp_tree to build a tree of packed stmts if possible. 
+   Return FALSE if it's impossible to SLP any stmt in the loop.  */
+
+static bool
+vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
+{
+  slp_instance new_instance;
+  slp_tree node = XNEW (struct _slp_tree);
+  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+  unsigned int unrolling_factor = 1, nunits;
+  tree vectype, scalar_type, next;
+  unsigned int vectorization_factor = 0, ncopies;
+  bool slp_impossible = false; 
+  int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
+
+  /* FORNOW: multiple types are not supported.  */
+  scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
+  vectype = get_vectype_for_scalar_type (scalar_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  ncopies = vectorization_factor / nunits;
+  if (ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_SLP)) 
+         fprintf (vect_dump, "SLP failed - multiple types ");
+
+      return false;
+    }
+
+  /* Create a node (a root of the SLP tree) for the packed strided stores.  */ 
+  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
+  next = stmt;
+  /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
+  while (next)
+    {
+      VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
+      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+    }
+
+  SLP_TREE_VEC_STMTS (node) = NULL;
+  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
+  SLP_TREE_LEFT (node) = NULL;
+  SLP_TREE_RIGHT (node) = NULL;
+  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
+  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
+
+  /* Calculate the unrolling factor.  */
+  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
+       
+  /* Calculate the number of vector stmts to create based on the unrolling
+     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
+     GROUP_SIZE / NUNITS otherwise.  */
+  ncopies_for_cost = unrolling_factor * group_size / nunits;
+
+  /* Build the tree for the SLP instance.  */
+  if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
+                          &inside_cost, &outside_cost, ncopies_for_cost))
+    {
+      /* Create a new SLP instance.  */  
+      new_instance = XNEW (struct _slp_instance);
+      SLP_INSTANCE_TREE (new_instance) = node;
+      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
+      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
+      VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo), 
+                    new_instance);
+      if (vect_print_dump_info (REPORT_SLP))
+       vect_print_slp_tree (node);
+
+      return true;
+    }
+
+  /* Failed to SLP.  */
+  /* Free the allocated memory.  */
+  vect_free_slp_tree (node);
+
+  if (slp_impossible)
+    return false;
+
+  /* SLP failed for this instance, but it is still possible to SLP other stmts 
+     in the loop.  */
+  return true;
+}
+
+
+/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
+   trees of packed scalar stmts if SLP is possible.  */
+
+static bool
+vect_analyze_slp (loop_vec_info loop_vinfo)
+{
+  unsigned int i;
+  VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+  tree store;
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_analyze_slp ===");
+
+  for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
+    if (!vect_analyze_slp_instance (loop_vinfo, store))
+      {
+       /* SLP failed. No instance can be SLPed in the loop.  */
+       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))   
+         fprintf (vect_dump, "SLP failed.");
+
+       return false;
+      }
+
+  return true;
+}
+
+
+/* For each possible SLP instance decide whether to SLP it and calculate overall
+   unrolling factor needed to SLP the loop.  */
+
+static void
+vect_make_slp_decision (loop_vec_info loop_vinfo)
+{
+  unsigned int i, unrolling_factor = 1;
+  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+  slp_instance instance;
+  int decided_to_slp = 0;
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_make_slp_decision ===");
+
+  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+    {
+      /* FORNOW: SLP if you can.  */
+      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
+       unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
+
+      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we 
+        call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and 
+        loop-based vectorization. Such stmts will be marked as HYBRID.  */
+      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
+      decided_to_slp++;
+    }
+
+  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
+
+  if (decided_to_slp && vect_print_dump_info (REPORT_SLP)) 
+    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d", 
+            decided_to_slp, unrolling_factor);
+}
+
+
+/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
+   can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */
+
+static void
+vect_detect_hybrid_slp_stmts (slp_tree node)
+{
+  int i;
+  tree stmt;
+  imm_use_iterator imm_iter;
+  tree use_stmt;
+
+  if (!node)
+    return;
+
+  for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
+       && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
+      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
+       if (vinfo_for_stmt (use_stmt)
+           && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
+         vect_mark_slp_stmts (node, hybrid, i);
+
+  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
+  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
+}
+
+
+/* Find stmts that must be both vectorized and SLPed.  */
+
+static void
+vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
+{
+  unsigned int i;
+  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+  slp_instance instance;
+
+  if (vect_print_dump_info (REPORT_SLP))
+    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
+
+  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
+}
+
+
 /* Function vect_analyze_data_refs.
 
   Find all the data references in the loop.
@@ -3424,6 +4227,17 @@ vect_analyze_loop (struct loop *loop)
       return NULL;
     }
 
+  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
+  ok = vect_analyze_slp (loop_vinfo);
+  if (ok)
+    {
+      /* Decide which possible SLP instances to SLP.  */
+      vect_make_slp_decision (loop_vinfo);
+
+      /* Find stmts that need to be both vectorized and SLPed.  */
+      vect_detect_hybrid_slp (loop_vinfo);
+    }
+
   /* This pass will decide on using loop versioning and/or loop peeling in
      order to enhance the alignment of data references in the loop.  */