OSDN Git Service

2014-05-13 Richard Biener <rguenther@suse.de>
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-loop.c
index d474688..964e5dd 100644 (file)
@@ -1,5 +1,5 @@
 /* Loop Vectorization
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
    Ira Rosen <irar@il.ibm.com>
@@ -181,6 +181,10 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
   stmt_vec_info stmt_info;
   int i;
   HOST_WIDE_INT dummy;
+  gimple stmt, pattern_stmt = NULL;
+  gimple_seq pattern_def_seq = NULL;
+  gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
+  bool analyze_pattern_stmt = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
@@ -241,11 +245,16 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
            }
        }
 
-      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+      for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
         {
-         tree vf_vectype;
-         gimple stmt = gsi_stmt (si);
-         stmt_info = vinfo_for_stmt (stmt);
+          tree vf_vectype;
+
+          if (analyze_pattern_stmt)
+           stmt = pattern_stmt;
+          else
+            stmt = gsi_stmt (si);
+
+          stmt_info = vinfo_for_stmt (stmt);
 
          if (vect_print_dump_info (REPORT_DETAILS))
            {
@@ -255,13 +264,84 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 
          gcc_assert (stmt_info);
 
-         /* skip stmts which do not need to be vectorized.  */
+         /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
+            {
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+                {
+                  stmt = pattern_stmt;
+                  stmt_info = vinfo_for_stmt (pattern_stmt);
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    {
+                      fprintf (vect_dump, "==> examining pattern statement: ");
+                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                    }
+                }
+              else
+               {
+                 if (vect_print_dump_info (REPORT_DETAILS))
+                   fprintf (vect_dump, "skip.");
+                  gsi_next (&si);
+                 continue;
+                }
+           }
+          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+            analyze_pattern_stmt = true;
+
+         /* If a pattern statement has def stmts, analyze them too.  */
+         if (is_pattern_stmt_p (stmt_info))
            {
-             if (vect_print_dump_info (REPORT_DETAILS))
-               fprintf (vect_dump, "skip.");
-             continue;
+             if (pattern_def_seq == NULL)
+               {
+                 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+                 pattern_def_si = gsi_start (pattern_def_seq);
+               }
+             else if (!gsi_end_p (pattern_def_si))
+               gsi_next (&pattern_def_si);
+             if (pattern_def_seq != NULL)
+               {
+                 gimple pattern_def_stmt = NULL;
+                 stmt_vec_info pattern_def_stmt_info = NULL;
+
+                 while (!gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_stmt = gsi_stmt (pattern_def_si);
+                     pattern_def_stmt_info
+                       = vinfo_for_stmt (pattern_def_stmt);
+                     if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                         || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                       break;
+                     gsi_next (&pattern_def_si);
+                   }
+
+                 if (!gsi_end_p (pattern_def_si))
+                   {
+                     if (vect_print_dump_info (REPORT_DETAILS))
+                       {
+                         fprintf (vect_dump,
+                                  "==> examining pattern def stmt: ");
+                         print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                            TDF_SLIM);
+                       }
+
+                     stmt = pattern_def_stmt;
+                     stmt_info = pattern_def_stmt_info;
+                   }
+                 else
+                   {
+                     pattern_def_si = gsi_start (NULL);
+                     analyze_pattern_stmt = false;
+                   }
+               }
+             else
+               analyze_pattern_stmt = false;
            }
 
          if (gimple_get_lhs (stmt) == NULL_TREE)
@@ -287,17 +367,17 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
-                that contain a dataref, or for "pattern-stmts" (stmts generated
-                by the vectorizer to represent/replace a certain idiom).  */
+                that contain a dataref, or for "pattern-stmts" (stmts
+                generated by the vectorizer to represent/replace a certain
+                idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
-                         || is_pattern_stmt_p (stmt_info));
+                         || is_pattern_stmt_p (stmt_info)
+                         || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
-             gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
-                         && !is_pattern_stmt_p (stmt_info));
-
+             gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (vect_print_dump_info (REPORT_DETAILS))
                {
@@ -369,6 +449,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
+
+         if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
+           {
+             pattern_def_seq = NULL;
+             gsi_next (&si);
+           }
         }
     }
 
@@ -479,11 +565,15 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
       /* Analyze the evolution function.  */
       access_fn = analyze_scalar_evolution (loop, def);
       if (access_fn)
-       STRIP_NOPS (access_fn);
-      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
-         fprintf (vect_dump, "Access function of PHI: ");
-         print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+         STRIP_NOPS (access_fn);
+         if (vect_print_dump_info (REPORT_DETAILS))
+           {
+             fprintf (vect_dump, "Access function of PHI: ");
+             print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+           }
+         STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
+           = evolution_part_in_loop_num (access_fn, loop->num);
        }
 
       if (!access_fn
@@ -493,6 +583,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
          continue;
        }
 
+      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
+
       if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
@@ -757,9 +849,11 @@ new_loop_vec_info (struct loop *loop)
                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
   LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
+  LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
   LOOP_VINFO_PEELING_HTAB (res) = NULL;
+  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 
   return res;
 }
@@ -812,29 +906,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          if (stmt_info)
-            {
-              /* Check if this is a "pattern stmt" (introduced by the
-                 vectorizer during the pattern recognition pass).  */
-              bool remove_stmt_p = false;
-              gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-              if (orig_stmt)
-                {
-                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
-                  if (orig_stmt_info
-                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
-                    remove_stmt_p = true;
-                }
-
-              /* Free stmt_vec_info.  */
-              free_stmt_vec_info (stmt);
-
-              /* Remove dead "pattern stmts".  */
-              if (remove_stmt_p)
-                gsi_remove (&si, true);
-            }
+         /* Free stmt_vec_info.  */
+         free_stmt_vec_info (stmt);
           gsi_next (&si);
         }
     }
@@ -852,6 +925,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
   VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
+  VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
 
   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
@@ -1146,7 +1220,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cost)
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1167,6 +1241,40 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 
   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  if (slp)
+    {
+      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+        vectorization factor of the loop is the unrolling factor required by
+        the SLP instances.  If that unrolling factor is 1, we say, that we
+        perform pure SLP on loop - cross iteration parallelism is not
+        exploited.  */
+      for (i = 0; i < nbbs; i++)
+       {
+         basic_block bb = bbs[i];
+         for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+           {
+             gimple stmt = gsi_stmt (si);
+             stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+             gcc_assert (stmt_info);
+             if ((STMT_VINFO_RELEVANT_P (stmt_info)
+                  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+                 && !PURE_SLP_STMT (stmt_info))
+               /* STMT needs both SLP and loop-based vectorization.  */
+               only_slp_in_loop = false;
+           }
+       }
+
+      if (only_slp_in_loop)
+       vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+      else
+       vectorization_factor = least_common_multiple (vectorization_factor,
+                               LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "Updating vectorization factor to %d ",
+                           vectorization_factor);
+    }
 
   for (i = 0; i < nbbs; i++)
     {
@@ -1184,11 +1292,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
             }
 
+          /* Inner-loop loop-closed exit phi in outer-loop vectorization
+             (i.e., a phi in the tail of the outer-loop).  */
           if (! is_loop_header_bb_p (bb))
             {
-              /* inner-loop loop-closed exit phi in outer-loop vectorization
-                 (i.e. a phi in the tail of the outer-loop).
-                 FORNOW: we currently don't support the case that these phis
+              /* FORNOW: we currently don't support the case that these phis
                  are not used in the outerloop (unless it is double reduction,
                  i.e., this phi is vect_reduction_def), cause this case
                  requires to actually do something here.  */
@@ -1202,6 +1310,34 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
                              "Unsupported loop-closed phi in outer-loop.");
                   return false;
                 }
+
+              /* If PHI is used in the outer loop, we check that its operand
+                 is defined in the inner loop.  */
+              if (STMT_VINFO_RELEVANT_P (stmt_info))
+                {
+                  tree phi_op;
+                  gimple op_def_stmt;
+
+                  if (gimple_phi_num_args (phi) != 1)
+                    return false;
+
+                  phi_op = PHI_ARG_DEF (phi, 0);
+                  if (TREE_CODE (phi_op) != SSA_NAME)
+                    return false;
+
+                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
+                 if (!op_def_stmt
+                     || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
+                     || !vinfo_for_stmt (op_def_stmt))
+                    return false;
+
+                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+                        != vect_used_in_outer
+                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+                           != vect_used_in_outer_by_reduction)
+                    return false;
+                }
+
               continue;
             }
 
@@ -1246,18 +1382,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */
 
@@ -1277,18 +1403,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
       return false;
     }
 
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by the
-     SLP instances.  If that unrolling factor is 1, we say, that we perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -1352,7 +1466,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
-      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+      || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "epilog loop required.");
@@ -1384,7 +1498,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, dummy;
+  bool ok, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
 
@@ -1425,7 +1539,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
      the dependences.
      FORNOW: fail at the first data dependence that we encounter.  */
 
-  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
+  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
   if (!ok
       || max_vf < min_vf)
     {
@@ -1498,16 +1612,18 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   if (ok)
     {
       /* Decide which possible SLP instances to SLP.  */
-      vect_make_slp_decision (loop_vinfo);
+      slp = vect_make_slp_decision (loop_vinfo);
 
       /* Find stmts that need to be both vectorized and SLPed.  */
       vect_detect_hybrid_slp (loop_vinfo);
     }
+  else
+    return false;
 
   /* Scan all the operations in the loop and make sure they are
      vectorizable.  */
 
-  ok = vect_analyze_loop_operations (loop_vinfo);
+  ok = vect_analyze_loop_operations (loop_vinfo, slp);
   if (!ok)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -1635,6 +1751,202 @@ report_vect_op (gimple stmt, const char *msg)
 }
 
 
+/* Detect SLP reduction of the form:
+
+   #a1 = phi <a5, a0>
+   a2 = operation (a1)
+   a3 = operation (a2)
+   a4 = operation (a3)
+   a5 = operation (a4)
+
+   #a = phi <a5>
+
+   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
+   FIRST_STMT is the first reduction stmt in the chain
+   (a2 = operation (a1)).
+
+   Return TRUE if a reduction chain was detected.  */
+
+static bool
+vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
+{
+  struct loop *loop = (gimple_bb (phi))->loop_father;
+  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+  enum tree_code code;
+  gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
+  stmt_vec_info use_stmt_info, current_stmt_info;
+  tree lhs;
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+  int nloop_uses, size = 0, n_out_of_loop_uses;
+  bool found = false;
+
+  if (loop != vect_loop)
+    return false;
+
+  lhs = PHI_RESULT (phi);
+  code = gimple_assign_rhs_code (first_stmt);
+  while (1)
+    {
+      nloop_uses = 0;
+      n_out_of_loop_uses = 0;
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+        {
+         gimple use_stmt = USE_STMT (use_p);
+          if (is_gimple_debug (use_stmt))
+            continue;
+
+         use_stmt = USE_STMT (use_p);
+
+          /* Check if we got back to the reduction phi.  */
+         if (use_stmt == phi)
+            {
+             loop_use_stmt = use_stmt;
+              found = true;
+              break;
+            }
+
+          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+            {
+              if (vinfo_for_stmt (use_stmt)
+                  && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+                {
+                  loop_use_stmt = use_stmt;
+                  nloop_uses++;
+                }
+            }
+           else
+             n_out_of_loop_uses++;
+
+           /* There are can be either a single use in the loop or two uses in
+              phi nodes.  */
+           if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
+             return false;
+        }
+
+      if (found)
+        break;
+
+      /* We reached a statement with no loop uses.  */
+      if (nloop_uses == 0)
+       return false;
+
+      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
+      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
+        return false;
+
+      if (!is_gimple_assign (loop_use_stmt)
+         || code != gimple_assign_rhs_code (loop_use_stmt)
+         || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
+        return false;
+
+      /* Insert USE_STMT into reduction chain.  */
+      use_stmt_info = vinfo_for_stmt (loop_use_stmt);
+      if (current_stmt)
+        {
+          current_stmt_info = vinfo_for_stmt (current_stmt);
+         GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
+          GROUP_FIRST_ELEMENT (use_stmt_info)
+            = GROUP_FIRST_ELEMENT (current_stmt_info);
+        }
+      else
+       GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
+
+      lhs = gimple_assign_lhs (loop_use_stmt);
+      current_stmt = loop_use_stmt;
+      size++;
+   }
+
+  if (!found || loop_use_stmt != phi || size < 2)
+    return false;
+
+  /* Swap the operands, if needed, to make the reduction operand be the second
+     operand.  */
+  lhs = PHI_RESULT (phi);
+  next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
+  while (next_stmt)
+    {
+      if (gimple_assign_rhs2 (next_stmt) == lhs)
+       {
+         tree op = gimple_assign_rhs1 (next_stmt);
+          gimple def_stmt = NULL;
+
+          if (TREE_CODE (op) == SSA_NAME)
+            def_stmt = SSA_NAME_DEF_STMT (op);
+
+         /* Check that the other def is either defined in the loop
+            ("vect_internal_def"), or it's an induction (defined by a
+            loop-header phi-node).  */
+          if (def_stmt
+              && gimple_bb (def_stmt)
+             && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+              && (is_gimple_assign (def_stmt)
+                  || is_gimple_call (def_stmt)
+                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+                           == vect_induction_def
+                  || (gimple_code (def_stmt) == GIMPLE_PHI
+                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+                                  == vect_internal_def
+                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
+           {
+             lhs = gimple_assign_lhs (next_stmt);
+             next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+             continue;
+           }
+
+         return false;
+       }
+      else
+       {
+          tree op = gimple_assign_rhs2 (next_stmt);
+          gimple def_stmt = NULL;
+
+          if (TREE_CODE (op) == SSA_NAME)
+            def_stmt = SSA_NAME_DEF_STMT (op);
+
+          /* Check that the other def is either defined in the loop
+            ("vect_internal_def"), or it's an induction (defined by a
+            loop-header phi-node).  */
+          if (def_stmt
+              && gimple_bb (def_stmt)
+             && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+              && (is_gimple_assign (def_stmt)
+                  || is_gimple_call (def_stmt)
+                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+                              == vect_induction_def
+                  || (gimple_code (def_stmt) == GIMPLE_PHI
+                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
+                                  == vect_internal_def
+                      && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
+           {
+             if (vect_print_dump_info (REPORT_DETAILS))
+               {
+                 fprintf (vect_dump, "swapping oprnds: ");
+                 print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
+               }
+
+             swap_tree_operands (next_stmt,
+                                 gimple_assign_rhs1_ptr (next_stmt),
+                                  gimple_assign_rhs2_ptr (next_stmt));
+             mark_symbols_for_renaming (next_stmt);
+           }
+         else
+           return false;
+        }
+
+      lhs = gimple_assign_lhs (next_stmt);
+      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+    }
+
+  /* Save the chain for further analysis in SLP detection.  */
+  first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
+  VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
+  GROUP_SIZE (vinfo_for_stmt (first)) = size;
+
+  return true;
+}
+
+
 /* Function vect_is_simple_reduction_1
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
@@ -1697,6 +2009,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
 
   name = PHI_RESULT (phi);
+  /* ???  If there are no uses of the PHI result the inner loop reduction
+     won't be detected as possibly double-reduction by vectorizable_reduction
+     because that tries to walk the PHI arg from the preheader edge which
+     can be constant.  See PR60382.  */
+  if (has_zero_uses (name))
+    return NULL;
   nloop_uses = 0;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
     {
@@ -1839,15 +2157,15 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
           return NULL;
         }
 
-      op3 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+      op3 = gimple_assign_rhs1 (def_stmt);
       if (COMPARISON_CLASS_P (op3))
         {
           op4 = TREE_OPERAND (op3, 1);
           op3 = TREE_OPERAND (op3, 0);
         }
 
-      op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
-      op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
+      op1 = gimple_assign_rhs2 (def_stmt);
+      op2 = gimple_assign_rhs3 (def_stmt);
 
       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
         {
@@ -1862,7 +2180,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
       op1 = gimple_assign_rhs1 (def_stmt);
       op2 = gimple_assign_rhs2 (def_stmt);
 
-      if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
         {
           if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
@@ -1945,7 +2263,10 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
   if (orig_code == MINUS_EXPR)
     {
       tree rhs = gimple_assign_rhs2 (def_stmt);
-      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+      tree var = TREE_CODE (rhs) == SSA_NAME
+                ? SSA_NAME_VAR (rhs)
+                : create_tmp_reg (TREE_TYPE (rhs), NULL);
+      tree negrhs = make_ssa_name (var, NULL);
       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
                                                         rhs, NULL);
       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
@@ -1968,7 +2289,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
     def2 = SSA_NAME_DEF_STMT (op2);
 
   if (code != COND_EXPR
-      && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
+      && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: no defs for operands: ");
@@ -1981,6 +2302,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
   if (def2 && def2 == phi
       && (code == COND_EXPR
+         || !def1 || gimple_nop_p (def1)
           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
               && (is_gimple_assign (def1)
                  || is_gimple_call (def1)
@@ -1995,17 +2317,19 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
        report_vect_op (def_stmt, "detected reduction: ");
       return def_stmt;
     }
-  else if (def1 && def1 == phi
-          && (code == COND_EXPR
-               || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
-                  && (is_gimple_assign (def2)
-                      || is_gimple_call (def2)
-                      || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
-                           == vect_induction_def
-                      || (gimple_code (def2) == GIMPLE_PHI
-                          && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
-                               == vect_internal_def
-                          && !is_loop_header_bb_p (gimple_bb (def2)))))))
+
+  if (def1 && def1 == phi
+      && (code == COND_EXPR
+         || !def2 || gimple_nop_p (def2)
+          || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
+             && (is_gimple_assign (def2)
+                 || is_gimple_call (def2)
+                 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+                      == vect_induction_def
+                 || (gimple_code (def2) == GIMPLE_PHI
+                     && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+                          == vect_internal_def
+                     && !is_loop_header_bb_p (gimple_bb (def2)))))))
     {
       if (check_reduction)
         {
@@ -2027,13 +2351,20 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
       return def_stmt;
     }
-  else
+
+  /* Try to find SLP reduction chain.  */
+  if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
-       report_vect_op (def_stmt, "reduction: unknown pattern: ");
+        report_vect_op (def_stmt, "reduction: detected reduction chain: ");
 
-      return NULL;
+      return def_stmt;
     }
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    report_vect_op (def_stmt, "reduction: unknown pattern: ");
+       
+  return NULL;
 }
 
 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
@@ -2061,7 +2392,7 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
 
 /* Calculate the cost of one scalar iteration of the loop.  */
 int
-vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
+vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -2103,7 +2434,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
           if (stmt_info
               && !STMT_VINFO_RELEVANT_P (stmt_info)
               && (!STMT_VINFO_LIVE_P (stmt_info)
-                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+             && !STMT_VINFO_IN_PATTERN_P (stmt_info))
             continue;
 
           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
@@ -2149,6 +2481,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
       peel_iters_prologue = niters < peel_iters_prologue ?
                             niters : peel_iters_prologue;
       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+      /* If we need to peel for gaps, but no peeling is required, we have to
+        peel VF iterations.  */
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
+        *peel_iters_epilogue = vf;
     }
 
    return (peel_iters_prologue * scalar_single_iter_cost)
@@ -2246,19 +2582,50 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+         if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+           {
+             stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+             stmt_info = vinfo_for_stmt (stmt);
+           }
+
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
-                 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
            continue;
+
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+          if (is_pattern_stmt_p (stmt_info)
+             && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
+            {
+             gimple_stmt_iterator gsi;
+             
+             for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
+                  !gsi_end_p (gsi); gsi_next (&gsi))
+                {
+                  gimple pattern_def_stmt = gsi_stmt (gsi);
+                  stmt_vec_info pattern_def_stmt_info
+                   = vinfo_for_stmt (pattern_def_stmt);
+                  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                   {
+                      vec_inside_cost
+                       += STMT_VINFO_INSIDE_OF_LOOP_COST
+                          (pattern_def_stmt_info) * factor;
+                      vec_outside_cost
+                       += STMT_VINFO_OUTSIDE_OF_LOOP_COST
+                          (pattern_def_stmt_info);
+                    }
+               }
+           }
        }
     }
 
-  scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
+  scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
 
   /* Add additional cost for the peeled instructions in prologue and epilogue
      loop.
@@ -2498,6 +2865,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
     case GIMPLE_BINARY_RHS:
       reduction_op = gimple_assign_rhs2 (stmt);
       break;
+    case GIMPLE_TERNARY_RHS:
+      reduction_op = gimple_assign_rhs3 (stmt);
+      break;
     default:
       gcc_unreachable ();
     }
@@ -2814,7 +3184,7 @@ get_initial_def_for_induction (gimple iv_phi)
                                                   vec_def, vec_step);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_def);
-
          gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
          if (!useless_type_conversion_p (resvectype, vectype))
            {
@@ -2886,6 +3256,10 @@ get_initial_def_for_induction (gimple iv_phi)
       gimple_assign_set_lhs (new_stmt, induc_def);
       si = gsi_start_bb (bb);
       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
+      set_vinfo_for_stmt (new_stmt,
+                         new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
+      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
+       = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
     }
 
   return induc_def;
@@ -3165,12 +3539,16 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
   VEC (gimple, heap) *new_phis = NULL;
+  VEC (gimple, heap) *inner_phis = NULL;
   enum vect_def_type dt = vect_unknown_def_type;
   int j, i;
   VEC (tree, heap) *scalar_results = NULL;
   unsigned int group_size = 1, k, ratio;
   VEC (tree, heap) *vec_initial_defs = NULL;
   VEC (gimple, heap) *phis;
+  bool slp_reduc = false;
+  tree new_phi_result;
+  gimple inner_phi = NULL;
 
   if (slp_node)
     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node)); 
@@ -3187,7 +3565,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     {
     case GIMPLE_SINGLE_RHS:
       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
-                                       == ternary_op);
+                 == ternary_op);
       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
       break;
     case GIMPLE_UNARY_RHS:
@@ -3197,6 +3575,9 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
       reduction_op = reduc_index ?
                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
       break;
+    case GIMPLE_TERNARY_RHS:
+      reduction_op = gimple_op (stmt, reduc_index + 1);
+      break;
     default:
       gcc_unreachable ();
     }
@@ -3224,8 +3605,8 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
 
   /* Get the loop-entry arguments.  */
   if (slp_node)
-    vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
-                       NULL, reduc_index);
+    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
+                       NULL, slp_node, reduc_index);
   else
     {
       vec_initial_defs = VEC_alloc (tree, heap, 1);
@@ -3324,11 +3705,36 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     }
 
   /* The epilogue is created for the outer-loop, i.e., for the loop being
-     vectorized.  */
+     vectorized.  Create exit phis for the outer loop.  */
   if (double_reduc)
     {
       loop = outer_loop;
       exit_bb = single_exit (loop)->dest;
+      inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
+      FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
+       {
+         gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                             exit_bb);
+         SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+                          PHI_RESULT (phi));
+         set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+                                                           loop_vinfo, NULL));
+         VEC_quick_push (gimple, inner_phis, phi);
+         VEC_replace (gimple, new_phis, i, outer_phi);
+         prev_phi_info = vinfo_for_stmt (outer_phi);
+          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
+            {
+             phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+             outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                          exit_bb);
+             SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+                              PHI_RESULT (phi));
+             set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+                                                       loop_vinfo, NULL));
+             STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
+             prev_phi_info = vinfo_for_stmt (outer_phi);
+           }
+       }
     }
 
   exit_gsi = gsi_after_labels (exit_bb);
@@ -3377,10 +3783,53 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   if (nested_in_vect_loop && !double_reduc)
     goto vect_finalize_reduction;
 
+  /* SLP reduction without reduction chain, e.g.,
+     # a1 = phi <a2, a0>
+     # b1 = phi <b2, b0>
+     a2 = operation (a1)
+     b2 = operation (b1)  */
+  slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+
+  /* In case of reduction chain, e.g.,
+     # a1 = phi <a3, a0>
+     a2 = operation (a1)
+     a3 = operation (a2),
+
+     we may end up with more than one vector result.  Here we reduce them to
+     one vector.  */
+  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+    {
+      tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
+      tree tmp;
+      gimple new_vec_stmt = NULL;
+
+      vec_dest = vect_create_destination_var (scalar_dest, vectype);
+      for (k = 1; k < VEC_length (gimple, new_phis); k++)
+        {
+          gimple next_phi = VEC_index (gimple, new_phis, k);
+          tree second_vect = PHI_RESULT (next_phi);
+
+          tmp = build2 (code, vectype,  first_vect, second_vect);
+          new_vec_stmt = gimple_build_assign (vec_dest, tmp);
+          first_vect = make_ssa_name (vec_dest, new_vec_stmt);
+          gimple_assign_set_lhs (new_vec_stmt, first_vect);
+          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
+        }
+
+      new_phi_result = first_vect;
+      if (new_vec_stmt)
+        {
+          VEC_truncate (gimple, new_phis, 0);
+          VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
+        }
+    }
+  else
+    new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
   /* 2.3 Create the reduction code, using one of the three schemes described
          above. In SLP we simply need to extract all the elements from the 
          vector (without reducing them), so we use scalar shifts.  */
-  if (reduc_code != ERROR_MARK && !slp_node)
+  if (reduc_code != ERROR_MARK && !slp_reduc)
     {
       tree tmp;
 
@@ -3391,8 +3840,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
         fprintf (vect_dump, "Reduce using direct vector reduction.");
 
       vec_dest = vect_create_destination_var (scalar_dest, vectype);
-      new_phi = VEC_index (gimple, new_phis, 0);
-      tmp = build1 (reduc_code, vectype,  PHI_RESULT (new_phi));
+      tmp = build1 (reduc_code, vectype, new_phi_result);
       epilog_stmt = gimple_build_assign (vec_dest, tmp);
       new_temp = make_ssa_name (vec_dest, epilog_stmt);
       gimple_assign_set_lhs (epilog_stmt, new_temp);
@@ -3429,7 +3877,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
             have_whole_vector_shift = false;
         }
 
-      if (have_whole_vector_shift && !slp_node)
+      if (have_whole_vector_shift && !slp_reduc)
         {
           /*** Case 2: Create:
              for (offset = VS/2; offset >= element_size; offset/=2)
@@ -3442,8 +3890,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
             fprintf (vect_dump, "Reduce using vector shifts");
 
           vec_dest = vect_create_destination_var (scalar_dest, vectype);
-          new_phi = VEC_index (gimple, new_phis, 0);
-          new_temp = PHI_RESULT (new_phi);
+          new_temp = new_phi_result;
           for (bit_offset = vec_size_in_bits/2;
                bit_offset >= element_bitsize;
                bit_offset /= 2)
@@ -3485,7 +3932,10 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
             {
-              vec_temp = PHI_RESULT (new_phi);
+              if (gimple_code (new_phi) == GIMPLE_PHI)
+                vec_temp = PHI_RESULT (new_phi);
+              else
+                vec_temp = gimple_assign_lhs (new_phi);
               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                             bitsize_zero_node);
               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
@@ -3495,7 +3945,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
 
               /* In SLP we don't need to apply reduction operation, so we just
                  collect s' values in SCALAR_RESULTS.  */
-              if (slp_node)
+              if (slp_reduc)
                 VEC_safe_push (tree, heap, scalar_results, new_temp);
 
               for (bit_offset = element_bitsize;
@@ -3511,7 +3961,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
                   gimple_assign_set_lhs (epilog_stmt, new_name);
                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
 
-                  if (slp_node)
+                  if (slp_reduc)
                     {
                       /* In SLP we don't need to apply reduction operation, so 
                          we just collect s' values in SCALAR_RESULTS.  */
@@ -3533,7 +3983,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
              unrolling.  If the size of SCALAR_RESULTS is greater than
              GROUP_SIZE, we reduce them combining elements modulo 
              GROUP_SIZE.  */
-          if (slp_node)
+          if (slp_reduc)
             {
               tree res, first_res, new_res;
               gimple new_stmt;
@@ -3596,7 +4046,7 @@ vect_finalize_reduction:
 
   if (adjustment_def)
     {
-      gcc_assert (!slp_node);
+      gcc_assert (!slp_reduc);
       if (nested_in_vect_loop)
        {
           new_phi = VEC_index (gimple, new_phis, 0);
@@ -3661,6 +4111,19 @@ vect_finalize_reduction:
           use <s_out4>  
           use <s_out4> */
 
+
+  /* In SLP reduction chain we reduce vector results into one vector if
+     necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
+     the last stmt in the reduction chain, since we are looking for the loop
+     exit phi node.  */
+  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+    {
+      scalar_dest = gimple_assign_lhs (VEC_index (gimple,
+                                       SLP_TREE_SCALAR_STMTS (slp_node),
+                                       group_size - 1));
+      group_size = 1;
+    }
+
   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in 
      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
      need to match SCALAR_RESULTS with corresponding statements.  The first
@@ -3681,9 +4144,11 @@ vect_finalize_reduction:
         {
           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
+         if (double_reduc)
+           inner_phi = VEC_index (gimple, inner_phis, k / ratio);
         }
 
-      if (slp_node)
+      if (slp_reduc)
         {
           gimple current_stmt = VEC_index (gimple,
                                        SLP_TREE_SCALAR_STMTS (slp_node), k);
@@ -3764,7 +4229,7 @@ vect_finalize_reduction:
                      vs1 was created previously in this function by a call to
                        vect_get_vec_def_for_operand and is stored in
                        vec_initial_def;
-                     vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
+                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                      vs0 is created here.  */
 
                   /* Create vector phi node.  */
@@ -3785,7 +4250,7 @@ vect_finalize_reduction:
                   add_phi_arg (vect_phi, vect_phi_init,
                                loop_preheader_edge (outer_loop),
                                UNKNOWN_LOCATION);
-                  add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
+                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
                   if (vect_print_dump_info (REPORT_DETAILS))
                     {
@@ -3950,7 +4415,13 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
   VEC (gimple, heap) *phis = NULL;
   int vec_num;
-  tree def0, def1, tem;
+  tree def0, def1, tem, op0, op1 = NULL_TREE;
+
+  /* In case of reduction chain we switch to the first stmt in the chain, but
+     we don't update STMT_INFO, since only the last stmt is marked as reduction
+     and has reduction properties.  */
+  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+    stmt = GROUP_FIRST_ELEMENT (stmt_info);
 
   if (nested_in_vect_loop_p (loop, stmt))
     {
@@ -3960,8 +4431,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   /* 1. Is vectorizable reduction?  */
-  /* Not supportable if the reduction variable is used in the loop.  */
-  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
+  /* Not supportable if the reduction variable is used in the loop, unless
+     it's a reduction chain.  */
+  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
+      && !GROUP_FIRST_ELEMENT (stmt_info))
     return false;
 
   /* Reductions that are not used even in an enclosing outer-loop,
@@ -3986,7 +4459,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   if (orig_stmt)
     {
       orig_stmt_info = vinfo_for_stmt (orig_stmt);
-      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
     }
@@ -4022,6 +4494,15 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       ops[1] = gimple_assign_rhs2 (stmt);
       break;
 
+    case GIMPLE_TERNARY_RHS:
+      code = gimple_assign_rhs_code (stmt);
+      op_type = TREE_CODE_LENGTH (code);
+      gcc_assert (op_type == ternary_op);
+      ops[0] = gimple_assign_rhs1 (stmt);
+      ops[1] = gimple_assign_rhs2 (stmt);
+      ops[2] = gimple_assign_rhs3 (stmt);
+      break;
+
     case GIMPLE_UNARY_RHS:
       return false;
 
@@ -4029,27 +4510,36 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       gcc_unreachable ();
     }
 
+  if (code == COND_EXPR && slp_node)
+    return false;
+
   scalar_dest = gimple_assign_lhs (stmt);
   scalar_type = TREE_TYPE (scalar_dest);
   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
       && !SCALAR_FLOAT_TYPE_P (scalar_type))
     return false;
 
+  /* Do not try to vectorize bit-precision reductions.  */
+  if ((TYPE_PRECISION (scalar_type)
+       != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
+    return false;
+
   /* All uses but the last are expected to be defined in the loop.
      The last use is the reduction variable.  In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable.  */
-  for (i = 0; i < op_type-1; i++)
+  for (i = 0; i < op_type - 1; i++)
     {
       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
       if (i == 0 && code == COND_EXPR)
         continue;
 
-      is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
+      is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                            &def_stmt, &def, &dt, &tem);
       if (!vectype_in)
        vectype_in = tem;
       gcc_assert (is_simple_use);
+
       if (dt != vect_internal_def
          && dt != vect_external_def
          && dt != vect_constant_def
@@ -4065,16 +4555,23 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         }
     }
 
-  is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL, &def_stmt,
-                                       &def, &dt, &tem);
+  is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
+                                       &def_stmt, &def, &dt, &tem);
   if (!vectype_in)
     vectype_in = tem;
   gcc_assert (is_simple_use);
-  gcc_assert (dt == vect_reduction_def
-              || dt == vect_nested_cycle
-              || ((dt == vect_internal_def || dt == vect_external_def
-                   || dt == vect_constant_def || dt == vect_induction_def)
-                   && nested_cycle && found_nested_cycle_def));
+  if (!(dt == vect_reduction_def
+       || dt == vect_nested_cycle
+       || ((dt == vect_internal_def || dt == vect_external_def
+            || dt == vect_constant_def || dt == vect_induction_def)
+           && nested_cycle && found_nested_cycle_def)))
+    {
+      /* For pattern recognized stmts, orig_stmt might be a reduction,
+        but some helper statements for the pattern might not, or
+        might be COND_EXPRs with reduction uses in the condition.  */
+      gcc_assert (orig_stmt);
+      return false;
+    }
   if (!found_nested_cycle_def)
     reduc_def_stmt = def_stmt;
 
@@ -4085,13 +4582,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                                                        !nested_cycle,
                                                        &dummy));
   else
-    gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-                                                  !nested_cycle, &dummy));
+    {
+      gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
+                                             !nested_cycle, &dummy);
+      /* We changed STMT to be the first stmt in reduction chain, hence we
+         check that in this case the first element in the chain is STMT.  */
+      gcc_assert (stmt == tmp
+                  || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
+    }
 
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
 
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
@@ -4103,7 +4606,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (code == COND_EXPR)
     {
-      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
+      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "unsupported condition in reduction");
@@ -4258,11 +4761,30 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       return false;
     }
 
+  /* In case of widenning multiplication by a constant, we update the type
+     of the constant to be the type of the other operand.  We check that the
+     constant fits the type in the pattern recognition pass.  */
+  if (code == DOT_PROD_EXPR
+      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
+    {
+      if (TREE_CODE (ops[0]) == INTEGER_CST)
+        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
+      else if (TREE_CODE (ops[1]) == INTEGER_CST)
+        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
+      else
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "invalid types in dot-prod");
+
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
-      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
         return false;
+      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
     }
 
@@ -4356,7 +4878,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
           gcc_assert (!slp_node);
           vectorizable_condition (stmt, gsi, vec_stmt, 
                                   PHI_RESULT (VEC_index (gimple, phis, 0)), 
-                                  reduc_index);
+                                  reduc_index, NULL);
           /* Multiple types are not supported for condition.  */
           break;
         }
@@ -4364,8 +4886,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       /* Handle uses.  */
       if (j == 0)
         {
-          tree op0, op1 = NULL_TREE;
-
           op0 = ops[!reduc_index];
           if (op_type == ternary_op)
             {
@@ -4376,8 +4896,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
             }
 
           if (slp_node)
-            vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
-                               -1);
+            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
+                               slp_node, -1);
           else
             {
               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
@@ -4395,11 +4915,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         {
           if (!slp_node)
             {
-              enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
-              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
+              enum vect_def_type dt;
+              gimple dummy_stmt;
+              tree dummy;
+
+              vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
+                                  &dummy_stmt, &dummy, &dt);
+              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
+                                                              loop_vec_def0);
               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
               if (op_type == ternary_op)
                 {
+                  vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
+                                      &dummy, &dt);
                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                 loop_vec_def1);
                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
@@ -4448,6 +4976,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
           new_temp = make_ssa_name (vec_dest, new_stmt);
           gimple_assign_set_lhs (new_stmt, new_temp);
           vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
           if (slp_node)
             {
               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
@@ -4536,12 +5065,46 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
   tree vec_def;
 
   gcc_assert (ncopies >= 1);
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
+  /* FORNOW. These restrictions should be relaxed.  */
+  if (nested_in_vect_loop_p (loop, phi))
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+      gimple exit_phi;
+      edge latch_e;
+      tree loop_arg;
+
+      if (ncopies > 1)
+       {
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "multiple types in nested loop.");
+         return false;
+       }
+
+      exit_phi = NULL;
+      latch_e = loop_latch_edge (loop->inner);
+      loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
+       {
+         if (!flow_bb_inside_loop_p (loop->inner,
+                                     gimple_bb (USE_STMT (use_p))))
+           {
+             exit_phi = USE_STMT (use_p);
+             break;
+           }
+       }
+      if (exit_phi)
+       {
+         stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
+         if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
+               && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
+           {
+             if (vect_print_dump_info (REPORT_DETAILS))
+               fprintf (vect_dump, "inner-loop induction only used outside "
+                        "of the outer vectorized loop.");
+             return false;
+           }
+       }
     }
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
@@ -4629,7 +5192,8 @@ vectorizable_live_operation (gimple stmt,
       else
        op = gimple_op (stmt, i + 1);
       if (op
-          && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
+          && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
+                                 &dt))
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "use not simple.");
@@ -4704,6 +5268,10 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   tree cond_expr = NULL_TREE;
   gimple_seq cond_expr_stmt_list = NULL;
   bool do_peeling_for_loop_bound;
+  gimple stmt, pattern_stmt;
+  gimple_seq pattern_def_seq = NULL;
+  gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
+  bool transform_pattern_stmt = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vec_transform_loop ===");
@@ -4717,7 +5285,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   do_peeling_for_loop_bound
     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
+          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
+       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
 
   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
@@ -4790,11 +5359,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
            }
        }
 
-      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
+      pattern_stmt = NULL;
+      for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
        {
-         gimple stmt = gsi_stmt (si);
          bool is_store;
 
+          if (transform_pattern_stmt)
+           stmt = pattern_stmt;
+          else
+            stmt = gsi_stmt (si);
+
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
@@ -4817,14 +5391,79 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
-           {
-             gsi_next (&si);
-             continue;
+            {
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+                {
+                  stmt = pattern_stmt;
+                  stmt_info = vinfo_for_stmt (stmt);
+                }
+              else
+               {
+                 gsi_next (&si);
+                 continue;
+                }
            }
+          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+            transform_pattern_stmt = true;
+
+         /* If pattern statement has def stmts, vectorize them too.  */
+         if (is_pattern_stmt_p (stmt_info))
+           {
+             if (pattern_def_seq == NULL)
+               {
+                 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+                 pattern_def_si = gsi_start (pattern_def_seq);
+               }
+             else if (!gsi_end_p (pattern_def_si))
+               gsi_next (&pattern_def_si);
+             if (pattern_def_seq != NULL)
+               {
+                 gimple pattern_def_stmt = NULL;
+                 stmt_vec_info pattern_def_stmt_info = NULL;
+
+                 while (!gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_stmt = gsi_stmt (pattern_def_si);
+                     pattern_def_stmt_info
+                       = vinfo_for_stmt (pattern_def_stmt);
+                     if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                         || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                       break;
+                     gsi_next (&pattern_def_si);
+                   }
+
+                 if (!gsi_end_p (pattern_def_si))
+                   {
+                     if (vect_print_dump_info (REPORT_DETAILS))
+                       {
+                         fprintf (vect_dump, "==> vectorizing pattern def"
+                                             " stmt: ");
+                         print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                            TDF_SLIM);
+                       }
+
+                     stmt = pattern_def_stmt;
+                     stmt_info = pattern_def_stmt_info;
+                   }
+                 else
+                   {
+                     pattern_def_si = gsi_start (NULL);
+                     transform_pattern_stmt = false;
+                   }
+               }
+             else
+               transform_pattern_stmt = false;
+            }
 
          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
-         nunits =
-           (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+         nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
+                                               STMT_VINFO_VECTYPE (stmt_info));
          if (!STMT_SLP_TYPE (stmt_info)
              && nunits != (unsigned int) vectorization_factor
               && vect_print_dump_info (REPORT_DETAILS))
@@ -4849,7 +5488,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
-                 gsi_next (&si);
+                 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_seq = NULL;
+                     gsi_next (&si);
+                   }
                  continue;
                }
            }
@@ -4867,19 +5510,24 @@ vect_transform_loop (loop_vec_info loop_vinfo)
                  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
                     interleaving chain was completed - free all the stores in
                     the chain.  */
-                 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
-                 gsi_remove (&si, true);
-                 continue;
+                 gsi_next (&si);
+                 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
+                 continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
-                 free_stmt_vec_info (stmt);
+                 free_stmt_vec_info (gsi_stmt (si));
                  gsi_remove (&si, true);
                  continue;
                }
            }
-         gsi_next (&si);
+
+         if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+           {
+             pattern_def_seq = NULL;
+             gsi_next (&si);
+           }
        }                       /* stmts in BB */
     }                          /* BBs in loop */