OSDN Git Service

2014-05-13 Richard Biener <rguenther@suse.de>
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-loop.c
index 7691267..964e5dd 100644 (file)
@@ -1,5 +1,5 @@
 /* Loop Vectorization
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
    Ira Rosen <irar@il.ibm.com>
@@ -181,6 +181,10 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
   stmt_vec_info stmt_info;
   int i;
   HOST_WIDE_INT dummy;
+  gimple stmt, pattern_stmt = NULL;
+  gimple_seq pattern_def_seq = NULL;
+  gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
+  bool analyze_pattern_stmt = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
@@ -241,11 +245,16 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
            }
        }
 
-      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+      for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
         {
-         tree vf_vectype;
-         gimple stmt = gsi_stmt (si), pattern_stmt;
-         stmt_info = vinfo_for_stmt (stmt);
+          tree vf_vectype;
+
+          if (analyze_pattern_stmt)
+           stmt = pattern_stmt;
+          else
+            stmt = gsi_stmt (si);
+
+          stmt_info = vinfo_for_stmt (stmt);
 
          if (vect_print_dump_info (REPORT_DETAILS))
            {
@@ -276,9 +285,64 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "skip.");
+                  gsi_next (&si);
                  continue;
                 }
            }
+          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+            analyze_pattern_stmt = true;
+
+         /* If a pattern statement has def stmts, analyze them too.  */
+         if (is_pattern_stmt_p (stmt_info))
+           {
+             if (pattern_def_seq == NULL)
+               {
+                 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+                 pattern_def_si = gsi_start (pattern_def_seq);
+               }
+             else if (!gsi_end_p (pattern_def_si))
+               gsi_next (&pattern_def_si);
+             if (pattern_def_seq != NULL)
+               {
+                 gimple pattern_def_stmt = NULL;
+                 stmt_vec_info pattern_def_stmt_info = NULL;
+
+                 while (!gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_stmt = gsi_stmt (pattern_def_si);
+                     pattern_def_stmt_info
+                       = vinfo_for_stmt (pattern_def_stmt);
+                     if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                         || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                       break;
+                     gsi_next (&pattern_def_si);
+                   }
+
+                 if (!gsi_end_p (pattern_def_si))
+                   {
+                     if (vect_print_dump_info (REPORT_DETAILS))
+                       {
+                         fprintf (vect_dump,
+                                  "==> examining pattern def stmt: ");
+                         print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                            TDF_SLIM);
+                       }
+
+                     stmt = pattern_def_stmt;
+                     stmt_info = pattern_def_stmt_info;
+                   }
+                 else
+                   {
+                     pattern_def_si = gsi_start (NULL);
+                     analyze_pattern_stmt = false;
+                   }
+               }
+             else
+               analyze_pattern_stmt = false;
+           }
 
          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
@@ -303,10 +367,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
-                that contain a dataref, or for "pattern-stmts" (stmts generated
-                by the vectorizer to represent/replace a certain idiom).  */
+                that contain a dataref, or for "pattern-stmts" (stmts
+                generated by the vectorizer to represent/replace a certain
+                idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
-                         || is_pattern_stmt_p (stmt_info));
+                         || is_pattern_stmt_p (stmt_info)
+                         || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
@@ -383,6 +449,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
+
+         if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
+           {
+             pattern_def_seq = NULL;
+             gsi_next (&si);
+           }
         }
     }
 
@@ -493,11 +565,15 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
       /* Analyze the evolution function.  */
       access_fn = analyze_scalar_evolution (loop, def);
       if (access_fn)
-       STRIP_NOPS (access_fn);
-      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
-         fprintf (vect_dump, "Access function of PHI: ");
-         print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+         STRIP_NOPS (access_fn);
+         if (vect_print_dump_info (REPORT_DETAILS))
+           {
+             fprintf (vect_dump, "Access function of PHI: ");
+             print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+           }
+         STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
+           = evolution_part_in_loop_num (access_fn, loop->num);
        }
 
       if (!access_fn
@@ -507,6 +583,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
          continue;
        }
 
+      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
+
       if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
@@ -828,21 +906,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          if (stmt_info)
-            {
-              /* Check if this statement has a related "pattern stmt"
-                 (introduced by the vectorizer during the pattern recognition
-                 pass).  Free pattern's stmt_vec_info.  */
-              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
-                  && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)))
-                free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
-
-              /* Free stmt_vec_info.  */
-              free_stmt_vec_info (stmt);
-            }
-
+         /* Free stmt_vec_info.  */
+         free_stmt_vec_info (stmt);
           gsi_next (&si);
         }
     }
@@ -1261,7 +1326,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
                     return false;
 
                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
-                  if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
+                 if (!op_def_stmt
+                     || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
+                     || !vinfo_for_stmt (op_def_stmt))
                     return false;
 
                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
@@ -1399,7 +1466,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
-      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+      || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "epilog loop required.");
@@ -1431,7 +1498,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, dummy, slp = false;
+  bool ok, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
 
@@ -1472,7 +1539,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
      the dependences.
      FORNOW: fail at the first data dependence that we encounter.  */
 
-  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
+  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
   if (!ok
       || max_vf < min_vf)
     {
@@ -1711,7 +1778,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
   tree lhs;
   imm_use_iterator imm_iter;
   use_operand_p use_p;
-  int nloop_uses, size = 0;
+  int nloop_uses, size = 0, n_out_of_loop_uses;
   bool found = false;
 
   if (loop != vect_loop)
@@ -1722,6 +1789,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
   while (1)
     {
       nloop_uses = 0;
+      n_out_of_loop_uses = 0;
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
         {
          gimple use_stmt = USE_STMT (use_p);
@@ -1738,16 +1806,22 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
               break;
             }
 
-          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
-              && vinfo_for_stmt (use_stmt)
-             && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
-           {
-             loop_use_stmt = use_stmt;
-             nloop_uses++;
-           }
+          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+            {
+              if (vinfo_for_stmt (use_stmt)
+                  && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+                {
+                  loop_use_stmt = use_stmt;
+                  nloop_uses++;
+                }
+            }
+           else
+             n_out_of_loop_uses++;
 
-          if (nloop_uses > 1)
-            return false;
+           /* There can be either a single use in the loop or two uses in
+              phi nodes.  */
+           if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
+             return false;
         }
 
       if (found)
@@ -1804,6 +1878,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
           if (def_stmt
+              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
               && (is_gimple_assign (def_stmt)
                   || is_gimple_call (def_stmt)
@@ -1833,6 +1908,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
           if (def_stmt
+              && gimple_bb (def_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
               && (is_gimple_assign (def_stmt)
                   || is_gimple_call (def_stmt)
@@ -1933,6 +2009,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
 
   name = PHI_RESULT (phi);
+  /* ???  If there are no uses of the PHI result the inner loop reduction
+     won't be detected as possibly double-reduction by vectorizable_reduction
+     because that tries to walk the PHI arg from the preheader edge which
+     can be constant.  See PR60382.  */
+  if (has_zero_uses (name))
+    return NULL;
   nloop_uses = 0;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
     {
@@ -2075,15 +2157,15 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
           return NULL;
         }
 
-      op3 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+      op3 = gimple_assign_rhs1 (def_stmt);
       if (COMPARISON_CLASS_P (op3))
         {
           op4 = TREE_OPERAND (op3, 1);
           op3 = TREE_OPERAND (op3, 0);
         }
 
-      op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
-      op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
+      op1 = gimple_assign_rhs2 (def_stmt);
+      op2 = gimple_assign_rhs3 (def_stmt);
 
       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
         {
@@ -2098,7 +2180,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
       op1 = gimple_assign_rhs1 (def_stmt);
       op2 = gimple_assign_rhs2 (def_stmt);
 
-      if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
         {
           if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
@@ -2181,7 +2263,10 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
   if (orig_code == MINUS_EXPR)
     {
       tree rhs = gimple_assign_rhs2 (def_stmt);
-      tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
+      tree var = TREE_CODE (rhs) == SSA_NAME
+                ? SSA_NAME_VAR (rhs)
+                : create_tmp_reg (TREE_TYPE (rhs), NULL);
+      tree negrhs = make_ssa_name (var, NULL);
       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
                                                         rhs, NULL);
       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
@@ -2204,7 +2289,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
     def2 = SSA_NAME_DEF_STMT (op2);
 
   if (code != COND_EXPR
-      && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
+      && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: no defs for operands: ");
@@ -2217,6 +2302,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
   if (def2 && def2 == phi
       && (code == COND_EXPR
+         || !def1 || gimple_nop_p (def1)
           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
               && (is_gimple_assign (def1)
                  || is_gimple_call (def1)
@@ -2234,6 +2320,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 
   if (def1 && def1 == phi
       && (code == COND_EXPR
+         || !def2 || gimple_nop_p (def2)
           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
              && (is_gimple_assign (def2)
                  || is_gimple_call (def2)
@@ -2305,7 +2392,7 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
 
 /* Calculate the cost of one scalar iteration of the loop.  */
 int
-vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
+vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -2347,7 +2434,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
           if (stmt_info
               && !STMT_VINFO_RELEVANT_P (stmt_info)
               && (!STMT_VINFO_LIVE_P (stmt_info)
-                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+             && !STMT_VINFO_IN_PATTERN_P (stmt_info))
             continue;
 
           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
@@ -2494,19 +2582,50 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+         if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+           {
+             stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+             stmt_info = vinfo_for_stmt (stmt);
+           }
+
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
-                 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
            continue;
+
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+          if (is_pattern_stmt_p (stmt_info)
+             && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
+            {
+             gimple_stmt_iterator gsi;
+             
+             for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
+                  !gsi_end_p (gsi); gsi_next (&gsi))
+                {
+                  gimple pattern_def_stmt = gsi_stmt (gsi);
+                  stmt_vec_info pattern_def_stmt_info
+                   = vinfo_for_stmt (pattern_def_stmt);
+                  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                   {
+                      vec_inside_cost
+                       += STMT_VINFO_INSIDE_OF_LOOP_COST
+                          (pattern_def_stmt_info) * factor;
+                      vec_outside_cost
+                       += STMT_VINFO_OUTSIDE_OF_LOOP_COST
+                          (pattern_def_stmt_info);
+                    }
+               }
+           }
        }
     }
 
-  scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
+  scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
 
   /* Add additional cost for the peeled instructions in prologue and epilogue
      loop.
@@ -3420,6 +3539,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
   VEC (gimple, heap) *new_phis = NULL;
+  VEC (gimple, heap) *inner_phis = NULL;
   enum vect_def_type dt = vect_unknown_def_type;
   int j, i;
   VEC (tree, heap) *scalar_results = NULL;
@@ -3428,6 +3548,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   VEC (gimple, heap) *phis;
   bool slp_reduc = false;
   tree new_phi_result;
+  gimple inner_phi = NULL;
 
   if (slp_node)
     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node)); 
@@ -3484,8 +3605,8 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
 
   /* Get the loop-entry arguments.  */
   if (slp_node)
-    vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
-                       NULL, reduc_index);
+    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
+                       NULL, slp_node, reduc_index);
   else
     {
       vec_initial_defs = VEC_alloc (tree, heap, 1);
@@ -3584,11 +3705,36 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     }
 
   /* The epilogue is created for the outer-loop, i.e., for the loop being
-     vectorized.  */
+     vectorized.  Create exit phis for the outer loop.  */
   if (double_reduc)
     {
       loop = outer_loop;
       exit_bb = single_exit (loop)->dest;
+      inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
+      FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
+       {
+         gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                             exit_bb);
+         SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+                          PHI_RESULT (phi));
+         set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+                                                           loop_vinfo, NULL));
+         VEC_quick_push (gimple, inner_phis, phi);
+         VEC_replace (gimple, new_phis, i, outer_phi);
+         prev_phi_info = vinfo_for_stmt (outer_phi);
+          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
+            {
+             phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+             outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                          exit_bb);
+             SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
+                              PHI_RESULT (phi));
+             set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
+                                                       loop_vinfo, NULL));
+             STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
+             prev_phi_info = vinfo_for_stmt (outer_phi);
+           }
+       }
     }
 
   exit_gsi = gsi_after_labels (exit_bb);
@@ -3655,13 +3801,13 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     {
       tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
       tree tmp;
+      gimple new_vec_stmt = NULL;
 
       vec_dest = vect_create_destination_var (scalar_dest, vectype);
       for (k = 1; k < VEC_length (gimple, new_phis); k++)
         {
           gimple next_phi = VEC_index (gimple, new_phis, k);
           tree second_vect = PHI_RESULT (next_phi);
-          gimple new_vec_stmt;
 
           tmp = build2 (code, vectype,  first_vect, second_vect);
           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
@@ -3671,6 +3817,11 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
         }
 
       new_phi_result = first_vect;
+      if (new_vec_stmt)
+        {
+          VEC_truncate (gimple, new_phis, 0);
+          VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
+        }
     }
   else
     new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
@@ -3781,7 +3932,10 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
             {
-              vec_temp = PHI_RESULT (new_phi);
+              if (gimple_code (new_phi) == GIMPLE_PHI)
+                vec_temp = PHI_RESULT (new_phi);
+              else
+                vec_temp = gimple_assign_lhs (new_phi);
               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                             bitsize_zero_node);
               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
@@ -3990,6 +4144,8 @@ vect_finalize_reduction:
         {
           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
+         if (double_reduc)
+           inner_phi = VEC_index (gimple, inner_phis, k / ratio);
         }
 
       if (slp_reduc)
@@ -4073,7 +4229,7 @@ vect_finalize_reduction:
                      vs1 was created previously in this function by a call to
                        vect_get_vec_def_for_operand and is stored in
                        vec_initial_def;
-                     vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
+                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
                      vs0 is created here.  */
 
                   /* Create vector phi node.  */
@@ -4094,7 +4250,7 @@ vect_finalize_reduction:
                   add_phi_arg (vect_phi, vect_phi_init,
                                loop_preheader_edge (outer_loop),
                                UNKNOWN_LOCATION);
-                  add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
+                  add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
                   if (vect_print_dump_info (REPORT_DETAILS))
                     {
@@ -4259,7 +4415,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
   VEC (gimple, heap) *phis = NULL;
   int vec_num;
-  tree def0, def1, tem;
+  tree def0, def1, tem, op0, op1 = NULL_TREE;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -4303,7 +4459,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   if (orig_stmt)
     {
       orig_stmt_info = vinfo_for_stmt (orig_stmt);
-      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
     }
@@ -4355,23 +4510,31 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       gcc_unreachable ();
     }
 
+  if (code == COND_EXPR && slp_node)
+    return false;
+
   scalar_dest = gimple_assign_lhs (stmt);
   scalar_type = TREE_TYPE (scalar_dest);
   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
       && !SCALAR_FLOAT_TYPE_P (scalar_type))
     return false;
 
+  /* Do not try to vectorize bit-precision reductions.  */
+  if ((TYPE_PRECISION (scalar_type)
+       != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
+    return false;
+
   /* All uses but the last are expected to be defined in the loop.
      The last use is the reduction variable.  In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable.  */
-  for (i = 0; i < op_type-1; i++)
+  for (i = 0; i < op_type - 1; i++)
     {
       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
       if (i == 0 && code == COND_EXPR)
         continue;
 
-      is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
+      is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
                                            &def_stmt, &def, &dt, &tem);
       if (!vectype_in)
        vectype_in = tem;
@@ -4392,16 +4555,23 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         }
     }
 
-  is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL, &def_stmt,
-                                       &def, &dt, &tem);
+  is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
+                                       &def_stmt, &def, &dt, &tem);
   if (!vectype_in)
     vectype_in = tem;
   gcc_assert (is_simple_use);
-  gcc_assert (dt == vect_reduction_def
-              || dt == vect_nested_cycle
-              || ((dt == vect_internal_def || dt == vect_external_def
-                   || dt == vect_constant_def || dt == vect_induction_def)
-                   && nested_cycle && found_nested_cycle_def));
+  if (!(dt == vect_reduction_def
+       || dt == vect_nested_cycle
+       || ((dt == vect_internal_def || dt == vect_external_def
+            || dt == vect_constant_def || dt == vect_induction_def)
+           && nested_cycle && found_nested_cycle_def)))
+    {
+      /* For pattern recognized stmts, orig_stmt might be a reduction,
+        but some helper statements for the pattern might not, or
+        might be COND_EXPRs with reduction uses in the condition.  */
+      gcc_assert (orig_stmt);
+      return false;
+    }
   if (!found_nested_cycle_def)
     reduc_def_stmt = def_stmt;
 
@@ -4436,7 +4606,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (code == COND_EXPR)
     {
-      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
+      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "unsupported condition in reduction");
@@ -4708,7 +4878,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
           gcc_assert (!slp_node);
           vectorizable_condition (stmt, gsi, vec_stmt, 
                                   PHI_RESULT (VEC_index (gimple, phis, 0)), 
-                                  reduc_index);
+                                  reduc_index, NULL);
           /* Multiple types are not supported for condition.  */
           break;
         }
@@ -4716,8 +4886,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       /* Handle uses.  */
       if (j == 0)
         {
-          tree op0, op1 = NULL_TREE;
-
           op0 = ops[!reduc_index];
           if (op_type == ternary_op)
             {
@@ -4728,8 +4896,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
             }
 
           if (slp_node)
-            vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
-                               -1);
+            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
+                               slp_node, -1);
           else
             {
               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
@@ -4747,11 +4915,19 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         {
           if (!slp_node)
             {
-              enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
-              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
+              enum vect_def_type dt;
+              gimple dummy_stmt;
+              tree dummy;
+
+              vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
+                                  &dummy_stmt, &dummy, &dt);
+              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
+                                                              loop_vec_def0);
               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
               if (op_type == ternary_op)
                 {
+                  vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
+                                      &dummy, &dt);
                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                 loop_vec_def1);
                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
@@ -4889,12 +5065,46 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
   tree vec_def;
 
   gcc_assert (ncopies >= 1);
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
+  /* FORNOW. These restrictions should be relaxed.  */
+  if (nested_in_vect_loop_p (loop, phi))
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+      gimple exit_phi;
+      edge latch_e;
+      tree loop_arg;
+
+      if (ncopies > 1)
+       {
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "multiple types in nested loop.");
+         return false;
+       }
+
+      exit_phi = NULL;
+      latch_e = loop_latch_edge (loop->inner);
+      loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
+       {
+         if (!flow_bb_inside_loop_p (loop->inner,
+                                     gimple_bb (USE_STMT (use_p))))
+           {
+             exit_phi = USE_STMT (use_p);
+             break;
+           }
+       }
+      if (exit_phi)
+       {
+         stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
+         if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
+               && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
+           {
+             if (vect_print_dump_info (REPORT_DETAILS))
+               fprintf (vect_dump, "inner-loop induction only used outside "
+                        "of the outer vectorized loop.");
+             return false;
+           }
+       }
     }
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
@@ -4982,7 +5192,8 @@ vectorizable_live_operation (gimple stmt,
       else
        op = gimple_op (stmt, i + 1);
       if (op
-          && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
+          && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
+                                 &dt))
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "use not simple.");
@@ -5057,6 +5268,10 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   tree cond_expr = NULL_TREE;
   gimple_seq cond_expr_stmt_list = NULL;
   bool do_peeling_for_loop_bound;
+  gimple stmt, pattern_stmt;
+  gimple_seq pattern_def_seq = NULL;
+  gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
+  bool transform_pattern_stmt = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vec_transform_loop ===");
@@ -5144,11 +5359,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
            }
        }
 
-      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
+      pattern_stmt = NULL;
+      for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
        {
-         gimple stmt = gsi_stmt (si), pattern_stmt;
          bool is_store;
 
+          if (transform_pattern_stmt)
+           stmt = pattern_stmt;
+          else
+            stmt = gsi_stmt (si);
+
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
@@ -5186,6 +5406,60 @@ vect_transform_loop (loop_vec_info loop_vinfo)
                  continue;
                 }
            }
+          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+            transform_pattern_stmt = true;
+
+         /* If pattern statement has def stmts, vectorize them too.  */
+         if (is_pattern_stmt_p (stmt_info))
+           {
+             if (pattern_def_seq == NULL)
+               {
+                 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
+                 pattern_def_si = gsi_start (pattern_def_seq);
+               }
+             else if (!gsi_end_p (pattern_def_si))
+               gsi_next (&pattern_def_si);
+             if (pattern_def_seq != NULL)
+               {
+                 gimple pattern_def_stmt = NULL;
+                 stmt_vec_info pattern_def_stmt_info = NULL;
+
+                 while (!gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_stmt = gsi_stmt (pattern_def_si);
+                     pattern_def_stmt_info
+                       = vinfo_for_stmt (pattern_def_stmt);
+                     if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+                         || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+                       break;
+                     gsi_next (&pattern_def_si);
+                   }
+
+                 if (!gsi_end_p (pattern_def_si))
+                   {
+                     if (vect_print_dump_info (REPORT_DETAILS))
+                       {
+                         fprintf (vect_dump, "==> vectorizing pattern def"
+                                             " stmt: ");
+                         print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                            TDF_SLIM);
+                       }
+
+                     stmt = pattern_def_stmt;
+                     stmt_info = pattern_def_stmt_info;
+                   }
+                 else
+                   {
+                     pattern_def_si = gsi_start (NULL);
+                     transform_pattern_stmt = false;
+                   }
+               }
+             else
+               transform_pattern_stmt = false;
+            }
 
          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
@@ -5214,7 +5488,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
-                 gsi_next (&si);
+                 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+                   {
+                     pattern_def_seq = NULL;
+                     gsi_next (&si);
+                   }
                  continue;
                }
            }
@@ -5232,19 +5510,24 @@ vect_transform_loop (loop_vec_info loop_vinfo)
                  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
                     interleaving chain was completed - free all the stores in
                     the chain.  */
+                 gsi_next (&si);
                  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
-                 gsi_remove (&si, true);
-                 continue;
+                 continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
-                 free_stmt_vec_info (stmt);
+                 free_stmt_vec_info (gsi_stmt (si));
                  gsi_remove (&si, true);
                  continue;
                }
            }
-         gsi_next (&si);
+
+         if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
+           {
+             pattern_def_seq = NULL;
+             gsi_next (&si);
+           }
        }                       /* stmts in BB */
     }                          /* BBs in loop */