OSDN Git Service

2014-05-07 Richard Biener <rguenther@suse.de>
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-slp.c
index eab5e40..b74f5eb 100644 (file)
@@ -1,5 +1,5 @@
 /* SLP - Basic Block Vectorization
-   Copyright (C) 2007, 2008, 2009, 2010, 2011
+   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
@@ -75,8 +75,9 @@ vect_free_slp_tree (slp_tree node)
     return;
 
   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
-    vect_free_slp_tree ((slp_tree)child);
+    vect_free_slp_tree ((slp_tree) child);
 
+  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
   VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
 
   if (SLP_TREE_VEC_STMTS (node))
@@ -94,6 +95,7 @@ vect_free_slp_instance (slp_instance instance)
   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
   VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
   VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
+  free (instance);
 }
 
 
@@ -102,17 +104,22 @@ vect_free_slp_instance (slp_instance instance)
 static slp_tree
 vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
 {
-  slp_tree node = XNEW (struct _slp_tree);
+  slp_tree node;
   gimple stmt = VEC_index (gimple, scalar_stmts, 0);
   unsigned int nops;
 
   if (is_gimple_call (stmt))
     nops = gimple_call_num_args (stmt);
   else if (is_gimple_assign (stmt))
-    nops = gimple_num_ops (stmt) - 1;
+    {
+      nops = gimple_num_ops (stmt) - 1;
+      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+       nops++;
+    }
   else
     return NULL;
 
+  node = XNEW (struct _slp_tree);
   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
   SLP_TREE_VEC_STMTS (node) = NULL;
   SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
@@ -148,21 +155,19 @@ vect_create_oprnd_info (int nops, int group_size)
 }
 
 
-/* Free operands info.  Free def-stmts in FREE_DEF_STMTS is true.
-   (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
-   succeds.  In the later case we don't need the operands info that we used to
-   check isomorphism of the stmts, but we still need the def-stmts - they are
-   used as scalar stmts in SLP nodes.  */
+/* Free operands info.  */
+
 static void
-vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info,
-                      bool free_def_stmts)
+vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
 {
   int i;
   slp_oprnd_info oprnd_info;
 
-  if (free_def_stmts)
-    FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
+  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
+    {
       VEC_free (gimple, heap, oprnd_info->def_stmts);
+      XDELETE (oprnd_info);
+    }
 
   VEC_free (slp_oprnd_info, heap, *oprnds_info);
 }
@@ -191,22 +196,46 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   bool different_types = false;
   bool pattern = false;
   slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
+  int op_idx = 1;
+  tree compare_rhs = NULL_TREE;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
 
   if (is_gimple_call (stmt))
-    number_of_oprnds = gimple_call_num_args (stmt);
+    {
+      number_of_oprnds = gimple_call_num_args (stmt);
+      op_idx = 3;
+    }
+  else if (is_gimple_assign (stmt))
+    {
+      number_of_oprnds = gimple_num_ops (stmt) - 1;
+      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+        number_of_oprnds++;
+    }
   else
-    number_of_oprnds = gimple_num_ops (stmt) - 1;
+    return false;
 
   for (i = 0; i < number_of_oprnds; i++)
     {
-      oprnd = gimple_op (stmt, i + 1);
+      if (compare_rhs)
+       {
+         oprnd = compare_rhs;
+         compare_rhs = NULL_TREE;
+       }
+      else
+        oprnd = gimple_op (stmt, op_idx++);
+
       oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
 
-      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
-                               &dt)
+      if (COMPARISON_CLASS_P (oprnd))
+        {
+          compare_rhs = TREE_OPERAND (oprnd, 1);
+          oprnd = TREE_OPERAND (oprnd, 0);
+       }
+
+      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
+                              &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
@@ -244,8 +273,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
 
-          if (dt == vect_unknown_def_type
-             || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt)))
+          if (dt == vect_unknown_def_type)
             {
               if (vect_print_dump_info (REPORT_DETAILS))
                 fprintf (vect_dump, "Unsupported pattern.");
@@ -294,10 +322,15 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                         dt, slp_node);
              else
-               /* Not memory operation (we don't call this function for
-                  loads).  */
-               vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
-                                       slp_node);
+               {
+                 enum vect_def_type dts[2];
+                 dts[0] = dt;
+                 dts[1] = vect_uninitialized_def;
+                 /* Not memory operation (we don't call this function for
+                    loads).  */
+                 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
+                                         slp_node);
+               }
            }
        }
       else
@@ -424,6 +457,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
   gimple stmt = VEC_index (gimple, stmts, 0);
   enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
+  enum tree_code first_cond_code = ERROR_MARK;
   tree lhs;
   bool stop_recursion = false, need_same_oprnds = false;
   tree vectype, scalar_type, first_op1 = NULL_TREE;
@@ -440,11 +474,18 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   VEC (slp_oprnd_info, heap) *oprnds_info;
   unsigned int nops;
   slp_oprnd_info oprnd_info;
+  tree cond;
 
   if (is_gimple_call (stmt))
     nops = gimple_call_num_args (stmt);
+  else if (is_gimple_assign (stmt))
+    {
+      nops = gimple_num_ops (stmt) - 1;
+      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+       nops++;
+    }
   else
-    nops = gimple_num_ops (stmt) - 1;
+    return false;
 
   oprnds_info = vect_create_oprnd_info (nops, group_size);
 
@@ -467,7 +508,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
             }
 
-         vect_free_oprnd_info (&oprnds_info, true);
+         vect_free_oprnd_info (&oprnds_info);
           return false;
         }
 
@@ -481,10 +522,26 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
 
-         vect_free_oprnd_info (&oprnds_info, true);
+         vect_free_oprnd_info (&oprnds_info);
          return false;
        }
 
+       if (is_gimple_assign (stmt)
+          && gimple_assign_rhs_code (stmt) == COND_EXPR
+           && (cond = gimple_assign_rhs1 (stmt))
+           && !COMPARISON_CLASS_P (cond))
+        {
+          if (vect_print_dump_info (REPORT_SLP))
+            {
+              fprintf (vect_dump,
+                       "Build SLP failed: condition is not comparison ");
+              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+            }
+
+         vect_free_oprnd_info (&oprnds_info);
+          return false;
+        }
+
       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
       vectype = get_vectype_for_scalar_type (scalar_type);
       if (!vectype)
@@ -495,7 +552,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
             }
 
-         vect_free_oprnd_info (&oprnds_info, true);
+         vect_free_oprnd_info (&oprnds_info);
           return false;
         }
 
@@ -510,7 +567,25 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
 
       if (is_gimple_call (stmt))
-       rhs_code = CALL_EXPR;
+       {
+         rhs_code = CALL_EXPR;
+         if (gimple_call_internal_p (stmt)
+             || gimple_call_tail_p (stmt)
+             || gimple_call_noreturn_p (stmt)
+             || !gimple_call_nothrow_p (stmt)
+             || gimple_call_chain (stmt))
+           {
+             if (vect_print_dump_info (REPORT_SLP))
+               {
+                 fprintf (vect_dump,
+                          "Build SLP failed: unsupported call type ");
+                 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+               }
+
+             vect_free_oprnd_info (&oprnds_info);
+             return false;
+           }
+       }
       else
        rhs_code = gimple_assign_rhs_code (stmt);
 
@@ -542,7 +617,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
-                     vect_free_oprnd_info (&oprnds_info, true);
+                     vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
@@ -551,7 +626,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
-                     vect_free_oprnd_info (&oprnds_info, true);
+                     vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
@@ -588,7 +663,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
 
-             vect_free_oprnd_info (&oprnds_info, true);
+             vect_free_oprnd_info (&oprnds_info);
              return false;
            }
 
@@ -602,9 +677,30 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
 
-             vect_free_oprnd_info (&oprnds_info, true);
+             vect_free_oprnd_info (&oprnds_info);
              return false;
            }
+
+         if (rhs_code == CALL_EXPR)
+           {
+             gimple first_stmt = VEC_index (gimple, stmts, 0);
+             if (gimple_call_num_args (stmt) != nops
+                 || !operand_equal_p (gimple_call_fn (first_stmt),
+                                      gimple_call_fn (stmt), 0)
+                 || gimple_call_fntype (first_stmt)
+                    != gimple_call_fntype (stmt))
+               {
+                 if (vect_print_dump_info (REPORT_SLP))
+                   {
+                     fprintf (vect_dump,
+                              "Build SLP failed: different calls in ");
+                     print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                   }
+
+                 vect_free_oprnd_info (&oprnds_info);
+                 return false;
+               }
+           }
        }
 
       /* Strided store or load.  */
@@ -617,7 +713,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info))
                {
-                 vect_free_oprnd_info (&oprnds_info, true);
+                 vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
@@ -637,7 +733,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                     }
 
-                 vect_free_oprnd_info (&oprnds_info, true);
+                 vect_free_oprnd_info (&oprnds_info);
                   return false;
                 }
 
@@ -654,7 +750,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                     }
 
-                 vect_free_oprnd_info (&oprnds_info, true);
+                 vect_free_oprnd_info (&oprnds_info);
                   return false;
                 }
 
@@ -675,7 +771,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                         }
  
-                     vect_free_oprnd_info (&oprnds_info, true);
+                     vect_free_oprnd_info (&oprnds_info);
                       return false;
                     }
                 }
@@ -695,7 +791,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                         }
 
-                     vect_free_oprnd_info (&oprnds_info, true);
+                     vect_free_oprnd_info (&oprnds_info);
                       return false;
                     }
 
@@ -731,13 +827,15 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                }
 
              /* FORNOW: Not strided loads are not supported.  */
-             vect_free_oprnd_info (&oprnds_info, true);
+             vect_free_oprnd_info (&oprnds_info);
              return false;
            }
 
          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
-             && TREE_CODE_CLASS (rhs_code) != tcc_unary)
+             && TREE_CODE_CLASS (rhs_code) != tcc_unary
+             && rhs_code != COND_EXPR
+             && rhs_code != CALL_EXPR)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
@@ -746,16 +844,36 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
 
-             vect_free_oprnd_info (&oprnds_info, true);
+             vect_free_oprnd_info (&oprnds_info);
              return false;
            }
 
+          if (rhs_code == COND_EXPR)
+            {
+              tree cond_expr = gimple_assign_rhs1 (stmt);
+
+             if (i == 0)
+               first_cond_code = TREE_CODE (cond_expr);
+              else if (first_cond_code != TREE_CODE (cond_expr))
+                {
+                  if (vect_print_dump_info (REPORT_SLP))
+                    {
+                      fprintf (vect_dump, "Build SLP failed: different"
+                                         " operation");
+                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                    }
+
+                 vect_free_oprnd_info (&oprnds_info);
+                  return false;
+               }
+            }
+
          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info))
            {
-             vect_free_oprnd_info (&oprnds_info, true);
+             vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
@@ -786,6 +904,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
             *loads_permuted = true;
         }
 
+      vect_free_oprnd_info (&oprnds_info);
       return true;
     }
 
@@ -804,15 +923,18 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                                max_nunits, load_permutation, loads,
                                vectorization_factor, loads_permuted))
         {
-          free (child);
-          vect_free_oprnd_info (&oprnds_info, true);
+         if (child)
+           oprnd_info->def_stmts = NULL;
+         vect_free_slp_tree (child);
+         vect_free_oprnd_info (&oprnds_info);
          return false;
        }
 
+      oprnd_info->def_stmts = NULL;
       VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
     }
 
-  vect_free_oprnd_info (&oprnds_info, false);
+  vect_free_oprnd_info (&oprnds_info);
   return true;
 }
 
@@ -1076,7 +1198,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 
   /* We checked that this case ok, so there is no need to proceed with 
      permutation tests.  */
-  if (complex_numbers == 2)
+  if (complex_numbers == 2
+      && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
     {
       VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
       VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
@@ -1402,7 +1525,12 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
       while (next)
         {
-          VEC_safe_push (gimple, heap, scalar_stmts, next);
+         if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
+             && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
+           VEC_safe_push (gimple, heap, scalar_stmts,
+                       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
+         else
+            VEC_safe_push (gimple, heap, scalar_stmts, next);
           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
         }
     }
@@ -1411,7 +1539,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       /* Collect reduction statements.  */
       VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
       for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
-        VEC_safe_push (gimple, heap, scalar_stmts, next);
+       VEC_safe_push (gimple, heap, scalar_stmts, next);
     }
 
   node = vect_create_new_slp_node (scalar_stmts);
@@ -1440,6 +1568,9 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
           if (vect_print_dump_info (REPORT_SLP))
             fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                                " block SLP");
+         vect_free_slp_tree (node);
+         VEC_free (int, heap, load_permutation);
+         VEC_free (slp_tree, heap, loads);
           return false;
         }
 
@@ -1606,26 +1737,39 @@ static void
 vect_detect_hybrid_slp_stmts (slp_tree node)
 {
   int i;
-  gimple stmt;
+  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
+  gimple stmt = VEC_index (gimple, stmts, 0);
   imm_use_iterator imm_iter;
   gimple use_stmt;
-  stmt_vec_info stmt_vinfo;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
   slp_void_p child;
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  struct loop *loop = NULL;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+  basic_block bb = NULL;
 
   if (!node)
     return;
 
+  if (loop_vinfo)
+    loop = LOOP_VINFO_LOOP (loop_vinfo);
+  else
+    bb = BB_VINFO_BB (bb_vinfo);
+
   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
-       if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
+       if (gimple_bb (use_stmt)
+            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+                || bb == gimple_bb (use_stmt))
+           && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
             && (STMT_VINFO_RELEVANT (stmt_vinfo)
                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
-            && !(gimple_code (use_stmt) == GIMPLE_PHI
-                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt)) 
-                     == vect_reduction_def))
+           && !(gimple_code (use_stmt) == GIMPLE_PHI
+                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
+                  == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);
 
   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
@@ -1683,8 +1827,11 @@ new_bb_vec_info (basic_block bb)
 static void
 destroy_bb_vec_info (bb_vec_info bb_vinfo)
 {
+  VEC (slp_instance, heap) *slp_instances;
+  slp_instance instance;
   basic_block bb;
   gimple_stmt_iterator si;
+  unsigned i;
 
   if (!bb_vinfo)
     return;
@@ -1704,6 +1851,9 @@ destroy_bb_vec_info (bb_vec_info bb_vinfo)
   free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
   VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
+  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
+  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
+    vect_free_slp_instance (instance);
   VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
   free (bb_vinfo);
   bb->aux = NULL;
@@ -2074,7 +2224,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
   VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
   bool constant_p, is_store;
   tree neutral_op = NULL;
-  enum tree_code code = gimple_assign_rhs_code (stmt);
+  enum tree_code code = gimple_expr_code (stmt);
   gimple def_stmt;
   struct loop *loop;
 
@@ -2150,15 +2300,15 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
 
      For example, we have two scalar operands, s1 and s2 (e.g., group of
      strided accesses of size two), while NUNITS is four (i.e., four scalars
-     of this type can be packed in a vector). The output vector will contain
-     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
+     of this type can be packed in a vector).  The output vector will contain
+     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
      will be 2).
 
      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
      containing the operands.
 
      For example, NUNITS is four as before, and the group size is 8
-     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
+     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
@@ -2171,7 +2321,32 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
           if (is_store)
             op = gimple_assign_rhs1 (stmt);
           else
-            op = gimple_op (stmt, op_num + 1);
+           {
+             switch (code)
+               {
+                 case COND_EXPR:
+                   if (op_num == 0 || op_num == 1)
+                     {
+                       tree cond = gimple_assign_rhs1 (stmt);
+                       op = TREE_OPERAND (cond, op_num);
+                     }
+                   else
+                     {
+                       if (op_num == 2)
+                         op = gimple_assign_rhs2 (stmt);
+                       else
+                         op = gimple_assign_rhs3 (stmt);
+                     }
+                   break;
+
+                 case CALL_EXPR:
+                   op = gimple_call_arg (stmt, op_num);
+                   break;
+
+                 default:
+                   op = gimple_op (stmt, op_num + 1);
+               }
+           }
 
           if (reduc_index != -1)
             {
@@ -2739,6 +2914,8 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
     { 
       gimple last_store = vect_find_last_store_in_slp_instance (instance);
+      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
+       last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
       si = gsi_for_stmt (last_store);
     }
 
@@ -2756,6 +2933,46 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   return is_store;
 }
 
+/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
+   For loop vectorization this is done in vectorizable_call, but for SLP
+   it needs to be deferred until end of vect_schedule_slp, because multiple
+   SLP instances may refer to the same scalar stmt.  */
+
+static void
+vect_remove_slp_scalar_calls (slp_tree node)
+{
+  gimple stmt, new_stmt;
+  gimple_stmt_iterator gsi;
+  int i;
+  slp_void_p child;
+  tree lhs;
+  stmt_vec_info stmt_info;
+
+  if (!node)
+    return;
+
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_remove_slp_scalar_calls ((slp_tree) child);  /* Children first.  */
+
+  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
+    {
+      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
+       continue;  /* Not a call, or the stmt has no basic block.  */
+      stmt_info = vinfo_for_stmt (stmt);
+      if (stmt_info == NULL
+         || is_pattern_stmt_p (stmt_info)
+         || !PURE_SLP_STMT (stmt_info))
+       continue;  /* Need a vinfo; skip pattern and non-pure-SLP stmts.  */
+      lhs = gimple_call_lhs (stmt);  /* NOTE(review): assumes lhs != NULL.  */
+      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
+      set_vinfo_for_stmt (new_stmt, stmt_info);  /* Hand vinfo to new_stmt.  */
+      set_vinfo_for_stmt (stmt, NULL);
+      STMT_VINFO_STMT (stmt_info) = new_stmt;
+      gsi = gsi_for_stmt (stmt);
+      gsi_replace (&gsi, new_stmt, false);
+      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
+    }
+}
 
 /* Generate vector code for all SLP instances in the loop/basic block.  */
 
@@ -2764,7 +2981,8 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
   VEC (slp_instance, heap) *slp_instances;
   slp_instance instance;
-  unsigned int i, vf;
+  slp_tree loads_node;
+  unsigned int i, j, vf;
   bool is_store = false;
 
   if (loop_vinfo)
@@ -2783,6 +3001,15 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
       /* Schedule the tree of INSTANCE.  */
       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                              instance, vf);
+
+      /* Clear STMT_VINFO_VEC_STMT of all loads.  With shared loads
+        between SLP instances we fail to properly initialize the
+        vectorized SLP stmts and confuse different load permutations.  */
+      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), j, loads_node)
+       STMT_VINFO_VEC_STMT
+         (vinfo_for_stmt
+           (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (loads_node), 0))) = NULL;
+
       if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
@@ -2795,12 +3022,24 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
       unsigned int j;
       gimple_stmt_iterator gsi;
 
+      /* Remove scalar call stmts.  Do not do this for basic-block
+        vectorization as not all uses may be vectorized.
+        ???  Why should this be necessary?  DCE should be able to
+        remove the stmts itself.
+        ???  For BB vectorization we can as well remove scalar
+        stmts starting from the SLP tree root if they have no
+        uses.  */
+      if (loop_vinfo)
+       vect_remove_slp_scalar_calls (root);
+
       for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
         {
           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
             break;
 
+         if (is_pattern_stmt_p (vinfo_for_stmt (store)))
+           store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
           /* Free the attached stmt_vec_info and remove the stmt.  */
           gsi = gsi_for_stmt (store);
           gsi_remove (&gsi, true);