PR testsuite/34168

[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-transform.c
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c

index 5331fcd..59fa6d5 100644 (file)
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -1,5 +1,5 @@
  /* Transformation Utilities for Loop Vectorization.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
     Contributed by Dorit Naishlos <dorit@il.ibm.com>
  
  This file is part of GCC.
@@ -119,11 +119,13 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
    int vec_inside_cost = 0;
    int vec_outside_cost = 0;
    int scalar_single_iter_cost = 0;
+  int scalar_outside_cost = 0;
+  bool runtime_test = false;
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
    int nbbs = loop->num_nodes;
-  int byte_misalign;
+  int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
    int peel_guard_costs = 0;
    int innerloop_iters = 0, factor;
    VEC (slp_instance, heap) *slp_instances;
@@ -132,20 +134,44 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
    /* Cost model disabled.  */
    if (!flag_vect_cost_model)
      {
-      if (vect_print_dump_info (REPORT_DETAILS))
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "cost model disabled.");      
        return 0;
      }
  
-  /* Requires loop versioning tests to handle misalignment.
-     FIXME: Make cost depend on number of stmts in may_misalign list.  */
+  /* If the number of iterations is unknown, or the
+     peeling-for-misalignment amount is unknown, we will have to generate
+     a runtime test to test the loop count against the threshold.    */
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      || (byte_misalign < 0))
+    runtime_test = true;
+
+  /* Requires loop versioning tests to handle misalignment.  */
  
    if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
      {
-      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
-      if (vect_print_dump_info (REPORT_DETAILS))
+      /*  FIXME: Make cost depend on complexity of individual check.  */
+      vec_outside_cost +=
+        VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "cost model: Adding cost of checks for loop "
-                 "versioning.\n");
+                 "versioning to treat misalignment.\n");
+    }
+
+  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    {
+      /*  FIXME: Make cost depend on complexity of individual check.  */
+      vec_outside_cost +=
+        VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
+      if (vect_print_dump_info (REPORT_COST))
+        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
+                 "versioning aliasing.\n");
+    }
+
+  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    {
+      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
      }
  
    /* Count statements in scalar loop.  Using this as scalar cost for a single
@@ -171,18 +197,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
         factor = 1;
  
        for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
-        {
-          tree stmt = bsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-          if (!STMT_VINFO_RELEVANT_P (stmt_info)
-              && !STMT_VINFO_LIVE_P (stmt_info))
-            continue;
-          scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
-          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
+       {
+         tree stmt = bsi_stmt (si);
+         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+         /* Skip stmts that are not vectorized inside the loop.  */
+         if (!STMT_VINFO_RELEVANT_P (stmt_info)
+             && (!STMT_VINFO_LIVE_P (stmt_info)
+                 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+           continue;
+         scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
+         vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
           /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
              some of the "outside" costs are generated inside the outer-loop.  */
-          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
-        }
+         vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+       }
      }
  
    /* Add additional cost for the peeled instructions in prologue and epilogue
@@ -194,19 +222,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
       TODO: Build an expression that represents peel_iters for prologue and
       epilogue to be used in a run-time test.  */
  
-  byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
-
    if (byte_misalign < 0)
      {
        peel_iters_prologue = vf/2;
-      if (vect_print_dump_info (REPORT_DETAILS))
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "cost model: "
                   "prologue peel iters set to vf/2.");
  
        /* If peeling for alignment is unknown, loop bound of main loop becomes
           unknown.  */
        peel_iters_epilogue = vf/2;
-      if (vect_print_dump_info (REPORT_DETAILS))
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "cost model: "
                   "epilogue peel iters set to vf/2 because "
                   "peeling for alignment is unknown .");
@@ -236,7 +262,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
          {
            peel_iters_epilogue = vf/2;
-          if (vect_print_dump_info (REPORT_DETAILS))
+          if (vect_print_dump_info (REPORT_COST))
              fprintf (vect_dump, "cost model: "
                       "epilogue peel iters set to vf/2 because "
                       "loop iterations are unknown .");
@@ -259,24 +285,72 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
                        + (peel_iters_epilogue * scalar_single_iter_cost)
                        + peel_guard_costs;
  
-  /* Allow targets add additional (outside-of-loop) costs. FORNOW, the only
-     information we provide for the target is whether testing against the
-     threshold involves a runtime test.  */
-  if (targetm.vectorize.builtin_vectorization_cost)
-    {
-      bool runtime_test = false;
-
-      /* If the number of iterations is unknown, or the
-        peeling-for-misalignment amount is unknown, we eill have to generate
-        a runtime test to test the loop count against the threshold.  */
-      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-         || (byte_misalign < 0))
-       runtime_test = true;
-      vec_outside_cost +=
-       targetm.vectorize.builtin_vectorization_cost (runtime_test);
-      if (vect_print_dump_info (REPORT_DETAILS))
-       fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
-                 targetm.vectorize.builtin_vectorization_cost (runtime_test));
+  /* FORNOW: The scalar outside cost is incremented in one of the
+     following ways:
+
+     1. The vectorizer checks for alignment and aliasing and generates
+     a condition that allows dynamic vectorization.  A cost model
+     check is ANDED with the versioning condition.  Hence scalar code
+     path now has the added cost of the versioning check.
+
+       if (cost > th & versioning_check)
+         jmp to vector code
+
+     Hence run-time scalar is incremented by not-taken branch cost.
+
+     2. The vectorizer then checks if a prologue is required.  If the
+     cost model check was not done before during versioning, it has to
+     be done before the prologue check.
+
+       if (cost <= th)
+         prologue = scalar_iters
+       if (prologue == 0)
+         jmp to vector code
+       else
+         execute prologue
+       if (prologue == num_iters)
+        go to exit
+
+     Hence the run-time scalar cost is incremented by a taken branch,
+     plus a not-taken branch, plus a taken branch cost.
+
+     3. The vectorizer then checks if an epilogue is required.  If the
+     cost model check was not done before during prologue check, it
+     has to be done with the epilogue check.
+
+       if (prologue == 0)
+         jmp to vector code
+       else
+         execute prologue
+       if (prologue == num_iters)
+        go to exit
+       vector code:
+         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
+           jmp to epilogue
+
+     Hence the run-time scalar cost should be incremented by 2 taken
+     branches.
+
+     TODO: The back end may reorder the BBS's differently and reverse
+     conditions/branch directions.  Change the stimates below to
+     something more reasonable.  */
+
+  if (runtime_test)
+    {
+      /* Cost model check occurs at versioning.  */
+      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+         || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+       scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
+      else
+       {
+         /* Cost model occurs at prologue generation.  */
+         if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+           scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
+             + TARG_COND_NOT_TAKEN_BRANCH_COST;
+         /* Cost model check occurs at epilogue generation.  */
+         else
+           scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
+       }
      }
  
    /* Add SLP costs.  */
@@ -289,9 +363,13 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
  
    /* Calculate number of iterations required to make the vector version 
       profitable, relative to the loop bodies only. The following condition
-     must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
+     must hold true: 
+     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
+     where
       SIC = scalar iteration cost, VIC = vector iteration cost,
-     VOC = vector outside cost and VF = vectorization factor.  */
+     VOC = vector outside cost, VF = vectorization factor,
+     PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
+     SOC = scalar outside cost for run time cost model check.  */
  
    if ((scalar_single_iter_cost * vf) > vec_inside_cost)
      {
@@ -299,22 +377,22 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
          min_profitable_iters = 1;
        else
          {
-          min_profitable_iters = (vec_outside_cost * vf 
-                                  - vec_inside_cost * peel_iters_prologue
+          min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
+                                 - vec_inside_cost * peel_iters_prologue
                                    - vec_inside_cost * peel_iters_epilogue)
                                   / ((scalar_single_iter_cost * vf)
                                      - vec_inside_cost);
  
            if ((scalar_single_iter_cost * vf * min_profitable_iters)
                <= ((vec_inside_cost * min_profitable_iters)
-                  + (vec_outside_cost * vf)))
+                  + ((vec_outside_cost - scalar_outside_cost) * vf)))
              min_profitable_iters++;
          }
      }
    /* vector version will never be profitable.  */
    else
      {
-      if (vect_print_dump_info (REPORT_DETAILS))
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "cost model: vector iteration cost = %d "
                   "is divisible by scalar iteration cost = %d by a factor "
                   "greater than or equal to the vectorization factor = %d .",
@@ -322,14 +400,16 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        return -1;
      }
  
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      {
        fprintf (vect_dump, "Cost model analysis: \n");
        fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
                vec_inside_cost);
        fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
                vec_outside_cost);
-      fprintf (vect_dump, "  Scalar cost: %d\n", scalar_single_iter_cost);
+      fprintf (vect_dump, "  Scalar iteration cost: %d\n",
+              scalar_single_iter_cost);
+      fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
        fprintf (vect_dump, "  prologue iterations: %d\n",
                 peel_iters_prologue);
        fprintf (vect_dump, "  epilogue iterations: %d\n",
@@ -346,7 +426,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
         then skip the vectorized loop.  */
    min_profitable_iters--;
  
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "  Profitability threshold = %d\n",
              min_profitable_iters);
      
@@ -363,7 +443,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
     generated within the strip-mine loop, the initial definition before
     the loop, and the epilogue code that must be generated.  */
  
-static void
+static bool 
  vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                            int ncopies)
  {
@@ -384,6 +464,16 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
  
    reduction_op = TREE_OPERAND (operation, op_type-1);
    vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+  if (!vectype)
+    {
+      if (vect_print_dump_info (REPORT_COST))
+        {
+          fprintf (vect_dump, "unsupported data-type ");
+          print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
+        }
+      return false;
+   }
+  
    mode = TYPE_MODE (vectype);
    orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  
@@ -431,10 +521,12 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
  
    STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
  
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
               "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
               STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+
+  return true;
  }
  
  
@@ -450,7 +542,7 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
    /* prologue cost for vec_init and vec_step.  */
    STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
    
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
               "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
               STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
@@ -479,7 +571,7 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
         outside_cost += TARG_SCALAR_TO_VEC_COST; 
      }
    
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
               "outside_cost = %d .", inside_cost, outside_cost);
  
@@ -537,7 +629,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
        inside_cost = ncopies * exact_log2(group_size) * group_size 
               * TARG_VEC_STMT_COST;
  
-      if (vect_print_dump_info (REPORT_DETAILS))
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                   group_size);
  
@@ -546,7 +638,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
    /* Costs of the stores.  */
    inside_cost += ncopies * TARG_VEC_STORE_COST;
  
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
               "outside_cost = %d .", inside_cost, outside_cost);
  
@@ -597,7 +689,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
        inside_cost = ncopies * exact_log2(group_size) * group_size
         * TARG_VEC_STMT_COST;
  
-      if (vect_print_dump_info (REPORT_DETAILS))
+      if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                   group_size);
  
@@ -610,7 +702,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
        {
          inside_cost += ncopies * TARG_VEC_LOAD_COST;
  
-        if (vect_print_dump_info (REPORT_DETAILS))
+        if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "vect_model_load_cost: aligned.");
  
          break;
@@ -620,7 +712,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
          /* Here, we assign an additional cost for the unaligned load.  */
          inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
  
-        if (vect_print_dump_info (REPORT_DETAILS))
+        if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                     "hardware.");
  
@@ -640,7 +732,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
        }
      case dr_explicit_realign_optimized:
        {
-        if (vect_print_dump_info (REPORT_DETAILS))
+        if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                     "pipelined.");
  
@@ -667,7 +759,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
        gcc_unreachable ();
      }
    
-  if (vect_print_dump_info (REPORT_DETAILS))
+  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
               "outside_cost = %d .", inside_cost, outside_cost);
  
@@ -1289,6 +1381,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
    bool is_store = false;
    unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
+  bool constant_p;
  
    if (STMT_VINFO_DATA_REF (stmt_vinfo))
      is_store = true;
@@ -1312,6 +1405,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
    number_of_copies = least_common_multiple (nunits, group_size) / group_size;
  
    number_of_places_left_in_vector = nunits;
+  constant_p = true;
    for (j = 0; j < number_of_copies; j++)
      {
        for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
@@ -1321,6 +1415,8 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
             op = operation;
           else
             op = TREE_OPERAND (operation, op_num);
+         if (!CONSTANT_CLASS_P (op))
+           constant_p = false;
  
            /* Create 'vect_ = {op0,op1,...,opn}'.  */
            t = tree_cons (NULL_TREE, op, t);
@@ -1332,7 +1428,12 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                number_of_places_left_in_vector = nunits;
  
               vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
-              vec_cst = build_constructor_from_list (vector_type, t);
+              gcc_assert (vector_type);
+             if (constant_p)
+               vec_cst = build_vector (vector_type, t);
+             else
+               vec_cst = build_constructor_from_list (vector_type, t);
+             constant_p = true;
                VEC_quick_push (tree, voprnds,
                                vect_init_vector (stmt, vec_cst, vector_type,
                                                 NULL));
@@ -1364,7 +1465,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
  }
  
  
-/* Get vectorized defintions from SLP_NODE that contains corresponding
+/* Get vectorized definitions from SLP_NODE that contains corresponding
     vectorized def-stmts.  */
   
  static void
@@ -1456,8 +1557,8 @@ get_initial_def_for_induction (tree iv_phi)
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
-  tree vectype = get_vectype_for_scalar_type (scalar_type);
-  int nunits =  TYPE_VECTOR_SUBPARTS (vectype);
+  tree vectype; 
+  int nunits;
    edge pe = loop_preheader_edge (loop);
    struct loop *iv_loop;
    basic_block new_bb;
@@ -1471,7 +1572,7 @@ get_initial_def_for_induction (tree iv_phi)
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    int i;
    bool ok;
-  int ncopies = vf / nunits;
+  int ncopies;
    tree expr;
    stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
    bool nested_in_vect_loop = false;
@@ -1484,6 +1585,11 @@ get_initial_def_for_induction (tree iv_phi)
    block_stmt_iterator si;
    basic_block bb = bb_for_stmt (iv_phi);
  
+  vectype = get_vectype_for_scalar_type (scalar_type);
+  gcc_assert (vectype);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  ncopies = vf / nunits;
+
    gcc_assert (phi_info);
    gcc_assert (ncopies >= 1);
  
@@ -1582,7 +1688,8 @@ get_initial_def_for_induction (tree iv_phi)
    t = NULL_TREE;
    for (i = 0; i < nunits; i++)
      t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
-  vec = build_constructor_from_list (vectype, t);
+  gcc_assert (CONSTANT_CLASS_P (new_name));
+  vec = build_vector (vectype, t);
    vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
  
  
@@ -1638,7 +1745,8 @@ get_initial_def_for_induction (tree iv_phi)
        t = NULL_TREE;
        for (i = 0; i < nunits; i++)
         t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
-      vec = build_constructor_from_list (vectype, t);
+      gcc_assert (CONSTANT_CLASS_P (new_name));
+      vec = build_vector (vectype, t);
        vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
  
        vec_def = induc_def;
@@ -1774,6 +1882,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
              t = tree_cons (NULL_TREE, op, t);
            }
          vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
+        gcc_assert (vector_type);
          vec_cst = build_vector (vector_type, t);
  
          return vect_init_vector (stmt, vec_cst, vector_type, NULL);
@@ -1796,6 +1905,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
  
         /* FIXME: use build_constructor directly.  */
         vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
+        gcc_assert (vector_type);
          vec_inv = build_constructor_from_list (vector_type, t);
          return vect_init_vector (stmt, vec_inv, vector_type, NULL);
        }
@@ -1938,7 +2048,7 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
    VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
  
-  if (vec_oprnds1)
+  if (vec_oprnds1 && *vec_oprnds1)
      {
        vec_oprnd = VEC_pop (tree, *vec_oprnds1);
        vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
@@ -2001,11 +2111,7 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt,
    /* Make sure bsi points to the stmt that is being vectorized.  */
    gcc_assert (stmt == bsi_stmt (*bsi));
  
-#ifdef USE_MAPPED_LOCATION
    SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
-#else
-  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
-#endif
  }
  
  
@@ -2070,7 +2176,7 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
    tree vector_type;
    bool nested_in_vect_loop = false; 
  
-  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
+  gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
    if (nested_in_vect_loop_p (loop, stmt))
      nested_in_vect_loop = true;
    else
@@ -2083,18 +2189,19 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
-      if (nested_in_vect_loop)
-       *adjustment_def = vecdef;
-      else
-       *adjustment_def = init_val;
-    /* Create a vector of zeros for init_def.  */
-    if (INTEGRAL_TYPE_P (type))
-      def_for_init = build_int_cst (type, 0);
+    if (nested_in_vect_loop)
+      *adjustment_def = vecdef;
      else
+      *adjustment_def = init_val;
+    /* Create a vector of zeros for init_def.  */
+    if (SCALAR_FLOAT_TYPE_P (type))
        def_for_init = build_real (type, dconst0);
-      for (i = nunits - 1; i >= 0; --i)
-    t = tree_cons (NULL_TREE, def_for_init, t);
+    else
+      def_for_init = build_int_cst (type, 0);
+    for (i = nunits - 1; i >= 0; --i)
+      t = tree_cons (NULL_TREE, def_for_init, t);
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
+    gcc_assert (vector_type);
      init_def = build_vector (vector_type, t);
      break;
  
@@ -2205,6 +2312,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
    op_type = TREE_OPERAND_LENGTH (operation);
    reduction_op = TREE_OPERAND (operation, op_type-1);
    vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+  gcc_assert (vectype);
    mode = TYPE_MODE (vectype);
  
    /*** 1. Create the reduction def-use cycle  ***/
@@ -2409,7 +2517,6 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
           rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                          bitsize_zero_node);
-         BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
           epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
           new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
           GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
@@ -2424,7 +2531,6 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                  bitpos);
                 
-             BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
               epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
               new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
               GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
@@ -2460,7 +2566,6 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
         bitpos = bitsize_zero_node;
  
        rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
-      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
        epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
        new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
        GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp; 
@@ -2677,6 +2782,9 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      return false;
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    scalar_type = TREE_TYPE (scalar_dest);
+  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) 
+      && !SCALAR_FLOAT_TYPE_P (scalar_type))
+    return false;
  
    /* All uses but the last are expected to be defined in the loop.
       The last use is the reduction variable.  */
@@ -2778,6 +2886,16 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
           reduction variable, and get the tree-code from orig_stmt.  */
        orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
        vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
+      if (!vectype)
+       {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "unsupported data-type ");
+              print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
+            }
+          return false;
+        }
+
        vec_mode = TYPE_MODE (vectype);
      }
    else
@@ -2806,7 +2924,8 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    if (!vec_stmt) /* transformation not required.  */
      {
        STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
-      vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
+      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
+        return false;
        return true;
      }
  
@@ -2944,14 +3063,6 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    if (STMT_SLP_TYPE (stmt_info))
      return false;
  
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is STMT a vectorizable call?   */
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
@@ -2999,10 +3110,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      return false;
  
    vectype_in = get_vectype_for_scalar_type (rhs_type);
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
    vectype_out = get_vectype_for_scalar_type (lhs_type);
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  
    /* FORNOW */
@@ -3286,14 +3401,6 @@ vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      /* FORNOW: not yet supported.  */
-      if (vect_print_dump_info (REPORT_DETAILS))
-       fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
  
@@ -3309,11 +3416,15 @@ vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
    op0 = TREE_OPERAND (operation, 0);
    rhs_type = TREE_TYPE (op0);
    vectype_in = get_vectype_for_scalar_type (rhs_type);
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    lhs_type = TREE_TYPE (scalar_dest);
    vectype_out = get_vectype_for_scalar_type (lhs_type);
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  
    /* FORNOW */
@@ -3520,6 +3631,9 @@ vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
        *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      }
  
+  if (vec_oprnds0)
+    VEC_free (tree, heap, vec_oprnds0); 
+
    return true;
  }
  
@@ -3560,14 +3674,6 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is vectorizable assignment?  */
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
@@ -3677,14 +3783,6 @@ vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
  
    gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
  
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      /* FORNOW: not yet supported.  */
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    if (TREE_CODE (phi) != PHI_NODE)
      return false;
  
@@ -3746,6 +3844,8 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    int j, i;
    VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
    tree vop0, vop1;
+  unsigned int k;
+  bool scalar_shift_arg = false;
  
    /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
       this, so we can safely override NCOPIES with 1 here.  */
@@ -3766,14 +3866,6 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is STMT a vectorizable binary/unary operation?   */
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
@@ -3783,6 +3875,8 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    if (nunits_out != nunits_in)
      return false;
@@ -3838,18 +3932,21 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
      {
        if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "op not supported by target.");
+      /* Check only during analysis.  */
        if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
-          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-            < vect_min_worthwhile_factor (code))
+          || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+             < vect_min_worthwhile_factor (code)
+              && !vec_stmt))
          return false;
        if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "proceeding using word mode.");
      }
  
-  /* Worthwhile without SIMD support?  */
+  /* Worthwhile without SIMD support? Check only during analysis.  */
    if (!VECTOR_MODE_P (TYPE_MODE (vectype))
        && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-        < vect_min_worthwhile_factor (code))
+        < vect_min_worthwhile_factor (code)
+      && !vec_stmt)
      {
        if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "not worthwhile without SIMD support.");
@@ -3865,14 +3962,18 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
        /* Invariant argument is needed for a vector shift
          by a scalar shift operand.  */
        optab_op2_mode = insn_data[icode].operand[2].mode;
-      if (! (VECTOR_MODE_P (optab_op2_mode)
-            || dt[1] == vect_constant_def
-            || dt[1] == vect_invariant_def))
+      if (!VECTOR_MODE_P (optab_op2_mode))
         {
-         if (vect_print_dump_info (REPORT_DETAILS))
-           fprintf (vect_dump, "operand mode requires invariant argument.");
-         return false;
-       }
+         if (dt[1] != vect_constant_def && dt[1] != vect_invariant_def)
+           {
+             if (vect_print_dump_info (REPORT_DETAILS))
+               fprintf (vect_dump, "operand mode requires invariant"
+                                    " argument.");
+             return false;
+           }
+
+          scalar_shift_arg = true;
+        }
      }
  
    if (!vec_stmt) /* transformation not required.  */
@@ -3892,10 +3993,21 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    /* Handle def.  */
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
  
+  /* Allocate VECs for vector operands. In case of SLP, vector operands are 
+     created in the previous stages of the recursion, so no allocation is
+     needed, except for the case of shift with scalar shift argument. In that
+     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
+     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
+     In case of loop-based vectorization we allocate VECs of size 1. We 
+     allocate VEC_OPRNDS1 only in case of binary operation.  */ 
    if (!slp_node)
-    vec_oprnds0 = VEC_alloc (tree, heap, 1);
-  if (op_type == binary_op)
-    vec_oprnds1 = VEC_alloc (tree, heap, 1);
+    {
+      vec_oprnds0 = VEC_alloc (tree, heap, 1);
+      if (op_type == binary_op)
+        vec_oprnds1 = VEC_alloc (tree, heap, 1);
+    }
+  else if (scalar_shift_arg)
+    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);  
  
    /* In case the vectorization factor (VF) is bigger than the number
       of elements that we can fit in a vectype (nunits), we have to generate
@@ -3970,10 +4082,20 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                     fprintf (vect_dump, "operand 1 using scalar mode.");
                   vec_oprnd1 = op1;
                   VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
+                 if (slp_node)
+                   {
+                     /* Store vec_oprnd1 for every vector stmt to be created
+                        for SLP_NODE. We check during the analysis that all the
+                         shift arguments are the same.  
+                        TODO: Allow different constants for different vector 
+                        stmts generated for an SLP instance.  */          
+                     for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
+                       VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
+                   }
                 }
             }
          
-          /* vec_oprnd is available if operand 1 should be of a scalar-type 
+          /* vec_oprnd1 is available if operand 1 should be of a scalar-type 
               (a special case for certain kind of vector shifts); otherwise, 
               operand 1 should be of a vector type (the usual case).  */
           if (op_type == binary_op && !vec_oprnd1)
@@ -4061,14 +4183,6 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is STMT a vectorizable type-demotion operation?  */
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
@@ -4083,10 +4197,14 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
  
    op0 = TREE_OPERAND (operation, 0);
    vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    if (nunits_in != nunits_out / 2) /* FORNOW */
      return false;
@@ -4218,14 +4336,6 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is STMT a vectorizable type-promotion operation?  */
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
@@ -4241,10 +4351,14 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
  
    op0 = TREE_OPERAND (operation, 0);
    vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    if (nunits_out != nunits_in / 2) /* FORNOW */
      return false;
@@ -4471,11 +4585,8 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain,
    tree scalar_dest, tmp;
    int i;
    unsigned int j;
-  VEC(tree,heap) *first, *second;
    
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
-  first = VEC_alloc (tree, heap, length/2);
-  second = VEC_alloc (tree, heap, length/2);
  
    /* Check that the operation is supported.  */
    if (!vect_strided_store_supported (vectype))
@@ -4563,7 +4674,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
    int j;
-  tree next_stmt, first_stmt;
+  tree next_stmt, first_stmt = NULL_TREE;
    bool strided_store = false;
    unsigned int group_size, i;
    VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
@@ -4594,13 +4705,6 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is vectorizable store? */
  
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
@@ -4632,9 +4736,28 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
      {
        strided_store = true;
+      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
        if (!vect_strided_store_supported (vectype)
           && !PURE_SLP_STMT (stmt_info) && !slp)
-       return false;      
+       return false;
+     
+      if (first_stmt == stmt)
+       {
+          /* STMT is the leader of the group. Check the operands of all the
+             stmts of the group.  */
+          next_stmt = DR_GROUP_NEXT_DR (stmt_info);
+          while (next_stmt)
+            {
+              op = GIMPLE_STMT_OPERAND (next_stmt, 1);
+              if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+                {
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    fprintf (vect_dump, "use not simple.");
+                  return false;
+                }
+              next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
+            }
+        }
      }
  
    if (!vec_stmt) /* transformation not required.  */
@@ -4649,7 +4772,6 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
  
    if (strided_store)
      {
-      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
        first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
        group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
  
@@ -4795,8 +4917,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
              OPRNDS are of size 1.  */
           for (i = 0; i < group_size; i++)
             {
-             vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, 
-                                                  VEC_index (tree, oprnds, i));
+             op = VEC_index (tree, oprnds, i);
+             vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+             vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op); 
               VEC_replace(tree, dr_chain, i, vec_oprnd);
               VEC_replace(tree, oprnds, i, vec_oprnd);
             }
@@ -4846,6 +4969,11 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
         }
      }
  
+  VEC_free (tree, heap, dr_chain);  
+  VEC_free (tree, heap, oprnds);  
+  if (result_chain)
+    VEC_free (tree, heap, result_chain);  
+
    return true;
  }
  
@@ -5015,6 +5143,7 @@ vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
        new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
        new_temp = make_ssa_name (vec_dest, new_stmt);
        GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+      mark_symbols_for_renaming (new_stmt);
        new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
        gcc_assert (!new_bb);
        msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
@@ -5350,6 +5479,8 @@ vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
             break;
          }
      }
+
+  VEC_free (tree, heap, result_chain);
    return true;
  }
  
@@ -5423,14 +5554,6 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
      return false;
  
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "value used after loop.");
-      return false;
-    }
-
    /* Is vectorizable load? */
    if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
      return false;
@@ -5758,8 +5881,6 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                   bitpos = bitsize_zero_node;
                   vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, 
                                                             bitsize, bitpos);
-                 BIT_FIELD_REF_UNSIGNED (vec_inv) = 
-                                                TYPE_UNSIGNED (scalar_type);
                   vec_dest = 
                         vect_create_destination_var (scalar_dest, NULL_TREE);
                   new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
@@ -5797,6 +5918,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
           if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
             return false;         
           *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+          VEC_free (tree, heap, dr_chain);
           dr_chain = VEC_alloc (tree, heap, group_size);
         }
        else
@@ -5809,6 +5931,9 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
         }
      }
  
+  if (dr_chain)
+    VEC_free (tree, heap, dr_chain);
+
    return true;
  }
  
@@ -5858,7 +5983,7 @@ vectorizable_live_operation (tree stmt,
    for (i = 0; i < op_type; i++)
      {
        op = TREE_OPERAND (operation, i);
-      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+      if (op && !vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
          {
            if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "use not simple.");
@@ -6408,6 +6533,33 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
      }
  }
  
+/* Return the more conservative threshold between the
+   min_profitable_iters returned by the cost model and the user
+   specified threshold, if provided.  */
+
+static unsigned int
+conservative_cost_threshold (loop_vec_info loop_vinfo,
+                            int min_profitable_iters)
+{
+  unsigned int th;
+  int min_scalar_loop_bound;
+
+  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
+                           * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
+
+  /* Use the cost model only if it is more conservative than user specified
+     threshold.  */
+  th = (unsigned) min_scalar_loop_bound;
+  if (min_profitable_iters
+      && (!min_scalar_loop_bound
+          || min_profitable_iters > min_scalar_loop_bound))
+    th = (unsigned) min_profitable_iters;
+
+  if (th && vect_print_dump_info (REPORT_COST))
+    fprintf (vect_dump, "Vectorization may not be profitable.");
+
+  return th;
+}
  
  /* Function vect_do_peeling_for_loop_bound
  
@@ -6428,8 +6580,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
    edge update_e;
    basic_block preheader;
    int loop_num;
-  unsigned int th;
-  int min_scalar_loop_bound;
+  bool check_profitability = false;
+  unsigned int th = 0;
    int min_profitable_iters;
  
    if (vect_print_dump_info (REPORT_DETAILS))
@@ -6447,28 +6599,24 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
  
    loop_num  = loop->num; 
  
-  /* Analyze cost to set threshhold for vectorized loop.  */
-  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
-  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
-                           * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
-
-  /* Use the cost model only if it is more conservative than user specified
-     threshold.  */
+  /* If cost model check not done during versioning and 
+     peeling for alignment.  */
+  if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+      && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
+      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+    {
+      check_profitability = true;
  
-  th = (unsigned) min_scalar_loop_bound;
-  if (min_profitable_iters
-      && (!min_scalar_loop_bound
-          || min_profitable_iters > min_scalar_loop_bound))
-    th = (unsigned) min_profitable_iters;
+      /* Get profitability threshold for vectorized loop.  */
+      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
  
-  if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
-      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
-      && vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "vectorization may not be profitable.");
+      th = conservative_cost_threshold (loop_vinfo, 
+                                       min_profitable_iters);
+    }
  
    new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
                                              ratio_mult_vf_name, ni_name, false,
-                                            th);
+                                            th, check_profitability);
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
  #ifdef ENABLE_CHECKING
@@ -6686,6 +6834,9 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
    tree niters_of_prolog_loop, ni_name;
    tree n_iters;
    struct loop *new_loop;
+  bool check_profitability = false;
+  unsigned int th = 0;
+  int min_profitable_iters;
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
@@ -6695,10 +6846,26 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
    ni_name = vect_build_loop_niters (loop_vinfo);
    niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
    
+
+  /* If cost model check not done during versioning.  */
+  if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+      && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    {
+      check_profitability = true;
+
+      /* Get profitability threshold for vectorized loop.  */
+      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
+
+      th = conservative_cost_threshold (loop_vinfo, 
+                                       min_profitable_iters);
+    }
+
    /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
-  new_loop = 
-       slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop), 
-                                      niters_of_prolog_loop, ni_name, true, 0); 
+  new_loop =
+    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
+                                  niters_of_prolog_loop, ni_name, true,
+                                  th, check_profitability);
+
    gcc_assert (new_loop);
  #ifdef ENABLE_CHECKING
    slpeel_verify_cfg_after_peeling (new_loop, loop);
@@ -6726,6 +6893,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
     checked at runtime.
  
     Input:
+   COND_EXPR  - input conditional expression.  New conditions will be chained
+                with logical AND operation.
     LOOP_VINFO - two fields of the loop information are used.
                  LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                  LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
@@ -6742,8 +6911,9 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
          test can be done as a&(n-1) == 0.  For example, for 16
          byte vectors the test is a&0xf == 0.  */
  
-static tree
+static void
  vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
+                                   tree *cond_expr,
                                     tree *cond_expr_stmt_list)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -6759,6 +6929,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
    tree or_tmp_name = NULL_TREE;
    tree and_tmp, and_tmp_name, and_stmt;
    tree ptrsize_zero;
+  tree part_cond_expr;
  
    /* Check that mask is one less than a power of 2, i.e., mask is
       all zeros followed by all ones.  */
@@ -6832,8 +7003,13 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
    /* Make and_tmp the left operand of the conditional test against zero.
       if and_tmp has a nonzero bit then some address is unaligned.  */
    ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
-  return build2 (EQ_EXPR, boolean_type_node,
-                 and_tmp_name, ptrsize_zero);
+  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
+                               and_tmp_name, ptrsize_zero);
+  if (*cond_expr)
+    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+                             *cond_expr, part_cond_expr);
+  else
+    *cond_expr = part_cond_expr;
  }
  
  /* Function vect_vfa_segment_size.
@@ -6852,31 +7028,18 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
  static tree
  vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
  {
-  tree segment_length;
+  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
+                                    DR_STEP (dr), vect_factor);
  
    if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
      {
-      tree vector_size =
-        build_int_cst (integer_type_node,
-          GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
-           (vinfo_for_stmt (DR_STMT (dr))))));
+      tree vector_size = TYPE_SIZE_UNIT
+                         (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
  
-      segment_length =
-       fold_convert (sizetype,
-         fold_build2 (PLUS_EXPR, integer_type_node,
-           fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
-                        vect_factor),
-           vector_size));
-    }
-  else
-    {
-      segment_length =
-       fold_convert (sizetype,
-         fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
-                      vect_factor));
+      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
+                                   segment_length, vector_size);
      }
-
-    return segment_length;
+  return fold_convert (sizetype, segment_length);
  }
  
  /* Function vect_create_cond_for_alias_checks.
@@ -6887,7 +7050,7 @@ vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
  
     Input:
     COND_EXPR  - input conditional expression.  New conditions will be chained
-                with logical and operation.
+                with logical AND operation.
     LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                 to be checked.
  
@@ -6895,6 +7058,8 @@ vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
     COND_EXPR - conditional expression.
     COND_EXPR_STMT_LIST - statements needed to construct the conditional
                           expression.
+
+
     The returned value is the conditional expression to be used in the if
     statement that controls which version of the loop gets executed at runtime.
  */
@@ -6928,26 +7093,47 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
  
    for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
      {
-      tree stmt_a = DR_STMT (DDR_A (ddr));
-      tree stmt_b = DR_STMT (DDR_B (ddr));
+      struct data_reference *dr_a, *dr_b;
+      tree dr_group_first_a, dr_group_first_b;
+      tree addr_base_a, addr_base_b;
+      tree segment_length_a, segment_length_b;
+      tree stmt_a, stmt_b;
+
+      dr_a = DDR_A (ddr);
+      stmt_a = DR_STMT (DDR_A (ddr));
+      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
+      if (dr_group_first_a)
+        {
+         stmt_a = dr_group_first_a;
+         dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
+       }
+
+      dr_b = DDR_B (ddr);
+      stmt_b = DR_STMT (DDR_B (ddr));
+      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
+      if (dr_group_first_b)
+        {
+         stmt_b = dr_group_first_b;
+         dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
+       }
  
-      tree addr_base_a =
+      addr_base_a =
          vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
                                               NULL_TREE, loop);
-      tree addr_base_b =
+      addr_base_b =
          vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
                                               NULL_TREE, loop);
  
-      tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
-      tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
+      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
+      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
  
        if (vect_print_dump_info (REPORT_DR_DETAILS))
         {
           fprintf (vect_dump,
                    "create runtime check for data references ");
-         print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
+         print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
           fprintf (vect_dump, " and ");
-         print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
+         print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
         }
  
  
@@ -6976,16 +7162,116 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
  
  }
  
+/* Function vect_loop_versioning.
+ 
+   If the loop has data references that may or may not be aligned or/and
+   has data reference relations whose independence was not proven then
+   two versions of the loop need to be generated, one which is vectorized
+   and one which isn't.  A test is then generated to control which of the
+   loops is executed.  The test checks for the alignment of all of the
+   data references that may or may not be aligned.  An additional
+   sequence of runtime tests is generated for each pairs of DDRs whose
+   independence was not proven.  The vectorized version of loop is 
+   executed only if both alias and alignment tests are passed.  
+  
+   The test generated to check which version of loop is executed
+   is modified to also check for profitability as indicated by the 
+   cost model initially.  */
+
+static void
+vect_loop_versioning (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  struct loop *nloop;
+  tree cond_expr = NULL_TREE;
+  tree cond_expr_stmt_list = NULL_TREE;
+  basic_block condition_bb;
+  block_stmt_iterator cond_exp_bsi;
+  basic_block merge_bb;
+  basic_block new_exit_bb;
+  edge new_exit_e, e;
+  tree orig_phi, new_phi, arg;
+  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
+  tree gimplify_stmt_list;
+  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
+  int min_profitable_iters = 0;
+  unsigned int th;
+
+  /* Get profitability threshold for vectorized loop.  */
+  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
+
+  th = conservative_cost_threshold (loop_vinfo,
+                                   min_profitable_iters);
+
+  cond_expr =
+    build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, 
+           build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+
+  cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
+                                   false, NULL_TREE);
+
+  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+      vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
+                                        &cond_expr_stmt_list);
+
+  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, 
+                                      &cond_expr_stmt_list);
+
+  cond_expr =
+    fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
+  cond_expr =
+    force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
+                         NULL_TREE);
+  append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
+
+  initialize_original_copy_tables ();
+  nloop = loop_version (loop, cond_expr, &condition_bb,
+                       prob, prob, REG_BR_PROB_BASE - prob, true);
+  free_original_copy_tables();
+
+  /* Loop versioning violates an assumption we try to maintain during 
+     vectorization - that the loop exit block has a single predecessor.
+     After versioning, the exit block of both loop versions is the same
+     basic block (i.e. it has two predecessors). Just in order to simplify
+     following transformations in the vectorizer, we fix this situation
+     here by adding a new (empty) block on the exit-edge of the loop,
+     with the proper loop-exit phis to maintain loop-closed-form.  */
+  
+  merge_bb = single_exit (loop)->dest;
+  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
+  new_exit_bb = split_edge (single_exit (loop));
+  new_exit_e = single_exit (loop);
+  e = EDGE_SUCC (new_exit_bb, 0);
+
+  for (orig_phi = phi_nodes (merge_bb); orig_phi; 
+       orig_phi = PHI_CHAIN (orig_phi))
+    {
+      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
+                                 new_exit_bb);
+      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+      add_phi_arg (new_phi, arg, new_exit_e);
+      SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+    } 
+
+  /* End loop-exit-fixes after versioning.  */
+
+  update_ssa (TODO_update_ssa);
+  if (cond_expr_stmt_list)
+    {
+      cond_exp_bsi = bsi_last (condition_bb);
+      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
+    }
+}
+
  /* Remove a group of stores (for SLP or interleaving), free their 
     stmt_vec_info.  */
  
  static void
  vect_remove_stores (tree first_stmt)
  {
-  stmt_ann_t ann;
    tree next = first_stmt;
    tree tmp;
-  stmt_vec_info next_stmt_info;
    block_stmt_iterator next_si;
  
    while (next)
@@ -6993,11 +7279,8 @@ vect_remove_stores (tree first_stmt)
        /* Free the attached stmt_vec_info and remove the stmt.  */
        next_si = bsi_for_stmt (next);
        bsi_remove (&next_si, true);
-      next_stmt_info = vinfo_for_stmt (next);
-      ann = stmt_ann (next);
-      tmp = DR_GROUP_NEXT_DR (next_stmt_info);
-      free (next_stmt_info);
-      set_stmt_info (ann, NULL);
+      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+      free_stmt_vec_info (next);
        next = tmp;
      }
  }
@@ -7084,7 +7367,6 @@ vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
    return is_store;
  }
  
-
  /* Function vect_transform_loop.
  
     The analysis phase has determined that the loop is vectorizable.
@@ -7097,7 +7379,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
    int nbbs = loop->num_nodes;
-  block_stmt_iterator si, next_si;
+  block_stmt_iterator si;
    int i;
    tree ratio = NULL;
    int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -7108,81 +7390,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vec_transform_loop ===");
  
-  /* If the loop has data references that may or may not be aligned or/and
-     has data reference relations whose independence was not proven then
-     two versions of the loop need to be generated, one which is vectorized
-     and one which isn't.  A test is then generated to control which of the
-     loops is executed.  The test checks for the alignment of all of the
-     data references that may or may not be aligned.  An additional
-     sequence of runtime tests is generated for each pairs of DDRs whose
-     independence was not proven.  The vectorized version of loop is 
-     executed only if both alias and alignment tests are passed.  */
-
    if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
        || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
-    {
-      struct loop *nloop;
-      tree cond_expr = NULL_TREE;
-      tree cond_expr_stmt_list = NULL_TREE;
-      basic_block condition_bb;
-      block_stmt_iterator cond_exp_bsi;
-      basic_block merge_bb;
-      basic_block new_exit_bb;
-      edge new_exit_e, e;
-      tree orig_phi, new_phi, arg;
-      unsigned prob = 4 * REG_BR_PROB_BASE / 5;
-      tree gimplify_stmt_list;
-
-      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
-       cond_expr =
-         vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
-
-      if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
-       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
-                                                     &cond_expr_stmt_list);
-
-      cond_expr =
-        fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
-      cond_expr =
-        force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
-                              NULL_TREE);
-      append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
-
-      initialize_original_copy_tables ();
-      nloop = loop_version (loop, cond_expr, &condition_bb,
-                           prob, prob, REG_BR_PROB_BASE - prob, true);
-      free_original_copy_tables();
-
-      /** Loop versioning violates an assumption we try to maintain during 
-        vectorization - that the loop exit block has a single predecessor.
-        After versioning, the exit block of both loop versions is the same
-        basic block (i.e. it has two predecessors). Just in order to simplify
-        following transformations in the vectorizer, we fix this situation
-        here by adding a new (empty) block on the exit-edge of the loop,
-        with the proper loop-exit phis to maintain loop-closed-form.  **/
-      
-      merge_bb = single_exit (loop)->dest;
-      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
-      new_exit_bb = split_edge (single_exit (loop));
-      new_exit_e = single_exit (loop);
-      e = EDGE_SUCC (new_exit_bb, 0);
-
-      for (orig_phi = phi_nodes (merge_bb); orig_phi; 
-          orig_phi = PHI_CHAIN (orig_phi))
-       {
-          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
-                                    new_exit_bb);
-          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
-          add_phi_arg (new_phi, arg, new_exit_e);
-         SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
-       } 
-
-      /** end loop-exit-fixes after versioning  **/
-
-      update_ssa (TODO_update_ssa);
-      cond_exp_bsi = bsi_last (condition_bb);
-      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
-    }
+    vect_loop_versioning (loop_vinfo);
  
    /* CHECKME: we wouldn't need this if we called update_ssa once
       for all loops.  */
@@ -7334,37 +7544,19 @@ vect_transform_loop (loop_vec_info loop_vinfo)
           is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
            if (is_store)
              {
-             stmt_ann_t ann;
               if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
                 {
                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
                      interleaving chain was completed - free all the stores in
                      the chain.  */
-                 tree next = DR_GROUP_FIRST_DR (stmt_info);
-                 tree tmp;
-                 stmt_vec_info next_stmt_info;
-
-                 while (next)
-                   {
-                     next_si = bsi_for_stmt (next);
-                     next_stmt_info = vinfo_for_stmt (next);
-                     /* Free the attached stmt_vec_info and remove the stmt.  */
-                     ann = stmt_ann (next);
-                     tmp = DR_GROUP_NEXT_DR (next_stmt_info);
-                     free (next_stmt_info);
-                     set_stmt_info (ann, NULL);
-                     bsi_remove (&next_si, true);
-                     next = tmp;
-                   }
+                 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
                   bsi_remove (&si, true);
                   continue;
                 }
               else
                 {
                   /* Free the attached stmt_vec_info and remove the stmt.  */
-                 ann = stmt_ann (stmt);
-                 free (stmt_info);
-                 set_stmt_info (ann, NULL);
+                 free_stmt_vec_info (stmt);
                   bsi_remove (&si, true);
                   continue;
                 }