OSDN Git Service

2012-04-03 Richard Guenther <rguenther@suse.de>
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-loop-manip.c
index ea1a4d6..b7db6c2 100644 (file)
@@ -1,6 +1,6 @@
 /* Vectorizer Specific Loop Manipulations
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
-   Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012
+   Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
    and Ira Rosen <irar@il.ibm.com>
 
@@ -27,13 +27,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "ggc.h"
 #include "tree.h"
 #include "basic-block.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
 #include "tree-flow.h"
 #include "tree-dump.h"
 #include "cfgloop.h"
 #include "cfglayout.h"
-#include "expr.h"
-#include "toplev.h"
+#include "diagnostic-core.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
 #include "langhooks.h"
@@ -113,6 +113,131 @@ rename_variables_in_loop (struct loop *loop)
   free (bbs);
 }
 
+typedef struct
+{
+  tree from, to;
+  basic_block bb;
+} adjust_info;
+
+DEF_VEC_O(adjust_info);
+DEF_VEC_ALLOC_O_STACK(adjust_info);
+#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
+
+/* A stack of values to be adjusted in debug stmts.  We have to
+   process them LIFO, so that the closest substitution applies.  If we
+   processed them FIFO, without the stack, we might substitute uses
+   with a PHI DEF that would soon become non-dominant, and when we got
+   to the suitable one, it wouldn't have anything to substitute any
+   more.  */
+static VEC(adjust_info, stack) *adjust_vec;
+
+/* Adjust any debug stmts that referenced AI->from values to use the
+   loop-closed AI->to, if the references are dominated by AI->bb and
+   not by the definition of AI->from.  */
+
+static void
+adjust_debug_stmts_now (adjust_info *ai)
+{
+  basic_block bbphi = ai->bb;
+  tree orig_def = ai->from;
+  tree new_def = ai->to;
+  imm_use_iterator imm_iter;
+  gimple stmt;
+  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
+
+  gcc_assert (dom_info_available_p (CDI_DOMINATORS));
+
+  /* Adjust any debug stmts that held onto non-loop-closed
+     references.  */
+  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
+    {
+      use_operand_p use_p;
+      basic_block bbuse;
+
+      if (!is_gimple_debug (stmt))
+       continue;
+
+      gcc_assert (gimple_debug_bind_p (stmt));
+
+      bbuse = gimple_bb (stmt);
+
+      if ((bbuse == bbphi
+          || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
+         && !(bbuse == bbdef
+              || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
+       {
+         if (new_def)
+           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+             SET_USE (use_p, new_def);
+         else
+           {
+             gimple_debug_bind_reset_value (stmt);
+             update_stmt (stmt);
+           }
+       }
+    }
+}
+
+/* Adjust debug stmts as scheduled before.  */
+
+static void
+adjust_vec_debug_stmts (void)
+{
+  if (!MAY_HAVE_DEBUG_STMTS)
+    return;
+
+  gcc_assert (adjust_vec);
+
+  while (!VEC_empty (adjust_info, adjust_vec))
+    {
+      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
+      VEC_pop (adjust_info, adjust_vec);
+    }
+
+  VEC_free (adjust_info, stack, adjust_vec);
+}
+
+/* Adjust any debug stmts that referenced FROM values to use the
+   loop-closed TO, if the references are dominated by BB and not by
+   the definition of FROM.  If adjust_vec is non-NULL, adjustments
+   will be postponed until adjust_vec_debug_stmts is called.  */
+
+static void
+adjust_debug_stmts (tree from, tree to, basic_block bb)
+{
+  adjust_info ai;
+
+  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
+      && SSA_NAME_VAR (from) != gimple_vop (cfun))
+    {
+      ai.from = from;
+      ai.to = to;
+      ai.bb = bb;
+
+      if (adjust_vec)
+       VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
+      else
+       adjust_debug_stmts_now (&ai);
+    }
+}
+
+/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
+   to adjust any debug stmts that referenced the old phi arg,
+   presumably non-loop-closed references left over from other
+   transformations.  */
+
+static void
+adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
+{
+  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
+
+  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
+
+  if (MAY_HAVE_DEBUG_STMTS)
+    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
+                       gimple_bb (update_phi));
+}
+
 
 /* Update the PHI nodes of NEW_LOOP.
 
@@ -195,13 +320,15 @@ slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
       /* An ordinary ssa name defined in the loop.  */
       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
 
+      /* Drop any debug references outside the loop, if they would
+        become ill-formed SSA.  */
+      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
+
       /* step 3 (case 1).  */
       if (!after)
         {
           gcc_assert (new_loop_exit_e == orig_entry_e);
-          SET_PHI_ARG_DEF (phi_orig,
-                           new_loop_exit_e->dest_idx,
-                           new_ssa_name);
+         adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
         }
     }
 }
@@ -386,7 +513,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
        gsi_next (&gsi_orig), gsi_next (&gsi_update))
     {
-      source_location loop_locus, guard_locus;;
+      source_location loop_locus, guard_locus;
       orig_phi = gsi_stmt (gsi_orig);
       update_phi = gsi_stmt (gsi_update);
 
@@ -413,7 +540,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
       /* 1.3. Update phi in successor block.  */
       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
-      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
+      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
       update_phi2 = new_phi;
 
 
@@ -431,7 +558,8 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 
       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
-      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
+      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
+                                 PHI_RESULT (new_phi));
 
       /* 2.4. Record the newly created name with set_current_def.
          We want to find a name such that
@@ -560,7 +688,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 
       /* 1.3. Update phi in successor block.  */
       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
-      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
+      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
       update_phi2 = new_phi;
 
 
@@ -575,7 +703,8 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 
       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
-      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
+      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
+                                 PHI_RESULT (new_phi));
 
 
       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
@@ -612,7 +741,8 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
       /* 3.4. Update phi in successor of GUARD_BB:  */
       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                                                                 == guard_arg);
-      SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
+      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
+                                 PHI_RESULT (new_phi));
     }
 }
 
@@ -907,7 +1037,7 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 
 static void
 set_prologue_iterations (basic_block bb_before_first_loop,
-                        tree first_niters,
+                        tree *first_niters,
                         struct loop *loop,
                         unsigned int th)
 {
@@ -970,12 +1100,11 @@ set_prologue_iterations (basic_block bb_before_first_loop,
   newphi = create_phi_node (var, bb_before_first_loop);
   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
               UNKNOWN_LOCATION);
-  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
+  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
 
-  first_niters = PHI_RESULT (newphi);
+  *first_niters = PHI_RESULT (newphi);
 }
 
-
 /* Function slpeel_tree_peel_loop_to_edge.
 
    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
@@ -1029,7 +1158,7 @@ set_prologue_iterations (basic_block bb_before_first_loop,
 
 static struct loop*
 slpeel_tree_peel_loop_to_edge (struct loop *loop,
-                              edge e, tree first_niters,
+                              edge e, tree *first_niters,
                               tree niters, bool update_first_loop_count,
                               unsigned int th, bool check_profitability,
                               tree cond_expr, gimple_seq cond_expr_stmt_list)
@@ -1042,6 +1171,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   basic_block bb_before_first_loop;
   basic_block bb_between_loops;
   basic_block new_exit_bb;
+  gimple_stmt_iterator gsi;
   edge exit_e = single_exit (loop);
   LOC loop_loc;
   tree cost_pre_condition = NULL_TREE;
@@ -1055,6 +1185,40 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
    the function tree_duplicate_bb is called.  */
   gimple_register_cfg_hooks ();
 
+  /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
+     in the exit bb and rename all the uses after the loop.  This simplifies
+     the *guard[12] routines, which assume loop closed SSA form for all PHIs
+     (but normally loop closed SSA form doesn't require virtual PHIs to be
+     in the same form).  Doing this early simplifies checking which
+     uses should be renamed.  */
+  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
+    if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+      {
+       gimple phi = gsi_stmt (gsi);
+       for (gsi = gsi_start_phis (exit_e->dest);
+            !gsi_end_p (gsi); gsi_next (&gsi))
+         if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
+           break;
+       if (gsi_end_p (gsi))
+         {
+           gimple new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                             exit_e->dest);
+           tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
+           imm_use_iterator imm_iter;
+           gimple stmt;
+           tree new_vop = make_ssa_name (SSA_NAME_VAR (PHI_RESULT (phi)),
+                                         new_phi);
+           use_operand_p use_p;
+
+           add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
+           gimple_phi_set_result (new_phi, new_vop);
+           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
+             if (stmt != new_phi && gimple_bb (stmt) != loop->header)
+               FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+                 SET_USE (use_p, new_vop);
+         }
+       break;
+      }
 
   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
         Resulting CFG would be:
@@ -1083,6 +1247,12 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
       return NULL;
     }
 
+  if (MAY_HAVE_DEBUG_STMTS)
+    {
+      gcc_assert (!adjust_vec);
+      adjust_vec = VEC_alloc (adjust_info, stack, 32);
+    }
+
   if (e == exit_e)
     {
       /* NEW_LOOP was placed after LOOP.  */
@@ -1193,8 +1363,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   if (!update_first_loop_count)
     {
       pre_condition =
-       fold_build2 (LE_EXPR, boolean_type_node, first_niters,
-                    build_int_cst (TREE_TYPE (first_niters), 0));
+       fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+                    build_int_cst (TREE_TYPE (*first_niters), 0));
       if (check_profitability)
        {
          tree scalar_loop_iters
@@ -1225,8 +1395,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
                                 loop, th);
 
       pre_condition =
-       fold_build2 (LE_EXPR, boolean_type_node, first_niters,
-                    build_int_cst (TREE_TYPE (first_niters), 0));
+       fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
+                    build_int_cst (TREE_TYPE (*first_niters), 0));
     }
 
   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
@@ -1267,7 +1437,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   bb_after_second_loop = split_edge (single_exit (second_loop));
 
   pre_condition =
-       fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
+       fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                   bb_after_second_loop, bb_before_first_loop);
   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
@@ -1276,11 +1446,13 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
    */
   if (update_first_loop_count)
-    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
+    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
 
   BITMAP_FREE (definitions);
   delete_update_ssa ();
 
+  adjust_vec_debug_stmts ();
+
   return new_loop;
 }
 
@@ -1378,7 +1550,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   edge pe;
   basic_block new_bb;
   gimple_seq stmts;
-  tree ni_name;
+  tree ni_name, ni_minus_gap_name;
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
@@ -1395,9 +1567,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
 
+  /* If an epilogue loop is required because of data accesses with gaps, we
+     subtract one iteration from the total number of iterations here for
+     correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    {
+      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+                                      ni_name,
+                                      build_one_cst (TREE_TYPE (ni_name)));
+      if (!is_gimple_val (ni_minus_gap_name))
+       {
+         var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+          add_referenced_var (var);
+
+          stmts = NULL;
+          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+                                                   true, var);
+          if (cond_expr_stmt_list)
+            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
+          else
+            {
+              pe = loop_preheader_edge (loop);
+              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+              gcc_assert (!new_bb);
+            }
+        }
+    }
+  else
+    ni_minus_gap_name = ni_name;
+
   /* Create: ratio = ni >> log2(vf) */
 
-  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+                           ni_minus_gap_name, log_vf);
   if (!is_gimple_val (ratio_name))
     {
       var = create_tmp_var (TREE_TYPE (ni), "bnd");
@@ -1595,13 +1797,12 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
        gsi_next (&gsi), gsi_next (&gsi1))
     {
-      tree access_fn = NULL;
-      tree evolution_part;
       tree init_expr;
       tree step_expr, off;
       tree type;
       tree var, ni, ni_name;
       gimple_stmt_iterator last_gsi;
+      stmt_vec_info stmt_info;
 
       phi = gsi_stmt (gsi);
       phi1 = gsi_stmt (gsi1);
@@ -1620,47 +1821,34 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
        }
 
       /* Skip reduction phis.  */
-      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+      stmt_info = vinfo_for_stmt (phi);
+      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "reduc phi. skip.");
           continue;
         }
 
-      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
-      gcc_assert (access_fn);
-      /* We can end up with an access_fn like
-           (short int) {(short unsigned int) i_49, +, 1}_1
-        for further analysis we need to strip the outer cast but we
-        need to preserve the original type.  */
-      type = TREE_TYPE (access_fn);
-      STRIP_NOPS (access_fn);
-      evolution_part =
-        unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
-      gcc_assert (evolution_part != NULL_TREE);
+      type = TREE_TYPE (gimple_phi_result (phi));
+      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+      step_expr = unshare_expr (step_expr);
 
       /* FORNOW: We do not support IVs whose evolution function is a polynomial
          of degree >= 2 or exponential.  */
-      gcc_assert (!tree_is_chrec (evolution_part));
+      gcc_assert (!tree_is_chrec (step_expr));
 
-      step_expr = evolution_part;
-      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
-                                                              loop->num));
-      init_expr = fold_convert (type, init_expr);
+      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 
       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
-      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
-       ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
-                         init_expr,
-                         fold_convert (sizetype, off));
+      if (POINTER_TYPE_P (type))
+       ni = fold_build_pointer_plus (init_expr, off);
       else
-       ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
-                         init_expr,
-                         fold_convert (TREE_TYPE (init_expr), off));
+       ni = fold_build2 (PLUS_EXPR, type,
+                         init_expr, fold_convert (type, off));
 
-      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
+      var = create_tmp_var (type, "tmp");
       add_referenced_var (var);
 
       last_gsi = gsi_last_bb (exit_bb);
@@ -1668,7 +1856,7 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                          true, GSI_SAME_STMT);
 
       /* Fix phi expressions in the successor bb.  */
-      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
+      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
     }
 }
 
@@ -1760,7 +1948,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
     }
 
   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
-                                            ratio_mult_vf_name, ni_name, false,
+                                            &ratio_mult_vf_name, ni_name, false,
                                             th, check_profitability,
                                            cond_expr, cond_expr_stmt_list);
   gcc_assert (new_loop);
@@ -1837,31 +2025,27 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
   tree niters_type = TREE_TYPE (loop_niters);
-  int step = 1;
-  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
 
-  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
-    step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
-
   pe = loop_preheader_edge (loop);
 
   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
     {
-      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
-      int elem_misalign = byte_misalign / element_size;
+      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
 
       if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
+        fprintf (vect_dump, "known peeling = %d.", npeel);
 
-      iters = build_int_cst (niters_type,
-                     (((nelements - elem_misalign) & (nelements - 1)) / step));
+      iters = build_int_cst (niters_type, npeel);
     }
   else
     {
       gimple_seq new_stmts = NULL;
+      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+      tree offset = negative
+         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
-                                               &new_stmts, NULL_TREE, loop);
+                                               &new_stmts, offset, loop);
       tree ptr_type = TREE_TYPE (start_addr);
       tree size = TYPE_SIZE (ptr_type);
       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
@@ -1878,14 +2062,18 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
 
       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
       byte_misalign =
-        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
+        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), 
+                     vectype_size_minus_1);
 
       /* Create:  elem_misalign = byte_misalign / element_size  */
       elem_misalign =
         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
 
       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
-      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
+      if (negative)
+       iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
+      else
+       iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
       iters = fold_convert (niters_type, iters);
     }
@@ -1958,7 +2146,7 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
 
-  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
     vect_update_init_of_dr (dr, niters);
 }
 
@@ -1977,6 +2165,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree niters_of_prolog_loop, ni_name;
   tree n_iters;
+  tree wide_prolog_niters;
   struct loop *new_loop;
   unsigned int th = 0;
   int min_profitable_iters;
@@ -1987,8 +2176,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   initialize_original_copy_tables ();
 
   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
-  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
-
+  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
+                                                          ni_name);
 
   /* Get profitability threshold for vectorized loop.  */
   min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
@@ -1998,7 +2187,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
   new_loop =
     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
-                                  niters_of_prolog_loop, ni_name, true,
+                                  &niters_of_prolog_loop, ni_name, true,
                                   th, true, NULL_TREE, NULL);
 
   gcc_assert (new_loop);
@@ -2011,8 +2200,27 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
 
+  if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
+    wide_prolog_niters = niters_of_prolog_loop;
+  else
+    {
+      gimple_seq seq = NULL;
+      edge pe = loop_preheader_edge (loop);
+      tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
+      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
+      add_referenced_var (var);
+      wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
+                                                 var);
+      if (seq)
+       {
+         /* Insert stmt on loop preheader edge.  */
+          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+          gcc_assert (!new_bb);
+        }
+    }
+
   /* Update the init conditions of the access functions of all data refs.  */
-  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
+  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
 
   /* After peeling we have to reset scalar evolution analyzer.  */
   scev_reset ();
@@ -2080,18 +2288,24 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
      of the first vector of the i'th data reference. */
 
-  for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
+  FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
     {
       gimple_seq new_stmt_list = NULL;
       tree addr_base;
       tree addr_tmp, addr_tmp_name;
       tree or_tmp, new_or_tmp_name;
       gimple addr_stmt, or_stmt;
+      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
+      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+      bool negative = tree_int_cst_compare
+       (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
+      tree offset = negative
+       ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
 
       /* create: addr_tmp = (int)(address_of_first_vector) */
       addr_base =
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
-                                             NULL_TREE, loop);
+                                             offset, loop);
       if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
 
@@ -2158,26 +2372,32 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
 
    Input:
      DR: The data reference.
-     VECT_FACTOR: vectorization factor.
+     LENGTH_FACTOR: segment length to consider.
 
    Return an expression whose value is the size of segment which will be
    accessed by DR.  */
 
 static tree
-vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
+vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
 {
-  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
-                                    DR_STEP (dr), vect_factor);
+  tree segment_length;
+
+  if (!compare_tree_int (DR_STEP (dr), 0))
+    segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
+  else
+    segment_length = size_binop (MULT_EXPR,
+                                 fold_convert (sizetype, DR_STEP (dr)),
+                                 fold_convert (sizetype, length_factor));
 
-  if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
+  if (vect_supportable_dr_alignment (dr, false)
+        == dr_explicit_realign_optimized)
     {
       tree vector_size = TYPE_SIZE_UNIT
                          (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
 
-      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
-                                   segment_length, vector_size);
+      segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
     }
-  return fold_convert (sizetype, segment_length);
+  return segment_length;
 }
 
 
@@ -2211,36 +2431,37 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   VEC (ddr_p, heap) * may_alias_ddrs =
     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
-  tree vect_factor =
-    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+  int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
 
   ddr_p ddr;
   unsigned int i;
-  tree part_cond_expr;
+  tree part_cond_expr, length_factor;
 
   /* Create expression
-     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
-     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
+     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
+     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
      &&
      ...
      &&
-     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
-     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
+     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
+     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
 
   if (VEC_empty (ddr_p, may_alias_ddrs))
     return;
 
-  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
+  FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
     {
       struct data_reference *dr_a, *dr_b;
       gimple dr_group_first_a, dr_group_first_b;
       tree addr_base_a, addr_base_b;
       tree segment_length_a, segment_length_b;
       gimple stmt_a, stmt_b;
+      tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
 
       dr_a = DDR_A (ddr);
       stmt_a = DR_STMT (DDR_A (ddr));
-      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
+      dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
       if (dr_group_first_a)
         {
          stmt_a = dr_group_first_a;
@@ -2249,7 +2470,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
 
       dr_b = DDR_B (ddr);
       stmt_b = DR_STMT (DDR_B (ddr));
-      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
+      dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
       if (dr_group_first_b)
         {
          stmt_b = dr_group_first_b;
@@ -2263,8 +2484,12 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
         vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
                                              NULL_TREE, loop);
 
-      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
-      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
+      if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
+       length_factor = scalar_loop_iters;
+      else
+       length_factor = size_int (vect_factor);
+      segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
+      segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
 
       if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
@@ -2275,19 +2500,20 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
        }
 
+      seg_a_min = addr_base_a;
+      seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
+      if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
+       seg_a_min = seg_a_max, seg_a_max = addr_base_a;
+
+      seg_b_min = addr_base_b;
+      seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
+      if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
+       seg_b_min = seg_b_max, seg_b_max = addr_base_b;
 
       part_cond_expr =
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-         fold_build2 (LT_EXPR, boolean_type_node,
-           fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
-             addr_base_a,
-             segment_length_a),
-           addr_base_b),
-         fold_build2 (LT_EXPR, boolean_type_node,
-           fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
-             addr_base_b,
-             segment_length_b),
-           addr_base_a));
+         fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
+         fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
 
       if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
@@ -2327,7 +2553,6 @@ vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
                      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  struct loop *nloop;
   basic_block condition_bb;
   gimple_stmt_iterator gsi, cond_exp_gsi;
   basic_block merge_bb;
@@ -2374,8 +2599,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
     return;
 
   initialize_original_copy_tables ();
-  nloop = loop_version (loop, *cond_expr, &condition_bb,
-                       prob, prob, REG_BR_PROB_BASE - prob, true);
+  loop_version (loop, *cond_expr, &condition_bb,
+               prob, prob, REG_BR_PROB_BASE - prob, true);
   free_original_copy_tables();
 
   /* Loop versioning violates an assumption we try to maintain during
@@ -2400,7 +2625,7 @@ vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
       add_phi_arg (new_phi, arg, new_exit_e,
                   gimple_phi_arg_location_from_edge (orig_phi, e));
-      SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
     }
 
   /* End loop-exit-fixes after versioning.  */