* gcc.c-torture/compile/pr31034.c: New test.

[pf3gnuchains/gcc-fork.git] / gcc / tree-vectorizer.c
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c

index c6002c4..b0aedf7 100644 (file)
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1,5 +1,5 @@
  /* Loop Vectorization
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
     Contributed by Dorit Naishlos <dorit@il.ibm.com>
  
  This file is part of GCC.
@@ -16,8 +16,8 @@ for more details.
  
  You should have received a copy of the GNU General Public License
  along with GCC; see the file COPYING.  If not, write to the Free
-Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.  */
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.  */
  
  /* Loop Vectorization Pass.
  
@@ -91,7 +91,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  
     To vectorize stmt S2, the vectorizer first finds the stmt that defines
     the operand 'b' (S1), and gets the relevant vector def 'vb' from the
-   vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
+   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
     resulting sequence would be:
  
     VS1: vb = px[i];
@@ -136,7 +136,9 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "cfgloop.h"
  #include "cfglayout.h"
  #include "expr.h"
+#include "recog.h"
  #include "optabs.h"
+#include "params.h"
  #include "toplev.h"
  #include "tree-chrec.h"
  #include "tree-data-ref.h"
@@ -148,8 +150,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  /*************************************************************************
    Simple Loop Peeling Utilities
   *************************************************************************/
-static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg 
-  (struct loop *, struct loops *, edge);
  static void slpeel_update_phis_for_duplicate_loop 
    (struct loop *, struct loop *, bool after);
  static void slpeel_update_phi_nodes_for_guard1 
@@ -174,8 +174,11 @@ FILE *vect_dump;
     to mark that it's uninitialized.  */
  enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
  
-/* Number of loops, at the beginning of vectorization.  */
-unsigned int vect_loops_num;
+/* Loop location.  */
+static LOC vect_loop_location;
+
+/* Bitmap of virtual variables to be renamed.  */
+bitmap vect_memsyms_to_rename;
  \f
  /*************************************************************************
    Simple Loop Peeling Utilities
@@ -223,8 +226,7 @@ rename_variables_in_bb (basic_block bb)
    for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
      {
        stmt = bsi_stmt (bsi);
-      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, 
-                                (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
+      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
         rename_use_op (use_p);
      }
  
@@ -271,7 +273,7 @@ slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
    tree def;
    edge orig_loop_latch = loop_latch_edge (orig_loop);
    edge orig_entry_e = loop_preheader_edge (orig_loop);
-  edge new_loop_exit_e = new_loop->single_exit;
+  edge new_loop_exit_e = single_exit (new_loop);
    edge new_loop_entry_e = loop_preheader_edge (new_loop);
    edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
  
@@ -508,10 +510,10 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
    basic_block orig_bb = loop->header;
    edge new_exit_e;
    tree current_new_name;
+  tree name;
  
    /* Create new bb between loop and new_merge_bb.  */
-  *new_exit_bb = split_edge (loop->single_exit);
-  add_bb_to_loop (*new_exit_bb, loop->outer);
+  *new_exit_bb = split_edge (single_exit (loop));
  
    new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
  
@@ -519,6 +521,15 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
         orig_phi && update_phi;
         orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
      {
+      /* Virtual phi; Mark it for renaming. We actually want to call
+        mar_sym_for_renaming, but since all ssa renaming datastructures
+        are going to be freed before we get to call ssa_upate, we just
+        record this name for now in a bitmap, and will mark it for
+        renaming later.  */
+      name = PHI_RESULT (orig_phi);
+      if (!is_gimple_reg (SSA_NAME_VAR (name)))
+        bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
+
        /** 1. Handle new-merge-point phis  **/
  
        /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
@@ -542,12 +553,15 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
  
        /** 2. Handle loop-closed-ssa-form phis  **/
  
+      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
+       continue;
+
        /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
        new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                   *new_exit_bb);
  
        /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
-      add_phi_arg (new_phi, loop_arg, loop->single_exit);
+      add_phi_arg (new_phi, loop_arg, single_exit (loop));
  
        /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
        gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
@@ -575,9 +589,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
           if (!current_new_name)
             continue;
          }
-#ifdef ENABLE_CHECKING
        gcc_assert (get_current_def (current_new_name) == NULL_TREE);
-#endif
  
        set_current_def (current_new_name, PHI_RESULT (new_phi));
        bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
@@ -629,8 +641,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
    tree arg;
  
    /* Create new bb between loop and new_merge_bb.  */
-  *new_exit_bb = split_edge (loop->single_exit);
-  add_bb_to_loop (*new_exit_bb, loop->outer);
+  *new_exit_bb = split_edge (single_exit (loop));
  
    new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
  
@@ -639,6 +650,10 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
      {
        orig_phi = update_phi;
        orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+      /* This loop-closed-phi actually doesn't represent a use
+         out of the loop - the phi arg is a constant.  */ 
+      if (TREE_CODE (orig_def) != SSA_NAME)
+        continue;
        orig_def_new_name = get_current_def (orig_def);
        arg = NULL_TREE;
  
@@ -691,7 +706,7 @@ slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                   *new_exit_bb);
  
        /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
-      add_phi_arg (new_phi, loop_arg, loop->single_exit);
+      add_phi_arg (new_phi, loop_arg, single_exit (loop));
  
        /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
        gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
@@ -748,12 +763,12 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
  {
    tree indx_before_incr, indx_after_incr, cond_stmt, cond;
    tree orig_cond;
-  edge exit_edge = loop->single_exit;
+  edge exit_edge = single_exit (loop);
    block_stmt_iterator loop_cond_bsi;
    block_stmt_iterator incr_bsi;
    bool insert_after;
    tree begin_label = tree_block_label (loop->latch);
-  tree exit_label = tree_block_label (loop->single_exit->dest);
+  tree exit_label = tree_block_label (single_exit (loop)->dest);
    tree init = build_int_cst (TREE_TYPE (niters), 0);
    tree step = build_int_cst (TREE_TYPE (niters), 1);
    tree then_label;
@@ -761,9 +776,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
    LOC loop_loc;
  
    orig_cond = get_loop_exit_condition (loop);
-#ifdef ENABLE_CHECKING
    gcc_assert (orig_cond);
-#endif
    loop_cond_bsi = bsi_for_stmt (orig_cond);
  
    standard_iv_increment_position (loop, &incr_bsi, &insert_after);
@@ -788,7 +801,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
    bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
  
    /* Remove old loop exit test:  */
-  bsi_remove (&loop_cond_bsi);
+  bsi_remove (&loop_cond_bsi, true);
  
    loop_loc = find_loop_location (loop);
    if (dump_file && (dump_flags & TDF_DETAILS))
@@ -807,8 +820,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
     on E which is either the entry or exit of LOOP.  */
  
  static struct loop *
-slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, 
-                                       edge e)
+slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
  {
    struct loop *new_loop;
    basic_block *new_bbs, *bbs;
@@ -816,8 +828,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
    bool was_imm_dom;
    basic_block exit_dest; 
    tree phi, phi_arg;
+  edge exit, new_exit;
  
-  at_exit = (e == loop->single_exit); 
+  at_exit = (e == single_exit (loop)); 
    if (!at_exit && e != loop_preheader_edge (loop))
      return NULL;
  
@@ -831,28 +844,30 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
      }
  
    /* Generate new loop structure.  */
-  new_loop = duplicate_loop (loops, loop, loop->outer);
+  new_loop = duplicate_loop (loop, loop->outer);
    if (!new_loop)
      {
        free (bbs);
        return NULL;
      }
  
-  exit_dest = loop->single_exit->dest;
+  exit_dest = single_exit (loop)->dest;
    was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, 
                                           exit_dest) == loop->header ? 
                  true : false);
  
-  new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
+  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
  
+  exit = single_exit (loop);
    copy_bbs (bbs, loop->num_nodes, new_bbs,
-           &loop->single_exit, 1, &new_loop->single_exit, NULL);
+           &exit, 1, &new_exit, NULL,
+           e->src);
  
    /* Duplicating phi args at exit bbs as coming 
       also from exit of duplicated loop.  */
    for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
      {
-      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
+      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
        if (phi_arg)
         {
           edge new_loop_exit_edge;
@@ -952,7 +967,7 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
  bool
  slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
  {
-  edge exit_e = loop->single_exit;
+  edge exit_e = single_exit (loop);
    edge entry_e = loop_preheader_edge (loop);
    tree orig_cond = get_loop_exit_condition (loop);
    block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
@@ -966,7 +981,7 @@ slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
        || !loop->outer
        || loop->num_nodes != 2
        || !empty_block_p (loop->latch)
-      || !loop->single_exit
+      || !single_exit (loop)
        /* Verify that new loop exit condition can be trivially modified.  */
        || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
        || (e != exit_e && e != entry_e))
@@ -980,7 +995,7 @@ void
  slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                   struct loop *second_loop)
  {
-  basic_block loop1_exit_bb = first_loop->single_exit->dest;
+  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
    basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
    basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
  
@@ -1047,9 +1062,10 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
  */
  
  struct loop*
-slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, 
+slpeel_tree_peel_loop_to_edge (struct loop *loop, 
                                edge e, tree first_niters, 
-                              tree niters, bool update_first_loop_count)
+                              tree niters, bool update_first_loop_count,
+                              unsigned int th)
  {
    struct loop *new_loop = NULL, *first_loop, *second_loop;
    edge skip_e;
@@ -1059,7 +1075,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
    basic_block bb_before_first_loop;
    basic_block bb_between_loops;
    basic_block new_exit_bb;
-  edge exit_e = loop->single_exit;
+  edge exit_e = single_exit (loop);
    LOC loop_loc;
    
    if (!slpeel_can_duplicate_loop_p (loop, e))
@@ -1086,7 +1102,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
          orig_exit_bb:
     */
    
-  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
+  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
      {
        loop_loc = find_loop_location (loop);
        if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1138,12 +1154,12 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
     */
  
    bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
-  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
-  bb_before_second_loop = split_edge (first_loop->single_exit);
-  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
+  bb_before_second_loop = split_edge (single_exit (first_loop));
  
    pre_condition =
-    fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
+    fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+       build_int_cst (TREE_TYPE (first_niters), th));
+
    skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                    bb_before_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
@@ -1178,11 +1194,10 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
     */
  
    bb_between_loops = new_exit_bb;
-  bb_after_second_loop = split_edge (second_loop->single_exit);
-  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
+  bb_after_second_loop = split_edge (single_exit (second_loop));
  
    pre_condition = 
-       fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
+       fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
    skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                    bb_after_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
@@ -1218,7 +1233,7 @@ find_loop_location (struct loop *loop)
  
    node = get_loop_exit_condition (loop);
  
-  if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
+  if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node)
        && EXPR_FILENAME (node) && EXPR_LINENO (node))
      return EXPR_LOC (node);
  
@@ -1233,7 +1248,7 @@ find_loop_location (struct loop *loop)
    for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
      {
        node = bsi_stmt (si);
-      if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
+      if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node))
          return EXPR_LOC (node);
      }
  
@@ -1303,18 +1318,21 @@ vect_set_dump_settings (void)
     For vectorization debug dumps.  */
  
  bool
-vect_print_dump_info (enum verbosity_levels vl, LOC loc)
+vect_print_dump_info (enum verbosity_levels vl)
  {
    if (vl > vect_verbosity_level)
      return false;
  
-  if (loc == UNKNOWN_LOC)
+  if (!current_function_decl || !vect_dump)
+    return false;
+
+  if (vect_loop_location == UNKNOWN_LOC)
      fprintf (vect_dump, "\n%s:%d: note: ",
-                DECL_SOURCE_FILE (current_function_decl),
-                DECL_SOURCE_LINE (current_function_decl));
+            DECL_SOURCE_FILE (current_function_decl),
+            DECL_SOURCE_LINE (current_function_decl));
    else
-    fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
-
+    fprintf (vect_dump, "\n%s:%d: note: ", 
+            LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
  
    return true;
  }
@@ -1337,23 +1355,25 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
    STMT_VINFO_TYPE (res) = undef_vec_info_type;
    STMT_VINFO_STMT (res) = stmt;
    STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
-  STMT_VINFO_RELEVANT_P (res) = 0;
-  STMT_VINFO_LIVE_P (res) = 0;
+  STMT_VINFO_RELEVANT (res) = 0;
+  STMT_VINFO_LIVE_P (res) = false;
    STMT_VINFO_VECTYPE (res) = NULL;
    STMT_VINFO_VEC_STMT (res) = NULL;
+  STMT_VINFO_IN_PATTERN_P (res) = false;
+  STMT_VINFO_RELATED_STMT (res) = NULL;
    STMT_VINFO_DATA_REF (res) = NULL;
    if (TREE_CODE (stmt) == PHI_NODE)
      STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
    else
      STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
-  STMT_VINFO_MEMTAG (res) = NULL;
-  STMT_VINFO_PTR_INFO (res) = NULL;
-  STMT_VINFO_SUBVARS (res) = NULL;
-  STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
-  STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
-  STMT_VINFO_VECT_STEP (res) = NULL_TREE;
-  STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
-  STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
+  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
+  DR_GROUP_FIRST_DR (res) = NULL_TREE;
+  DR_GROUP_NEXT_DR (res) = NULL_TREE;
+  DR_GROUP_SIZE (res) = 0;
+  DR_GROUP_STORE_COUNT (res) = 0;
+  DR_GROUP_GAP (res) = 0;
+  DR_GROUP_SAME_DR_STMT (res) = NULL_TREE;
+  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
  
    return res;
  }
@@ -1384,7 +1404,7 @@ new_loop_vec_info (struct loop *loop)
  
        for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
          {
-          tree_ann_t ann = get_tree_ann (phi);
+          stmt_ann_t ann = get_stmt_ann (phi);
            set_stmt_info (ann, new_stmt_vec_info (phi, res));
          }
  
@@ -1394,7 +1414,7 @@ new_loop_vec_info (struct loop *loop)
           stmt_ann_t ann;
  
           ann = stmt_ann (stmt);
-         set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
+         set_stmt_info (ann, new_stmt_vec_info (stmt, res));
         }
      }
  
@@ -1405,12 +1425,11 @@ new_loop_vec_info (struct loop *loop)
    LOOP_VINFO_VECTORIZABLE_P (res) = 0;
    LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
    LOOP_VINFO_VECT_FACTOR (res) = 0;
-  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
-                          "loop_write_datarefs");
-  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
-                          "loop_read_datarefs");
+  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
+  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
    LOOP_VINFO_UNALIGNED_DR (res) = NULL;
-  LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
+  LOOP_VINFO_MAY_MISALIGN_STMTS (res)
+    = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
  
    return res;
  }
@@ -1446,14 +1465,14 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)
  
        for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
          {
-          tree_ann_t ann = get_tree_ann (phi);
+          stmt_ann_t ann = stmt_ann (phi);
  
            stmt_info = vinfo_for_stmt (phi);
            free (stmt_info);
            set_stmt_info (ann, NULL);
          }
  
-      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+      for (si = bsi_start (bb); !bsi_end_p (si); )
         {
           tree stmt = bsi_stmt (si);
           stmt_ann_t ann = stmt_ann (stmt);
@@ -1461,47 +1480,40 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)
  
           if (stmt_info)
             {
+             /* Check if this is a "pattern stmt" (introduced by the 
+                vectorizer during the pattern recognition pass).  */
+             bool remove_stmt_p = false;
+             tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+             if (orig_stmt)
+               {
+                 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
+                 if (orig_stmt_info
+                     && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
+                   remove_stmt_p = true; 
+               }
+                       
+             /* Free stmt_vec_info.  */
               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
               free (stmt_info);
-             set_stmt_info ((tree_ann_t)ann, NULL);
+             set_stmt_info (ann, NULL);
+
+             /* Remove dead "pattern stmts".  */
+             if (remove_stmt_p)
+               bsi_remove (&si, true);
             }
+         bsi_next (&si);
         }
      }
  
    free (LOOP_VINFO_BBS (loop_vinfo));
-  varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
-  varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
+  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
+  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+  VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  
    free (loop_vinfo);
  }
  
  
-/* Function vect_strip_conversions
-
-   Strip conversions that don't narrow the mode.  */
-
-tree 
-vect_strip_conversion (tree expr)
-{
-  tree to, ti, oprnd0;
-  
-  while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
-    {
-      to = TREE_TYPE (expr);
-      oprnd0 = TREE_OPERAND (expr, 0);
-      ti = TREE_TYPE (oprnd0);
- 
-      if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
-       return NULL_TREE;
-      if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
-       return NULL_TREE;
-      
-      expr = oprnd0;
-    }
-  return expr; 
-}
-
-
  /* Function vect_force_dr_alignment_p.
  
     Returns whether the alignment of a DECL can be forced to be aligned
@@ -1552,7 +1564,7 @@ get_vectype_for_scalar_type (tree scalar_type)
    nunits = UNITS_PER_SIMD_WORD / nbytes;
  
    vectype = build_vector_type (scalar_type, nunits);
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "get vectype with %d units of type ", nunits);
        print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
@@ -1561,7 +1573,7 @@ get_vectype_for_scalar_type (tree scalar_type)
    if (!vectype)
      return NULL_TREE;
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "vectype: ");
        print_generic_expr (vect_dump, vectype, TDF_SLIM);
@@ -1570,7 +1582,7 @@ get_vectype_for_scalar_type (tree scalar_type)
    if (!VECTOR_MODE_P (TYPE_MODE (vectype))
        && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "mode not supported by target.");
        return NULL_TREE;
      }
@@ -1636,7 +1648,7 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
    *def_stmt = NULL_TREE;
    *def = NULL_TREE;
    
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "vect_is_simple_use: operand ");
        print_generic_expr (vect_dump, operand, TDF_SLIM);
@@ -1650,7 +1662,7 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
      
    if (TREE_CODE (operand) != SSA_NAME)
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "not ssa-name.");
        return false;
      }
@@ -1658,19 +1670,19 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
    *def_stmt = SSA_NAME_DEF_STMT (operand);
    if (*def_stmt == NULL_TREE )
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "no def_stmt.");
        return false;
      }
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "def_stmt: ");
        print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
      }
  
    /* empty stmt is expected only in case of a function argument.
-     (Otherwise - we expect a phi_node or a modify_expr).  */
+     (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT).  */
    if (IS_EMPTY_STMT (*def_stmt))
      {
        tree arg = TREE_OPERAND (*def_stmt, 0);
@@ -1681,7 +1693,7 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
            return true;
          }
  
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unexpected empty stmt.");
        return false;
      }
@@ -1697,7 +1709,7 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
  
    if (*dt == vect_unknown_def_type)
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unsupported pattern.");
        return false;
      }
@@ -1706,12 +1718,12 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
       a reduction operation cannot have uses in the loop.  */
    if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "reduction used in loop.");
        return false;
      }
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "type of def: %d.",*dt);
  
    switch (TREE_CODE (*def_stmt))
@@ -1722,33 +1734,182 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
                    || *dt == vect_invariant_def);
        break;
  
-    case MODIFY_EXPR:
-      *def = TREE_OPERAND (*def_stmt, 0);
+    case GIMPLE_MODIFY_STMT:
+      *def = GIMPLE_STMT_OPERAND (*def_stmt, 0);
        gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
        break;
  
      default:
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "unsupported defining stmt: ");
        return false;
      }
  
-  if (*dt == vect_induction_def)
+  return true;
+}
+
+
+/* Function supportable_widening_operation
+
+   Check whether an operation represented by the code CODE is a 
+   widening operation that is supported by the target platform in 
+   vector form (i.e., when operating on arguments of type VECTYPE).
+    
+   The two kinds of widening operations we currently support are
+   NOP and WIDEN_MULT. This function checks if these operations
+   are supported by the target platform either directly (via vector 
+   tree-codes), or via target builtins.
+
+   Output:
+   - CODE1 and CODE2 are codes of vector operations to be used when 
+   vectorizing the operation, if available. 
+   - DECL1 and DECL2 are decls of target builtin functions to be used
+   when vectorizing the operation, if available. In this case,
+   CODE1 and CODE2 are CALL_EXPR.  */
+
+bool
+supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
+                                tree *decl1, tree *decl2,
+                                enum tree_code *code1, enum tree_code *code2)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  bool ordered_p;
+  enum machine_mode vec_mode;
+  enum insn_code icode1, icode2;
+  optab optab1, optab2;
+  tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
+  tree type = TREE_TYPE (expr);
+  tree wide_vectype = get_vectype_for_scalar_type (type);
+  enum tree_code c1, c2;
+
+  /* The result of a vectorized widening operation usually requires two vectors 
+     (because the widened results do not fit int one vector). The generated 
+     vector results would normally be expected to be generated in the same 
+     order as in the original scalar computation. i.e. if 8 results are 
+     generated in each vector iteration, they are to be organized as follows:
+        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8]. 
+
+     However, in the special case that the result of the widening operation is 
+     used in a reduction computation only, the order doesn't matter (because
+     when vectorizing a reduction we change the order of the computation). 
+     Some targets can take advantage of this and generate more efficient code.
+     For example, targets like Altivec, that support widen_mult using a sequence
+     of {mult_even,mult_odd} generate the following vectors:
+        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].  */
+
+   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
+     ordered_p = false;
+   else
+     ordered_p = true;
+
+  if (!ordered_p
+      && code == WIDEN_MULT_EXPR
+      && targetm.vectorize.builtin_mul_widen_even
+      && targetm.vectorize.builtin_mul_widen_even (vectype)
+      && targetm.vectorize.builtin_mul_widen_odd
+      && targetm.vectorize.builtin_mul_widen_odd (vectype))
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-        fprintf (vect_dump, "induction not supported.");
-      return false;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "Unordered widening operation detected.");
+
+      *code1 = *code2 = CALL_EXPR;
+      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
+      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
+      return true;
+    }
+
+  switch (code)
+    {
+    case WIDEN_MULT_EXPR:
+      if (BYTES_BIG_ENDIAN)
+        {
+          c1 = VEC_WIDEN_MULT_HI_EXPR;
+          c2 = VEC_WIDEN_MULT_LO_EXPR;
+        }
+      else
+        {
+          c2 = VEC_WIDEN_MULT_HI_EXPR;
+          c1 = VEC_WIDEN_MULT_LO_EXPR;
+        }
+      break;
+
+    case NOP_EXPR:
+      if (BYTES_BIG_ENDIAN)
+        {
+          c1 = VEC_UNPACK_HI_EXPR;
+          c2 = VEC_UNPACK_LO_EXPR;
+        }
+      else
+        {
+          c2 = VEC_UNPACK_HI_EXPR;
+          c1 = VEC_UNPACK_LO_EXPR;
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
      }
  
+  *code1 = c1;
+  *code2 = c2;
+  optab1 = optab_for_tree_code (c1, vectype);
+  optab2 = optab_for_tree_code (c2, vectype);
+
+  if (!optab1 || !optab2)
+    return false;
+
+  vec_mode = TYPE_MODE (vectype);
+  if ((icode1 = optab1->handlers[(int) vec_mode].insn_code) == CODE_FOR_nothing
+      || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
+      || (icode2 = optab2->handlers[(int) vec_mode].insn_code)
+                                                        == CODE_FOR_nothing
+      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
+    return false;
+
    return true;
  }
  
  
+/* Function reduction_code_for_scalar_code
+
+   Input:
+   CODE - tree_code of a reduction operations.
+
+   Output:
+   REDUC_CODE - the corresponding tree-code to be used to reduce the
+      vector of partial results into a single scalar result (which
+      will also reside in a vector).
+
+   Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
+
+bool
+reduction_code_for_scalar_code (enum tree_code code,
+                                enum tree_code *reduc_code)
+{
+  switch (code)
+  {
+  case MAX_EXPR:
+    *reduc_code = REDUC_MAX_EXPR;
+    return true;
+
+  case MIN_EXPR:
+    *reduc_code = REDUC_MIN_EXPR;
+    return true;
+
+  case PLUS_EXPR:
+    *reduc_code = REDUC_PLUS_EXPR;
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+
  /* Function vect_is_simple_reduction
  
-   TODO:
     Detect a cross-iteration def-use cucle that represents a simple
-   reduction computation. We look for the followng pattern:
+   reduction computation. We look for the following pattern:
  
     loop_header:
       a1 = phi < a0, a2 >
@@ -1756,18 +1917,221 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
       a2 = operation (a3, a1)
    
     such that:
-   1. operation is...
-   2. no uses for a2 in the loop (elsewhere)  */
+   1. operation is commutative and associative and it is safe to 
+      change the order of the computation.
+   2. no uses for a2 in the loop (a2 is used out of the loop)
+   3. no uses of a1 in the loop besides the reduction operation.
+
+   Condition 1 is tested here.
+   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
  
  tree
-vect_is_simple_reduction (struct loop *loop ATTRIBUTE_UNUSED, 
-                         tree phi ATTRIBUTE_UNUSED)
+vect_is_simple_reduction (struct loop *loop, tree phi)
  {
-  /* FORNOW */
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-    fprintf (vect_dump, "reduction: unknown pattern.");
+  edge latch_e = loop_latch_edge (loop);
+  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
+  tree def_stmt, def1, def2;
+  enum tree_code code;
+  int op_type;
+  tree operation, op1, op2;
+  tree type;
+  int nloop_uses;
+  tree name;
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+
+  name = PHI_RESULT (phi);
+  nloop_uses = 0;
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
+    {
+      tree use_stmt = USE_STMT (use_p);
+      if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
+         && vinfo_for_stmt (use_stmt)
+         && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+        nloop_uses++;
+      if (nloop_uses > 1)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "reduction used in loop.");
+          return NULL_TREE;
+        }
+    }
+
+  if (TREE_CODE (loop_arg) != SSA_NAME)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+       {
+         fprintf (vect_dump, "reduction: not ssa_name: ");
+         print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
+       }
+      return NULL_TREE;
+    }
+
+  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
+  if (!def_stmt)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "reduction: no def_stmt.");
+      return NULL_TREE;
+    }
+
+  if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
+      return NULL_TREE;
+    }
+
+  name = GIMPLE_STMT_OPERAND (def_stmt, 0);
+  nloop_uses = 0;
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
+    {
+      tree use_stmt = USE_STMT (use_p);
+      if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
+         && vinfo_for_stmt (use_stmt)
+         && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+       nloop_uses++;
+      if (nloop_uses > 1)
+       {
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "reduction used in loop.");
+         return NULL_TREE;
+       }
+    }
+
+  operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
+  code = TREE_CODE (operation);
+  if (!commutative_tree_code (code) || !associative_tree_code (code))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: not commutative/associative: ");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
+
+  op_type = TREE_OPERAND_LENGTH (operation);
+  if (op_type != binary_op)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: not binary operation: ");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
+
+  op1 = TREE_OPERAND (operation, 0);
+  op2 = TREE_OPERAND (operation, 1);
+  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: uses not ssa_names: ");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
+
+  /* Check that it's ok to change the order of the computation.  */
+  type = TREE_TYPE (operation);
+  if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
+      || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: multiple types: operation type: ");
+          print_generic_expr (vect_dump, type, TDF_SLIM);
+          fprintf (vect_dump, ", operands types: ");
+          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
+          fprintf (vect_dump, ",");
+          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
+
+  /* CHECKME: check for !flag_finite_math_only too?  */
+  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
+    {
+      /* Changing the order of operations changes the semantics.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
+  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
+    {
+      /* Changing the order of operations changes the semantics.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: unsafe int math optimization: ");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
+
+  /* reduction is safe. we're dealing with one of the following:
+     1) integer arithmetic and no trapv
+     2) floating point arithmetic, and special flags permit this optimization.
+   */
+  def1 = SSA_NAME_DEF_STMT (op1);
+  def2 = SSA_NAME_DEF_STMT (op2);
+  if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: no defs for operands: ");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
  
-  return NULL_TREE;
+
+  /* Check that one def is the reduction def, defined by PHI,
+     the other def is either defined in the loop by a GIMPLE_MODIFY_STMT,
+     or it's an induction (defined by some phi node).  */
+
+  if (def2 == phi
+      && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
+      && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT 
+         || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "detected reduction:");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return def_stmt;
+    }
+  else if (def1 == phi
+          && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
+          && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT 
+              || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def))
+    {
+      /* Swap operands (just for simplicity - so that the rest of the code
+        can assume that the reduction variable is always the last (second)
+        argument).  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "detected reduction: need to swap operands:");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0), 
+                                   &TREE_OPERAND (operation, 1));
+      return def_stmt;
+    }
+  else
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "reduction: unknown pattern.");
+          print_generic_expr (vect_dump, operation, TDF_SLIM);
+        }
+      return NULL_TREE;
+    }
  }
  
  
@@ -1782,7 +2146,6 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
  {
    tree init_expr;
    tree step_expr;
-  
    tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  
    /* When there is no evolution in this loop, the evolution function
@@ -1796,10 +2159,9 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
      return false;
    
    step_expr = evolution_part;
-  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
-                                                           loop_nb));
+  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "step: ");
        print_generic_expr (vect_dump, step_expr, TDF_SLIM);
@@ -1811,8 +2173,8 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
    *step = step_expr;
  
    if (TREE_CODE (step_expr) != INTEGER_CST)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    { 
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "step unknown.");
        return false;
      }
@@ -1825,54 +2187,136 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
     
     Entry Point to loop vectorization phase.  */
  
-void
-vectorize_loops (struct loops *loops)
+unsigned
+vectorize_loops (void)
  {
    unsigned int i;
    unsigned int num_vectorized_loops = 0;
+  unsigned int vect_loops_num;
+  loop_iterator li;
+  struct loop *loop;
+
+  vect_loops_num = number_of_loops ();
+
+  /* Bail out if there are no loops.  */
+  if (vect_loops_num <= 1)
+    return 0;
  
    /* Fix the verbosity level if not defined explicitly by the user.  */
    vect_set_dump_settings ();
  
+  /* Allocate the bitmap that records which virtual variables that 
+     need to be renamed.  */
+  vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
+
    /*  ----------- Analyze loops. -----------  */
  
    /* If some loop was duplicated, it gets bigger number 
       than all previously defined loops. This fact allows us to run 
       only over initial loops skipping newly generated ones.  */
-  vect_loops_num = loops->num;
-  for (i = 1; i < vect_loops_num; i++)
+  FOR_EACH_LOOP (li, loop, 0)
      {
        loop_vec_info loop_vinfo;
-      struct loop *loop = loops->parray[i];
-
-      if (!loop)
-        continue;
  
+      vect_loop_location = find_loop_location (loop);
        loop_vinfo = vect_analyze_loop (loop);
        loop->aux = loop_vinfo;
  
        if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
         continue;
  
-      vect_transform_loop (loop_vinfo, loops); 
+      vect_transform_loop (loop_vinfo);
        num_vectorized_loops++;
      }
+  vect_loop_location = UNKNOWN_LOC;
  
-  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
+      || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
+         && num_vectorized_loops > 0))
      fprintf (vect_dump, "vectorized %u loops in function.\n",
              num_vectorized_loops);
  
    /*  ----------- Finalize. -----------  */
  
+  BITMAP_FREE (vect_memsyms_to_rename);
+
    for (i = 1; i < vect_loops_num; i++)
      {
-      struct loop *loop = loops->parray[i];
        loop_vec_info loop_vinfo;
  
+      loop = get_loop (i);
        if (!loop)
         continue;
        loop_vinfo = loop->aux;
        destroy_loop_vec_info (loop_vinfo);
        loop->aux = NULL;
      }
+
+  return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
+}
+
+/* Increase alignment of global arrays to improve vectorization potential.
+   TODO:
+   - Consider also structs that have an array field.
+   - Use ipa analysis to prune arrays that can't be vectorized?
+     This should involve global alignment analysis and in the future also
+     array padding.  */
+
+static unsigned int
+increase_alignment (void)
+{
+  struct varpool_node *vnode;
+
+  /* Increase the alignment of all global arrays for vectorization.  */
+  for (vnode = varpool_nodes_queue;
+       vnode;
+       vnode = vnode->next_needed)
+    {
+      tree vectype, decl = vnode->decl;
+      unsigned int alignment;
+
+      if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
+       continue;
+      vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
+      if (!vectype)
+       continue;
+      alignment = TYPE_ALIGN (vectype);
+      if (DECL_ALIGN (decl) >= alignment)
+       continue;
+
+      if (vect_can_force_dr_alignment_p (decl, alignment))
+       { 
+         DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
+         DECL_USER_ALIGN (decl) = 1;
+         if (dump_file)
+           { 
+             fprintf (dump_file, "Increasing alignment of decl: ");
+             print_generic_expr (dump_file, decl, TDF_SLIM);
+           }
+       }
+    }
+  return 0;
+}
+
+static bool
+gate_increase_alignment (void)
+{
+  return flag_section_anchors && flag_tree_vectorize;
  }
+
+struct tree_opt_pass pass_ipa_increase_alignment = 
+{
+  "increase_alignment",                        /* name */
+  gate_increase_alignment,             /* gate */
+  increase_alignment,                  /* execute */
+  NULL,                                        /* sub */
+  NULL,                                        /* next */
+  0,                                   /* static_pass_number */
+  0,                                   /* tv_id */
+  0,                                   /* properties_required */
+  0,                                   /* properties_provided */
+  0,                                   /* properties_destroyed */
+  0,                                   /* todo_flags_start */
+  0,                                   /* todo_flags_finish */
+  0                                    /* letter */
+};