2006-04-21 Andrew Haley <aph@redhat.com>

[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-transform.c
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c

index 0079fc8..1d23f14 100644 (file)
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -1,5 +1,5 @@
  /* Transformation Utilities for Loop Vectorization.
  /* Transformation Utilities for Loop Vectorization.
-   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
+   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
     Contributed by Dorit Naishlos <dorit@il.ibm.com>
  
  This file is part of GCC.
     Contributed by Dorit Naishlos <dorit@il.ibm.com>
  
  This file is part of GCC.
@@ -16,14 +16,13 @@ for more details.
  
  You should have received a copy of the GNU General Public License
  along with GCC; see the file COPYING.  If not, write to the Free
  
  You should have received a copy of the GNU General Public License
  along with GCC; see the file COPYING.  If not, write to the Free
-Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.  */
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.  */
  
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
  #include "tm.h"
  
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
  #include "tm.h"
-#include "errors.h"
  #include "ggc.h"
  #include "tree.h"
  #include "target.h"
  #include "ggc.h"
  #include "tree.h"
  #include "target.h"
@@ -36,6 +35,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "cfgloop.h"
  #include "expr.h"
  #include "optabs.h"
  #include "cfgloop.h"
  #include "expr.h"
  #include "optabs.h"
+#include "recog.h"
  #include "tree-data-ref.h"
  #include "tree-chrec.h"
  #include "tree-scalar-evolution.h"
  #include "tree-data-ref.h"
  #include "tree-chrec.h"
  #include "tree-scalar-evolution.h"
@@ -43,6 +43,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "langhooks.h"
  #include "tree-pass.h"
  #include "toplev.h"
  #include "langhooks.h"
  #include "tree-pass.h"
  #include "toplev.h"
+#include "real.h"
  
  /* Utility functions for the code transformation.  */
  static bool vect_transform_stmt (tree, block_stmt_iterator *);
  
  /* Utility functions for the code transformation.  */
  static bool vect_transform_stmt (tree, block_stmt_iterator *);
@@ -50,15 +51,16 @@ static void vect_align_data_ref (tree);
  static tree vect_create_destination_var (tree, tree);
  static tree vect_create_data_ref_ptr 
    (tree, block_stmt_iterator *, tree, tree *, bool); 
  static tree vect_create_destination_var (tree, tree);
  static tree vect_create_data_ref_ptr 
    (tree, block_stmt_iterator *, tree, tree *, bool); 
-static tree vect_create_index_for_vector_ref (loop_vec_info);
  static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
  static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
  static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
  static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
-static tree vect_get_vec_def_for_operand (tree, tree);
+static tree vect_get_vec_def_for_operand (tree, tree, tree *);
  static tree vect_init_vector (tree, tree);
  static void vect_finish_stmt_generation 
    (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
  static bool vect_is_simple_cond (tree, loop_vec_info); 
  static void update_vuses_to_preheader (tree, struct loop*);
  static tree vect_init_vector (tree, tree);
  static void vect_finish_stmt_generation 
    (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
  static bool vect_is_simple_cond (tree, loop_vec_info); 
  static void update_vuses_to_preheader (tree, struct loop*);
+static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
+static tree get_initial_def_for_reduction (tree, tree, tree *);
  
  /* Utility function dealing with loop peeling (not peeling itself).  */
  static void vect_generate_tmps_on_preheader 
  
  /* Utility function dealing with loop peeling (not peeling itself).  */
  static void vect_generate_tmps_on_preheader 
@@ -71,6 +73,7 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
  static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
  static void vect_do_peeling_for_loop_bound 
    (loop_vec_info, tree *, struct loops *);
  static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
  static void vect_do_peeling_for_loop_bound 
    (loop_vec_info, tree *, struct loops *);
+static int vect_min_worthwhile_factor (enum tree_code);
  
  
  /* Function vect_get_new_vect_var.
  
  
  /* Function vect_get_new_vect_var.
@@ -86,10 +89,20 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
    const char *prefix;
    tree new_vect_var;
  
    const char *prefix;
    tree new_vect_var;
  
-  if (var_kind == vect_simple_var)
-    prefix = "vect_"; 
-  else
+  switch (var_kind)
+  {
+  case vect_simple_var:
+    prefix = "vect_";
+    break;
+  case vect_scalar_var:
+    prefix = "stmp_";
+    break;
+  case vect_pointer_var:
      prefix = "vect_p";
      prefix = "vect_p";
+    break;
+  default:
+    gcc_unreachable ();
+  }
  
    if (name)
      new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
  
    if (name)
      new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
@@ -100,58 +113,6 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
  }
  
  
  }
  
  
-/* Function vect_create_index_for_vector_ref.
-
-   Create (and return) an index variable, along with it's update chain in the
-   loop. This variable will be used to access a memory location in a vector
-   operation.
-
-   Input:
-   LOOP: The loop being vectorized.
-   BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
-        function can be added here, or in the loop pre-header.
-
-   Output:
-   Return an index that will be used to index a vector array.  It is expected
-   that a pointer to the first vector will be used as the base address for the
-   indexed reference.
-
-   FORNOW: we are not trying to be efficient, just creating a new index each
-   time from scratch.  At this time all vector references could use the same
-   index.
-
-   TODO: create only one index to be used by all vector references.  Record
-   the index in the LOOP_VINFO the first time this procedure is called and
-   return it on subsequent calls.  The increment of this index must be placed
-   just before the conditional expression that ends the single block loop.  */
-
-static tree
-vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
-{
-  tree init, step;
-  block_stmt_iterator incr_bsi;
-  bool insert_after;
-  tree indx_before_incr, indx_after_incr;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  tree incr;
-
-  /* It is assumed that the base pointer used for vectorized access contains
-     the address of the first vector.  Therefore the index used for vectorized
-     access must be initialized to zero and incremented by 1.  */
-
-  init = integer_zero_node;
-  step = integer_one_node;
-
-  standard_iv_increment_position (loop, &incr_bsi, &insert_after);
-  create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
-       &indx_before_incr, &indx_after_incr);
-  incr = bsi_stmt (incr_bsi);
-  set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
-
-  return indx_before_incr;
-}
-
-
  /* Function vect_create_addr_base_for_vector_ref.
  
     Create an expression that computes the address of the first memory location
  /* Function vect_create_addr_base_for_vector_ref.
  
     Create an expression that computes the address of the first memory location
@@ -177,8 +138,7 @@ vect_create_addr_base_for_vector_ref (tree stmt,
  {
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  {
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
-  tree data_ref_base = 
-    unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
+  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
    tree base_name = build_fold_indirect_ref (data_ref_base);
    tree ref = DR_REF (dr);
    tree scalar_type = TREE_TYPE (ref);
    tree base_name = build_fold_indirect_ref (data_ref_base);
    tree ref = DR_REF (dr);
    tree scalar_type = TREE_TYPE (ref);
@@ -187,9 +147,11 @@ vect_create_addr_base_for_vector_ref (tree stmt,
    tree new_temp;
    tree addr_base, addr_expr;
    tree dest, new_stmt;
    tree new_temp;
    tree addr_base, addr_expr;
    tree dest, new_stmt;
-  tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
+  tree base_offset = unshare_expr (DR_OFFSET (dr));
+  tree init = unshare_expr (DR_INIT (dr));
  
    /* Create base_offset */
  
    /* Create base_offset */
+  base_offset = size_binop (PLUS_EXPR, base_offset, init);
    dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
    add_referenced_tmp_var (dest);
    base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);  
    dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
    add_referenced_tmp_var (dest);
    base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);  
@@ -199,17 +161,17 @@ vect_create_addr_base_for_vector_ref (tree stmt,
      {
        tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
        add_referenced_tmp_var (tmp);
      {
        tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
        add_referenced_tmp_var (tmp);
-      offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset, 
-                            STMT_VINFO_VECT_STEP (stmt_info)));
-      base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), 
-                                 base_offset, offset));
+      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
+                           DR_STEP (dr));
+      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
+                                base_offset, offset);
        base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);  
        append_to_statement_list_force (new_stmt, new_stmt_list);
      }
    
    /* base + base_offset */
        base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);  
        append_to_statement_list_force (new_stmt, new_stmt_list);
      }
    
    /* base + base_offset */
-  addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base, 
-                           base_offset));
+  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
+                          base_offset);
  
    /* addr_expr = addr_base */
    addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
  
    /* addr_expr = addr_base */
    addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
@@ -220,7 +182,7 @@ vect_create_addr_base_for_vector_ref (tree stmt,
    TREE_OPERAND (vec_stmt, 0) = new_temp;
    append_to_statement_list_force (vec_stmt, new_stmt_list);
  
    TREE_OPERAND (vec_stmt, 0) = new_temp;
    append_to_statement_list_force (vec_stmt, new_stmt_list);
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "created ");
        print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
      {
        fprintf (vect_dump, "created ");
        print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
@@ -278,24 +240,17 @@ vect_align_data_ref (tree stmt)
  
        Return the initial_address in INITIAL_ADDRESS.
  
  
        Return the initial_address in INITIAL_ADDRESS.
  
-   2. Create a data-reference in the loop based on the new vector pointer vp,
-      and using a new index variable 'idx' as follows:
-
-      vp' = vp + update
-
-      where if ONLY_INIT is true:
-         update = zero
-      and otherwise
-         update = idx + vector_type_size
-
-      Return the pointer vp'.
-
+   2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
+      a data-reference in the loop based on the new vector pointer vp.  This
+      new data reference will by some means be updated each iteration of
+      the loop.  Return the pointer vp'.
  
     FORNOW: handle only aligned and consecutive accesses.  */
  
  static tree
  
     FORNOW: handle only aligned and consecutive accesses.  */
  
  static tree
-vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
-                          tree *initial_address, bool only_init)
+vect_create_data_ref_ptr (tree stmt,
+                         block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
+                         tree offset, tree *initial_address, bool only_init)
  {
    tree base_name;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  {
    tree base_name;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -308,22 +263,17 @@ vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
    tree new_temp;
    tree vec_stmt;
    tree new_stmt_list = NULL_TREE;
    tree new_temp;
    tree vec_stmt;
    tree new_stmt_list = NULL_TREE;
-  tree idx;
    edge pe = loop_preheader_edge (loop);
    basic_block new_bb;
    tree vect_ptr_init;
    edge pe = loop_preheader_edge (loop);
    basic_block new_bb;
    tree vect_ptr_init;
-  tree vectype_size;
-  tree ptr_update;
-  tree data_ref_ptr;
-  tree type, tmp, size;
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  
  
-  base_name =  build_fold_indirect_ref (unshare_expr (
-                     STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
+  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        tree data_ref_base = base_name;
      {
        tree data_ref_base = base_name;
-      fprintf (vect_dump, "create array_ref of type: ");
+      fprintf (vect_dump, "create vector-pointer variable to type: ");
        print_generic_expr (vect_dump, vectype, TDF_SLIM);
        if (TREE_CODE (data_ref_base) == VAR_DECL)
          fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
        print_generic_expr (vect_dump, vectype, TDF_SLIM);
        if (TREE_CODE (data_ref_base) == VAR_DECL)
          fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
@@ -345,22 +295,19 @@ vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
    
    
    /** (2) Add aliasing information to the new vector-pointer:
    
    
    /** (2) Add aliasing information to the new vector-pointer:
-          (The points-to info (SSA_NAME_PTR_INFO) may be defined later.)  **/
+          (The points-to info (DR_PTR_INFO) may be defined later.)  **/
    
    
-  tag = STMT_VINFO_MEMTAG (stmt_info);
+  tag = DR_MEMTAG (dr);
    gcc_assert (tag);
  
    gcc_assert (tag);
  
-  /* If the memory tag of the original reference was not a type tag or
-     if the pointed-to type of VECT_PTR has an alias set number
-     different than TAG's, then we need to create a new type tag for
-     VECT_PTR and add TAG to its alias set.  */
-  if (var_ann (tag)->mem_tag_kind == NOT_A_TAG
-      || get_alias_set (tag) != get_alias_set (TREE_TYPE (vect_ptr_type)))
-    add_type_alias (vect_ptr, tag);
+  /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
+     tag must be created with tag added to its may alias list.  */
+  if (!MTAG_P (tag))
+    new_type_alias (vect_ptr, tag);
    else
    else
-    var_ann (vect_ptr)->type_mem_tag = tag;
-  
-  var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
+    var_ann (vect_ptr)->symbol_mem_tag = tag;
+
+  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
  
    /** (3) Calculate the initial address the vector-pointer, and set
            the vector-pointer to point to it before the loop:  **/
  
    /** (3) Calculate the initial address the vector-pointer, and set
            the vector-pointer to point to it before the loop:  **/
@@ -376,11 +323,10 @@ vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
    /* Create: p = (vectype *) initial_base  */
    vec_stmt = fold_convert (vect_ptr_type, new_temp);
    vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
    /* Create: p = (vectype *) initial_base  */
    vec_stmt = fold_convert (vect_ptr_type, new_temp);
    vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
-  new_temp = make_ssa_name (vect_ptr, vec_stmt);
-  TREE_OPERAND (vec_stmt, 0) = new_temp;
+  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
+  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
    new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
    gcc_assert (!new_bb);
    new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
    gcc_assert (!new_bb);
-  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
  
  
    /** (4) Handle the updating of the vector-pointer inside the loop: **/
  
  
    /** (4) Handle the updating of the vector-pointer inside the loop: **/
@@ -388,45 +334,37 @@ vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
    if (only_init) /* No update in loop is required.  */
      {
        /* Copy the points-to information if it exists. */
    if (only_init) /* No update in loop is required.  */
      {
        /* Copy the points-to information if it exists. */
-      if (STMT_VINFO_PTR_INFO (stmt_info))
-        duplicate_ssa_name_ptr_info (vect_ptr_init,
-                                     STMT_VINFO_PTR_INFO (stmt_info));
+      if (DR_PTR_INFO (dr))
+        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
        return vect_ptr_init;
      }
        return vect_ptr_init;
      }
+  else
+    {
+      block_stmt_iterator incr_bsi;
+      bool insert_after;
+      tree indx_before_incr, indx_after_incr;
+      tree incr;
+
+      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
+      create_iv (vect_ptr_init,
+                fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
+                NULL_TREE, loop, &incr_bsi, insert_after,
+                &indx_before_incr, &indx_after_incr);
+      incr = bsi_stmt (incr_bsi);
+      set_stmt_info ((tree_ann_t)stmt_ann (incr),
+                    new_stmt_vec_info (incr, loop_vinfo));
  
  
-  idx = vect_create_index_for_vector_ref (loop_vinfo);
-
-  /* Create: update = idx * vectype_size  */
-  tmp = create_tmp_var (integer_type_node, "update");
-  add_referenced_tmp_var (tmp);
-  size = TYPE_SIZE (vect_ptr_type); 
-  type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
-  ptr_update = create_tmp_var (type, "update");
-  add_referenced_tmp_var (ptr_update);
-  vectype_size = TYPE_SIZE_UNIT (vectype);
-  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
-  vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
-  new_temp = make_ssa_name (tmp, vec_stmt);
-  TREE_OPERAND (vec_stmt, 0) = new_temp;
-  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
-  vec_stmt = fold_convert (type, new_temp);
-  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
-  new_temp = make_ssa_name (ptr_update, vec_stmt);
-  TREE_OPERAND (vec_stmt, 0) = new_temp;
-  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
-
-  /* Create: data_ref_ptr = vect_ptr_init + update  */
-  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
-  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
-  new_temp = make_ssa_name (vect_ptr, vec_stmt);
-  TREE_OPERAND (vec_stmt, 0) = new_temp;
-  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
-  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
+      /* Copy the points-to information if it exists. */
+      if (DR_PTR_INFO (dr))
+       {
+         duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
+         duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
+       }
+      merge_alias_info (vect_ptr_init, indx_before_incr);
+      merge_alias_info (vect_ptr_init, indx_after_incr);
  
  
-  /* Copy the points-to information if it exists. */
-  if (STMT_VINFO_PTR_INFO (stmt_info))
-    duplicate_ssa_name_ptr_info (data_ref_ptr, STMT_VINFO_PTR_INFO (stmt_info));
-  return data_ref_ptr;
+      return indx_before_incr;
+    }
  }
  
  
  }
  
  
@@ -439,13 +377,18 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
  {
    tree vec_dest;
    const char *new_name;
  {
    tree vec_dest;
    const char *new_name;
+  tree type;
+  enum vect_var_kind kind;
+
+  kind = vectype ? vect_simple_var : vect_scalar_var;
+  type = vectype ? vectype : TREE_TYPE (scalar_dest);
  
    gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
  
    new_name = get_name (scalar_dest);
    if (!new_name)
      new_name = "var_";
  
    gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
  
    new_name = get_name (scalar_dest);
    if (!new_name)
      new_name = "var_";
-  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
+  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
    add_referenced_tmp_var (vec_dest);
  
    return vec_dest;
    add_referenced_tmp_var (vec_dest);
  
    return vec_dest;
@@ -483,7 +426,7 @@ vect_init_vector (tree stmt, tree vector_var)
    new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
    gcc_assert (!new_bb);
  
    new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
    gcc_assert (!new_bb);
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "created new init_stmt: ");
        print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
      {
        fprintf (vect_dump, "created new init_stmt: ");
        print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
@@ -506,7 +449,7 @@ vect_init_vector (tree stmt, tree vector_var)
     needs to be introduced.  */
  
  static tree
     needs to be introduced.  */
  
  static tree
-vect_get_vec_def_for_operand (tree op, tree stmt)
+vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
  {
    tree vec_oprnd;
    tree vec_stmt;
  {
    tree vec_oprnd;
    tree vec_stmt;
@@ -517,143 +460,906 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  basic_block bb;
    tree vec_inv;
    tree vec_inv;
+  tree vec_cst;
    tree t = NULL_TREE;
    tree def;
    int i;
    tree t = NULL_TREE;
    tree def;
    int i;
+  enum vect_def_type dt;
+  bool is_simple_use;
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
        print_generic_expr (vect_dump, op, TDF_SLIM);
      }
  
      {
        fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
        print_generic_expr (vect_dump, op, TDF_SLIM);
      }
  
-  /** ===> Case 1: operand is a constant.  **/
+  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+  gcc_assert (is_simple_use);
+  if (vect_print_dump_info (REPORT_DETAILS))
+    {
+      if (def)
+        {
+          fprintf (vect_dump, "def =  ");
+          print_generic_expr (vect_dump, def, TDF_SLIM);
+        }
+      if (def_stmt)
+        {
+          fprintf (vect_dump, "  def_stmt =  ");
+          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
+        }
+    }
  
  
-  if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
+  switch (dt)
      {
      {
-      /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
+    /* Case 1: operand is a constant.  */
+    case vect_constant_def:
+      {
+       if (scalar_def) 
+         *scalar_def = op;
+
+        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
+        if (vect_print_dump_info (REPORT_DETAILS))
+          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
+
+        for (i = nunits - 1; i >= 0; --i)
+          {
+            t = tree_cons (NULL_TREE, op, t);
+          }
+        vec_cst = build_vector (vectype, t);
+        return vect_init_vector (stmt, vec_cst);
+      }
+
+    /* Case 2: operand is defined outside the loop - loop invariant.  */
+    case vect_invariant_def:
+      {
+       if (scalar_def) 
+         *scalar_def = def;
+
+        /* Create 'vec_inv = {inv,inv,..,inv}'  */
+        if (vect_print_dump_info (REPORT_DETAILS))
+          fprintf (vect_dump, "Create vector_inv.");
+
+        for (i = nunits - 1; i >= 0; --i)
+          {
+            t = tree_cons (NULL_TREE, def, t);
+          }
+
+       /* FIXME: use build_constructor directly.  */
+        vec_inv = build_constructor_from_list (vectype, t);
+        return vect_init_vector (stmt, vec_inv);
+      }
+
+    /* Case 3: operand is defined inside the loop.  */
+    case vect_loop_def:
+      {
+       if (scalar_def) 
+         *scalar_def = def_stmt;
+
+        /* Get the def from the vectorized stmt.  */
+        def_stmt_info = vinfo_for_stmt (def_stmt);
+        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
+        gcc_assert (vec_stmt);
+        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
+        return vec_oprnd;
+      }
+
+    /* Case 4: operand is defined by a loop header phi - reduction  */
+    case vect_reduction_def:
+      {
+        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
+
+        /* Get the def before the loop  */
+        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+        return get_initial_def_for_reduction (stmt, op, scalar_def);
+     }
+
+    /* Case 5: operand is defined by loop-header phi - induction.  */
+    case vect_induction_def:
+      {
+        if (vect_print_dump_info (REPORT_DETAILS))
+          fprintf (vect_dump, "induction - unsupported.");
+        internal_error ("no support for induction"); /* FORNOW */
+      }
  
  
-      tree vec_cst;
+    default:
+      gcc_unreachable ();
+    }
+}
  
  
-      /* Build a tree with vector elements.  */
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-        fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
  
  
-      for (i = nunits - 1; i >= 0; --i)
-        {
-          t = tree_cons (NULL_TREE, op, t);
-        }
-      vec_cst = build_vector (vectype, t);
-      return vect_init_vector (stmt, vec_cst);
+/* Function vect_finish_stmt_generation.
+
+   Insert a new stmt.  */
+
+static void
+vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
+{
+  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    {
+      fprintf (vect_dump, "add new stmt: ");
+      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
      }
  
      }
  
-  gcc_assert (TREE_CODE (op) == SSA_NAME);
- 
-  /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
+  /* Make sure bsi points to the stmt that is being vectorized.  */
+  gcc_assert (stmt == bsi_stmt (*bsi));
+
+#ifdef USE_MAPPED_LOCATION
+  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
+#else
+  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
+#endif
+}
+
+
+#define ADJUST_IN_EPILOG 1
+
+/* Function get_initial_def_for_reduction
+
+   Input:
+   STMT - a stmt that performs a reduction operation in the loop.
+   INIT_VAL - the initial value of the reduction variable
+
+   Output:
+   SCALAR_DEF - a tree that holds a value to be added to the final result
+       of the reduction (used for "ADJUST_IN_EPILOG" - see below).
+   Return a vector variable, initialized according to the operation that STMT
+       performs. This vector will be used as the initial value of the
+       vector of partial results.
+
+   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
+     add:         [0,0,...,0,0]
+     mult:        [1,1,...,1,1]
+     min/max:     [init_val,init_val,..,init_val,init_val]
+     bit and/or:  [init_val,init_val,..,init_val,init_val]
+   and when necessary (e.g. add/mult case) let the caller know 
+   that it needs to adjust the result by init_val.
+
+   Option2: Initialize the vector as follows:
+     add:         [0,0,...,0,init_val]
+     mult:        [1,1,...,1,init_val]
+     min/max:     [init_val,init_val,...,init_val]
+     bit and/or:  [init_val,init_val,...,init_val]
+   and no adjustments are needed.
+
+   For example, for the following code:
+
+   s = init_val;
+   for (i=0;i<n;i++)
+     s = s + a[i];
+
+   STMT is 's = s + a[i]', and the reduction variable is 's'.
+   For a vector of 4 units, we want to return either [0,0,0,init_val],
+   or [0,0,0,0] and let the caller know that it needs to adjust
+   the result at the end by 'init_val'.
+
+   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
+   TODO: Use some cost-model to estimate which scheme is more profitable.
+*/
+
+static tree
+get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+  int nelements;
+  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+  tree type = TREE_TYPE (init_val);
+  tree def;
+  tree vec, t = NULL_TREE;
+  bool need_epilog_adjust;
+  int i;
+
+  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
+
+  switch (code)
+  {
+  case WIDEN_SUM_EXPR:
+  case DOT_PROD_EXPR:
+  case PLUS_EXPR:
+    if (INTEGRAL_TYPE_P (type))
+      def = build_int_cst (type, 0);
+    else
+      def = build_real (type, dconst0);
+
+#ifdef ADJUST_IN_EPILOG
+    /* All the 'nunits' elements are set to 0. The final result will be
+       adjusted by 'init_val' at the loop epilog.  */
+    nelements = nunits;
+    need_epilog_adjust = true;
+#else
+    /* 'nunits - 1' elements are set to 0; The last element is set to 
+        'init_val'.  No further adjustments at the epilog are needed.  */
+    nelements = nunits - 1;
+    need_epilog_adjust = false;
+#endif
+    break;
+
+  case MIN_EXPR:
+  case MAX_EXPR:
+    def = init_val;
+    nelements = nunits;
+    need_epilog_adjust = false;
+    break;
+
+  default:
+    gcc_unreachable ();
+  }
+
+  for (i = nelements - 1; i >= 0; --i)
+    t = tree_cons (NULL_TREE, def, t);
+
+  if (nelements == nunits - 1)
+    {
+      /* Set the last element of the vector.  */
+      t = tree_cons (NULL_TREE, init_val, t);
+      nelements += 1;
+    }
+  gcc_assert (nelements == nunits);
+  
+  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
+    vec = build_vector (vectype, t);
+  else
+    vec = build_constructor_from_list (vectype, t);
+    
+  if (!need_epilog_adjust)
+    *scalar_def = NULL_TREE;
+  else
+    *scalar_def = init_val;
+
+  return vect_init_vector (stmt, vec);
+}
+
+
+/* Function vect_create_epilog_for_reduction
+    
+   Create code at the loop-epilog to finalize the result of a reduction
+   computation. 
+  
+   VECT_DEF is a vector of partial results. 
+   REDUC_CODE is the tree-code for the epilog reduction.
+   STMT is the scalar reduction stmt that is being vectorized.
+   REDUCTION_PHI is the phi-node that carries the reduction computation.
+
+   This function:
+   1. Creates the reduction def-use cycle: sets the the arguments for 
+      REDUCTION_PHI:
+      The loop-entry argument is the vectorized initial-value of the reduction.
+      The loop-latch argument is VECT_DEF - the vector of partial sums.
+   2. "Reduces" the vector of partial results VECT_DEF into a single result,
+      by applying the operation specified by REDUC_CODE if available, or by 
+      other means (whole-vector shifts or a scalar loop).
+      The function also creates a new phi node at the loop exit to preserve 
+      loop-closed form, as illustrated below.
+  
+     The flow at the entry to this function:
+    
+        loop:
+          vec_def = phi <null, null>            # REDUCTION_PHI
+          VECT_DEF = vector_stmt                # vectorized form of STMT       
+          s_loop = scalar_stmt                  # (scalar) STMT
+        loop_exit:
+          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
+          use <s_out0>
+          use <s_out0>
+
+     The above is transformed by this function into:
+
+        loop:
+          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
+          VECT_DEF = vector_stmt                # vectorized form of STMT
+          s_loop = scalar_stmt                  # (scalar) STMT 
+        loop_exit:
+          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
+          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
+          v_out2 = reduce <v_out1>
+          s_out3 = extract_field <v_out2, 0>
+          s_out4 = adjust_result <s_out3>
+          use <s_out4>
+          use <s_out4>
+*/
  
  
-  def_stmt = SSA_NAME_DEF_STMT (op);
-  def_stmt_info = vinfo_for_stmt (def_stmt);
+static void
+vect_create_epilog_for_reduction (tree vect_def, tree stmt,
+                                  enum tree_code reduc_code, tree reduction_phi)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree vectype;
+  enum machine_mode mode;
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block exit_bb;
+  tree scalar_dest;
+  tree scalar_type;
+  tree new_phi;
+  block_stmt_iterator exit_bsi;
+  tree vec_dest;
+  tree new_temp;
+  tree new_name;
+  tree epilog_stmt;
+  tree new_scalar_dest, exit_phi;
+  tree bitsize, bitpos, bytesize; 
+  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+  tree scalar_initial_def;
+  tree vec_initial_def;
+  tree orig_name;
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+  bool extract_scalar_result;
+  tree reduction_op;
+  tree orig_stmt;
+  tree operation = TREE_OPERAND (stmt, 1);
+  int op_type;
+  
+  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
+  reduction_op = TREE_OPERAND (operation, op_type-1);
+  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+  mode = TYPE_MODE (vectype);
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  /*** 1. Create the reduction def-use cycle  ***/
+  
+  /* 1.1 set the loop-entry arg of the reduction-phi:  */
+  /* For the case of reduction, vect_get_vec_def_for_operand returns
+     the scalar def before the loop, that defines the initial value
+     of the reduction variable.  */
+  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
+                                                 &scalar_initial_def);
+  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
+
+  /* 1.2 set the loop-latch arg for the reduction-phi:  */
+  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
+
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
      {
-      fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
-      print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
+      fprintf (vect_dump, "transform reduction: created def-use cycle:");
+      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
+      fprintf (vect_dump, "\n");
+      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
      }
  
  
      }
  
  
-  /** ==> Case 2.1: operand is defined inside the loop.  **/
+  /*** 2. Create epilog code
+         The reduction epilog code operates across the elements of the vector
+          of partial results computed by the vectorized loop.
+          The reduction epilog code consists of:
+          step 1: compute the scalar result in a vector (v_out2)
+          step 2: extract the scalar result (s_out3) from the vector (v_out2)
+          step 3: adjust the scalar result (s_out3) if needed.
+
+          Step 1 can be accomplished using one the following three schemes:
+          (scheme 1) using reduc_code, if available.
+          (scheme 2) using whole-vector shifts, if available.
+          (scheme 3) using a scalar loop. In this case steps 1+2 above are 
+                     combined.
+                
+          The overall epilog code looks like this:
+
+          s_out0 = phi <s_loop>         # original EXIT_PHI
+          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
+          v_out2 = reduce <v_out1>              # step 1
+          s_out3 = extract_field <v_out2, 0>    # step 2
+          s_out4 = adjust_result <s_out3>       # step 3
+
+          (step 3 is optional, and step2 1 and 2 may be combined).
+          Lastly, the uses of s_out0 are replaced by s_out4.
+
+         ***/
+
+  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
+        v_out1 = phi <v_loop>  */
+
+  exit_bb = loop->single_exit->dest;
+  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
+  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
+  exit_bsi = bsi_start (exit_bb);
+
+  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 
+         (i.e. when reduc_code is not available) and in the final adjustment code
+         (if needed).  Also get the original scalar reduction variable as
+         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it 
+         represents a reduction pattern), the tree-code and scalar-def are 
+         taken from the original stmt that the pattern-stmt (STMT) replaces.  
+         Otherwise (it is a regular reduction) - the tree-code and scalar-def
+         are taken from STMT.  */ 
+
+  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+  if (!orig_stmt)
+    {
+      /* Regular reduction  */
+      orig_stmt = stmt;
+    }
+  else
+    {
+      /* Reduction pattern  */
+      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
+      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
+      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
+    }
+  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
+  scalar_dest = TREE_OPERAND (orig_stmt, 0);
+  scalar_type = TREE_TYPE (scalar_dest);
+  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
+  bitsize = TYPE_SIZE (scalar_type);
+  bytesize = TYPE_SIZE_UNIT (scalar_type);
+
+  /* 2.3 Create the reduction code, using one of the three schemes described
+         above.  */
  
  
-  if (def_stmt_info)
+  if (reduc_code < NUM_TREE_CODES)
      {
      {
-      /* Get the def from the vectorized stmt.  */
+      /*** Case 1:  Create:
+          v_out2 = reduc_expr <v_out1>  */
  
  
-      vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
-      gcc_assert (vec_stmt);
-      vec_oprnd = TREE_OPERAND (vec_stmt, 0);
-      return vec_oprnd;
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "Reduce using direct vector reduction.");
+
+      vec_dest = vect_create_destination_var (scalar_dest, vectype);
+      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+                       build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
+      new_temp = make_ssa_name (vec_dest, epilog_stmt);
+      TREE_OPERAND (epilog_stmt, 0) = new_temp;
+      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+      extract_scalar_result = true;
      }
      }
+  else
+    {
+      enum tree_code shift_code = 0;
+      bool have_whole_vector_shift = true;
+      int bit_offset;
+      int element_bitsize = tree_low_cst (bitsize, 1);
+      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+      tree vec_temp;
+
+      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
+       shift_code = VEC_RSHIFT_EXPR;
+      else
+       have_whole_vector_shift = false;
+
+      /* Regardless of whether we have a whole vector shift, if we're
+        emulating the operation via tree-vect-generic, we don't want
+        to use it.  Only the first round of the reduction is likely
+        to still be profitable via emulation.  */
+      /* ??? It might be better to emit a reduction tree code here, so that
+        tree-vect-generic can expand the first round via bit tricks.  */
+      if (!VECTOR_MODE_P (mode))
+       have_whole_vector_shift = false;
+      else
+       {
+         optab optab = optab_for_tree_code (code, vectype);
+         if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
+           have_whole_vector_shift = false;
+       }
  
  
+      if (have_whole_vector_shift)
+        {
+         /*** Case 2: Create:
+            for (offset = VS/2; offset >= element_size; offset/=2)
+               {
+                 Create:  va' = vec_shift <va, offset>
+                 Create:  va = vop <va, va'>
+               }  */
+
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "Reduce using vector shifts");
  
  
-  /** ==> Case 2.2: operand is defined by the loop-header phi-node - 
-                    it is a reduction/induction.  **/
+         vec_dest = vect_create_destination_var (scalar_dest, vectype);
+         new_temp = PHI_RESULT (new_phi);
  
  
-  bb = bb_for_stmt (def_stmt);
-  if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
+         for (bit_offset = vec_size_in_bits/2;
+              bit_offset >= element_bitsize;
+              bit_offset /= 2)
+           {
+             tree bitpos = size_int (bit_offset);
+
+             epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+             build2 (shift_code, vectype, new_temp, bitpos));
+             new_name = make_ssa_name (vec_dest, epilog_stmt);
+             TREE_OPERAND (epilog_stmt, 0) = new_name;
+             bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+             epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+             build2 (code, vectype, new_name, new_temp));
+             new_temp = make_ssa_name (vec_dest, epilog_stmt);
+             TREE_OPERAND (epilog_stmt, 0) = new_temp;
+             bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+           }
+
+         extract_scalar_result = true;
+       }
+      else
+        {
+         tree rhs;
+
+         /*** Case 3: Create:  
+            s = extract_field <v_out2, 0>
+            for (offset = element_size; 
+                 offset < vector_size; 
+                 offset += element_size;)
+              {
+                Create:  s' = extract_field <v_out2, offset>
+                Create:  s = op <s, s'>
+              }  */
+
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "Reduce using scalar code. ");
+
+         vec_temp = PHI_RESULT (new_phi);
+         vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+         rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
+                        bitsize_zero_node);
+         BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
+         epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
+         new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+         TREE_OPERAND (epilog_stmt, 0) = new_temp;
+         bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+             
+         for (bit_offset = element_bitsize;
+              bit_offset < vec_size_in_bits;
+              bit_offset += element_bitsize)
+           { 
+             tree bitpos = bitsize_int (bit_offset);
+             tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
+                                bitpos);
+               
+             BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
+             epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+                                   rhs);       
+             new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
+             TREE_OPERAND (epilog_stmt, 0) = new_name;
+             bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+             epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+                               build2 (code, scalar_type, new_name, new_temp));
+             new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+             TREE_OPERAND (epilog_stmt, 0) = new_temp;
+             bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+           }
+
+         extract_scalar_result = false;
+       }
+    }
+
+  /* 2.4  Extract the final scalar result.  Create:
+         s_out3 = extract_field <v_out2, bitpos>  */
+  
+  if (extract_scalar_result)
      {
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-       fprintf (vect_dump, "reduction/induction - unsupported.");
-      internal_error ("no support for reduction/induction"); /* FORNOW */
+      tree rhs;
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "extract scalar result");
+
+      if (BYTES_BIG_ENDIAN)
+       bitpos = size_binop (MULT_EXPR,
+                      bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
+                      TYPE_SIZE (scalar_type));
+      else
+       bitpos = bitsize_zero_node;
+
+      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
+      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
+      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
+      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+      TREE_OPERAND (epilog_stmt, 0) = new_temp; 
+      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
      }
  
      }
  
+  /* 2.4 Adjust the final result by the initial value of the reduction
+        variable. (When such adjustment is not needed, then
+        'scalar_initial_def' is zero).
  
  
-  /** ==> Case 2.3: operand is defined outside the loop - 
-                    it is a loop invariant.  */
+        Create: 
+        s_out4 = scalar_expr <s_out3, scalar_initial_def>  */
+  
+  if (scalar_initial_def)
+    {
+      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+                      build2 (code, scalar_type, new_temp, scalar_initial_def));
+      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+      TREE_OPERAND (epilog_stmt, 0) = new_temp;
+      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+    }
+
+  /* 2.6 Replace uses of s_out0 with uses of s_out3  */
  
  
-  switch (TREE_CODE (def_stmt))
+  /* Find the loop-closed-use at the loop exit of the original scalar result.  
+     (The reduction result is expected to have two immediate uses - one at the 
+     latch block, and one at the loop exit).  */
+  exit_phi = NULL;
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
      {
      {
-    case PHI_NODE:
-      def = PHI_RESULT (def_stmt);
-      break;
-    case MODIFY_EXPR:
-      def = TREE_OPERAND (def_stmt, 0);
-      break;
-    case NOP_EXPR:
-      def = TREE_OPERAND (def_stmt, 0);
-      gcc_assert (IS_EMPTY_STMT (def_stmt));
-      def = op;
-      break;
-    default:
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
         {
         {
-          fprintf (vect_dump, "unsupported defining stmt: ");
-         print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
+         exit_phi = USE_STMT (use_p);
+         break;
         }
         }
-      internal_error ("unsupported defining stmt");
      }
      }
+  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
+  gcc_assert (exit_phi);
+  /* Replace the uses:  */
+  orig_name = PHI_RESULT (exit_phi);
+  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
+    SET_USE (use_p, new_temp);
+} 
+
+
+/* Function vectorizable_reduction.
+
+   Check if STMT performs a reduction operation that can be vectorized.
+   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+   Return FALSE if not a vectorizable STMT, TRUE otherwise.
+
+   This function also handles reduction idioms (patterns) that have been 
+   recognized in advance during vect_pattern_recog. In this case, STMT may be
+   of this form:
+     X = pattern_expr (arg0, arg1, ..., X)
+   and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
+   sequence that had been detected and replaced by the pattern-stmt (STMT).
+  
+   In some cases of reduction patterns, the type of the reduction variable X is 
+   different than the type of the other arguments of STMT.
+   In such cases, the vectype that is used when transforming STMT into a vector
+   stmt is different than the vectype that is used to determine the 
+   vectorization factor, because it consists of a different number of elements 
+   than the actual number of elements that are being operated upon in parallel.
+
+   For example, consider an accumulation of shorts into an int accumulator. 
+   On some targets it's possible to vectorize this pattern operating on 8
+   shorts at a time (hence, the vectype for purposes of determining the
+   vectorization factor should be V8HI); on the other hand, the vectype that
+   is used to create the vector form is actually V4SI (the type of the result). 
+
+   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that 
+   indicates what is the actual level of parallelism (V8HI in the example), so 
+   that the right vectorization factor would be derived. This vectype 
+   corresponds to the type of arguments to the reduction stmt, and should *NOT* 
+   be used to create the vectorized stmt. The right vectype for the vectorized
+   stmt is obtained from the type of the result X: 
+        get_vectype_for_scalar_type (TREE_TYPE (X))
+
+   This means that, contrary to "regular" reductions (or "regular" stmts in 
+   general), the following equation:
+      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
+   does *NOT* necessarily hold for reduction patterns.  */
+
+bool
+vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+{
+  tree vec_dest;
+  tree scalar_dest;
+  tree op;
+  tree loop_vec_def0, loop_vec_def1;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree operation;
+  enum tree_code code, orig_code, epilog_reduc_code = 0;
+  enum machine_mode vec_mode;
+  int op_type;
+  optab optab, reduc_optab;
+  tree new_temp;
+  tree def, def_stmt;
+  enum vect_def_type dt;
+  tree new_phi;
+  tree scalar_type;
+  bool is_simple_use;
+  tree orig_stmt;
+  stmt_vec_info orig_stmt_info;
+  tree expr = NULL_TREE;
+  int i;
+
+  /* 1. Is vectorizable reduction?  */
+
+  /* Not supportable if the reduction variable is used in the loop.  */
+  if (STMT_VINFO_RELEVANT_P (stmt_info))
+    return false;
+
+  if (!STMT_VINFO_LIVE_P (stmt_info))
+    return false;
+
+  /* Make sure it was already recognized as a reduction computation.  */
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
+    return false;
  
  
-  /* Build a tree with vector elements.
-     Create 'vec_inv = {inv,inv,..,inv}'  */
+  /* 2. Has this been recognized as a reduction pattern? 
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-    fprintf (vect_dump, "Create vector_inv.");
+     Check if STMT represents a pattern that has been recognized
+     in earlier analysis stages.  For stmts that represent a pattern,
+     the STMT_VINFO_RELATED_STMT field records the last stmt in
+     the original sequence that constitutes the pattern.  */
  
  
-  for (i = nunits - 1; i >= 0; --i)
+  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+  if (orig_stmt)
      {
      {
-      t = tree_cons (NULL_TREE, def, t);
+      orig_stmt_info = vinfo_for_stmt (orig_stmt);
+      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
+      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
+      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
      }
      }
+ 
+  /* 3. Check the operands of the operation. The first operands are defined
+        inside the loop body. The last operand is the reduction variable,
+        which is defined by the loop-header-phi.  */
  
  
-  vec_inv = build_constructor (vectype, t);
-  return vect_init_vector (stmt, vec_inv);
-}
+  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
  
  
+  operation = TREE_OPERAND (stmt, 1);
+  code = TREE_CODE (operation);
+  op_type = TREE_CODE_LENGTH (code);
  
  
-/* Function vect_finish_stmt_generation.
+  if (op_type != binary_op && op_type != ternary_op)
+    return false;
+  scalar_dest = TREE_OPERAND (stmt, 0);
+  scalar_type = TREE_TYPE (scalar_dest);
  
  
-   Insert a new stmt.  */
+  /* All uses but the last are expected to be defined in the loop.
+     The last use is the reduction variable.  */
+  for (i = 0; i < op_type-1; i++)
+    {
+      op = TREE_OPERAND (operation, i);
+      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+      gcc_assert (is_simple_use);
+      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
+                  dt == vect_constant_def);
+    }
  
  
-static void
-vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
-{
-  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+  op = TREE_OPERAND (operation, i);
+  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+  gcc_assert (is_simple_use);
+  gcc_assert (dt == vect_reduction_def);
+  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
+  if (orig_stmt) 
+    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
+  else
+    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
+  
+  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
+    return false;
+
+  /* 4. Supportable by target?  */
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  /* 4.1. check support for the operation in the loop  */
+  optab = optab_for_tree_code (code, vectype);
+  if (!optab)
      {
      {
-      fprintf (vect_dump, "add new stmt: ");
-      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "no optab.");
+      return false;
+    }
+  vec_mode = TYPE_MODE (vectype);
+  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "op not supported by target.");
+      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
+          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+            < vect_min_worthwhile_factor (code))
+        return false;
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "proceeding using word mode.");
      }
  
      }
  
-#ifdef ENABLE_CHECKING
-  /* Make sure bsi points to the stmt that is being vectorized.  */
-  gcc_assert (stmt == bsi_stmt (*bsi));
-#endif
+  /* Worthwhile without SIMD support?  */
+  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+        < vect_min_worthwhile_factor (code))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "not worthwhile without SIMD support.");
+      return false;
+    }
  
  
-#ifdef USE_MAPPED_LOCATION
-  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
-#else
-  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
-#endif
+  /* 4.2. Check support for the epilog operation.
+
+          If STMT represents a reduction pattern, then the type of the
+          reduction variable may be different than the type of the rest
+          of the arguments.  For example, consider the case of accumulation
+          of shorts into an int accumulator; The original code:
+                        S1: int_a = (int) short_a;
+          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
+
+          was replaced with:
+                        STMT: int_acc = widen_sum <short_a, int_acc>
+
+          This means that:
+          1. The tree-code that is used to create the vector operation in the 
+             epilog code (that reduces the partial results) is not the 
+             tree-code of STMT, but is rather the tree-code of the original 
+             stmt from the pattern that STMT is replacing. I.e, in the example 
+             above we want to use 'widen_sum' in the loop, but 'plus' in the 
+             epilog.
+          2. The type (mode) we use to check available target support
+             for the vector operation to be created in the *epilog*, is 
+             determined by the type of the reduction variable (in the example 
+             above we'd check this: plus_optab[vect_int_mode]).
+             However the type (mode) we use to check available target support
+             for the vector operation to be created *inside the loop*, is
+             determined by the type of the other arguments to STMT (in the
+             example we'd check this: widen_sum_optab[vect_short_mode]).
+  
+          This is contrary to "regular" reductions, in which the types of all 
+          the arguments are the same as the type of the reduction variable. 
+          For "regular" reductions we can therefore use the same vector type 
+          (and also the same tree-code) when generating the epilog code and
+          when generating the code inside the loop.  */
+
+  if (orig_stmt)
+    {
+      /* This is a reduction pattern: get the vectype from the type of the
+         reduction variable, and get the tree-code from orig_stmt.  */
+      orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
+      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
+      vec_mode = TYPE_MODE (vectype);
+    }
+  else
+    {
+      /* Regular reduction: use the same vectype and tree-code as used for
+         the vector code inside the loop can be used for the epilog code. */
+      orig_code = code;
+    }
+
+  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
+    return false;
+  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
+  if (!reduc_optab)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "no optab for reduction.");
+      epilog_reduc_code = NUM_TREE_CODES;
+    }
+  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "reduc op not supported by target.");
+      epilog_reduc_code = NUM_TREE_CODES;
+    }
+ 
+  if (!vec_stmt) /* transformation not required.  */
+    {
+      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+      return true;
+    }
+
+  /** Transform.  **/
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "transform reduction.");
+
+  /* Create the destination vector  */
+  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+  /* Create the reduction-phi that defines the reduction-operand.  */
+  new_phi = create_phi_node (vec_dest, loop->header);
+
+  /* Prepare the operand that is defined inside the loop body  */
+  op = TREE_OPERAND (operation, 0);
+  loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
+  if (op_type == binary_op)
+    expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
+  else if (op_type == ternary_op)
+    {
+      op = TREE_OPERAND (operation, 1);
+      loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
+      expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1, 
+                    PHI_RESULT (new_phi));
+    }
+
+  /* Create the vectorized operation that computes the partial results  */
+  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
+  new_temp = make_ssa_name (vec_dest, *vec_stmt);
+  TREE_OPERAND (*vec_stmt, 0) = new_temp;
+  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+
+  /* Finalize the reduction-phi (set it's arguments) and create the
+     epilog reduction code.  */
+  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
+  return true;
  }
  
  
  }
  
  
@@ -675,8 +1381,14 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    tree new_temp;
    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    tree new_temp;
+  tree def, def_stmt;
+  enum vect_def_type dt;
  
    /* Is vectorizable assignment?  */
  
    /* Is vectorizable assignment?  */
+  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+    return false;
+
+  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
@@ -686,9 +1398,9 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      return false;
  
    op = TREE_OPERAND (stmt, 1);
      return false;
  
    op = TREE_OPERAND (stmt, 1);
-  if (!vect_is_simple_use (op, loop_vinfo, NULL))
+  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
      {
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "use not simple.");
        return false;
      }
          fprintf (vect_dump, "use not simple.");
        return false;
      }
@@ -700,7 +1412,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      }
  
    /** Transform.  **/
      }
  
    /** Transform.  **/
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "transform assignment.");
  
    /* Handle def.  */
      fprintf (vect_dump, "transform assignment.");
  
    /* Handle def.  */
@@ -708,7 +1420,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
    /* Handle use.  */
    op = TREE_OPERAND (stmt, 1);
  
    /* Handle use.  */
    op = TREE_OPERAND (stmt, 1);
-  vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
+  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
  
    /* Arguments are ready. create the new vector stmt.  */
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
  
    /* Arguments are ready. create the new vector stmt.  */
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
@@ -746,6 +1458,7 @@ vect_min_worthwhile_factor (enum tree_code code)
      }
  }
  
      }
  }
  
+
  /* Function vectorizable_operation.
  
     Check if STMT performs a binary or unary operation that can be vectorized. 
  /* Function vectorizable_operation.
  
     Check if STMT performs a binary or unary operation that can be vectorized. 
@@ -771,8 +1484,25 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    int op_type;
    tree op;
    optab optab;
    int op_type;
    tree op;
    optab optab;
+  int icode;
+  enum machine_mode optab_op2_mode;
+  tree def, def_stmt;
+  enum vect_def_type dt;
  
    /* Is STMT a vectorizable binary/unary operation?   */
  
    /* Is STMT a vectorizable binary/unary operation?   */
+  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+    return false;
+
+  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
+
+  if (STMT_VINFO_LIVE_P (stmt_info))
+    {
+      /* FORNOW: not yet supported.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "value used after loop.");
+      return false;
+    }
+
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
  
@@ -787,7 +1517,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    op_type = TREE_CODE_LENGTH (code);
    if (op_type != unary_op && op_type != binary_op)
      {
    op_type = TREE_CODE_LENGTH (code);
    if (op_type != unary_op && op_type != binary_op)
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
        return false;
      }
         fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
        return false;
      }
@@ -795,9 +1525,9 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    for (i = 0; i < op_type; i++)
      {
        op = TREE_OPERAND (operation, i);
    for (i = 0; i < op_type; i++)
      {
        op = TREE_OPERAND (operation, i);
-      if (!vect_is_simple_use (op, loop_vinfo, NULL))
+      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
         {
         {
-         if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+         if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "use not simple.");
           return false;
         }       
             fprintf (vect_dump, "use not simple.");
           return false;
         }       
@@ -806,20 +1536,21 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    /* Supportable by target?  */
    if (!optab)
      {
    /* Supportable by target?  */
    if (!optab)
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "no optab.");
        return false;
      }
    vec_mode = TYPE_MODE (vectype);
         fprintf (vect_dump, "no optab.");
        return false;
      }
    vec_mode = TYPE_MODE (vectype);
-  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+  icode = (int) optab->handlers[(int) vec_mode].insn_code;
+  if (icode == CODE_FOR_nothing)
      {
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "op not supported by target.");
        if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
            || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              < vect_min_worthwhile_factor (code))
          return false;
         fprintf (vect_dump, "op not supported by target.");
        if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
            || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              < vect_min_worthwhile_factor (code))
          return false;
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "proceeding using word mode.");
      }
  
         fprintf (vect_dump, "proceeding using word mode.");
      }
  
@@ -828,11 +1559,30 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
        && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          < vect_min_worthwhile_factor (code))
      {
        && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          < vect_min_worthwhile_factor (code))
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "not worthwhile without SIMD support.");
        return false;
      }
  
         fprintf (vect_dump, "not worthwhile without SIMD support.");
        return false;
      }
  
+  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
+    {
+      /* FORNOW: not yet supported.  */
+      if (!VECTOR_MODE_P (vec_mode))
+       return false;
+
+      /* Invariant argument is needed for a vector shift
+        by a scalar shift operand.  */
+      optab_op2_mode = insn_data[icode].operand[2].mode;
+      if (! (VECTOR_MODE_P (optab_op2_mode)
+            || dt == vect_constant_def
+            || dt == vect_invariant_def))
+       {
+         if (vect_print_dump_info (REPORT_DETAILS))
+           fprintf (vect_dump, "operand mode requires invariant argument.");
+         return false;
+       }
+    }
+
    if (!vec_stmt) /* transformation not required.  */
      {
        STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
    if (!vec_stmt) /* transformation not required.  */
      {
        STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
@@ -841,7 +1591,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
    /** Transform.  **/
  
  
    /** Transform.  **/
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "transform binary/unary operation.");
  
    /* Handle def.  */
      fprintf (vect_dump, "transform binary/unary operation.");
  
    /* Handle def.  */
@@ -850,12 +1600,30 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
    /* Handle uses.  */
    op0 = TREE_OPERAND (operation, 0);
  
    /* Handle uses.  */
    op0 = TREE_OPERAND (operation, 0);
-  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
+  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
  
    if (op_type == binary_op)
      {
        op1 = TREE_OPERAND (operation, 1);
  
    if (op_type == binary_op)
      {
        op1 = TREE_OPERAND (operation, 1);
-      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); 
+
+      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
+       {
+         /* Vector shl and shr insn patterns can be defined with
+            scalar operand 2 (shift operand).  In this case, use
+            constant or loop invariant op1 directly, without
+            extending it to vector mode first.  */
+
+         optab_op2_mode = insn_data[icode].operand[2].mode;
+         if (!VECTOR_MODE_P (optab_op2_mode))
+           {
+             if (vect_print_dump_info (REPORT_DETAILS))
+               fprintf (vect_dump, "operand 1 using scalar mode.");
+             vec_oprnd1 = op1;
+           }
+       }
+
+      if (!vec_oprnd1)
+       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); 
      }
  
    /* Arguments are ready. create the new vector stmt.  */
      }
  
    /* Arguments are ready. create the new vector stmt.  */
@@ -897,7 +1665,8 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    tree dummy;
    enum dr_alignment_support alignment_support_cheme;
    ssa_op_iter iter;
    tree dummy;
    enum dr_alignment_support alignment_support_cheme;
    ssa_op_iter iter;
-  tree def;
+  tree def, def_stmt;
+  enum vect_def_type dt;
  
    /* Is vectorizable store? */
  
  
    /* Is vectorizable store? */
  
@@ -910,9 +1679,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      return false;
  
    op = TREE_OPERAND (stmt, 1);
      return false;
  
    op = TREE_OPERAND (stmt, 1);
-  if (!vect_is_simple_use (op, loop_vinfo, NULL))
+  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
      {
      {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "use not simple.");
        return false;
      }
          fprintf (vect_dump, "use not simple.");
        return false;
      }
@@ -935,7 +1704,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
    /** Transform.  **/
  
  
    /** Transform.  **/
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "transform store");
  
    alignment_support_cheme = vect_supportable_dr_alignment (dr);
      fprintf (vect_dump, "transform store");
  
    alignment_support_cheme = vect_supportable_dr_alignment (dr);
@@ -943,7 +1712,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
  
    /* Handle use - get the vectorized def from the defining stmt.  */
    gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
  
    /* Handle use - get the vectorized def from the defining stmt.  */
-  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
+  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
  
    /* Handle def.  */
    /* FORNOW: make sure the data reference is aligned.  */
  
    /* Handle def.  */
    /* FORNOW: make sure the data reference is aligned.  */
@@ -955,16 +1724,22 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
    vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
  
    *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
    vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
  
-  /* Mark all non-SSA variables in the statement for rewriting.  */
-  mark_new_vars_to_rename (*vec_stmt);
-           
-  /* The new vectorized statement will have better aliasing
-     information, so some of the virtual definitions of the old
-     statement will likely disappear from the IL.  Mark them to have
-     their SSA form updated.  */
+  /* Copy the V_MAY_DEFS representing the aliasing of the original array
+     element's definition to the vector's definition then update the
+     defining statement.  The original is being deleted so the same
+     SSA_NAMEs can be used.  */
+  copy_virtual_operands (*vec_stmt, stmt);
+
    FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
    FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
-    mark_sym_for_renaming (SSA_NAME_VAR (def));
- 
+    {
+      SSA_NAME_DEF_STMT (def) = *vec_stmt;
+
+      /* If this virtual def has a use outside the loop and a loop peel is 
+        performed then the def may be renamed by the peel.  Mark it for 
+        renaming so the later use will also be renamed.  */
+      mark_sym_for_renaming (SSA_NAME_VAR (def));
+    }
+
    return true;
  }
  
    return true;
  }
  
@@ -999,6 +1774,18 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    enum dr_alignment_support alignment_support_cheme;
  
    /* Is vectorizable load? */
    enum dr_alignment_support alignment_support_cheme;
  
    /* Is vectorizable load? */
+  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+    return false;
+
+  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
+
+  if (STMT_VINFO_LIVE_P (stmt_info))
+    {
+      /* FORNOW: not yet supported.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "value used after loop.");
+      return false;
+    }
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
@@ -1020,7 +1807,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      (e.g. - data copies).  */
    if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
      {
      (e.g. - data copies).  */
    if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
      {
-      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
+      if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "Aligned load, but unsupported type.");
        return false;
      }
         fprintf (vect_dump, "Aligned load, but unsupported type.");
        return false;
      }
@@ -1033,7 +1820,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
    /** Transform.  **/
  
  
    /** Transform.  **/
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "transform load.");
  
    alignment_support_cheme = vect_supportable_dr_alignment (dr);
      fprintf (vect_dump, "transform load.");
  
    alignment_support_cheme = vect_supportable_dr_alignment (dr);
@@ -1109,9 +1896,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
  
        /* <2> Create lsq = *(floor(p2')) in the loop  */ 
  
  
        /* <2> Create lsq = *(floor(p2')) in the loop  */ 
-      offset = build_int_cst (integer_type_node, 
-                             TYPE_VECTOR_SUBPARTS (vectype));
-      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
+      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        vec_dest = vect_create_destination_var (scalar_dest, vectype);
        dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
        data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
        vec_dest = vect_create_destination_var (scalar_dest, vectype);
        dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
        data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
@@ -1143,7 +1928,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
              the value of the parameter and no global variables are touched
              which makes the builtin a "const" function.  Requiring the
              builtin to have the "const" attribute makes it unnecessary
              the value of the parameter and no global variables are touched
              which makes the builtin a "const" function.  Requiring the
              builtin to have the "const" attribute makes it unnecessary
-            to call mark_call_clobbered_vars_to_rename.  */
+            to call mark_call_clobbered.  */
           gcc_assert (TREE_READONLY (builtin_decl));
         }
        else
           gcc_assert (TREE_READONLY (builtin_decl));
         }
        else
@@ -1178,6 +1963,64 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    return true;
  }
  
    return true;
  }
  
+
+/* Function vectorizable_live_operation.
+
+   STMT computes a value that is used outside the loop. Check if 
+   it can be supported.  */
+
+bool
+vectorizable_live_operation (tree stmt,
+                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
+                             tree *vec_stmt ATTRIBUTE_UNUSED)
+{
+  tree operation;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  int i;
+  enum tree_code code;
+  int op_type;
+  tree op;
+  tree def, def_stmt;
+  enum vect_def_type dt; 
+
+  if (!STMT_VINFO_LIVE_P (stmt_info))
+    return false;
+
+  if (TREE_CODE (stmt) != MODIFY_EXPR)
+    return false;
+
+  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
+    return false;
+
+  operation = TREE_OPERAND (stmt, 1);
+  code = TREE_CODE (operation);
+
+  op_type = TREE_CODE_LENGTH (code);
+
+  /* FORNOW: support only if all uses are invariant. This means
+     that the scalar operations can remain in place, unvectorized.
+     The original last scalar value that they compute will be used.  */
+
+  for (i = 0; i < op_type; i++)
+    {
+      op = TREE_OPERAND (operation, i);
+      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "use not simple.");
+          return false;
+        }
+
+      if (dt != vect_invariant_def && dt != vect_constant_def)
+        return false;
+    }
+
+  /* No transformation is required for the cases we currently support.  */
+  return true;
+}
+
+
  /* Function vect_is_simple_cond.
    
     Input:
  /* Function vect_is_simple_cond.
    
     Input:
@@ -1191,6 +2034,8 @@ static bool
  vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
  {
    tree lhs, rhs;
  vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
  {
    tree lhs, rhs;
+  tree def;
+  enum vect_def_type dt;
  
    if (!COMPARISON_CLASS_P (cond))
      return false;
  
    if (!COMPARISON_CLASS_P (cond))
      return false;
@@ -1201,7 +2046,7 @@ vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
    if (TREE_CODE (lhs) == SSA_NAME)
      {
        tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
    if (TREE_CODE (lhs) == SSA_NAME)
      {
        tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
-      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt))
+      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
         return false;
      }
    else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
         return false;
      }
    else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
@@ -1210,7 +2055,7 @@ vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
    if (TREE_CODE (rhs) == SSA_NAME)
      {
        tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
    if (TREE_CODE (rhs) == SSA_NAME)
      {
        tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
-      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt))
+      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
         return false;
      }
    else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
         return false;
      }
    else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
@@ -1242,10 +2087,22 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    tree new_temp;
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    enum machine_mode vec_mode;
    tree new_temp;
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    enum machine_mode vec_mode;
+  tree def;
+  enum vect_def_type dt;
  
    if (!STMT_VINFO_RELEVANT_P (stmt_info))
      return false;
  
  
    if (!STMT_VINFO_RELEVANT_P (stmt_info))
      return false;
  
+  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
+
+  if (STMT_VINFO_LIVE_P (stmt_info))
+    {
+      /* FORNOW: not yet supported.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "value used after loop.");
+      return false;
+    }
+
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
  
@@ -1264,7 +2121,8 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    if (TREE_CODE (then_clause) == SSA_NAME)
      {
        tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
    if (TREE_CODE (then_clause) == SSA_NAME)
      {
        tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
-      if (!vect_is_simple_use (then_clause, loop_vinfo, &then_def_stmt))
+      if (!vect_is_simple_use (then_clause, loop_vinfo, 
+                              &then_def_stmt, &def, &dt))
         return false;
      }
    else if (TREE_CODE (then_clause) != INTEGER_CST 
         return false;
      }
    else if (TREE_CODE (then_clause) != INTEGER_CST 
@@ -1274,7 +2132,8 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
    if (TREE_CODE (else_clause) == SSA_NAME)
      {
        tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
    if (TREE_CODE (else_clause) == SSA_NAME)
      {
        tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
-      if (!vect_is_simple_use (else_clause, loop_vinfo, &else_def_stmt))
+      if (!vect_is_simple_use (else_clause, loop_vinfo, 
+                              &else_def_stmt, &def, &dt))
         return false;
      }
    else if (TREE_CODE (else_clause) != INTEGER_CST 
         return false;
      }
    else if (TREE_CODE (else_clause) != INTEGER_CST 
@@ -1298,17 +2157,17 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  
    /* Handle cond expr.  */
    vec_cond_lhs = 
  
    /* Handle cond expr.  */
    vec_cond_lhs = 
-    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
+    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
    vec_cond_rhs = 
    vec_cond_rhs = 
-    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt);
-  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt);
-  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt);
+    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
+  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
+  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
  
    /* Arguments are ready. create the new vector stmt.  */
    vec_compare = build2 (TREE_CODE (cond_expr), vectype, 
                         vec_cond_lhs, vec_cond_rhs);
  
    /* Arguments are ready. create the new vector stmt.  */
    vec_compare = build2 (TREE_CODE (cond_expr), vectype, 
                         vec_cond_lhs, vec_cond_rhs);
-  vec_cond_expr = build (VEC_COND_EXPR, vectype, 
-                        vec_compare, vec_then_clause, vec_else_clause);
+  vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 
+                         vec_compare, vec_then_clause, vec_else_clause);
  
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
    new_temp = make_ssa_name (vec_dest, *vec_stmt);
  
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
    new_temp = make_ssa_name (vec_dest, *vec_stmt);
@@ -1328,45 +2187,88 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
    bool is_store = false;
    tree vec_stmt = NULL_TREE;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    bool is_store = false;
    tree vec_stmt = NULL_TREE;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree orig_stmt_in_pattern;
    bool done;
  
    bool done;
  
-  switch (STMT_VINFO_TYPE (stmt_info))
+  if (STMT_VINFO_RELEVANT_P (stmt_info))
      {
      {
-    case op_vec_info_type:
-      done = vectorizable_operation (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;
+      switch (STMT_VINFO_TYPE (stmt_info))
+      {
+      case op_vec_info_type:
+       done = vectorizable_operation (stmt, bsi, &vec_stmt);
+       gcc_assert (done);
+       break;
+
+      case assignment_vec_info_type:
+       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
+       gcc_assert (done);
+       break;
+
+      case load_vec_info_type:
+       done = vectorizable_load (stmt, bsi, &vec_stmt);
+       gcc_assert (done);
+       break;
+
+      case store_vec_info_type:
+       done = vectorizable_store (stmt, bsi, &vec_stmt);
+       gcc_assert (done);
+       is_store = true;
+       break;
+
+      case condition_vec_info_type:
+       done = vectorizable_condition (stmt, bsi, &vec_stmt);
+       gcc_assert (done);
+       break;
+
+      default:
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "stmt not supported.");
+       gcc_unreachable ();
+      }
  
  
-    case assignment_vec_info_type:
-      done = vectorizable_assignment (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;
-
-    case load_vec_info_type:
-      done = vectorizable_load (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;
-
-    case store_vec_info_type:
-      done = vectorizable_store (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      is_store = true;
-      break;
-
-    case condition_vec_info_type:
-      done = vectorizable_condition (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;
-
-    default:
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-        fprintf (vect_dump, "stmt not supported.");
-      gcc_unreachable ();
+      gcc_assert (vec_stmt);
+      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
+      if (orig_stmt_in_pattern)
+        {
+          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
+          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+            {
+              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
+
+              /* STMT was inserted by the vectorizer to replace a computation 
+                 idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
+                 sequence that computed this idiom.  We need to record a pointer
+                 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
+                 detail in the documentation of vect_pattern_recog.  */
+
+              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
+            }
+        }
      }
  
      }
  
-  STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+  if (STMT_VINFO_LIVE_P (stmt_info))
+    {
+      switch (STMT_VINFO_TYPE (stmt_info))
+      {
+      case reduc_vec_info_type:
+        done = vectorizable_reduction (stmt, bsi, &vec_stmt);
+        gcc_assert (done);
+        break;
+
+      default:
+        done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
+        gcc_assert (done);
+      }
+
+      if (vec_stmt)
+        {
+          gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
+          STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+        }
+    }
  
  
-  return is_store;
+  return is_store; 
  }
  
  
  }
  
  
@@ -1420,7 +2322,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    tree ni = LOOP_VINFO_NITERS (loop_vinfo);
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    tree ni = LOOP_VINFO_NITERS (loop_vinfo);
    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
+  tree log_vf;
  
    pe = loop_preheader_edge (loop);
  
  
    pe = loop_preheader_edge (loop);
  
@@ -1428,6 +2330,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
       number of iterations loop executes.  */
  
    ni_name = vect_build_loop_niters (loop_vinfo);
       number of iterations loop executes.  */
  
    ni_name = vect_build_loop_niters (loop_vinfo);
+  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
  
    /* Create: ratio = ni >> log2(vf) */
  
  
    /* Create: ratio = ni >> log2(vf) */
  
@@ -1605,14 +2508,28 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
        tree var, stmt, ni, ni_name;
        block_stmt_iterator last_bsi;
  
        tree var, stmt, ni, ni_name;
        block_stmt_iterator last_bsi;
  
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
+          print_generic_expr (vect_dump, phi, TDF_SLIM);
+        }
+
        /* Skip virtual phi's.  */
        if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
         {
        /* Skip virtual phi's.  */
        if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
         {
-         if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+         if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "virtual phi. skip.");
           continue;
         }
  
             fprintf (vect_dump, "virtual phi. skip.");
           continue;
         }
  
+      /* Skip reduction phis.  */
+      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+        { 
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "reduc phi. skip.");
+          continue;
+        } 
+
        access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 
        gcc_assert (access_fn);
        evolution_part =
        access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 
        gcc_assert (access_fn);
        evolution_part =
@@ -1661,18 +2578,17 @@ static void
  vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
                                 struct loops *loops)
  {
  vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
                                 struct loops *loops)
  {
-
    tree ni_name, ratio_mult_vf_name;
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    struct loop *new_loop;
    edge update_e;
    basic_block preheader;
    tree ni_name, ratio_mult_vf_name;
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    struct loop *new_loop;
    edge update_e;
    basic_block preheader;
-#ifdef ENABLE_CHECKING
    int loop_num;
    int loop_num;
-#endif
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-    fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
+
+  initialize_original_copy_tables ();
  
    /* Generate the following variables on the preheader of original loop:
          
  
    /* Generate the following variables on the preheader of original loop:
          
@@ -1682,14 +2598,12 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
    vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                    &ratio_mult_vf_name, ratio);
  
    vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
                                    &ratio_mult_vf_name, ratio);
  
-#ifdef ENABLE_CHECKING
    loop_num  = loop->num; 
    loop_num  = loop->num; 
-#endif
    new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
                                             ratio_mult_vf_name, ni_name, false);
    new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
                                             ratio_mult_vf_name, ni_name, false);
-#ifdef ENABLE_CHECKING
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
+#ifdef ENABLE_CHECKING
    slpeel_verify_cfg_after_peeling (loop, new_loop);
  #endif
  
    slpeel_verify_cfg_after_peeling (loop, new_loop);
  #endif
  
@@ -1712,7 +2626,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
    /* After peeling we have to reset scalar evolution analyzer.  */
    scev_reset ();
  
    /* After peeling we have to reset scalar evolution analyzer.  */
    scev_reset ();
  
-  return;
+  free_original_copy_tables ();
  }
  
  
  }
  
  
@@ -1750,7 +2664,6 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
    stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
    stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
-  tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
    tree niters_type = TREE_TYPE (loop_niters);
  
    pe = loop_preheader_edge (loop); 
    tree niters_type = TREE_TYPE (loop_niters);
  
    pe = loop_preheader_edge (loop); 
@@ -1761,7 +2674,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
        int element_size = vectype_align/vf;
        int elem_misalign = byte_misalign / element_size;
  
        int element_size = vectype_align/vf;
        int elem_misalign = byte_misalign / element_size;
  
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "known alignment = %d.", byte_misalign);
        iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
      }
          fprintf (vect_dump, "known alignment = %d.", byte_misalign);
        iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
      }
@@ -1775,8 +2688,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
        tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
        tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
        tree elem_size_log =
        tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
        tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
        tree elem_size_log =
-        build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
-      tree vf_tree = build_int_cst (unsigned_type_node, vf);
+        build_int_cst (type, exact_log2 (vectype_align/vf));
+      tree vf_minus_1 = build_int_cst (type, vf - 1);
+      tree vf_tree = build_int_cst (type, vf);
        tree byte_misalign;
        tree elem_misalign;
  
        tree byte_misalign;
        tree elem_misalign;
  
@@ -1789,11 +2703,11 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
    
        /* Create:  elem_misalign = byte_misalign / element_size  */
        elem_misalign =
    
        /* Create:  elem_misalign = byte_misalign / element_size  */
        elem_misalign =
-        build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
+        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
  
        /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
  
        /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
-      iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
-      iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
+      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
+      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
        iters = fold_convert (niters_type, iters);
      }
  
        iters = fold_convert (niters_type, iters);
      }
  
@@ -1804,7 +2718,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
    if (TREE_CODE (loop_niters) != INTEGER_CST)
      iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
  
    if (TREE_CODE (loop_niters) != INTEGER_CST)
      iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      {
        fprintf (vect_dump, "niters for prolog loop: ");
        print_generic_expr (vect_dump, iters, TDF_SLIM);
      {
        fprintf (vect_dump, "niters for prolog loop: ");
        print_generic_expr (vect_dump, iters, TDF_SLIM);
@@ -1830,18 +2744,16 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
     NITERS iterations were peeled from LOOP.  DR represents a data reference
     in LOOP.  This function updates the information recorded in DR to
     account for the fact that the first NITERS iterations had already been 
     NITERS iterations were peeled from LOOP.  DR represents a data reference
     in LOOP.  This function updates the information recorded in DR to
     account for the fact that the first NITERS iterations had already been 
-   executed.  Specifically, it updates the OFFSET field of stmt_info.  */
+   executed.  Specifically, it updates the OFFSET field of DR.  */
  
  static void
  vect_update_init_of_dr (struct data_reference *dr, tree niters)
  {
  
  static void
  vect_update_init_of_dr (struct data_reference *dr, tree niters)
  {
-  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
-  tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
+  tree offset = DR_OFFSET (dr);
        
        
-  niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters, 
-                        STMT_VINFO_VECT_STEP (stmt_info)));
-  offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
-  STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
+  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
+  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
+  DR_OFFSET (dr) = offset;
  }
  
  
  }
  
  
@@ -1857,23 +2769,14 @@ static void
  vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
  {
    unsigned int i;
  vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
  {
    unsigned int i;
-  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
-  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+  struct data_reference *dr;
  
    if (vect_dump && (dump_flags & TDF_DETAILS))
      fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
  
  
    if (vect_dump && (dump_flags & TDF_DETAILS))
      fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
  
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
-    {
-      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
-      vect_update_init_of_dr (dr, niters);
-    }
-
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
-    {
-      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
-      vect_update_init_of_dr (dr, niters);
-    }
+  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+    vect_update_init_of_dr (dr, niters);
  }
  
  
  }
  
  
@@ -1893,9 +2796,11 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
    tree n_iters;
    struct loop *new_loop;
  
    tree n_iters;
    struct loop *new_loop;
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
  
      fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
  
+  initialize_original_copy_tables ();
+
    ni_name = vect_build_loop_niters (loop_vinfo);
    niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
    
    ni_name = vect_build_loop_niters (loop_vinfo);
    niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
    
@@ -1903,15 +2808,15 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
    new_loop = 
         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 
                                        niters_of_prolog_loop, ni_name, true); 
    new_loop = 
         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 
                                        niters_of_prolog_loop, ni_name, true); 
-#ifdef ENABLE_CHECKING
    gcc_assert (new_loop);
    gcc_assert (new_loop);
+#ifdef ENABLE_CHECKING
    slpeel_verify_cfg_after_peeling (new_loop, loop);
  #endif
  
    /* Update number of times loop executes.  */
    n_iters = LOOP_VINFO_NITERS (loop_vinfo);
    slpeel_verify_cfg_after_peeling (new_loop, loop);
  #endif
  
    /* Update number of times loop executes.  */
    n_iters = LOOP_VINFO_NITERS (loop_vinfo);
-  LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
-               TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
+  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
+               TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
  
    /* Update the init conditions of the access functions of all data refs.  */
    vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
  
    /* Update the init conditions of the access functions of all data refs.  */
    vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
@@ -1919,7 +2824,129 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
    /* After peeling we have to reset scalar evolution analyzer.  */
    scev_reset ();
  
    /* After peeling we have to reset scalar evolution analyzer.  */
    scev_reset ();
  
-  return;
+  free_original_copy_tables ();
+}
+
+
+/* Function vect_create_cond_for_align_checks.
+
+   Create a conditional expression that represents the alignment checks for
+   all of data references (array element references) whose alignment must be
+   checked at runtime.
+
+   Input:
+   LOOP_VINFO - two fields of the loop information are used.
+                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
+                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
+
+   Output:
+   COND_EXPR_STMT_LIST - statements needed to construct the conditional
+                         expression.
+   The returned value is the conditional expression to be used in the if
+   statement that controls which version of the loop gets executed at runtime.
+
+   The algorithm makes two assumptions:
+     1) The number of bytes "n" in a vector is a power of 2.
+     2) An address "a" is aligned if a%n is zero and that this
+        test can be done as a&(n-1) == 0.  For example, for 16
+        byte vectors the test is a&0xf == 0.  */
+
+static tree
+vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
+                                   tree *cond_expr_stmt_list)
+{
+  VEC(tree,heap) *may_misalign_stmts
+    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
+  tree ref_stmt;
+  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
+  tree mask_cst;
+  unsigned int i;
+  tree psize;
+  tree int_ptrsize_type;
+  char tmp_name[20];
+  tree or_tmp_name = NULL_TREE;
+  tree and_tmp, and_tmp_name, and_stmt;
+  tree ptrsize_zero;
+
+  /* Check that mask is one less than a power of 2, i.e., mask is
+     all zeros followed by all ones.  */
+  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
+
+  /* CHECKME: what is the best integer or unsigned type to use to hold a
+     cast from a pointer value?  */
+  psize = TYPE_SIZE (ptr_type_node);
+  int_ptrsize_type
+    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
+
+  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
+     of the first vector of the i'th data reference. */
+
+  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
+    {
+      tree new_stmt_list = NULL_TREE;   
+      tree addr_base;
+      tree addr_tmp, addr_tmp_name, addr_stmt;
+      tree or_tmp, new_or_tmp_name, or_stmt;
+
+      /* create: addr_tmp = (int)(address_of_first_vector) */
+      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt, 
+                                                       &new_stmt_list, 
+                                                       NULL_TREE);
+
+      if (new_stmt_list != NULL_TREE)
+        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
+
+      sprintf (tmp_name, "%s%d", "addr2int", i);
+      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
+      add_referenced_tmp_var (addr_tmp);
+      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
+      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
+      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
+                          addr_tmp_name, addr_stmt);
+      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
+      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
+
+      /* The addresses are OR together.  */
+
+      if (or_tmp_name != NULL_TREE)
+        {
+          /* create: or_tmp = or_tmp | addr_tmp */
+          sprintf (tmp_name, "%s%d", "orptrs", i);
+          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
+          add_referenced_tmp_var (or_tmp);
+          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
+          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
+                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
+                                   or_tmp_name,
+                                    addr_tmp_name));
+          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
+          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
+          or_tmp_name = new_or_tmp_name;
+        }
+      else
+        or_tmp_name = addr_tmp_name;
+
+    } /* end for i */
+
+  mask_cst = build_int_cst (int_ptrsize_type, mask);
+
+  /* create: and_tmp = or_tmp & mask  */
+  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
+  add_referenced_tmp_var (and_tmp);
+  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
+
+  and_stmt = build2 (MODIFY_EXPR, void_type_node,
+                     and_tmp_name,
+                     build2 (BIT_AND_EXPR, int_ptrsize_type,
+                             or_tmp_name, mask_cst));
+  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
+  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
+
+  /* Make and_tmp the left operand of the conditional test against zero.
+     if and_tmp has a nonzero bit then some address is unaligned.  */
+  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
+  return build2 (EQ_EXPR, boolean_type_node,
+                 and_tmp_name, ptrsize_zero);
  }
  
  
  }
  
  
@@ -1940,11 +2967,72 @@ vect_transform_loop (loop_vec_info loop_vinfo,
    int i;
    tree ratio = NULL;
    int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    int i;
    tree ratio = NULL;
    int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  bitmap_iterator bi;
+  unsigned int j;
  
  
-  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vec_transform_loop ===");
  
      fprintf (vect_dump, "=== vec_transform_loop ===");
  
-  
+  /* If the loop has data references that may or may not be aligned then
+     two versions of the loop need to be generated, one which is vectorized
+     and one which isn't.  A test is then generated to control which of the
+     loops is executed.  The test checks for the alignment of all of the
+     data references that may or may not be aligned. */
+
+  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+    {
+      struct loop *nloop;
+      tree cond_expr;
+      tree cond_expr_stmt_list = NULL_TREE;
+      basic_block condition_bb;
+      block_stmt_iterator cond_exp_bsi;
+      basic_block merge_bb;
+      basic_block new_exit_bb;
+      edge new_exit_e, e;
+      tree orig_phi, new_phi, arg;
+
+      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
+                                                     &cond_expr_stmt_list);
+      initialize_original_copy_tables ();
+      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
+      free_original_copy_tables();
+
+      /** Loop versioning violates an assumption we try to maintain during 
+        vectorization - that the loop exit block has a single predecessor.
+        After versioning, the exit block of both loop versions is the same
+        basic block (i.e. it has two predecessors). Just in order to simplify
+        following transformations in the vectorizer, we fix this situation
+        here by adding a new (empty) block on the exit-edge of the loop,
+        with the proper loop-exit phis to maintain loop-closed-form.  **/
+      
+      merge_bb = loop->single_exit->dest;
+      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
+      new_exit_bb = split_edge (loop->single_exit);
+      add_bb_to_loop (new_exit_bb, loop->outer);
+      new_exit_e = loop->single_exit;
+      e = EDGE_SUCC (new_exit_bb, 0);
+
+      for (orig_phi = phi_nodes (merge_bb); orig_phi; 
+          orig_phi = PHI_CHAIN (orig_phi))
+       {
+          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
+                                    new_exit_bb);
+          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+          add_phi_arg (new_phi, arg, new_exit_e);
+         SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+       } 
+
+      /** end loop-exit-fixes after versioning  **/
+
+      update_ssa (TODO_update_ssa);
+      cond_exp_bsi = bsi_last (condition_bb);
+      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
+    }
+
+  /* CHECKME: we wouldn't need this if we calles update_ssa once
+     for all loops.  */
+  bitmap_zero (vect_vnames_to_rename);
+
    /* Peel the loop if there are data refs with unknown alignment.
       Only one data ref with unknown store is allowed.  */
  
    /* Peel the loop if there are data refs with unknown alignment.
       Only one data ref with unknown store is allowed.  */
  
@@ -1990,27 +3078,27 @@ vect_transform_loop (loop_vec_info loop_vinfo,
           stmt_vec_info stmt_info;
           bool is_store;
  
           stmt_vec_info stmt_info;
           bool is_store;
  
-         if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+         if (vect_print_dump_info (REPORT_DETAILS))
             {
               fprintf (vect_dump, "------>vectorizing statement: ");
               print_generic_expr (vect_dump, stmt, TDF_SLIM);
             }   
           stmt_info = vinfo_for_stmt (stmt);
           gcc_assert (stmt_info);
             {
               fprintf (vect_dump, "------>vectorizing statement: ");
               print_generic_expr (vect_dump, stmt, TDF_SLIM);
             }   
           stmt_info = vinfo_for_stmt (stmt);
           gcc_assert (stmt_info);
-         if (!STMT_VINFO_RELEVANT_P (stmt_info))
+         if (!STMT_VINFO_RELEVANT_P (stmt_info)
+             && !STMT_VINFO_LIVE_P (stmt_info))
             {
               bsi_next (&si);
               continue;
             }
             {
               bsi_next (&si);
               continue;
             }
-#ifdef ENABLE_CHECKING
           /* FORNOW: Verify that all stmts operate on the same number of
                      units and no inner unrolling is necessary.  */
           gcc_assert 
                 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
           /* FORNOW: Verify that all stmts operate on the same number of
                      units and no inner unrolling is necessary.  */
           gcc_assert 
                 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
-                == vectorization_factor);
-#endif
+                == (unsigned HOST_WIDE_INT) vectorization_factor);
+
           /* -------- vectorize statement ------------ */
           /* -------- vectorize statement ------------ */
-         if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+         if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "transform statement.");
  
           is_store = vect_transform_stmt (stmt, &si);
             fprintf (vect_dump, "transform statement.");
  
           is_store = vect_transform_stmt (stmt, &si);
@@ -2019,8 +3107,8 @@ vect_transform_loop (loop_vec_info loop_vinfo,
               /* Free the attached stmt_vec_info and remove the stmt.  */
               stmt_ann_t ann = stmt_ann (stmt);
               free (stmt_info);
               /* Free the attached stmt_vec_info and remove the stmt.  */
               stmt_ann_t ann = stmt_ann (stmt);
               free (stmt_info);
-             set_stmt_info (ann, NULL);
-             bsi_remove (&si);
+             set_stmt_info ((tree_ann_t)ann, NULL);
+             bsi_remove (&si, true);
               continue;
             }
  
               continue;
             }
  
@@ -2030,11 +3118,14 @@ vect_transform_loop (loop_vec_info loop_vinfo,
  
    slpeel_make_loop_iterate_ntimes (loop, ratio);
  
  
    slpeel_make_loop_iterate_ntimes (loop, ratio);
  
+  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
+    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
+
    /* The memory tags and pointers in vectorized statements need to
       have their SSA forms updated.  FIXME, why can't this be delayed
       until all the loops have been transformed?  */
    update_ssa (TODO_update_ssa);
  
    /* The memory tags and pointers in vectorized statements need to
       have their SSA forms updated.  FIXME, why can't this be delayed
       until all the loops have been transformed?  */
    update_ssa (TODO_update_ssa);
  
-  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
+  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
      fprintf (vect_dump, "LOOP VECTORIZED.");
  }
      fprintf (vect_dump, "LOOP VECTORIZED.");
  }