OSDN Git Service

2010-10-22 Richard Guenther <rguenther@suse.de>
author: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 22 Oct 2010 14:44:48 +0000 (14:44 +0000)
committer: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 22 Oct 2010 14:44:48 +0000 (14:44 +0000)
PR tree-optimization/45720
* tree-vect-data-refs.c (vect_update_misalignment_for_peel):
Handle negative step.
(vect_enhance_data_refs_alignment): Likewise.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Likewise.
(vect_create_cond_for_align_checks): Likewise.
(vect_create_cond_for_alias_checks): Likewise.

* gcc.dg/torture/pr45720.c: New testcase.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@165832 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/torture/pr45720.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-loop-manip.c

index 4f9fc04..2d39d39 100644 (file)
@@ -1,3 +1,13 @@
+2010-10-22  Richard Guenther  <rguenther@suse.de>
+
+       PR tree-optimization/45720
+       * tree-vect-data-refs.c (vect_update_misalignment_for_peel):
+       Handle negative step.
+       (vect_enhance_data_refs_alignment): Likewise.
+       * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Likewise.
+       (vect_create_cond_for_align_checks): Likewise.
+       (vect_create_cond_for_alias_checks): Likewise.
+
 2010-10-22  Ira Rosen  <irar@il.ibm.com>
 
        PR tree-optimization/46126
index 1ca8a4a..b853e6e 100644 (file)
@@ -1,3 +1,8 @@
+2010-10-22  Richard Guenther  <rguenther@suse.de>
+
+       PR tree-optimization/45720
+       * gcc.dg/torture/pr45720.c: New testcase.
+
 2010-10-22  Ira Rosen  <irar@il.ibm.com>
 
        PR tree-optimization/46126
diff --git a/gcc/testsuite/gcc.dg/torture/pr45720.c b/gcc/testsuite/gcc.dg/torture/pr45720.c
new file mode 100644 (file)
index 0000000..9de8d11
--- /dev/null
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-vectorize" } */
+
+float val[256];  /* Input array read by foo, bar and foobar.  */
+float x;  /* Running sum updated by foo and bar.  */
+void __attribute__((noinline,noclone))  /* Not inlined, not cloned.  */
+foo(int len, int beg)  /* Add 2 * val[i] to x for i = len - 1 down to beg.  */
+{
+  int i;
+  for (i = len - 1; i >= beg; --i)  /* Index decreases: val is read backwards.  */
+    x += val[i] * 2;
+}
+void __attribute__((noinline,noclone))  /* Not inlined, not cloned.  */
+bar(void)  /* Four backward sums over val with constant bounds, added to x.  */
+{
+  int i;
+  for (i = 255; i >= 0; --i)  /* Start at the last element of val.  */
+    x += val[i] * 2;
+  for (i = 254; i >= 0; --i)  /* Start one element lower than the loop above.  */
+    x += val[i] * 2;
+  for (i = 253; i >= 0; --i)  /* Start two elements lower.  */
+    x += val[i] * 2;
+  for (i = 252; i >= 0; --i)  /* Start three lower; presumably the four starts vary alignment.  */
+    x += val[i] * 2;
+}
+float y[256];  /* Output array; foobar writes elements 0 .. 251.  */
+void __attribute__((noinline,noclone))  /* Not inlined, not cloned.  */
+foobar(void)  /* y[i] = sum of 2 * val[k] for k = 252 - i .. 255 - i.  */
+{
+  int i;
+  for (i = 0; i < 252; ++i)  /* i ascends, so y is written forwards ...  */
+    {
+      float l = 0;
+      l += val[255 - i] * 2;  /* ... while every val index falls as i grows.  */
+      l += val[254 - i] * 2;
+      l += val[253 - i] * 2;
+      l += val[252 - i] * 2;  /* Lowest index read is 252 - 251 = 1.  */
+      y[i] = l;
+    }
+}
+int main()  /* Run each kernel once; x and y are never checked here.  */
+{
+  foo(256-1, 0);  /* Backward sum starting at index 254.  */
+  foo(256-2, 0);  /* Starting at index 253.  */
+  foo(256-3, 0);  /* Starting at index 252.  */
+  foo(256-4, 0);  /* Starting at index 251.  */
+  bar();
+  foobar();
+  return 0;  /* Always exits 0; presumably the test passes if nothing faults.  */
+}
index 0828e22..b4da517 100644 (file)
@@ -1016,10 +1016,11 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
   if (known_alignment_for_access_p (dr)
       && known_alignment_for_access_p (dr_peel))
     {
+      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
       int misal = DR_MISALIGNMENT (dr);
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-      misal += npeel * dr_size;
-      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
+      misal += negative ? -npeel * dr_size : npeel * dr_size;
+      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
       SET_DR_MISALIGNMENT (dr, misal);
       return;
     }
@@ -1503,6 +1504,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
           if (known_alignment_for_access_p (dr))
             {
               unsigned int npeel_tmp;
+             bool negative = tree_int_cst_compare (DR_STEP (dr),
+                                                   size_zero_node) < 0;
 
               /* Save info about DR in the hash table.  */
               if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
@@ -1514,7 +1517,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
               nelements = TYPE_VECTOR_SUBPARTS (vectype);
               mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                 TREE_TYPE (DR_REF (dr))));
-              npeel_tmp = (nelements - mis) % vf;
+              npeel_tmp = (negative
+                          ? (mis - nelements) : (nelements - mis)) & (vf - 1);
 
               /* For multiple types, it is possible that the bigger type access
                  will have more than one peeling option.  E.g., a loop with two
@@ -1707,6 +1711,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
       if (known_alignment_for_access_p (dr0))
         {
+         bool negative = tree_int_cst_compare (DR_STEP (dr0),
+                                               size_zero_node) < 0;
           if (!npeel)
             {
               /* Since it's known at compile time, compute the number of
@@ -1716,7 +1722,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  count.  */
               mis = DR_MISALIGNMENT (dr0);
               mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
-              npeel = nelements - mis;
+              npeel = (negative ? mis - nelements : nelements - mis) & (vf - 1);
             }
 
          /* For interleaved data access every iteration accesses all the
index 5771c51..f006182 100644 (file)
@@ -1993,8 +1993,11 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
   else
     {
       gimple_seq new_stmts = NULL;
+      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+      tree offset = negative
+         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
-                                               &new_stmts, NULL_TREE, loop);
+                                               &new_stmts, offset, loop);
       tree ptr_type = TREE_TYPE (start_addr);
       tree size = TYPE_SIZE (ptr_type);
       tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
@@ -2019,7 +2022,10 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
 
       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
-      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
+      if (negative)
+       iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
+      else
+       iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
       iters = fold_convert (niters_type, iters);
     }
@@ -2236,11 +2242,17 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
       tree addr_tmp, addr_tmp_name;
       tree or_tmp, new_or_tmp_name;
       gimple addr_stmt, or_stmt;
+      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
+      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+      bool negative = tree_int_cst_compare
+       (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
+      tree offset = negative
+       ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
 
       /* create: addr_tmp = (int)(address_of_first_vector) */
       addr_base =
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
-                                             NULL_TREE, loop);
+                                             offset, loop);
       if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
 
@@ -2387,6 +2399,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
       tree addr_base_a, addr_base_b;
       tree segment_length_a, segment_length_b;
       gimple stmt_a, stmt_b;
+      tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
 
       dr_a = DDR_A (ddr);
       stmt_a = DR_STMT (DDR_A (ddr));
@@ -2425,19 +2438,22 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
        }
 
+      seg_a_min = addr_base_a;
+      seg_a_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a), 
+                              addr_base_a, segment_length_a);
+      if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
+       seg_a_min = seg_a_max, seg_a_max = addr_base_a;
+
+      seg_b_min = addr_base_b;
+      seg_b_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
+                              addr_base_b, segment_length_b);
+      if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
+       seg_b_min = seg_b_max, seg_b_max = addr_base_b;
 
       part_cond_expr =
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-         fold_build2 (LT_EXPR, boolean_type_node,
-           fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
-             addr_base_a,
-             segment_length_a),
-           addr_base_b),
-         fold_build2 (LT_EXPR, boolean_type_node,
-           fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
-             addr_base_b,
-             segment_length_b),
-           addr_base_a));
+         fold_build2 (LT_EXPR, boolean_type_node, seg_a_max, seg_b_min),
+         fold_build2 (LT_EXPR, boolean_type_node, seg_b_max, seg_a_min));
 
       if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,