OSDN Git Service

Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns
author: spop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 2 Dec 2010 16:53:16 +0000 (16:53 +0000)
committer: spop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 2 Dec 2010 16:53:16 +0000 (16:53 +0000)
2010-11-30  Sebastian Pop  <sebastian.pop@amd.com>

PR tree-optimization/45199
* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New.
(stores_zero_from_loop): Call
mem_write_stride_of_same_size_as_unit_type_p.
* tree-data-ref.h (stride_of_unit_type_p): New.
* tree-loop-distribution.c (generate_memset_zero): Simplified.
Call stride_of_unit_type_p.
(build_rdg_partition_for_component): Do not call
rdg_flag_similar_memory_accesses when
flag_tree_loop_distribute_patterns is set.

* gcc.dg/tree-ssa/ldist-15.c: New.
* gcc.dg/tree-ssa/ldist-16.c: New.
* gfortran.dg/ldist-pr45199.f: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@167380 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c [new file with mode: 0644]
gcc/testsuite/gfortran.dg/ldist-pr45199.f [new file with mode: 0644]
gcc/tree-data-ref.c
gcc/tree-data-ref.h
gcc/tree-loop-distribution.c

index 3241bbd..c72c693 100644 (file)
@@ -1,3 +1,16 @@
+2010-12-02  Sebastian Pop  <sebastian.pop@amd.com>
+
+       PR tree-optimization/45199
+       * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New.
+       (stores_zero_from_loop): Call
+       mem_write_stride_of_same_size_as_unit_type_p.
+       * tree-data-ref.h (stride_of_unit_type_p): New.
+       * tree-loop-distribution.c (generate_memset_zero): Simplified.
+       Call stride_of_unit_type_p.
+       (build_rdg_partition_for_component): Do not call
+       rdg_flag_similar_memory_accesses when
+       flag_tree_loop_distribute_patterns is set.
+
 2010-12-02  Richard Guenther  <rguenther@suse.de>
 
        * tree-vect-loop.c (vect_analyze_scalar_cycles_1): Disregard
index 43f1768..3eeee83 100644 (file)
@@ -1,3 +1,10 @@
+2010-12-02  Sebastian Pop  <sebastian.pop@amd.com>
+
+       PR tree-optimization/45199
+       * gcc.dg/tree-ssa/ldist-15.c: New.
+       * gcc.dg/tree-ssa/ldist-16.c: New.
+       * gfortran.dg/ldist-pr45199.f: New.
+
 2010-12-02  Richard Guenther  <rguenther@suse.de>
 
        PR tree-optimization/46723
@@ -23,8 +30,8 @@
 2010-12-02  Nicola Pero  <nicola.pero@meta-innovation.com>
 
        * objc.dg/exceptions-6.m: New.
-       * obj-c++.dg/exceptions-6.mm: New.      
-       
+       * obj-c++.dg/exceptions-6.mm: New.
+
 2010-12-01  Jan Hubicka  <jh@suse.cz>
 
        * gcc.c-torture/execute/bcp-1.c: Make ready for -fuse-linker-plugin
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
new file mode 100644 (file)
index 0000000..7ce3b95
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+
+void foo (int n)
+{
+  int i;
+
+  for (i = 0; i < n; ++i)
+    {
+      x[2*i] = 0; /* Zero store to even indices: advances two elements per iteration.  */
+      x[2*i + 1] = 1; /* Non-zero store to the odd indices of the same array.  */
+    }
+}
+
+/* We should not apply loop distribution as it is not beneficial from
+   a data locality point of view.  Also it is not possible to generate
+   a memset (0) as the write has a stride of 2.  */
+
+/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
new file mode 100644 (file)
index 0000000..61e8e56
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+
+void foo (int n)
+{
+  int i;
+
+  for (i = 0; i < n; ++i)
+    {
+      x[i] = 0; /* Zero store to consecutive elements: advances one element per iteration.  */
+      x[2*i + 1] = 1; /* Non-zero store to the odd indices of the same array.  */
+    }
+}
+
+/* We should apply loop distribution and generate a memset (0).  */
+
+/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f
new file mode 100644 (file)
index 0000000..6f65501
--- /dev/null
@@ -0,0 +1,27 @@
+! { dg-do compile }
+! { dg-options "-O3 -fdump-tree-ldist-details" }
+
+      parameter(numlev=3,numoblev=1000)
+      integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev)
+      logical l_numob(numoblev,numlev)
+      do ixe=1,numoblev
+         do iye=1,numlev
+            i_otyp(ixe,iye)=0
+            i_styp(ixe,iye)=0
+            l_numob(ixe,iye)=.false.
+         enddo
+      enddo
+      do i=1,m
+         do j=1,n
+            if (l_numob(i,j)) then
+               write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j)
+            endif
+         enddo
+      enddo
+      end
+
+! GCC should apply memset zero loop distribution and it should not ICE.
+
+! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } }
+! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } }
+! { dg-final { cleanup-tree-dump "ldist" } }
index 3cee320..094d168 100644 (file)
@@ -4974,6 +4974,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
   free (bbs);
 }
 
+/* Returns true when STMT is an assignment whose LHS is a data reference
+   that dr_analyze_innermost can analyze and whose step (DR_STEP) equals,
+   in absolute value, the unit size of the LHS type; false otherwise.  */
+
+static bool
+mem_write_stride_of_same_size_as_unit_type_p (gimple stmt)
+{
+  struct data_reference *dr = XCNEW (struct data_reference); /* Scratch data reference, released below.  */
+  tree op0 = gimple_assign_lhs (stmt);
+  bool res;
+
+  DR_STMT (dr) = stmt;
+  DR_REF (dr) = op0;
+
+  res = dr_analyze_innermost (dr) /* && short-circuits: DR_STEP is only read when the analysis succeeded.  */
+    && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
+
+  free_data_ref (dr); /* Released whether or not the analysis succeeded.  */
+  return res;
+}
+
 /* Initialize STMTS with all the statements of LOOP that contain a
    store to memory of the form "A[i] = 0".  */
 
@@ -4994,7 +5015,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
          && is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == INTEGER_CST
          && (op = gimple_assign_rhs1 (stmt))
-         && (integer_zerop (op) || real_zerop (op)))
+         && (integer_zerop (op) || real_zerop (op))
+         && mem_write_stride_of_same_size_as_unit_type_p (stmt))
        VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
 
   free (bbs);
index 844a2ae..d929f31 100644 (file)
@@ -603,6 +603,17 @@ void remove_similar_memory_refs (VEC (gimple, heap) **);
 bool rdg_defs_used_in_other_loops_p (struct graph *, int);
 bool have_similar_memory_accesses (gimple, gimple);
 
+/* Returns true when the absolute value of STRIDE (ABS_EXPR folded in the
+   type of STRIDE) compares equal to TYPE_SIZE_UNIT (TYPE).  */
+
+static inline bool
+stride_of_unit_type_p (tree stride, tree type)
+{
+  return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride),
+                                        stride), /* NOTE(review): presumably STRIDE is an INTEGER_CST here; confirm fold_unary cannot yield NULL_TREE otherwise.  */
+                            TYPE_SIZE_UNIT (type));
+}
+
 /* Determines whether RDG vertices V1 and V2 access to similar memory
    locations, in which case they have to be in the same partition.  */
 
index 007c4f3..357f51f 100644 (file)
@@ -258,42 +258,27 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
   if (!dr_analyze_innermost (dr))
     goto end;
 
-  /* Test for a positive stride, iterating over every element.  */
-  if (integer_zerop (size_binop (MINUS_EXPR,
-                                fold_convert (sizetype, DR_STEP (dr)),
-                                TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
-    {
-      addr_base = fold_convert_loc (loc, sizetype,
-                                   size_binop_loc (loc, PLUS_EXPR,
-                                                   DR_OFFSET (dr),
-                                                   DR_INIT (dr)));
-      addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
-                                  TREE_TYPE (DR_BASE_ADDRESS (dr)),
-                                  DR_BASE_ADDRESS (dr), addr_base);
-
-      nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
-    }
+  if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)))
+    goto end;
+
+  nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
+  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
+  addr_base = fold_convert_loc (loc, sizetype, addr_base);
 
   /* Test for a negative stride, iterating over every element.  */
-  else if (integer_zerop (size_binop (PLUS_EXPR,
-                                     TYPE_SIZE_UNIT (TREE_TYPE (op0)),
-                                     fold_convert (sizetype, DR_STEP (dr)))))
+  if (integer_zerop (size_binop (PLUS_EXPR,
+                                TYPE_SIZE_UNIT (TREE_TYPE (op0)),
+                                fold_convert (sizetype, DR_STEP (dr)))))
     {
-      nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
-
-      addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
-      addr_base = fold_convert_loc (loc, sizetype, addr_base);
       addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
       addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
-      addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
-                                  TREE_TYPE (DR_BASE_ADDRESS (dr)),
-                                  DR_BASE_ADDRESS (dr), addr_base);
     }
-  else
-    goto end;
 
+  addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
+                              TREE_TYPE (DR_BASE_ADDRESS (dr)),
+                              DR_BASE_ADDRESS (dr), addr_base);
   mem = force_gimple_operand (addr_base, &stmts, true, NULL);
   gimple_seq_add_seq (&stmt_list, stmts);
 
@@ -781,8 +766,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c,
      and determine those vertices that have some memory affinity with
      the current nodes in the component: these are stores to the same
      arrays, i.e. we're taking care of cache locality.  */
-  rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
-                                   other_stores);
+  if (!flag_tree_loop_distribute_patterns)
+    rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
+                                     other_stores);
 
   rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);