2010-11-30 Sebastian Pop <sebastian.pop@amd.com>
PR tree-optimization/45199
* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New.
(stores_zero_from_loop): Call
mem_write_stride_of_same_size_as_unit_type_p.
* tree-data-ref.h (stride_of_unit_type_p): New.
* tree-loop-distribution.c (generate_memset_zero): Simplified.
Call stride_of_unit_type_p.
(build_rdg_partition_for_component): Do not call
rdg_flag_similar_memory_accesses when
flag_tree_loop_distribute_patterns is set.
* gcc.dg/tree-ssa/ldist-15.c: New.
* gcc.dg/tree-ssa/ldist-16.c: New.
* gfortran.dg/ldist-pr45199.f: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@167380 138bc75d-0d04-0410-961f-82ee72b054a4
+2010-12-02 Sebastian Pop <sebastian.pop@amd.com>
+
+ PR tree-optimization/45199
+ * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New.
+ (stores_zero_from_loop): Call
+ mem_write_stride_of_same_size_as_unit_type_p.
+ * tree-data-ref.h (stride_of_unit_type_p): New.
+ * tree-loop-distribution.c (generate_memset_zero): Simplified.
+ Call stride_of_unit_type_p.
+ (build_rdg_partition_for_component): Do not call
+ rdg_flag_similar_memory_accesses when
+ flag_tree_loop_distribute_patterns is set.
+
2010-12-02 Richard Guenther <rguenther@suse.de>
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Disregard
+2010-12-02 Sebastian Pop <sebastian.pop@amd.com>
+
+ PR tree-optimization/45199
+ * gcc.dg/tree-ssa/ldist-15.c: New.
+ * gcc.dg/tree-ssa/ldist-16.c: New.
+ * gfortran.dg/ldist-pr45199.f: New.
+
2010-12-02 Richard Guenther <rguenther@suse.de>
PR tree-optimization/46723
2010-12-02 Nicola Pero <nicola.pero@meta-innovation.com>
* objc.dg/exceptions-6.m: New.
- * obj-c++.dg/exceptions-6.mm: New.
-
+ * obj-c++.dg/exceptions-6.mm: New.
+
2010-12-01 Jan Hubicka <jh@suse.cz>
* gcc.c-torture/execute/bcp-1.c: Make ready for -fuse-linker-plugin
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+/* Single loop with two interleaved stores into the global array x.  */
+void foo (int n)
+{
+ int i;
+
+ for (i = 0; i < n; ++i)
+ {
+ x[2*i] = 0; /* Zero store to even elements: stride of two ints.  */
+ x[2*i + 1] = 1; /* Non-zero store to the odd elements of the same array.  */
+ }
+}
+
+/* We should not apply loop distribution as it is not beneficial from
+ a data locality point of view. Also it is not possible to generate
+ a memset (0) as the write has a stride of 2. */
+
+/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+/* Single loop mixing a unit-stride zero store with a strided store.  */
+void foo (int n)
+{
+ int i;
+
+ for (i = 0; i < n; ++i)
+ {
+ x[i] = 0; /* Zero store to consecutive elements: the memset (0) candidate.  */
+ x[2*i + 1] = 1; /* Non-zero store with a stride of two ints.  */
+ }
+}
+
+/* We should apply loop distribution and generate a memset (0). */
+
+/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
--- /dev/null
+! { dg-do compile }
+! { dg-options "-O3 -fdump-tree-ldist-details" }
+
+ parameter(numlev=3,numoblev=1000)
+ integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev)
+ logical l_numob(numoblev,numlev)
+ do ixe=1,numoblev ! one loop nest clears all three arrays
+ do iye=1,numlev
+ i_otyp(ixe,iye)=0
+ i_styp(ixe,iye)=0
+ l_numob(ixe,iye)=.false.
+ enddo
+ enddo
+ do i=1,m ! NOTE(review): m and n are never assigned here; presumably harmless for a compile-only test -- confirm
+ do j=1,n
+ if (l_numob(i,j)) then
+ write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j)
+ endif
+ enddo
+ enddo
+ end
+
+! GCC should apply memset zero loop distribution and it should not ICE.
+
+! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } }
+! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } }
+! { dg-final { cleanup-tree-dump "ldist" } }
free (bbs);
}
+/* Returns true when STMT is an assignment that contains a data
+ reference on its LHS with a stride of the same size as its unit
+ type.  A temporary data reference is built around the LHS of STMT
+ and analyzed with dr_analyze_innermost; when that analysis fails
+ the result is false, otherwise the result is that of
+ stride_of_unit_type_p applied to the analyzed step and the type
+ of the LHS.  The temporary is released with free_data_ref before
+ returning, whatever the outcome.  */
+
+static bool
+mem_write_stride_of_same_size_as_unit_type_p (gimple stmt)
+{
+ struct data_reference *dr = XCNEW (struct data_reference);
+ tree op0 = gimple_assign_lhs (stmt);
+ bool res;
+
+ DR_STMT (dr) = stmt;
+ DR_REF (dr) = op0;
+
+ res = dr_analyze_innermost (dr)
+ && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
+
+ free_data_ref (dr);
+ return res;
+}
+
/* Initialize STMTS with all the statements of LOOP that contain a
store to memory of the form "A[i] = 0". */
&& is_gimple_assign (stmt)
&& gimple_assign_rhs_code (stmt) == INTEGER_CST
&& (op = gimple_assign_rhs1 (stmt))
- && (integer_zerop (op) || real_zerop (op)))
+ && (integer_zerop (op) || real_zerop (op))
+ && mem_write_stride_of_same_size_as_unit_type_p (stmt))
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
free (bbs);
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
bool have_similar_memory_accesses (gimple, gimple);
+/* Returns true when STRIDE is equal in absolute value to the size of
+ TYPE as given by TYPE_SIZE_UNIT.  The absolute value is obtained by
+ folding ABS_EXPR over STRIDE in STRIDE's own type, so increasing
+ and decreasing strides are treated alike; the comparison itself
+ is done by tree_int_cst_equal.
+ NOTE(review): the fold_unary result is passed on unchecked --
+ confirm tree_int_cst_equal copes with a STRIDE that does not fold
+ to an integer constant.  */
+
+static inline bool
+stride_of_unit_type_p (tree stride, tree type)
+{
+ return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride),
+ stride),
+ TYPE_SIZE_UNIT (type));
+}
+
/* Determines whether RDG vertices V1 and V2 access similar memory
locations, in which case they have to be in the same partition. */
if (!dr_analyze_innermost (dr))
goto end;
- /* Test for a positive stride, iterating over every element. */
- if (integer_zerop (size_binop (MINUS_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
- {
- addr_base = fold_convert_loc (loc, sizetype,
- size_binop_loc (loc, PLUS_EXPR,
- DR_OFFSET (dr),
- DR_INIT (dr)));
- addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
- TREE_TYPE (DR_BASE_ADDRESS (dr)),
- DR_BASE_ADDRESS (dr), addr_base);
-
- nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
- }
+ if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)))
+ goto end;
+
+ nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
+ addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
+ addr_base = fold_convert_loc (loc, sizetype, addr_base);
/* Test for a negative stride, iterating over every element. */
- else if (integer_zerop (size_binop (PLUS_EXPR,
- TYPE_SIZE_UNIT (TREE_TYPE (op0)),
- fold_convert (sizetype, DR_STEP (dr)))))
+ if (integer_zerop (size_binop (PLUS_EXPR,
+ TYPE_SIZE_UNIT (TREE_TYPE (op0)),
+ fold_convert (sizetype, DR_STEP (dr)))))
{
- nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
-
- addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
- addr_base = fold_convert_loc (loc, sizetype, addr_base);
addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
fold_convert_loc (loc, sizetype, nb_bytes));
addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
TYPE_SIZE_UNIT (TREE_TYPE (op0)));
- addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
- TREE_TYPE (DR_BASE_ADDRESS (dr)),
- DR_BASE_ADDRESS (dr), addr_base);
}
- else
- goto end;
+ addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
+ TREE_TYPE (DR_BASE_ADDRESS (dr)),
+ DR_BASE_ADDRESS (dr), addr_base);
mem = force_gimple_operand (addr_base, &stmts, true, NULL);
gimple_seq_add_seq (&stmt_list, stmts);
and determine those vertices that have some memory affinity with
the current nodes in the component: these are stores to the same
arrays, i.e. we're taking care of cache locality. */
- rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
- other_stores);
+ if (!flag_tree_loop_distribute_patterns)
+ rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
+ other_stores);
rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);