From: spop Date: Fri, 29 Feb 2008 12:41:14 +0000 (+0000) Subject: * tree-loop-linear.c (try_interchange_loops): Compare memory access X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=commitdiff_plain;h=2fcf1fbb13e889c2503d7311f591622288376138 * tree-loop-linear.c (try_interchange_loops): Compare memory access strides against cache sizes. * testsuite/gcc.dg/tree-ssa/ltrans-8.c: Increase the size of strides to make the interchange profitable. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@132765 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4a7fcb7c0d3..617782a0584 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2008-02-29 Sebastian Pop + + * tree-loop-linear.c (try_interchange_loops): Compare memory access + strides against cache sizes. + 2008-02-29 Kaz Kojima * config/sh/sh.c (sh_secondary_reload): Handle loading a float diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b600cbc1dc9..93f2c458dbd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2008-02-29 Sebastian Pop + + * testsuite/gcc.dg/tree-ssa/ltrans-8.c: Increase the size of strides + to make the interchange profitable. + 2008-02-28 Daniel Franke PR fortran/31463 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ltrans-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ltrans-8.c index 1b22e961c62..21f8ffafce6 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ltrans-8.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ltrans-8.c @@ -4,9 +4,9 @@ double foo(double *a) { int i,j; double r = 0.0; - for (i=0; i<8; ++i) - for (j=0; j<8; ++j) - r += a[j*8+i]; + for (i=0; i<100; ++i) + for (j=0; j<1000; ++j) + r += a[j*100+i]; return r; } diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c index 88a77dd228c..806d9e6d1cb 100644 --- a/gcc/tree-loop-linear.c +++ b/gcc/tree-loop-linear.c @@ -179,10 +179,14 @@ try_interchange_loops (lambda_trans_matrix trans, VEC (data_reference_p, heap) *datarefs, struct loop *first_loop) { + bool res; struct loop *loop_i; struct loop *loop_j; unsigned int dependence_steps_i, dependence_steps_j; double_int access_strides_i, access_strides_j; + double_int small, large, nb_iter; + double_int l1_cache_size, l2_cache_size; + int cmp; unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; struct data_dependence_relation *ddr; @@ -194,7 +198,10 @@ try_interchange_loops (lambda_trans_matrix trans, ddr = VEC_index (ddr_p, dependence_relations, 0); if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) return trans; - + + l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024); + l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024); + /* LOOP_I is always the outer loop. */ for (loop_j = first_loop->inner; loop_j; @@ -216,18 +223,38 @@ try_interchange_loops (lambda_trans_matrix trans, /* Heuristics for loop interchange profitability: + 0. Don't transform if the smallest stride is larger than + the L2 cache, or if the largest stride multiplied by the + number of iterations is smaller than the L1 cache. + 1. (spatial locality) Inner loops should have smallest dependence steps. 2. (spatial locality) Inner loops should contain more dependence relations not carried by the loop. - 3. (temporal locality) Inner loops should have smallest + 3. (temporal locality) Inner loops should have smallest array access strides. */ + + cmp = double_int_ucmp (access_strides_i, access_strides_j); + small = cmp < 0 ? access_strides_i : access_strides_j; + large = cmp < 0 ? access_strides_j : access_strides_i; + + if (double_int_ucmp (small, l2_cache_size) > 0) + continue; + + res = cmp < 0 ? + estimated_loop_iterations (loop_j, false, &nb_iter): + estimated_loop_iterations (loop_i, false, &nb_iter); + large = double_int_mul (large, nb_iter); + + if (res && double_int_ucmp (large, l1_cache_size) < 0) + continue; + if (dependence_steps_i < dependence_steps_j || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j - || double_int_ucmp (access_strides_i, access_strides_j) < 0) + || cmp < 0) { lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_depth (loop_i) - loop_depth (first_loop),