gcc/graphite-interchange.c

   1 /* Interchange heuristics and transform for loop interchange on
   2    polyhedral representation.
   3
   4    Copyright (C) 2009 Free Software Foundation, Inc.
   5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
   6    Harsha Jagasia <harsha.jagasia@amd.com>.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 3, or (at your option)
  13 any later version.
  14
  15 GCC is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "rtl.h"
  30 #include "output.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "toplev.h"
  35 #include "tree-dump.h"
  36 #include "timevar.h"
  37 #include "cfgloop.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "tree-pass.h"
  42 #include "domwalk.h"
  43 #include "value-prof.h"
  44 #include "pointer-set.h"
  45 #include "gimple.h"
  46 #include "params.h"
  47
  48 #ifdef HAVE_cloog
  49 #include "cloog/cloog.h"
  50 #include "ppl_c.h"
  51 #include "sese.h"
  52 #include "graphite-ppl.h"
  53 #include "graphite.h"
  54 #include "graphite-poly.h"
  55
  56 /* Builds a linear expression, of dimension DIM, representing PDR's
  57    memory access:
  58
  59    L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
  60
  61    For an array A[10][20] with two subscript locations s0 and s1, the
  62    linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
  63    corresponds to a memory stride of 20.  */
  64
  65 static ppl_Linear_Expression_t
  66 build_linearized_memory_access (poly_dr_p pdr)
  67 {
  68   ppl_Linear_Expression_t res;
  69   ppl_Linear_Expression_t le;
  70   ppl_dimension_type i;
  71   ppl_dimension_type first = pdr_subscript_dim (pdr, 0);
  72   ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr));
  73   Value size, sub_size;
  74   graphite_dim_t dim = pdr_dim (pdr);
  75
  76   ppl_new_Linear_Expression_with_dimension (&res, dim);
  77
  78   value_init (size);
  79   value_set_si (size, 1);
  80   value_init (sub_size);
  81   value_set_si (sub_size, 1);
  82
  83   for (i = last - 1; i >= first; i--)
  84     {
  85       ppl_set_coef_gmp (res, i, size);
  86
  87       ppl_new_Linear_Expression_with_dimension (&le, dim);
  88       ppl_set_coef (le, i, 1);
  89       ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size);
  90       value_multiply (size, size, sub_size);
  91       ppl_delete_Linear_Expression (le);
  92     }
  93
  94   value_clear (sub_size);
  95   value_clear (size);
  96   return res;
  97 }
  98
  99 /* Set STRIDE to the stride of PDR in memory by advancing by one in
 100    loop DEPTH.  */
 101
 102 static void
 103 memory_stride_in_loop (Value stride, graphite_dim_t depth, poly_dr_p pdr)
 104 {
 105   ppl_Linear_Expression_t le, lma;
 106   ppl_Constraint_t new_cstr;
 107   ppl_Pointset_Powerset_C_Polyhedron_t p1, p2;
 108   graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr);
 109   ppl_dimension_type i, *map;
 110   ppl_dimension_type dim = pdr_dim (pdr);
 111   ppl_dimension_type dim_i = pdr_iterator_dim (pdr, depth);
 112   ppl_dimension_type dim_k = dim;
 113   ppl_dimension_type dim_L1 = dim + nb_subscripts + 1;
 114   ppl_dimension_type dim_L2 = dim + nb_subscripts + 2;
 115   ppl_dimension_type new_dim = dim + nb_subscripts + 3;
 116
 117   /* Add new dimensions to the polyhedron corresponding to
 118      k, s0', s1',..., L1, and L2.  These new variables are at
 119      dimensions dim, dim + 1,... of the polyhedron P1 respectively.  */
 120   ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
 121     (&p1, PDR_ACCESSES (pdr));
 122   ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
 123     (p1, nb_subscripts + 3);
 124
 125   lma = build_linearized_memory_access (pdr);
 126   ppl_set_coef (lma, dim_L1, -1);
 127   ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL);
 128   ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr);
 129
 130   /* Build P2.  */
 131   ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
 132     (&p2, p1);
 133   map = ppl_new_id_map (new_dim);
 134   ppl_interchange (map, dim_L1, dim_L2);
 135   ppl_interchange (map, dim_i, dim_k);
 136   for (i = 0; i < PDR_NB_SUBSCRIPTS (pdr); i++)
 137     ppl_interchange (map, pdr_subscript_dim (pdr, i), dim + i + 1);
 138   ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim);
 139   free (map);
 140
 141   /* Add constraint k = i + 1.  */
 142   ppl_new_Linear_Expression_with_dimension (&le, new_dim);
 143   ppl_set_coef (le, dim_i, 1);
 144   ppl_set_coef (le, dim_k, -1);
 145   ppl_set_inhomogeneous (le, 1);
 146   ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
 147   ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p2, new_cstr);
 148   ppl_delete_Linear_Expression (le);
 149   ppl_delete_Constraint (new_cstr);
 150
 151   /* P1 = P1 inter P2.  */
 152   ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2);
 153   ppl_delete_Pointset_Powerset_C_Polyhedron (p2);
 154
 155   /* Maximise the expression L2 - L1.  */
 156   ppl_new_Linear_Expression_with_dimension (&le, new_dim);
 157   ppl_set_coef (le, dim_L2, 1);
 158   ppl_set_coef (le, dim_L1, -1);
 159   ppl_max_for_le_pointset (p1, le, stride);
 160   ppl_delete_Linear_Expression (le);
 161 }
 162
 163
 164 /* Returns true when it is profitable to interchange loop at DEPTH1
 165    and loop at DEPTH2 with DEPTH1 < DEPTH2 for PBB.
 166
 167    Example:
 168
 169    | int a[100][100];
 170    |
 171    | int
 172    | foo (int N)
 173    | {
 174    |   int j;
 175    |   int i;
 176    |
 177    |   for (i = 0; i < N; i++)
 178    |     for (j = 0; j < N; j++)
 179    |       a[j][2 * i] += 1;
 180    |
 181    |   return a[N][12];
 182    | }
 183
  184    The data access a[j][2 * i] is described like this:
 185
 186    | i   j   N   a  s0  s1   1
 187    | 0   0   0   1   0   0  -5    = 0
 188    | 0  -1   0   0   1   0   0    = 0
 189    |-2   0   0   0   0   1   0    = 0
 190    | 0   0   0   0   1   0   0   >= 0
 191    | 0   0   0   0   0   1   0   >= 0
 192    | 0   0   0   0  -1   0 100   >= 0
 193    | 0   0   0   0   0  -1 100   >= 0
 194
 195    The linearized memory access L to A[100][100] is:
 196
 197    | i   j   N   a  s0  s1   1
 198    | 0   0   0   0 100   1   0
 199
 200    Next, to measure the impact of iterating once in loop "i", we build
 201    a maximization problem: first, we add to DR accesses the dimensions
 202    k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: polyhedron P1.
 203
 204    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 205    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 206    | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
 207    |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
 208    | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
 209    | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
 210    | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
 211    | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
 212    | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
 213
 214    Then, we generate the polyhedron P2 by interchanging the dimensions
  215    (s0, s2), (s1, s3), (L1, L2), (k, i)
 216
 217    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 218    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 219    | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
 220    | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
 221    | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
 222    | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
 223    | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
 224    | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
 225    | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
 226
 227    then we add to P2 the equality k = i + 1:
 228
 229    |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
 230
 231    and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
 232
 233    For determining the impact of one iteration on loop "j", we
 234    interchange (k, j), we add "k = j + 1", and we compute D2 the
 235    maximal value of the difference.
 236
 237    Finally, the profitability test is D1 < D2: if in the outer loop
 238    the strides are smaller than in the inner loop, then it is
 239    profitable to interchange the loops at DEPTH1 and DEPTH2.  */
 240
 241 static bool
 242 pbb_interchange_profitable_p (graphite_dim_t depth1, graphite_dim_t depth2,
 243                               poly_bb_p pbb)
 244 {
 245   int i;
 246   poly_dr_p pdr;
 247   Value d1, d2, s, n;
 248   bool res;
 249
 250   gcc_assert (depth1 < depth2);
 251
 252   value_init (d1);
 253   value_set_si (d1, 0);
 254   value_init (d2);
 255   value_set_si (d2, 0);
 256   value_init (s);
 257   value_init (n);
 258
 259   for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb), i, pdr); i++)
 260     {
 261       value_set_si (n, PDR_NB_REFS (pdr));
 262
 263       memory_stride_in_loop (s, depth1, pdr);
 264       value_multiply (s, s, n);
 265       value_addto (d1, d1, s);
 266
 267       memory_stride_in_loop (s, depth2, pdr);
 268       value_multiply (s, s, n);
 269       value_addto (d2, d2, s);
 270     }
 271
 272   res = value_lt (d1, d2);
 273
 274   value_clear (d1);
 275   value_clear (d2);
 276   value_clear (s);
 277   value_clear (n);
 278
 279   return res;
 280 }
 281
 282 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
 283    scattering and assigns the resulting polyhedron to the transformed
 284    scattering.  */
 285
 286 static void
 287 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2, poly_bb_p pbb)
 288 {
 289   ppl_dimension_type i, dim;
 290   ppl_dimension_type *map;
 291   ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
 292   ppl_dimension_type dim1 = psct_iterator_dim (pbb, depth1);
 293   ppl_dimension_type dim2 = psct_iterator_dim (pbb, depth2);
 294
 295   ppl_Polyhedron_space_dimension (poly, &dim);
 296   map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim);
 297
 298   for (i = 0; i < dim; i++)
 299     map[i] = i;
 300
 301   map[dim1] = dim2;
 302   map[dim2] = dim1;
 303
 304   ppl_Polyhedron_map_space_dimensions (poly, map, dim);
 305   free (map);
 306 }
 307
 308 /* Interchanges all the loop depths that are considered profitable for PBB.  */
 309
 310 static bool
 311 pbb_do_interchange (poly_bb_p pbb, scop_p scop)
 312 {
 313   graphite_dim_t i, j;
 314   bool transform_done = false;
 315
 316   for (i = 0; i < pbb_dim_iter_domain (pbb); i++)
 317     for (j = i + 1; j < pbb_dim_iter_domain (pbb); j++)
 318       if (pbb_interchange_profitable_p (i, j, pbb))
 319         {
 320           pbb_interchange_loop_depths (i, j, pbb);
 321
 322           if (graphite_legal_transform (scop))
 323             {
 324               transform_done = true;
 325
 326               if (dump_file && (dump_flags & TDF_DETAILS))
 327                 fprintf (dump_file,
 328                          "PBB %d: loops at depths %d and %d will be interchanged.\n",
 329                          GBB_BB (PBB_BLACK_BOX (pbb))->index, (int) i, (int) j);
 330             }
 331           else
 332             /* Undo the transform.  */
 333             pbb_interchange_loop_depths (j, i, pbb);
 334         }
 335
 336   return transform_done;
 337 }
 338
 339 /* Interchanges all the loop depths that are considered profitable for SCOP.  */
 340
 341 bool
 342 scop_do_interchange (scop_p scop)
 343 {
 344   int i;
 345   poly_bb_p pbb;
 346   bool transform_done = false;
 347
 348   store_scattering (scop);
 349
 350   for (i = 0; VEC_iterate (poly_bb_p, SCOP_BBS (scop), i, pbb); i++)
 351     transform_done |= pbb_do_interchange (pbb, scop);
 352
 353   if (!transform_done)
 354     return false;
 355
 356   if (!graphite_legal_transform (scop))
 357     {
 358       restore_scattering (scop);
 359       return false;
 360     }
 361
 362   return transform_done;
 363 }
 364
 365 #endif
 366