gcc/graphite-interchange.c

   1 /* Interchange heuristics and transform for loop interchange on
   2    polyhedral representation.
   3
   4    Copyright (C) 2009 Free Software Foundation, Inc.
   5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
   6    Harsha Jagasia <harsha.jagasia@amd.com>.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 3, or (at your option)
  13 any later version.
  14
  15 GCC is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "rtl.h"
  30 #include "output.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "toplev.h"
  35 #include "tree-dump.h"
  36 #include "timevar.h"
  37 #include "cfgloop.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "tree-pass.h"
  42 #include "domwalk.h"
  43 #include "value-prof.h"
  44 #include "pointer-set.h"
  45 #include "gimple.h"
  46 #include "params.h"
  47
  48 #ifdef HAVE_cloog
  49 #include "cloog/cloog.h"
  50 #include "ppl_c.h"
  51 #include "sese.h"
  52 #include "graphite-ppl.h"
  53 #include "graphite.h"
  54 #include "graphite-poly.h"
  55
  56 /* Builds a linear expression, of dimension DIM, representing PDR's
  57    memory access:
  58
  59    L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
  60
  61    For an array A[10][20] with two subscript locations s0 and s1, the
  62    linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
  63    corresponds to a memory stride of 20.
  64
  65    OFFSET is a number of dimensions to prepend before the
  66    subscript dimensions: s_0, s_1, ..., s_n.
  67
  68    Thus, the final linear expression has the following format:
  69    0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
  70    where the expression itself is:
  71    c_0 * s_0 + c_1 * s_1 + ... c_n * s_n.  */
  72
  73 static ppl_Linear_Expression_t
  74 build_linearized_memory_access (ppl_dimension_type offset, poly_dr_p pdr)
  75 {
  76   ppl_Linear_Expression_t res;
  77   ppl_Linear_Expression_t le;
  78   ppl_dimension_type i;
  79   ppl_dimension_type first = pdr_subscript_dim (pdr, 0);
  80   ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr));
  81   Value size, sub_size;
  82   graphite_dim_t dim = offset + pdr_dim (pdr);
  83
  84   ppl_new_Linear_Expression_with_dimension (&res, dim);
  85
  86   value_init (size);
  87   value_set_si (size, 1);
  88   value_init (sub_size);
  89   value_set_si (sub_size, 1);
  90
  91   for (i = last - 1; i >= first; i--)
  92     {
  93       ppl_set_coef_gmp (res, i + offset, size);
  94
  95       ppl_new_Linear_Expression_with_dimension (&le, dim - offset);
  96       ppl_set_coef (le, i, 1);
  97       ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size);
  98       value_multiply (size, size, sub_size);
  99       ppl_delete_Linear_Expression (le);
 100     }
 101
 102   value_clear (sub_size);
 103   value_clear (size);
 104   return res;
 105 }
 106
 107 /* Set STRIDE to the stride of PDR in memory by advancing by one in
 108    time dimension DEPTH.  */
 109
 110 static void
 111 memory_stride_in_loop (Value stride, graphite_dim_t depth, poly_dr_p pdr)
 112 {
 113   ppl_Linear_Expression_t le, lma;
 114   ppl_Constraint_t new_cstr;
 115   ppl_dimension_type i, *map;
 116   ppl_Pointset_Powerset_C_Polyhedron_t p1, p2, sctr;
 117   graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr) + 1;
 118   poly_bb_p pbb = PDR_PBB (pdr);
 119   ppl_dimension_type offset = pbb_nb_scattering_transform (pbb)
 120                               + pbb_nb_local_vars (pbb)
 121                               + pbb_dim_iter_domain (pbb);
 122   ppl_dimension_type offsetg = offset + pbb_nb_params (pbb);
 123   ppl_dimension_type dim_sctr = pbb_nb_scattering_transform (pbb)
 124                                 + pbb_nb_local_vars (pbb);
 125   ppl_dimension_type dim_L1 = offset + offsetg + 2 * nb_subscripts;
 126   ppl_dimension_type dim_L2 = offset + offsetg + 2 * nb_subscripts + 1;
 127   ppl_dimension_type new_dim = offset + offsetg + 2 * nb_subscripts + 2;
 128
 129   /* The resulting polyhedron should have the following format:
 130      T|I|T'|I'|G|S|S'|l1|l2
 131      where:
 132      | T = t_1..t_{dim_sctr}
 133      | I = i_1..i_{dim_iter_domain}
 134      | T'= t'_1..t'_{dim_sctr}
 135      | I'= i'_1..i'_{dim_iter_domain}
 136      | G = g_1..g_{nb_params}
 137      | S = s_1..s_{nb_subscripts}
 138      | S'= s'_1..s'_{nb_subscripts}
 139      | l1 and l2 are scalars.
 140
 141      Some invariants:
 142      offset = dim_sctr + dim_iter_domain + nb_local_vars
 143      offsetg = dim_sctr + dim_iter_domain + nb_local_vars + nb_params.  */
 144
 145   /* Construct the T|I|0|0|G|0|0|0|0 part.  */
 146   {
 147     ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
 148       (&sctr, PBB_TRANSFORMED_SCATTERING (pbb));
 149     ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
 150       (sctr, 2 * nb_subscripts + 2);
 151     ppl_insert_dimensions_pointset (sctr, offset, offset);
 152   }
 153
 154   /* Construct the 0|I|0|0|G|S|0|0|0 part.  */
 155   {
 156     ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
 157       (&p1, PDR_ACCESSES (pdr));
 158     ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed
 159       (p1, nb_subscripts + 2);
 160     ppl_insert_dimensions_pointset (p1, 0, dim_sctr);
 161     ppl_insert_dimensions_pointset (p1, offset, offset);
 162   }
 163
 164   /* Construct the 0|0|0|0|0|S|0|l1|0 part.  */
 165   {
 166     lma = build_linearized_memory_access (offset + dim_sctr, pdr);
 167     ppl_set_coef (lma, dim_L1, -1);
 168     ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL);
 169     ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr);
 170   }
 171
 172   /* Now intersect all the parts to get the polyhedron P1:
 173      T|I|0|0|G|0|0|0 |0
 174      0|I|0|0|G|S|0|0 |0
 175      0|0|0|0|0|S|0|l1|0
 176      ------------------
 177      T|I|0|0|G|S|0|l1|0.  */
 178
 179   ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, sctr);
 180   ppl_delete_Pointset_Powerset_C_Polyhedron (sctr);
 181
 182   /* Build P2, which would have the following form:
 183      0|0|T'|I'|G|0|S'|0|l2
 184
 185      P2 is built, by remapping the P1 polyhedron:
 186      T|I|0|0|G|S|0|l1|0
 187
 188      using the following mapping:
 189      T->T'
 190      I->I'
 191      S->S'
 192      l1->l2.  */
 193   {
 194     ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron
 195       (&p2, p1);
 196
 197     map = ppl_new_id_map (new_dim);
 198
 199     /* TI -> T'I'.  */
 200     for (i = 0; i < offset; i++)
 201       ppl_interchange (map, i, i + offset);
 202
 203     /* l1 -> l2.  */
 204     ppl_interchange (map, dim_L1, dim_L2);
 205
 206     /* S -> S'.  */
 207     for (i = 0; i < nb_subscripts; i++)
 208       ppl_interchange (map, offset + offsetg + i,
 209                        offset + offsetg + nb_subscripts + i);
 210
 211     ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim);
 212     free (map);
 213   }
 214
 215   /* Add equalities:
 216      | t1 = t1'
 217      | ...
 218      | t_{depth-1} = t'_{depth-1}
 219      | t_{depth+1} = t'_{depth+1}
 220      | ...
 221      | t_{dim_sctr} = t'_{dim_sctr}
 222
 223      This means that all the time dimensions are equal except for
 224      depth, where we will add t_{depth} = t'_{depth} + 1 in the next
 225      step.  */
 226   for (i = 0; i < dim_sctr; i++)
 227     if (i != depth)
 228       {
 229         ppl_new_Linear_Expression_with_dimension (&le, new_dim);
 230         ppl_set_coef (le, i, 1);
 231         ppl_set_coef (le, i + offset, -1);
 232         ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
 233         ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p2, new_cstr);
 234         ppl_delete_Linear_Expression (le);
 235         ppl_delete_Constraint (new_cstr);
 236       }
 237
 238   /* Add equality : t_{depth} = t'_{depth} + 1.
 239      This is the core part of this alogrithm, since this
 240      constraint asks for the memory access stride (difference)
 241      between two consecutive points in time dimensions.  */
 242   {
 243     ppl_new_Linear_Expression_with_dimension (&le, new_dim);
 244     ppl_set_coef (le, depth, 1);
 245     ppl_set_coef (le, depth + offset, -1);
 246     ppl_set_inhomogeneous (le, 1);
 247     ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL);
 248     ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p2, new_cstr);
 249     ppl_delete_Linear_Expression (le);
 250     ppl_delete_Constraint (new_cstr);
 251   }
 252
 253   /* P1 = P1 inter P2.  */
 254   {
 255     ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2);
 256     ppl_delete_Pointset_Powerset_C_Polyhedron (p2);
 257   }
 258
 259   /* Maximise the expression L2 - L1.  */
 260   {
 261     ppl_new_Linear_Expression_with_dimension (&le, new_dim);
 262     ppl_set_coef (le, dim_L2, 1);
 263     ppl_set_coef (le, dim_L1, -1);
 264     ppl_max_for_le_pointset (p1, le, stride);
 265     ppl_delete_Linear_Expression (le);
 266   }
 267 }
 268
 269 /* Returns true when it is profitable to interchange time dimensions DEPTH1
 270    and DEPTH2 with DEPTH1 < DEPTH2 for PBB.
 271
 272    Example:
 273
 274    | int a[100][100];
 275    |
 276    | int
 277    | foo (int N)
 278    | {
 279    |   int j;
 280    |   int i;
 281    |
 282    |   for (i = 0; i < N; i++)
 283    |     for (j = 0; j < N; j++)
 284    |       a[j][2 * i] += 1;
 285    |
 286    |   return a[N][12];
 287    | }
 288
 289    The data access A[j][i] is described like this:
 290
 291    | i   j   N   a  s0  s1   1
 292    | 0   0   0   1   0   0  -5    = 0
 293    | 0  -1   0   0   1   0   0    = 0
 294    |-2   0   0   0   0   1   0    = 0
 295    | 0   0   0   0   1   0   0   >= 0
 296    | 0   0   0   0   0   1   0   >= 0
 297    | 0   0   0   0  -1   0 100   >= 0
 298    | 0   0   0   0   0  -1 100   >= 0
 299
 300    The linearized memory access L to A[100][100] is:
 301
 302    | i   j   N   a  s0  s1   1
 303    | 0   0   0   0 100   1   0
 304
 305    TODO: the shown format is not valid as it does not show the fact
 306    that the iteration domain "i j" is transformed using the scattering.
 307
 308    Next, to measure the impact of iterating once in loop "i", we build
 309    a maximization problem: first, we add to DR accesses the dimensions
 310    k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: polyhedron P1.
 311
 312    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 313    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 314    | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
 315    |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
 316    | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
 317    | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
 318    | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
 319    | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
 320    | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
 321
 322    Then, we generate the polyhedron P2 by interchanging the dimensions
 323    (s0, s2), (s1, s3), (L1, L2), (k, i)
 324
 325    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 326    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 327    | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
 328    | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
 329    | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
 330    | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
 331    | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
 332    | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
 333    | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
 334
 335    then we add to P2 the equality k = i + 1:
 336
 337    |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
 338
 339    and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
 340
 341    Similarly, to determine the impact of one iteration on loop "j", we
 342    interchange (k, j), we add "k = j + 1", and we compute D2 the
 343    maximal value of the difference.
 344
 345    Finally, the profitability test is D1 < D2: if in the outer loop
 346    the strides are smaller than in the inner loop, then it is
 347    profitable to interchange the loops at DEPTH1 and DEPTH2.  */
 348
 349 static bool
 350 pbb_interchange_profitable_p (graphite_dim_t depth1, graphite_dim_t depth2,
 351                               poly_bb_p pbb)
 352 {
 353   int i;
 354   poly_dr_p pdr;
 355   Value d1, d2, s, n;
 356   bool res;
 357
 358   gcc_assert (depth1 < depth2);
 359
 360   value_init (d1);
 361   value_set_si (d1, 0);
 362   value_init (d2);
 363   value_set_si (d2, 0);
 364   value_init (s);
 365   value_init (n);
 366
 367   for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb), i, pdr); i++)
 368     {
 369       value_set_si (n, PDR_NB_REFS (pdr));
 370
 371       memory_stride_in_loop (s, depth1, pdr);
 372       value_multiply (s, s, n);
 373       value_addto (d1, d1, s);
 374
 375       memory_stride_in_loop (s, depth2, pdr);
 376       value_multiply (s, s, n);
 377       value_addto (d2, d2, s);
 378     }
 379
 380   res = value_lt (d1, d2);
 381
 382   value_clear (d1);
 383   value_clear (d2);
 384   value_clear (s);
 385   value_clear (n);
 386
 387   return res;
 388 }
 389
 390 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
 391    scattering and assigns the resulting polyhedron to the transformed
 392    scattering.  */
 393
 394 static void
 395 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
 396                              poly_bb_p pbb)
 397 {
 398   ppl_dimension_type i, dim;
 399   ppl_dimension_type *map;
 400   ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
 401   ppl_dimension_type dim1 = psct_dynamic_dim (pbb, depth1);
 402   ppl_dimension_type dim2 = psct_dynamic_dim (pbb, depth2);
 403
 404   ppl_Polyhedron_space_dimension (poly, &dim);
 405   map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim);
 406
 407   for (i = 0; i < dim; i++)
 408     map[i] = i;
 409
 410   map[dim1] = dim2;
 411   map[dim2] = dim1;
 412
 413   ppl_Polyhedron_map_space_dimensions (poly, map, dim);
 414   free (map);
 415 }
 416
 417 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
 418    the statements below LST.  */
 419
 420 static void
 421 lst_apply_interchange (lst_p lst, int depth1, int depth2)
 422 {
 423   if (!lst)
 424     return;
 425
 426   if (LST_LOOP_P (lst))
 427     {
 428       int i;
 429       lst_p l;
 430
 431       for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
 432         lst_apply_interchange (l, depth1, depth2);
 433     }
 434   else
 435     pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
 436 }
 437
 438 /* Return true when the interchange of loops at depths DEPTH1 and
 439    DEPTH2 to all the statements below LST is profitable.  */
 440
 441 static bool
 442 lst_interchange_profitable_p (lst_p lst, int depth1, int depth2)
 443 {
 444   if (!lst)
 445     return false;
 446
 447   if (LST_LOOP_P (lst))
 448     {
 449       int i;
 450       lst_p l;
 451       bool res = false;
 452
 453       for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
 454         {
 455           bool profitable = lst_interchange_profitable_p (l, depth1, depth2);
 456
 457           if (profitable && !LST_LOOP_P (lst)
 458               && dump_file && (dump_flags & TDF_DETAILS))
 459             fprintf (dump_file,
 460                      "Interchanging loops at depths %d and %d is profitable for stmt_%d.\n",
 461                      depth1, depth2, pbb_index (LST_PBB (lst)));
 462
 463           res |= profitable;
 464         }
 465
 466       return res;
 467     }
 468   else
 469     return pbb_interchange_profitable_p (depth1, depth2, LST_PBB (lst));
 470 }
 471
 472
 473 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
 474    body of LOOP2.  LOOP1 contains LOOP2.  Return true if it did the
 475    interchange.  */
 476
 477 static bool
 478 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
 479 {
 480   int depth1 = lst_depth (loop1);
 481   int depth2 = lst_depth (loop2);
 482
 483   if (!lst_interchange_profitable_p (loop2, depth1, depth2))
 484     return false;
 485
 486   lst_apply_interchange (loop2, depth1, depth2);
 487
 488   if (graphite_legal_transform (scop))
 489     {
 490       if (dump_file && (dump_flags & TDF_DETAILS))
 491         fprintf (dump_file,
 492                  "Loops at depths %d and %d will be interchanged.\n",
 493                  depth1, depth2);
 494
 495       return true;
 496     }
 497
 498   /* Undo the transform.  */
 499   lst_apply_interchange (loop2, depth2, depth1);
 500   return false;
 501 }
 502
 503 /* Try to interchange LOOP with all the loops contained in the body of
 504    LST.  Return true if it did interchanged some loops.  */
 505
 506 static bool
 507 lst_try_interchange (scop_p scop, lst_p loop, lst_p lst)
 508 {
 509   if (!lst)
 510     return false;
 511
 512   if (LST_LOOP_P (lst))
 513     {
 514       int i;
 515       lst_p l;
 516       bool res = lst_try_interchange_loops (scop, loop, lst);
 517
 518       for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
 519         res |= lst_try_interchange (scop, loop, l);
 520
 521       return res;
 522     }
 523
 524   return false;
 525 }
 526
 527 /* Interchanges all the loops of LST that are considered profitable to
 528    interchange.  Return true if it did interchanged some loops.  */
 529
 530 static bool
 531 lst_do_interchange (scop_p scop, lst_p lst)
 532 {
 533   if (!lst)
 534     return false;
 535
 536   if (LST_LOOP_P (lst))
 537     {
 538       int i;
 539       lst_p l;
 540       bool res = false;
 541
 542       if (lst_depth (lst) >= 0)
 543         for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
 544           res |= lst_try_interchange (scop, lst, l);
 545
 546       for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++)
 547         res |= lst_do_interchange (scop, l);
 548
 549       return res;
 550     }
 551
 552   return false;
 553 }
 554
 555 /* Interchanges all the loop depths that are considered profitable for SCOP.  */
 556
 557 bool
 558 scop_do_interchange (scop_p scop)
 559 {
 560   bool transform_done = false;
 561
 562   store_scattering (scop);
 563
 564   transform_done = lst_do_interchange (scop, SCOP_TRANSFORMED_SCHEDULE (scop));
 565
 566   if (!transform_done)
 567     return false;
 568
 569   if (!graphite_legal_transform (scop))
 570     {
 571       restore_scattering (scop);
 572       return false;
 573     }
 574
 575   return transform_done;
 576 }
 577
 578
 579 #endif
 580