gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "timevar.h"
  34 #include "cfgloop.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "tree-chrec.h"
  38 #include "tree-data-ref.h"
  39 #include "tree-pass.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "vec.h"
  42 #include "lambda.h"
  43 #include "vecprim.h"
  44
  45 /* This loop nest code generation is based on non-singular matrix
  46    math.
  47
  48  A little terminology and a general sketch of the algorithm.  See "A singular
  49  loop transformation framework based on non-singular matrices" by Wei Li and
  50  Keshav Pingali for formal proofs that the various statements below are
  51  correct.
  52
  53  A loop iteration space represents the points traversed by the loop.  A point in the
  54  iteration space can be represented by a vector of size <loop depth>.  You can
  55  therefore represent the iteration space as an integral combinations of a set
  56  of basis vectors.
  57
  58  A loop iteration space is dense if every integer point between the loop
  59  bounds is a point in the iteration space.  Every loop with a step of 1
  60  therefore has a dense iteration space.
  61
  62  for i = 1 to 3, step 1 is a dense iteration space.
  63
  64  A loop iteration space is sparse if it is not dense.  That is, the iteration
  65  space skips integer points that are within the loop bounds.
  66
  67  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  68  2 is skipped.
  69
  70  Dense source spaces are easy to transform, because they don't skip any
  71  points to begin with.  Thus we can compute the exact bounds of the target
  72  space using min/max and floor/ceil.
  73
  74  For a dense source space, we take the transformation matrix, decompose it
  75  into a lower triangular part (H) and a unimodular part (U).
  76  We then compute the auxiliary space from the unimodular part (source loop
  77  nest . U = auxiliary space) , which has two important properties:
  78   1. It traverses the iterations in the same lexicographic order as the source
  79   space.
  80   2. It is a dense space when the source is a dense space (even if the target
  81   space is going to be sparse).
  82
  83  Given the auxiliary space, we use the lower triangular part to compute the
  84  bounds in the target space by simple matrix multiplication.
  85  The gaps in the target space (IE the new loop step sizes) will be the
  86  diagonals of the H matrix.
  87
  88  Sparse source spaces require another step, because you can't directly compute
  89  the exact bounds of the auxiliary and target space from the sparse space.
  90  Rather than try to come up with a separate algorithm to handle sparse source
  91  spaces directly, we just find a legal transformation matrix that gives you
  92  the sparse source space, from a dense space, and then transform the dense
  93  space.
  94
  95  For a regular sparse space, you can represent the source space as an integer
  96  lattice, and the base space of that lattice will always be dense.  Thus, we
  97  effectively use the lattice to figure out the transformation from the lattice
  98  base space, to the sparse iteration space (IE what transform was applied to
  99  the dense space to make it sparse).  We then compose this transform with the
 100  transformation matrix specified by the user (since our matrix transformations
 101  are closed under composition, this is okay).  We can then use the base space
 102  (which is dense) plus the composed transformation matrix, to compute the rest
 103  of the transform using the dense space algorithm above.
 104
 105  In other words, our sparse source space (B) is decomposed into a dense base
 106  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 107  We then compute the composition of L and the user transformation matrix (T),
 108  so that T is now a transform from A to the result, instead of from B to the
 109  result.
 110  IE A.(LT) = result instead of B.T = result
 111  Since A is now a dense source space, we can use the dense source space
 112  algorithm above to compute the result of applying transform (LT) to A.
 113
 114  Fourier-Motzkin elimination is used to compute the bounds of the base space
 115  of the lattice.  */
 116
 117 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 118                              VEC(tree,heap) *, VEC(int,heap) *,
 119                              VEC(tree,heap) *);
 120 /* Lattice stuff that is internal to the code generation algorithm.  */
 121
 122 typedef struct lambda_lattice_s
 123 {
 124   /* Lattice base matrix.  */
 125   lambda_matrix base;
 126   /* Lattice dimension.  */
 127   int dimension;
 128   /* Origin vector for the coefficients.  */
 129   lambda_vector origin;
 130   /* Origin matrix for the invariants.  */
 131   lambda_matrix origin_invariants;
 132   /* Number of invariants.  */
 133   int invariants;
 134 } *lambda_lattice;
 135
 136 #define LATTICE_BASE(T) ((T)->base)
 137 #define LATTICE_DIMENSION(T) ((T)->dimension)
 138 #define LATTICE_ORIGIN(T) ((T)->origin)
 139 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 140 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 141
 142 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 143                        int, int);
 144 static lambda_lattice lambda_lattice_new (int, int);
 145 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 146
 147 static tree find_induction_var_from_exit_cond (struct loop *);
 148 static bool can_convert_to_perfect_nest (struct loop *);
 149
 150 /* Create a new lambda body vector.  */
 151
 152 lambda_body_vector
 153 lambda_body_vector_new (int size)
 154 {
 155   lambda_body_vector ret;
 156
 157   ret = GGC_NEW (struct lambda_body_vector_s);
 158   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 159   LBV_SIZE (ret) = size;
 160   LBV_DENOMINATOR (ret) = 1;
 161   return ret;
 162 }
 163
 164 /* Compute the new coefficients for the vector based on the
 165   *inverse* of the transformation matrix.  */
 166
 167 lambda_body_vector
 168 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 169                                 lambda_body_vector vect)
 170 {
 171   lambda_body_vector temp;
 172   int depth;
 173
 174   /* Make sure the matrix is square.  */
 175   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 176
 177   depth = LTM_ROWSIZE (transform);
 178
 179   temp = lambda_body_vector_new (depth);
 180   LBV_DENOMINATOR (temp) =
 181     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 182   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 183                              LTM_MATRIX (transform), depth,
 184                              LBV_COEFFICIENTS (temp));
 185   LBV_SIZE (temp) = LBV_SIZE (vect);
 186   return temp;
 187 }
 188
 189 /* Print out a lambda body vector.  */
 190
 191 void
 192 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 193 {
 194   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 195 }
 196
 197 /* Return TRUE if two linear expressions are equal.  */
 198
 199 static bool
 200 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 201            int depth, int invariants)
 202 {
 203   int i;
 204
 205   if (lle1 == NULL || lle2 == NULL)
 206     return false;
 207   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 208     return false;
 209   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 210     return false;
 211   for (i = 0; i < depth; i++)
 212     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 213       return false;
 214   for (i = 0; i < invariants; i++)
 215     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 216         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 217       return false;
 218   return true;
 219 }
 220
 221 /* Create a new linear expression with dimension DIM, and total number
 222    of invariants INVARIANTS.  */
 223
 224 lambda_linear_expression
 225 lambda_linear_expression_new (int dim, int invariants)
 226 {
 227   lambda_linear_expression ret;
 228
 229   ret = GGC_CNEW (struct lambda_linear_expression_s);
 230
 231   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 232   LLE_CONSTANT (ret) = 0;
 233   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 234   LLE_DENOMINATOR (ret) = 1;
 235   LLE_NEXT (ret) = NULL;
 236
 237   return ret;
 238 }
 239
 240 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 241    The starting letter used for variable names is START.  */
 242
 243 static void
 244 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 245                          char start)
 246 {
 247   int i;
 248   bool first = true;
 249   for (i = 0; i < size; i++)
 250     {
 251       if (expr[i] != 0)
 252         {
 253           if (first)
 254             {
 255               if (expr[i] < 0)
 256                 fprintf (outfile, "-");
 257               first = false;
 258             }
 259           else if (expr[i] > 0)
 260             fprintf (outfile, " + ");
 261           else
 262             fprintf (outfile, " - ");
 263           if (abs (expr[i]) == 1)
 264             fprintf (outfile, "%c", start + i);
 265           else
 266             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 267         }
 268     }
 269 }
 270
 271 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 272    depth/number of coefficients is given by DEPTH, the number of invariants is
 273    given by INVARIANTS, and the character to start variable names with is given
 274    by START.  */
 275
 276 void
 277 print_lambda_linear_expression (FILE * outfile,
 278                                 lambda_linear_expression expr,
 279                                 int depth, int invariants, char start)
 280 {
 281   fprintf (outfile, "\tLinear expression: ");
 282   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 283   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 284   fprintf (outfile, "  invariants: ");
 285   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 286                            invariants, 'A');
 287   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 288 }
 289
 290 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 291    coefficients is given by DEPTH, the number of invariants is
 292    given by INVARIANTS, and the character to start variable names with is given
 293    by START.  */
 294
 295 void
 296 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 297                    int invariants, char start)
 298 {
 299   int step;
 300   lambda_linear_expression expr;
 301
 302   gcc_assert (loop);
 303
 304   expr = LL_LINEAR_OFFSET (loop);
 305   step = LL_STEP (loop);
 306   fprintf (outfile, "  step size = %d \n", step);
 307
 308   if (expr)
 309     {
 310       fprintf (outfile, "  linear offset: \n");
 311       print_lambda_linear_expression (outfile, expr, depth, invariants,
 312                                       start);
 313     }
 314
 315   fprintf (outfile, "  lower bound: \n");
 316   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 317     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 318   fprintf (outfile, "  upper bound: \n");
 319   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 320     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 321 }
 322
 323 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 324    number of invariants.  */
 325
 326 lambda_loopnest
 327 lambda_loopnest_new (int depth, int invariants)
 328 {
 329   lambda_loopnest ret;
 330   ret = GGC_NEW (struct lambda_loopnest_s);
 331
 332   LN_LOOPS (ret) = GGC_CNEWVEC (lambda_loop, depth);
 333   LN_DEPTH (ret) = depth;
 334   LN_INVARIANTS (ret) = invariants;
 335
 336   return ret;
 337 }
 338
 339 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 340    character to use for loop names is given by START.  */
 341
 342 void
 343 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 344 {
 345   int i;
 346   for (i = 0; i < LN_DEPTH (nest); i++)
 347     {
 348       fprintf (outfile, "Loop %c\n", start + i);
 349       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 350                          LN_INVARIANTS (nest), 'i');
 351       fprintf (outfile, "\n");
 352     }
 353 }
 354
 355 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 356    of invariants.  */
 357
 358 static lambda_lattice
 359 lambda_lattice_new (int depth, int invariants)
 360 {
 361   lambda_lattice ret;
 362   ret = GGC_NEW (struct lambda_lattice_s);
 363   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 364   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 365   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 366   LATTICE_DIMENSION (ret) = depth;
 367   LATTICE_INVARIANTS (ret) = invariants;
 368   return ret;
 369 }
 370
 371 /* Compute the lattice base for NEST.  The lattice base is essentially a
 372    non-singular transform from a dense base space to a sparse iteration space.
 373    We use it so that we don't have to specially handle the case of a sparse
 374    iteration space in other parts of the algorithm.  As a result, this routine
 375    only does something interesting (IE produce a matrix that isn't the
 376    identity matrix) if NEST is a sparse space.  */
 377
 378 static lambda_lattice
 379 lambda_lattice_compute_base (lambda_loopnest nest)
 380 {
 381   lambda_lattice ret;
 382   int depth, invariants;
 383   lambda_matrix base;
 384
 385   int i, j, step;
 386   lambda_loop loop;
 387   lambda_linear_expression expression;
 388
 389   depth = LN_DEPTH (nest);
 390   invariants = LN_INVARIANTS (nest);
 391
 392   ret = lambda_lattice_new (depth, invariants);
 393   base = LATTICE_BASE (ret);
 394   for (i = 0; i < depth; i++)
 395     {
 396       loop = LN_LOOPS (nest)[i];
 397       gcc_assert (loop);
 398       step = LL_STEP (loop);
 399       /* If we have a step of 1, then the base is one, and the
 400          origin and invariant coefficients are 0.  */
 401       if (step == 1)
 402         {
 403           for (j = 0; j < depth; j++)
 404             base[i][j] = 0;
 405           base[i][i] = 1;
 406           LATTICE_ORIGIN (ret)[i] = 0;
 407           for (j = 0; j < invariants; j++)
 408             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 409         }
 410       else
 411         {
 412           /* Otherwise, we need the lower bound expression (which must
 413              be an affine function)  to determine the base.  */
 414           expression = LL_LOWER_BOUND (loop);
 415           gcc_assert (expression && !LLE_NEXT (expression)
 416                       && LLE_DENOMINATOR (expression) == 1);
 417
 418           /* The lower triangular portion of the base is going to be the
 419              coefficient times the step */
 420           for (j = 0; j < i; j++)
 421             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 422               * LL_STEP (LN_LOOPS (nest)[j]);
 423           base[i][i] = step;
 424           for (j = i + 1; j < depth; j++)
 425             base[i][j] = 0;
 426
 427           /* Origin for this loop is the constant of the lower bound
 428              expression.  */
 429           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 430
 431           /* Coefficient for the invariants are equal to the invariant
 432              coefficients in the expression.  */
 433           for (j = 0; j < invariants; j++)
 434             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 435               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 436         }
 437     }
 438   return ret;
 439 }
 440
 441 /* Compute the least common multiple of two numbers A and B .  */
 442
 443 int
 444 least_common_multiple (int a, int b)
 445 {
 446   return (abs (a) * abs (b) / gcd (a, b));
 447 }
 448
 449 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 450    auxiliary nest.
 451    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 452    it is easy to calculate the answer and bounds.
 453    A sketch of how it works:
 454    Given a system of linear inequalities, ai * xj >= bk, you can always
 455    rewrite the constraints so they are all of the form
 456    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 457    in b1 ... bk, and some a in a1...ai)
 458    You can then eliminate this x from the non-constant inequalities by
 459    rewriting these as a <= b, x >= constant, and delete the x variable.
 460    You can then repeat this for any remaining x variables, and then we have
 461    an easy to use variable <= constant (or no variables at all) form that we
 462    can construct our bounds from.
 463
 464    In our case, each time we eliminate, we construct part of the bound from
 465    the ith variable, then delete the ith variable.
 466
 467    Remember the constant are in our vector a, our coefficient matrix is A,
 468    and our invariant coefficient matrix is B.
 469
 470    SIZE is the size of the matrices being passed.
 471    DEPTH is the loop nest depth.
 472    INVARIANTS is the number of loop invariants.
 473    A, B, and a are the coefficient matrix, invariant coefficient, and a
 474    vector of constants, respectively.  */
 475
 476 static lambda_loopnest
 477 compute_nest_using_fourier_motzkin (int size,
 478                                     int depth,
 479                                     int invariants,
 480                                     lambda_matrix A,
 481                                     lambda_matrix B,
 482                                     lambda_vector a)
 483 {
 484
 485   int multiple, f1, f2;
 486   int i, j, k;
 487   lambda_linear_expression expression;
 488   lambda_loop loop;
 489   lambda_loopnest auxillary_nest;
 490   lambda_matrix swapmatrix, A1, B1;
 491   lambda_vector swapvector, a1;
 492   int newsize;
 493
 494   A1 = lambda_matrix_new (128, depth);
 495   B1 = lambda_matrix_new (128, invariants);
 496   a1 = lambda_vector_new (128);
 497
 498   auxillary_nest = lambda_loopnest_new (depth, invariants);
 499
 500   for (i = depth - 1; i >= 0; i--)
 501     {
 502       loop = lambda_loop_new ();
 503       LN_LOOPS (auxillary_nest)[i] = loop;
 504       LL_STEP (loop) = 1;
 505
 506       for (j = 0; j < size; j++)
 507         {
 508           if (A[j][i] < 0)
 509             {
 510               /* Any linear expression in the matrix with a coefficient less
 511                  than 0 becomes part of the new lower bound.  */
 512               expression = lambda_linear_expression_new (depth, invariants);
 513
 514               for (k = 0; k < i; k++)
 515                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 516
 517               for (k = 0; k < invariants; k++)
 518                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 519
 520               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 521               LLE_CONSTANT (expression) = -1 * a[j];
 522
 523               /* Ignore if identical to the existing lower bound.  */
 524               if (!lle_equal (LL_LOWER_BOUND (loop),
 525                               expression, depth, invariants))
 526                 {
 527                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 528                   LL_LOWER_BOUND (loop) = expression;
 529                 }
 530
 531             }
 532           else if (A[j][i] > 0)
 533             {
 534               /* Any linear expression with a coefficient greater than 0
 535                  becomes part of the new upper bound.  */
 536               expression = lambda_linear_expression_new (depth, invariants);
 537               for (k = 0; k < i; k++)
 538                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 539
 540               for (k = 0; k < invariants; k++)
 541                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 542
 543               LLE_DENOMINATOR (expression) = A[j][i];
 544               LLE_CONSTANT (expression) = a[j];
 545
 546               /* Ignore if identical to the existing upper bound.  */
 547               if (!lle_equal (LL_UPPER_BOUND (loop),
 548                               expression, depth, invariants))
 549                 {
 550                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 551                   LL_UPPER_BOUND (loop) = expression;
 552                 }
 553
 554             }
 555         }
 556
 557       /* This portion creates a new system of linear inequalities by deleting
 558          the i'th variable, reducing the system by one variable.  */
 559       newsize = 0;
 560       for (j = 0; j < size; j++)
 561         {
 562           /* If the coefficient for the i'th variable is 0, then we can just
 563              eliminate the variable straightaway.  Otherwise, we have to
 564              multiply through by the coefficients we are eliminating.  */
 565           if (A[j][i] == 0)
 566             {
 567               lambda_vector_copy (A[j], A1[newsize], depth);
 568               lambda_vector_copy (B[j], B1[newsize], invariants);
 569               a1[newsize] = a[j];
 570               newsize++;
 571             }
 572           else if (A[j][i] > 0)
 573             {
 574               for (k = 0; k < size; k++)
 575                 {
 576                   if (A[k][i] < 0)
 577                     {
 578                       multiple = least_common_multiple (A[j][i], A[k][i]);
 579                       f1 = multiple / A[j][i];
 580                       f2 = -1 * multiple / A[k][i];
 581
 582                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 583                                             A1[newsize], depth);
 584                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 585                                             B1[newsize], invariants);
 586                       a1[newsize] = f1 * a[j] + f2 * a[k];
 587                       newsize++;
 588                     }
 589                 }
 590             }
 591         }
 592
 593       swapmatrix = A;
 594       A = A1;
 595       A1 = swapmatrix;
 596
 597       swapmatrix = B;
 598       B = B1;
 599       B1 = swapmatrix;
 600
 601       swapvector = a;
 602       a = a1;
 603       a1 = swapvector;
 604
 605       size = newsize;
 606     }
 607
 608   return auxillary_nest;
 609 }
 610
 611 /* Compute the loop bounds for the auxiliary space NEST.
 612    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 613    Given the original nest, this function will
 614    1. Convert the nest into matrix form, which consists of a matrix for the
 615    coefficients, a matrix for the
 616    invariant coefficients, and a vector for the constants.
 617    2. Use the matrix form to calculate the lattice base for the nest (which is
 618    a dense space)
 619    3. Compose the dense space transform with the user specified transform, to
 620    get a transform we can easily calculate transformed bounds for.
 621    4. Multiply the composed transformation matrix times the matrix form of the
 622    loop.
 623    5. Transform the newly created matrix (from step 4) back into a loop nest
 624    using Fourier-Motzkin elimination to figure out the bounds.  */
 625
 626 static lambda_loopnest
 627 lambda_compute_auxillary_space (lambda_loopnest nest,
 628                                 lambda_trans_matrix trans)
 629 {
 630   lambda_matrix A, B, A1, B1;
 631   lambda_vector a, a1;
 632   lambda_matrix invertedtrans;
 633   int depth, invariants, size;
 634   int i, j;
 635   lambda_loop loop;
 636   lambda_linear_expression expression;
 637   lambda_lattice lattice;
 638
 639   depth = LN_DEPTH (nest);
 640   invariants = LN_INVARIANTS (nest);
 641
 642   /* Unfortunately, we can't know the number of constraints we'll have
 643      ahead of time, but this should be enough even in ridiculous loop nest
 644      cases. We must not go over this limit.  */
 645   A = lambda_matrix_new (128, depth);
 646   B = lambda_matrix_new (128, invariants);
 647   a = lambda_vector_new (128);
 648
 649   A1 = lambda_matrix_new (128, depth);
 650   B1 = lambda_matrix_new (128, invariants);
 651   a1 = lambda_vector_new (128);
 652
 653   /* Store the bounds in the equation matrix A, constant vector a, and
 654      invariant matrix B, so that we have Ax <= a + B.
 655      This requires a little equation rearranging so that everything is on the
 656      correct side of the inequality.  */
 657   size = 0;
 658   for (i = 0; i < depth; i++)
 659     {
 660       loop = LN_LOOPS (nest)[i];
 661
 662       /* First we do the lower bound.  */
 663       if (LL_STEP (loop) > 0)
 664         expression = LL_LOWER_BOUND (loop);
 665       else
 666         expression = LL_UPPER_BOUND (loop);
 667
 668       for (; expression != NULL; expression = LLE_NEXT (expression))
 669         {
 670           /* Fill in the coefficient.  */
 671           for (j = 0; j < i; j++)
 672             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 673
 674           /* And the invariant coefficient.  */
 675           for (j = 0; j < invariants; j++)
 676             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 677
 678           /* And the constant.  */
 679           a[size] = LLE_CONSTANT (expression);
 680
 681           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 682              constants and single variables on   */
 683           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 684           a[size] *= -1;
 685           for (j = 0; j < invariants; j++)
 686             B[size][j] *= -1;
 687
 688           size++;
 689           /* Need to increase matrix sizes above.  */
 690           gcc_assert (size <= 127);
 691
 692         }
 693
 694       /* Then do the exact same thing for the upper bounds.  */
 695       if (LL_STEP (loop) > 0)
 696         expression = LL_UPPER_BOUND (loop);
 697       else
 698         expression = LL_LOWER_BOUND (loop);
 699
 700       for (; expression != NULL; expression = LLE_NEXT (expression))
 701         {
 702           /* Fill in the coefficient.  */
 703           for (j = 0; j < i; j++)
 704             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 705
 706           /* And the invariant coefficient.  */
 707           for (j = 0; j < invariants; j++)
 708             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 709
 710           /* And the constant.  */
 711           a[size] = LLE_CONSTANT (expression);
 712
 713           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] *= -1;
 716           A[size][i] = LLE_DENOMINATOR (expression);
 717           size++;
 718           /* Need to increase matrix sizes above.  */
 719           gcc_assert (size <= 127);
 720
 721         }
 722     }
 723
 724   /* Compute the lattice base x = base * y + origin, where y is the
 725      base space.  */
 726   lattice = lambda_lattice_compute_base (nest);
 727
 728   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 729
 730   /* A1 = A * L */
 731   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 732
 733   /* a1 = a - A * origin constant.  */
 734   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 735   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 736
 737   /* B1 = B - A * origin invariant.  */
 738   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 739                       invariants);
 740   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 741
 742   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 743      it by A1, then performing Fourier-Motzkin.  */
 744
 745   invertedtrans = lambda_matrix_new (depth, depth);
 746
 747   /* Compute the inverse of U.  */
 748   lambda_matrix_inverse (LTM_MATRIX (trans),
 749                          invertedtrans, depth);
 750
 751   /* A = A1 inv(U).  */
 752   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 753
 754   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 755                                              A, B1, a1);
 756 }
 757
 758 /* Compute the loop bounds for the target space, using the bounds of
 759    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 760    The target space loop bounds are computed by multiplying the triangular
 761    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 762    the loop steps (positive or negative) is then used to swap the bounds if
 763    the loop counts downwards.
 764    Return the target loopnest.  */
 765
 766 static lambda_loopnest
 767 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 768                              lambda_trans_matrix H, lambda_vector stepsigns)
 769 {
 770   lambda_matrix inverse, H1;
 771   int determinant, i, j;
 772   int gcd1, gcd2;
 773   int factor;
 774
 775   lambda_loopnest target_nest;
 776   int depth, invariants;
 777   lambda_matrix target;
 778
 779   lambda_loop auxillary_loop, target_loop;
 780   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 781
 782   depth = LN_DEPTH (auxillary_nest);
 783   invariants = LN_INVARIANTS (auxillary_nest);
 784
 785   inverse = lambda_matrix_new (depth, depth);
 786   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 787
 788   /* H1 is H excluding its diagonal.  */
 789   H1 = lambda_matrix_new (depth, depth);
 790   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 791
 792   for (i = 0; i < depth; i++)
 793     H1[i][i] = 0;
 794
 795   /* Computes the linear offsets of the loop bounds.  */
 796   target = lambda_matrix_new (depth, depth);
 797   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 798
 799   target_nest = lambda_loopnest_new (depth, invariants);
 800
 801   for (i = 0; i < depth; i++)
 802     {
 803
 804       /* Get a new loop structure.  */
 805       target_loop = lambda_loop_new ();
 806       LN_LOOPS (target_nest)[i] = target_loop;
 807
 808       /* Computes the gcd of the coefficients of the linear part.  */
 809       gcd1 = lambda_vector_gcd (target[i], i);
 810
 811       /* Include the denominator in the GCD.  */
 812       gcd1 = gcd (gcd1, determinant);
 813
 814       /* Now divide through by the gcd.  */
 815       for (j = 0; j < i; j++)
 816         target[i][j] = target[i][j] / gcd1;
 817
 818       expression = lambda_linear_expression_new (depth, invariants);
 819       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 820       LLE_DENOMINATOR (expression) = determinant / gcd1;
 821       LLE_CONSTANT (expression) = 0;
 822       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 823                            invariants);
 824       LL_LINEAR_OFFSET (target_loop) = expression;
 825     }
 826
 827   /* For each loop, compute the new bounds from H.  */
 828   for (i = 0; i < depth; i++)
 829     {
 830       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 831       target_loop = LN_LOOPS (target_nest)[i];
 832       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 833       factor = LTM_MATRIX (H)[i][i];
 834
 835       /* First we do the lower bound.  */
 836       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 837
 838       for (; auxillary_expr != NULL;
 839            auxillary_expr = LLE_NEXT (auxillary_expr))
 840         {
 841           target_expr = lambda_linear_expression_new (depth, invariants);
 842           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 843                                      depth, inverse, depth,
 844                                      LLE_COEFFICIENTS (target_expr));
 845           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 846                                     LLE_COEFFICIENTS (target_expr), depth,
 847                                     factor);
 848
 849           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 850           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 851                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 852                               invariants);
 853           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 854                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 855                                     invariants, factor);
 856           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 857
 858           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 859             {
 860               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 861                 * determinant;
 862               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 863                                         (target_expr),
 864                                         LLE_INVARIANT_COEFFICIENTS
 865                                         (target_expr), invariants,
 866                                         determinant);
 867               LLE_DENOMINATOR (target_expr) =
 868                 LLE_DENOMINATOR (target_expr) * determinant;
 869             }
 870           /* Find the gcd and divide by it here, rather than doing it
 871              at the tree level.  */
 872           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 873           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 874                                     invariants);
 875           gcd1 = gcd (gcd1, gcd2);
 876           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 877           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 878           for (j = 0; j < depth; j++)
 879             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 880           for (j = 0; j < invariants; j++)
 881             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 882           LLE_CONSTANT (target_expr) /= gcd1;
 883           LLE_DENOMINATOR (target_expr) /= gcd1;
 884           /* Ignore if identical to existing bound.  */
 885           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 886                           invariants))
 887             {
 888               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 889               LL_LOWER_BOUND (target_loop) = target_expr;
 890             }
 891         }
 892       /* Now do the upper bound.  */
 893       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 894
 895       for (; auxillary_expr != NULL;
 896            auxillary_expr = LLE_NEXT (auxillary_expr))
 897         {
 898           target_expr = lambda_linear_expression_new (depth, invariants);
 899           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 900                                      depth, inverse, depth,
 901                                      LLE_COEFFICIENTS (target_expr));
 902           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 903                                     LLE_COEFFICIENTS (target_expr), depth,
 904                                     factor);
 905           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 906           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 907                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 908                               invariants);
 909           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 910                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 911                                     invariants, factor);
 912           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 913
 914           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 915             {
 916               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 917                 * determinant;
 918               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 919                                         (target_expr),
 920                                         LLE_INVARIANT_COEFFICIENTS
 921                                         (target_expr), invariants,
 922                                         determinant);
 923               LLE_DENOMINATOR (target_expr) =
 924                 LLE_DENOMINATOR (target_expr) * determinant;
 925             }
 926           /* Find the gcd and divide by it here, instead of at the
 927              tree level.  */
 928           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 929           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 930                                     invariants);
 931           gcd1 = gcd (gcd1, gcd2);
 932           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 933           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 934           for (j = 0; j < depth; j++)
 935             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 936           for (j = 0; j < invariants; j++)
 937             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 938           LLE_CONSTANT (target_expr) /= gcd1;
 939           LLE_DENOMINATOR (target_expr) /= gcd1;
 940           /* Ignore if equal to existing bound.  */
 941           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 942                           invariants))
 943             {
 944               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 945               LL_UPPER_BOUND (target_loop) = target_expr;
 946             }
 947         }
 948     }
 949   for (i = 0; i < depth; i++)
 950     {
 951       target_loop = LN_LOOPS (target_nest)[i];
 952       /* If necessary, exchange the upper and lower bounds and negate
 953          the step size.  */
 954       if (stepsigns[i] < 0)
 955         {
 956           LL_STEP (target_loop) *= -1;
 957           tmp_expr = LL_LOWER_BOUND (target_loop);
 958           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 959           LL_UPPER_BOUND (target_loop) = tmp_expr;
 960         }
 961     }
 962   return target_nest;
 963 }
 964
 965 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 966    result.  */
 967
 968 static lambda_vector
 969 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 970 {
 971   lambda_matrix matrix, H;
 972   int size;
 973   lambda_vector newsteps;
 974   int i, j, factor, minimum_column;
 975   int temp;
 976
 977   matrix = LTM_MATRIX (trans);
 978   size = LTM_ROWSIZE (trans);
 979   H = lambda_matrix_new (size, size);
 980
 981   newsteps = lambda_vector_new (size);
 982   lambda_vector_copy (stepsigns, newsteps, size);
 983
 984   lambda_matrix_copy (matrix, H, size, size);
 985
 986   for (j = 0; j < size; j++)
 987     {
 988       lambda_vector row;
 989       row = H[j];
 990       for (i = j; i < size; i++)
 991         if (row[i] < 0)
 992           lambda_matrix_col_negate (H, size, i);
 993       while (lambda_vector_first_nz (row, size, j + 1) < size)
 994         {
 995           minimum_column = lambda_vector_min_nz (row, size, j);
 996           lambda_matrix_col_exchange (H, size, j, minimum_column);
 997
 998           temp = newsteps[j];
 999           newsteps[j] = newsteps[minimum_column];
1000           newsteps[minimum_column] = temp;
1001
1002           for (i = j + 1; i < size; i++)
1003             {
1004               factor = row[i] / row[j];
1005               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1006             }
1007         }
1008     }
1009   return newsteps;
1010 }
1011
1012 /* Transform NEST according to TRANS, and return the new loopnest.
1013    This involves
1014    1. Computing a lattice base for the transformation
1015    2. Composing the dense base with the specified transformation (TRANS)
1016    3. Decomposing the combined transformation into a lower triangular portion,
1017    and a unimodular portion.
1018    4. Computing the auxiliary nest using the unimodular portion.
1019    5. Computing the target nest using the auxiliary nest and the lower
1020    triangular portion.  */
1021
1022 lambda_loopnest
1023 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1024 {
1025   lambda_loopnest auxillary_nest, target_nest;
1026
1027   int depth, invariants;
1028   int i, j;
1029   lambda_lattice lattice;
1030   lambda_trans_matrix trans1, H, U;
1031   lambda_loop loop;
1032   lambda_linear_expression expression;
1033   lambda_vector origin;
1034   lambda_matrix origin_invariants;
1035   lambda_vector stepsigns;
1036   int f;
1037
1038   depth = LN_DEPTH (nest);
1039   invariants = LN_INVARIANTS (nest);
1040
1041   /* Keep track of the signs of the loop steps.  */
1042   stepsigns = lambda_vector_new (depth);
1043   for (i = 0; i < depth; i++)
1044     {
1045       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1046         stepsigns[i] = 1;
1047       else
1048         stepsigns[i] = -1;
1049     }
1050
1051   /* Compute the lattice base.  */
1052   lattice = lambda_lattice_compute_base (nest);
1053   trans1 = lambda_trans_matrix_new (depth, depth);
1054
1055   /* Multiply the transformation matrix by the lattice base.  */
1056
1057   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1058                       LTM_MATRIX (trans1), depth, depth, depth);
1059
1060   /* Compute the Hermite normal form for the new transformation matrix.  */
1061   H = lambda_trans_matrix_new (depth, depth);
1062   U = lambda_trans_matrix_new (depth, depth);
1063   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1064                          LTM_MATRIX (U));
1065
1066   /* Compute the auxiliary loop nest's space from the unimodular
1067      portion.  */
1068   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1069
1070   /* Compute the loop step signs from the old step signs and the
1071      transformation matrix.  */
1072   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1073
1074   /* Compute the target loop nest space from the auxiliary nest and
1075      the lower triangular matrix H.  */
1076   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1077   origin = lambda_vector_new (depth);
1078   origin_invariants = lambda_matrix_new (depth, invariants);
1079   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1080                              LATTICE_ORIGIN (lattice), origin);
1081   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1082                       origin_invariants, depth, depth, invariants);
1083
1084   for (i = 0; i < depth; i++)
1085     {
1086       loop = LN_LOOPS (target_nest)[i];
1087       expression = LL_LINEAR_OFFSET (loop);
1088       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1089         f = 1;
1090       else
1091         f = LLE_DENOMINATOR (expression);
1092
1093       LLE_CONSTANT (expression) += f * origin[i];
1094
1095       for (j = 0; j < invariants; j++)
1096         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1097           f * origin_invariants[i][j];
1098     }
1099
1100   return target_nest;
1101
1102 }
1103
1104 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1105    return the new expression.  DEPTH is the depth of the loopnest.
1106    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1107    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1108    is the amount we have to add/subtract from the expression because of the
1109    type of comparison it is used in.  */
1110
1111 static lambda_linear_expression
1112 gcc_tree_to_linear_expression (int depth, tree expr,
1113                                VEC(tree,heap) *outerinductionvars,
1114                                VEC(tree,heap) *invariants, int extra)
1115 {
1116   lambda_linear_expression lle = NULL;
1117   switch (TREE_CODE (expr))
1118     {
1119     case INTEGER_CST:
1120       {
1121         lle = lambda_linear_expression_new (depth, 2 * depth);
1122         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1123         if (extra != 0)
1124           LLE_CONSTANT (lle) += extra;
1125
1126         LLE_DENOMINATOR (lle) = 1;
1127       }
1128       break;
1129     case SSA_NAME:
1130       {
1131         tree iv, invar;
1132         size_t i;
1133         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1134           if (iv != NULL)
1135             {
1136               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1137                 {
1138                   lle = lambda_linear_expression_new (depth, 2 * depth);
1139                   LLE_COEFFICIENTS (lle)[i] = 1;
1140                   if (extra != 0)
1141                     LLE_CONSTANT (lle) = extra;
1142
1143                   LLE_DENOMINATOR (lle) = 1;
1144                 }
1145             }
1146         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1147           if (invar != NULL)
1148             {
1149               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1150                 {
1151                   lle = lambda_linear_expression_new (depth, 2 * depth);
1152                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1153                   if (extra != 0)
1154                     LLE_CONSTANT (lle) = extra;
1155                   LLE_DENOMINATOR (lle) = 1;
1156                 }
1157             }
1158       }
1159       break;
1160     default:
1161       return NULL;
1162     }
1163
1164   return lle;
1165 }
1166
1167 /* Return the depth of the loopnest NEST */
1168
1169 static int
1170 depth_of_nest (struct loop *nest)
1171 {
1172   size_t depth = 0;
1173   while (nest)
1174     {
1175       depth++;
1176       nest = nest->inner;
1177     }
1178   return depth;
1179 }
1180
1181
1182 /* Return true if OP is invariant in LOOP and all outer loops.  */
1183
1184 static bool
1185 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1186 {
1187   if (is_gimple_min_invariant (op))
1188     return true;
1189   if (loop_depth (loop) == 0)
1190     return true;
1191   if (!expr_invariant_in_loop_p (loop, op))
1192     return false;
1193   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1194     return false;
1195   return true;
1196 }
1197
1198 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1199    or NULL if it could not be converted.
1200    DEPTH is the depth of the loop.
1201    INVARIANTS is a pointer to the array of loop invariants.
1202    The induction variable for this loop should be stored in the parameter
1203    OURINDUCTIONVAR.
1204    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1205
1206 static lambda_loop
1207 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1208                          VEC(tree,heap) ** invariants,
1209                          tree * ourinductionvar,
1210                          VEC(tree,heap) * outerinductionvars,
1211                          VEC(tree,heap) ** lboundvars,
1212                          VEC(tree,heap) ** uboundvars,
1213                          VEC(int,heap) ** steps)
1214 {
1215   tree phi;
1216   tree exit_cond;
1217   tree access_fn, inductionvar;
1218   tree step;
1219   lambda_loop lloop = NULL;
1220   lambda_linear_expression lbound, ubound;
1221   tree test;
1222   int stepint;
1223   int extra = 0;
1224   tree lboundvar, uboundvar, uboundresult;
1225
1226   /* Find out induction var and exit condition.  */
1227   inductionvar = find_induction_var_from_exit_cond (loop);
1228   exit_cond = get_loop_exit_condition (loop);
1229
1230   if (inductionvar == NULL || exit_cond == NULL)
1231     {
1232       if (dump_file && (dump_flags & TDF_DETAILS))
1233         fprintf (dump_file,
1234                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1235       return NULL;
1236     }
1237
1238   test = TREE_OPERAND (exit_cond, 0);
1239
1240   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1241     {
1242
1243       if (dump_file && (dump_flags & TDF_DETAILS))
1244         fprintf (dump_file,
1245                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1246
1247       return NULL;
1248     }
1249
1250   phi = SSA_NAME_DEF_STMT (inductionvar);
1251   if (TREE_CODE (phi) != PHI_NODE)
1252     {
1253       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1254       if (!phi)
1255         {
1256
1257           if (dump_file && (dump_flags & TDF_DETAILS))
1258             fprintf (dump_file,
1259                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1260
1261           return NULL;
1262         }
1263
1264       phi = SSA_NAME_DEF_STMT (phi);
1265       if (TREE_CODE (phi) != PHI_NODE)
1266         {
1267
1268           if (dump_file && (dump_flags & TDF_DETAILS))
1269             fprintf (dump_file,
1270                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1271           return NULL;
1272         }
1273
1274     }
1275
1276   /* The induction variable name/version we want to put in the array is the
1277      result of the induction variable phi node.  */
1278   *ourinductionvar = PHI_RESULT (phi);
1279   access_fn = instantiate_parameters
1280     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1281   if (access_fn == chrec_dont_know)
1282     {
1283       if (dump_file && (dump_flags & TDF_DETAILS))
1284         fprintf (dump_file,
1285                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1286
1287       return NULL;
1288     }
1289
1290   step = evolution_part_in_loop_num (access_fn, loop->num);
1291   if (!step || step == chrec_dont_know)
1292     {
1293       if (dump_file && (dump_flags & TDF_DETAILS))
1294         fprintf (dump_file,
1295                  "Unable to convert loop: Cannot determine step of loop.\n");
1296
1297       return NULL;
1298     }
1299   if (TREE_CODE (step) != INTEGER_CST)
1300     {
1301
1302       if (dump_file && (dump_flags & TDF_DETAILS))
1303         fprintf (dump_file,
1304                  "Unable to convert loop: Step of loop is not integer.\n");
1305       return NULL;
1306     }
1307
1308   stepint = TREE_INT_CST_LOW (step);
1309
1310   /* Only want phis for induction vars, which will have two
1311      arguments.  */
1312   if (PHI_NUM_ARGS (phi) != 2)
1313     {
1314       if (dump_file && (dump_flags & TDF_DETAILS))
1315         fprintf (dump_file,
1316                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1317       return NULL;
1318     }
1319
1320   /* Another induction variable check. One argument's source should be
1321      in the loop, one outside the loop.  */
1322   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1323       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1324     {
1325
1326       if (dump_file && (dump_flags & TDF_DETAILS))
1327         fprintf (dump_file,
1328                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1329
1330       return NULL;
1331     }
1332
1333   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1334     {
1335       lboundvar = PHI_ARG_DEF (phi, 1);
1336       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1337                                               outerinductionvars, *invariants,
1338                                               0);
1339     }
1340   else
1341     {
1342       lboundvar = PHI_ARG_DEF (phi, 0);
1343       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1344                                               outerinductionvars, *invariants,
1345                                               0);
1346     }
1347
1348   if (!lbound)
1349     {
1350
1351       if (dump_file && (dump_flags & TDF_DETAILS))
1352         fprintf (dump_file,
1353                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1354
1355       return NULL;
1356     }
1357   /* One part of the test may be a loop invariant tree.  */
1358   VEC_reserve (tree, heap, *invariants, 1);
1359   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1360       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1361     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1362   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1363            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1364     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1365
1366   /* The non-induction variable part of the test is the upper bound variable.
1367    */
1368   if (TREE_OPERAND (test, 0) == inductionvar)
1369     uboundvar = TREE_OPERAND (test, 1);
1370   else
1371     uboundvar = TREE_OPERAND (test, 0);
1372
1373
1374   /* We only size the vectors assuming we have, at max, 2 times as many
1375      invariants as we do loops (one for each bound).
1376      This is just an arbitrary number, but it has to be matched against the
1377      code below.  */
1378   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1379
1380
1381   /* We might have some leftover.  */
1382   if (TREE_CODE (test) == LT_EXPR)
1383     extra = -1 * stepint;
1384   else if (TREE_CODE (test) == NE_EXPR)
1385     extra = -1 * stepint;
1386   else if (TREE_CODE (test) == GT_EXPR)
1387     extra = -1 * stepint;
1388   else if (TREE_CODE (test) == EQ_EXPR)
1389     extra = 1 * stepint;
1390
1391   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1392                                           outerinductionvars,
1393                                           *invariants, extra);
1394   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1395                          build_int_cst (TREE_TYPE (uboundvar), extra));
1396   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1397   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1398   VEC_safe_push (int, heap, *steps, stepint);
1399   if (!ubound)
1400     {
1401       if (dump_file && (dump_flags & TDF_DETAILS))
1402         fprintf (dump_file,
1403                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1404       return NULL;
1405     }
1406
1407   lloop = lambda_loop_new ();
1408   LL_STEP (lloop) = stepint;
1409   LL_LOWER_BOUND (lloop) = lbound;
1410   LL_UPPER_BOUND (lloop) = ubound;
1411   return lloop;
1412 }
1413
1414 /* Given a LOOP, find the induction variable it is testing against in the exit
1415    condition.  Return the induction variable if found, NULL otherwise.  */
1416
1417 static tree
1418 find_induction_var_from_exit_cond (struct loop *loop)
1419 {
1420   tree expr = get_loop_exit_condition (loop);
1421   tree ivarop;
1422   tree test;
1423   if (expr == NULL_TREE)
1424     return NULL_TREE;
1425   if (TREE_CODE (expr) != COND_EXPR)
1426     return NULL_TREE;
1427   test = TREE_OPERAND (expr, 0);
1428   if (!COMPARISON_CLASS_P (test))
1429     return NULL_TREE;
1430
1431   /* Find the side that is invariant in this loop. The ivar must be the other
1432      side.  */
1433
1434   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1435       ivarop = TREE_OPERAND (test, 1);
1436   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1437       ivarop = TREE_OPERAND (test, 0);
1438   else
1439     return NULL_TREE;
1440
1441   if (TREE_CODE (ivarop) != SSA_NAME)
1442     return NULL_TREE;
1443   return ivarop;
1444 }
1445
1446 DEF_VEC_P(lambda_loop);
1447 DEF_VEC_ALLOC_P(lambda_loop,heap);
1448
1449 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1450    Return the new loop nest.
1451    INDUCTIONVARS is a pointer to an array of induction variables for the
1452    loopnest that will be filled in during this process.
1453    INVARIANTS is a pointer to an array of invariants that will be filled in
1454    during this process.  */
1455
1456 lambda_loopnest
1457 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1458                                  VEC(tree,heap) **inductionvars,
1459                                  VEC(tree,heap) **invariants)
1460 {
1461   lambda_loopnest ret = NULL;
1462   struct loop *temp = loop_nest;
1463   int depth = depth_of_nest (loop_nest);
1464   size_t i;
1465   VEC(lambda_loop,heap) *loops = NULL;
1466   VEC(tree,heap) *uboundvars = NULL;
1467   VEC(tree,heap) *lboundvars  = NULL;
1468   VEC(int,heap) *steps = NULL;
1469   lambda_loop newloop;
1470   tree inductionvar = NULL;
1471   bool perfect_nest = perfect_nest_p (loop_nest);
1472
1473   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1474     goto fail;
1475
1476   while (temp)
1477     {
1478       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1479                                          &inductionvar, *inductionvars,
1480                                          &lboundvars, &uboundvars,
1481                                          &steps);
1482       if (!newloop)
1483         goto fail;
1484
1485       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1486       VEC_safe_push (lambda_loop, heap, loops, newloop);
1487       temp = temp->inner;
1488     }
1489
1490   if (!perfect_nest)
1491     {
1492       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1493                             *inductionvars))
1494         {
1495           if (dump_file)
1496             fprintf (dump_file,
1497                      "Not a perfect loop nest and couldn't convert to one.\n");
1498           goto fail;
1499         }
1500       else if (dump_file)
1501         fprintf (dump_file,
1502                  "Successfully converted loop nest to perfect loop nest.\n");
1503     }
1504
1505   ret = lambda_loopnest_new (depth, 2 * depth);
1506
1507   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1508     LN_LOOPS (ret)[i] = newloop;
1509
1510  fail:
1511   VEC_free (lambda_loop, heap, loops);
1512   VEC_free (tree, heap, uboundvars);
1513   VEC_free (tree, heap, lboundvars);
1514   VEC_free (int, heap, steps);
1515
1516   return ret;
1517 }
1518
1519 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1520    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1521    inserted for us are stored.  INDUCTION_VARS is the array of induction
1522    variables for the loop this LBV is from.  TYPE is the tree type to use for
1523    the variables and trees involved.  */
1524
1525 static tree
1526 lbv_to_gcc_expression (lambda_body_vector lbv,
1527                        tree type, VEC(tree,heap) *induction_vars,
1528                        tree *stmts_to_insert)
1529 {
1530   int k;
1531   tree resvar;
1532   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1533
1534   k = LBV_DENOMINATOR (lbv);
1535   gcc_assert (k != 0);
1536   if (k != 1)
1537     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1538
1539   resvar = create_tmp_var (type, "lbvtmp");
1540   add_referenced_var (resvar);
1541   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1542 }
1543
1544 /* Convert a linear expression from coefficient and constant form to a
1545    gcc tree.
1546    Return the tree that represents the final value of the expression.
1547    LLE is the linear expression to convert.
1548    OFFSET is the linear offset to apply to the expression.
1549    TYPE is the tree type to use for the variables and math.
1550    INDUCTION_VARS is a vector of induction variables for the loops.
1551    INVARIANTS is a vector of the loop nest invariants.
1552    WRAP specifies what tree code to wrap the results in, if there is more than
1553    one (it is either MAX_EXPR, or MIN_EXPR).
1554    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1555    statements that need to be inserted for the linear expression.  */
1556
1557 static tree
1558 lle_to_gcc_expression (lambda_linear_expression lle,
1559                        lambda_linear_expression offset,
1560                        tree type,
1561                        VEC(tree,heap) *induction_vars,
1562                        VEC(tree,heap) *invariants,
1563                        enum tree_code wrap, tree *stmts_to_insert)
1564 {
1565   int k;
1566   tree resvar;
1567   tree expr = NULL_TREE;
1568   VEC(tree,heap) *results = NULL;
1569
1570   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1571
1572   /* Build up the linear expressions.  */
1573   for (; lle != NULL; lle = LLE_NEXT (lle))
1574     {
1575       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1576       expr = fold_build2 (PLUS_EXPR, type, expr,
1577                           build_linear_expr (type,
1578                                              LLE_INVARIANT_COEFFICIENTS (lle),
1579                                              invariants));
1580
1581       k = LLE_CONSTANT (lle);
1582       if (k)
1583         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1584
1585       k = LLE_CONSTANT (offset);
1586       if (k)
1587         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1588
1589       k = LLE_DENOMINATOR (lle);
1590       if (k != 1)
1591         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1592                             type, expr, build_int_cst (type, k));
1593
1594       expr = fold (expr);
1595       VEC_safe_push (tree, heap, results, expr);
1596     }
1597
1598   gcc_assert (expr);
1599
1600   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1601   if (VEC_length (tree, results) > 1)
1602     {
1603       size_t i;
1604       tree op;
1605
1606       expr = VEC_index (tree, results, 0);
1607       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1608         expr = fold_build2 (wrap, type, expr, op);
1609     }
1610
1611   VEC_free (tree, heap, results);
1612
1613   resvar = create_tmp_var (type, "lletmp");
1614   add_referenced_var (resvar);
1615   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1616 }
1617
1618 /* Remove the induction variable defined at IV_STMT.  */
1619
1620 static void
1621 remove_iv (tree iv_stmt)
1622 {
1623   if (TREE_CODE (iv_stmt) == PHI_NODE)
1624     {
1625       int i;
1626
1627       for (i = 0; i < PHI_NUM_ARGS (iv_stmt); i++)
1628         {
1629           tree stmt;
1630           imm_use_iterator imm_iter;
1631           tree arg = PHI_ARG_DEF (iv_stmt, i);
1632           bool used = false;
1633
1634           if (TREE_CODE (arg) != SSA_NAME)
1635             continue;
1636
1637           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1638             if (stmt != iv_stmt)
1639               used = true;
1640
1641           if (!used)
1642             remove_iv (SSA_NAME_DEF_STMT (arg));
1643         }
1644
1645       remove_phi_node (iv_stmt, NULL_TREE, true);
1646     }
1647   else
1648     {
1649       block_stmt_iterator bsi = bsi_for_stmt (iv_stmt);
1650
1651       bsi_remove (&bsi, true);
1652       release_defs (iv_stmt);
1653     }
1654 }
1655
1656
1657 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1658    it, back into gcc code.  This changes the
1659    loops, their induction variables, and their bodies, so that they
1660    match the transformed loopnest.
1661    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1662    loopnest.
1663    OLD_IVS is a vector of induction variables from the old loopnest.
1664    INVARIANTS is a vector of loop invariants from the old loopnest.
1665    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1666    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1667    NEW_LOOPNEST.  */
1668
1669 void
1670 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1671                                  VEC(tree,heap) *old_ivs,
1672                                  VEC(tree,heap) *invariants,
1673                                  lambda_loopnest new_loopnest,
1674                                  lambda_trans_matrix transform)
1675 {
1676   struct loop *temp;
1677   size_t i = 0;
1678   size_t depth = 0;
1679   VEC(tree,heap) *new_ivs = NULL;
1680   tree oldiv;
1681
1682   block_stmt_iterator bsi;
1683
1684   if (dump_file)
1685     {
1686       transform = lambda_trans_matrix_inverse (transform);
1687       fprintf (dump_file, "Inverse of transformation matrix:\n");
1688       print_lambda_trans_matrix (dump_file, transform);
1689     }
1690   depth = depth_of_nest (old_loopnest);
1691   temp = old_loopnest;
1692
1693   while (temp)
1694     {
1695       lambda_loop newloop;
1696       basic_block bb;
1697       edge exit;
1698       tree ivvar, ivvarinced, exitcond, stmts;
1699       enum tree_code testtype;
1700       tree newupperbound, newlowerbound;
1701       lambda_linear_expression offset;
1702       tree type;
1703       bool insert_after;
1704       tree inc_stmt;
1705
1706       oldiv = VEC_index (tree, old_ivs, i);
1707       type = TREE_TYPE (oldiv);
1708
1709       /* First, build the new induction variable temporary  */
1710
1711       ivvar = create_tmp_var (type, "lnivtmp");
1712       add_referenced_var (ivvar);
1713
1714       VEC_safe_push (tree, heap, new_ivs, ivvar);
1715
1716       newloop = LN_LOOPS (new_loopnest)[i];
1717
1718       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1719          cases for now.  */
1720       offset = LL_LINEAR_OFFSET (newloop);
1721
1722       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1723                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1724
1725       /* Now build the  new lower bounds, and insert the statements
1726          necessary to generate it on the loop preheader.  */
1727       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1728                                              LL_LINEAR_OFFSET (newloop),
1729                                              type,
1730                                              new_ivs,
1731                                              invariants, MAX_EXPR, &stmts);
1732
1733       if (stmts)
1734         {
1735           bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1736           bsi_commit_edge_inserts ();
1737         }
1738       /* Build the new upper bound and insert its statements in the
1739          basic block of the exit condition */
1740       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1741                                              LL_LINEAR_OFFSET (newloop),
1742                                              type,
1743                                              new_ivs,
1744                                              invariants, MIN_EXPR, &stmts);
1745       exit = single_exit (temp);
1746       exitcond = get_loop_exit_condition (temp);
1747       bb = bb_for_stmt (exitcond);
1748       bsi = bsi_after_labels (bb);
1749       if (stmts)
1750         bsi_insert_before (&bsi, stmts, BSI_NEW_STMT);
1751
1752       /* Create the new iv.  */
1753
1754       standard_iv_increment_position (temp, &bsi, &insert_after);
1755       create_iv (newlowerbound,
1756                  build_int_cst (type, LL_STEP (newloop)),
1757                  ivvar, temp, &bsi, insert_after, &ivvar,
1758                  NULL);
1759
1760       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1761          dominate the block containing the exit condition.
1762          So we simply create our own incremented iv to use in the new exit
1763          test,  and let redundancy elimination sort it out.  */
1764       inc_stmt = build2 (PLUS_EXPR, type,
1765                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1766       inc_stmt = build_gimple_modify_stmt (SSA_NAME_VAR (ivvar), inc_stmt);
1767       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1768       GIMPLE_STMT_OPERAND (inc_stmt, 0) = ivvarinced;
1769       bsi = bsi_for_stmt (exitcond);
1770       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1771
1772       /* Replace the exit condition with the new upper bound
1773          comparison.  */
1774
1775       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1776
1777       /* We want to build a conditional where true means exit the loop, and
1778          false means continue the loop.
1779          So swap the testtype if this isn't the way things are.*/
1780
1781       if (exit->flags & EDGE_FALSE_VALUE)
1782         testtype = swap_tree_comparison (testtype);
1783
1784       COND_EXPR_COND (exitcond) = build2 (testtype,
1785                                           boolean_type_node,
1786                                           newupperbound, ivvarinced);
1787       update_stmt (exitcond);
1788       VEC_replace (tree, new_ivs, i, ivvar);
1789
1790       i++;
1791       temp = temp->inner;
1792     }
1793
1794   /* Rewrite uses of the old ivs so that they are now specified in terms of
1795      the new ivs.  */
1796
1797   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1798     {
1799       imm_use_iterator imm_iter;
1800       use_operand_p use_p;
1801       tree oldiv_def;
1802       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1803       tree stmt;
1804
1805       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1806         oldiv_def = PHI_RESULT (oldiv_stmt);
1807       else
1808         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1809       gcc_assert (oldiv_def != NULL_TREE);
1810
1811       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1812         {
1813           tree newiv, stmts;
1814           lambda_body_vector lbv, newlbv;
1815
1816           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1817
1818           /* Compute the new expression for the induction
1819              variable.  */
1820           depth = VEC_length (tree, new_ivs);
1821           lbv = lambda_body_vector_new (depth);
1822           LBV_COEFFICIENTS (lbv)[i] = 1;
1823
1824           newlbv = lambda_body_vector_compute_new (transform, lbv);
1825
1826           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1827                                          new_ivs, &stmts);
1828           if (stmts)
1829             {
1830               bsi = bsi_for_stmt (stmt);
1831               bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1832             }
1833
1834           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1835             propagate_value (use_p, newiv);
1836           update_stmt (stmt);
1837         }
1838
1839       /* Remove the now unused induction variable.  */
1840       remove_iv (oldiv_stmt);
1841     }
1842   VEC_free (tree, heap, new_ivs);
1843 }
1844
1845 /* Return TRUE if this is not interesting statement from the perspective of
1846    determining if we have a perfect loop nest.  */
1847
1848 static bool
1849 not_interesting_stmt (tree stmt)
1850 {
1851   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1852      loop, we would have already failed the number of exits tests.  */
1853   if (TREE_CODE (stmt) == LABEL_EXPR
1854       || TREE_CODE (stmt) == GOTO_EXPR
1855       || TREE_CODE (stmt) == COND_EXPR)
1856     return true;
1857   return false;
1858 }
1859
1860 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1861
1862 static bool
1863 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1864 {
1865   int i;
1866   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1867     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1868       if (PHI_ARG_DEF (phi, i) == def)
1869         return true;
1870   return false;
1871 }
1872
1873 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1874
1875 static bool
1876 stmt_uses_phi_result (tree stmt, tree phi_result)
1877 {
1878   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1879
1880   /* This is conservatively true, because we only want SIMPLE bumpers
1881      of the form x +- constant for our pass.  */
1882   return (use == phi_result);
1883 }
1884
1885 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1886    in-loop-edge in a phi node, and the operand it uses is the result of that
1887    phi node.
1888    I.E. i_29 = i_3 + 1
1889         i_3 = PHI (0, i_29);  */
1890
1891 static bool
1892 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
1893 {
1894   tree use;
1895   tree def;
1896   imm_use_iterator iter;
1897   use_operand_p use_p;
1898
1899   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1900   if (!def)
1901     return false;
1902
1903   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1904     {
1905       use = USE_STMT (use_p);
1906       if (TREE_CODE (use) == PHI_NODE)
1907         {
1908           if (phi_loop_edge_uses_def (loop, use, def))
1909             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1910               return true;
1911         }
1912     }
1913   return false;
1914 }
1915
1916
1917 /* Return true if LOOP is a perfect loop nest.
1918    Perfect loop nests are those loop nests where all code occurs in the
1919    innermost loop body.
1920    If S is a program statement, then
1921
1922    i.e.
1923    DO I = 1, 20
1924        S1
1925        DO J = 1, 20
1926        ...
1927        END DO
1928    END DO
1929    is not a perfect loop nest because of S1.
1930
1931    DO I = 1, 20
1932       DO J = 1, 20
1933         S1
1934         ...
1935       END DO
1936    END DO
1937    is a perfect loop nest.
1938
1939    Since we don't have high level loops anymore, we basically have to walk our
1940    statements and ignore those that are there because the loop needs them (IE
1941    the induction variable increment, and jump back to the top of the loop).  */
1942
1943 bool
1944 perfect_nest_p (struct loop *loop)
1945 {
1946   basic_block *bbs;
1947   size_t i;
1948   tree exit_cond;
1949
1950   if (!loop->inner)
1951     return true;
1952   bbs = get_loop_body (loop);
1953   exit_cond = get_loop_exit_condition (loop);
1954   for (i = 0; i < loop->num_nodes; i++)
1955     {
1956       if (bbs[i]->loop_father == loop)
1957         {
1958           block_stmt_iterator bsi;
1959           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1960             {
1961               tree stmt = bsi_stmt (bsi);
1962               if (stmt == exit_cond
1963                   || not_interesting_stmt (stmt)
1964                   || stmt_is_bumper_for_loop (loop, stmt))
1965                 continue;
1966               free (bbs);
1967               return false;
1968             }
1969         }
1970     }
1971   free (bbs);
1972   /* See if the inner loops are perfectly nested as well.  */
1973   if (loop->inner)
1974     return perfect_nest_p (loop->inner);
1975   return true;
1976 }
1977
1978 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
1979    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
1980    avoid creating duplicate temporaries and FIRSTBSI is statement
1981    iterator where new temporaries should be inserted at the beginning
1982    of body basic block.  */
1983
1984 static void
1985 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
1986                                 int xstep, tree y, tree yinit,
1987                                 htab_t replacements,
1988                                 block_stmt_iterator *firstbsi)
1989 {
1990   ssa_op_iter iter;
1991   use_operand_p use_p;
1992
1993   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1994     {
1995       tree use = USE_FROM_PTR (use_p);
1996       tree step = NULL_TREE;
1997       tree scev, init, val, var, setstmt;
1998       struct tree_map *h, in;
1999       void **loc;
2000
2001       /* Replace uses of X with Y right away.  */
2002       if (use == x)
2003         {
2004           SET_USE (use_p, y);
2005           continue;
2006         }
2007
2008       scev = instantiate_parameters (loop,
2009                                      analyze_scalar_evolution (loop, use));
2010
2011       if (scev == NULL || scev == chrec_dont_know)
2012         continue;
2013
2014       step = evolution_part_in_loop_num (scev, loop->num);
2015       if (step == NULL
2016           || step == chrec_dont_know
2017           || TREE_CODE (step) != INTEGER_CST
2018           || int_cst_value (step) != xstep)
2019         continue;
2020
2021       /* Use REPLACEMENTS hash table to cache already created
2022          temporaries.  */
2023       in.hash = htab_hash_pointer (use);
2024       in.base.from = use;
2025       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2026       if (h != NULL)
2027         {
2028           SET_USE (use_p, h->to);
2029           continue;
2030         }
2031
2032       /* USE which has the same step as X should be replaced
2033          with a temporary set to Y + YINIT - INIT.  */
2034       init = initial_condition_in_loop_num (scev, loop->num);
2035       gcc_assert (init != NULL && init != chrec_dont_know);
2036       if (TREE_TYPE (use) == TREE_TYPE (y))
2037         {
2038           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2039           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2040           if (val == y)
2041             {
2042               /* If X has the same type as USE, the same step
2043                  and same initial value, it can be replaced by Y.  */
2044               SET_USE (use_p, y);
2045               continue;
2046             }
2047         }
2048       else
2049         {
2050           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2051           val = fold_convert (TREE_TYPE (use), val);
2052           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2053         }
2054
2055       /* Create a temporary variable and insert it at the beginning
2056          of the loop body basic block, right after the PHI node
2057          which sets Y.  */
2058       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2059       add_referenced_var (var);
2060       val = force_gimple_operand_bsi (firstbsi, val, false, NULL,
2061                                       true, BSI_SAME_STMT);
2062       setstmt = build_gimple_modify_stmt (var, val);
2063       var = make_ssa_name (var, setstmt);
2064       GIMPLE_STMT_OPERAND (setstmt, 0) = var;
2065       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2066       update_stmt (setstmt);
2067       SET_USE (use_p, var);
2068       h = GGC_NEW (struct tree_map);
2069       h->hash = in.hash;
2070       h->base.from = use;
2071       h->to = var;
2072       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2073       gcc_assert ((*(struct tree_map **)loc) == NULL);
2074       *(struct tree_map **) loc = h;
2075     }
2076 }
2077
2078 /* Return true if STMT is an exit PHI for LOOP */
2079
2080 static bool
2081 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2082 {
2083
2084   if (TREE_CODE (stmt) != PHI_NODE
2085       || PHI_NUM_ARGS (stmt) != 1
2086       || bb_for_stmt (stmt) != single_exit (loop)->dest)
2087     return false;
2088
2089   return true;
2090 }
2091
2092 /* Return true if STMT can be put back into the loop INNER, by
2093    copying it to the beginning of that loop and changing the uses.  */
2094
2095 static bool
2096 can_put_in_inner_loop (struct loop *inner, tree stmt)
2097 {
2098   imm_use_iterator imm_iter;
2099   use_operand_p use_p;
2100
2101   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2102   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2103       || !expr_invariant_in_loop_p (inner, GIMPLE_STMT_OPERAND (stmt, 1)))
2104     return false;
2105
2106   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2107     {
2108       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2109         {
2110           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2111
2112           if (!flow_bb_inside_loop_p (inner, immbb))
2113             return false;
2114         }
2115     }
2116   return true;
2117 }
2118
2119 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2120 static bool
2121 can_put_after_inner_loop (struct loop *loop, tree stmt)
2122 {
2123   imm_use_iterator imm_iter;
2124   use_operand_p use_p;
2125
2126   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2127     return false;
2128
2129   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2130     {
2131       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2132         {
2133           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2134
2135           if (!dominated_by_p (CDI_DOMINATORS,
2136                                immbb,
2137                                loop->inner->header)
2138               && !can_put_in_inner_loop (loop->inner, stmt))
2139             return false;
2140         }
2141     }
2142   return true;
2143 }
2144
2145
2146
2147 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2148    perfect one.  At the moment, we only handle imperfect nests of
2149    depth 2, where all of the statements occur after the inner loop.  */
2150
2151 static bool
2152 can_convert_to_perfect_nest (struct loop *loop)
2153 {
2154   basic_block *bbs;
2155   tree exit_condition, phi;
2156   size_t i;
2157   block_stmt_iterator bsi;
2158   basic_block exitdest;
2159
2160   /* Can't handle triply nested+ loops yet.  */
2161   if (!loop->inner || loop->inner->inner)
2162     return false;
2163
2164   bbs = get_loop_body (loop);
2165   exit_condition = get_loop_exit_condition (loop);
2166   for (i = 0; i < loop->num_nodes; i++)
2167     {
2168       if (bbs[i]->loop_father == loop)
2169         {
2170           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2171             {
2172               tree stmt = bsi_stmt (bsi);
2173
2174               if (stmt == exit_condition
2175                   || not_interesting_stmt (stmt)
2176                   || stmt_is_bumper_for_loop (loop, stmt))
2177                 continue;
2178
2179               /* If this is a scalar operation that can be put back
2180                  into the inner loop, or after the inner loop, through
2181                  copying, then do so. This works on the theory that
2182                  any amount of scalar code we have to reduplicate
2183                  into or after the loops is less expensive that the
2184                  win we get from rearranging the memory walk
2185                  the loop is doing so that it has better
2186                  cache behavior.  */
2187               if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
2188                 {
2189                   use_operand_p use_a, use_b;
2190                   imm_use_iterator imm_iter;
2191                   ssa_op_iter op_iter, op_iter1;
2192                   tree op0 = GIMPLE_STMT_OPERAND (stmt, 0);
2193                   tree scev = instantiate_parameters
2194                     (loop, analyze_scalar_evolution (loop, op0));
2195
2196                   /* If the IV is simple, it can be duplicated.  */
2197                   if (!automatically_generated_chrec_p (scev))
2198                     {
2199                       tree step = evolution_part_in_loop_num (scev, loop->num);
2200                       if (step && step != chrec_dont_know
2201                           && TREE_CODE (step) == INTEGER_CST)
2202                         continue;
2203                     }
2204
2205                   /* The statement should not define a variable used
2206                      in the inner loop.  */
2207                   if (TREE_CODE (op0) == SSA_NAME)
2208                     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2209                       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2210                           == loop->inner)
2211                         goto fail;
2212
2213                   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2214                     {
2215                       tree node, op = USE_FROM_PTR (use_a);
2216
2217                       /* The variables should not be used in both loops.  */
2218                       FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2219                       if (bb_for_stmt (USE_STMT (use_b))->loop_father
2220                           == loop->inner)
2221                         goto fail;
2222
2223                       /* The statement should not use the value of a
2224                          scalar that was modified in the loop.  */
2225                       node = SSA_NAME_DEF_STMT (op);
2226                       if (TREE_CODE (node) == PHI_NODE)
2227                         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2228                           {
2229                             tree arg = USE_FROM_PTR (use_b);
2230
2231                             if (TREE_CODE (arg) == SSA_NAME)
2232                               {
2233                                 tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2234
2235                                 if (bb_for_stmt (arg_stmt)
2236                                     && (bb_for_stmt (arg_stmt)->loop_father
2237                                         == loop->inner))
2238                                   goto fail;
2239                               }
2240                           }
2241                     }
2242
2243                   if (can_put_in_inner_loop (loop->inner, stmt)
2244                       || can_put_after_inner_loop (loop, stmt))
2245                     continue;
2246                 }
2247
2248               /* Otherwise, if the bb of a statement we care about isn't
2249                  dominated by the header of the inner loop, then we can't
2250                  handle this case right now.  This test ensures that the
2251                  statement comes completely *after* the inner loop.  */
2252               if (!dominated_by_p (CDI_DOMINATORS,
2253                                    bb_for_stmt (stmt),
2254                                    loop->inner->header))
2255                 goto fail;
2256             }
2257         }
2258     }
2259
2260   /* We also need to make sure the loop exit only has simple copy phis in it,
2261      otherwise we don't know how to transform it into a perfect nest right
2262      now.  */
2263   exitdest = single_exit (loop)->dest;
2264
2265   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2266     if (PHI_NUM_ARGS (phi) != 1)
2267       goto fail;
2268
2269   free (bbs);
2270   return true;
2271
2272  fail:
2273   free (bbs);
2274   return false;
2275 }
2276
2277 /* Transform the loop nest into a perfect nest, if possible.
2278    LOOP is the loop nest to transform into a perfect nest
2279    LBOUNDS are the lower bounds for the loops to transform
2280    UBOUNDS are the upper bounds for the loops to transform
2281    STEPS is the STEPS for the loops to transform.
2282    LOOPIVS is the induction variables for the loops to transform.
2283
2284    Basically, for the case of
2285
2286    FOR (i = 0; i < 50; i++)
2287     {
2288      FOR (j =0; j < 50; j++)
2289      {
2290         <whatever>
2291      }
2292      <some code>
2293     }
2294
2295    This function will transform it into a perfect loop nest by splitting the
2296    outer loop into two loops, like so:
2297
2298    FOR (i = 0; i < 50; i++)
2299    {
2300      FOR (j = 0; j < 50; j++)
2301      {
2302          <whatever>
2303      }
2304    }
2305
2306    FOR (i = 0; i < 50; i ++)
2307    {
2308     <some code>
2309    }
2310
2311    Return FALSE if we can't make this loop into a perfect nest.  */
2312
2313 static bool
2314 perfect_nestify (struct loop *loop,
2315                  VEC(tree,heap) *lbounds,
2316                  VEC(tree,heap) *ubounds,
2317                  VEC(int,heap) *steps,
2318                  VEC(tree,heap) *loopivs)
2319 {
2320   basic_block *bbs;
2321   tree exit_condition;
2322   tree cond_stmt;
2323   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2324   int i;
2325   block_stmt_iterator bsi, firstbsi;
2326   bool insert_after;
2327   edge e;
2328   struct loop *newloop;
2329   tree phi;
2330   tree uboundvar;
2331   tree stmt;
2332   tree oldivvar, ivvar, ivvarinced;
2333   VEC(tree,heap) *phis = NULL;
2334   htab_t replacements = NULL;
2335
2336   /* Create the new loop.  */
2337   olddest = single_exit (loop)->dest;
2338   preheaderbb = split_edge (single_exit (loop));
2339   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2340
2341   /* Push the exit phi nodes that we are moving.  */
2342   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2343     {
2344       VEC_reserve (tree, heap, phis, 2);
2345       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2346       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2347     }
2348   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2349
2350   /* Remove the exit phis from the old basic block.  */
2351   while (phi_nodes (olddest) != NULL)
2352     remove_phi_node (phi_nodes (olddest), NULL, false);
2353
2354   /* and add them back to the new basic block.  */
2355   while (VEC_length (tree, phis) != 0)
2356     {
2357       tree def;
2358       tree phiname;
2359       def = VEC_pop (tree, phis);
2360       phiname = VEC_pop (tree, phis);
2361       phi = create_phi_node (phiname, preheaderbb);
2362       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2363     }
2364   flush_pending_stmts (e);
2365   VEC_free (tree, heap, phis);
2366
2367   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2368   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2369   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2370   cond_stmt = build3 (COND_EXPR, void_type_node,
2371                       build2 (NE_EXPR, boolean_type_node,
2372                               integer_one_node,
2373                               integer_zero_node),
2374                       NULL_TREE, NULL_TREE);
2375   bsi = bsi_start (bodybb);
2376   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2377   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2378   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2379   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2380
2381   /* Update the loop structures.  */
2382   newloop = duplicate_loop (loop, olddest->loop_father);
2383   newloop->header = headerbb;
2384   newloop->latch = latchbb;
2385   add_bb_to_loop (latchbb, newloop);
2386   add_bb_to_loop (bodybb, newloop);
2387   add_bb_to_loop (headerbb, newloop);
2388   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2389   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2390   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2391                            single_exit (loop)->src);
2392   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2393   set_immediate_dominator (CDI_DOMINATORS, olddest,
2394                            recompute_dominator (CDI_DOMINATORS, olddest));
2395   /* Create the new iv.  */
2396   oldivvar = VEC_index (tree, loopivs, 0);
2397   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2398   add_referenced_var (ivvar);
2399   standard_iv_increment_position (newloop, &bsi, &insert_after);
2400   create_iv (VEC_index (tree, lbounds, 0),
2401              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2402              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2403
2404   /* Create the new upper bound.  This may be not just a variable, so we copy
2405      it to one just in case.  */
2406
2407   exit_condition = get_loop_exit_condition (newloop);
2408   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2409   add_referenced_var (uboundvar);
2410   stmt = build_gimple_modify_stmt (uboundvar, VEC_index (tree, ubounds, 0));
2411   uboundvar = make_ssa_name (uboundvar, stmt);
2412   GIMPLE_STMT_OPERAND (stmt, 0) = uboundvar;
2413
2414   if (insert_after)
2415     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2416   else
2417     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2418   update_stmt (stmt);
2419   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2420                                             boolean_type_node,
2421                                             uboundvar,
2422                                             ivvarinced);
2423   update_stmt (exit_condition);
2424   replacements = htab_create_ggc (20, tree_map_hash,
2425                                   tree_map_eq, NULL);
2426   bbs = get_loop_body_in_dom_order (loop);
2427   /* Now move the statements, and replace the induction variable in the moved
2428      statements with the correct loop induction variable.  */
2429   oldivvar = VEC_index (tree, loopivs, 0);
2430   firstbsi = bsi_start (bodybb);
2431   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2432     {
2433       block_stmt_iterator tobsi = bsi_last (bodybb);
2434       if (bbs[i]->loop_father == loop)
2435         {
2436           /* If this is true, we are *before* the inner loop.
2437              If this isn't true, we are *after* it.
2438
2439              The only time can_convert_to_perfect_nest returns true when we
2440              have statements before the inner loop is if they can be moved
2441              into the inner loop.
2442
2443              The only time can_convert_to_perfect_nest returns true when we
2444              have statements after the inner loop is if they can be moved into
2445              the new split loop.  */
2446
2447           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2448             {
2449               block_stmt_iterator header_bsi
2450                 = bsi_after_labels (loop->inner->header);
2451
2452               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2453                 {
2454                   tree stmt = bsi_stmt (bsi);
2455
2456                   if (stmt == exit_condition
2457                       || not_interesting_stmt (stmt)
2458                       || stmt_is_bumper_for_loop (loop, stmt))
2459                     {
2460                       bsi_next (&bsi);
2461                       continue;
2462                     }
2463
2464                   bsi_move_before (&bsi, &header_bsi);
2465                 }
2466             }
2467           else
2468             {
2469               /* Note that the bsi only needs to be explicitly incremented
2470                  when we don't move something, since it is automatically
2471                  incremented when we do.  */
2472               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2473                 {
2474                   ssa_op_iter i;
2475                   tree n, stmt = bsi_stmt (bsi);
2476
2477                   if (stmt == exit_condition
2478                       || not_interesting_stmt (stmt)
2479                       || stmt_is_bumper_for_loop (loop, stmt))
2480                     {
2481                       bsi_next (&bsi);
2482                       continue;
2483                     }
2484
2485                   replace_uses_equiv_to_x_with_y
2486                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2487                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2488
2489                   bsi_move_before (&bsi, &tobsi);
2490
2491                   /* If the statement has any virtual operands, they may
2492                      need to be rewired because the original loop may
2493                      still reference them.  */
2494                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2495                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2496                 }
2497             }
2498
2499         }
2500     }
2501
2502   free (bbs);
2503   htab_delete (replacements);
2504   return perfect_nest_p (loop);
2505 }
2506
2507 /* Return true if TRANS is a legal transformation matrix that respects
2508    the dependence vectors in DISTS and DIRS.  The conservative answer
2509    is false.
2510
2511    "Wolfe proves that a unimodular transformation represented by the
2512    matrix T is legal when applied to a loop nest with a set of
2513    lexicographically non-negative distance vectors RDG if and only if
2514    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2515    i.e.: if and only if it transforms the lexicographically positive
2516    distance vectors to lexicographically positive vectors.  Note that
2517    a unimodular matrix must transform the zero vector (and only it) to
2518    the zero vector." S.Muchnick.  */
2519
2520 bool
2521 lambda_transform_legal_p (lambda_trans_matrix trans,
2522                           int nb_loops,
2523                           VEC (ddr_p, heap) *dependence_relations)
2524 {
2525   unsigned int i, j;
2526   lambda_vector distres;
2527   struct data_dependence_relation *ddr;
2528
2529   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2530               && LTM_ROWSIZE (trans) == nb_loops);
2531
2532   /* When there is an unknown relation in the dependence_relations, we
2533      know that it is no worth looking at this loop nest: give up.  */
2534   ddr = VEC_index (ddr_p, dependence_relations, 0);
2535   if (ddr == NULL)
2536     return true;
2537   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2538     return false;
2539
2540   distres = lambda_vector_new (nb_loops);
2541
2542   /* For each distance vector in the dependence graph.  */
2543   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2544     {
2545       /* Don't care about relations for which we know that there is no
2546          dependence, nor about read-read (aka. output-dependences):
2547          these data accesses can happen in any order.  */
2548       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2549           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2550         continue;
2551
2552       /* Conservatively answer: "this transformation is not valid".  */
2553       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2554         return false;
2555
2556       /* If the dependence could not be captured by a distance vector,
2557          conservatively answer that the transform is not valid.  */
2558       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2559         return false;
2560
2561       /* Compute trans.dist_vect */
2562       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2563         {
2564           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2565                                      DDR_DIST_VECT (ddr, j), distres);
2566
2567           if (!lambda_vector_lexico_pos (distres, nb_loops))
2568             return false;
2569         }
2570     }
2571   return true;
2572 }