gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20     02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "errors.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "target.h"
  30 #include "rtl.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "timevar.h"
  36 #include "cfgloop.h"
  37 #include "expr.h"
  38 #include "optabs.h"
  39 #include "tree-chrec.h"
  40 #include "tree-data-ref.h"
  41 #include "tree-pass.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "vec.h"
  44 #include "lambda.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as an integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
  78  nest . U = auxiliary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 DEF_VEC_P(int);
 119 DEF_VEC_ALLOC_P(int,heap);
 120
 121 static bool perfect_nestify (struct loops *,
 122                              struct loop *, VEC(tree,heap) *,
 123                              VEC(tree,heap) *, VEC(int,heap) *,
 124                              VEC(tree,heap) *);
 125 /* Lattice stuff that is internal to the code generation algorithm.  */
 126
 127 typedef struct
 128 {
 129   /* Lattice base matrix.  */
 130   lambda_matrix base;
 131   /* Lattice dimension.  */
 132   int dimension;
 133   /* Origin vector for the coefficients.  */
 134   lambda_vector origin;
 135   /* Origin matrix for the invariants.  */
 136   lambda_matrix origin_invariants;
 137   /* Number of invariants.  */
 138   int invariants;
 139 } *lambda_lattice;
 140
 141 #define LATTICE_BASE(T) ((T)->base)
 142 #define LATTICE_DIMENSION(T) ((T)->dimension)
 143 #define LATTICE_ORIGIN(T) ((T)->origin)
 144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 146
 147 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 148                        int, int);
 149 static lambda_lattice lambda_lattice_new (int, int);
 150 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 151
 152 static tree find_induction_var_from_exit_cond (struct loop *);
 153
 154 /* Create a new lambda body vector.  */
 155
 156 lambda_body_vector
 157 lambda_body_vector_new (int size)
 158 {
 159   lambda_body_vector ret;
 160
 161   ret = ggc_alloc (sizeof (*ret));
 162   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 163   LBV_SIZE (ret) = size;
 164   LBV_DENOMINATOR (ret) = 1;
 165   return ret;
 166 }
 167
 168 /* Compute the new coefficients for the vector based on the
 169   *inverse* of the transformation matrix.  */
 170
 171 lambda_body_vector
 172 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 173                                 lambda_body_vector vect)
 174 {
 175   lambda_body_vector temp;
 176   int depth;
 177
 178   /* Make sure the matrix is square.  */
 179   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 180
 181   depth = LTM_ROWSIZE (transform);
 182
 183   temp = lambda_body_vector_new (depth);
 184   LBV_DENOMINATOR (temp) =
 185     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 186   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 187                              LTM_MATRIX (transform), depth,
 188                              LBV_COEFFICIENTS (temp));
 189   LBV_SIZE (temp) = LBV_SIZE (vect);
 190   return temp;
 191 }
 192
 193 /* Print out a lambda body vector.  */
 194
 195 void
 196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 197 {
 198   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 199 }
 200
 201 /* Return TRUE if two linear expressions are equal.  */
 202
 203 static bool
 204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 205            int depth, int invariants)
 206 {
 207   int i;
 208
 209   if (lle1 == NULL || lle2 == NULL)
 210     return false;
 211   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 212     return false;
 213   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 214     return false;
 215   for (i = 0; i < depth; i++)
 216     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 217       return false;
 218   for (i = 0; i < invariants; i++)
 219     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 220         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 221       return false;
 222   return true;
 223 }
 224
 225 /* Create a new linear expression with dimension DIM, and total number
 226    of invariants INVARIANTS.  */
 227
 228 lambda_linear_expression
 229 lambda_linear_expression_new (int dim, int invariants)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = ggc_alloc_cleared (sizeof (*ret));
 234
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants)
 332 {
 333   lambda_loopnest ret;
 334   ret = ggc_alloc (sizeof (*ret));
 335
 336   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 337   LN_DEPTH (ret) = depth;
 338   LN_INVARIANTS (ret) = invariants;
 339
 340   return ret;
 341 }
 342
 343 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 344    character to use for loop names is given by START.  */
 345
 346 void
 347 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 348 {
 349   int i;
 350   for (i = 0; i < LN_DEPTH (nest); i++)
 351     {
 352       fprintf (outfile, "Loop %c\n", start + i);
 353       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 354                          LN_INVARIANTS (nest), 'i');
 355       fprintf (outfile, "\n");
 356     }
 357 }
 358
 359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 360    of invariants.  */
 361
 362 static lambda_lattice
 363 lambda_lattice_new (int depth, int invariants)
 364 {
 365   lambda_lattice ret;
 366   ret = ggc_alloc (sizeof (*ret));
 367   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 368   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 369   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 370   LATTICE_DIMENSION (ret) = depth;
 371   LATTICE_INVARIANTS (ret) = invariants;
 372   return ret;
 373 }
 374
 375 /* Compute the lattice base for NEST.  The lattice base is essentially a
 376    non-singular transform from a dense base space to a sparse iteration space.
 377    We use it so that we don't have to specially handle the case of a sparse
 378    iteration space in other parts of the algorithm.  As a result, this routine
 379    only does something interesting (IE produce a matrix that isn't the
 380    identity matrix) if NEST is a sparse space.  */
 381
 382 static lambda_lattice
 383 lambda_lattice_compute_base (lambda_loopnest nest)
 384 {
 385   lambda_lattice ret;
 386   int depth, invariants;
 387   lambda_matrix base;
 388
 389   int i, j, step;
 390   lambda_loop loop;
 391   lambda_linear_expression expression;
 392
 393   depth = LN_DEPTH (nest);
 394   invariants = LN_INVARIANTS (nest);
 395
 396   ret = lambda_lattice_new (depth, invariants);
 397   base = LATTICE_BASE (ret);
 398   for (i = 0; i < depth; i++)
 399     {
 400       loop = LN_LOOPS (nest)[i];
 401       gcc_assert (loop);
 402       step = LL_STEP (loop);
 403       /* If we have a step of 1, then the base is one, and the
 404          origin and invariant coefficients are 0.  */
 405       if (step == 1)
 406         {
 407           for (j = 0; j < depth; j++)
 408             base[i][j] = 0;
 409           base[i][i] = 1;
 410           LATTICE_ORIGIN (ret)[i] = 0;
 411           for (j = 0; j < invariants; j++)
 412             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 413         }
 414       else
 415         {
 416           /* Otherwise, we need the lower bound expression (which must
 417              be an affine function)  to determine the base.  */
 418           expression = LL_LOWER_BOUND (loop);
 419           gcc_assert (expression && !LLE_NEXT (expression)
 420                       && LLE_DENOMINATOR (expression) == 1);
 421
 422           /* The lower triangular portion of the base is going to be the
 423              coefficient times the step */
 424           for (j = 0; j < i; j++)
 425             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 426               * LL_STEP (LN_LOOPS (nest)[j]);
 427           base[i][i] = step;
 428           for (j = i + 1; j < depth; j++)
 429             base[i][j] = 0;
 430
 431           /* Origin for this loop is the constant of the lower bound
 432              expression.  */
 433           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 434
 435           /* Coefficient for the invariants are equal to the invariant
 436              coefficients in the expression.  */
 437           for (j = 0; j < invariants; j++)
 438             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 439               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 440         }
 441     }
 442   return ret;
 443 }
 444
 445 /* Compute the greatest common denominator of two numbers (A and B) using
 446    Euclid's algorithm.  */
 447
 448 static int
 449 gcd (int a, int b)
 450 {
 451
 452   int x, y, z;
 453
 454   x = abs (a);
 455   y = abs (b);
 456
 457   while (x > 0)
 458     {
 459       z = y % x;
 460       y = x;
 461       x = z;
 462     }
 463
 464   return (y);
 465 }
 466
 467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 468
 469 static int
 470 gcd_vector (lambda_vector vector, int size)
 471 {
 472   int i;
 473   int gcd1 = 0;
 474
 475   if (size > 0)
 476     {
 477       gcd1 = vector[0];
 478       for (i = 1; i < size; i++)
 479         gcd1 = gcd (gcd1, vector[i]);
 480     }
 481   return gcd1;
 482 }
 483
 484 /* Compute the least common multiple of two numbers A and B .  */
 485
 486 static int
 487 lcm (int a, int b)
 488 {
 489   return (abs (a) * abs (b) / gcd (a, b));
 490 }
 491
 492 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 493    auxiliary nest.
 494    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 495    it is easy to calculate the answer and bounds.
 496    A sketch of how it works:
 497    Given a system of linear inequalities, ai * xj >= bk, you can always
 498    rewrite the constraints so they are all of the form
 499    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 500    in b1 ... bk, and some a in a1...ai)
 501    You can then eliminate this x from the non-constant inequalities by
 502    rewriting these as a <= b, x >= constant, and delete the x variable.
 503    You can then repeat this for any remaining x variables, and then we have
 504    an easy to use variable <= constant (or no variables at all) form that we
 505    can construct our bounds from.
 506
 507    In our case, each time we eliminate, we construct part of the bound from
 508    the ith variable, then delete the ith variable.
 509
 510    Remember the constant are in our vector a, our coefficient matrix is A,
 511    and our invariant coefficient matrix is B.
 512
 513    SIZE is the size of the matrices being passed.
 514    DEPTH is the loop nest depth.
 515    INVARIANTS is the number of loop invariants.
 516    A, B, and a are the coefficient matrix, invariant coefficient, and a
 517    vector of constants, respectively.  */
 518
 519 static lambda_loopnest
 520 compute_nest_using_fourier_motzkin (int size,
 521                                     int depth,
 522                                     int invariants,
 523                                     lambda_matrix A,
 524                                     lambda_matrix B,
 525                                     lambda_vector a)
 526 {
 527
 528   int multiple, f1, f2;
 529   int i, j, k;
 530   lambda_linear_expression expression;
 531   lambda_loop loop;
 532   lambda_loopnest auxillary_nest;
 533   lambda_matrix swapmatrix, A1, B1;
 534   lambda_vector swapvector, a1;
 535   int newsize;
 536
 537   A1 = lambda_matrix_new (128, depth);
 538   B1 = lambda_matrix_new (128, invariants);
 539   a1 = lambda_vector_new (128);
 540
 541   auxillary_nest = lambda_loopnest_new (depth, invariants);
 542
 543   for (i = depth - 1; i >= 0; i--)
 544     {
 545       loop = lambda_loop_new ();
 546       LN_LOOPS (auxillary_nest)[i] = loop;
 547       LL_STEP (loop) = 1;
 548
 549       for (j = 0; j < size; j++)
 550         {
 551           if (A[j][i] < 0)
 552             {
 553               /* Any linear expression in the matrix with a coefficient less
 554                  than 0 becomes part of the new lower bound.  */
 555               expression = lambda_linear_expression_new (depth, invariants);
 556
 557               for (k = 0; k < i; k++)
 558                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 559
 560               for (k = 0; k < invariants; k++)
 561                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 562
 563               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 564               LLE_CONSTANT (expression) = -1 * a[j];
 565
 566               /* Ignore if identical to the existing lower bound.  */
 567               if (!lle_equal (LL_LOWER_BOUND (loop),
 568                               expression, depth, invariants))
 569                 {
 570                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 571                   LL_LOWER_BOUND (loop) = expression;
 572                 }
 573
 574             }
 575           else if (A[j][i] > 0)
 576             {
 577               /* Any linear expression with a coefficient greater than 0
 578                  becomes part of the new upper bound.  */
 579               expression = lambda_linear_expression_new (depth, invariants);
 580               for (k = 0; k < i; k++)
 581                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 582
 583               for (k = 0; k < invariants; k++)
 584                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 585
 586               LLE_DENOMINATOR (expression) = A[j][i];
 587               LLE_CONSTANT (expression) = a[j];
 588
 589               /* Ignore if identical to the existing upper bound.  */
 590               if (!lle_equal (LL_UPPER_BOUND (loop),
 591                               expression, depth, invariants))
 592                 {
 593                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 594                   LL_UPPER_BOUND (loop) = expression;
 595                 }
 596
 597             }
 598         }
 599
 600       /* This portion creates a new system of linear inequalities by deleting
 601          the i'th variable, reducing the system by one variable.  */
 602       newsize = 0;
 603       for (j = 0; j < size; j++)
 604         {
 605           /* If the coefficient for the i'th variable is 0, then we can just
 606              eliminate the variable straightaway.  Otherwise, we have to
 607              multiply through by the coefficients we are eliminating.  */
 608           if (A[j][i] == 0)
 609             {
 610               lambda_vector_copy (A[j], A1[newsize], depth);
 611               lambda_vector_copy (B[j], B1[newsize], invariants);
 612               a1[newsize] = a[j];
 613               newsize++;
 614             }
 615           else if (A[j][i] > 0)
 616             {
 617               for (k = 0; k < size; k++)
 618                 {
 619                   if (A[k][i] < 0)
 620                     {
 621                       multiple = lcm (A[j][i], A[k][i]);
 622                       f1 = multiple / A[j][i];
 623                       f2 = -1 * multiple / A[k][i];
 624
 625                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 626                                             A1[newsize], depth);
 627                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 628                                             B1[newsize], invariants);
 629                       a1[newsize] = f1 * a[j] + f2 * a[k];
 630                       newsize++;
 631                     }
 632                 }
 633             }
 634         }
 635
 636       swapmatrix = A;
 637       A = A1;
 638       A1 = swapmatrix;
 639
 640       swapmatrix = B;
 641       B = B1;
 642       B1 = swapmatrix;
 643
 644       swapvector = a;
 645       a = a1;
 646       a1 = swapvector;
 647
 648       size = newsize;
 649     }
 650
 651   return auxillary_nest;
 652 }
 653
 654 /* Compute the loop bounds for the auxiliary space NEST.
 655    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 656    Given the original nest, this function will
 657    1. Convert the nest into matrix form, which consists of a matrix for the
 658    coefficients, a matrix for the
 659    invariant coefficients, and a vector for the constants.
 660    2. Use the matrix form to calculate the lattice base for the nest (which is
 661    a dense space)
 662    3. Compose the dense space transform with the user specified transform, to
 663    get a transform we can easily calculate transformed bounds for.
 664    4. Multiply the composed transformation matrix times the matrix form of the
 665    loop.
 666    5. Transform the newly created matrix (from step 4) back into a loop nest
 667    using fourier motzkin elimination to figure out the bounds.  */
 668
 669 static lambda_loopnest
 670 lambda_compute_auxillary_space (lambda_loopnest nest,
 671                                 lambda_trans_matrix trans)
 672 {
 673   lambda_matrix A, B, A1, B1;
 674   lambda_vector a, a1;
 675   lambda_matrix invertedtrans;
 676   int depth, invariants, size;
 677   int i, j;
 678   lambda_loop loop;
 679   lambda_linear_expression expression;
 680   lambda_lattice lattice;
 681
 682   depth = LN_DEPTH (nest);
 683   invariants = LN_INVARIANTS (nest);
 684
 685   /* Unfortunately, we can't know the number of constraints we'll have
 686      ahead of time, but this should be enough even in ridiculous loop nest
 687      cases. We must not go over this limit.  */
 688   A = lambda_matrix_new (128, depth);
 689   B = lambda_matrix_new (128, invariants);
 690   a = lambda_vector_new (128);
 691
 692   A1 = lambda_matrix_new (128, depth);
 693   B1 = lambda_matrix_new (128, invariants);
 694   a1 = lambda_vector_new (128);
 695
 696   /* Store the bounds in the equation matrix A, constant vector a, and
 697      invariant matrix B, so that we have Ax <= a + B.
 698      This requires a little equation rearranging so that everything is on the
 699      correct side of the inequality.  */
 700   size = 0;
 701   for (i = 0; i < depth; i++)
 702     {
 703       loop = LN_LOOPS (nest)[i];
 704
 705       /* First we do the lower bound.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_LOWER_BOUND (loop);
 708       else
 709         expression = LL_UPPER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 725              constants and single variables on   */
 726           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 727           a[size] *= -1;
 728           for (j = 0; j < invariants; j++)
 729             B[size][j] *= -1;
 730
 731           size++;
 732           /* Need to increase matrix sizes above.  */
 733           gcc_assert (size <= 127);
 734
 735         }
 736
 737       /* Then do the exact same thing for the upper bounds.  */
 738       if (LL_STEP (loop) > 0)
 739         expression = LL_UPPER_BOUND (loop);
 740       else
 741         expression = LL_LOWER_BOUND (loop);
 742
 743       for (; expression != NULL; expression = LLE_NEXT (expression))
 744         {
 745           /* Fill in the coefficient.  */
 746           for (j = 0; j < i; j++)
 747             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 748
 749           /* And the invariant coefficient.  */
 750           for (j = 0; j < invariants; j++)
 751             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 752
 753           /* And the constant.  */
 754           a[size] = LLE_CONSTANT (expression);
 755
 756           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 757           for (j = 0; j < i; j++)
 758             A[size][j] *= -1;
 759           A[size][i] = LLE_DENOMINATOR (expression);
 760           size++;
 761           /* Need to increase matrix sizes above.  */
 762           gcc_assert (size <= 127);
 763
 764         }
 765     }
 766
 767   /* Compute the lattice base x = base * y + origin, where y is the
 768      base space.  */
 769   lattice = lambda_lattice_compute_base (nest);
 770
 771   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 772
 773   /* A1 = A * L */
 774   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 775
 776   /* a1 = a - A * origin constant.  */
 777   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 778   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 779
 780   /* B1 = B - A * origin invariant.  */
 781   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 782                       invariants);
 783   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 784
 785   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 786      it by A1, then performing fourier motzkin.  */
 787
 788   invertedtrans = lambda_matrix_new (depth, depth);
 789
 790   /* Compute the inverse of U.  */
 791   lambda_matrix_inverse (LTM_MATRIX (trans),
 792                          invertedtrans, depth);
 793
 794   /* A = A1 inv(U).  */
 795   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 796
 797   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 798                                              A, B1, a1);
 799 }
 800
 801 /* Compute the loop bounds for the target space, using the bounds of
 802    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 803    The target space loop bounds are computed by multiplying the triangular
 804    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 805    the loop steps (positive or negative) is then used to swap the bounds if
 806    the loop counts downwards.
 807    Return the target loopnest.  */
 808
 809 static lambda_loopnest
 810 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 811                              lambda_trans_matrix H, lambda_vector stepsigns)
 812 {
 813   lambda_matrix inverse, H1;
 814   int determinant, i, j;
 815   int gcd1, gcd2;
 816   int factor;
 817
 818   lambda_loopnest target_nest;
 819   int depth, invariants;
 820   lambda_matrix target;
 821
 822   lambda_loop auxillary_loop, target_loop;
 823   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 824
 825   depth = LN_DEPTH (auxillary_nest);
 826   invariants = LN_INVARIANTS (auxillary_nest);
 827
 828   inverse = lambda_matrix_new (depth, depth);
 829   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 830
 831   /* H1 is H excluding its diagonal.  */
 832   H1 = lambda_matrix_new (depth, depth);
 833   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 834
 835   for (i = 0; i < depth; i++)
 836     H1[i][i] = 0;
 837
 838   /* Computes the linear offsets of the loop bounds.  */
 839   target = lambda_matrix_new (depth, depth);
 840   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 841
 842   target_nest = lambda_loopnest_new (depth, invariants);
 843
 844   for (i = 0; i < depth; i++)
 845     {
 846
 847       /* Get a new loop structure.  */
 848       target_loop = lambda_loop_new ();
 849       LN_LOOPS (target_nest)[i] = target_loop;
 850
 851       /* Computes the gcd of the coefficients of the linear part.  */
 852       gcd1 = gcd_vector (target[i], i);
 853
 854       /* Include the denominator in the GCD.  */
 855       gcd1 = gcd (gcd1, determinant);
 856
 857       /* Now divide through by the gcd.  */
 858       for (j = 0; j < i; j++)
 859         target[i][j] = target[i][j] / gcd1;
 860
 861       expression = lambda_linear_expression_new (depth, invariants);
 862       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 863       LLE_DENOMINATOR (expression) = determinant / gcd1;
 864       LLE_CONSTANT (expression) = 0;
 865       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 866                            invariants);
 867       LL_LINEAR_OFFSET (target_loop) = expression;
 868     }
 869
 870   /* For each loop, compute the new bounds from H.  */
 871   for (i = 0; i < depth; i++)
 872     {
 873       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 874       target_loop = LN_LOOPS (target_nest)[i];
 875       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 876       factor = LTM_MATRIX (H)[i][i];
 877
 878       /* First we do the lower bound.  */
 879       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 880
 881       for (; auxillary_expr != NULL;
 882            auxillary_expr = LLE_NEXT (auxillary_expr))
 883         {
 884           target_expr = lambda_linear_expression_new (depth, invariants);
 885           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 886                                      depth, inverse, depth,
 887                                      LLE_COEFFICIENTS (target_expr));
 888           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 889                                     LLE_COEFFICIENTS (target_expr), depth,
 890                                     factor);
 891
 892           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 893           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 894                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 895                               invariants);
 896           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 898                                     invariants, factor);
 899           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 900
 901           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 902             {
 903               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 904                 * determinant;
 905               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 906                                         (target_expr),
 907                                         LLE_INVARIANT_COEFFICIENTS
 908                                         (target_expr), invariants,
 909                                         determinant);
 910               LLE_DENOMINATOR (target_expr) =
 911                 LLE_DENOMINATOR (target_expr) * determinant;
 912             }
 913           /* Find the gcd and divide by it here, rather than doing it
 914              at the tree level.  */
 915           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 916           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 917                              invariants);
 918           gcd1 = gcd (gcd1, gcd2);
 919           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 920           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 921           for (j = 0; j < depth; j++)
 922             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 923           for (j = 0; j < invariants; j++)
 924             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 925           LLE_CONSTANT (target_expr) /= gcd1;
 926           LLE_DENOMINATOR (target_expr) /= gcd1;
 927           /* Ignore if identical to existing bound.  */
 928           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 929                           invariants))
 930             {
 931               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 932               LL_LOWER_BOUND (target_loop) = target_expr;
 933             }
 934         }
 935       /* Now do the upper bound.  */
 936       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 937
 938       for (; auxillary_expr != NULL;
 939            auxillary_expr = LLE_NEXT (auxillary_expr))
 940         {
 941           target_expr = lambda_linear_expression_new (depth, invariants);
 942           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 943                                      depth, inverse, depth,
 944                                      LLE_COEFFICIENTS (target_expr));
 945           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 946                                     LLE_COEFFICIENTS (target_expr), depth,
 947                                     factor);
 948           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 949           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 950                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 951                               invariants);
 952           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 954                                     invariants, factor);
 955           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 956
 957           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 958             {
 959               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 960                 * determinant;
 961               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 962                                         (target_expr),
 963                                         LLE_INVARIANT_COEFFICIENTS
 964                                         (target_expr), invariants,
 965                                         determinant);
 966               LLE_DENOMINATOR (target_expr) =
 967                 LLE_DENOMINATOR (target_expr) * determinant;
 968             }
 969           /* Find the gcd and divide by it here, instead of at the
 970              tree level.  */
 971           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 972           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 973                              invariants);
 974           gcd1 = gcd (gcd1, gcd2);
 975           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 976           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 977           for (j = 0; j < depth; j++)
 978             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 979           for (j = 0; j < invariants; j++)
 980             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 981           LLE_CONSTANT (target_expr) /= gcd1;
 982           LLE_DENOMINATOR (target_expr) /= gcd1;
 983           /* Ignore if equal to existing bound.  */
 984           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 985                           invariants))
 986             {
 987               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 988               LL_UPPER_BOUND (target_loop) = target_expr;
 989             }
 990         }
 991     }
 992   for (i = 0; i < depth; i++)
 993     {
 994       target_loop = LN_LOOPS (target_nest)[i];
 995       /* If necessary, exchange the upper and lower bounds and negate
 996          the step size.  */
 997       if (stepsigns[i] < 0)
 998         {
 999           LL_STEP (target_loop) *= -1;
1000           tmp_expr = LL_LOWER_BOUND (target_loop);
1001           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1002           LL_UPPER_BOUND (target_loop) = tmp_expr;
1003         }
1004     }
1005   return target_nest;
1006 }
1007
1008 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1009    result.  */
1010
1011 static lambda_vector
1012 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1013 {
1014   lambda_matrix matrix, H;
1015   int size;
1016   lambda_vector newsteps;
1017   int i, j, factor, minimum_column;
1018   int temp;
1019
1020   matrix = LTM_MATRIX (trans);
1021   size = LTM_ROWSIZE (trans);
1022   H = lambda_matrix_new (size, size);
1023
1024   newsteps = lambda_vector_new (size);
1025   lambda_vector_copy (stepsigns, newsteps, size);
1026
1027   lambda_matrix_copy (matrix, H, size, size);
1028
1029   for (j = 0; j < size; j++)
1030     {
1031       lambda_vector row;
1032       row = H[j];
1033       for (i = j; i < size; i++)
1034         if (row[i] < 0)
1035           lambda_matrix_col_negate (H, size, i);
1036       while (lambda_vector_first_nz (row, size, j + 1) < size)
1037         {
1038           minimum_column = lambda_vector_min_nz (row, size, j);
1039           lambda_matrix_col_exchange (H, size, j, minimum_column);
1040
1041           temp = newsteps[j];
1042           newsteps[j] = newsteps[minimum_column];
1043           newsteps[minimum_column] = temp;
1044
1045           for (i = j + 1; i < size; i++)
1046             {
1047               factor = row[i] / row[j];
1048               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1049             }
1050         }
1051     }
1052   return newsteps;
1053 }
1054
1055 /* Transform NEST according to TRANS, and return the new loopnest.
1056    This involves
1057    1. Computing a lattice base for the transformation
1058    2. Composing the dense base with the specified transformation (TRANS)
1059    3. Decomposing the combined transformation into a lower triangular portion,
1060    and a unimodular portion.
1061    4. Computing the auxiliary nest using the unimodular portion.
1062    5. Computing the target nest using the auxiliary nest and the lower
1063    triangular portion.  */
1064
1065 lambda_loopnest
1066 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1067 {
1068   lambda_loopnest auxillary_nest, target_nest;
1069
1070   int depth, invariants;
1071   int i, j;
1072   lambda_lattice lattice;
1073   lambda_trans_matrix trans1, H, U;
1074   lambda_loop loop;
1075   lambda_linear_expression expression;
1076   lambda_vector origin;
1077   lambda_matrix origin_invariants;
1078   lambda_vector stepsigns;
1079   int f;
1080
1081   depth = LN_DEPTH (nest);
1082   invariants = LN_INVARIANTS (nest);
1083
1084   /* Keep track of the signs of the loop steps.  */
1085   stepsigns = lambda_vector_new (depth);
1086   for (i = 0; i < depth; i++)
1087     {
1088       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1089         stepsigns[i] = 1;
1090       else
1091         stepsigns[i] = -1;
1092     }
1093
1094   /* Compute the lattice base.  */
1095   lattice = lambda_lattice_compute_base (nest);
1096   trans1 = lambda_trans_matrix_new (depth, depth);
1097
1098   /* Multiply the transformation matrix by the lattice base.  */
1099
1100   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1101                       LTM_MATRIX (trans1), depth, depth, depth);
1102
1103   /* Compute the Hermite normal form for the new transformation matrix.  */
1104   H = lambda_trans_matrix_new (depth, depth);
1105   U = lambda_trans_matrix_new (depth, depth);
1106   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1107                          LTM_MATRIX (U));
1108
1109   /* Compute the auxiliary loop nest's space from the unimodular
1110      portion.  */
1111   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1112
1113   /* Compute the loop step signs from the old step signs and the
1114      transformation matrix.  */
1115   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1116
1117   /* Compute the target loop nest space from the auxiliary nest and
1118      the lower triangular matrix H.  */
1119   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1120   origin = lambda_vector_new (depth);
1121   origin_invariants = lambda_matrix_new (depth, invariants);
1122   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1123                              LATTICE_ORIGIN (lattice), origin);
1124   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1125                       origin_invariants, depth, depth, invariants);
1126
1127   for (i = 0; i < depth; i++)
1128     {
1129       loop = LN_LOOPS (target_nest)[i];
1130       expression = LL_LINEAR_OFFSET (loop);
1131       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1132         f = 1;
1133       else
1134         f = LLE_DENOMINATOR (expression);
1135
1136       LLE_CONSTANT (expression) += f * origin[i];
1137
1138       for (j = 0; j < invariants; j++)
1139         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1140           f * origin_invariants[i][j];
1141     }
1142
1143   return target_nest;
1144
1145 }
1146
1147 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1148    return the new expression.  DEPTH is the depth of the loopnest.
1149    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1150    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1151    is the amount we have to add/subtract from the expression because of the
1152    type of comparison it is used in.  */
1153
1154 static lambda_linear_expression
1155 gcc_tree_to_linear_expression (int depth, tree expr,
1156                                VEC(tree,heap) *outerinductionvars,
1157                                VEC(tree,heap) *invariants, int extra)
1158 {
1159   lambda_linear_expression lle = NULL;
1160   switch (TREE_CODE (expr))
1161     {
1162     case INTEGER_CST:
1163       {
1164         lle = lambda_linear_expression_new (depth, 2 * depth);
1165         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1166         if (extra != 0)
1167           LLE_CONSTANT (lle) += extra;
1168
1169         LLE_DENOMINATOR (lle) = 1;
1170       }
1171       break;
1172     case SSA_NAME:
1173       {
1174         tree iv, invar;
1175         size_t i;
1176         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1177           if (iv != NULL)
1178             {
1179               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1180                 {
1181                   lle = lambda_linear_expression_new (depth, 2 * depth);
1182                   LLE_COEFFICIENTS (lle)[i] = 1;
1183                   if (extra != 0)
1184                     LLE_CONSTANT (lle) = extra;
1185
1186                   LLE_DENOMINATOR (lle) = 1;
1187                 }
1188             }
1189         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1190           if (invar != NULL)
1191             {
1192               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1193                 {
1194                   lle = lambda_linear_expression_new (depth, 2 * depth);
1195                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1196                   if (extra != 0)
1197                     LLE_CONSTANT (lle) = extra;
1198                   LLE_DENOMINATOR (lle) = 1;
1199                 }
1200             }
1201       }
1202       break;
1203     default:
1204       return NULL;
1205     }
1206
1207   return lle;
1208 }
1209
1210 /* Return the depth of the loopnest NEST */
1211
1212 static int
1213 depth_of_nest (struct loop *nest)
1214 {
1215   size_t depth = 0;
1216   while (nest)
1217     {
1218       depth++;
1219       nest = nest->inner;
1220     }
1221   return depth;
1222 }
1223
1224
1225 /* Return true if OP is invariant in LOOP and all outer loops.  */
1226
1227 static bool
1228 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1229 {
1230   if (is_gimple_min_invariant (op))
1231     return true;
1232   if (loop->depth == 0)
1233     return true;
1234   if (!expr_invariant_in_loop_p (loop, op))
1235     return false;
1236   if (loop->outer
1237       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1238     return false;
1239   return true;
1240 }
1241
1242 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1243    or NULL if it could not be converted.
1244    DEPTH is the depth of the loop.
1245    INVARIANTS is a pointer to the array of loop invariants.
1246    The induction variable for this loop should be stored in the parameter
1247    OURINDUCTIONVAR.
1248    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1249
1250 static lambda_loop
1251 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1252                          VEC(tree,heap) ** invariants,
1253                          tree * ourinductionvar,
1254                          VEC(tree,heap) * outerinductionvars,
1255                          VEC(tree,heap) ** lboundvars,
1256                          VEC(tree,heap) ** uboundvars,
1257                          VEC(int,heap) ** steps)
1258 {
1259   tree phi;
1260   tree exit_cond;
1261   tree access_fn, inductionvar;
1262   tree step;
1263   lambda_loop lloop = NULL;
1264   lambda_linear_expression lbound, ubound;
1265   tree test;
1266   int stepint;
1267   int extra = 0;
1268   tree lboundvar, uboundvar, uboundresult;
1269
1270   /* Find out induction var and exit condition.  */
1271   inductionvar = find_induction_var_from_exit_cond (loop);
1272   exit_cond = get_loop_exit_condition (loop);
1273
1274   if (inductionvar == NULL || exit_cond == NULL)
1275     {
1276       if (dump_file && (dump_flags & TDF_DETAILS))
1277         fprintf (dump_file,
1278                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1279       return NULL;
1280     }
1281
1282   test = TREE_OPERAND (exit_cond, 0);
1283
1284   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1285     {
1286
1287       if (dump_file && (dump_flags & TDF_DETAILS))
1288         fprintf (dump_file,
1289                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290
1291       return NULL;
1292     }
1293
1294   phi = SSA_NAME_DEF_STMT (inductionvar);
1295   if (TREE_CODE (phi) != PHI_NODE)
1296     {
1297       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1298       if (!phi)
1299         {
1300
1301           if (dump_file && (dump_flags & TDF_DETAILS))
1302             fprintf (dump_file,
1303                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1304
1305           return NULL;
1306         }
1307
1308       phi = SSA_NAME_DEF_STMT (phi);
1309       if (TREE_CODE (phi) != PHI_NODE)
1310         {
1311
1312           if (dump_file && (dump_flags & TDF_DETAILS))
1313             fprintf (dump_file,
1314                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1315           return NULL;
1316         }
1317
1318     }
1319
1320   /* The induction variable name/version we want to put in the array is the
1321      result of the induction variable phi node.  */
1322   *ourinductionvar = PHI_RESULT (phi);
1323   access_fn = instantiate_parameters
1324     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1325   if (access_fn == chrec_dont_know)
1326     {
1327       if (dump_file && (dump_flags & TDF_DETAILS))
1328         fprintf (dump_file,
1329                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1330
1331       return NULL;
1332     }
1333
1334   step = evolution_part_in_loop_num (access_fn, loop->num);
1335   if (!step || step == chrec_dont_know)
1336     {
1337       if (dump_file && (dump_flags & TDF_DETAILS))
1338         fprintf (dump_file,
1339                  "Unable to convert loop: Cannot determine step of loop.\n");
1340
1341       return NULL;
1342     }
1343   if (TREE_CODE (step) != INTEGER_CST)
1344     {
1345
1346       if (dump_file && (dump_flags & TDF_DETAILS))
1347         fprintf (dump_file,
1348                  "Unable to convert loop: Step of loop is not integer.\n");
1349       return NULL;
1350     }
1351
1352   stepint = TREE_INT_CST_LOW (step);
1353
1354   /* Only want phis for induction vars, which will have two
1355      arguments.  */
1356   if (PHI_NUM_ARGS (phi) != 2)
1357     {
1358       if (dump_file && (dump_flags & TDF_DETAILS))
1359         fprintf (dump_file,
1360                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1361       return NULL;
1362     }
1363
1364   /* Another induction variable check. One argument's source should be
1365      in the loop, one outside the loop.  */
1366   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1367       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1368     {
1369
1370       if (dump_file && (dump_flags & TDF_DETAILS))
1371         fprintf (dump_file,
1372                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1373
1374       return NULL;
1375     }
1376
1377   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1378     {
1379       lboundvar = PHI_ARG_DEF (phi, 1);
1380       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1381                                               outerinductionvars, *invariants,
1382                                               0);
1383     }
1384   else
1385     {
1386       lboundvar = PHI_ARG_DEF (phi, 0);
1387       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1388                                               outerinductionvars, *invariants,
1389                                               0);
1390     }
1391
1392   if (!lbound)
1393     {
1394
1395       if (dump_file && (dump_flags & TDF_DETAILS))
1396         fprintf (dump_file,
1397                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1398
1399       return NULL;
1400     }
1401   /* One part of the test may be a loop invariant tree.  */
1402   VEC_reserve (tree, heap, *invariants, 1);
1403   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1404       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1405     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1406   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1407            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1408     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1409
1410   /* The non-induction variable part of the test is the upper bound variable.
1411    */
1412   if (TREE_OPERAND (test, 0) == inductionvar)
1413     uboundvar = TREE_OPERAND (test, 1);
1414   else
1415     uboundvar = TREE_OPERAND (test, 0);
1416
1417
1418   /* We only size the vectors assuming we have, at max, 2 times as many
1419      invariants as we do loops (one for each bound).
1420      This is just an arbitrary number, but it has to be matched against the
1421      code below.  */
1422   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1423
1424
1425   /* We might have some leftover.  */
1426   if (TREE_CODE (test) == LT_EXPR)
1427     extra = -1 * stepint;
1428   else if (TREE_CODE (test) == NE_EXPR)
1429     extra = -1 * stepint;
1430   else if (TREE_CODE (test) == GT_EXPR)
1431     extra = -1 * stepint;
1432   else if (TREE_CODE (test) == EQ_EXPR)
1433     extra = 1 * stepint;
1434
1435   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1436                                           outerinductionvars,
1437                                           *invariants, extra);
1438   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1439                         build_int_cst (TREE_TYPE (uboundvar), extra));
1440   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1441   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1442   VEC_safe_push (int, heap, *steps, stepint);
1443   if (!ubound)
1444     {
1445       if (dump_file && (dump_flags & TDF_DETAILS))
1446         fprintf (dump_file,
1447                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1448       return NULL;
1449     }
1450
1451   lloop = lambda_loop_new ();
1452   LL_STEP (lloop) = stepint;
1453   LL_LOWER_BOUND (lloop) = lbound;
1454   LL_UPPER_BOUND (lloop) = ubound;
1455   return lloop;
1456 }
1457
1458 /* Given a LOOP, find the induction variable it is testing against in the exit
1459    condition.  Return the induction variable if found, NULL otherwise.  */
1460
1461 static tree
1462 find_induction_var_from_exit_cond (struct loop *loop)
1463 {
1464   tree expr = get_loop_exit_condition (loop);
1465   tree ivarop;
1466   tree test;
1467   if (expr == NULL_TREE)
1468     return NULL_TREE;
1469   if (TREE_CODE (expr) != COND_EXPR)
1470     return NULL_TREE;
1471   test = TREE_OPERAND (expr, 0);
1472   if (!COMPARISON_CLASS_P (test))
1473     return NULL_TREE;
1474
1475   /* Find the side that is invariant in this loop. The ivar must be the other
1476      side.  */
1477
1478   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1479       ivarop = TREE_OPERAND (test, 1);
1480   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1481       ivarop = TREE_OPERAND (test, 0);
1482   else
1483     return NULL_TREE;
1484
1485   if (TREE_CODE (ivarop) != SSA_NAME)
1486     return NULL_TREE;
1487   return ivarop;
1488 }
1489
1490 DEF_VEC_P(lambda_loop);
1491 DEF_VEC_ALLOC_P(lambda_loop,heap);
1492
1493 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1494    Return the new loop nest.
1495    INDUCTIONVARS is a pointer to an array of induction variables for the
1496    loopnest that will be filled in during this process.
1497    INVARIANTS is a pointer to an array of invariants that will be filled in
1498    during this process.  */
1499
1500 lambda_loopnest
1501 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1502                                  struct loop * loop_nest,
1503                                  VEC(tree,heap) **inductionvars,
1504                                  VEC(tree,heap) **invariants,
1505                                  bool need_perfect_nest)
1506 {
1507   lambda_loopnest ret = NULL;
1508   struct loop *temp;
1509   int depth = 0;
1510   size_t i;
1511   VEC(lambda_loop,heap) *loops = NULL;
1512   VEC(tree,heap) *uboundvars = NULL;
1513   VEC(tree,heap) *lboundvars  = NULL;
1514   VEC(int,heap) *steps = NULL;
1515   lambda_loop newloop;
1516   tree inductionvar = NULL;
1517
1518   depth = depth_of_nest (loop_nest);
1519   temp = loop_nest;
1520   while (temp)
1521     {
1522       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1523                                          &inductionvar, *inductionvars,
1524                                          &lboundvars, &uboundvars,
1525                                          &steps);
1526       if (!newloop)
1527         return NULL;
1528       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1529       VEC_safe_push (lambda_loop, heap, loops, newloop);
1530       temp = temp->inner;
1531     }
1532   if (need_perfect_nest)
1533     {
1534       if (!perfect_nestify (currloops, loop_nest,
1535                             lboundvars, uboundvars, steps, *inductionvars))
1536         {
1537           if (dump_file)
1538             fprintf (dump_file,
1539                      "Not a perfect loop nest and couldn't convert to one.\n");
1540           goto fail;
1541         }
1542       else if (dump_file)
1543         fprintf (dump_file,
1544                  "Successfully converted loop nest to perfect loop nest.\n");
1545     }
1546   ret = lambda_loopnest_new (depth, 2 * depth);
1547   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1548     LN_LOOPS (ret)[i] = newloop;
1549  fail:
1550   VEC_free (lambda_loop, heap, loops);
1551   VEC_free (tree, heap, uboundvars);
1552   VEC_free (tree, heap, lboundvars);
1553   VEC_free (int, heap, steps);
1554
1555   return ret;
1556 }
1557
1558 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1559    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1560    inserted for us are stored.  INDUCTION_VARS is the array of induction
1561    variables for the loop this LBV is from.  TYPE is the tree type to use for
1562    the variables and trees involved.  */
1563
1564 static tree
1565 lbv_to_gcc_expression (lambda_body_vector lbv,
1566                        tree type, VEC(tree,heap) *induction_vars,
1567                        tree *stmts_to_insert)
1568 {
1569   tree stmts, stmt, resvar, name;
1570   tree iv;
1571   size_t i;
1572   tree_stmt_iterator tsi;
1573
1574   /* Create a statement list and a linear expression temporary.  */
1575   stmts = alloc_stmt_list ();
1576   resvar = create_tmp_var (type, "lbvtmp");
1577   add_referenced_tmp_var (resvar);
1578
1579   /* Start at 0.  */
1580   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1581   name = make_ssa_name (resvar, stmt);
1582   TREE_OPERAND (stmt, 0) = name;
1583   tsi = tsi_last (stmts);
1584   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1585
1586   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1587     {
1588       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1589         {
1590           tree newname;
1591           tree coeffmult;
1592
1593           /* newname = coefficient * induction_variable */
1594           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1595           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1596                         fold (build (MULT_EXPR, type, iv, coeffmult)));
1597
1598           newname = make_ssa_name (resvar, stmt);
1599           TREE_OPERAND (stmt, 0) = newname;
1600           fold_stmt (&stmt);
1601           tsi = tsi_last (stmts);
1602           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1603
1604           /* name = name + newname */
1605           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1606                         build (PLUS_EXPR, type, name, newname));
1607           name = make_ssa_name (resvar, stmt);
1608           TREE_OPERAND (stmt, 0) = name;
1609           fold_stmt (&stmt);
1610           tsi = tsi_last (stmts);
1611           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1612
1613         }
1614     }
1615
1616   /* Handle any denominator that occurs.  */
1617   if (LBV_DENOMINATOR (lbv) != 1)
1618     {
1619       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1620       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1621                     build (CEIL_DIV_EXPR, type, name, denominator));
1622       name = make_ssa_name (resvar, stmt);
1623       TREE_OPERAND (stmt, 0) = name;
1624       fold_stmt (&stmt);
1625       tsi = tsi_last (stmts);
1626       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1627     }
1628   *stmts_to_insert = stmts;
1629   return name;
1630 }
1631
1632 /* Convert a linear expression from coefficient and constant form to a
1633    gcc tree.
1634    Return the tree that represents the final value of the expression.
1635    LLE is the linear expression to convert.
1636    OFFSET is the linear offset to apply to the expression.
1637    TYPE is the tree type to use for the variables and math.
1638    INDUCTION_VARS is a vector of induction variables for the loops.
1639    INVARIANTS is a vector of the loop nest invariants.
1640    WRAP specifies what tree code to wrap the results in, if there is more than
1641    one (it is either MAX_EXPR, or MIN_EXPR).
1642    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1643    statements that need to be inserted for the linear expression.  */
1644
1645 static tree
1646 lle_to_gcc_expression (lambda_linear_expression lle,
1647                        lambda_linear_expression offset,
1648                        tree type,
1649                        VEC(tree,heap) *induction_vars,
1650                        VEC(tree,heap) *invariants,
1651                        enum tree_code wrap, tree *stmts_to_insert)
1652 {
1653   tree stmts, stmt, resvar, name;
1654   size_t i;
1655   tree_stmt_iterator tsi;
1656   tree iv, invar;
1657   VEC(tree,heap) *results = NULL;
1658
1659   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1660   name = NULL_TREE;
1661   /* Create a statement list and a linear expression temporary.  */
1662   stmts = alloc_stmt_list ();
1663   resvar = create_tmp_var (type, "lletmp");
1664   add_referenced_tmp_var (resvar);
1665
1666   /* Build up the linear expressions, and put the variable representing the
1667      result in the results array.  */
1668   for (; lle != NULL; lle = LLE_NEXT (lle))
1669     {
1670       /* Start at name = 0.  */
1671       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1672       name = make_ssa_name (resvar, stmt);
1673       TREE_OPERAND (stmt, 0) = name;
1674       fold_stmt (&stmt);
1675       tsi = tsi_last (stmts);
1676       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1677
1678       /* First do the induction variables.
1679          at the end, name = name + all the induction variables added
1680          together.  */
1681       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1682         {
1683           if (LLE_COEFFICIENTS (lle)[i] != 0)
1684             {
1685               tree newname;
1686               tree mult;
1687               tree coeff;
1688
1689               /* mult = induction variable * coefficient.  */
1690               if (LLE_COEFFICIENTS (lle)[i] == 1)
1691                 {
1692                   mult = VEC_index (tree, induction_vars, i);
1693                 }
1694               else
1695                 {
1696                   coeff = build_int_cst (type,
1697                                          LLE_COEFFICIENTS (lle)[i]);
1698                   mult = fold (build (MULT_EXPR, type, iv, coeff));
1699                 }
1700
1701               /* newname = mult */
1702               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1703               newname = make_ssa_name (resvar, stmt);
1704               TREE_OPERAND (stmt, 0) = newname;
1705               fold_stmt (&stmt);
1706               tsi = tsi_last (stmts);
1707               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1708
1709               /* name = name + newname */
1710               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1711                             build (PLUS_EXPR, type, name, newname));
1712               name = make_ssa_name (resvar, stmt);
1713               TREE_OPERAND (stmt, 0) = name;
1714               fold_stmt (&stmt);
1715               tsi = tsi_last (stmts);
1716               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1717             }
1718         }
1719
1720       /* Handle our invariants.
1721          At the end, we have name = name + result of adding all multiplied
1722          invariants.  */
1723       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1724         {
1725           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1726             {
1727               tree newname;
1728               tree mult;
1729               tree coeff;
1730               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1731               /* mult = invariant * coefficient  */
1732               if (invcoeff == 1)
1733                 {
1734                   mult = invar;
1735                 }
1736               else
1737                 {
1738                   coeff = build_int_cst (type, invcoeff);
1739                   mult = fold (build (MULT_EXPR, type, invar, coeff));
1740                 }
1741
1742               /* newname = mult */
1743               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1744               newname = make_ssa_name (resvar, stmt);
1745               TREE_OPERAND (stmt, 0) = newname;
1746               fold_stmt (&stmt);
1747               tsi = tsi_last (stmts);
1748               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1749
1750               /* name = name + newname */
1751               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1752                             build (PLUS_EXPR, type, name, newname));
1753               name = make_ssa_name (resvar, stmt);
1754               TREE_OPERAND (stmt, 0) = name;
1755               fold_stmt (&stmt);
1756               tsi = tsi_last (stmts);
1757               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1758             }
1759         }
1760
1761       /* Now handle the constant.
1762          name = name + constant.  */
1763       if (LLE_CONSTANT (lle) != 0)
1764         {
1765           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1766                         build (PLUS_EXPR, type, name,
1767                                build_int_cst (type, LLE_CONSTANT (lle))));
1768           name = make_ssa_name (resvar, stmt);
1769           TREE_OPERAND (stmt, 0) = name;
1770           fold_stmt (&stmt);
1771           tsi = tsi_last (stmts);
1772           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1773         }
1774
1775       /* Now handle the offset.
1776          name = name + linear offset.  */
1777       if (LLE_CONSTANT (offset) != 0)
1778         {
1779           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1780                         build (PLUS_EXPR, type, name,
1781                                build_int_cst (type, LLE_CONSTANT (offset))));
1782           name = make_ssa_name (resvar, stmt);
1783           TREE_OPERAND (stmt, 0) = name;
1784           fold_stmt (&stmt);
1785           tsi = tsi_last (stmts);
1786           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1787         }
1788
1789       /* Handle any denominator that occurs.  */
1790       if (LLE_DENOMINATOR (lle) != 1)
1791         {
1792           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1793           stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1794                         type, name, stmt);
1795           stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1796
1797           /* name = {ceil, floor}(name/denominator) */
1798           name = make_ssa_name (resvar, stmt);
1799           TREE_OPERAND (stmt, 0) = name;
1800           tsi = tsi_last (stmts);
1801           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1802         }
1803       VEC_safe_push (tree, heap, results, name);
1804     }
1805
1806   /* Again, out of laziness, we don't handle this case yet.  It's not
1807      hard, it just hasn't occurred.  */
1808   gcc_assert (VEC_length (tree, results) <= 2);
1809
1810   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1811   if (VEC_length (tree, results) > 1)
1812     {
1813       tree op1 = VEC_index (tree, results, 0);
1814       tree op2 = VEC_index (tree, results, 1);
1815       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1816                     build (wrap, type, op1, op2));
1817       name = make_ssa_name (resvar, stmt);
1818       TREE_OPERAND (stmt, 0) = name;
1819       tsi = tsi_last (stmts);
1820       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1821     }
1822
1823   VEC_free (tree, heap, results);
1824
1825   *stmts_to_insert = stmts;
1826   return name;
1827 }
1828
1829 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1830    it, back into gcc code.  This changes the
1831    loops, their induction variables, and their bodies, so that they
1832    match the transformed loopnest.
1833    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1834    loopnest.
1835    OLD_IVS is a vector of induction variables from the old loopnest.
1836    INVARIANTS is a vector of loop invariants from the old loopnest.
1837    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1838    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1839    NEW_LOOPNEST.  */
1840
1841 void
1842 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1843                                  VEC(tree,heap) *old_ivs,
1844                                  VEC(tree,heap) *invariants,
1845                                  lambda_loopnest new_loopnest,
1846                                  lambda_trans_matrix transform)
1847 {
1848   struct loop *temp;
1849   size_t i = 0;
1850   size_t depth = 0;
1851   VEC(tree,heap) *new_ivs = NULL;
1852   tree oldiv;
1853
1854   block_stmt_iterator bsi;
1855
1856   if (dump_file)
1857     {
1858       transform = lambda_trans_matrix_inverse (transform);
1859       fprintf (dump_file, "Inverse of transformation matrix:\n");
1860       print_lambda_trans_matrix (dump_file, transform);
1861     }
1862   depth = depth_of_nest (old_loopnest);
1863   temp = old_loopnest;
1864
1865   while (temp)
1866     {
1867       lambda_loop newloop;
1868       basic_block bb;
1869       edge exit;
1870       tree ivvar, ivvarinced, exitcond, stmts;
1871       enum tree_code testtype;
1872       tree newupperbound, newlowerbound;
1873       lambda_linear_expression offset;
1874       tree type;
1875       bool insert_after;
1876       tree inc_stmt;
1877
1878       oldiv = VEC_index (tree, old_ivs, i);
1879       type = TREE_TYPE (oldiv);
1880
1881       /* First, build the new induction variable temporary  */
1882
1883       ivvar = create_tmp_var (type, "lnivtmp");
1884       add_referenced_tmp_var (ivvar);
1885
1886       VEC_safe_push (tree, heap, new_ivs, ivvar);
1887
1888       newloop = LN_LOOPS (new_loopnest)[i];
1889
1890       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1891          cases for now.  */
1892       offset = LL_LINEAR_OFFSET (newloop);
1893
1894       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1895                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1896
1897       /* Now build the  new lower bounds, and insert the statements
1898          necessary to generate it on the loop preheader.  */
1899       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1900                                              LL_LINEAR_OFFSET (newloop),
1901                                              type,
1902                                              new_ivs,
1903                                              invariants, MAX_EXPR, &stmts);
1904       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1905       bsi_commit_edge_inserts ();
1906       /* Build the new upper bound and insert its statements in the
1907          basic block of the exit condition */
1908       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1909                                              LL_LINEAR_OFFSET (newloop),
1910                                              type,
1911                                              new_ivs,
1912                                              invariants, MIN_EXPR, &stmts);
1913       exit = temp->single_exit;
1914       exitcond = get_loop_exit_condition (temp);
1915       bb = bb_for_stmt (exitcond);
1916       bsi = bsi_start (bb);
1917       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1918
1919       /* Create the new iv.  */
1920
1921       standard_iv_increment_position (temp, &bsi, &insert_after);
1922       create_iv (newlowerbound,
1923                  build_int_cst (type, LL_STEP (newloop)),
1924                  ivvar, temp, &bsi, insert_after, &ivvar,
1925                  NULL);
1926
1927       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1928          dominate the block containing the exit condition.
1929          So we simply create our own incremented iv to use in the new exit
1930          test,  and let redundancy elimination sort it out.  */
1931       inc_stmt = build (PLUS_EXPR, type,
1932                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1933       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1934                         inc_stmt);
1935       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1936       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1937       bsi = bsi_for_stmt (exitcond);
1938       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1939
1940       /* Replace the exit condition with the new upper bound
1941          comparison.  */
1942
1943       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1944
1945       /* We want to build a conditional where true means exit the loop, and
1946          false means continue the loop.
1947          So swap the testtype if this isn't the way things are.*/
1948
1949       if (exit->flags & EDGE_FALSE_VALUE)
1950         testtype = swap_tree_comparison (testtype);
1951
1952       COND_EXPR_COND (exitcond) = build (testtype,
1953                                          boolean_type_node,
1954                                          newupperbound, ivvarinced);
1955       update_stmt (exitcond);
1956       VEC_replace (tree, new_ivs, i, ivvar);
1957
1958       i++;
1959       temp = temp->inner;
1960     }
1961
1962   /* Rewrite uses of the old ivs so that they are now specified in terms of
1963      the new ivs.  */
1964
1965   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1966     {
1967       imm_use_iterator imm_iter;
1968       use_operand_p imm_use;
1969       tree oldiv_def;
1970       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1971
1972       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1973         oldiv_def = PHI_RESULT (oldiv_stmt);
1974       else
1975         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1976       gcc_assert (oldiv_def != NULL_TREE);
1977
1978       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1979         {
1980           tree stmt = USE_STMT (imm_use);
1981           use_operand_p use_p;
1982           ssa_op_iter iter;
1983           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1984           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1985             {
1986               if (USE_FROM_PTR (use_p) == oldiv)
1987                 {
1988                   tree newiv, stmts;
1989                   lambda_body_vector lbv, newlbv;
1990                   /* Compute the new expression for the induction
1991                      variable.  */
1992                   depth = VEC_length (tree, new_ivs);
1993                   lbv = lambda_body_vector_new (depth);
1994                   LBV_COEFFICIENTS (lbv)[i] = 1;
1995
1996                   newlbv = lambda_body_vector_compute_new (transform, lbv);
1997
1998                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1999                                                  new_ivs, &stmts);
2000                   bsi = bsi_for_stmt (stmt);
2001                   /* Insert the statements to build that
2002                      expression.  */
2003                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2004                   propagate_value (use_p, newiv);
2005                   update_stmt (stmt);
2006
2007                 }
2008             }
2009         }
2010     }
2011   VEC_free (tree, heap, new_ivs);
2012 }
2013
2014 /* Returns true when the vector V is lexicographically positive, in
2015    other words, when the first nonzero element is positive.  */
2016
2017 static bool
2018 lambda_vector_lexico_pos (lambda_vector v,
2019                           unsigned n)
2020 {
2021   unsigned i;
2022   for (i = 0; i < n; i++)
2023     {
2024       if (v[i] == 0)
2025         continue;
2026       if (v[i] < 0)
2027         return false;
2028       if (v[i] > 0)
2029         return true;
2030     }
2031   return true;
2032 }
2033
2034
2035 /* Return TRUE if this is not interesting statement from the perspective of
2036    determining if we have a perfect loop nest.  */
2037
2038 static bool
2039 not_interesting_stmt (tree stmt)
2040 {
2041   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2042      loop, we would have already failed the number of exits tests.  */
2043   if (TREE_CODE (stmt) == LABEL_EXPR
2044       || TREE_CODE (stmt) == GOTO_EXPR
2045       || TREE_CODE (stmt) == COND_EXPR)
2046     return true;
2047   return false;
2048 }
2049
2050 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2051
2052 static bool
2053 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2054 {
2055   int i;
2056   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2057     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2058       if (PHI_ARG_DEF (phi, i) == def)
2059         return true;
2060   return false;
2061 }
2062
2063 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2064
2065 static bool
2066 stmt_uses_phi_result (tree stmt, tree phi_result)
2067 {
2068   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2069
2070   /* This is conservatively true, because we only want SIMPLE bumpers
2071      of the form x +- constant for our pass.  */
2072   return (use == phi_result);
2073 }
2074
2075 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2076    in-loop-edge in a phi node, and the operand it uses is the result of that
2077    phi node.
2078    I.E. i_29 = i_3 + 1
2079         i_3 = PHI (0, i_29);  */
2080
2081 static bool
2082 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2083 {
2084   tree use;
2085   tree def;
2086   imm_use_iterator iter;
2087   use_operand_p use_p;
2088
2089   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2090   if (!def)
2091     return false;
2092
2093   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2094     {
2095       use = USE_STMT (use_p);
2096       if (TREE_CODE (use) == PHI_NODE)
2097         {
2098           if (phi_loop_edge_uses_def (loop, use, def))
2099             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2100               return true;
2101         }
2102     }
2103   return false;
2104 }
2105
2106
2107 /* Return true if LOOP is a perfect loop nest.
2108    Perfect loop nests are those loop nests where all code occurs in the
2109    innermost loop body.
2110    If S is a program statement, then
2111
2112    i.e.
2113    DO I = 1, 20
2114        S1
2115        DO J = 1, 20
2116        ...
2117        END DO
2118    END DO
2119    is not a perfect loop nest because of S1.
2120
2121    DO I = 1, 20
2122       DO J = 1, 20
2123         S1
2124         ...
2125       END DO
2126    END DO
2127    is a perfect loop nest.
2128
2129    Since we don't have high level loops anymore, we basically have to walk our
2130    statements and ignore those that are there because the loop needs them (IE
2131    the induction variable increment, and jump back to the top of the loop).  */
2132
2133 bool
2134 perfect_nest_p (struct loop *loop)
2135 {
2136   basic_block *bbs;
2137   size_t i;
2138   tree exit_cond;
2139
2140   if (!loop->inner)
2141     return true;
2142   bbs = get_loop_body (loop);
2143   exit_cond = get_loop_exit_condition (loop);
2144   for (i = 0; i < loop->num_nodes; i++)
2145     {
2146       if (bbs[i]->loop_father == loop)
2147         {
2148           block_stmt_iterator bsi;
2149           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2150             {
2151               tree stmt = bsi_stmt (bsi);
2152               if (stmt == exit_cond
2153                   || not_interesting_stmt (stmt)
2154                   || stmt_is_bumper_for_loop (loop, stmt))
2155                 continue;
2156               free (bbs);
2157               return false;
2158             }
2159         }
2160     }
2161   free (bbs);
2162   /* See if the inner loops are perfectly nested as well.  */
2163   if (loop->inner)
2164     return perfect_nest_p (loop->inner);
2165   return true;
2166 }
2167
2168 /* Replace the USES of tree X in STMT with tree Y */
2169
2170 static void
2171 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2172 {
2173   ssa_op_iter iter;
2174   use_operand_p use_p;
2175
2176   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2177     {
2178       if (USE_FROM_PTR (use_p) == x)
2179         SET_USE (use_p, y);
2180     }
2181 }
2182
2183 /* Return TRUE if STMT uses tree OP in it's uses.  */
2184
2185 static bool
2186 stmt_uses_op (tree stmt, tree op)
2187 {
2188   ssa_op_iter iter;
2189   tree use;
2190
2191   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2192     {
2193       if (use == op)
2194         return true;
2195     }
2196   return false;
2197 }
2198
2199 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2200    one.  LOOPIVS is a vector of induction variables, one per loop.
2201    ATM, we only handle imperfect nests of depth 2, where all of the statements
2202    occur after the inner loop.  */
2203
2204 static bool
2205 can_convert_to_perfect_nest (struct loop *loop,
2206                              VEC(tree,heap) *loopivs)
2207 {
2208   basic_block *bbs;
2209   tree exit_condition, phi;
2210   size_t i;
2211   block_stmt_iterator bsi;
2212   basic_block exitdest;
2213
2214   /* Can't handle triply nested+ loops yet.  */
2215   if (!loop->inner || loop->inner->inner)
2216     return false;
2217
2218   /* We only handle moving the after-inner-body statements right now, so make
2219      sure all the statements we need to move are located in that position.  */
2220   bbs = get_loop_body (loop);
2221   exit_condition = get_loop_exit_condition (loop);
2222   for (i = 0; i < loop->num_nodes; i++)
2223     {
2224       if (bbs[i]->loop_father == loop)
2225         {
2226           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2227             {
2228               size_t j;
2229               tree stmt = bsi_stmt (bsi);
2230               tree iv;
2231
2232               if (stmt == exit_condition
2233                   || not_interesting_stmt (stmt)
2234                   || stmt_is_bumper_for_loop (loop, stmt))
2235                 continue;
2236               /* If the statement uses inner loop ivs, we == screwed.  */
2237               for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
2238                 if (stmt_uses_op (stmt, iv))
2239                   goto fail;
2240
2241               /* If the bb of a statement we care about isn't dominated by
2242                  the header of the inner loop, then we are also screwed.  */
2243               if (!dominated_by_p (CDI_DOMINATORS,
2244                                    bb_for_stmt (stmt),
2245                                    loop->inner->header))
2246                 goto fail;
2247             }
2248         }
2249     }
2250
2251   /* We also need to make sure the loop exit only has simple copy phis in it,
2252      otherwise we don't know how to transform it into a perfect nest right
2253      now.  */
2254   exitdest = loop->single_exit->dest;
2255
2256   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2257     if (PHI_NUM_ARGS (phi) != 1)
2258       goto fail;
2259
2260   free (bbs);
2261   return true;
2262
2263  fail:
2264   free (bbs);
2265   return false;
2266 }
2267
2268 /* Transform the loop nest into a perfect nest, if possible.
2269    LOOPS is the current struct loops *
2270    LOOP is the loop nest to transform into a perfect nest
2271    LBOUNDS are the lower bounds for the loops to transform
2272    UBOUNDS are the upper bounds for the loops to transform
2273    STEPS is the STEPS for the loops to transform.
2274    LOOPIVS is the induction variables for the loops to transform.
2275
2276    Basically, for the case of
2277
2278    FOR (i = 0; i < 50; i++)
2279     {
2280      FOR (j =0; j < 50; j++)
2281      {
2282         <whatever>
2283      }
2284      <some code>
2285     }
2286
2287    This function will transform it into a perfect loop nest by splitting the
2288    outer loop into two loops, like so:
2289
2290    FOR (i = 0; i < 50; i++)
2291    {
2292      FOR (j = 0; j < 50; j++)
2293      {
2294          <whatever>
2295      }
2296    }
2297
2298    FOR (i = 0; i < 50; i ++)
2299    {
2300     <some code>
2301    }
2302
2303    Return FALSE if we can't make this loop into a perfect nest.  */
2304 static bool
2305 perfect_nestify (struct loops *loops,
2306                  struct loop *loop,
2307                  VEC(tree,heap) *lbounds,
2308                  VEC(tree,heap) *ubounds,
2309                  VEC(int,heap) *steps,
2310                  VEC(tree,heap) *loopivs)
2311 {
2312   basic_block *bbs;
2313   tree exit_condition;
2314   tree then_label, else_label, cond_stmt;
2315   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2316   size_t i;
2317   block_stmt_iterator bsi;
2318   bool insert_after;
2319   edge e;
2320   struct loop *newloop;
2321   tree phi;
2322   tree uboundvar;
2323   tree stmt;
2324   tree oldivvar, ivvar, ivvarinced;
2325   VEC(tree,heap) *phis = NULL;
2326
2327   if (!can_convert_to_perfect_nest (loop, loopivs))
2328     return false;
2329
2330   /* Create the new loop */
2331
2332   olddest = loop->single_exit->dest;
2333   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2334   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2335
2336   /* Push the exit phi nodes that we are moving.  */
2337   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2338     {
2339       VEC_reserve (tree, heap, phis, 2);
2340       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2341       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2342     }
2343   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2344
2345   /* Remove the exit phis from the old basic block.  Make sure to set
2346      PHI_RESULT to null so it doesn't get released.  */
2347   while (phi_nodes (olddest) != NULL)
2348     {
2349       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2350       remove_phi_node (phi_nodes (olddest), NULL);
2351     }
2352
2353   /* and add them back to the new basic block.  */
2354   while (VEC_length (tree, phis) != 0)
2355     {
2356       tree def;
2357       tree phiname;
2358       def = VEC_pop (tree, phis);
2359       phiname = VEC_pop (tree, phis);
2360       phi = create_phi_node (phiname, preheaderbb);
2361       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2362     }
2363   flush_pending_stmts (e);
2364   VEC_free (tree, heap, phis);
2365
2366   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2367   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2368   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2369   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2370   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2371   cond_stmt = build (COND_EXPR, void_type_node,
2372                      build (NE_EXPR, boolean_type_node,
2373                             integer_one_node,
2374                             integer_zero_node),
2375                      then_label, else_label);
2376   bsi = bsi_start (bodybb);
2377   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2378   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2379   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2380   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2381
2382   /* Update the loop structures.  */
2383   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2384   newloop->header = headerbb;
2385   newloop->latch = latchbb;
2386   newloop->single_exit = e;
2387   add_bb_to_loop (latchbb, newloop);
2388   add_bb_to_loop (bodybb, newloop);
2389   add_bb_to_loop (headerbb, newloop);
2390   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2391   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2392   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2393                            loop->single_exit->src);
2394   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2395   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2396   /* Create the new iv.  */
2397   ivvar = create_tmp_var (integer_type_node, "perfectiv");
2398   add_referenced_tmp_var (ivvar);
2399   standard_iv_increment_position (newloop, &bsi, &insert_after);
2400   create_iv (VEC_index (tree, lbounds, 0),
2401              build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
2402              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2403
2404   /* Create the new upper bound.  This may be not just a variable, so we copy
2405      it to one just in case.  */
2406
2407   exit_condition = get_loop_exit_condition (newloop);
2408   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2409   add_referenced_tmp_var (uboundvar);
2410   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2411                 VEC_index (tree, ubounds, 0));
2412   uboundvar = make_ssa_name (uboundvar, stmt);
2413   TREE_OPERAND (stmt, 0) = uboundvar;
2414
2415   if (insert_after)
2416     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2417   else
2418     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2419
2420   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2421                                            boolean_type_node,
2422                                            uboundvar,
2423                                            ivvarinced);
2424
2425   bbs = get_loop_body (loop);
2426   /* Now replace the induction variable in the moved statements with the
2427      correct loop induction variable.  */
2428   oldivvar = VEC_index (tree, loopivs, 0);
2429   for (i = 0; i < loop->num_nodes; i++)
2430     {
2431       block_stmt_iterator tobsi = bsi_last (bodybb);
2432       if (bbs[i]->loop_father == loop)
2433         {
2434           /* Note that the bsi only needs to be explicitly incremented
2435              when we don't move something, since it is automatically
2436              incremented when we do.  */
2437           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2438             {
2439               ssa_op_iter i;
2440               tree n, stmt = bsi_stmt (bsi);
2441
2442               if (stmt == exit_condition
2443                   || not_interesting_stmt (stmt)
2444                   || stmt_is_bumper_for_loop (loop, stmt))
2445                 {
2446                   bsi_next (&bsi);
2447                   continue;
2448                 }
2449
2450               replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
2451               bsi_move_before (&bsi, &tobsi);
2452
2453               /* If the statement has any virtual operands, they may
2454                  need to be rewired because the original loop may
2455                  still reference them.  */
2456               FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2457                 mark_sym_for_renaming (SSA_NAME_VAR (n));
2458             }
2459         }
2460     }
2461
2462   free (bbs);
2463   return perfect_nest_p (loop);
2464 }
2465
2466 /* Return true if TRANS is a legal transformation matrix that respects
2467    the dependence vectors in DISTS and DIRS.  The conservative answer
2468    is false.
2469
2470    "Wolfe proves that a unimodular transformation represented by the
2471    matrix T is legal when applied to a loop nest with a set of
2472    lexicographically non-negative distance vectors RDG if and only if
2473    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2474    i.e.: if and only if it transforms the lexicographically positive
2475    distance vectors to lexicographically positive vectors.  Note that
2476    a unimodular matrix must transform the zero vector (and only it) to
2477    the zero vector." S.Muchnick.  */
2478
2479 bool
2480 lambda_transform_legal_p (lambda_trans_matrix trans,
2481                           int nb_loops,
2482                           varray_type dependence_relations)
2483 {
2484   unsigned int i;
2485   lambda_vector distres;
2486   struct data_dependence_relation *ddr;
2487
2488   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2489               && LTM_ROWSIZE (trans) == nb_loops);
2490
2491   /* When there is an unknown relation in the dependence_relations, we
2492      know that it is no worth looking at this loop nest: give up.  */
2493   ddr = (struct data_dependence_relation *)
2494     VARRAY_GENERIC_PTR (dependence_relations, 0);
2495   if (ddr == NULL)
2496     return true;
2497   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2498     return false;
2499
2500   distres = lambda_vector_new (nb_loops);
2501
2502   /* For each distance vector in the dependence graph.  */
2503   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2504     {
2505       ddr = (struct data_dependence_relation *)
2506         VARRAY_GENERIC_PTR (dependence_relations, i);
2507
2508       /* Don't care about relations for which we know that there is no
2509          dependence, nor about read-read (aka. output-dependences):
2510          these data accesses can happen in any order.  */
2511       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2512           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2513         continue;
2514
2515       /* Conservatively answer: "this transformation is not valid".  */
2516       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2517         return false;
2518
2519       /* If the dependence could not be captured by a distance vector,
2520          conservatively answer that the transform is not valid.  */
2521       if (DDR_DIST_VECT (ddr) == NULL)
2522         return false;
2523
2524       /* Compute trans.dist_vect */
2525       lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2526                                  DDR_DIST_VECT (ddr), distres);
2527
2528       if (!lambda_vector_lexico_pos (distres, nb_loops))
2529         return false;
2530     }
2531   return true;
2532 }