gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20     02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "errors.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "target.h"
  30 #include "rtl.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "timevar.h"
  36 #include "cfgloop.h"
  37 #include "expr.h"
  38 #include "optabs.h"
  39 #include "tree-chrec.h"
  40 #include "tree-data-ref.h"
  41 #include "tree-pass.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "vec.h"
  44 #include "lambda.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformatrion framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space are the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as a integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxillary space from the unimodular part (source loop
  78  nest . U = auxillary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxillary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxillary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 /* Lattice stuff that is internal to the code generation algorithm.  */
 119
 120 typedef struct
 121 {
 122   /* Lattice base matrix.  */
 123   lambda_matrix base;
 124   /* Lattice dimension.  */
 125   int dimension;
 126   /* Origin vector for the coefficients.  */
 127   lambda_vector origin;
 128   /* Origin matrix for the invariants.  */
 129   lambda_matrix origin_invariants;
 130   /* Number of invariants.  */
 131   int invariants;
 132 } *lambda_lattice;
 133
 134 #define LATTICE_BASE(T) ((T)->base)
 135 #define LATTICE_DIMENSION(T) ((T)->dimension)
 136 #define LATTICE_ORIGIN(T) ((T)->origin)
 137 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 138 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 139
 140 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 141                        int, int);
 142 static lambda_lattice lambda_lattice_new (int, int);
 143 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 144
 145 static tree find_induction_var_from_exit_cond (struct loop *);
 146
 147 /* Create a new lambda body vector.  */
 148
 149 lambda_body_vector
 150 lambda_body_vector_new (int size)
 151 {
 152   lambda_body_vector ret;
 153
 154   ret = ggc_alloc (sizeof (*ret));
 155   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 156   LBV_SIZE (ret) = size;
 157   LBV_DENOMINATOR (ret) = 1;
 158   return ret;
 159 }
 160
 161 /* Compute the new coefficients for the vector based on the
 162   *inverse* of the transformation matrix.  */
 163
 164 lambda_body_vector
 165 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 166                                 lambda_body_vector vect)
 167 {
 168   lambda_body_vector temp;
 169   int depth;
 170
 171   /* Make sure the matrix is square.  */
 172   if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform))
 173     abort ();
 174
 175   depth = LTM_ROWSIZE (transform);
 176
 177   temp = lambda_body_vector_new (depth);
 178   LBV_DENOMINATOR (temp) =
 179     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 180   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 181                              LTM_MATRIX (transform), depth,
 182                              LBV_COEFFICIENTS (temp));
 183   LBV_SIZE (temp) = LBV_SIZE (vect);
 184   return temp;
 185 }
 186
 187 /* Print out a lambda body vector.  */
 188
 189 void
 190 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 191 {
 192   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 193 }
 194
 195 /* Return TRUE if two linear expressions are equal.  */
 196
 197 static bool
 198 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 199            int depth, int invariants)
 200 {
 201   int i;
 202
 203   if (lle1 == NULL || lle2 == NULL)
 204     return false;
 205   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 206     return false;
 207   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 208     return false;
 209   for (i = 0; i < depth; i++)
 210     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 211       return false;
 212   for (i = 0; i < invariants; i++)
 213     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 214         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 215       return false;
 216   return true;
 217 }
 218
 219 /* Create a new linear expression with dimension DIM, and total number
 220    of invariants INVARIANTS.  */
 221
 222 lambda_linear_expression
 223 lambda_linear_expression_new (int dim, int invariants)
 224 {
 225   lambda_linear_expression ret;
 226
 227   ret = ggc_alloc_cleared (sizeof (*ret));
 228
 229   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 230   LLE_CONSTANT (ret) = 0;
 231   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 232   LLE_DENOMINATOR (ret) = 1;
 233   LLE_NEXT (ret) = NULL;
 234
 235   return ret;
 236 }
 237
 238 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 239    The starting letter used for variable names is START.  */
 240
 241 static void
 242 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 243                          char start)
 244 {
 245   int i;
 246   bool first = true;
 247   for (i = 0; i < size; i++)
 248     {
 249       if (expr[i] != 0)
 250         {
 251           if (first)
 252             {
 253               if (expr[i] < 0)
 254                 fprintf (outfile, "-");
 255               first = false;
 256             }
 257           else if (expr[i] > 0)
 258             fprintf (outfile, " + ");
 259           else
 260             fprintf (outfile, " - ");
 261           if (abs (expr[i]) == 1)
 262             fprintf (outfile, "%c", start + i);
 263           else
 264             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 265         }
 266     }
 267 }
 268
 269 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 270    depth/number of coefficients is given by DEPTH, the number of invariants is
 271    given by INVARIANTS, and the character to start variable names with is given
 272    by START.  */
 273
 274 void
 275 print_lambda_linear_expression (FILE * outfile,
 276                                 lambda_linear_expression expr,
 277                                 int depth, int invariants, char start)
 278 {
 279   fprintf (outfile, "\tLinear expression: ");
 280   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 281   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 282   fprintf (outfile, "  invariants: ");
 283   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 284                            invariants, 'A');
 285   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 286 }
 287
 288 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 289    coefficients is given by DEPTH, the number of invariants is
 290    given by INVARIANTS, and the character to start variable names with is given
 291    by START. */
 292
 293 void
 294 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 295                    int invariants, char start)
 296 {
 297   int step;
 298   lambda_linear_expression expr;
 299
 300   if (!loop)
 301     abort ();
 302
 303   expr = LL_LINEAR_OFFSET (loop);
 304   step = LL_STEP (loop);
 305   fprintf (outfile, "  step size = %d \n", step);
 306
 307   if (expr)
 308     {
 309       fprintf (outfile, "  linear offset: \n");
 310       print_lambda_linear_expression (outfile, expr, depth, invariants,
 311                                       start);
 312     }
 313
 314   fprintf (outfile, "  lower bound: \n");
 315   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 316     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 317   fprintf (outfile, "  upper bound: \n");
 318   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 319     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 320 }
 321
 322 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 323    number of invariants.  */
 324
 325 lambda_loopnest
 326 lambda_loopnest_new (int depth, int invariants)
 327 {
 328   lambda_loopnest ret;
 329   ret = ggc_alloc (sizeof (*ret));
 330
 331   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 332   LN_DEPTH (ret) = depth;
 333   LN_INVARIANTS (ret) = invariants;
 334
 335   return ret;
 336 }
 337
 338 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 339    character to use for loop names is given by START.  */
 340
 341 void
 342 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 343 {
 344   int i;
 345   for (i = 0; i < LN_DEPTH (nest); i++)
 346     {
 347       fprintf (outfile, "Loop %c\n", start + i);
 348       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 349                          LN_INVARIANTS (nest), 'i');
 350       fprintf (outfile, "\n");
 351     }
 352 }
 353
 354 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 355    of invariants.    */
 356
 357 static lambda_lattice
 358 lambda_lattice_new (int depth, int invariants)
 359 {
 360   lambda_lattice ret;
 361   ret = ggc_alloc (sizeof (*ret));
 362   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 363   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 364   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 365   LATTICE_DIMENSION (ret) = depth;
 366   LATTICE_INVARIANTS (ret) = invariants;
 367   return ret;
 368 }
 369
 370 /* Compute the lattice base for NEST.  The lattice base is essentially a
 371    non-singular transform from a dense base space to a sparse iteration space.
 372    We use it so that we don't have to specially handle the case of a sparse
 373    iteration space in other parts of the algorithm.  As a result, this routine
 374    only does something interesting (IE produce a matrix that isn't the
 375    identity matrix) if NEST is a sparse space.  */
 376
 377 static lambda_lattice
 378 lambda_lattice_compute_base (lambda_loopnest nest)
 379 {
 380   lambda_lattice ret;
 381   int depth, invariants;
 382   lambda_matrix base;
 383
 384   int i, j, step;
 385   lambda_loop loop;
 386   lambda_linear_expression expression;
 387
 388   depth = LN_DEPTH (nest);
 389   invariants = LN_INVARIANTS (nest);
 390
 391   ret = lambda_lattice_new (depth, invariants);
 392   base = LATTICE_BASE (ret);
 393   for (i = 0; i < depth; i++)
 394     {
 395       loop = LN_LOOPS (nest)[i];
 396       if (!loop)
 397         abort ();
 398       step = LL_STEP (loop);
 399       /* If we have a step of 1, then the base is one, and the
 400          origin and invariant coefficients are 0.  */
 401       if (step == 1)
 402         {
 403           for (j = 0; j < depth; j++)
 404             base[i][j] = 0;
 405           base[i][i] = 1;
 406           LATTICE_ORIGIN (ret)[i] = 0;
 407           for (j = 0; j < invariants; j++)
 408             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 409         }
 410       else
 411         {
 412           /* Otherwise, we need the lower bound expression (which must
 413              be an affine function)  to determine the base.  */
 414           expression = LL_LOWER_BOUND (loop);
 415           if (!expression
 416               || LLE_NEXT (expression) || LLE_DENOMINATOR (expression) != 1)
 417             abort ();
 418
 419           /* The lower triangular portion of the base is going to be the
 420              coefficient times the step */
 421           for (j = 0; j < i; j++)
 422             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 423               * LL_STEP (LN_LOOPS (nest)[j]);
 424           base[i][i] = step;
 425           for (j = i + 1; j < depth; j++)
 426             base[i][j] = 0;
 427
 428           /* Origin for this loop is the constant of the lower bound
 429              expression.  */
 430           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 431
 432           /* Coefficient for the invariants are equal to the invariant
 433              coefficients in the expression.  */
 434           for (j = 0; j < invariants; j++)
 435             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 436               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 437         }
 438     }
 439   return ret;
 440 }
 441
 442 /* Compute the greatest common denominator of two numbers (A and B) using
 443    Euclid's algorithm.  */
 444
 445 static int
 446 gcd (int a, int b)
 447 {
 448
 449   int x, y, z;
 450
 451   x = abs (a);
 452   y = abs (b);
 453
 454   while (x > 0)
 455     {
 456       z = y % x;
 457       y = x;
 458       x = z;
 459     }
 460
 461   return (y);
 462 }
 463
 464 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 465
 466 static int
 467 gcd_vector (lambda_vector vector, int size)
 468 {
 469   int i;
 470   int gcd1 = 0;
 471
 472   if (size > 0)
 473     {
 474       gcd1 = vector[0];
 475       for (i = 1; i < size; i++)
 476         gcd1 = gcd (gcd1, vector[i]);
 477     }
 478   return gcd1;
 479 }
 480
 481 /* Compute the least common multiple of two numbers A and B .  */
 482
 483 static int
 484 lcm (int a, int b)
 485 {
 486   return (abs (a) * abs (b) / gcd (a, b));
 487 }
 488
 489 /* Compute the loop bounds for the auxiliary space NEST.
 490    Input system used is Ax <= b.  TRANS is the unimodular transformation. */
 491
 492 static lambda_loopnest
 493 lambda_compute_auxillary_space (lambda_loopnest nest,
 494                                 lambda_trans_matrix trans)
 495 {
 496   lambda_matrix A, B, A1, B1, temp0;
 497   lambda_vector a, a1, temp1;
 498   lambda_matrix invertedtrans;
 499   int determinant, depth, invariants, size, newsize;
 500   int i, j, k;
 501   lambda_loopnest auxillary_nest;
 502   lambda_loop loop;
 503   lambda_linear_expression expression;
 504   lambda_lattice lattice;
 505
 506   int multiple, f1, f2;
 507
 508   depth = LN_DEPTH (nest);
 509   invariants = LN_INVARIANTS (nest);
 510
 511   /* Unfortunately, we can't know the number of constraints we'll have
 512      ahead of time, but this should be enough even in ridiculous loop nest
 513      cases. We abort if we go over this limit.  */
 514   A = lambda_matrix_new (128, depth);
 515   B = lambda_matrix_new (128, invariants);
 516   a = lambda_vector_new (128);
 517
 518   A1 = lambda_matrix_new (128, depth);
 519   B1 = lambda_matrix_new (128, invariants);
 520   a1 = lambda_vector_new (128);
 521
 522   /* Store the bounds in the equation matrix A, constant vector a, and
 523      invariant matrix B, so that we have Ax <= a + B.
 524      This requires a little equation rearranging so that everything is on the
 525      correct side of the inequality.  */
 526   size = 0;
 527   for (i = 0; i < depth; i++)
 528     {
 529       loop = LN_LOOPS (nest)[i];
 530
 531       /* First we do the lower bound.  */
 532       if (LL_STEP (loop) > 0)
 533         expression = LL_LOWER_BOUND (loop);
 534       else
 535         expression = LL_UPPER_BOUND (loop);
 536
 537       for (; expression != NULL; expression = LLE_NEXT (expression))
 538         {
 539           /* Fill in the coefficient.  */
 540           for (j = 0; j < i; j++)
 541             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 542
 543           /* And the invariant coefficient.  */
 544           for (j = 0; j < invariants; j++)
 545             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 546
 547           /* And the constant.  */
 548           a[size] = LLE_CONSTANT (expression);
 549
 550           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 551              constants and single variables on   */
 552           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 553           a[size] *= -1;
 554           for (j = 0; j < invariants; j++)
 555             B[size][j] *= -1;
 556
 557           size++;
 558           /* Need to increase matrix sizes above.  */
 559           if (size > 127)
 560             abort ();
 561         }
 562
 563       /* Then do the exact same thing for the upper bounds.  */
 564       if (LL_STEP (loop) > 0)
 565         expression = LL_UPPER_BOUND (loop);
 566       else
 567         expression = LL_LOWER_BOUND (loop);
 568
 569       for (; expression != NULL; expression = LLE_NEXT (expression))
 570         {
 571           /* Fill in the coefficient.  */
 572           for (j = 0; j < i; j++)
 573             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 574
 575           /* And the invariant coefficient.  */
 576           for (j = 0; j < invariants; j++)
 577             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 578
 579           /* And the constant.  */
 580           a[size] = LLE_CONSTANT (expression);
 581
 582           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 583           for (j = 0; j < i; j++)
 584             A[size][j] *= -1;
 585           A[size][i] = LLE_DENOMINATOR (expression);
 586           size++;
 587           /* Need to increase matrix sizes above.  */
 588           if (size > 127)
 589             abort ();
 590         }
 591     }
 592
 593   /* Compute the lattice base x = base * y + origin, where y is the
 594      base space.  */
 595   lattice = lambda_lattice_compute_base (nest);
 596
 597   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 598
 599   /* A1 = A * L */
 600   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 601
 602   /* a1 = a - A * origin constant.  */
 603   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 604   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 605
 606   /* B1 = B - A * origin invariant.  */
 607   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 608                       invariants);
 609   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 610
 611   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 612      it by A1, then performing fourier motzkin.  */
 613
 614   invertedtrans = lambda_matrix_new (depth, depth);
 615
 616   /* Compute the inverse of U.  */
 617   determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
 618                                        invertedtrans, depth);
 619
 620   /* A = A1 inv(U).  */
 621   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 622
 623   /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 624      auxillary nest.
 625      Fourier-Motzkin is a way of reducing systems of linear inequality so that
 626      it is easy to calculate the answer and bounds.
 627      A sketch of how it works:
 628      Given a system of linear inequalities, ai * xj >= bk, you can always
 629      rewrite the constraints so they are all of the form
 630      a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 631      in b1 ... bk, and some a in a1...ai)
 632      You can then eliminate this x from the non-constant inequalities by
 633      rewriting these as a <= b, x >= constant, and delete the x variable.
 634      You can then repeat this for any remaining x variables, and then we have
 635      an easy to use variable <= constant (or no variables at all) form that we
 636      can construct our bounds from.
 637
 638      In our case, each time we eliminate, we construct part of the bound from
 639      the ith variable, then delete the ith variable.
 640
 641      Remember the constant are in our vector a, our coefficient matrix is A,
 642      and our invariant coefficient matrix is B  */
 643
 644   /* Swap B and B1, and a1 and a */
 645   temp0 = B1;
 646   B1 = B;
 647   B = temp0;
 648
 649   temp1 = a1;
 650   a1 = a;
 651   a = temp1;
 652
 653   auxillary_nest = lambda_loopnest_new (depth, invariants);
 654
 655   for (i = depth - 1; i >= 0; i--)
 656     {
 657       loop = lambda_loop_new ();
 658       LN_LOOPS (auxillary_nest)[i] = loop;
 659       LL_STEP (loop) = 1;
 660
 661       for (j = 0; j < size; j++)
 662         {
 663           if (A[j][i] < 0)
 664             {
 665               /* Lower bound.  */
 666               expression = lambda_linear_expression_new (depth, invariants);
 667
 668               for (k = 0; k < i; k++)
 669                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 670               for (k = 0; k < invariants; k++)
 671                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 672               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 673               LLE_CONSTANT (expression) = -1 * a[j];
 674               /* Ignore if identical to the existing lower bound.  */
 675               if (!lle_equal (LL_LOWER_BOUND (loop),
 676                               expression, depth, invariants))
 677                 {
 678                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 679                   LL_LOWER_BOUND (loop) = expression;
 680                 }
 681
 682             }
 683           else if (A[j][i] > 0)
 684             {
 685               /* Upper bound.  */
 686               expression = lambda_linear_expression_new (depth, invariants);
 687               for (k = 0; k < i; k++)
 688                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 689               LLE_CONSTANT (expression) = a[j];
 690
 691               for (k = 0; k < invariants; k++)
 692                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 693
 694               LLE_DENOMINATOR (expression) = A[j][i];
 695               /* Ignore if identical to the existing upper bound.  */
 696               if (!lle_equal (LL_UPPER_BOUND (loop),
 697                               expression, depth, invariants))
 698                 {
 699                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 700                   LL_UPPER_BOUND (loop) = expression;
 701                 }
 702
 703             }
 704         }
 705       /* creates a new system by deleting the i'th variable. */
 706       newsize = 0;
 707       for (j = 0; j < size; j++)
 708         {
 709           if (A[j][i] == 0)
 710             {
 711               lambda_vector_copy (A[j], A1[newsize], depth);
 712               lambda_vector_copy (B[j], B1[newsize], invariants);
 713               a1[newsize] = a[j];
 714               newsize++;
 715             }
 716           else if (A[j][i] > 0)
 717             {
 718               for (k = 0; k < size; k++)
 719                 {
 720                   if (A[k][i] < 0)
 721                     {
 722                       multiple = lcm (A[j][i], A[k][i]);
 723                       f1 = multiple / A[j][i];
 724                       f2 = -1 * multiple / A[k][i];
 725
 726                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 727                                             A1[newsize], depth);
 728                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 729                                             B1[newsize], invariants);
 730                       a1[newsize] = f1 * a[j] + f2 * a[k];
 731                       newsize++;
 732                     }
 733                 }
 734             }
 735         }
 736
 737       temp0 = A;
 738       A = A1;
 739       A1 = temp0;
 740
 741       temp0 = B;
 742       B = B1;
 743       B1 = temp0;
 744
 745       temp1 = a;
 746       a = a1;
 747       a1 = temp1;
 748
 749       size = newsize;
 750     }
 751
 752   return auxillary_nest;
 753 }
 754
 755 /* Compute the loop bounds for the target space, using the bounds of
 756    the auxillary nest AUXILLARY_NEST, and the triangular matrix H.  This is
 757    done by matrix multiplication and then transformation of the new matrix
 758    back into linear expression form.
 759    Return the target loopnest.  */
 760
 761 static lambda_loopnest
 762 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 763                              lambda_trans_matrix H, lambda_vector stepsigns)
 764 {
 765   lambda_matrix inverse, H1;
 766   int determinant, i, j;
 767   int gcd1, gcd2;
 768   int factor;
 769
 770   lambda_loopnest target_nest;
 771   int depth, invariants;
 772   lambda_matrix target;
 773
 774   lambda_loop auxillary_loop, target_loop;
 775   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 776
 777   depth = LN_DEPTH (auxillary_nest);
 778   invariants = LN_INVARIANTS (auxillary_nest);
 779
 780   inverse = lambda_matrix_new (depth, depth);
 781   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 782
 783   /* H1 is H excluding its diagonal.  */
 784   H1 = lambda_matrix_new (depth, depth);
 785   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 786
 787   for (i = 0; i < depth; i++)
 788     H1[i][i] = 0;
 789
 790   /* Computes the linear offsets of the loop bounds.  */
 791   target = lambda_matrix_new (depth, depth);
 792   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 793
 794   target_nest = lambda_loopnest_new (depth, invariants);
 795
 796   for (i = 0; i < depth; i++)
 797     {
 798
 799       /* Get a new loop structure.  */
 800       target_loop = lambda_loop_new ();
 801       LN_LOOPS (target_nest)[i] = target_loop;
 802
 803       /* Computes the gcd of the coefficients of the linear part.  */
 804       gcd1 = gcd_vector (target[i], i);
 805
 806       /* Include the denominator in the GCD  */
 807       gcd1 = gcd (gcd1, determinant);
 808
 809       /* Now divide through by the gcd  */
 810       for (j = 0; j < i; j++)
 811         target[i][j] = target[i][j] / gcd1;
 812
 813       expression = lambda_linear_expression_new (depth, invariants);
 814       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 815       LLE_DENOMINATOR (expression) = determinant / gcd1;
 816       LLE_CONSTANT (expression) = 0;
 817       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 818                            invariants);
 819       LL_LINEAR_OFFSET (target_loop) = expression;
 820     }
 821
 822   /* For each loop, compute the new bounds from H */
 823   for (i = 0; i < depth; i++)
 824     {
 825       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 826       target_loop = LN_LOOPS (target_nest)[i];
 827       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 828       factor = LTM_MATRIX (H)[i][i];
 829
 830       /* First we do the lower bound.  */
 831       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 832
 833       for (; auxillary_expr != NULL;
 834            auxillary_expr = LLE_NEXT (auxillary_expr))
 835         {
 836           target_expr = lambda_linear_expression_new (depth, invariants);
 837           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 838                                      depth, inverse, depth,
 839                                      LLE_COEFFICIENTS (target_expr));
 840           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 841                                     LLE_COEFFICIENTS (target_expr), depth,
 842                                     factor);
 843
 844           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 845           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 846                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 847                               invariants);
 848           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 849                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 850                                     invariants, factor);
 851           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 852
 853           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 854             {
 855               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 856                 * determinant;
 857               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 858                                         (target_expr),
 859                                         LLE_INVARIANT_COEFFICIENTS
 860                                         (target_expr), invariants,
 861                                         determinant);
 862               LLE_DENOMINATOR (target_expr) =
 863                 LLE_DENOMINATOR (target_expr) * determinant;
 864             }
 865           /* Find the gcd and divide by it here, rather than doing it
 866              at the tree level.  */
 867           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 868           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 869                              invariants);
 870           gcd1 = gcd (gcd1, gcd2);
 871           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 872           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 873           for (j = 0; j < depth; j++)
 874             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 875           for (j = 0; j < invariants; j++)
 876             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 877           LLE_CONSTANT (target_expr) /= gcd1;
 878           LLE_DENOMINATOR (target_expr) /= gcd1;
 879           /* Ignore if identical to existing bound.  */
 880           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 881                           invariants))
 882             {
 883               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 884               LL_LOWER_BOUND (target_loop) = target_expr;
 885             }
 886         }
 887       /* Now do the upper bound.  */
 888       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 889
 890       for (; auxillary_expr != NULL;
 891            auxillary_expr = LLE_NEXT (auxillary_expr))
 892         {
 893           target_expr = lambda_linear_expression_new (depth, invariants);
 894           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 895                                      depth, inverse, depth,
 896                                      LLE_COEFFICIENTS (target_expr));
 897           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 898                                     LLE_COEFFICIENTS (target_expr), depth,
 899                                     factor);
 900           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 901           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 902                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 903                               invariants);
 904           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 905                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 906                                     invariants, factor);
 907           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 908
 909           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 910             {
 911               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 912                 * determinant;
 913               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 914                                         (target_expr),
 915                                         LLE_INVARIANT_COEFFICIENTS
 916                                         (target_expr), invariants,
 917                                         determinant);
 918               LLE_DENOMINATOR (target_expr) =
 919                 LLE_DENOMINATOR (target_expr) * determinant;
 920             }
 921           /* Find the gcd and divide by it here, instead of at the
 922              tree level.  */
 923           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 924           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 925                              invariants);
 926           gcd1 = gcd (gcd1, gcd2);
 927           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 928           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 929           for (j = 0; j < depth; j++)
 930             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 931           for (j = 0; j < invariants; j++)
 932             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 933           LLE_CONSTANT (target_expr) /= gcd1;
 934           LLE_DENOMINATOR (target_expr) /= gcd1;
 935           /* Ignore if equal to existing bound.  */
 936           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 937                           invariants))
 938             {
 939               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 940               LL_UPPER_BOUND (target_loop) = target_expr;
 941             }
 942         }
 943     }
 944   for (i = 0; i < depth; i++)
 945     {
 946       target_loop = LN_LOOPS (target_nest)[i];
 947       /* If necessary, exchange the upper and lower bounds and negate
 948          the step size.  */
 949       if (stepsigns[i] < 0)
 950         {
 951           LL_STEP (target_loop) *= -1;
 952           tmp_expr = LL_LOWER_BOUND (target_loop);
 953           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 954           LL_UPPER_BOUND (target_loop) = tmp_expr;
 955         }
 956     }
 957   return target_nest;
 958 }
 959
 960 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 961    result.  */
 962
 963 static lambda_vector
 964 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 965 {
 966   lambda_matrix matrix, H;
 967   int size;
 968   lambda_vector newsteps;
 969   int i, j, factor, minimum_column;
 970   int temp;
 971
 972   matrix = LTM_MATRIX (trans);
 973   size = LTM_ROWSIZE (trans);
 974   H = lambda_matrix_new (size, size);
 975
 976   newsteps = lambda_vector_new (size);
 977   lambda_vector_copy (stepsigns, newsteps, size);
 978
 979   lambda_matrix_copy (matrix, H, size, size);
 980
 981   for (j = 0; j < size; j++)
 982     {
 983       lambda_vector row;
 984       row = H[j];
 985       for (i = j; i < size; i++)
 986         if (row[i] < 0)
 987           lambda_matrix_col_negate (H, size, i);
 988       while (lambda_vector_first_nz (row, size, j + 1) < size)
 989         {
 990           minimum_column = lambda_vector_min_nz (row, size, j);
 991           lambda_matrix_col_exchange (H, size, j, minimum_column);
 992
 993           temp = newsteps[j];
 994           newsteps[j] = newsteps[minimum_column];
 995           newsteps[minimum_column] = temp;
 996
 997           for (i = j + 1; i < size; i++)
 998             {
 999               factor = row[i] / row[j];
1000               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1001             }
1002         }
1003     }
1004   return newsteps;
1005 }
1006
1007 /* Transform NEST according to TRANS, and return the new loopnest.
1008    This involves
1009    1. Computing a lattice base for the transformation
1010    2. Composing the dense base with the specified transformation (TRANS)
1011    3. Decomposing the combined transformation into a lower triangular portion,
1012    and a unimodular portion.
1013    4. Computing the auxillary nest using the unimodular portion.
1014    5. Computing the target nest using the auxillary nest and the lower
1015    triangular portion.  */
1016
1017 lambda_loopnest
1018 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1019 {
1020   lambda_loopnest auxillary_nest, target_nest;
1021
1022   int depth, invariants;
1023   int i, j;
1024   lambda_lattice lattice;
1025   lambda_trans_matrix trans1, H, U;
1026   lambda_loop loop;
1027   lambda_linear_expression expression;
1028   lambda_vector origin;
1029   lambda_matrix origin_invariants;
1030   lambda_vector stepsigns;
1031   int f;
1032
1033   depth = LN_DEPTH (nest);
1034   invariants = LN_INVARIANTS (nest);
1035
1036   /* Keep track of the signs of the loop steps.  */
1037   stepsigns = lambda_vector_new (depth);
1038   for (i = 0; i < depth; i++)
1039     {
1040       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1041         stepsigns[i] = 1;
1042       else
1043         stepsigns[i] = -1;
1044     }
1045
1046   /* Compute the lattice base.  */
1047   lattice = lambda_lattice_compute_base (nest);
1048   trans1 = lambda_trans_matrix_new (depth, depth);
1049
1050   /* Multiply the transformation matrix by the lattice base.  */
1051
1052   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1053                       LTM_MATRIX (trans1), depth, depth, depth);
1054
1055   /* Compute the Hermite normal form for the new transformation matrix.  */
1056   H = lambda_trans_matrix_new (depth, depth);
1057   U = lambda_trans_matrix_new (depth, depth);
1058   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1059                          LTM_MATRIX (U));
1060
1061   /* Compute the auxiliary loop nest's space from the unimodular
1062      portion.  */
1063   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1064
1065   /* Compute the loop step signs from the old step signs and the
1066      transformation matrix.  */
1067   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1068
1069   /* Compute the target loop nest space from the auxiliary nest and
1070      the lower triangular matrix H.  */
1071   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1072   origin = lambda_vector_new (depth);
1073   origin_invariants = lambda_matrix_new (depth, invariants);
1074   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1075                              LATTICE_ORIGIN (lattice), origin);
1076   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1077                       origin_invariants, depth, depth, invariants);
1078
1079   for (i = 0; i < depth; i++)
1080     {
1081       loop = LN_LOOPS (target_nest)[i];
1082       expression = LL_LINEAR_OFFSET (loop);
1083       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1084         f = 1;
1085       else
1086         f = LLE_DENOMINATOR (expression);
1087
1088       LLE_CONSTANT (expression) += f * origin[i];
1089
1090       for (j = 0; j < invariants; j++)
1091         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1092           f * origin_invariants[i][j];
1093     }
1094
1095   return target_nest;
1096
1097 }
1098
1099 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1100    return the new expression.  DEPTH is the depth of the loopnest.
1101    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1102    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1103    is the amount we have to add/subtract from the expression because of the
1104    type of comparison it is used in.  */
1105
1106 static lambda_linear_expression
1107 gcc_tree_to_linear_expression (int depth, tree expr,
1108                                VEC(tree) *outerinductionvars,
1109                                VEC(tree) *invariants, int extra)
1110 {
1111   lambda_linear_expression lle = NULL;
1112   switch (TREE_CODE (expr))
1113     {
1114     case INTEGER_CST:
1115       {
1116         lle = lambda_linear_expression_new (depth, 2 * depth);
1117         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1118         if (extra != 0)
1119           LLE_CONSTANT (lle) = extra;
1120
1121         LLE_DENOMINATOR (lle) = 1;
1122       }
1123       break;
1124     case SSA_NAME:
1125       {
1126         tree iv, invar;
1127         size_t i;
1128         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1129           if (iv != NULL)
1130             {
1131               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1132                 {
1133                   lle = lambda_linear_expression_new (depth, 2 * depth);
1134                   LLE_COEFFICIENTS (lle)[i] = 1;
1135                   if (extra != 0)
1136                     LLE_CONSTANT (lle) = extra;
1137
1138                   LLE_DENOMINATOR (lle) = 1;
1139                 }
1140             }
1141         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1142           if (invar != NULL)
1143             {
1144               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1145                 {
1146                   lle = lambda_linear_expression_new (depth, 2 * depth);
1147                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1148                   if (extra != 0)
1149                     LLE_CONSTANT (lle) = extra;
1150                   LLE_DENOMINATOR (lle) = 1;
1151                 }
1152             }
1153       }
1154       break;
1155     default:
1156       return NULL;
1157     }
1158
1159   return lle;
1160 }
1161
1162 /* Return true if OP is invariant in LOOP and all outer loops.  */
1163
1164 static bool
1165 invariant_in_loop (struct loop *loop, tree op)
1166 {
1167   if (loop->depth == 0)
1168     return true;
1169   if (TREE_CODE (op) == SSA_NAME)
1170     {
1171       if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
1172           && IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
1173         return true;
1174       if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op)))
1175         return false;
1176       if (loop->outer)
1177         if (!invariant_in_loop (loop->outer, op))
1178           return false;
1179       return !flow_bb_inside_loop_p (loop,
1180                                      bb_for_stmt (SSA_NAME_DEF_STMT (op)));
1181     }
1182   return false;
1183 }
1184
1185 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1186    or NULL if it could not be converted.
1187    DEPTH is the depth of the loop.
1188    INVARIANTS is a pointer to the array of loop invariants.
1189    The induction variable for this loop should be stored in the parameter
1190    OURINDUCTIONVAR.
1191    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1192
1193 static lambda_loop
1194 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1195                          VEC (tree) ** invariants,
1196                          tree * ourinductionvar,
1197                          VEC (tree) * outerinductionvars)
1198 {
1199   tree phi;
1200   tree exit_cond;
1201   tree access_fn, inductionvar;
1202   tree step;
1203   lambda_loop lloop = NULL;
1204   lambda_linear_expression lbound, ubound;
1205   tree test;
1206   int stepint;
1207   int extra = 0;
1208
1209   use_optype uses;
1210
1211   /* Find out induction var and set the pointer so that the caller can
1212      append it to the outerinductionvars array later.  */
1213
1214   inductionvar = find_induction_var_from_exit_cond (loop);
1215   *ourinductionvar = inductionvar;
1216
1217   exit_cond = get_loop_exit_condition (loop);
1218
1219   if (inductionvar == NULL || exit_cond == NULL)
1220     {
1221       if (dump_file && (dump_flags & TDF_DETAILS))
1222         fprintf (dump_file,
1223                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1224       return NULL;
1225     }
1226
1227   test = TREE_OPERAND (exit_cond, 0);
1228   if (TREE_CODE (test) != LE_EXPR
1229       && TREE_CODE (test) != LT_EXPR && TREE_CODE (test) != NE_EXPR)
1230     {
1231
1232       if (dump_file && (dump_flags & TDF_DETAILS))
1233         {
1234           fprintf (dump_file,
1235                    "Unable to convert loop: Loop exit test uses unhandled test condition:");
1236           print_generic_stmt (dump_file, test, 0);
1237           fprintf (dump_file, "\n");
1238         }
1239       return NULL;
1240     }
1241   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1242     {
1243
1244       if (dump_file && (dump_flags & TDF_DETAILS))
1245         fprintf (dump_file,
1246                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1247
1248       return NULL;
1249     }
1250
1251   phi = SSA_NAME_DEF_STMT (inductionvar);
1252   if (TREE_CODE (phi) != PHI_NODE)
1253     {
1254       get_stmt_operands (phi);
1255       uses = STMT_USE_OPS (phi);
1256
1257       if (!uses)
1258         {
1259
1260           if (dump_file && (dump_flags & TDF_DETAILS))
1261             fprintf (dump_file,
1262                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1263
1264           return NULL;
1265         }
1266
1267       phi = USE_OP (uses, 0);
1268       phi = SSA_NAME_DEF_STMT (phi);
1269       if (TREE_CODE (phi) != PHI_NODE)
1270         {
1271
1272           if (dump_file && (dump_flags & TDF_DETAILS))
1273             fprintf (dump_file,
1274                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1275           return NULL;
1276         }
1277
1278     }
1279
1280   access_fn = instantiate_parameters
1281     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1282   if (!access_fn)
1283     {
1284       if (dump_file && (dump_flags & TDF_DETAILS))
1285         fprintf (dump_file,
1286                  "Unable to convert loop: Access function for induction variable phi is NULL\n");
1287
1288       return NULL;
1289     }
1290
1291   step = evolution_part_in_loop_num (access_fn, loop->num);
1292   if (!step || step == chrec_dont_know)
1293     {
1294       if (dump_file && (dump_flags & TDF_DETAILS))
1295         fprintf (dump_file,
1296                  "Unable to convert loop: Cannot determine step of loop.\n");
1297
1298       return NULL;
1299     }
1300   if (TREE_CODE (step) != INTEGER_CST)
1301     {
1302
1303       if (dump_file && (dump_flags & TDF_DETAILS))
1304         fprintf (dump_file,
1305                  "Unable to convert loop: Step of loop is not integer.\n");
1306       return NULL;
1307     }
1308
1309   stepint = TREE_INT_CST_LOW (step);
1310
1311   /* Only want phis for induction vars, which will have two
1312      arguments.  */
1313   if (PHI_NUM_ARGS (phi) != 2)
1314     {
1315       if (dump_file && (dump_flags & TDF_DETAILS))
1316         fprintf (dump_file,
1317                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1318       return NULL;
1319     }
1320
1321   /* Another induction variable check. One argument's source should be
1322      in the loop, one outside the loop.  */
1323   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1324       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1325     {
1326
1327       if (dump_file && (dump_flags & TDF_DETAILS))
1328         fprintf (dump_file,
1329                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1330
1331       return NULL;
1332     }
1333
1334   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1335
1336     lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1),
1337                                             outerinductionvars, *invariants,
1338                                             0);
1339   else
1340     lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0),
1341                                             outerinductionvars, *invariants,
1342                                             0);
1343   if (!lbound)
1344     {
1345
1346       if (dump_file && (dump_flags & TDF_DETAILS))
1347         fprintf (dump_file,
1348                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1349
1350       return NULL;
1351     }
1352   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME)
1353     if (invariant_in_loop (loop, TREE_OPERAND (test, 1)))
1354       VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1355
1356   /* We only size the vectors assuming we have, at max, 2 times as many
1357      invariants as we do loops (one for each bound).
1358      This is just an arbitrary number, but it has to be matched against the
1359      code below.  */
1360   if (VEC_length (tree, *invariants) > (unsigned int) (2 * depth))
1361     abort ();
1362
1363   /* We might have some leftover. */
1364   if (TREE_CODE (test) == LT_EXPR)
1365     extra = -1 * stepint;
1366   else if (TREE_CODE (test) == NE_EXPR)
1367     extra = -1 * stepint;
1368
1369   ubound = gcc_tree_to_linear_expression (depth,
1370                                           TREE_OPERAND (test, 1),
1371                                           outerinductionvars,
1372                                           *invariants, extra);
1373   if (!ubound)
1374     {
1375
1376       if (dump_file && (dump_flags & TDF_DETAILS))
1377         fprintf (dump_file,
1378                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1379       return NULL;
1380     }
1381
1382   lloop = lambda_loop_new ();
1383   LL_STEP (lloop) = stepint;
1384   LL_LOWER_BOUND (lloop) = lbound;
1385   LL_UPPER_BOUND (lloop) = ubound;
1386   return lloop;
1387 }
1388
1389 /* Given a LOOP, find the induction variable it is testing against in the exit
1390    condition.  Return the induction variable if found, NULL otherwise.  */
1391
1392 static tree
1393 find_induction_var_from_exit_cond (struct loop *loop)
1394 {
1395   tree expr = get_loop_exit_condition (loop);
1396   tree test;
1397   if (expr == NULL_TREE)
1398     return NULL_TREE;
1399   if (TREE_CODE (expr) != COND_EXPR)
1400     return NULL_TREE;
1401   test = TREE_OPERAND (expr, 0);
1402   if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
1403     return NULL_TREE;
1404   if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME)
1405     return NULL_TREE;
1406   return TREE_OPERAND (test, 0);
1407 }
1408
1409 DEF_VEC_P(lambda_loop);
1410 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1411    Return the new loop nest.
1412    INDUCTIONVARS is a pointer to an array of induction variables for the
1413    loopnest that will be filled in during this process.
1414    INVARIANTS is a pointer to an array of invariants that will be filled in
1415    during this process.  */
1416
1417 lambda_loopnest
1418 gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest,
1419                                  VEC (tree) **inductionvars,
1420                                  VEC (tree) **invariants)
1421 {
1422   lambda_loopnest ret;
1423   struct loop *temp;
1424   int depth = 0;
1425   size_t i;
1426   VEC (lambda_loop) *loops;
1427   lambda_loop newloop;
1428   tree inductionvar = NULL;
1429
1430   temp = loop_nest;
1431   while (temp)
1432     {
1433       depth++;
1434       temp = temp->inner;
1435     }
1436   loops = VEC_alloc (lambda_loop, 1);
1437   *inductionvars = VEC_alloc (tree, 1);
1438   *invariants = VEC_alloc (tree, 1);
1439   temp = loop_nest;
1440   while (temp)
1441     {
1442       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1443                                          &inductionvar, *inductionvars);
1444       if (!newloop)
1445         return NULL;
1446       VEC_safe_push (tree, *inductionvars, inductionvar);
1447       VEC_safe_push (lambda_loop, loops, newloop);
1448       temp = temp->inner;
1449     }
1450
1451   ret = lambda_loopnest_new (depth, 2 * depth);
1452   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1453     LN_LOOPS (ret)[i] = newloop;
1454
1455   return ret;
1456
1457 }
1458
1459 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1460    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1461    inserted for us are stored.  INDUCTION_VARS is the array of induction
1462    variables for the loop this LBV is from.  */
1463
1464 static tree
1465 lbv_to_gcc_expression (lambda_body_vector lbv,
1466                        VEC (tree) *induction_vars, tree * stmts_to_insert)
1467 {
1468   tree stmts, stmt, resvar, name;
1469   size_t i;
1470   tree_stmt_iterator tsi;
1471
1472   /* Create a statement list and a linear expression temporary.  */
1473   stmts = alloc_stmt_list ();
1474   resvar = create_tmp_var (integer_type_node, "lletmp");
1475   add_referenced_tmp_var (resvar);
1476
1477   /* Start at 0.  */
1478   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1479   name = make_ssa_name (resvar, stmt);
1480   TREE_OPERAND (stmt, 0) = name;
1481   tsi = tsi_last (stmts);
1482   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1483
1484   for (i = 0; i < VEC_length (tree ,induction_vars) ; i++)
1485     {
1486       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1487         {
1488           tree newname;
1489
1490           /* newname = coefficient * induction_variable */
1491           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1492                         fold (build (MULT_EXPR, integer_type_node,
1493                                      VEC_index (tree, induction_vars, i),
1494                                      build_int_cst (integer_type_node,
1495                                                     LBV_COEFFICIENTS (lbv)[i]))));
1496           newname = make_ssa_name (resvar, stmt);
1497           TREE_OPERAND (stmt, 0) = newname;
1498           tsi = tsi_last (stmts);
1499           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1500           /* name = name + newname */
1501           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1502                         build (PLUS_EXPR, integer_type_node, name, newname));
1503           name = make_ssa_name (resvar, stmt);
1504           TREE_OPERAND (stmt, 0) = name;
1505           tsi = tsi_last (stmts);
1506           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1507         }
1508     }
1509
1510   /* Handle any denominator that occurs.  */
1511   if (LBV_DENOMINATOR (lbv) != 1)
1512     {
1513       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1514                     build (CEIL_DIV_EXPR, integer_type_node,
1515                            name, build_int_cst (integer_type_node,
1516                                                 LBV_DENOMINATOR (lbv))));
1517       name = make_ssa_name (resvar, stmt);
1518       TREE_OPERAND (stmt, 0) = name;
1519       tsi = tsi_last (stmts);
1520       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1521     }
1522   *stmts_to_insert = stmts;
1523   return name;
1524 }
1525
1526 /* Convert a linear expression from coefficient and constant form to a
1527    gcc tree.
1528    Return the tree that represents the final value of the expression.
1529    LLE is the linear expression to convert.
1530    OFFSET is the linear offset to apply to the expression.
1531    INDUCTION_VARS is a vector of induction variables for the loops.
1532    INVARIANTS is a vector of the loop nest invariants.
1533    WRAP specifies what tree code to wrap the results in, if there is more than
1534    one (it is either MAX_EXPR, or MIN_EXPR).
1535    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1536    statements that need to be inserted for the linear expression.  */
1537
1538 static tree
1539 lle_to_gcc_expression (lambda_linear_expression lle,
1540                        lambda_linear_expression offset,
1541                        VEC(tree) *induction_vars,
1542                        VEC(tree) *invariants,
1543                        enum tree_code wrap, tree * stmts_to_insert)
1544 {
1545   tree stmts, stmt, resvar, name;
1546   size_t i;
1547   tree_stmt_iterator tsi;
1548   VEC(tree) *results;
1549
1550   name = NULL_TREE;
1551   /* Create a statement list and a linear expression temporary.  */
1552   stmts = alloc_stmt_list ();
1553   resvar = create_tmp_var (integer_type_node, "lletmp");
1554   add_referenced_tmp_var (resvar);
1555   results = VEC_alloc (tree, 1);
1556
1557   /* Build up the linear expressions, and put the variable representing the
1558      result in the results array.  */
1559   for (; lle != NULL; lle = LLE_NEXT (lle))
1560     {
1561       /* Start at name = 0.  */
1562       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1563       name = make_ssa_name (resvar, stmt);
1564       TREE_OPERAND (stmt, 0) = name;
1565       tsi = tsi_last (stmts);
1566       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1567
1568       /* First do the induction variables.
1569          at the end, name = name + all the induction variables added
1570          together.  */
1571       for (i = 0; i < VEC_length (tree ,induction_vars); i++)
1572         {
1573           if (LLE_COEFFICIENTS (lle)[i] != 0)
1574             {
1575               tree newname;
1576               tree mult;
1577               tree coeff;
1578
1579               /* mult = induction variable * coefficient.  */
1580               if (LLE_COEFFICIENTS (lle)[i] == 1)
1581                 {
1582                   mult = VEC_index (tree, induction_vars, i);
1583                 }
1584               else
1585                 {
1586                   coeff = build_int_cst (integer_type_node,
1587                                          LLE_COEFFICIENTS (lle)[i]);
1588                   mult = fold (build (MULT_EXPR, integer_type_node,
1589                                       VEC_index (tree, induction_vars, i),
1590                                       coeff));
1591                 }
1592
1593               /* newname = mult */
1594               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1595               newname = make_ssa_name (resvar, stmt);
1596               TREE_OPERAND (stmt, 0) = newname;
1597               tsi = tsi_last (stmts);
1598               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1599
1600               /* name = name + newname */
1601               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1602                             build (PLUS_EXPR, integer_type_node,
1603                                    name, newname));
1604               name = make_ssa_name (resvar, stmt);
1605               TREE_OPERAND (stmt, 0) = name;
1606               tsi = tsi_last (stmts);
1607               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1608             }
1609         }
1610
1611       /* Handle our invariants.
1612          At the end, we have name = name + result of adding all multiplied
1613          invariants.  */
1614       for (i = 0; i < VEC_length (tree, invariants); i++)
1615         {
1616           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1617             {
1618               tree newname;
1619               tree mult;
1620               tree coeff;
1621
1622               /* mult = invariant * coefficient  */
1623               if (LLE_INVARIANT_COEFFICIENTS (lle)[i] == 1)
1624                 {
1625                   mult = VEC_index (tree, invariants, i);
1626                 }
1627               else
1628                 {
1629                   coeff = build_int_cst (integer_type_node,
1630                                          LLE_INVARIANT_COEFFICIENTS (lle)[i]);
1631                   mult = fold (build (MULT_EXPR, integer_type_node,
1632                                       VEC_index (tree, invariants, i),
1633                                       coeff));
1634                 }
1635
1636               /* newname = mult */
1637               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1638               newname = make_ssa_name (resvar, stmt);
1639               TREE_OPERAND (stmt, 0) = newname;
1640               tsi = tsi_last (stmts);
1641               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1642
1643               /* name = name + newname */
1644               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1645                             build (PLUS_EXPR, integer_type_node,
1646                                    name, newname));
1647               name = make_ssa_name (resvar, stmt);
1648               TREE_OPERAND (stmt, 0) = name;
1649               tsi = tsi_last (stmts);
1650               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1651             }
1652         }
1653
1654       /* Now handle the constant.
1655          name = name + constant.  */
1656       if (LLE_CONSTANT (lle) != 0)
1657         {
1658           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1659                         build (PLUS_EXPR, integer_type_node,
1660                                name, build_int_cst (integer_type_node,
1661                                                     LLE_CONSTANT (lle))));
1662           name = make_ssa_name (resvar, stmt);
1663           TREE_OPERAND (stmt, 0) = name;
1664           tsi = tsi_last (stmts);
1665           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1666         }
1667
1668       /* Now handle the offset.
1669          name = name + linear offset.  */
1670       if (LLE_CONSTANT (offset) != 0)
1671         {
1672           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1673                         build (PLUS_EXPR, integer_type_node,
1674                                name, build_int_cst (integer_type_node,
1675                                                     LLE_CONSTANT (offset))));
1676           name = make_ssa_name (resvar, stmt);
1677           TREE_OPERAND (stmt, 0) = name;
1678           tsi = tsi_last (stmts);
1679           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1680         }
1681
1682       /* Handle any denominator that occurs.  */
1683       if (LLE_DENOMINATOR (lle) != 1)
1684         {
1685           if (wrap == MAX_EXPR)
1686             stmt = build (MODIFY_EXPR, void_type_node, resvar,
1687                           build (CEIL_DIV_EXPR, integer_type_node,
1688                                  name, build_int_cst (integer_type_node,
1689                                                       LLE_DENOMINATOR (lle))));
1690           else if (wrap == MIN_EXPR)
1691             stmt = build (MODIFY_EXPR, void_type_node, resvar,
1692                           build (FLOOR_DIV_EXPR, integer_type_node,
1693                                  name, build_int_cst (integer_type_node,
1694                                                       LLE_DENOMINATOR (lle))));
1695           else
1696             abort ();
1697
1698           /* name = {ceil, floor}(name/denominator) */
1699           name = make_ssa_name (resvar, stmt);
1700           TREE_OPERAND (stmt, 0) = name;
1701           tsi = tsi_last (stmts);
1702           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1703         }
1704       VEC_safe_push (tree, results, name);
1705     }
1706
1707   /* Again, out of laziness, we don't handle this case yet.  It's not
1708      hard, it just hasn't occurred.  */
1709   if (VEC_length (tree, results) > 2)
1710     abort ();
1711
1712   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1713   if (VEC_length (tree, results) > 1)
1714     {
1715       tree op1 = VEC_index (tree, results, 0);
1716       tree op2 = VEC_index (tree, results, 1);
1717       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1718                     build (wrap, integer_type_node, op1, op2));
1719       name = make_ssa_name (resvar, stmt);
1720       TREE_OPERAND (stmt, 0) = name;
1721       tsi = tsi_last (stmts);
1722       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723     }
1724
1725   *stmts_to_insert = stmts;
1726   return name;
1727 }
1728
1729 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1730    it, back into gcc code.  This changes the
1731    loops, their induction variables, and their bodies, so that they
1732    match the transformed loopnest.
1733    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1734    loopnest.
1735    OLD_IVS is a vector of induction variables from the old loopnest.
1736    INVARIANTS is a vector of loop invariants from the old loopnest.
1737    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1738    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1739    NEW_LOOPNEST.  */
1740 void
1741 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1742                                  VEC(tree) *old_ivs,
1743                                  VEC(tree) *invariants,
1744                                  lambda_loopnest new_loopnest,
1745                                  lambda_trans_matrix transform)
1746 {
1747
1748   struct loop *temp;
1749   size_t i = 0;
1750   size_t depth = 0;
1751   VEC(tree) *new_ivs;
1752   block_stmt_iterator bsi;
1753   basic_block *bbs;
1754
1755   if (dump_file)
1756     {
1757       transform = lambda_trans_matrix_inverse (transform);
1758       fprintf (dump_file, "Inverse of transformation matrix:\n");
1759       print_lambda_trans_matrix (dump_file, transform);
1760     }
1761   temp = old_loopnest;
1762   new_ivs = VEC_alloc (tree, 1);
1763   while (temp)
1764     {
1765       temp = temp->inner;
1766       depth++;
1767     }
1768   temp = old_loopnest;
1769
1770   while (temp)
1771     {
1772       lambda_loop newloop;
1773       basic_block bb;
1774       tree ivvar, ivvarinced, exitcond, stmts;
1775       enum tree_code testtype;
1776       tree newupperbound, newlowerbound;
1777       lambda_linear_expression offset;
1778       /* First, build the new induction variable temporary  */
1779
1780       ivvar = create_tmp_var (integer_type_node, "lnivtmp");
1781       add_referenced_tmp_var (ivvar);
1782
1783       VEC_safe_push (tree, new_ivs, ivvar);
1784
1785       newloop = LN_LOOPS (new_loopnest)[i];
1786
1787       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1788          cases for now. */
1789       offset = LL_LINEAR_OFFSET (newloop);
1790
1791       if (LLE_DENOMINATOR (offset) != 1
1792           || !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth))
1793         abort ();
1794
1795       /* Now build the  new lower bounds, and insert the statements
1796          necessary to generate it on the loop preheader. */
1797       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1798                                              LL_LINEAR_OFFSET (newloop),
1799                                              new_ivs,
1800                                              invariants, MAX_EXPR, &stmts);
1801       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1802       bsi_commit_edge_inserts (NULL);
1803       /* Build the new upper bound and insert its statements in the
1804          basic block of the exit condition */
1805       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1806                                              LL_LINEAR_OFFSET (newloop),
1807                                              new_ivs,
1808                                              invariants, MIN_EXPR, &stmts);
1809       exitcond = get_loop_exit_condition (temp);
1810       bb = bb_for_stmt (exitcond);
1811       bsi = bsi_start (bb);
1812       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1813
1814       /* Create the new iv, and insert it's increment on the latch
1815          block.  */
1816
1817       bb = temp->latch->pred->src;
1818       bsi = bsi_last (bb);
1819       create_iv (newlowerbound,
1820                  build_int_cst (integer_type_node, LL_STEP (newloop)),
1821                  ivvar, temp, &bsi, false, &ivvar,
1822                  &ivvarinced);
1823
1824       /* Replace the exit condition with the new upper bound
1825          comparison.  */
1826       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1827       COND_EXPR_COND (exitcond) = build (testtype,
1828                                          boolean_type_node,
1829                                          ivvarinced, newupperbound);
1830       modify_stmt (exitcond);
1831       VEC_replace (tree, new_ivs, i, ivvar);
1832
1833       i++;
1834       temp = temp->inner;
1835     }
1836
1837   /* Go through the loop and make iv replacements.  */
1838   bbs = get_loop_body (old_loopnest);
1839   for (i = 0; i < old_loopnest->num_nodes; i++)
1840     for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1841       {
1842         tree stmt = bsi_stmt (bsi);
1843         use_optype uses;
1844         size_t j;
1845
1846         get_stmt_operands (stmt);
1847         uses = STMT_USE_OPS (stmt);
1848         for (j = 0; j < NUM_USES (uses); j++)
1849           {
1850             size_t k;
1851             use_operand_p use = USE_OP_PTR (uses, j);
1852             for (k = 0; k <  VEC_length (tree, old_ivs); k++)
1853               {
1854                 tree oldiv = VEC_index (tree, old_ivs, k);
1855                 if (USE_FROM_PTR (use) == oldiv)
1856                   {
1857                     tree newiv, stmts;
1858                     lambda_body_vector lbv;
1859
1860                     /* Compute the new expression for the induction
1861                        variable.  */
1862                     depth = VEC_length (tree, new_ivs);
1863                     lbv = lambda_body_vector_new (depth);
1864                     LBV_COEFFICIENTS (lbv)[k] = 1;
1865                     lbv = lambda_body_vector_compute_new (transform, lbv);
1866                     newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
1867
1868                     /* Insert the statements to build that
1869                        expression.  */
1870                     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1871
1872                     /* Replace the use with the result of that
1873                        expression.  */
1874                     if (dump_file)
1875                       {
1876                         fprintf (dump_file,
1877                                  "Replacing induction variable use of ");
1878                         print_generic_stmt (dump_file, USE_FROM_PTR (use), 0);
1879                         fprintf (dump_file, " with ");
1880                         print_generic_stmt (dump_file, newiv, 0);
1881                         fprintf (dump_file, "\n");
1882                       }
1883                     SET_USE (use, newiv);
1884                   }
1885               }
1886
1887           }
1888       }
1889 }
1890
1891 /* Returns true when the vector V is lexicographically positive, in
1892    other words, when the first non zero element is positive.  */
1893
1894 static bool
1895 lambda_vector_lexico_pos (lambda_vector v, unsigned n)
1896 {
1897   unsigned i;
1898   for (i = 0; i < n; i++)
1899     {
1900       if (v[i] == 0)
1901         continue;
1902       if (v[i] < 0)
1903         return false;
1904       if (v[i] > 0)
1905         return true;
1906     }
1907   return true;
1908 }
1909
1910 /* Return true if TRANS is a legal transformation matrix that respects
1911    the dependence vectors in DISTS and DIRS.  The conservative answer
1912    is false.
1913
1914    "Wolfe proves that a unimodular transformation represented by the
1915    matrix T is legal when applied to a loop nest with a set of
1916    lexicographically non-negative distance vectors RDG if and only if
1917    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1918    ie.: if and only if it transforms the lexicographically positive
1919    distance vectors to lexicographically positive vectors.  Note that
1920    a unimodular matrix must transform the zero vector (and only it) to
1921    the zero vector." S.Muchnick.  */
1922
1923 bool
1924 lambda_transform_legal_p (lambda_trans_matrix trans,
1925                           int nb_loops, varray_type dependence_relations)
1926 {
1927   unsigned int i;
1928   lambda_vector distres;
1929   struct data_dependence_relation *ddr;
1930
1931 #if defined ENABLE_CHECKING
1932   if (LTM_COLSIZE (trans) != nb_loops || LTM_ROWSIZE (trans) != nb_loops)
1933     abort ();
1934 #endif
1935
1936   /* When there is an unknown relation in the dependence_relations, we
1937      know that it is no worth looking at this loop nest: give up.  */
1938   ddr = (struct data_dependence_relation *)
1939     VARRAY_GENERIC_PTR (dependence_relations, 0);
1940   if (ddr == NULL)
1941     return true;
1942   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1943     return false;
1944
1945   distres = lambda_vector_new (nb_loops);
1946
1947   /* For each distance vector in the dependence graph.  */
1948   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
1949     {
1950       ddr = (struct data_dependence_relation *)
1951         VARRAY_GENERIC_PTR (dependence_relations, i);
1952
1953       /* Don't care about relations for which we know that there is no
1954          dependence, nor about read-read (aka. output-dependences):
1955          these data accesses can happen in any order.  */
1956       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1957           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1958         continue;
1959       /* Conservatively answer: "this transformation is not valid".  */
1960       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1961         return false;
1962
1963       /* Compute trans.dist_vect */
1964       lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1965                                  DDR_DIST_VECT (ddr), distres);
1966
1967       if (!lambda_vector_lexico_pos (distres, nb_loops))
1968         return false;
1969     }
1970
1971   return true;
1972 }