gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20     02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "errors.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "target.h"
  30 #include "rtl.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "timevar.h"
  36 #include "cfgloop.h"
  37 #include "expr.h"
  38 #include "optabs.h"
  39 #include "tree-chrec.h"
  40 #include "tree-data-ref.h"
  41 #include "tree-pass.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "vec.h"
  44 #include "lambda.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
   56  therefore represent the iteration space as an integral combination of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
   78  nest . U = auxiliary space), which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118
/* NOTE(review): presumably this sets up the VEC (int) type named in the
   perfect_nestify prototype below -- confirm against vec.h.  */
DEF_VEC_GC_P(int);

/* Forward declaration of a file-local function; its definition is not in
   this part of the file.  */
static bool perfect_nestify (struct loops *,
                             struct loop *, VEC (tree) *,
                             VEC (tree) *, VEC (int) *, VEC (tree) *);
/* Lattice stuff that is internal to the code generation algorithm.

   lambda_compute_auxillary_space uses a lattice as the mapping
   x = base * y + origin, where y ranges over the base space; the
   invariant part of the origin is kept separately in ORIGIN_INVARIANTS.  */

typedef struct
{
  /* Lattice base matrix.  lambda_lattice_new allocates it as
     DIMENSION x DIMENSION.  */
  lambda_matrix base;
  /* Lattice dimension.  */
  int dimension;
  /* Origin vector for the coefficients; one entry per loop.  */
  lambda_vector origin;
  /* Origin matrix for the invariants; lambda_lattice_new allocates it with
     DIMENSION rows and INVARIANTS columns.  */
  lambda_matrix origin_invariants;
  /* Number of invariants.  */
  int invariants;
} *lambda_lattice;

/* Field accessors for a lambda_lattice T.  Each expands to the field
   itself, so it can be read or assigned to.  */
#define LATTICE_BASE(T) ((T)->base)
#define LATTICE_DIMENSION(T) ((T)->dimension)
#define LATTICE_ORIGIN(T) ((T)->origin)
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
#define LATTICE_INVARIANTS(T) ((T)->invariants)
 145
/* Forward declarations of file-local helpers.  */

static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
                       int, int);
static lambda_lattice lambda_lattice_new (int, int);
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);

static tree find_induction_var_from_exit_cond (struct loop *);
 152
 153 /* Create a new lambda body vector.  */
 154
 155 lambda_body_vector
 156 lambda_body_vector_new (int size)
 157 {
 158   lambda_body_vector ret;
 159
 160   ret = ggc_alloc (sizeof (*ret));
 161   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 162   LBV_SIZE (ret) = size;
 163   LBV_DENOMINATOR (ret) = 1;
 164   return ret;
 165 }
 166
 167 /* Compute the new coefficients for the vector based on the
 168   *inverse* of the transformation matrix.  */
 169
 170 lambda_body_vector
 171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 172                                 lambda_body_vector vect)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants)
 229 {
 230   lambda_linear_expression ret;
 231
 232   ret = ggc_alloc_cleared (sizeof (*ret));
 233
 234   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 235   LLE_CONSTANT (ret) = 0;
 236   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 237   LLE_DENOMINATOR (ret) = 1;
 238   LLE_NEXT (ret) = NULL;
 239
 240   return ret;
 241 }
 242
 243 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 244    The starting letter used for variable names is START.  */
 245
 246 static void
 247 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 248                          char start)
 249 {
 250   int i;
 251   bool first = true;
 252   for (i = 0; i < size; i++)
 253     {
 254       if (expr[i] != 0)
 255         {
 256           if (first)
 257             {
 258               if (expr[i] < 0)
 259                 fprintf (outfile, "-");
 260               first = false;
 261             }
 262           else if (expr[i] > 0)
 263             fprintf (outfile, " + ");
 264           else
 265             fprintf (outfile, " - ");
 266           if (abs (expr[i]) == 1)
 267             fprintf (outfile, "%c", start + i);
 268           else
 269             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 270         }
 271     }
 272 }
 273
 274 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 275    depth/number of coefficients is given by DEPTH, the number of invariants is
 276    given by INVARIANTS, and the character to start variable names with is given
 277    by START.  */
 278
 279 void
 280 print_lambda_linear_expression (FILE * outfile,
 281                                 lambda_linear_expression expr,
 282                                 int depth, int invariants, char start)
 283 {
 284   fprintf (outfile, "\tLinear expression: ");
 285   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 286   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 287   fprintf (outfile, "  invariants: ");
 288   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 289                            invariants, 'A');
 290   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 291 }
 292
 293 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 294    coefficients is given by DEPTH, the number of invariants is
 295    given by INVARIANTS, and the character to start variable names with is given
 296    by START.  */
 297
 298 void
 299 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 300                    int invariants, char start)
 301 {
 302   int step;
 303   lambda_linear_expression expr;
 304
 305   gcc_assert (loop);
 306
 307   expr = LL_LINEAR_OFFSET (loop);
 308   step = LL_STEP (loop);
 309   fprintf (outfile, "  step size = %d \n", step);
 310
 311   if (expr)
 312     {
 313       fprintf (outfile, "  linear offset: \n");
 314       print_lambda_linear_expression (outfile, expr, depth, invariants,
 315                                       start);
 316     }
 317
 318   fprintf (outfile, "  lower bound: \n");
 319   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 320     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 321   fprintf (outfile, "  upper bound: \n");
 322   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 323     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 324 }
 325
 326 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 327    number of invariants.  */
 328
 329 lambda_loopnest
 330 lambda_loopnest_new (int depth, int invariants)
 331 {
 332   lambda_loopnest ret;
 333   ret = ggc_alloc (sizeof (*ret));
 334
 335   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 336   LN_DEPTH (ret) = depth;
 337   LN_INVARIANTS (ret) = invariants;
 338
 339   return ret;
 340 }
 341
 342 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 343    character to use for loop names is given by START.  */
 344
 345 void
 346 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 347 {
 348   int i;
 349   for (i = 0; i < LN_DEPTH (nest); i++)
 350     {
 351       fprintf (outfile, "Loop %c\n", start + i);
 352       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 353                          LN_INVARIANTS (nest), 'i');
 354       fprintf (outfile, "\n");
 355     }
 356 }
 357
 358 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 359    of invariants.  */
 360
 361 static lambda_lattice
 362 lambda_lattice_new (int depth, int invariants)
 363 {
 364   lambda_lattice ret;
 365   ret = ggc_alloc (sizeof (*ret));
 366   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 367   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 368   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 369   LATTICE_DIMENSION (ret) = depth;
 370   LATTICE_INVARIANTS (ret) = invariants;
 371   return ret;
 372 }
 373
 374 /* Compute the lattice base for NEST.  The lattice base is essentially a
 375    non-singular transform from a dense base space to a sparse iteration space.
 376    We use it so that we don't have to specially handle the case of a sparse
 377    iteration space in other parts of the algorithm.  As a result, this routine
 378    only does something interesting (IE produce a matrix that isn't the
 379    identity matrix) if NEST is a sparse space.  */
 380
 381 static lambda_lattice
 382 lambda_lattice_compute_base (lambda_loopnest nest)
 383 {
 384   lambda_lattice ret;
 385   int depth, invariants;
 386   lambda_matrix base;
 387
 388   int i, j, step;
 389   lambda_loop loop;
 390   lambda_linear_expression expression;
 391
 392   depth = LN_DEPTH (nest);
 393   invariants = LN_INVARIANTS (nest);
 394
 395   ret = lambda_lattice_new (depth, invariants);
 396   base = LATTICE_BASE (ret);
 397   for (i = 0; i < depth; i++)
 398     {
 399       loop = LN_LOOPS (nest)[i];
 400       gcc_assert (loop);
 401       step = LL_STEP (loop);
 402       /* If we have a step of 1, then the base is one, and the
 403          origin and invariant coefficients are 0.  */
 404       if (step == 1)
 405         {
 406           for (j = 0; j < depth; j++)
 407             base[i][j] = 0;
 408           base[i][i] = 1;
 409           LATTICE_ORIGIN (ret)[i] = 0;
 410           for (j = 0; j < invariants; j++)
 411             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 412         }
 413       else
 414         {
 415           /* Otherwise, we need the lower bound expression (which must
 416              be an affine function)  to determine the base.  */
 417           expression = LL_LOWER_BOUND (loop);
 418           gcc_assert (expression && !LLE_NEXT (expression)
 419                       && LLE_DENOMINATOR (expression) == 1);
 420
 421           /* The lower triangular portion of the base is going to be the
 422              coefficient times the step */
 423           for (j = 0; j < i; j++)
 424             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 425               * LL_STEP (LN_LOOPS (nest)[j]);
 426           base[i][i] = step;
 427           for (j = i + 1; j < depth; j++)
 428             base[i][j] = 0;
 429
 430           /* Origin for this loop is the constant of the lower bound
 431              expression.  */
 432           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 433
 434           /* Coefficient for the invariants are equal to the invariant
 435              coefficients in the expression.  */
 436           for (j = 0; j < invariants; j++)
 437             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 438               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 439         }
 440     }
 441   return ret;
 442 }
 443
 444 /* Compute the greatest common denominator of two numbers (A and B) using
 445    Euclid's algorithm.  */
 446
 447 static int
 448 gcd (int a, int b)
 449 {
 450
 451   int x, y, z;
 452
 453   x = abs (a);
 454   y = abs (b);
 455
 456   while (x > 0)
 457     {
 458       z = y % x;
 459       y = x;
 460       x = z;
 461     }
 462
 463   return (y);
 464 }
 465
 466 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 467
 468 static int
 469 gcd_vector (lambda_vector vector, int size)
 470 {
 471   int i;
 472   int gcd1 = 0;
 473
 474   if (size > 0)
 475     {
 476       gcd1 = vector[0];
 477       for (i = 1; i < size; i++)
 478         gcd1 = gcd (gcd1, vector[i]);
 479     }
 480   return gcd1;
 481 }
 482
 483 /* Compute the least common multiple of two numbers A and B .  */
 484
 485 static int
 486 lcm (int a, int b)
 487 {
 488   return (abs (a) * abs (b) / gcd (a, b));
 489 }
 490
 491 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 492    auxiliary nest.
 493    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 494    it is easy to calculate the answer and bounds.
 495    A sketch of how it works:
 496    Given a system of linear inequalities, ai * xj >= bk, you can always
 497    rewrite the constraints so they are all of the form
 498    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 499    in b1 ... bk, and some a in a1...ai)
 500    You can then eliminate this x from the non-constant inequalities by
 501    rewriting these as a <= b, x >= constant, and delete the x variable.
 502    You can then repeat this for any remaining x variables, and then we have
 503    an easy to use variable <= constant (or no variables at all) form that we
 504    can construct our bounds from.
 505
 506    In our case, each time we eliminate, we construct part of the bound from
 507    the ith variable, then delete the ith variable.
 508
 509    Remember the constant are in our vector a, our coefficient matrix is A,
 510    and our invariant coefficient matrix is B.
 511
 512    SIZE is the size of the matrices being passed.
 513    DEPTH is the loop nest depth.
 514    INVARIANTS is the number of loop invariants.
 515    A, B, and a are the coefficient matrix, invariant coefficient, and a
 516    vector of constants, respectively.  */
 517
 518 static lambda_loopnest
 519 compute_nest_using_fourier_motzkin (int size,
 520                                     int depth,
 521                                     int invariants,
 522                                     lambda_matrix A,
 523                                     lambda_matrix B,
 524                                     lambda_vector a)
 525 {
 526
 527   int multiple, f1, f2;
 528   int i, j, k;
 529   lambda_linear_expression expression;
 530   lambda_loop loop;
 531   lambda_loopnest auxillary_nest;
 532   lambda_matrix swapmatrix, A1, B1;
 533   lambda_vector swapvector, a1;
 534   int newsize;
 535
 536   A1 = lambda_matrix_new (128, depth);
 537   B1 = lambda_matrix_new (128, invariants);
 538   a1 = lambda_vector_new (128);
 539
 540   auxillary_nest = lambda_loopnest_new (depth, invariants);
 541
 542   for (i = depth - 1; i >= 0; i--)
 543     {
 544       loop = lambda_loop_new ();
 545       LN_LOOPS (auxillary_nest)[i] = loop;
 546       LL_STEP (loop) = 1;
 547
 548       for (j = 0; j < size; j++)
 549         {
 550           if (A[j][i] < 0)
 551             {
 552               /* Any linear expression in the matrix with a coefficient less
 553                  than 0 becomes part of the new lower bound.  */
 554               expression = lambda_linear_expression_new (depth, invariants);
 555
 556               for (k = 0; k < i; k++)
 557                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 558
 559               for (k = 0; k < invariants; k++)
 560                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 561
 562               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 563               LLE_CONSTANT (expression) = -1 * a[j];
 564
 565               /* Ignore if identical to the existing lower bound.  */
 566               if (!lle_equal (LL_LOWER_BOUND (loop),
 567                               expression, depth, invariants))
 568                 {
 569                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 570                   LL_LOWER_BOUND (loop) = expression;
 571                 }
 572
 573             }
 574           else if (A[j][i] > 0)
 575             {
 576               /* Any linear expression with a coefficient greater than 0
 577                  becomes part of the new upper bound.  */
 578               expression = lambda_linear_expression_new (depth, invariants);
 579               for (k = 0; k < i; k++)
 580                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 581
 582               for (k = 0; k < invariants; k++)
 583                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 584
 585               LLE_DENOMINATOR (expression) = A[j][i];
 586               LLE_CONSTANT (expression) = a[j];
 587
 588               /* Ignore if identical to the existing upper bound.  */
 589               if (!lle_equal (LL_UPPER_BOUND (loop),
 590                               expression, depth, invariants))
 591                 {
 592                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 593                   LL_UPPER_BOUND (loop) = expression;
 594                 }
 595
 596             }
 597         }
 598
 599       /* This portion creates a new system of linear inequalities by deleting
 600          the i'th variable, reducing the system by one variable.  */
 601       newsize = 0;
 602       for (j = 0; j < size; j++)
 603         {
 604           /* If the coefficient for the i'th variable is 0, then we can just
 605              eliminate the variable straightaway.  Otherwise, we have to
 606              multiply through by the coefficients we are eliminating.  */
 607           if (A[j][i] == 0)
 608             {
 609               lambda_vector_copy (A[j], A1[newsize], depth);
 610               lambda_vector_copy (B[j], B1[newsize], invariants);
 611               a1[newsize] = a[j];
 612               newsize++;
 613             }
 614           else if (A[j][i] > 0)
 615             {
 616               for (k = 0; k < size; k++)
 617                 {
 618                   if (A[k][i] < 0)
 619                     {
 620                       multiple = lcm (A[j][i], A[k][i]);
 621                       f1 = multiple / A[j][i];
 622                       f2 = -1 * multiple / A[k][i];
 623
 624                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 625                                             A1[newsize], depth);
 626                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 627                                             B1[newsize], invariants);
 628                       a1[newsize] = f1 * a[j] + f2 * a[k];
 629                       newsize++;
 630                     }
 631                 }
 632             }
 633         }
 634
 635       swapmatrix = A;
 636       A = A1;
 637       A1 = swapmatrix;
 638
 639       swapmatrix = B;
 640       B = B1;
 641       B1 = swapmatrix;
 642
 643       swapvector = a;
 644       a = a1;
 645       a1 = swapvector;
 646
 647       size = newsize;
 648     }
 649
 650   return auxillary_nest;
 651 }
 652
/* Compute the loop bounds for the auxiliary space of NEST.
   The input system used is Ax <= a + B, where A holds the induction
   variable coefficients, a the constants and B the invariant
   coefficients.  TRANS is the unimodular transformation (U).
   Given the original nest, this function will
   1. Convert the nest into matrix form, which consists of a matrix for the
   coefficients, a matrix for the invariant coefficients, and a vector for
   the constants, with one row per bound expression.
   2. Calculate the lattice base for the nest, which maps a dense base
   space onto the iteration space of NEST.
   3. Rewrite the system in terms of the dense base space by multiplying
   the coefficient matrix by the lattice base and moving the lattice
   origin to the right-hand side.
   4. Multiply the resulting coefficient matrix by the inverse of the
   matrix of TRANS.
   5. Transform the newly created system (from step 4) back into a loop nest
   using fourier motzkin elimination to figure out the bounds.

   At most 127 bound expressions in total are supported; see the
   assertions below.  */

static lambda_loopnest
lambda_compute_auxillary_space (lambda_loopnest nest,
                                lambda_trans_matrix trans)
{
  lambda_matrix A, B, A1, B1;
  lambda_vector a, a1;
  lambda_matrix invertedtrans;
  int depth, invariants, size;
  int i, j;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_lattice lattice;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Unfortunately, we can't know the number of constraints we'll have
     ahead of time, but this should be enough even in ridiculous loop nest
     cases. We abort if we go over this limit.  */
  A = lambda_matrix_new (128, depth);
  B = lambda_matrix_new (128, invariants);
  a = lambda_vector_new (128);

  A1 = lambda_matrix_new (128, depth);
  B1 = lambda_matrix_new (128, invariants);
  a1 = lambda_vector_new (128);

  /* Store the bounds in the equation matrix A, constant vector a, and
     invariant matrix B, so that we have Ax <= a + B.
     This requires a little equation rearranging so that everything is on the
     correct side of the inequality.  */
  size = 0;
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (nest)[i];

      /* First we do the lower bound.  For a loop with a non-positive step
         the upper bound expressions are used in this role instead.  */
      if (LL_STEP (loop) > 0)
        expression = LL_LOWER_BOUND (loop);
      else
        expression = LL_UPPER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
        {
          /* Fill in the coefficient.  Only the columns of the loops
             outside loop I are written here.  */
          for (j = 0; j < i; j++)
            A[size][j] = LLE_COEFFICIENTS (expression)[j];

          /* And the invariant coefficient.  */
          for (j = 0; j < invariants; j++)
            B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

          /* And the constant.  */
          a[size] = LLE_CONSTANT (expression);

          /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
             constants and invariants on the right-hand side, which
             negates them.  */
          A[size][i] = -1 * LLE_DENOMINATOR (expression);
          a[size] *= -1;
          for (j = 0; j < invariants; j++)
            B[size][j] *= -1;

          size++;
          /* Need to increase matrix sizes above.  */
          gcc_assert (size <= 127);

        }

      /* Then do the exact same thing for the upper bounds.  */
      if (LL_STEP (loop) > 0)
        expression = LL_UPPER_BOUND (loop);
      else
        expression = LL_LOWER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
        {
          /* Fill in the coefficient.  */
          for (j = 0; j < i; j++)
            A[size][j] = LLE_COEFFICIENTS (expression)[j];

          /* And the invariant coefficient.  */
          for (j = 0; j < invariants; j++)
            B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

          /* And the constant.  */
          a[size] = LLE_CONSTANT (expression);

          /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  Here the
             constant and the invariants are already on the correct side,
             so only the coefficients change sign.  */
          for (j = 0; j < i; j++)
            A[size][j] *= -1;
          A[size][i] = LLE_DENOMINATOR (expression);
          size++;
          /* Need to increase matrix sizes above.  */
          gcc_assert (size <= 127);

        }
    }

  /* Compute the lattice base x = base * y + origin, where y is the
     base space.  */
  lattice = lambda_lattice_compute_base (nest);

  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */

  /* A1 = A * L */
  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);

  /* a1 = a - A * origin constant.  The product is stored in a1 first and
     then subtracted from a in place.  */
  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
  lambda_vector_add_mc (a, 1, a1, -1, a1, size);

  /* B1 = B - A * origin invariant, computed the same way.  */
  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
                      invariants);
  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);

  /* Now compute the auxiliary space bounds by first inverting U, multiplying
     it by A1, then performing fourier motzkin.  */

  invertedtrans = lambda_matrix_new (depth, depth);

  /* Compute the inverse of U.  The value returned by
     lambda_matrix_inverse is not used here.  */
  lambda_matrix_inverse (LTM_MATRIX (trans),
                         invertedtrans, depth);

  /* A = A1 inv(U).  The original contents of A are no longer needed, so
     it is reused for the result.  */
  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);

  return compute_nest_using_fourier_motzkin (size, depth, invariants,
                                             A, B1, a1);
}
 799
 800 /* Compute the loop bounds for the target space, using the bounds of
 801    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 802    Every bound of auxiliary loop I is multiplied by the matrix that
 803    lambda_matrix_inverse produces for H and scaled by H[I][I], which also
 804    becomes the step of target loop I.  If STEPSIGNS[I] is negative, the
 805    bounds of target loop I are exchanged and its step is negated.
 806    Return the target loopnest.  */
 807
 808 static lambda_loopnest
 809 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 810                              lambda_trans_matrix H, lambda_vector stepsigns)
 811 {
 812   lambda_matrix inverse, H1;
 813   int determinant, i, j;
 814   int gcd1, gcd2;
 815   int factor;
 816
 817   lambda_loopnest target_nest;
 818   int depth, invariants;
 819   lambda_matrix target;
 820
 821   lambda_loop auxillary_loop, target_loop;
 822   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 823
 824   depth = LN_DEPTH (auxillary_nest);
 825   invariants = LN_INVARIANTS (auxillary_nest);
 826   /* Invert H; the value returned is used below as the determinant.  */
 827   inverse = lambda_matrix_new (depth, depth);
 828   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 829
 830   /* H1 is a copy of H with its diagonal set to zero.  */
 831   H1 = lambda_matrix_new (depth, depth);
 832   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 833
 834   for (i = 0; i < depth; i++)
 835     H1[i][i] = 0;
 836
 837   /* Compute the linear offsets of the loop bounds as H1 * INVERSE.  */
 838   target = lambda_matrix_new (depth, depth);
 839   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 840
 841   target_nest = lambda_loopnest_new (depth, invariants);
 842   /* First pass: create every target loop and its linear offset.  */
 843   for (i = 0; i < depth; i++)
 844     {
 845
 846       /* Create the loop structure for level I of the target nest.  */
 847       target_loop = lambda_loop_new ();
 848       LN_LOOPS (target_nest)[i] = target_loop;
 849
 850       /* Compute the gcd of the first I coefficients of the linear part.  */
 851       gcd1 = gcd_vector (target[i], i);
 852
 853       /* Include the denominator in the GCD.  */
 854       gcd1 = gcd (gcd1, determinant);
 855
 856       /* Now divide the first I coefficients through by the gcd.  */
 857       for (j = 0; j < i; j++)
 858         target[i][j] = target[i][j] / gcd1;
 859       /* The offset is row I of TARGET over DETERMINANT / GCD1.  */
 860       expression = lambda_linear_expression_new (depth, invariants);
 861       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 862       LLE_DENOMINATOR (expression) = determinant / gcd1;
 863       LLE_CONSTANT (expression) = 0;
 864       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 865                            invariants);
 866       LL_LINEAR_OFFSET (target_loop) = expression;
 867     }
 868
 869   /* Second pass: for each loop, compute the new bounds from H.  */
 870   for (i = 0; i < depth; i++)
 871     {
 872       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 873       target_loop = LN_LOOPS (target_nest)[i];
 874       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 875       factor = LTM_MATRIX (H)[i][i];
 876
 877       /* First we do the lower bound, one expression of the list at a time.  */
 878       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 879
 880       for (; auxillary_expr != NULL;
 881            auxillary_expr = LLE_NEXT (auxillary_expr))
 882         {
 883           target_expr = lambda_linear_expression_new (depth, invariants);
 884           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 885                                      depth, inverse, depth,
 886                                      LLE_COEFFICIENTS (target_expr));
 887           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 888                                     LLE_COEFFICIENTS (target_expr), depth,
 889                                     factor);
 890
 891           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 892           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 893                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 894                               invariants);
 895           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 896                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     invariants, factor);
 898           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 899           /* Unless the coefficients test as zero, scale the rest by DETERMINANT.  */
 900           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 901             {
 902               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 903                 * determinant;
 904               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 905                                         (target_expr),
 906                                         LLE_INVARIANT_COEFFICIENTS
 907                                         (target_expr), invariants,
 908                                         determinant);
 909               LLE_DENOMINATOR (target_expr) =
 910                 LLE_DENOMINATOR (target_expr) * determinant;
 911             }
 912           /* Find the gcd of every part of the expression, denominator
 913              included, and divide by it here rather than at the tree level.  */
 914           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 915           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 916                              invariants);
 917           gcd1 = gcd (gcd1, gcd2);
 918           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 919           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 920           for (j = 0; j < depth; j++)
 921             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 922           for (j = 0; j < invariants; j++)
 923             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 924           LLE_CONSTANT (target_expr) /= gcd1;
 925           LLE_DENOMINATOR (target_expr) /= gcd1;
 926           /* Prepend to the lower bound list unless lle_equal reports a match.  */
 927           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 928                           invariants))
 929             {
 930               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 931               LL_LOWER_BOUND (target_loop) = target_expr;
 932             }
 933         }
 934       /* Now do the upper bound, following the same steps as above.  */
 935       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 936
 937       for (; auxillary_expr != NULL;
 938            auxillary_expr = LLE_NEXT (auxillary_expr))
 939         {
 940           target_expr = lambda_linear_expression_new (depth, invariants);
 941           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 942                                      depth, inverse, depth,
 943                                      LLE_COEFFICIENTS (target_expr));
 944           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 945                                     LLE_COEFFICIENTS (target_expr), depth,
 946                                     factor);
 947           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 948           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 949                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 950                               invariants);
 951           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 952                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     invariants, factor);
 954           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 955           /* Unless the coefficients test as zero, scale the rest by DETERMINANT.  */
 956           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 957             {
 958               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 959                 * determinant;
 960               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 961                                         (target_expr),
 962                                         LLE_INVARIANT_COEFFICIENTS
 963                                         (target_expr), invariants,
 964                                         determinant);
 965               LLE_DENOMINATOR (target_expr) =
 966                 LLE_DENOMINATOR (target_expr) * determinant;
 967             }
 968           /* Find the gcd of every part of the expression, denominator
 969              included, and divide by it here instead of at the tree level.  */
 970           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 971           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 972                              invariants);
 973           gcd1 = gcd (gcd1, gcd2);
 974           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 975           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 976           for (j = 0; j < depth; j++)
 977             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 978           for (j = 0; j < invariants; j++)
 979             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 980           LLE_CONSTANT (target_expr) /= gcd1;
 981           LLE_DENOMINATOR (target_expr) /= gcd1;
 982           /* Prepend to the upper bound list unless lle_equal reports a match.  */
 983           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 984                           invariants))
 985             {
 986               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 987               LL_UPPER_BOUND (target_loop) = target_expr;
 988             }
 989         }
 990     }
 991   for (i = 0; i < depth; i++)
 992     {
 993       target_loop = LN_LOOPS (target_nest)[i];
 994       /* If the step sign recorded for this loop is negative, exchange the
 995          upper and lower bound lists and negate the step size.  */
 996       if (stepsigns[i] < 0)
 997         {
 998           LL_STEP (target_loop) *= -1;
 999           tmp_expr = LL_LOWER_BOUND (target_loop);
1000           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1001           LL_UPPER_BOUND (target_loop) = tmp_expr;
1002         }
1003     }
1004   return target_nest;
1005 }
1006
1007 /* Compute the step signs of the nest transformed by TRANS.  A copy of
1008    STEPSIGNS is permuted while a copy of TRANS is reduced, then returned.  */
1009
1010 static lambda_vector
1011 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1012 {
1013   int n = LTM_ROWSIZE (trans);
1014   lambda_matrix work = lambda_matrix_new (n, n);
1015   lambda_vector signs = lambda_vector_new (n);
1016   int col, k;
1017
1018   /* Work on copies so that neither TRANS nor STEPSIGNS is modified.  */
1019   lambda_vector_copy (stepsigns, signs, n);
1020   lambda_matrix_copy (LTM_MATRIX (trans), work, n, n);
1021
1022   for (col = 0; col < n; col++)
1023     {
1024       lambda_vector row = work[col];
1025
1026       /* Negate each column from COL on whose entry in ROW is negative.  */
1027       for (k = col; k < n; k++)
1028         if (row[k] < 0)
1029           lambda_matrix_col_negate (work, n, k);
1030
1031       /* Keep reducing while lambda_vector_first_nz still reports an entry
1032          of ROW past column COL.  */
1033       while (lambda_vector_first_nz (row, n, col + 1) < n)
1034         {
1035           int pivot = lambda_vector_min_nz (row, n, col);
1036           int saved;
1037
1038           lambda_matrix_col_exchange (work, n, col, pivot);
1039
1040           /* The step signs follow the column exchange.  */
1041           saved = signs[col];
1042           signs[col] = signs[pivot];
1043           signs[pivot] = saved;
1044
1045           /* Presumably adds -quotient times column COL to column K.  */
1046           for (k = col + 1; k < n; k++)
1047             lambda_matrix_col_add (work, n, col, k, -1 * (row[k] / row[col]));
1048         }
1049     }
1050
1051   return signs;
1052 }
1053
1054 /* Transform NEST according to TRANS, and return the new loopnest.
1055    This involves
1056    1. Computing a lattice base for NEST.
1057    2. Composing that base with the specified transformation (TRANS).
1058    3. Decomposing the combined transformation into a lower triangular portion
1059    and a unimodular portion, via its Hermite normal form.
1060    4. Computing the auxiliary nest using the unimodular portion.
1061    5. Computing the target nest from the auxiliary nest and the triangular
1062    portion, then adding the TRANS-mapped lattice origin to its offsets.  */
1063
1064 lambda_loopnest
1065 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1066 {
1067   lambda_loopnest aux_nest;
1068   lambda_loopnest result;
1069   lambda_lattice lattice;
1070   lambda_trans_matrix composed;
1071   lambda_trans_matrix H, U;
1072   lambda_vector signs;
1073   lambda_vector origin;
1074   lambda_matrix origin_invariants;
1075   int depth = LN_DEPTH (nest);
1076   int invariants = LN_INVARIANTS (nest);
1077   int level, k;
1078
1079   /* Record the sign of every loop step; a step that is not positive is
1080      recorded as -1.  */
1081   signs = lambda_vector_new (depth);
1082   for (level = 0; level < depth; level++)
1083     {
1084       lambda_loop loop = LN_LOOPS (nest)[level];
1085
1086       signs[level] = LL_STEP (loop) > 0 ? 1 : -1;
1087     }
1088
1089   /* Compute the lattice base and multiply the transformation matrix
1090      by it.  */
1091   lattice = lambda_lattice_compute_base (nest);
1092   composed = lambda_trans_matrix_new (depth, depth);
1093   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1094                       LTM_MATRIX (composed), depth, depth, depth);
1095
1096   /* Compute the Hermite normal form H of the composed matrix, along with
1097      the matrix U.  */
1098   H = lambda_trans_matrix_new (depth, depth);
1099   U = lambda_trans_matrix_new (depth, depth);
1100   lambda_matrix_hermite (LTM_MATRIX (composed), depth, LTM_MATRIX (H),
1101                          LTM_MATRIX (U));
1102
1103   /* The unimodular portion U yields the auxiliary loop nest.  */
1104   aux_nest = lambda_compute_auxillary_space (nest, U);
1105
1106   /* Derive the new step signs from the old ones and the composed
1107      transformation matrix.  */
1108   signs = lambda_compute_step_signs (composed, signs);
1109
1110   /* The auxiliary nest and the lower triangular matrix H yield the target
1111      loop nest.  */
1112   result = lambda_compute_target_space (aux_nest, H, signs);
1113
1114   /* Map the lattice origin and its invariant part through TRANS.  */
1115   origin = lambda_vector_new (depth);
1116   origin_invariants = lambda_matrix_new (depth, invariants);
1117   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1118                              LATTICE_ORIGIN (lattice), origin);
1119   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1120                       origin_invariants, depth, depth, invariants);
1121
1122   /* Add the mapped origin to the linear offset of every target loop.  */
1123   for (level = 0; level < depth; level++)
1124     {
1125       lambda_linear_expression offset;
1126       int scale;
1127
1128       offset = LL_LINEAR_OFFSET (LN_LOOPS (result)[level]);
1129
1130       /* The origin is scaled by the denominator of the offset, unless the
1131          loop coefficients of the offset test as zero.  */
1132       if (lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth))
1133         scale = 1;
1134       else
1135         scale = LLE_DENOMINATOR (offset);
1136
1137       LLE_CONSTANT (offset) += scale * origin[level];
1138       for (k = 0; k < invariants; k++)
1139         LLE_INVARIANT_COEFFICIENTS (offset)[k]
1140           += scale * origin_invariants[level][k];
1141     }
1142
1143   return result;
1144 }
1145
1146 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1147    return the new expression, or NULL if EXPR is neither an INTEGER_CST nor
1148    an SSA_NAME that matches a known variable.  DEPTH is the depth of the
1149    loopnest.  OUTERINDUCTIONVARS is an array of the induction variables for
1150    outer loops in this nest.  INVARIANTS is the array of invariants for the
1151    loop.  EXTRA is the amount we have to add/subtract from the expression
1152    because of the type of comparison it is used in.  */
1153
1154 static lambda_linear_expression
1155 gcc_tree_to_linear_expression (int depth, tree expr,
1156                                VEC(tree) *outerinductionvars,
1157                                VEC(tree) *invariants, int extra)
1158 {
1159   lambda_linear_expression result = NULL;
1160   tree var;
1161   size_t idx;
1162
1163   if (TREE_CODE (expr) == INTEGER_CST)
1164     {
1165       /* A constant becomes the value TREE_INT_CST_LOW gives for it plus
1166          EXTRA, with denominator 1.  */
1167       result = lambda_linear_expression_new (depth, 2 * depth);
1168       LLE_CONSTANT (result) = TREE_INT_CST_LOW (expr);
1169       LLE_CONSTANT (result) += extra;
1170       LLE_DENOMINATOR (result) = 1;
1171     }
1172   else if (TREE_CODE (expr) == SSA_NAME)
1173     {
1174       /* An outer induction variable with the same underlying variable as
1175          EXPR gets coefficient 1 at its index.  */
1176       for (idx = 0; VEC_iterate (tree, outerinductionvars, idx, var); idx++)
1177         {
1178           if (var == NULL || SSA_NAME_VAR (var) != SSA_NAME_VAR (expr))
1179             continue;
1180
1181           result = lambda_linear_expression_new (depth, 2 * depth);
1182           LLE_COEFFICIENTS (result)[idx] = 1;
1183           if (extra != 0)
1184             LLE_CONSTANT (result) = extra;
1185
1186           LLE_DENOMINATOR (result) = 1;
1187         }
1188
1189       /* Likewise for the invariants, using the invariant coefficients.  A
1190          match here replaces any expression built by the loop above.  */
1191       for (idx = 0; VEC_iterate (tree, invariants, idx, var); idx++)
1192         {
1193           if (var == NULL || SSA_NAME_VAR (var) != SSA_NAME_VAR (expr))
1194             continue;
1195
1196           result = lambda_linear_expression_new (depth, 2 * depth);
1197           LLE_INVARIANT_COEFFICIENTS (result)[idx] = 1;
1198           if (extra != 0)
1199             LLE_CONSTANT (result) = extra;
1200
1201           LLE_DENOMINATOR (result) = 1;
1202         }
1203     }
1204
1205   /* For any other tree code RESULT is still NULL here.  */
1206   return result;
1207 }
1208
1209 /* Return the depth of the loopnest NEST: the number of loops reached from
1210    NEST by following the inner links, NEST included.  */
1211
1212 static int
1213 depth_of_nest (struct loop *nest)
1214 {
1215   size_t count;
1216
1217   for (count = 0; nest; nest = nest->inner)
1218     count++;
1219
1220   return count;
1221 }
1222
1223
1224 /* Return true if OP is invariant in LOOP and all outer loops.  A gimple
1225    minimal invariant, or a LOOP of depth zero, qualifies immediately.  */
1226
1227 static bool
1228 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1229 {
1230   if (is_gimple_min_invariant (op) || loop->depth == 0)
1231     return true;
1232
1233   if (!expr_invariant_in_loop_p (loop, op))
1234     return false;
1235
1236   /* Recurse outwards as long as there is an enclosing loop.  */
1237   return (!loop->outer
1238           || invariant_in_loop_and_outer_loops (loop->outer, op));
1239 }
1240
1241 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1242    or NULL if it could not be converted.  DEPTH is the depth of the loop nest.
1243    INVARIANTS points to the vector of loop invariants; an invariant SSA name
1244    used in the exit test is appended to it.  The induction variable of LOOP
1245    is stored in *OURINDUCTIONVAR.  OUTERINDUCTIONVARS holds the induction
1246    variables of the outer loops.  The lower bound, the adjusted upper bound
1247    and the step of LOOP are pushed onto *LBOUNDVARS, *UBOUNDVARS and *STEPS.  */
1248
1249 static lambda_loop
1250 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1251                          VEC (tree) ** invariants,
1252                          tree * ourinductionvar,
1253                          VEC (tree) * outerinductionvars,
1254                          VEC (tree) ** lboundvars,
1255                          VEC (tree) ** uboundvars,
1256                          VEC (int) ** steps)
1257 {
1258   tree phi;
1259   tree exit_cond;
1260   tree access_fn, inductionvar;
1261   tree step;
1262   lambda_loop lloop = NULL;
1263   lambda_linear_expression lbound, ubound;
1264   tree test;
1265   int stepint;
1266   int extra = 0;
1267   tree lboundvar, uboundvar, uboundresult;
1268   use_optype uses;
1269
1270   /* Find out induction var and exit condition.  */
1271   inductionvar = find_induction_var_from_exit_cond (loop);
1272   exit_cond = get_loop_exit_condition (loop);
1273
1274   if (inductionvar == NULL || exit_cond == NULL)
1275     {
1276       if (dump_file && (dump_flags & TDF_DETAILS))
1277         fprintf (dump_file,
1278                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1279       return NULL;
1280     }
1281
1282   /* Operand 0 of the exit condition is the comparison being tested.  */
1283   test = TREE_OPERAND (exit_cond, 0);
1284
1285   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1286     {
1287       if (dump_file && (dump_flags & TDF_DETAILS))
1288         fprintf (dump_file,
1289                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290
1291       return NULL;
1292     }
1293
1294   phi = SSA_NAME_DEF_STMT (inductionvar);
1295   if (TREE_CODE (phi) != PHI_NODE)
1296     {
1297       /* The name is defined by some other statement; look for the PHI node
1298          that defines the first use operand of that statement.  */
1299       uses = STMT_USE_OPS (phi);
1300       if (!uses)
1301         {
1302           if (dump_file && (dump_flags & TDF_DETAILS))
1303             fprintf (dump_file,
1304                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1305
1306           return NULL;
1307         }
1308
1309       phi = USE_OP (uses, 0);
1310       phi = SSA_NAME_DEF_STMT (phi);
1311       if (TREE_CODE (phi) != PHI_NODE)
1312         {
1313
1314           if (dump_file && (dump_flags & TDF_DETAILS))
1315             fprintf (dump_file,
1316                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1317           return NULL;
1318         }
1319
1320     }
1321
1322   /* The induction variable name/version we want to put in the array is the
1323      result of the induction variable phi node.  */
1324   *ourinductionvar = PHI_RESULT (phi);
1325   access_fn = instantiate_parameters
1326     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1327   if (access_fn == chrec_dont_know)
1328     {
1329       if (dump_file && (dump_flags & TDF_DETAILS))
1330         fprintf (dump_file,
1331                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1332
1333       return NULL;
1334     }
1335
1336   step = evolution_part_in_loop_num (access_fn, loop->num);
1337   if (!step || step == chrec_dont_know)
1338     {
1339       if (dump_file && (dump_flags & TDF_DETAILS))
1340         fprintf (dump_file,
1341                  "Unable to convert loop: Cannot determine step of loop.\n");
1342
1343       return NULL;
1344     }
1345   if (TREE_CODE (step) != INTEGER_CST)
1346     {
1347
1348       if (dump_file && (dump_flags & TDF_DETAILS))
1349         fprintf (dump_file,
1350                  "Unable to convert loop: Step of loop is not integer.\n");
1351       return NULL;
1352     }
1353
1354   stepint = TREE_INT_CST_LOW (step);
1355
1356   /* Only want phis for induction vars, which will have two
1357      arguments.  */
1358   if (PHI_NUM_ARGS (phi) != 2)
1359     {
1360       if (dump_file && (dump_flags & TDF_DETAILS))
1361         fprintf (dump_file,
1362                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1363       return NULL;
1364     }
1365
1366   /* Another induction variable check.  Exactly one of the two arguments
1367      must come from inside the loop, the other from outside it.  */
1368   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1369       == flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1370     {
1371
1372       if (dump_file && (dump_flags & TDF_DETAILS))
1373         fprintf (dump_file,
1374                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1375
1376       return NULL;
1377     }
1378
1379   /* The lower bound is the PHI argument whose edge comes from outside the
1380      loop.  */
1381   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1382     lboundvar = PHI_ARG_DEF (phi, 1);
1383   else
1384     lboundvar = PHI_ARG_DEF (phi, 0);
1385
1386   /* The lower bound needs no adjustment, so EXTRA is 0 here.  */
1387   lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1388                                           outerinductionvars, *invariants,
1389                                           0);
1390
1391   if (!lbound)
1392     {
1393       if (dump_file && (dump_flags & TDF_DETAILS))
1394         fprintf (dump_file,
1395                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1396
1397       return NULL;
1398     }
1399
1400   /* One part of the test may be a loop invariant tree.  An operand that is
1401      an SSA name invariant in LOOP and in all outer loops is recorded in
1402      *INVARIANTS; operand 1 is considered first, and at most one operand
1403      is recorded.  */
1404   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1405       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1406     VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1407   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1408            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1409     VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
1410
1411   /* The non-induction variable part of the test is the upper bound variable.
1412    */
1413   if (TREE_OPERAND (test, 0) == inductionvar)
1414     uboundvar = TREE_OPERAND (test, 1);
1415   else
1416     uboundvar = TREE_OPERAND (test, 0);
1417
1418
1419   /* We only size the vectors assuming we have, at max, 2 times as many
1420      invariants as we do loops (one for each bound).
1421      This is just an arbitrary number, but it has to be matched against the
1422      code below.  */
1423   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1424
1425   /* We might have some leftover: depending on the comparison code of the
1426      test, the upper bound is moved by one step.  */
1427   if (TREE_CODE (test) == LT_EXPR)
1428     extra = -1 * stepint;
1429   else if (TREE_CODE (test) == NE_EXPR)
1430     extra = -1 * stepint;
1431   else if (TREE_CODE (test) == GT_EXPR)
1432     extra = -1 * stepint;
1433   else if (TREE_CODE (test) == EQ_EXPR)
1434     extra = 1 * stepint;
1435
1436   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1437                                           outerinductionvars,
1438                                           *invariants, extra);
1439   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1440                         build_int_cst (TREE_TYPE (uboundvar), extra));
1441   VEC_safe_push (tree, *uboundvars, uboundresult);
1442   VEC_safe_push (tree, *lboundvars, lboundvar);
1443   VEC_safe_push (int, *steps, stepint);
1444   if (!ubound)
1445     {
1446       if (dump_file && (dump_flags & TDF_DETAILS))
1447         fprintf (dump_file,
1448                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1449       return NULL;
1450     }
1451
1452   lloop = lambda_loop_new ();
1453   LL_STEP (lloop) = stepint;
1454   LL_LOWER_BOUND (lloop) = lbound;
1455   LL_UPPER_BOUND (lloop) = ubound;
1456   return lloop;
1457 }
1458
1459 /* Return the SSA name that the exit condition of LOOP compares against a
1460    value invariant in LOOP, or NULL_TREE if there is no such name.  */
1461
1462 static tree
1463 find_induction_var_from_exit_cond (struct loop *loop)
1464 {
1465   tree cond = get_loop_exit_condition (loop);
1466   tree cmp, candidate;
1467
1468   /* Only a COND_EXPR whose operand 0 is a comparison can be handled.  */
1469   if (cond == NULL_TREE || TREE_CODE (cond) != COND_EXPR)
1470     return NULL_TREE;
1471
1472   cmp = TREE_OPERAND (cond, 0);
1473   if (!COMPARISON_CLASS_P (cmp))
1474     return NULL_TREE;
1475
1476   /* Find the side that is invariant in this loop; the induction variable
1477      must be the other side.  Operand 0 is tested for invariance first.  */
1478   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (cmp, 0)))
1479     candidate = TREE_OPERAND (cmp, 1);
1480   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (cmp, 1)))
1481     candidate = TREE_OPERAND (cmp, 0);
1482   else
1483     return NULL_TREE;
1484
1485   /* Anything other than an SSA name is rejected.  */
1486   if (TREE_CODE (candidate) != SSA_NAME)
1487     return NULL_TREE;
1488   return candidate;
1489 }
1490
1491 DEF_VEC_GC_P(lambda_loop);
1492 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST and return it, or
1493    return NULL if a loop cannot be converted or the nest cannot be made
1494    perfect.  *INDUCTIONVARS and *INVARIANTS are filled in with the induction
1495    variables and the invariants of the nest during this process.  If
1496    NEED_PERFECT_NEST is true, perfect_nestify is run on the nest, with
1497    CURRLOOPS passed through to it.  */
1498
1499 lambda_loopnest
1500 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1501                                  struct loop * loop_nest,
1502                                  VEC (tree) **inductionvars,
1503                                  VEC (tree) **invariants,
1504                                  bool need_perfect_nest)
1505 {
1506   lambda_loopnest nest;
1507   struct loop *cur;
1508   int depth = depth_of_nest (loop_nest);
1509   size_t idx;
1510   VEC (lambda_loop) *converted = NULL;
1511   VEC (tree) *uboundvars = NULL;
1512   VEC (tree) *lboundvars = NULL;
1513   VEC (int) *steps = NULL;
1514   lambda_loop lloop;
1515   tree iv = NULL;
1516
1517   /* Walk the chain of inner loops that starts at LOOP_NEST, converting each
1518      loop and collecting its induction variable.  */
1519   for (cur = loop_nest; cur; cur = cur->inner)
1520     {
1521       lloop = gcc_loop_to_lambda_loop (cur, depth, invariants,
1522                                        &iv, *inductionvars,
1523                                        &lboundvars, &uboundvars,
1524                                        &steps);
1525       if (!lloop)
1526         return NULL;
1527       VEC_safe_push (tree, *inductionvars, iv);
1528       VEC_safe_push (lambda_loop, converted, lloop);
1529     }
1530
1531   if (need_perfect_nest)
1532     {
1533       if (!perfect_nestify (currloops, loop_nest,
1534                             lboundvars, uboundvars, steps, *inductionvars))
1535         {
1536           if (dump_file)
1537             fprintf (dump_file, "Not a perfect loop nest and couldn't convert to one.\n");
1538           return NULL;
1539         }
1540
1541       if (dump_file)
1542         fprintf (dump_file, "Successfully converted loop nest to perfect loop nest.\n");
1543     }
1544
1545   /* Store the converted loops in the new nest.  */
1546   nest = lambda_loopnest_new (depth, 2 * depth);
1547   for (idx = 0; VEC_iterate (lambda_loop, converted, idx, lloop); idx++)
1548     LN_LOOPS (nest)[idx] = lloop;
1549
1550   return nest;
1551 }
1552
1553
1554 /* Convert a lambda body vector LBV to a gcc tree, and return the SSA name
1555    holding its value.  The statements that compute the value are stored in
1556    *STMTS_TO_INSERT for the caller to insert.  INDUCTION_VARS is the array of
1557    induction variables for the loop this LBV is from.  TYPE is the tree type
1558    to use for the variables and constants involved.  */
1559
1560 static tree
1561 lbv_to_gcc_expression (lambda_body_vector lbv,
1562                        tree type, VEC (tree) *induction_vars,
1563                        tree * stmts_to_insert)
1564 {
1565   tree stmts, stmt, resvar, name;
1566   tree iv;
1567   size_t i;
1568   tree_stmt_iterator tsi;
1569
1570   /* Create a statement list and a linear expression temporary.  */
1571   stmts = alloc_stmt_list ();
1572   resvar = create_tmp_var (type, "lbvtmp");
1573   add_referenced_tmp_var (resvar);
1574
1575   /* Start at 0, built in TYPE so that it matches the temporary.  */
1576   stmt = build (MODIFY_EXPR, void_type_node, resvar, build_int_cst (type, 0));
1577   name = make_ssa_name (resvar, stmt);
1578   TREE_OPERAND (stmt, 0) = name;
1579   tsi = tsi_last (stmts);
1580   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1581
1582   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1583     {
1584       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1585         {
1586           tree newname;
1587           tree coeffmult;
1588
1589           /* newname = coefficient * induction_variable */
1590           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1591           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1592                         fold (build (MULT_EXPR, type, iv, coeffmult)));
1593
1594           newname = make_ssa_name (resvar, stmt);
1595           TREE_OPERAND (stmt, 0) = newname;
1596           fold_stmt (&stmt);
1597           tsi = tsi_last (stmts);
1598           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1599
1600           /* name = name + newname */
1601           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1602                         build (PLUS_EXPR, type, name, newname));
1603           name = make_ssa_name (resvar, stmt);
1604           TREE_OPERAND (stmt, 0) = name;
1605           fold_stmt (&stmt);
1606           tsi = tsi_last (stmts);
1607           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1608
1609         }
1610     }
1611
1612   /* Handle any denominator that occurs, using a ceiling division.  */
1613   if (LBV_DENOMINATOR (lbv) != 1)
1614     {
1615       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1616       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1617                     build (CEIL_DIV_EXPR, type, name, denominator));
1618       name = make_ssa_name (resvar, stmt);
1619       TREE_OPERAND (stmt, 0) = name;
1620       fold_stmt (&stmt);
1621       tsi = tsi_last (stmts);
1622       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1623     }
1624   *stmts_to_insert = stmts;
1625   return name;
1626 }
1627
1628 /* Convert a linear expression from coefficient and constant form to a
1629    gcc tree.
1630    Return the tree that represents the final value of the expression.
1631    LLE is the linear expression to convert.
1632    OFFSET is the linear offset to apply to the expression.
1633    TYPE is the tree type to use for the variables and math.
1634    INDUCTION_VARS is a vector of induction variables for the loops.
1635    INVARIANTS is a vector of the loop nest invariants.
1636    WRAP specifies what tree code to wrap the results in, if there is more than
1637    one (it is either MAX_EXPR, or MIN_EXPR).
1638    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1639    statements that need to be inserted for the linear expression.  */
1640
1641 static tree
1642 lle_to_gcc_expression (lambda_linear_expression lle,
1643                        lambda_linear_expression offset,
1644                        tree type,
1645                        VEC(tree) *induction_vars,
1646                        VEC(tree) *invariants,
1647                        enum tree_code wrap, tree * stmts_to_insert)
1648 {
1649   tree stmts, stmt, resvar, name;
1650   size_t i;
1651   tree_stmt_iterator tsi;
1652   tree iv, invar;
1653   VEC(tree) *results = NULL;
1654
1655   name = NULL_TREE;
1656   /* Create a statement list and a linear expression temporary.  */
1657   stmts = alloc_stmt_list ();
1658   resvar = create_tmp_var (type, "lletmp");
1659   add_referenced_tmp_var (resvar);
1660
1661   /* Build up the linear expressions, and put the variable representing the
1662      result in the results array.  */
1663   for (; lle != NULL; lle = LLE_NEXT (lle))
1664     {
1665       /* Start at name = 0.  */
1666       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1667       name = make_ssa_name (resvar, stmt);
1668       TREE_OPERAND (stmt, 0) = name;
1669       fold_stmt (&stmt);
1670       tsi = tsi_last (stmts);
1671       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1672
1673       /* First do the induction variables.
1674          at the end, name = name + all the induction variables added
1675          together.  */
1676       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1677         {
1678           if (LLE_COEFFICIENTS (lle)[i] != 0)
1679             {
1680               tree newname;
1681               tree mult;
1682               tree coeff;
1683
1684               /* mult = induction variable * coefficient.  */
1685               if (LLE_COEFFICIENTS (lle)[i] == 1)
1686                 {
1687                   mult = VEC_index (tree, induction_vars, i);
1688                 }
1689               else
1690                 {
1691                   coeff = build_int_cst (type,
1692                                          LLE_COEFFICIENTS (lle)[i]);
1693                   mult = fold (build (MULT_EXPR, type, iv, coeff));
1694                 }
1695
1696               /* newname = mult */
1697               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1698               newname = make_ssa_name (resvar, stmt);
1699               TREE_OPERAND (stmt, 0) = newname;
1700               fold_stmt (&stmt);
1701               tsi = tsi_last (stmts);
1702               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1703
1704               /* name = name + newname */
1705               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1706                             build (PLUS_EXPR, type, name, newname));
1707               name = make_ssa_name (resvar, stmt);
1708               TREE_OPERAND (stmt, 0) = name;
1709               fold_stmt (&stmt);
1710               tsi = tsi_last (stmts);
1711               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1712             }
1713         }
1714
1715       /* Handle our invariants.
1716          At the end, we have name = name + result of adding all multiplied
1717          invariants.  */
1718       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1719         {
1720           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1721             {
1722               tree newname;
1723               tree mult;
1724               tree coeff;
1725               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1726               /* mult = invariant * coefficient  */
1727               if (invcoeff == 1)
1728                 {
1729                   mult = invar;
1730                 }
1731               else
1732                 {
1733                   coeff = build_int_cst (type, invcoeff);
1734                   mult = fold (build (MULT_EXPR, type, invar, coeff));
1735                 }
1736
1737               /* newname = mult */
1738               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1739               newname = make_ssa_name (resvar, stmt);
1740               TREE_OPERAND (stmt, 0) = newname;
1741               fold_stmt (&stmt);
1742               tsi = tsi_last (stmts);
1743               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1744
1745               /* name = name + newname */
1746               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1747                             build (PLUS_EXPR, type, name, newname));
1748               name = make_ssa_name (resvar, stmt);
1749               TREE_OPERAND (stmt, 0) = name;
1750               fold_stmt (&stmt);
1751               tsi = tsi_last (stmts);
1752               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1753             }
1754         }
1755
1756       /* Now handle the constant.
1757          name = name + constant.  */
1758       if (LLE_CONSTANT (lle) != 0)
1759         {
1760           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1761                         build (PLUS_EXPR, type, name,
1762                                build_int_cst (type, LLE_CONSTANT (lle))));
1763           name = make_ssa_name (resvar, stmt);
1764           TREE_OPERAND (stmt, 0) = name;
1765           fold_stmt (&stmt);
1766           tsi = tsi_last (stmts);
1767           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1768         }
1769
1770       /* Now handle the offset.
1771          name = name + linear offset.  */
1772       if (LLE_CONSTANT (offset) != 0)
1773         {
1774           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1775                         build (PLUS_EXPR, type, name,
1776                                build_int_cst (type, LLE_CONSTANT (offset))));
1777           name = make_ssa_name (resvar, stmt);
1778           TREE_OPERAND (stmt, 0) = name;
1779           fold_stmt (&stmt);
1780           tsi = tsi_last (stmts);
1781           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1782         }
1783
1784       /* Handle any denominator that occurs.  */
1785       if (LLE_DENOMINATOR (lle) != 1)
1786         {
1787           if (wrap == MAX_EXPR)
1788             stmt = build (MODIFY_EXPR, void_type_node, resvar,
1789                           build (CEIL_DIV_EXPR, type, name,
1790                                  build_int_cst (type, LLE_DENOMINATOR (lle))));
1791           else if (wrap == MIN_EXPR)
1792             stmt = build (MODIFY_EXPR, void_type_node, resvar,
1793                           build (FLOOR_DIV_EXPR, type, name,
1794                                  build_int_cst (type, LLE_DENOMINATOR (lle))));
1795           else
1796             gcc_unreachable();
1797
1798           /* name = {ceil, floor}(name/denominator) */
1799           name = make_ssa_name (resvar, stmt);
1800           TREE_OPERAND (stmt, 0) = name;
1801           tsi = tsi_last (stmts);
1802           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1803         }
1804       VEC_safe_push (tree, results, name);
1805     }
1806
1807   /* Again, out of laziness, we don't handle this case yet.  It's not
1808      hard, it just hasn't occurred.  */
1809   gcc_assert (VEC_length (tree, results) <= 2);
1810
1811   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1812   if (VEC_length (tree, results) > 1)
1813     {
1814       tree op1 = VEC_index (tree, results, 0);
1815       tree op2 = VEC_index (tree, results, 1);
1816       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1817                     build (wrap, type, op1, op2));
1818       name = make_ssa_name (resvar, stmt);
1819       TREE_OPERAND (stmt, 0) = name;
1820       tsi = tsi_last (stmts);
1821       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1822     }
1823
1824   *stmts_to_insert = stmts;
1825   return name;
1826 }
1827
1828 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1829    it, back into gcc code.  This changes the
1830    loops, their induction variables, and their bodies, so that they
1831    match the transformed loopnest.
1832    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1833    loopnest.
1834    OLD_IVS is a vector of induction variables from the old loopnest.
1835    INVARIANTS is a vector of loop invariants from the old loopnest.
1836    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1837    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1838    NEW_LOOPNEST.  */
1839
1840 void
1841 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1842                                  VEC(tree) *old_ivs,
1843                                  VEC(tree) *invariants,
1844                                  lambda_loopnest new_loopnest,
1845                                  lambda_trans_matrix transform)
1846 {
1847
1848   struct loop *temp;
1849   size_t i = 0;
1850   size_t depth = 0;
1851   VEC(tree) *new_ivs = NULL;
1852   tree oldiv;
1853
1854   block_stmt_iterator bsi;
1855
1856   if (dump_file)
1857     {
1858       transform = lambda_trans_matrix_inverse (transform);
1859       fprintf (dump_file, "Inverse of transformation matrix:\n");
1860       print_lambda_trans_matrix (dump_file, transform);
1861     }
1862   depth = depth_of_nest (old_loopnest);
1863   temp = old_loopnest;
1864
1865   while (temp)
1866     {
1867       lambda_loop newloop;
1868       basic_block bb;
1869       edge exit;
1870       tree ivvar, ivvarinced, exitcond, stmts;
1871       enum tree_code testtype;
1872       tree newupperbound, newlowerbound;
1873       lambda_linear_expression offset;
1874       tree type;
1875       bool insert_after;
1876       tree inc_stmt;
1877
1878       oldiv = VEC_index (tree, old_ivs, i);
1879       type = TREE_TYPE (oldiv);
1880
1881       /* First, build the new induction variable temporary  */
1882
1883       ivvar = create_tmp_var (type, "lnivtmp");
1884       add_referenced_tmp_var (ivvar);
1885
1886       VEC_safe_push (tree, new_ivs, ivvar);
1887
1888       newloop = LN_LOOPS (new_loopnest)[i];
1889
1890       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1891          cases for now.  */
1892       offset = LL_LINEAR_OFFSET (newloop);
1893
1894       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1895                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1896
1897       /* Now build the  new lower bounds, and insert the statements
1898          necessary to generate it on the loop preheader.  */
1899       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1900                                              LL_LINEAR_OFFSET (newloop),
1901                                              type,
1902                                              new_ivs,
1903                                              invariants, MAX_EXPR, &stmts);
1904       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1905       bsi_commit_edge_inserts ();
1906       /* Build the new upper bound and insert its statements in the
1907          basic block of the exit condition */
1908       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1909                                              LL_LINEAR_OFFSET (newloop),
1910                                              type,
1911                                              new_ivs,
1912                                              invariants, MIN_EXPR, &stmts);
1913       exit = temp->single_exit;
1914       exitcond = get_loop_exit_condition (temp);
1915       bb = bb_for_stmt (exitcond);
1916       bsi = bsi_start (bb);
1917       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1918
1919       /* Create the new iv.  */
1920
1921       standard_iv_increment_position (temp, &bsi, &insert_after);
1922       create_iv (newlowerbound,
1923                  build_int_cst (type, LL_STEP (newloop)),
1924                  ivvar, temp, &bsi, insert_after, &ivvar,
1925                  NULL);
1926
1927       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1928          dominate the block containing the exit condition.
1929          So we simply create our own incremented iv to use in the new exit
1930          test,  and let redundancy elimination sort it out.  */
1931       inc_stmt = build (PLUS_EXPR, type,
1932                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1933       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1934                         inc_stmt);
1935       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1936       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1937       bsi = bsi_for_stmt (exitcond);
1938       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1939
1940       /* Replace the exit condition with the new upper bound
1941          comparison.  */
1942
1943       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1944
1945       /* We want to build a conditional where true means exit the loop, and
1946          false means continue the loop.
1947          So swap the testtype if this isn't the way things are.*/
1948
1949       if (exit->flags & EDGE_FALSE_VALUE)
1950         testtype = swap_tree_comparison (testtype);
1951
1952       COND_EXPR_COND (exitcond) = build (testtype,
1953                                          boolean_type_node,
1954                                          newupperbound, ivvarinced);
1955       update_stmt (exitcond);
1956       VEC_replace (tree, new_ivs, i, ivvar);
1957
1958       i++;
1959       temp = temp->inner;
1960     }
1961
1962   /* Rewrite uses of the old ivs so that they are now specified in terms of
1963      the new ivs.  */
1964
1965   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1966     {
1967       imm_use_iterator imm_iter;
1968       use_operand_p imm_use;
1969       tree oldiv_def;
1970       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1971
1972       gcc_assert (TREE_CODE (oldiv_stmt) == PHI_NODE
1973                   || NUM_DEFS (STMT_DEF_OPS (oldiv_stmt)) == 1);
1974       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1975         oldiv_def = PHI_RESULT (oldiv_stmt);
1976       else
1977         oldiv_def = DEF_OP (STMT_DEF_OPS (oldiv_stmt), 0);
1978
1979       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1980         {
1981           tree stmt = USE_STMT (imm_use);
1982           use_operand_p use_p;
1983           ssa_op_iter iter;
1984           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1985           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1986             {
1987               if (USE_FROM_PTR (use_p) == oldiv)
1988                 {
1989                   tree newiv, stmts;
1990                   lambda_body_vector lbv, newlbv;
1991                   /* Compute the new expression for the induction
1992                      variable.  */
1993                   depth = VEC_length (tree, new_ivs);
1994                   lbv = lambda_body_vector_new (depth);
1995                   LBV_COEFFICIENTS (lbv)[i] = 1;
1996
1997                   newlbv = lambda_body_vector_compute_new (transform, lbv);
1998
1999                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
2000                                                  new_ivs, &stmts);
2001                   bsi = bsi_for_stmt (stmt);
2002                   /* Insert the statements to build that
2003                      expression.  */
2004                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2005                   propagate_value (use_p, newiv);
2006                   update_stmt (stmt);
2007
2008                 }
2009             }
2010         }
2011     }
2012 }
2013
2014
2015 /* Returns true when the vector V is lexicographically positive, in
2016    other words, when the first nonzero element is positive.  */
2017
2018 static bool
2019 lambda_vector_lexico_pos (lambda_vector v,
2020                           unsigned n)
2021 {
2022   unsigned i;
2023   for (i = 0; i < n; i++)
2024     {
2025       if (v[i] == 0)
2026         continue;
2027       if (v[i] < 0)
2028         return false;
2029       if (v[i] > 0)
2030         return true;
2031     }
2032   return true;
2033 }
2034
2035
2036 /* Return TRUE if this is not interesting statement from the perspective of
2037    determining if we have a perfect loop nest.  */
2038
2039 static bool
2040 not_interesting_stmt (tree stmt)
2041 {
2042   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2043      loop, we would have already failed the number of exits tests.  */
2044   if (TREE_CODE (stmt) == LABEL_EXPR
2045       || TREE_CODE (stmt) == GOTO_EXPR
2046       || TREE_CODE (stmt) == COND_EXPR)
2047     return true;
2048   return false;
2049 }
2050
2051 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2052
2053 static bool
2054 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2055 {
2056   int i;
2057   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2058     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2059       if (PHI_ARG_DEF (phi, i) == def)
2060         return true;
2061   return false;
2062 }
2063
2064 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2065
2066 static bool
2067 stmt_uses_phi_result (tree stmt, tree phi_result)
2068 {
2069   use_optype uses = STMT_USE_OPS (stmt);
2070
2071   /* This is conservatively true, because we only want SIMPLE bumpers
2072      of the form x +- constant for our pass.  */
2073   if (NUM_USES (uses) != 1)
2074     return false;
2075   if (USE_OP (uses, 0) == phi_result)
2076     return true;
2077
2078   return false;
2079 }
2080
2081 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2082    in-loop-edge in a phi node, and the operand it uses is the result of that
2083    phi node.
2084    I.E. i_29 = i_3 + 1
2085         i_3 = PHI (0, i_29);  */
2086
2087 static bool
2088 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2089 {
2090   tree use;
2091   tree def;
2092   def_optype defs = STMT_DEF_OPS (stmt);
2093   imm_use_iterator iter;
2094   use_operand_p use_p;
2095
2096   if (NUM_DEFS (defs) != 1)
2097     return false;
2098   def = DEF_OP (defs, 0);
2099   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2100     {
2101       use = USE_STMT (use_p);
2102       if (TREE_CODE (use) == PHI_NODE)
2103         {
2104           if (phi_loop_edge_uses_def (loop, use, def))
2105             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2106               return true;
2107         }
2108     }
2109   return false;
2110 }
2111
2112
2113 /* Return true if LOOP is a perfect loop nest.
2114    Perfect loop nests are those loop nests where all code occurs in the
2115    innermost loop body.
2116    If S is a program statement, then
2117
2118    i.e.
2119    DO I = 1, 20
2120        S1
2121        DO J = 1, 20
2122        ...
2123        END DO
2124    END DO
2125    is not a perfect loop nest because of S1.
2126
2127    DO I = 1, 20
2128       DO J = 1, 20
2129         S1
2130         ...
2131       END DO
2132    END DO
2133    is a perfect loop nest.
2134
2135    Since we don't have high level loops anymore, we basically have to walk our
2136    statements and ignore those that are there because the loop needs them (IE
2137    the induction variable increment, and jump back to the top of the loop).  */
2138
2139 bool
2140 perfect_nest_p (struct loop *loop)
2141 {
2142   basic_block *bbs;
2143   size_t i;
2144   tree exit_cond;
2145
2146   if (!loop->inner)
2147     return true;
2148   bbs = get_loop_body (loop);
2149   exit_cond = get_loop_exit_condition (loop);
2150   for (i = 0; i < loop->num_nodes; i++)
2151     {
2152       if (bbs[i]->loop_father == loop)
2153         {
2154           block_stmt_iterator bsi;
2155           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2156             {
2157               tree stmt = bsi_stmt (bsi);
2158               if (stmt == exit_cond
2159                   || not_interesting_stmt (stmt)
2160                   || stmt_is_bumper_for_loop (loop, stmt))
2161                 continue;
2162               free (bbs);
2163               return false;
2164             }
2165         }
2166     }
2167   free (bbs);
2168   /* See if the inner loops are perfectly nested as well.  */
2169   if (loop->inner)
2170     return perfect_nest_p (loop->inner);
2171   return true;
2172 }
2173
2174 /* Replace the USES of tree X in STMT with tree Y */
2175
2176 static void
2177 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2178 {
2179   use_optype uses = STMT_USE_OPS (stmt);
2180   size_t i;
2181   for (i = 0; i < NUM_USES (uses); i++)
2182     {
2183       if (USE_OP (uses, i) == x)
2184         SET_USE_OP (uses, i, y);
2185     }
2186 }
2187
2188 /* Return TRUE if STMT uses tree OP in it's uses.  */
2189
2190 static bool
2191 stmt_uses_op (tree stmt, tree op)
2192 {
2193   use_optype uses = STMT_USE_OPS (stmt);
2194   size_t i;
2195   for (i = 0; i < NUM_USES (uses); i++)
2196     {
2197       if (USE_OP (uses, i) == op)
2198         return true;
2199     }
2200   return false;
2201 }
2202
2203 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2204    one.  LOOPIVS is a vector of induction variables, one per loop.
2205    ATM, we only handle imperfect nests of depth 2, where all of the statements
2206    occur after the inner loop.  */
2207
2208 static bool
2209 can_convert_to_perfect_nest (struct loop *loop,
2210                              VEC (tree) *loopivs)
2211 {
2212   basic_block *bbs;
2213   tree exit_condition, phi;
2214   size_t i;
2215   block_stmt_iterator bsi;
2216   basic_block exitdest;
2217
2218   /* Can't handle triply nested+ loops yet.  */
2219   if (!loop->inner || loop->inner->inner)
2220     return false;
2221
2222   /* We only handle moving the after-inner-body statements right now, so make
2223      sure all the statements we need to move are located in that position.  */
2224   bbs = get_loop_body (loop);
2225   exit_condition = get_loop_exit_condition (loop);
2226   for (i = 0; i < loop->num_nodes; i++)
2227     {
2228       if (bbs[i]->loop_father == loop)
2229         {
2230           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2231             {
2232               size_t j;
2233               tree stmt = bsi_stmt (bsi);
2234               if (stmt == exit_condition
2235                   || not_interesting_stmt (stmt)
2236                   || stmt_is_bumper_for_loop (loop, stmt))
2237                 continue;
2238               /* If the statement uses inner loop ivs, we == screwed.  */
2239               for (j = 1; j < VEC_length (tree, loopivs); j++)
2240                 if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
2241                   {
2242                     free (bbs);
2243                     return false;
2244                   }
2245
2246               /* If the bb of a statement we care about isn't dominated by
2247                  the header of the inner loop, then we are also screwed.  */
2248               if (!dominated_by_p (CDI_DOMINATORS,
2249                                    bb_for_stmt (stmt),
2250                                    loop->inner->header))
2251                 {
2252                   free (bbs);
2253                   return false;
2254                 }
2255             }
2256         }
2257     }
2258
2259   /* We also need to make sure the loop exit only has simple copy phis in it,
2260      otherwise we don't know how to transform it into a perfect nest right
2261      now.  */
2262   exitdest = loop->single_exit->dest;
2263
2264   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2265     if (PHI_NUM_ARGS (phi) != 1)
2266       return false;
2267
2268   return true;
2269 }
2270
2271 /* Transform the loop nest into a perfect nest, if possible.
2272    LOOPS is the current struct loops *
2273    LOOP is the loop nest to transform into a perfect nest
2274    LBOUNDS are the lower bounds for the loops to transform
2275    UBOUNDS are the upper bounds for the loops to transform
2276    STEPS is the STEPS for the loops to transform.
2277    LOOPIVS is the induction variables for the loops to transform.
2278
2279    Basically, for the case of
2280
2281    FOR (i = 0; i < 50; i++)
2282     {
2283      FOR (j =0; j < 50; j++)
2284      {
2285         <whatever>
2286      }
2287      <some code>
2288     }
2289
2290    This function will transform it into a perfect loop nest by splitting the
2291    outer loop into two loops, like so:
2292
2293    FOR (i = 0; i < 50; i++)
2294    {
2295      FOR (j = 0; j < 50; j++)
2296      {
2297          <whatever>
2298      }
2299    }
2300
2301    FOR (i = 0; i < 50; i ++)
2302    {
2303     <some code>
2304    }
2305
2306    Return FALSE if we can't make this loop into a perfect nest.  */
2307 static bool
2308 perfect_nestify (struct loops *loops,
2309                  struct loop *loop,
2310                  VEC (tree) *lbounds,
2311                  VEC (tree) *ubounds,
2312                  VEC (int) *steps,
2313                  VEC (tree) *loopivs)
2314 {
2315   basic_block *bbs;
2316   tree exit_condition;
2317   tree then_label, else_label, cond_stmt;
2318   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2319   size_t i;
2320   block_stmt_iterator bsi;
2321   bool insert_after;
2322   edge e;
2323   struct loop *newloop;
2324   tree phi;
2325   tree uboundvar;
2326   tree stmt;
2327   tree oldivvar, ivvar, ivvarinced;
2328   VEC (tree) *phis = NULL;
2329
2330   if (!can_convert_to_perfect_nest (loop, loopivs))
2331     return false;
2332
2333   /* Create the new loop */
2334
2335   olddest = loop->single_exit->dest;
2336   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2337   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2338
2339   /* Push the exit phi nodes that we are moving.  */
2340   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2341     {
2342       VEC_safe_push (tree, phis, PHI_RESULT (phi));
2343       VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
2344     }
2345   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2346
2347   /* Remove the exit phis from the old basic block.  Make sure to set
2348      PHI_RESULT to null so it doesn't get released.  */
2349   while (phi_nodes (olddest) != NULL)
2350     {
2351       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2352       remove_phi_node (phi_nodes (olddest), NULL);
2353     }
2354
2355   /* and add them back to the new basic block.  */
2356   while (VEC_length (tree, phis) != 0)
2357     {
2358       tree def;
2359       tree phiname;
2360       def = VEC_pop (tree, phis);
2361       phiname = VEC_pop (tree, phis);
2362       phi = create_phi_node (phiname, preheaderbb);
2363       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2364     }
2365   flush_pending_stmts (e);
2366
2367   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2368   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2369   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2370   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2371   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2372   cond_stmt = build (COND_EXPR, void_type_node,
2373                      build (NE_EXPR, boolean_type_node,
2374                             integer_one_node,
2375                             integer_zero_node),
2376                      then_label, else_label);
2377   bsi = bsi_start (bodybb);
2378   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2379   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2380   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2381   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2382
2383   /* Update the loop structures.  */
2384   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2385   newloop->header = headerbb;
2386   newloop->latch = latchbb;
2387   newloop->single_exit = e;
2388   add_bb_to_loop (latchbb, newloop);
2389   add_bb_to_loop (bodybb, newloop);
2390   add_bb_to_loop (headerbb, newloop);
2391   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2392   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2393   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2394                            loop->single_exit->src);
2395   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2396   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2397   /* Create the new iv.  */
2398   ivvar = create_tmp_var (integer_type_node, "perfectiv");
2399   add_referenced_tmp_var (ivvar);
2400   standard_iv_increment_position (newloop, &bsi, &insert_after);
2401   create_iv (VEC_index (tree, lbounds, 0),
2402              build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
2403              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2404
2405   /* Create the new upper bound.  This may be not just a variable, so we copy
2406      it to one just in case.  */
2407
2408   exit_condition = get_loop_exit_condition (newloop);
2409   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2410   add_referenced_tmp_var (uboundvar);
2411   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2412                 VEC_index (tree, ubounds, 0));
2413   uboundvar = make_ssa_name (uboundvar, stmt);
2414   TREE_OPERAND (stmt, 0) = uboundvar;
2415
2416   if (insert_after)
2417     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2418   else
2419     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2420
2421   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2422                                            boolean_type_node,
2423                                            uboundvar,
2424                                            ivvarinced);
2425
2426   bbs = get_loop_body (loop);
2427   /* Now replace the induction variable in the moved statements with the
2428      correct loop induction variable.  */
2429   oldivvar = VEC_index (tree, loopivs, 0);
2430   for (i = 0; i < loop->num_nodes; i++)
2431     {
2432       block_stmt_iterator tobsi = bsi_last (bodybb);
2433       if (bbs[i]->loop_father == loop)
2434         {
2435           /* Note that the bsi only needs to be explicitly incremented
2436              when we don't move something, since it is automatically
2437              incremented when we do.  */
2438           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2439             {
2440               tree stmt = bsi_stmt (bsi);
2441               if (stmt == exit_condition
2442                   || not_interesting_stmt (stmt)
2443                   || stmt_is_bumper_for_loop (loop, stmt))
2444                 {
2445                   bsi_next (&bsi);
2446                   continue;
2447                 }
2448               replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
2449               bsi_move_before (&bsi, &tobsi);
2450             }
2451         }
2452     }
2453   free (bbs);
2454   return perfect_nest_p (loop);
2455 }
2456
2457 /* Return true if TRANS is a legal transformation matrix that respects
2458    the dependence vectors in DISTS and DIRS.  The conservative answer
2459    is false.
2460
2461    "Wolfe proves that a unimodular transformation represented by the
2462    matrix T is legal when applied to a loop nest with a set of
2463    lexicographically non-negative distance vectors RDG if and only if
2464    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2465    i.e.: if and only if it transforms the lexicographically positive
2466    distance vectors to lexicographically positive vectors.  Note that
2467    a unimodular matrix must transform the zero vector (and only it) to
2468    the zero vector." S.Muchnick.  */
2469
2470 bool
2471 lambda_transform_legal_p (lambda_trans_matrix trans,
2472                           int nb_loops,
2473                           varray_type dependence_relations)
2474 {
2475   unsigned int i;
2476   lambda_vector distres;
2477   struct data_dependence_relation *ddr;
2478
2479 #if defined ENABLE_CHECKING
2480   if (LTM_COLSIZE (trans) != nb_loops
2481       || LTM_ROWSIZE (trans) != nb_loops)
2482     abort ();
2483 #endif
2484
2485   /* When there is an unknown relation in the dependence_relations, we
2486      know that it is no worth looking at this loop nest: give up.  */
2487   ddr = (struct data_dependence_relation *)
2488     VARRAY_GENERIC_PTR (dependence_relations, 0);
2489   if (ddr == NULL)
2490     return true;
2491   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2492     return false;
2493
2494   distres = lambda_vector_new (nb_loops);
2495
2496   /* For each distance vector in the dependence graph.  */
2497   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2498     {
2499       ddr = (struct data_dependence_relation *)
2500         VARRAY_GENERIC_PTR (dependence_relations, i);
2501
2502       /* Don't care about relations for which we know that there is no
2503          dependence, nor about read-read (aka. output-dependences):
2504          these data accesses can happen in any order.  */
2505       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2506           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2507         continue;
2508
2509       /* Conservatively answer: "this transformation is not valid".  */
2510       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2511         return false;
2512
2513       /* If the dependence could not be captured by a distance vector,
2514          conservatively answer that the transform is not valid.  */
2515       if (DDR_DIST_VECT (ddr) == NULL)
2516         return false;
2517
2518       /* Compute trans.dist_vect */
2519       lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2520                                  DDR_DIST_VECT (ddr), distres);
2521
2522       if (!lambda_vector_lexico_pos (distres, nb_loops))
2523         return false;
2524     }
2525   return true;
2526 }