gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20     02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "ggc.h"
  27 #include "tree.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "basic-block.h"
  31 #include "diagnostic.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44
  45 /* This loop nest code generation is based on non-singular matrix
  46    math.
  47
  48  A little terminology and a general sketch of the algorithm.  See "A singular
  49  loop transformation framework based on non-singular matrices" by Wei Li and
  50  Keshav Pingali for formal proofs that the various statements below are
  51  correct.
  52
  53  A loop iteration space represents the points traversed by the loop.  A point in the
  54  iteration space can be represented by a vector of size <loop depth>.  You can
  55  therefore represent the iteration space as an integral combinations of a set
  56  of basis vectors.
  57
  58  A loop iteration space is dense if every integer point between the loop
  59  bounds is a point in the iteration space.  Every loop with a step of 1
  60  therefore has a dense iteration space.
  61
  62  for i = 1 to 3, step 1 is a dense iteration space.
  63
  64  A loop iteration space is sparse if it is not dense.  That is, the iteration
  65  space skips integer points that are within the loop bounds.
  66
  67  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  68  2 is skipped.
  69
  70  Dense source spaces are easy to transform, because they don't skip any
  71  points to begin with.  Thus we can compute the exact bounds of the target
  72  space using min/max and floor/ceil.
  73
  74  For a dense source space, we take the transformation matrix, decompose it
  75  into a lower triangular part (H) and a unimodular part (U).
  76  We then compute the auxiliary space from the unimodular part (source loop
  77  nest . U = auxiliary space) , which has two important properties:
  78   1. It traverses the iterations in the same lexicographic order as the source
  79   space.
  80   2. It is a dense space when the source is a dense space (even if the target
  81   space is going to be sparse).
  82
  83  Given the auxiliary space, we use the lower triangular part to compute the
  84  bounds in the target space by simple matrix multiplication.
  85  The gaps in the target space (IE the new loop step sizes) will be the
  86  diagonals of the H matrix.
  87
  88  Sparse source spaces require another step, because you can't directly compute
  89  the exact bounds of the auxiliary and target space from the sparse space.
  90  Rather than try to come up with a separate algorithm to handle sparse source
  91  spaces directly, we just find a legal transformation matrix that gives you
  92  the sparse source space, from a dense space, and then transform the dense
  93  space.
  94
  95  For a regular sparse space, you can represent the source space as an integer
  96  lattice, and the base space of that lattice will always be dense.  Thus, we
  97  effectively use the lattice to figure out the transformation from the lattice
  98  base space, to the sparse iteration space (IE what transform was applied to
  99  the dense space to make it sparse).  We then compose this transform with the
 100  transformation matrix specified by the user (since our matrix transformations
 101  are closed under composition, this is okay).  We can then use the base space
 102  (which is dense) plus the composed transformation matrix, to compute the rest
 103  of the transform using the dense space algorithm above.
 104
 105  In other words, our sparse source space (B) is decomposed into a dense base
 106  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 107  We then compute the composition of L and the user transformation matrix (T),
 108  so that T is now a transform from A to the result, instead of from B to the
 109  result.
 110  IE A.(LT) = result instead of B.T = result
 111  Since A is now a dense source space, we can use the dense source space
 112  algorithm above to compute the result of applying transform (LT) to A.
 113
 114  Fourier-Motzkin elimination is used to compute the bounds of the base space
 115  of the lattice.  */
 116
 117 DEF_VEC_I(int);
 118 DEF_VEC_ALLOC_I(int,heap);
 119
 120 static bool perfect_nestify (struct loops *,
 121                              struct loop *, VEC(tree,heap) *,
 122                              VEC(tree,heap) *, VEC(int,heap) *,
 123                              VEC(tree,heap) *);
 124 /* Lattice stuff that is internal to the code generation algorithm.  */
 125
 126 typedef struct
 127 {
 128   /* Lattice base matrix.  */
 129   lambda_matrix base;
 130   /* Lattice dimension.  */
 131   int dimension;
 132   /* Origin vector for the coefficients.  */
 133   lambda_vector origin;
 134   /* Origin matrix for the invariants.  */
 135   lambda_matrix origin_invariants;
 136   /* Number of invariants.  */
 137   int invariants;
 138 } *lambda_lattice;
 139
 140 #define LATTICE_BASE(T) ((T)->base)
 141 #define LATTICE_DIMENSION(T) ((T)->dimension)
 142 #define LATTICE_ORIGIN(T) ((T)->origin)
 143 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 144 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 145
 146 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 147                        int, int);
 148 static lambda_lattice lambda_lattice_new (int, int);
 149 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 150
 151 static tree find_induction_var_from_exit_cond (struct loop *);
 152
 153 /* Create a new lambda body vector.  */
 154
 155 lambda_body_vector
 156 lambda_body_vector_new (int size)
 157 {
 158   lambda_body_vector ret;
 159
 160   ret = ggc_alloc (sizeof (*ret));
 161   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 162   LBV_SIZE (ret) = size;
 163   LBV_DENOMINATOR (ret) = 1;
 164   return ret;
 165 }
 166
 167 /* Compute the new coefficients for the vector based on the
 168   *inverse* of the transformation matrix.  */
 169
 170 lambda_body_vector
 171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 172                                 lambda_body_vector vect)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants)
 229 {
 230   lambda_linear_expression ret;
 231
 232   ret = ggc_alloc_cleared (sizeof (*ret));
 233
 234   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 235   LLE_CONSTANT (ret) = 0;
 236   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 237   LLE_DENOMINATOR (ret) = 1;
 238   LLE_NEXT (ret) = NULL;
 239
 240   return ret;
 241 }
 242
 243 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 244    The starting letter used for variable names is START.  */
 245
 246 static void
 247 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 248                          char start)
 249 {
 250   int i;
 251   bool first = true;
 252   for (i = 0; i < size; i++)
 253     {
 254       if (expr[i] != 0)
 255         {
 256           if (first)
 257             {
 258               if (expr[i] < 0)
 259                 fprintf (outfile, "-");
 260               first = false;
 261             }
 262           else if (expr[i] > 0)
 263             fprintf (outfile, " + ");
 264           else
 265             fprintf (outfile, " - ");
 266           if (abs (expr[i]) == 1)
 267             fprintf (outfile, "%c", start + i);
 268           else
 269             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 270         }
 271     }
 272 }
 273
 274 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 275    depth/number of coefficients is given by DEPTH, the number of invariants is
 276    given by INVARIANTS, and the character to start variable names with is given
 277    by START.  */
 278
 279 void
 280 print_lambda_linear_expression (FILE * outfile,
 281                                 lambda_linear_expression expr,
 282                                 int depth, int invariants, char start)
 283 {
 284   fprintf (outfile, "\tLinear expression: ");
 285   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 286   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 287   fprintf (outfile, "  invariants: ");
 288   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 289                            invariants, 'A');
 290   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 291 }
 292
 293 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 294    coefficients is given by DEPTH, the number of invariants is
 295    given by INVARIANTS, and the character to start variable names with is given
 296    by START.  */
 297
 298 void
 299 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 300                    int invariants, char start)
 301 {
 302   int step;
 303   lambda_linear_expression expr;
 304
 305   gcc_assert (loop);
 306
 307   expr = LL_LINEAR_OFFSET (loop);
 308   step = LL_STEP (loop);
 309   fprintf (outfile, "  step size = %d \n", step);
 310
 311   if (expr)
 312     {
 313       fprintf (outfile, "  linear offset: \n");
 314       print_lambda_linear_expression (outfile, expr, depth, invariants,
 315                                       start);
 316     }
 317
 318   fprintf (outfile, "  lower bound: \n");
 319   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 320     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 321   fprintf (outfile, "  upper bound: \n");
 322   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 323     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 324 }
 325
 326 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 327    number of invariants.  */
 328
 329 lambda_loopnest
 330 lambda_loopnest_new (int depth, int invariants)
 331 {
 332   lambda_loopnest ret;
 333   ret = ggc_alloc (sizeof (*ret));
 334
 335   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 336   LN_DEPTH (ret) = depth;
 337   LN_INVARIANTS (ret) = invariants;
 338
 339   return ret;
 340 }
 341
 342 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 343    character to use for loop names is given by START.  */
 344
 345 void
 346 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 347 {
 348   int i;
 349   for (i = 0; i < LN_DEPTH (nest); i++)
 350     {
 351       fprintf (outfile, "Loop %c\n", start + i);
 352       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 353                          LN_INVARIANTS (nest), 'i');
 354       fprintf (outfile, "\n");
 355     }
 356 }
 357
 358 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 359    of invariants.  */
 360
 361 static lambda_lattice
 362 lambda_lattice_new (int depth, int invariants)
 363 {
 364   lambda_lattice ret;
 365   ret = ggc_alloc (sizeof (*ret));
 366   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 367   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 368   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 369   LATTICE_DIMENSION (ret) = depth;
 370   LATTICE_INVARIANTS (ret) = invariants;
 371   return ret;
 372 }
 373
 374 /* Compute the lattice base for NEST.  The lattice base is essentially a
 375    non-singular transform from a dense base space to a sparse iteration space.
 376    We use it so that we don't have to specially handle the case of a sparse
 377    iteration space in other parts of the algorithm.  As a result, this routine
 378    only does something interesting (IE produce a matrix that isn't the
 379    identity matrix) if NEST is a sparse space.  */
 380
 381 static lambda_lattice
 382 lambda_lattice_compute_base (lambda_loopnest nest)
 383 {
 384   lambda_lattice ret;
 385   int depth, invariants;
 386   lambda_matrix base;
 387
 388   int i, j, step;
 389   lambda_loop loop;
 390   lambda_linear_expression expression;
 391
 392   depth = LN_DEPTH (nest);
 393   invariants = LN_INVARIANTS (nest);
 394
 395   ret = lambda_lattice_new (depth, invariants);
 396   base = LATTICE_BASE (ret);
 397   for (i = 0; i < depth; i++)
 398     {
 399       loop = LN_LOOPS (nest)[i];
 400       gcc_assert (loop);
 401       step = LL_STEP (loop);
 402       /* If we have a step of 1, then the base is one, and the
 403          origin and invariant coefficients are 0.  */
 404       if (step == 1)
 405         {
 406           for (j = 0; j < depth; j++)
 407             base[i][j] = 0;
 408           base[i][i] = 1;
 409           LATTICE_ORIGIN (ret)[i] = 0;
 410           for (j = 0; j < invariants; j++)
 411             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 412         }
 413       else
 414         {
 415           /* Otherwise, we need the lower bound expression (which must
 416              be an affine function)  to determine the base.  */
 417           expression = LL_LOWER_BOUND (loop);
 418           gcc_assert (expression && !LLE_NEXT (expression)
 419                       && LLE_DENOMINATOR (expression) == 1);
 420
 421           /* The lower triangular portion of the base is going to be the
 422              coefficient times the step */
 423           for (j = 0; j < i; j++)
 424             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 425               * LL_STEP (LN_LOOPS (nest)[j]);
 426           base[i][i] = step;
 427           for (j = i + 1; j < depth; j++)
 428             base[i][j] = 0;
 429
 430           /* Origin for this loop is the constant of the lower bound
 431              expression.  */
 432           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 433
 434           /* Coefficient for the invariants are equal to the invariant
 435              coefficients in the expression.  */
 436           for (j = 0; j < invariants; j++)
 437             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 438               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 439         }
 440     }
 441   return ret;
 442 }
 443
 444 /* Compute the greatest common denominator of two numbers (A and B) using
 445    Euclid's algorithm.  */
 446
 447 static int
 448 gcd (int a, int b)
 449 {
 450
 451   int x, y, z;
 452
 453   x = abs (a);
 454   y = abs (b);
 455
 456   while (x > 0)
 457     {
 458       z = y % x;
 459       y = x;
 460       x = z;
 461     }
 462
 463   return (y);
 464 }
 465
 466 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 467
 468 static int
 469 gcd_vector (lambda_vector vector, int size)
 470 {
 471   int i;
 472   int gcd1 = 0;
 473
 474   if (size > 0)
 475     {
 476       gcd1 = vector[0];
 477       for (i = 1; i < size; i++)
 478         gcd1 = gcd (gcd1, vector[i]);
 479     }
 480   return gcd1;
 481 }
 482
 483 /* Compute the least common multiple of two numbers A and B .  */
 484
 485 static int
 486 lcm (int a, int b)
 487 {
 488   return (abs (a) * abs (b) / gcd (a, b));
 489 }
 490
 491 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 492    auxiliary nest.
 493    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 494    it is easy to calculate the answer and bounds.
 495    A sketch of how it works:
 496    Given a system of linear inequalities, ai * xj >= bk, you can always
 497    rewrite the constraints so they are all of the form
 498    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 499    in b1 ... bk, and some a in a1...ai)
 500    You can then eliminate this x from the non-constant inequalities by
 501    rewriting these as a <= b, x >= constant, and delete the x variable.
 502    You can then repeat this for any remaining x variables, and then we have
 503    an easy to use variable <= constant (or no variables at all) form that we
 504    can construct our bounds from.
 505
 506    In our case, each time we eliminate, we construct part of the bound from
 507    the ith variable, then delete the ith variable.
 508
 509    Remember the constant are in our vector a, our coefficient matrix is A,
 510    and our invariant coefficient matrix is B.
 511
 512    SIZE is the size of the matrices being passed.
 513    DEPTH is the loop nest depth.
 514    INVARIANTS is the number of loop invariants.
 515    A, B, and a are the coefficient matrix, invariant coefficient, and a
 516    vector of constants, respectively.  */
 517
 518 static lambda_loopnest
 519 compute_nest_using_fourier_motzkin (int size,
 520                                     int depth,
 521                                     int invariants,
 522                                     lambda_matrix A,
 523                                     lambda_matrix B,
 524                                     lambda_vector a)
 525 {
 526
 527   int multiple, f1, f2;
 528   int i, j, k;
 529   lambda_linear_expression expression;
 530   lambda_loop loop;
 531   lambda_loopnest auxillary_nest;
 532   lambda_matrix swapmatrix, A1, B1;
 533   lambda_vector swapvector, a1;
 534   int newsize;
 535
 536   A1 = lambda_matrix_new (128, depth);
 537   B1 = lambda_matrix_new (128, invariants);
 538   a1 = lambda_vector_new (128);
 539
 540   auxillary_nest = lambda_loopnest_new (depth, invariants);
 541
 542   for (i = depth - 1; i >= 0; i--)
 543     {
 544       loop = lambda_loop_new ();
 545       LN_LOOPS (auxillary_nest)[i] = loop;
 546       LL_STEP (loop) = 1;
 547
 548       for (j = 0; j < size; j++)
 549         {
 550           if (A[j][i] < 0)
 551             {
 552               /* Any linear expression in the matrix with a coefficient less
 553                  than 0 becomes part of the new lower bound.  */
 554               expression = lambda_linear_expression_new (depth, invariants);
 555
 556               for (k = 0; k < i; k++)
 557                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 558
 559               for (k = 0; k < invariants; k++)
 560                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 561
 562               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 563               LLE_CONSTANT (expression) = -1 * a[j];
 564
 565               /* Ignore if identical to the existing lower bound.  */
 566               if (!lle_equal (LL_LOWER_BOUND (loop),
 567                               expression, depth, invariants))
 568                 {
 569                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 570                   LL_LOWER_BOUND (loop) = expression;
 571                 }
 572
 573             }
 574           else if (A[j][i] > 0)
 575             {
 576               /* Any linear expression with a coefficient greater than 0
 577                  becomes part of the new upper bound.  */
 578               expression = lambda_linear_expression_new (depth, invariants);
 579               for (k = 0; k < i; k++)
 580                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 581
 582               for (k = 0; k < invariants; k++)
 583                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 584
 585               LLE_DENOMINATOR (expression) = A[j][i];
 586               LLE_CONSTANT (expression) = a[j];
 587
 588               /* Ignore if identical to the existing upper bound.  */
 589               if (!lle_equal (LL_UPPER_BOUND (loop),
 590                               expression, depth, invariants))
 591                 {
 592                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 593                   LL_UPPER_BOUND (loop) = expression;
 594                 }
 595
 596             }
 597         }
 598
 599       /* This portion creates a new system of linear inequalities by deleting
 600          the i'th variable, reducing the system by one variable.  */
 601       newsize = 0;
 602       for (j = 0; j < size; j++)
 603         {
 604           /* If the coefficient for the i'th variable is 0, then we can just
 605              eliminate the variable straightaway.  Otherwise, we have to
 606              multiply through by the coefficients we are eliminating.  */
 607           if (A[j][i] == 0)
 608             {
 609               lambda_vector_copy (A[j], A1[newsize], depth);
 610               lambda_vector_copy (B[j], B1[newsize], invariants);
 611               a1[newsize] = a[j];
 612               newsize++;
 613             }
 614           else if (A[j][i] > 0)
 615             {
 616               for (k = 0; k < size; k++)
 617                 {
 618                   if (A[k][i] < 0)
 619                     {
 620                       multiple = lcm (A[j][i], A[k][i]);
 621                       f1 = multiple / A[j][i];
 622                       f2 = -1 * multiple / A[k][i];
 623
 624                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 625                                             A1[newsize], depth);
 626                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 627                                             B1[newsize], invariants);
 628                       a1[newsize] = f1 * a[j] + f2 * a[k];
 629                       newsize++;
 630                     }
 631                 }
 632             }
 633         }
 634
 635       swapmatrix = A;
 636       A = A1;
 637       A1 = swapmatrix;
 638
 639       swapmatrix = B;
 640       B = B1;
 641       B1 = swapmatrix;
 642
 643       swapvector = a;
 644       a = a1;
 645       a1 = swapvector;
 646
 647       size = newsize;
 648     }
 649
 650   return auxillary_nest;
 651 }
 652
 653 /* Compute the loop bounds for the auxiliary space NEST.
 654    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 655    Given the original nest, this function will
 656    1. Convert the nest into matrix form, which consists of a matrix for the
 657    coefficients, a matrix for the
 658    invariant coefficients, and a vector for the constants.
 659    2. Use the matrix form to calculate the lattice base for the nest (which is
 660    a dense space)
 661    3. Compose the dense space transform with the user specified transform, to
 662    get a transform we can easily calculate transformed bounds for.
 663    4. Multiply the composed transformation matrix times the matrix form of the
 664    loop.
 665    5. Transform the newly created matrix (from step 4) back into a loop nest
 666    using fourier motzkin elimination to figure out the bounds.  */
 667
 668 static lambda_loopnest
 669 lambda_compute_auxillary_space (lambda_loopnest nest,
 670                                 lambda_trans_matrix trans)
 671 {
 672   lambda_matrix A, B, A1, B1;
 673   lambda_vector a, a1;
 674   lambda_matrix invertedtrans;
 675   int depth, invariants, size;
 676   int i, j;
 677   lambda_loop loop;
 678   lambda_linear_expression expression;
 679   lambda_lattice lattice;
 680
 681   depth = LN_DEPTH (nest);
 682   invariants = LN_INVARIANTS (nest);
 683
 684   /* Unfortunately, we can't know the number of constraints we'll have
 685      ahead of time, but this should be enough even in ridiculous loop nest
 686      cases. We must not go over this limit.  */
 687   A = lambda_matrix_new (128, depth);
 688   B = lambda_matrix_new (128, invariants);
 689   a = lambda_vector_new (128);
 690
 691   A1 = lambda_matrix_new (128, depth);
 692   B1 = lambda_matrix_new (128, invariants);
 693   a1 = lambda_vector_new (128);
 694
 695   /* Store the bounds in the equation matrix A, constant vector a, and
 696      invariant matrix B, so that we have Ax <= a + B.
 697      This requires a little equation rearranging so that everything is on the
 698      correct side of the inequality.  */
 699   size = 0;
 700   for (i = 0; i < depth; i++)
 701     {
 702       loop = LN_LOOPS (nest)[i];
 703
 704       /* First we do the lower bound.  */
 705       if (LL_STEP (loop) > 0)
 706         expression = LL_LOWER_BOUND (loop);
 707       else
 708         expression = LL_UPPER_BOUND (loop);
 709
 710       for (; expression != NULL; expression = LLE_NEXT (expression))
 711         {
 712           /* Fill in the coefficient.  */
 713           for (j = 0; j < i; j++)
 714             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 715
 716           /* And the invariant coefficient.  */
 717           for (j = 0; j < invariants; j++)
 718             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 719
 720           /* And the constant.  */
 721           a[size] = LLE_CONSTANT (expression);
 722
 723           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 724              constants and single variables on   */
 725           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 726           a[size] *= -1;
 727           for (j = 0; j < invariants; j++)
 728             B[size][j] *= -1;
 729
 730           size++;
 731           /* Need to increase matrix sizes above.  */
 732           gcc_assert (size <= 127);
 733
 734         }
 735
 736       /* Then do the exact same thing for the upper bounds.  */
 737       if (LL_STEP (loop) > 0)
 738         expression = LL_UPPER_BOUND (loop);
 739       else
 740         expression = LL_LOWER_BOUND (loop);
 741
 742       for (; expression != NULL; expression = LLE_NEXT (expression))
 743         {
 744           /* Fill in the coefficient.  */
 745           for (j = 0; j < i; j++)
 746             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 747
 748           /* And the invariant coefficient.  */
 749           for (j = 0; j < invariants; j++)
 750             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 751
 752           /* And the constant.  */
 753           a[size] = LLE_CONSTANT (expression);
 754
 755           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 756           for (j = 0; j < i; j++)
 757             A[size][j] *= -1;
 758           A[size][i] = LLE_DENOMINATOR (expression);
 759           size++;
 760           /* Need to increase matrix sizes above.  */
 761           gcc_assert (size <= 127);
 762
 763         }
 764     }
 765
 766   /* Compute the lattice base x = base * y + origin, where y is the
 767      base space.  */
 768   lattice = lambda_lattice_compute_base (nest);
 769
 770   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 771
 772   /* A1 = A * L */
 773   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 774
 775   /* a1 = a - A * origin constant.  */
 776   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 777   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 778
 779   /* B1 = B - A * origin invariant.  */
 780   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 781                       invariants);
 782   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 783
 784   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 785      it by A1, then performing fourier motzkin.  */
 786
 787   invertedtrans = lambda_matrix_new (depth, depth);
 788
 789   /* Compute the inverse of U.  */
 790   lambda_matrix_inverse (LTM_MATRIX (trans),
 791                          invertedtrans, depth);
 792
 793   /* A = A1 inv(U).  */
 794   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 795
 796   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 797                                              A, B1, a1);
 798 }
 799
 800 /* Compute the loop bounds for the target space, using the bounds of
 801    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 802    The target space loop bounds are computed by multiplying the triangular
 803    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 804    the loop steps (positive or negative) is then used to swap the bounds if
 805    the loop counts downwards.
 806    Return the target loopnest.  */
 807
 808 static lambda_loopnest
 809 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 810                              lambda_trans_matrix H, lambda_vector stepsigns)
 811 {
 812   lambda_matrix inverse, H1;
 813   int determinant, i, j;
 814   int gcd1, gcd2;
 815   int factor;
 816
 817   lambda_loopnest target_nest;
 818   int depth, invariants;
 819   lambda_matrix target;
 820
 821   lambda_loop auxillary_loop, target_loop;
 822   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 823
 824   depth = LN_DEPTH (auxillary_nest);
 825   invariants = LN_INVARIANTS (auxillary_nest);
 826
 827   inverse = lambda_matrix_new (depth, depth);
 828   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 829
 830   /* H1 is H excluding its diagonal.  */
 831   H1 = lambda_matrix_new (depth, depth);
 832   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 833
 834   for (i = 0; i < depth; i++)
 835     H1[i][i] = 0;
 836
 837   /* Computes the linear offsets of the loop bounds.  */
 838   target = lambda_matrix_new (depth, depth);
 839   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 840
 841   target_nest = lambda_loopnest_new (depth, invariants);
 842
 843   for (i = 0; i < depth; i++)
 844     {
 845
 846       /* Get a new loop structure.  */
 847       target_loop = lambda_loop_new ();
 848       LN_LOOPS (target_nest)[i] = target_loop;
 849
 850       /* Computes the gcd of the coefficients of the linear part.  */
 851       gcd1 = gcd_vector (target[i], i);
 852
 853       /* Include the denominator in the GCD.  */
 854       gcd1 = gcd (gcd1, determinant);
 855
 856       /* Now divide through by the gcd.  */
 857       for (j = 0; j < i; j++)
 858         target[i][j] = target[i][j] / gcd1;
 859
 860       expression = lambda_linear_expression_new (depth, invariants);
 861       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 862       LLE_DENOMINATOR (expression) = determinant / gcd1;
 863       LLE_CONSTANT (expression) = 0;
 864       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 865                            invariants);
 866       LL_LINEAR_OFFSET (target_loop) = expression;
 867     }
 868
 869   /* For each loop, compute the new bounds from H.  */
 870   for (i = 0; i < depth; i++)
 871     {
 872       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 873       target_loop = LN_LOOPS (target_nest)[i];
 874       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 875       factor = LTM_MATRIX (H)[i][i];
 876
 877       /* First we do the lower bound.  */
 878       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 879
 880       for (; auxillary_expr != NULL;
 881            auxillary_expr = LLE_NEXT (auxillary_expr))
 882         {
 883           target_expr = lambda_linear_expression_new (depth, invariants);
 884           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 885                                      depth, inverse, depth,
 886                                      LLE_COEFFICIENTS (target_expr));
 887           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 888                                     LLE_COEFFICIENTS (target_expr), depth,
 889                                     factor);
 890
 891           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 892           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 893                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 894                               invariants);
 895           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 896                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     invariants, factor);
 898           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 899
 900           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 901             {
 902               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 903                 * determinant;
 904               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 905                                         (target_expr),
 906                                         LLE_INVARIANT_COEFFICIENTS
 907                                         (target_expr), invariants,
 908                                         determinant);
 909               LLE_DENOMINATOR (target_expr) =
 910                 LLE_DENOMINATOR (target_expr) * determinant;
 911             }
 912           /* Find the gcd and divide by it here, rather than doing it
 913              at the tree level.  */
 914           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 915           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 916                              invariants);
 917           gcd1 = gcd (gcd1, gcd2);
 918           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 919           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 920           for (j = 0; j < depth; j++)
 921             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 922           for (j = 0; j < invariants; j++)
 923             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 924           LLE_CONSTANT (target_expr) /= gcd1;
 925           LLE_DENOMINATOR (target_expr) /= gcd1;
 926           /* Ignore if identical to existing bound.  */
 927           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 928                           invariants))
 929             {
 930               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 931               LL_LOWER_BOUND (target_loop) = target_expr;
 932             }
 933         }
 934       /* Now do the upper bound.  */
 935       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 936
 937       for (; auxillary_expr != NULL;
 938            auxillary_expr = LLE_NEXT (auxillary_expr))
 939         {
 940           target_expr = lambda_linear_expression_new (depth, invariants);
 941           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 942                                      depth, inverse, depth,
 943                                      LLE_COEFFICIENTS (target_expr));
 944           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 945                                     LLE_COEFFICIENTS (target_expr), depth,
 946                                     factor);
 947           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 948           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 949                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 950                               invariants);
 951           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 952                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     invariants, factor);
 954           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 955
 956           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 957             {
 958               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 959                 * determinant;
 960               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 961                                         (target_expr),
 962                                         LLE_INVARIANT_COEFFICIENTS
 963                                         (target_expr), invariants,
 964                                         determinant);
 965               LLE_DENOMINATOR (target_expr) =
 966                 LLE_DENOMINATOR (target_expr) * determinant;
 967             }
 968           /* Find the gcd and divide by it here, instead of at the
 969              tree level.  */
 970           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 971           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 972                              invariants);
 973           gcd1 = gcd (gcd1, gcd2);
 974           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 975           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 976           for (j = 0; j < depth; j++)
 977             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 978           for (j = 0; j < invariants; j++)
 979             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 980           LLE_CONSTANT (target_expr) /= gcd1;
 981           LLE_DENOMINATOR (target_expr) /= gcd1;
 982           /* Ignore if equal to existing bound.  */
 983           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 984                           invariants))
 985             {
 986               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 987               LL_UPPER_BOUND (target_loop) = target_expr;
 988             }
 989         }
 990     }
 991   for (i = 0; i < depth; i++)
 992     {
 993       target_loop = LN_LOOPS (target_nest)[i];
 994       /* If necessary, exchange the upper and lower bounds and negate
 995          the step size.  */
 996       if (stepsigns[i] < 0)
 997         {
 998           LL_STEP (target_loop) *= -1;
 999           tmp_expr = LL_LOWER_BOUND (target_loop);
1000           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1001           LL_UPPER_BOUND (target_loop) = tmp_expr;
1002         }
1003     }
1004   return target_nest;
1005 }
1006
1007 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1008    result.  */
1009
1010 static lambda_vector
1011 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1012 {
1013   lambda_matrix matrix, H;
1014   int size;
1015   lambda_vector newsteps;
1016   int i, j, factor, minimum_column;
1017   int temp;
1018
1019   matrix = LTM_MATRIX (trans);
1020   size = LTM_ROWSIZE (trans);
1021   H = lambda_matrix_new (size, size);
1022
1023   newsteps = lambda_vector_new (size);
1024   lambda_vector_copy (stepsigns, newsteps, size);
1025
1026   lambda_matrix_copy (matrix, H, size, size);
1027
1028   for (j = 0; j < size; j++)
1029     {
1030       lambda_vector row;
1031       row = H[j];
1032       for (i = j; i < size; i++)
1033         if (row[i] < 0)
1034           lambda_matrix_col_negate (H, size, i);
1035       while (lambda_vector_first_nz (row, size, j + 1) < size)
1036         {
1037           minimum_column = lambda_vector_min_nz (row, size, j);
1038           lambda_matrix_col_exchange (H, size, j, minimum_column);
1039
1040           temp = newsteps[j];
1041           newsteps[j] = newsteps[minimum_column];
1042           newsteps[minimum_column] = temp;
1043
1044           for (i = j + 1; i < size; i++)
1045             {
1046               factor = row[i] / row[j];
1047               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1048             }
1049         }
1050     }
1051   return newsteps;
1052 }
1053
1054 /* Transform NEST according to TRANS, and return the new loopnest.
1055    This involves
1056    1. Computing a lattice base for the transformation
1057    2. Composing the dense base with the specified transformation (TRANS)
1058    3. Decomposing the combined transformation into a lower triangular portion,
1059    and a unimodular portion.
1060    4. Computing the auxiliary nest using the unimodular portion.
1061    5. Computing the target nest using the auxiliary nest and the lower
1062    triangular portion.  */
1063
1064 lambda_loopnest
1065 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1066 {
1067   lambda_loopnest auxillary_nest, target_nest;
1068
1069   int depth, invariants;
1070   int i, j;
1071   lambda_lattice lattice;
1072   lambda_trans_matrix trans1, H, U;
1073   lambda_loop loop;
1074   lambda_linear_expression expression;
1075   lambda_vector origin;
1076   lambda_matrix origin_invariants;
1077   lambda_vector stepsigns;
1078   int f;
1079
1080   depth = LN_DEPTH (nest);
1081   invariants = LN_INVARIANTS (nest);
1082
1083   /* Keep track of the signs of the loop steps.  */
1084   stepsigns = lambda_vector_new (depth);
1085   for (i = 0; i < depth; i++)
1086     {
1087       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1088         stepsigns[i] = 1;
1089       else
1090         stepsigns[i] = -1;
1091     }
1092
1093   /* Compute the lattice base.  */
1094   lattice = lambda_lattice_compute_base (nest);
1095   trans1 = lambda_trans_matrix_new (depth, depth);
1096
1097   /* Multiply the transformation matrix by the lattice base.  */
1098
1099   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1100                       LTM_MATRIX (trans1), depth, depth, depth);
1101
1102   /* Compute the Hermite normal form for the new transformation matrix.  */
1103   H = lambda_trans_matrix_new (depth, depth);
1104   U = lambda_trans_matrix_new (depth, depth);
1105   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1106                          LTM_MATRIX (U));
1107
1108   /* Compute the auxiliary loop nest's space from the unimodular
1109      portion.  */
1110   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1111
1112   /* Compute the loop step signs from the old step signs and the
1113      transformation matrix.  */
1114   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1115
1116   /* Compute the target loop nest space from the auxiliary nest and
1117      the lower triangular matrix H.  */
1118   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1119   origin = lambda_vector_new (depth);
1120   origin_invariants = lambda_matrix_new (depth, invariants);
1121   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1122                              LATTICE_ORIGIN (lattice), origin);
1123   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1124                       origin_invariants, depth, depth, invariants);
1125
1126   for (i = 0; i < depth; i++)
1127     {
1128       loop = LN_LOOPS (target_nest)[i];
1129       expression = LL_LINEAR_OFFSET (loop);
1130       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1131         f = 1;
1132       else
1133         f = LLE_DENOMINATOR (expression);
1134
1135       LLE_CONSTANT (expression) += f * origin[i];
1136
1137       for (j = 0; j < invariants; j++)
1138         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1139           f * origin_invariants[i][j];
1140     }
1141
1142   return target_nest;
1143
1144 }
1145
1146 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1147    return the new expression.  DEPTH is the depth of the loopnest.
1148    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1149    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1150    is the amount we have to add/subtract from the expression because of the
1151    type of comparison it is used in.  */
1152
1153 static lambda_linear_expression
1154 gcc_tree_to_linear_expression (int depth, tree expr,
1155                                VEC(tree,heap) *outerinductionvars,
1156                                VEC(tree,heap) *invariants, int extra)
1157 {
1158   lambda_linear_expression lle = NULL;
1159   switch (TREE_CODE (expr))
1160     {
1161     case INTEGER_CST:
1162       {
1163         lle = lambda_linear_expression_new (depth, 2 * depth);
1164         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1165         if (extra != 0)
1166           LLE_CONSTANT (lle) += extra;
1167
1168         LLE_DENOMINATOR (lle) = 1;
1169       }
1170       break;
1171     case SSA_NAME:
1172       {
1173         tree iv, invar;
1174         size_t i;
1175         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1176           if (iv != NULL)
1177             {
1178               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1179                 {
1180                   lle = lambda_linear_expression_new (depth, 2 * depth);
1181                   LLE_COEFFICIENTS (lle)[i] = 1;
1182                   if (extra != 0)
1183                     LLE_CONSTANT (lle) = extra;
1184
1185                   LLE_DENOMINATOR (lle) = 1;
1186                 }
1187             }
1188         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1189           if (invar != NULL)
1190             {
1191               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1192                 {
1193                   lle = lambda_linear_expression_new (depth, 2 * depth);
1194                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1195                   if (extra != 0)
1196                     LLE_CONSTANT (lle) = extra;
1197                   LLE_DENOMINATOR (lle) = 1;
1198                 }
1199             }
1200       }
1201       break;
1202     default:
1203       return NULL;
1204     }
1205
1206   return lle;
1207 }
1208
1209 /* Return the depth of the loopnest NEST */
1210
1211 static int
1212 depth_of_nest (struct loop *nest)
1213 {
1214   size_t depth = 0;
1215   while (nest)
1216     {
1217       depth++;
1218       nest = nest->inner;
1219     }
1220   return depth;
1221 }
1222
1223
1224 /* Return true if OP is invariant in LOOP and all outer loops.  */
1225
1226 static bool
1227 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1228 {
1229   if (is_gimple_min_invariant (op))
1230     return true;
1231   if (loop->depth == 0)
1232     return true;
1233   if (!expr_invariant_in_loop_p (loop, op))
1234     return false;
1235   if (loop->outer
1236       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1237     return false;
1238   return true;
1239 }
1240
1241 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1242    or NULL if it could not be converted.
1243    DEPTH is the depth of the loop.
1244    INVARIANTS is a pointer to the array of loop invariants.
1245    The induction variable for this loop should be stored in the parameter
1246    OURINDUCTIONVAR.
1247    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1248
1249 static lambda_loop
1250 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1251                          VEC(tree,heap) ** invariants,
1252                          tree * ourinductionvar,
1253                          VEC(tree,heap) * outerinductionvars,
1254                          VEC(tree,heap) ** lboundvars,
1255                          VEC(tree,heap) ** uboundvars,
1256                          VEC(int,heap) ** steps)
1257 {
1258   tree phi;
1259   tree exit_cond;
1260   tree access_fn, inductionvar;
1261   tree step;
1262   lambda_loop lloop = NULL;
1263   lambda_linear_expression lbound, ubound;
1264   tree test;
1265   int stepint;
1266   int extra = 0;
1267   tree lboundvar, uboundvar, uboundresult;
1268
1269   /* Find out induction var and exit condition.  */
1270   inductionvar = find_induction_var_from_exit_cond (loop);
1271   exit_cond = get_loop_exit_condition (loop);
1272
1273   if (inductionvar == NULL || exit_cond == NULL)
1274     {
1275       if (dump_file && (dump_flags & TDF_DETAILS))
1276         fprintf (dump_file,
1277                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1278       return NULL;
1279     }
1280
1281   test = TREE_OPERAND (exit_cond, 0);
1282
1283   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1284     {
1285
1286       if (dump_file && (dump_flags & TDF_DETAILS))
1287         fprintf (dump_file,
1288                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1289
1290       return NULL;
1291     }
1292
1293   phi = SSA_NAME_DEF_STMT (inductionvar);
1294   if (TREE_CODE (phi) != PHI_NODE)
1295     {
1296       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1297       if (!phi)
1298         {
1299
1300           if (dump_file && (dump_flags & TDF_DETAILS))
1301             fprintf (dump_file,
1302                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1303
1304           return NULL;
1305         }
1306
1307       phi = SSA_NAME_DEF_STMT (phi);
1308       if (TREE_CODE (phi) != PHI_NODE)
1309         {
1310
1311           if (dump_file && (dump_flags & TDF_DETAILS))
1312             fprintf (dump_file,
1313                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1314           return NULL;
1315         }
1316
1317     }
1318
1319   /* The induction variable name/version we want to put in the array is the
1320      result of the induction variable phi node.  */
1321   *ourinductionvar = PHI_RESULT (phi);
1322   access_fn = instantiate_parameters
1323     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1324   if (access_fn == chrec_dont_know)
1325     {
1326       if (dump_file && (dump_flags & TDF_DETAILS))
1327         fprintf (dump_file,
1328                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1329
1330       return NULL;
1331     }
1332
1333   step = evolution_part_in_loop_num (access_fn, loop->num);
1334   if (!step || step == chrec_dont_know)
1335     {
1336       if (dump_file && (dump_flags & TDF_DETAILS))
1337         fprintf (dump_file,
1338                  "Unable to convert loop: Cannot determine step of loop.\n");
1339
1340       return NULL;
1341     }
1342   if (TREE_CODE (step) != INTEGER_CST)
1343     {
1344
1345       if (dump_file && (dump_flags & TDF_DETAILS))
1346         fprintf (dump_file,
1347                  "Unable to convert loop: Step of loop is not integer.\n");
1348       return NULL;
1349     }
1350
1351   stepint = TREE_INT_CST_LOW (step);
1352
1353   /* Only want phis for induction vars, which will have two
1354      arguments.  */
1355   if (PHI_NUM_ARGS (phi) != 2)
1356     {
1357       if (dump_file && (dump_flags & TDF_DETAILS))
1358         fprintf (dump_file,
1359                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1360       return NULL;
1361     }
1362
1363   /* Another induction variable check. One argument's source should be
1364      in the loop, one outside the loop.  */
1365   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1366       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1367     {
1368
1369       if (dump_file && (dump_flags & TDF_DETAILS))
1370         fprintf (dump_file,
1371                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1372
1373       return NULL;
1374     }
1375
1376   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1377     {
1378       lboundvar = PHI_ARG_DEF (phi, 1);
1379       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1380                                               outerinductionvars, *invariants,
1381                                               0);
1382     }
1383   else
1384     {
1385       lboundvar = PHI_ARG_DEF (phi, 0);
1386       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1387                                               outerinductionvars, *invariants,
1388                                               0);
1389     }
1390
1391   if (!lbound)
1392     {
1393
1394       if (dump_file && (dump_flags & TDF_DETAILS))
1395         fprintf (dump_file,
1396                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1397
1398       return NULL;
1399     }
1400   /* One part of the test may be a loop invariant tree.  */
1401   VEC_reserve (tree, heap, *invariants, 1);
1402   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1403       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1404     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1405   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1406            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1407     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1408
1409   /* The non-induction variable part of the test is the upper bound variable.
1410    */
1411   if (TREE_OPERAND (test, 0) == inductionvar)
1412     uboundvar = TREE_OPERAND (test, 1);
1413   else
1414     uboundvar = TREE_OPERAND (test, 0);
1415
1416
1417   /* We only size the vectors assuming we have, at max, 2 times as many
1418      invariants as we do loops (one for each bound).
1419      This is just an arbitrary number, but it has to be matched against the
1420      code below.  */
1421   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1422
1423
1424   /* We might have some leftover.  */
1425   if (TREE_CODE (test) == LT_EXPR)
1426     extra = -1 * stepint;
1427   else if (TREE_CODE (test) == NE_EXPR)
1428     extra = -1 * stepint;
1429   else if (TREE_CODE (test) == GT_EXPR)
1430     extra = -1 * stepint;
1431   else if (TREE_CODE (test) == EQ_EXPR)
1432     extra = 1 * stepint;
1433
1434   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1435                                           outerinductionvars,
1436                                           *invariants, extra);
1437   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1438                         build_int_cst (TREE_TYPE (uboundvar), extra));
1439   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1440   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1441   VEC_safe_push (int, heap, *steps, stepint);
1442   if (!ubound)
1443     {
1444       if (dump_file && (dump_flags & TDF_DETAILS))
1445         fprintf (dump_file,
1446                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1447       return NULL;
1448     }
1449
1450   lloop = lambda_loop_new ();
1451   LL_STEP (lloop) = stepint;
1452   LL_LOWER_BOUND (lloop) = lbound;
1453   LL_UPPER_BOUND (lloop) = ubound;
1454   return lloop;
1455 }
1456
1457 /* Given a LOOP, find the induction variable it is testing against in the exit
1458    condition.  Return the induction variable if found, NULL otherwise.  */
1459
1460 static tree
1461 find_induction_var_from_exit_cond (struct loop *loop)
1462 {
1463   tree expr = get_loop_exit_condition (loop);
1464   tree ivarop;
1465   tree test;
1466   if (expr == NULL_TREE)
1467     return NULL_TREE;
1468   if (TREE_CODE (expr) != COND_EXPR)
1469     return NULL_TREE;
1470   test = TREE_OPERAND (expr, 0);
1471   if (!COMPARISON_CLASS_P (test))
1472     return NULL_TREE;
1473
1474   /* Find the side that is invariant in this loop. The ivar must be the other
1475      side.  */
1476
1477   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1478       ivarop = TREE_OPERAND (test, 1);
1479   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1480       ivarop = TREE_OPERAND (test, 0);
1481   else
1482     return NULL_TREE;
1483
1484   if (TREE_CODE (ivarop) != SSA_NAME)
1485     return NULL_TREE;
1486   return ivarop;
1487 }
1488
1489 DEF_VEC_P(lambda_loop);
1490 DEF_VEC_ALLOC_P(lambda_loop,heap);
1491
1492 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1493    Return the new loop nest.
1494    INDUCTIONVARS is a pointer to an array of induction variables for the
1495    loopnest that will be filled in during this process.
1496    INVARIANTS is a pointer to an array of invariants that will be filled in
1497    during this process.  */
1498
1499 lambda_loopnest
1500 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1501                                  struct loop * loop_nest,
1502                                  VEC(tree,heap) **inductionvars,
1503                                  VEC(tree,heap) **invariants,
1504                                  bool need_perfect_nest)
1505 {
1506   lambda_loopnest ret = NULL;
1507   struct loop *temp;
1508   int depth = 0;
1509   size_t i;
1510   VEC(lambda_loop,heap) *loops = NULL;
1511   VEC(tree,heap) *uboundvars = NULL;
1512   VEC(tree,heap) *lboundvars  = NULL;
1513   VEC(int,heap) *steps = NULL;
1514   lambda_loop newloop;
1515   tree inductionvar = NULL;
1516
1517   depth = depth_of_nest (loop_nest);
1518   temp = loop_nest;
1519   while (temp)
1520     {
1521       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1522                                          &inductionvar, *inductionvars,
1523                                          &lboundvars, &uboundvars,
1524                                          &steps);
1525       if (!newloop)
1526         return NULL;
1527       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1528       VEC_safe_push (lambda_loop, heap, loops, newloop);
1529       temp = temp->inner;
1530     }
1531   if (need_perfect_nest)
1532     {
1533       if (!perfect_nestify (currloops, loop_nest,
1534                             lboundvars, uboundvars, steps, *inductionvars))
1535         {
1536           if (dump_file)
1537             fprintf (dump_file,
1538                      "Not a perfect loop nest and couldn't convert to one.\n");
1539           goto fail;
1540         }
1541       else if (dump_file)
1542         fprintf (dump_file,
1543                  "Successfully converted loop nest to perfect loop nest.\n");
1544     }
1545   ret = lambda_loopnest_new (depth, 2 * depth);
1546   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1547     LN_LOOPS (ret)[i] = newloop;
1548  fail:
1549   VEC_free (lambda_loop, heap, loops);
1550   VEC_free (tree, heap, uboundvars);
1551   VEC_free (tree, heap, lboundvars);
1552   VEC_free (int, heap, steps);
1553
1554   return ret;
1555 }
1556
1557 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1558    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1559    inserted for us are stored.  INDUCTION_VARS is the array of induction
1560    variables for the loop this LBV is from.  TYPE is the tree type to use for
1561    the variables and trees involved.  */
1562
1563 static tree
1564 lbv_to_gcc_expression (lambda_body_vector lbv,
1565                        tree type, VEC(tree,heap) *induction_vars,
1566                        tree *stmts_to_insert)
1567 {
1568   tree stmts, stmt, resvar, name;
1569   tree iv;
1570   size_t i;
1571   tree_stmt_iterator tsi;
1572
1573   /* Create a statement list and a linear expression temporary.  */
1574   stmts = alloc_stmt_list ();
1575   resvar = create_tmp_var (type, "lbvtmp");
1576   add_referenced_tmp_var (resvar);
1577
1578   /* Start at 0.  */
1579   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1580   name = make_ssa_name (resvar, stmt);
1581   TREE_OPERAND (stmt, 0) = name;
1582   tsi = tsi_last (stmts);
1583   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1584
1585   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1586     {
1587       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1588         {
1589           tree newname;
1590           tree coeffmult;
1591
1592           /* newname = coefficient * induction_variable */
1593           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1594           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1595                         fold (build (MULT_EXPR, type, iv, coeffmult)));
1596
1597           newname = make_ssa_name (resvar, stmt);
1598           TREE_OPERAND (stmt, 0) = newname;
1599           fold_stmt (&stmt);
1600           tsi = tsi_last (stmts);
1601           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1602
1603           /* name = name + newname */
1604           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1605                         build (PLUS_EXPR, type, name, newname));
1606           name = make_ssa_name (resvar, stmt);
1607           TREE_OPERAND (stmt, 0) = name;
1608           fold_stmt (&stmt);
1609           tsi = tsi_last (stmts);
1610           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1611
1612         }
1613     }
1614
1615   /* Handle any denominator that occurs.  */
1616   if (LBV_DENOMINATOR (lbv) != 1)
1617     {
1618       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1619       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1620                     build (CEIL_DIV_EXPR, type, name, denominator));
1621       name = make_ssa_name (resvar, stmt);
1622       TREE_OPERAND (stmt, 0) = name;
1623       fold_stmt (&stmt);
1624       tsi = tsi_last (stmts);
1625       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1626     }
1627   *stmts_to_insert = stmts;
1628   return name;
1629 }
1630
1631 /* Convert a linear expression from coefficient and constant form to a
1632    gcc tree.
1633    Return the tree that represents the final value of the expression.
1634    LLE is the linear expression to convert.
1635    OFFSET is the linear offset to apply to the expression.
1636    TYPE is the tree type to use for the variables and math.
1637    INDUCTION_VARS is a vector of induction variables for the loops.
1638    INVARIANTS is a vector of the loop nest invariants.
1639    WRAP specifies what tree code to wrap the results in, if there is more than
1640    one (it is either MAX_EXPR, or MIN_EXPR).
1641    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1642    statements that need to be inserted for the linear expression.  */
1643
1644 static tree
1645 lle_to_gcc_expression (lambda_linear_expression lle,
1646                        lambda_linear_expression offset,
1647                        tree type,
1648                        VEC(tree,heap) *induction_vars,
1649                        VEC(tree,heap) *invariants,
1650                        enum tree_code wrap, tree *stmts_to_insert)
1651 {
1652   tree stmts, stmt, resvar, name;
1653   size_t i;
1654   tree_stmt_iterator tsi;
1655   tree iv, invar;
1656   VEC(tree,heap) *results = NULL;
1657
1658   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1659   name = NULL_TREE;
1660   /* Create a statement list and a linear expression temporary.  */
1661   stmts = alloc_stmt_list ();
1662   resvar = create_tmp_var (type, "lletmp");
1663   add_referenced_tmp_var (resvar);
1664
1665   /* Build up the linear expressions, and put the variable representing the
1666      result in the results array.  */
1667   for (; lle != NULL; lle = LLE_NEXT (lle))
1668     {
1669       /* Start at name = 0.  */
1670       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1671       name = make_ssa_name (resvar, stmt);
1672       TREE_OPERAND (stmt, 0) = name;
1673       fold_stmt (&stmt);
1674       tsi = tsi_last (stmts);
1675       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1676
1677       /* First do the induction variables.
1678          at the end, name = name + all the induction variables added
1679          together.  */
1680       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1681         {
1682           if (LLE_COEFFICIENTS (lle)[i] != 0)
1683             {
1684               tree newname;
1685               tree mult;
1686               tree coeff;
1687
1688               /* mult = induction variable * coefficient.  */
1689               if (LLE_COEFFICIENTS (lle)[i] == 1)
1690                 {
1691                   mult = VEC_index (tree, induction_vars, i);
1692                 }
1693               else
1694                 {
1695                   coeff = build_int_cst (type,
1696                                          LLE_COEFFICIENTS (lle)[i]);
1697                   mult = fold (build (MULT_EXPR, type, iv, coeff));
1698                 }
1699
1700               /* newname = mult */
1701               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1702               newname = make_ssa_name (resvar, stmt);
1703               TREE_OPERAND (stmt, 0) = newname;
1704               fold_stmt (&stmt);
1705               tsi = tsi_last (stmts);
1706               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1707
1708               /* name = name + newname */
1709               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1710                             build (PLUS_EXPR, type, name, newname));
1711               name = make_ssa_name (resvar, stmt);
1712               TREE_OPERAND (stmt, 0) = name;
1713               fold_stmt (&stmt);
1714               tsi = tsi_last (stmts);
1715               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1716             }
1717         }
1718
1719       /* Handle our invariants.
1720          At the end, we have name = name + result of adding all multiplied
1721          invariants.  */
1722       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1723         {
1724           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1725             {
1726               tree newname;
1727               tree mult;
1728               tree coeff;
1729               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1730               /* mult = invariant * coefficient  */
1731               if (invcoeff == 1)
1732                 {
1733                   mult = invar;
1734                 }
1735               else
1736                 {
1737                   coeff = build_int_cst (type, invcoeff);
1738                   mult = fold (build (MULT_EXPR, type, invar, coeff));
1739                 }
1740
1741               /* newname = mult */
1742               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1743               newname = make_ssa_name (resvar, stmt);
1744               TREE_OPERAND (stmt, 0) = newname;
1745               fold_stmt (&stmt);
1746               tsi = tsi_last (stmts);
1747               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1748
1749               /* name = name + newname */
1750               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1751                             build (PLUS_EXPR, type, name, newname));
1752               name = make_ssa_name (resvar, stmt);
1753               TREE_OPERAND (stmt, 0) = name;
1754               fold_stmt (&stmt);
1755               tsi = tsi_last (stmts);
1756               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1757             }
1758         }
1759
1760       /* Now handle the constant.
1761          name = name + constant.  */
1762       if (LLE_CONSTANT (lle) != 0)
1763         {
1764           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1765                         build (PLUS_EXPR, type, name,
1766                                build_int_cst (type, LLE_CONSTANT (lle))));
1767           name = make_ssa_name (resvar, stmt);
1768           TREE_OPERAND (stmt, 0) = name;
1769           fold_stmt (&stmt);
1770           tsi = tsi_last (stmts);
1771           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1772         }
1773
1774       /* Now handle the offset.
1775          name = name + linear offset.  */
1776       if (LLE_CONSTANT (offset) != 0)
1777         {
1778           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1779                         build (PLUS_EXPR, type, name,
1780                                build_int_cst (type, LLE_CONSTANT (offset))));
1781           name = make_ssa_name (resvar, stmt);
1782           TREE_OPERAND (stmt, 0) = name;
1783           fold_stmt (&stmt);
1784           tsi = tsi_last (stmts);
1785           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1786         }
1787
1788       /* Handle any denominator that occurs.  */
1789       if (LLE_DENOMINATOR (lle) != 1)
1790         {
1791           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1792           stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1793                         type, name, stmt);
1794           stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1795
1796           /* name = {ceil, floor}(name/denominator) */
1797           name = make_ssa_name (resvar, stmt);
1798           TREE_OPERAND (stmt, 0) = name;
1799           tsi = tsi_last (stmts);
1800           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1801         }
1802       VEC_safe_push (tree, heap, results, name);
1803     }
1804
1805   /* Again, out of laziness, we don't handle this case yet.  It's not
1806      hard, it just hasn't occurred.  */
1807   gcc_assert (VEC_length (tree, results) <= 2);
1808
1809   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1810   if (VEC_length (tree, results) > 1)
1811     {
1812       tree op1 = VEC_index (tree, results, 0);
1813       tree op2 = VEC_index (tree, results, 1);
1814       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1815                     build (wrap, type, op1, op2));
1816       name = make_ssa_name (resvar, stmt);
1817       TREE_OPERAND (stmt, 0) = name;
1818       tsi = tsi_last (stmts);
1819       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1820     }
1821
1822   VEC_free (tree, heap, results);
1823
1824   *stmts_to_insert = stmts;
1825   return name;
1826 }
1827
1828 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1829    it, back into gcc code.  This changes the
1830    loops, their induction variables, and their bodies, so that they
1831    match the transformed loopnest.
1832    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1833    loopnest.
1834    OLD_IVS is a vector of induction variables from the old loopnest.
1835    INVARIANTS is a vector of loop invariants from the old loopnest.
1836    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1837    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1838    NEW_LOOPNEST.  */
1839
1840 void
1841 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1842                                  VEC(tree,heap) *old_ivs,
1843                                  VEC(tree,heap) *invariants,
1844                                  lambda_loopnest new_loopnest,
1845                                  lambda_trans_matrix transform)
1846 {
1847   struct loop *temp;
1848   size_t i = 0;
1849   size_t depth = 0;
1850   VEC(tree,heap) *new_ivs = NULL;
1851   tree oldiv;
1852
1853   block_stmt_iterator bsi;
1854
1855   if (dump_file)
1856     {
1857       transform = lambda_trans_matrix_inverse (transform);
1858       fprintf (dump_file, "Inverse of transformation matrix:\n");
1859       print_lambda_trans_matrix (dump_file, transform);
1860     }
1861   depth = depth_of_nest (old_loopnest);
1862   temp = old_loopnest;
1863
1864   while (temp)
1865     {
1866       lambda_loop newloop;
1867       basic_block bb;
1868       edge exit;
1869       tree ivvar, ivvarinced, exitcond, stmts;
1870       enum tree_code testtype;
1871       tree newupperbound, newlowerbound;
1872       lambda_linear_expression offset;
1873       tree type;
1874       bool insert_after;
1875       tree inc_stmt;
1876
1877       oldiv = VEC_index (tree, old_ivs, i);
1878       type = TREE_TYPE (oldiv);
1879
1880       /* First, build the new induction variable temporary  */
1881
1882       ivvar = create_tmp_var (type, "lnivtmp");
1883       add_referenced_tmp_var (ivvar);
1884
1885       VEC_safe_push (tree, heap, new_ivs, ivvar);
1886
1887       newloop = LN_LOOPS (new_loopnest)[i];
1888
1889       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1890          cases for now.  */
1891       offset = LL_LINEAR_OFFSET (newloop);
1892
1893       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1894                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1895
1896       /* Now build the  new lower bounds, and insert the statements
1897          necessary to generate it on the loop preheader.  */
1898       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1899                                              LL_LINEAR_OFFSET (newloop),
1900                                              type,
1901                                              new_ivs,
1902                                              invariants, MAX_EXPR, &stmts);
1903       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1904       bsi_commit_edge_inserts ();
1905       /* Build the new upper bound and insert its statements in the
1906          basic block of the exit condition */
1907       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1908                                              LL_LINEAR_OFFSET (newloop),
1909                                              type,
1910                                              new_ivs,
1911                                              invariants, MIN_EXPR, &stmts);
1912       exit = temp->single_exit;
1913       exitcond = get_loop_exit_condition (temp);
1914       bb = bb_for_stmt (exitcond);
1915       bsi = bsi_start (bb);
1916       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1917
1918       /* Create the new iv.  */
1919
1920       standard_iv_increment_position (temp, &bsi, &insert_after);
1921       create_iv (newlowerbound,
1922                  build_int_cst (type, LL_STEP (newloop)),
1923                  ivvar, temp, &bsi, insert_after, &ivvar,
1924                  NULL);
1925
1926       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1927          dominate the block containing the exit condition.
1928          So we simply create our own incremented iv to use in the new exit
1929          test,  and let redundancy elimination sort it out.  */
1930       inc_stmt = build (PLUS_EXPR, type,
1931                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1932       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1933                         inc_stmt);
1934       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1935       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1936       bsi = bsi_for_stmt (exitcond);
1937       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1938
1939       /* Replace the exit condition with the new upper bound
1940          comparison.  */
1941
1942       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1943
1944       /* We want to build a conditional where true means exit the loop, and
1945          false means continue the loop.
1946          So swap the testtype if this isn't the way things are.*/
1947
1948       if (exit->flags & EDGE_FALSE_VALUE)
1949         testtype = swap_tree_comparison (testtype);
1950
1951       COND_EXPR_COND (exitcond) = build (testtype,
1952                                          boolean_type_node,
1953                                          newupperbound, ivvarinced);
1954       update_stmt (exitcond);
1955       VEC_replace (tree, new_ivs, i, ivvar);
1956
1957       i++;
1958       temp = temp->inner;
1959     }
1960
1961   /* Rewrite uses of the old ivs so that they are now specified in terms of
1962      the new ivs.  */
1963
1964   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1965     {
1966       imm_use_iterator imm_iter;
1967       use_operand_p imm_use;
1968       tree oldiv_def;
1969       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1970
1971       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1972         oldiv_def = PHI_RESULT (oldiv_stmt);
1973       else
1974         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1975       gcc_assert (oldiv_def != NULL_TREE);
1976
1977       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1978         {
1979           tree stmt = USE_STMT (imm_use);
1980           use_operand_p use_p;
1981           ssa_op_iter iter;
1982           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1983           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1984             {
1985               if (USE_FROM_PTR (use_p) == oldiv)
1986                 {
1987                   tree newiv, stmts;
1988                   lambda_body_vector lbv, newlbv;
1989                   /* Compute the new expression for the induction
1990                      variable.  */
1991                   depth = VEC_length (tree, new_ivs);
1992                   lbv = lambda_body_vector_new (depth);
1993                   LBV_COEFFICIENTS (lbv)[i] = 1;
1994
1995                   newlbv = lambda_body_vector_compute_new (transform, lbv);
1996
1997                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1998                                                  new_ivs, &stmts);
1999                   bsi = bsi_for_stmt (stmt);
2000                   /* Insert the statements to build that
2001                      expression.  */
2002                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2003                   propagate_value (use_p, newiv);
2004                   update_stmt (stmt);
2005
2006                 }
2007             }
2008         }
2009     }
2010   VEC_free (tree, heap, new_ivs);
2011 }
2012
2013 /* Returns true when the vector V is lexicographically positive, in
2014    other words, when the first nonzero element is positive.  */
2015
2016 static bool
2017 lambda_vector_lexico_pos (lambda_vector v,
2018                           unsigned n)
2019 {
2020   unsigned i;
2021   for (i = 0; i < n; i++)
2022     {
2023       if (v[i] == 0)
2024         continue;
2025       if (v[i] < 0)
2026         return false;
2027       if (v[i] > 0)
2028         return true;
2029     }
2030   return true;
2031 }
2032
2033
2034 /* Return TRUE if this is not interesting statement from the perspective of
2035    determining if we have a perfect loop nest.  */
2036
2037 static bool
2038 not_interesting_stmt (tree stmt)
2039 {
2040   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2041      loop, we would have already failed the number of exits tests.  */
2042   if (TREE_CODE (stmt) == LABEL_EXPR
2043       || TREE_CODE (stmt) == GOTO_EXPR
2044       || TREE_CODE (stmt) == COND_EXPR)
2045     return true;
2046   return false;
2047 }
2048
2049 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2050
2051 static bool
2052 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2053 {
2054   int i;
2055   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2056     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2057       if (PHI_ARG_DEF (phi, i) == def)
2058         return true;
2059   return false;
2060 }
2061
2062 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2063
2064 static bool
2065 stmt_uses_phi_result (tree stmt, tree phi_result)
2066 {
2067   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2068
2069   /* This is conservatively true, because we only want SIMPLE bumpers
2070      of the form x +- constant for our pass.  */
2071   return (use == phi_result);
2072 }
2073
2074 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2075    in-loop-edge in a phi node, and the operand it uses is the result of that
2076    phi node.
2077    I.E. i_29 = i_3 + 1
2078         i_3 = PHI (0, i_29);  */
2079
2080 static bool
2081 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2082 {
2083   tree use;
2084   tree def;
2085   imm_use_iterator iter;
2086   use_operand_p use_p;
2087
2088   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2089   if (!def)
2090     return false;
2091
2092   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2093     {
2094       use = USE_STMT (use_p);
2095       if (TREE_CODE (use) == PHI_NODE)
2096         {
2097           if (phi_loop_edge_uses_def (loop, use, def))
2098             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2099               return true;
2100         }
2101     }
2102   return false;
2103 }
2104
2105
2106 /* Return true if LOOP is a perfect loop nest.
2107    Perfect loop nests are those loop nests where all code occurs in the
2108    innermost loop body.
2109    If S is a program statement, then
2110
2111    i.e.
2112    DO I = 1, 20
2113        S1
2114        DO J = 1, 20
2115        ...
2116        END DO
2117    END DO
2118    is not a perfect loop nest because of S1.
2119
2120    DO I = 1, 20
2121       DO J = 1, 20
2122         S1
2123         ...
2124       END DO
2125    END DO
2126    is a perfect loop nest.
2127
2128    Since we don't have high level loops anymore, we basically have to walk our
2129    statements and ignore those that are there because the loop needs them (IE
2130    the induction variable increment, and jump back to the top of the loop).  */
2131
2132 bool
2133 perfect_nest_p (struct loop *loop)
2134 {
2135   basic_block *bbs;
2136   size_t i;
2137   tree exit_cond;
2138
2139   if (!loop->inner)
2140     return true;
2141   bbs = get_loop_body (loop);
2142   exit_cond = get_loop_exit_condition (loop);
2143   for (i = 0; i < loop->num_nodes; i++)
2144     {
2145       if (bbs[i]->loop_father == loop)
2146         {
2147           block_stmt_iterator bsi;
2148           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2149             {
2150               tree stmt = bsi_stmt (bsi);
2151               if (stmt == exit_cond
2152                   || not_interesting_stmt (stmt)
2153                   || stmt_is_bumper_for_loop (loop, stmt))
2154                 continue;
2155               free (bbs);
2156               return false;
2157             }
2158         }
2159     }
2160   free (bbs);
2161   /* See if the inner loops are perfectly nested as well.  */
2162   if (loop->inner)
2163     return perfect_nest_p (loop->inner);
2164   return true;
2165 }
2166
2167 /* Replace the USES of X in STMT, or uses with the same step as X  with Y.  */
2168
2169 static void
2170 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2171                                 int xstep, tree y)
2172 {
2173   ssa_op_iter iter;
2174   use_operand_p use_p;
2175
2176   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2177     {
2178       tree use = USE_FROM_PTR (use_p);
2179       tree step = NULL_TREE;
2180       tree access_fn = NULL_TREE;
2181
2182
2183       access_fn = instantiate_parameters
2184         (loop, analyze_scalar_evolution (loop, use));
2185       if (access_fn != NULL_TREE && access_fn != chrec_dont_know)
2186         step = evolution_part_in_loop_num (access_fn, loop->num);
2187       if ((step && step != chrec_dont_know && int_cst_value (step) == xstep)
2188           || USE_FROM_PTR (use_p) == x)
2189         SET_USE (use_p, y);
2190     }
2191 }
2192
2193 /* Return TRUE if STMT uses tree OP in it's uses.  */
2194
2195 static bool
2196 stmt_uses_op (tree stmt, tree op)
2197 {
2198   ssa_op_iter iter;
2199   tree use;
2200
2201   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2202     {
2203       if (use == op)
2204         return true;
2205     }
2206   return false;
2207 }
2208
2209 /* Return true if STMT is an exit PHI for LOOP */
2210
2211 static bool
2212 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2213 {
2214
2215   if (TREE_CODE (stmt) != PHI_NODE
2216       || PHI_NUM_ARGS (stmt) != 1
2217       || bb_for_stmt (stmt) != loop->single_exit->dest)
2218     return false;
2219
2220   return true;
2221 }
2222
2223 /* Return true if STMT can be put back into INNER, a loop by moving it to the
2224    beginning of that loop.  */
2225
2226 static bool
2227 can_put_in_inner_loop (struct loop *inner, tree stmt)
2228 {
2229   imm_use_iterator imm_iter;
2230   use_operand_p use_p;
2231   basic_block use_bb = NULL;
2232
2233   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2234   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2235       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2236     return false;
2237
2238   /* We require that the basic block of all uses be the same, or the use be an
2239      exit phi.  */
2240   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2241     {
2242       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2243         {
2244           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2245
2246           if (!flow_bb_inside_loop_p (inner, immbb))
2247             return false;
2248           if (use_bb == NULL)
2249             use_bb = immbb;
2250           else if (immbb != use_bb)
2251             return false;
2252         }
2253     }
2254   return true;
2255
2256 }
2257
2258
2259 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2260    one.  LOOPIVS is a vector of induction variables, one per loop.
2261    ATM, we only handle imperfect nests of depth 2, where all of the statements
2262    occur after the inner loop.  */
2263
2264 static bool
2265 can_convert_to_perfect_nest (struct loop *loop,
2266                              VEC(tree,heap) *loopivs)
2267 {
2268   basic_block *bbs;
2269   tree exit_condition, phi;
2270   size_t i;
2271   block_stmt_iterator bsi;
2272   basic_block exitdest;
2273
2274   /* Can't handle triply nested+ loops yet.  */
2275   if (!loop->inner || loop->inner->inner)
2276     return false;
2277
2278   bbs = get_loop_body (loop);
2279   exit_condition = get_loop_exit_condition (loop);
2280   for (i = 0; i < loop->num_nodes; i++)
2281     {
2282       if (bbs[i]->loop_father == loop)
2283         {
2284           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2285             {
2286               size_t j;
2287               tree stmt = bsi_stmt (bsi);
2288               tree iv;
2289
2290               if (stmt == exit_condition
2291                   || not_interesting_stmt (stmt)
2292                   || stmt_is_bumper_for_loop (loop, stmt))
2293                 continue;
2294               /* If the statement uses inner loop ivs, we == screwed.  */
2295               for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
2296                 if (stmt_uses_op (stmt, iv))
2297                   goto fail;
2298
2299               /* If this is a simple operation like a cast that is invariant
2300                  in the inner loop, only used there, and we can place it
2301                  there, then it's not going to hurt us.
2302                  This means that we will propagate casts and other cheap
2303                  invariant operations *back*
2304                  into the inner loop if we can interchange the loop, on the
2305                  theory that we are going to gain a lot more by interchanging
2306                  the loop than we are by leaving some invariant code there for
2307                  some other pass to clean up.  */
2308               if (TREE_CODE (stmt) == MODIFY_EXPR
2309                   && is_gimple_cast (TREE_OPERAND (stmt, 1))
2310                   && can_put_in_inner_loop (loop->inner, stmt))
2311                 continue;
2312
2313               /* Otherwise, if the bb of a statement we care about isn't
2314                  dominated by the header of the inner loop, then we can't
2315                  handle this case right now.  This test ensures that the
2316                  statement comes completely *after* the inner loop.  */
2317               if (!dominated_by_p (CDI_DOMINATORS,
2318                                    bb_for_stmt (stmt),
2319                                    loop->inner->header))
2320                 goto fail;
2321             }
2322         }
2323     }
2324
2325   /* We also need to make sure the loop exit only has simple copy phis in it,
2326      otherwise we don't know how to transform it into a perfect nest right
2327      now.  */
2328   exitdest = loop->single_exit->dest;
2329
2330   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2331     if (PHI_NUM_ARGS (phi) != 1)
2332       goto fail;
2333
2334   free (bbs);
2335   return true;
2336
2337  fail:
2338   free (bbs);
2339   return false;
2340 }
2341
2342 /* Transform the loop nest into a perfect nest, if possible.
2343    LOOPS is the current struct loops *
2344    LOOP is the loop nest to transform into a perfect nest
2345    LBOUNDS are the lower bounds for the loops to transform
2346    UBOUNDS are the upper bounds for the loops to transform
2347    STEPS is the STEPS for the loops to transform.
2348    LOOPIVS is the induction variables for the loops to transform.
2349
2350    Basically, for the case of
2351
2352    FOR (i = 0; i < 50; i++)
2353     {
2354      FOR (j =0; j < 50; j++)
2355      {
2356         <whatever>
2357      }
2358      <some code>
2359     }
2360
2361    This function will transform it into a perfect loop nest by splitting the
2362    outer loop into two loops, like so:
2363
2364    FOR (i = 0; i < 50; i++)
2365    {
2366      FOR (j = 0; j < 50; j++)
2367      {
2368          <whatever>
2369      }
2370    }
2371
2372    FOR (i = 0; i < 50; i ++)
2373    {
2374     <some code>
2375    }
2376
2377    Return FALSE if we can't make this loop into a perfect nest.  */
2378
2379 static bool
2380 perfect_nestify (struct loops *loops,
2381                  struct loop *loop,
2382                  VEC(tree,heap) *lbounds,
2383                  VEC(tree,heap) *ubounds,
2384                  VEC(int,heap) *steps,
2385                  VEC(tree,heap) *loopivs)
2386 {
2387   basic_block *bbs;
2388   tree exit_condition;
2389   tree then_label, else_label, cond_stmt;
2390   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2391   int i;
2392   block_stmt_iterator bsi;
2393   bool insert_after;
2394   edge e;
2395   struct loop *newloop;
2396   tree phi;
2397   tree uboundvar;
2398   tree stmt;
2399   tree oldivvar, ivvar, ivvarinced;
2400   VEC(tree,heap) *phis = NULL;
2401
2402   if (!can_convert_to_perfect_nest (loop, loopivs))
2403     return false;
2404
2405   /* Create the new loop */
2406
2407   olddest = loop->single_exit->dest;
2408   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2409   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2410
2411   /* Push the exit phi nodes that we are moving.  */
2412   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2413     {
2414       VEC_reserve (tree, heap, phis, 2);
2415       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2416       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2417     }
2418   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2419
2420   /* Remove the exit phis from the old basic block.  Make sure to set
2421      PHI_RESULT to null so it doesn't get released.  */
2422   while (phi_nodes (olddest) != NULL)
2423     {
2424       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2425       remove_phi_node (phi_nodes (olddest), NULL);
2426     }
2427
2428   /* and add them back to the new basic block.  */
2429   while (VEC_length (tree, phis) != 0)
2430     {
2431       tree def;
2432       tree phiname;
2433       def = VEC_pop (tree, phis);
2434       phiname = VEC_pop (tree, phis);
2435       phi = create_phi_node (phiname, preheaderbb);
2436       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2437     }
2438   flush_pending_stmts (e);
2439   VEC_free (tree, heap, phis);
2440
2441   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2442   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2443   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2444   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2445   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2446   cond_stmt = build (COND_EXPR, void_type_node,
2447                      build (NE_EXPR, boolean_type_node,
2448                             integer_one_node,
2449                             integer_zero_node),
2450                      then_label, else_label);
2451   bsi = bsi_start (bodybb);
2452   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2453   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2454   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2455   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2456
2457   /* Update the loop structures.  */
2458   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2459   newloop->header = headerbb;
2460   newloop->latch = latchbb;
2461   newloop->single_exit = e;
2462   add_bb_to_loop (latchbb, newloop);
2463   add_bb_to_loop (bodybb, newloop);
2464   add_bb_to_loop (headerbb, newloop);
2465   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2466   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2467   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2468                            loop->single_exit->src);
2469   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2470   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2471   /* Create the new iv.  */
2472   oldivvar = VEC_index (tree, loopivs, 0);
2473   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2474   add_referenced_tmp_var (ivvar);
2475   standard_iv_increment_position (newloop, &bsi, &insert_after);
2476   create_iv (VEC_index (tree, lbounds, 0),
2477              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2478              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2479
2480   /* Create the new upper bound.  This may be not just a variable, so we copy
2481      it to one just in case.  */
2482
2483   exit_condition = get_loop_exit_condition (newloop);
2484   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2485   add_referenced_tmp_var (uboundvar);
2486   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2487                 VEC_index (tree, ubounds, 0));
2488   uboundvar = make_ssa_name (uboundvar, stmt);
2489   TREE_OPERAND (stmt, 0) = uboundvar;
2490
2491   if (insert_after)
2492     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2493   else
2494     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2495   update_stmt (stmt);
2496   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2497                                            boolean_type_node,
2498                                            uboundvar,
2499                                            ivvarinced);
2500   update_stmt (exit_condition);
2501   bbs = get_loop_body_in_dom_order (loop);
2502   /* Now move the statements, and replace the induction variable in the moved
2503      statements with the correct loop induction variable.  */
2504   oldivvar = VEC_index (tree, loopivs, 0);
2505   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2506     {
2507       block_stmt_iterator tobsi = bsi_last (bodybb);
2508       if (bbs[i]->loop_father == loop)
2509         {
2510           /* If this is true, we are *before* the inner loop.
2511              If this isn't true, we are *after* it.
2512
2513              The only time can_convert_to_perfect_nest returns true when we
2514              have statements before the inner loop is if they can be moved
2515              into the inner loop.
2516
2517              The only time can_convert_to_perfect_nest returns true when we
2518              have statements after the inner loop is if they can be moved into
2519              the new split loop.  */
2520
2521           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2522             {
2523               for (bsi = bsi_last (bbs[i]); !bsi_end_p (bsi);)
2524                 {
2525                   use_operand_p use_p;
2526                   imm_use_iterator imm_iter;
2527                   tree stmt = bsi_stmt (bsi);
2528
2529                   if (stmt == exit_condition
2530                       || not_interesting_stmt (stmt)
2531                       || stmt_is_bumper_for_loop (loop, stmt))
2532                     {
2533                       if (!bsi_end_p (bsi))
2534                         bsi_prev (&bsi);
2535                       continue;
2536                     }
2537                   /* Move this statement back into the inner loop.
2538                      This looks a bit confusing, but we are really just
2539                      finding the first non-exit phi use and moving the
2540                      statement to the beginning of that use's basic
2541                      block.  */
2542                   FOR_EACH_IMM_USE_SAFE (use_p, imm_iter,
2543                                          TREE_OPERAND (stmt, 0))
2544                     {
2545                       tree imm_stmt = USE_STMT (use_p);
2546                       if (!exit_phi_for_loop_p (loop->inner, imm_stmt))
2547                         {
2548                           block_stmt_iterator tobsi = bsi_after_labels (bb_for_stmt (imm_stmt));
2549                           bsi_move_after (&bsi, &tobsi);
2550                           update_stmt (stmt);
2551                           BREAK_FROM_SAFE_IMM_USE (imm_iter);
2552                         }
2553                     }
2554                 }
2555             }
2556           else
2557             {
2558               /* Note that the bsi only needs to be explicitly incremented
2559                  when we don't move something, since it is automatically
2560                  incremented when we do.  */
2561               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2562                 {
2563                   ssa_op_iter i;
2564                   tree n, stmt = bsi_stmt (bsi);
2565
2566                   if (stmt == exit_condition
2567                       || not_interesting_stmt (stmt)
2568                       || stmt_is_bumper_for_loop (loop, stmt))
2569                     {
2570                       bsi_next (&bsi);
2571                       continue;
2572                     }
2573
2574                   replace_uses_equiv_to_x_with_y (loop, stmt,
2575                                                   oldivvar,
2576                                                   VEC_index (int, steps, 0),
2577                                                   ivvar);
2578                   bsi_move_before (&bsi, &tobsi);
2579
2580                   /* If the statement has any virtual operands, they may
2581                      need to be rewired because the original loop may
2582                      still reference them.  */
2583                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2584                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2585                 }
2586             }
2587
2588         }
2589     }
2590
2591   free (bbs);
2592   return perfect_nest_p (loop);
2593 }
2594
2595 /* Return true if TRANS is a legal transformation matrix that respects
2596    the dependence vectors in DISTS and DIRS.  The conservative answer
2597    is false.
2598
2599    "Wolfe proves that a unimodular transformation represented by the
2600    matrix T is legal when applied to a loop nest with a set of
2601    lexicographically non-negative distance vectors RDG if and only if
2602    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2603    i.e.: if and only if it transforms the lexicographically positive
2604    distance vectors to lexicographically positive vectors.  Note that
2605    a unimodular matrix must transform the zero vector (and only it) to
2606    the zero vector." S.Muchnick.  */
2607
2608 bool
2609 lambda_transform_legal_p (lambda_trans_matrix trans,
2610                           int nb_loops,
2611                           varray_type dependence_relations)
2612 {
2613   unsigned int i;
2614   lambda_vector distres;
2615   struct data_dependence_relation *ddr;
2616
2617   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2618               && LTM_ROWSIZE (trans) == nb_loops);
2619
2620   /* When there is an unknown relation in the dependence_relations, we
2621      know that it is no worth looking at this loop nest: give up.  */
2622   ddr = (struct data_dependence_relation *)
2623     VARRAY_GENERIC_PTR (dependence_relations, 0);
2624   if (ddr == NULL)
2625     return true;
2626   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2627     return false;
2628
2629   distres = lambda_vector_new (nb_loops);
2630
2631   /* For each distance vector in the dependence graph.  */
2632   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2633     {
2634       ddr = (struct data_dependence_relation *)
2635         VARRAY_GENERIC_PTR (dependence_relations, i);
2636
2637       /* Don't care about relations for which we know that there is no
2638          dependence, nor about read-read (aka. output-dependences):
2639          these data accesses can happen in any order.  */
2640       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2641           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2642         continue;
2643
2644       /* Conservatively answer: "this transformation is not valid".  */
2645       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2646         return false;
2647
2648       /* If the dependence could not be captured by a distance vector,
2649          conservatively answer that the transform is not valid.  */
2650       if (DDR_DIST_VECT (ddr) == NULL)
2651         return false;
2652
2653       /* Compute trans.dist_vect */
2654       lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2655                                  DDR_DIST_VECT (ddr), distres);
2656
2657       if (!lambda_vector_lexico_pos (distres, nb_loops))
2658         return false;
2659     }
2660   return true;
2661 }