gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20     02110-1301, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "ggc.h"
  27 #include "tree.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "basic-block.h"
  31 #include "diagnostic.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44
  45 /* This loop nest code generation is based on non-singular matrix
  46    math.
  47
  48  A little terminology and a general sketch of the algorithm.  See "A singular
  49  loop transformation framework based on non-singular matrices" by Wei Li and
  50  Keshav Pingali for formal proofs that the various statements below are
  51  correct.
  52
  53  A loop iteration space represents the points traversed by the loop.  A point in the
  54  iteration space can be represented by a vector of size <loop depth>.  You can
  55  therefore represent the iteration space as an integral combinations of a set
  56  of basis vectors.
  57
  58  A loop iteration space is dense if every integer point between the loop
  59  bounds is a point in the iteration space.  Every loop with a step of 1
  60  therefore has a dense iteration space.
  61
  62  for i = 1 to 3, step 1 is a dense iteration space.
  63
  64  A loop iteration space is sparse if it is not dense.  That is, the iteration
  65  space skips integer points that are within the loop bounds.
  66
  67  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  68  2 is skipped.
  69
  70  Dense source spaces are easy to transform, because they don't skip any
  71  points to begin with.  Thus we can compute the exact bounds of the target
  72  space using min/max and floor/ceil.
  73
  74  For a dense source space, we take the transformation matrix, decompose it
  75  into a lower triangular part (H) and a unimodular part (U).
  76  We then compute the auxiliary space from the unimodular part (source loop
  77  nest . U = auxiliary space) , which has two important properties:
  78   1. It traverses the iterations in the same lexicographic order as the source
  79   space.
  80   2. It is a dense space when the source is a dense space (even if the target
  81   space is going to be sparse).
  82
  83  Given the auxiliary space, we use the lower triangular part to compute the
  84  bounds in the target space by simple matrix multiplication.
  85  The gaps in the target space (IE the new loop step sizes) will be the
  86  diagonals of the H matrix.
  87
  88  Sparse source spaces require another step, because you can't directly compute
  89  the exact bounds of the auxiliary and target space from the sparse space.
  90  Rather than try to come up with a separate algorithm to handle sparse source
  91  spaces directly, we just find a legal transformation matrix that gives you
  92  the sparse source space, from a dense space, and then transform the dense
  93  space.
  94
  95  For a regular sparse space, you can represent the source space as an integer
  96  lattice, and the base space of that lattice will always be dense.  Thus, we
  97  effectively use the lattice to figure out the transformation from the lattice
  98  base space, to the sparse iteration space (IE what transform was applied to
  99  the dense space to make it sparse).  We then compose this transform with the
 100  transformation matrix specified by the user (since our matrix transformations
 101  are closed under composition, this is okay).  We can then use the base space
 102  (which is dense) plus the composed transformation matrix, to compute the rest
 103  of the transform using the dense space algorithm above.
 104
 105  In other words, our sparse source space (B) is decomposed into a dense base
 106  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 107  We then compute the composition of L and the user transformation matrix (T),
 108  so that T is now a transform from A to the result, instead of from B to the
 109  result.
 110  IE A.(LT) = result instead of B.T = result
 111  Since A is now a dense source space, we can use the dense source space
 112  algorithm above to compute the result of applying transform (LT) to A.
 113
 114  Fourier-Motzkin elimination is used to compute the bounds of the base space
 115  of the lattice.  */
 116
 117 DEF_VEC_I(int);
 118 DEF_VEC_ALLOC_I(int,heap);
 119
 120 static bool perfect_nestify (struct loops *,
 121                              struct loop *, VEC(tree,heap) *,
 122                              VEC(tree,heap) *, VEC(int,heap) *,
 123                              VEC(tree,heap) *);
 124 /* Lattice stuff that is internal to the code generation algorithm.  */
 125
 126 typedef struct
 127 {
 128   /* Lattice base matrix.  */
 129   lambda_matrix base;
 130   /* Lattice dimension.  */
 131   int dimension;
 132   /* Origin vector for the coefficients.  */
 133   lambda_vector origin;
 134   /* Origin matrix for the invariants.  */
 135   lambda_matrix origin_invariants;
 136   /* Number of invariants.  */
 137   int invariants;
 138 } *lambda_lattice;
 139
 140 #define LATTICE_BASE(T) ((T)->base)
 141 #define LATTICE_DIMENSION(T) ((T)->dimension)
 142 #define LATTICE_ORIGIN(T) ((T)->origin)
 143 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 144 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 145
 146 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 147                        int, int);
 148 static lambda_lattice lambda_lattice_new (int, int);
 149 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 150
 151 static tree find_induction_var_from_exit_cond (struct loop *);
 152
 153 /* Create a new lambda body vector.  */
 154
 155 lambda_body_vector
 156 lambda_body_vector_new (int size)
 157 {
 158   lambda_body_vector ret;
 159
 160   ret = ggc_alloc (sizeof (*ret));
 161   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 162   LBV_SIZE (ret) = size;
 163   LBV_DENOMINATOR (ret) = 1;
 164   return ret;
 165 }
 166
 167 /* Compute the new coefficients for the vector based on the
 168   *inverse* of the transformation matrix.  */
 169
 170 lambda_body_vector
 171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 172                                 lambda_body_vector vect)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants)
 229 {
 230   lambda_linear_expression ret;
 231
 232   ret = ggc_alloc_cleared (sizeof (*ret));
 233
 234   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 235   LLE_CONSTANT (ret) = 0;
 236   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 237   LLE_DENOMINATOR (ret) = 1;
 238   LLE_NEXT (ret) = NULL;
 239
 240   return ret;
 241 }
 242
 243 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 244    The starting letter used for variable names is START.  */
 245
 246 static void
 247 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 248                          char start)
 249 {
 250   int i;
 251   bool first = true;
 252   for (i = 0; i < size; i++)
 253     {
 254       if (expr[i] != 0)
 255         {
 256           if (first)
 257             {
 258               if (expr[i] < 0)
 259                 fprintf (outfile, "-");
 260               first = false;
 261             }
 262           else if (expr[i] > 0)
 263             fprintf (outfile, " + ");
 264           else
 265             fprintf (outfile, " - ");
 266           if (abs (expr[i]) == 1)
 267             fprintf (outfile, "%c", start + i);
 268           else
 269             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 270         }
 271     }
 272 }
 273
 274 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 275    depth/number of coefficients is given by DEPTH, the number of invariants is
 276    given by INVARIANTS, and the character to start variable names with is given
 277    by START.  */
 278
 279 void
 280 print_lambda_linear_expression (FILE * outfile,
 281                                 lambda_linear_expression expr,
 282                                 int depth, int invariants, char start)
 283 {
 284   fprintf (outfile, "\tLinear expression: ");
 285   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 286   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 287   fprintf (outfile, "  invariants: ");
 288   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 289                            invariants, 'A');
 290   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 291 }
 292
 293 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 294    coefficients is given by DEPTH, the number of invariants is
 295    given by INVARIANTS, and the character to start variable names with is given
 296    by START.  */
 297
 298 void
 299 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 300                    int invariants, char start)
 301 {
 302   int step;
 303   lambda_linear_expression expr;
 304
 305   gcc_assert (loop);
 306
 307   expr = LL_LINEAR_OFFSET (loop);
 308   step = LL_STEP (loop);
 309   fprintf (outfile, "  step size = %d \n", step);
 310
 311   if (expr)
 312     {
 313       fprintf (outfile, "  linear offset: \n");
 314       print_lambda_linear_expression (outfile, expr, depth, invariants,
 315                                       start);
 316     }
 317
 318   fprintf (outfile, "  lower bound: \n");
 319   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 320     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 321   fprintf (outfile, "  upper bound: \n");
 322   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 323     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 324 }
 325
 326 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 327    number of invariants.  */
 328
 329 lambda_loopnest
 330 lambda_loopnest_new (int depth, int invariants)
 331 {
 332   lambda_loopnest ret;
 333   ret = ggc_alloc (sizeof (*ret));
 334
 335   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 336   LN_DEPTH (ret) = depth;
 337   LN_INVARIANTS (ret) = invariants;
 338
 339   return ret;
 340 }
 341
 342 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 343    character to use for loop names is given by START.  */
 344
 345 void
 346 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 347 {
 348   int i;
 349   for (i = 0; i < LN_DEPTH (nest); i++)
 350     {
 351       fprintf (outfile, "Loop %c\n", start + i);
 352       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 353                          LN_INVARIANTS (nest), 'i');
 354       fprintf (outfile, "\n");
 355     }
 356 }
 357
 358 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 359    of invariants.  */
 360
 361 static lambda_lattice
 362 lambda_lattice_new (int depth, int invariants)
 363 {
 364   lambda_lattice ret;
 365   ret = ggc_alloc (sizeof (*ret));
 366   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 367   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 368   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 369   LATTICE_DIMENSION (ret) = depth;
 370   LATTICE_INVARIANTS (ret) = invariants;
 371   return ret;
 372 }
 373
 374 /* Compute the lattice base for NEST.  The lattice base is essentially a
 375    non-singular transform from a dense base space to a sparse iteration space.
 376    We use it so that we don't have to specially handle the case of a sparse
 377    iteration space in other parts of the algorithm.  As a result, this routine
 378    only does something interesting (IE produce a matrix that isn't the
 379    identity matrix) if NEST is a sparse space.  */
 380
 381 static lambda_lattice
 382 lambda_lattice_compute_base (lambda_loopnest nest)
 383 {
 384   lambda_lattice ret;
 385   int depth, invariants;
 386   lambda_matrix base;
 387
 388   int i, j, step;
 389   lambda_loop loop;
 390   lambda_linear_expression expression;
 391
 392   depth = LN_DEPTH (nest);
 393   invariants = LN_INVARIANTS (nest);
 394
 395   ret = lambda_lattice_new (depth, invariants);
 396   base = LATTICE_BASE (ret);
 397   for (i = 0; i < depth; i++)
 398     {
 399       loop = LN_LOOPS (nest)[i];
 400       gcc_assert (loop);
 401       step = LL_STEP (loop);
 402       /* If we have a step of 1, then the base is one, and the
 403          origin and invariant coefficients are 0.  */
 404       if (step == 1)
 405         {
 406           for (j = 0; j < depth; j++)
 407             base[i][j] = 0;
 408           base[i][i] = 1;
 409           LATTICE_ORIGIN (ret)[i] = 0;
 410           for (j = 0; j < invariants; j++)
 411             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 412         }
 413       else
 414         {
 415           /* Otherwise, we need the lower bound expression (which must
 416              be an affine function)  to determine the base.  */
 417           expression = LL_LOWER_BOUND (loop);
 418           gcc_assert (expression && !LLE_NEXT (expression)
 419                       && LLE_DENOMINATOR (expression) == 1);
 420
 421           /* The lower triangular portion of the base is going to be the
 422              coefficient times the step */
 423           for (j = 0; j < i; j++)
 424             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 425               * LL_STEP (LN_LOOPS (nest)[j]);
 426           base[i][i] = step;
 427           for (j = i + 1; j < depth; j++)
 428             base[i][j] = 0;
 429
 430           /* Origin for this loop is the constant of the lower bound
 431              expression.  */
 432           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 433
 434           /* Coefficient for the invariants are equal to the invariant
 435              coefficients in the expression.  */
 436           for (j = 0; j < invariants; j++)
 437             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 438               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 439         }
 440     }
 441   return ret;
 442 }
 443
 444 /* Compute the greatest common denominator of two numbers (A and B) using
 445    Euclid's algorithm.  */
 446
 447 static int
 448 gcd (int a, int b)
 449 {
 450
 451   int x, y, z;
 452
 453   x = abs (a);
 454   y = abs (b);
 455
 456   while (x > 0)
 457     {
 458       z = y % x;
 459       y = x;
 460       x = z;
 461     }
 462
 463   return (y);
 464 }
 465
 466 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 467
 468 static int
 469 gcd_vector (lambda_vector vector, int size)
 470 {
 471   int i;
 472   int gcd1 = 0;
 473
 474   if (size > 0)
 475     {
 476       gcd1 = vector[0];
 477       for (i = 1; i < size; i++)
 478         gcd1 = gcd (gcd1, vector[i]);
 479     }
 480   return gcd1;
 481 }
 482
 483 /* Compute the least common multiple of two numbers A and B .  */
 484
 485 static int
 486 lcm (int a, int b)
 487 {
 488   return (abs (a) * abs (b) / gcd (a, b));
 489 }
 490
 491 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 492    auxiliary nest.
 493    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 494    it is easy to calculate the answer and bounds.
 495    A sketch of how it works:
 496    Given a system of linear inequalities, ai * xj >= bk, you can always
 497    rewrite the constraints so they are all of the form
 498    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 499    in b1 ... bk, and some a in a1...ai)
 500    You can then eliminate this x from the non-constant inequalities by
 501    rewriting these as a <= b, x >= constant, and delete the x variable.
 502    You can then repeat this for any remaining x variables, and then we have
 503    an easy to use variable <= constant (or no variables at all) form that we
 504    can construct our bounds from.
 505
 506    In our case, each time we eliminate, we construct part of the bound from
 507    the ith variable, then delete the ith variable.
 508
 509    Remember the constant are in our vector a, our coefficient matrix is A,
 510    and our invariant coefficient matrix is B.
 511
 512    SIZE is the size of the matrices being passed.
 513    DEPTH is the loop nest depth.
 514    INVARIANTS is the number of loop invariants.
 515    A, B, and a are the coefficient matrix, invariant coefficient, and a
 516    vector of constants, respectively.  */
 517
 518 static lambda_loopnest
 519 compute_nest_using_fourier_motzkin (int size,
 520                                     int depth,
 521                                     int invariants,
 522                                     lambda_matrix A,
 523                                     lambda_matrix B,
 524                                     lambda_vector a)
 525 {
 526
 527   int multiple, f1, f2;
 528   int i, j, k;
 529   lambda_linear_expression expression;
 530   lambda_loop loop;
 531   lambda_loopnest auxillary_nest;
 532   lambda_matrix swapmatrix, A1, B1;
 533   lambda_vector swapvector, a1;
 534   int newsize;
 535
 536   A1 = lambda_matrix_new (128, depth);
 537   B1 = lambda_matrix_new (128, invariants);
 538   a1 = lambda_vector_new (128);
 539
 540   auxillary_nest = lambda_loopnest_new (depth, invariants);
 541
 542   for (i = depth - 1; i >= 0; i--)
 543     {
 544       loop = lambda_loop_new ();
 545       LN_LOOPS (auxillary_nest)[i] = loop;
 546       LL_STEP (loop) = 1;
 547
 548       for (j = 0; j < size; j++)
 549         {
 550           if (A[j][i] < 0)
 551             {
 552               /* Any linear expression in the matrix with a coefficient less
 553                  than 0 becomes part of the new lower bound.  */
 554               expression = lambda_linear_expression_new (depth, invariants);
 555
 556               for (k = 0; k < i; k++)
 557                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 558
 559               for (k = 0; k < invariants; k++)
 560                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 561
 562               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 563               LLE_CONSTANT (expression) = -1 * a[j];
 564
 565               /* Ignore if identical to the existing lower bound.  */
 566               if (!lle_equal (LL_LOWER_BOUND (loop),
 567                               expression, depth, invariants))
 568                 {
 569                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 570                   LL_LOWER_BOUND (loop) = expression;
 571                 }
 572
 573             }
 574           else if (A[j][i] > 0)
 575             {
 576               /* Any linear expression with a coefficient greater than 0
 577                  becomes part of the new upper bound.  */
 578               expression = lambda_linear_expression_new (depth, invariants);
 579               for (k = 0; k < i; k++)
 580                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 581
 582               for (k = 0; k < invariants; k++)
 583                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 584
 585               LLE_DENOMINATOR (expression) = A[j][i];
 586               LLE_CONSTANT (expression) = a[j];
 587
 588               /* Ignore if identical to the existing upper bound.  */
 589               if (!lle_equal (LL_UPPER_BOUND (loop),
 590                               expression, depth, invariants))
 591                 {
 592                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 593                   LL_UPPER_BOUND (loop) = expression;
 594                 }
 595
 596             }
 597         }
 598
 599       /* This portion creates a new system of linear inequalities by deleting
 600          the i'th variable, reducing the system by one variable.  */
 601       newsize = 0;
 602       for (j = 0; j < size; j++)
 603         {
 604           /* If the coefficient for the i'th variable is 0, then we can just
 605              eliminate the variable straightaway.  Otherwise, we have to
 606              multiply through by the coefficients we are eliminating.  */
 607           if (A[j][i] == 0)
 608             {
 609               lambda_vector_copy (A[j], A1[newsize], depth);
 610               lambda_vector_copy (B[j], B1[newsize], invariants);
 611               a1[newsize] = a[j];
 612               newsize++;
 613             }
 614           else if (A[j][i] > 0)
 615             {
 616               for (k = 0; k < size; k++)
 617                 {
 618                   if (A[k][i] < 0)
 619                     {
 620                       multiple = lcm (A[j][i], A[k][i]);
 621                       f1 = multiple / A[j][i];
 622                       f2 = -1 * multiple / A[k][i];
 623
 624                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 625                                             A1[newsize], depth);
 626                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 627                                             B1[newsize], invariants);
 628                       a1[newsize] = f1 * a[j] + f2 * a[k];
 629                       newsize++;
 630                     }
 631                 }
 632             }
 633         }
 634
 635       swapmatrix = A;
 636       A = A1;
 637       A1 = swapmatrix;
 638
 639       swapmatrix = B;
 640       B = B1;
 641       B1 = swapmatrix;
 642
 643       swapvector = a;
 644       a = a1;
 645       a1 = swapvector;
 646
 647       size = newsize;
 648     }
 649
 650   return auxillary_nest;
 651 }
 652
 653 /* Compute the loop bounds for the auxiliary space NEST.
 654    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 655    Given the original nest, this function will
 656    1. Convert the nest into matrix form, which consists of a matrix for the
 657    coefficients, a matrix for the
 658    invariant coefficients, and a vector for the constants.
 659    2. Use the matrix form to calculate the lattice base for the nest (which is
 660    a dense space)
 661    3. Compose the dense space transform with the user specified transform, to
 662    get a transform we can easily calculate transformed bounds for.
 663    4. Multiply the composed transformation matrix times the matrix form of the
 664    loop.
 665    5. Transform the newly created matrix (from step 4) back into a loop nest
 666    using fourier motzkin elimination to figure out the bounds.  */
 667
 668 static lambda_loopnest
 669 lambda_compute_auxillary_space (lambda_loopnest nest,
 670                                 lambda_trans_matrix trans)
 671 {
 672   lambda_matrix A, B, A1, B1;
 673   lambda_vector a, a1;
 674   lambda_matrix invertedtrans;
 675   int depth, invariants, size;
 676   int i, j;
 677   lambda_loop loop;
 678   lambda_linear_expression expression;
 679   lambda_lattice lattice;
 680
 681   depth = LN_DEPTH (nest);
 682   invariants = LN_INVARIANTS (nest);
 683
 684   /* Unfortunately, we can't know the number of constraints we'll have
 685      ahead of time, but this should be enough even in ridiculous loop nest
 686      cases. We must not go over this limit.  */
 687   A = lambda_matrix_new (128, depth);
 688   B = lambda_matrix_new (128, invariants);
 689   a = lambda_vector_new (128);
 690
 691   A1 = lambda_matrix_new (128, depth);
 692   B1 = lambda_matrix_new (128, invariants);
 693   a1 = lambda_vector_new (128);
 694
 695   /* Store the bounds in the equation matrix A, constant vector a, and
 696      invariant matrix B, so that we have Ax <= a + B.
 697      This requires a little equation rearranging so that everything is on the
 698      correct side of the inequality.  */
 699   size = 0;
 700   for (i = 0; i < depth; i++)
 701     {
 702       loop = LN_LOOPS (nest)[i];
 703
 704       /* First we do the lower bound.  */
 705       if (LL_STEP (loop) > 0)
 706         expression = LL_LOWER_BOUND (loop);
 707       else
 708         expression = LL_UPPER_BOUND (loop);
 709
 710       for (; expression != NULL; expression = LLE_NEXT (expression))
 711         {
 712           /* Fill in the coefficient.  */
 713           for (j = 0; j < i; j++)
 714             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 715
 716           /* And the invariant coefficient.  */
 717           for (j = 0; j < invariants; j++)
 718             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 719
 720           /* And the constant.  */
 721           a[size] = LLE_CONSTANT (expression);
 722
 723           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 724              constants and single variables on   */
 725           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 726           a[size] *= -1;
 727           for (j = 0; j < invariants; j++)
 728             B[size][j] *= -1;
 729
 730           size++;
 731           /* Need to increase matrix sizes above.  */
 732           gcc_assert (size <= 127);
 733
 734         }
 735
 736       /* Then do the exact same thing for the upper bounds.  */
 737       if (LL_STEP (loop) > 0)
 738         expression = LL_UPPER_BOUND (loop);
 739       else
 740         expression = LL_LOWER_BOUND (loop);
 741
 742       for (; expression != NULL; expression = LLE_NEXT (expression))
 743         {
 744           /* Fill in the coefficient.  */
 745           for (j = 0; j < i; j++)
 746             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 747
 748           /* And the invariant coefficient.  */
 749           for (j = 0; j < invariants; j++)
 750             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 751
 752           /* And the constant.  */
 753           a[size] = LLE_CONSTANT (expression);
 754
 755           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 756           for (j = 0; j < i; j++)
 757             A[size][j] *= -1;
 758           A[size][i] = LLE_DENOMINATOR (expression);
 759           size++;
 760           /* Need to increase matrix sizes above.  */
 761           gcc_assert (size <= 127);
 762
 763         }
 764     }
 765
 766   /* Compute the lattice base x = base * y + origin, where y is the
 767      base space.  */
 768   lattice = lambda_lattice_compute_base (nest);
 769
 770   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 771
 772   /* A1 = A * L */
 773   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 774
 775   /* a1 = a - A * origin constant.  */
 776   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 777   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 778
 779   /* B1 = B - A * origin invariant.  */
 780   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 781                       invariants);
 782   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 783
 784   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 785      it by A1, then performing fourier motzkin.  */
 786
 787   invertedtrans = lambda_matrix_new (depth, depth);
 788
 789   /* Compute the inverse of U.  */
 790   lambda_matrix_inverse (LTM_MATRIX (trans),
 791                          invertedtrans, depth);
 792
 793   /* A = A1 inv(U).  */
 794   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 795
 796   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 797                                              A, B1, a1);
 798 }
 799
 800 /* Compute the loop bounds for the target space, using the bounds of
 801    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 802    The target space loop bounds are computed by multiplying the triangular
 803    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 804    the loop steps (positive or negative) is then used to swap the bounds if
 805    the loop counts downwards.
 806    Return the target loopnest.  */
 807
 808 static lambda_loopnest
 809 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 810                              lambda_trans_matrix H, lambda_vector stepsigns)
 811 {
 812   lambda_matrix inverse, H1;
 813   int determinant, i, j;
 814   int gcd1, gcd2;
 815   int factor;
 816
 817   lambda_loopnest target_nest;
 818   int depth, invariants;
 819   lambda_matrix target;
 820
 821   lambda_loop auxillary_loop, target_loop;
 822   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 823
 824   depth = LN_DEPTH (auxillary_nest);
 825   invariants = LN_INVARIANTS (auxillary_nest);
 826
 827   inverse = lambda_matrix_new (depth, depth);
 828   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 829
 830   /* H1 is H excluding its diagonal.  */
 831   H1 = lambda_matrix_new (depth, depth);
 832   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 833
 834   for (i = 0; i < depth; i++)
 835     H1[i][i] = 0;
 836
 837   /* Computes the linear offsets of the loop bounds.  */
 838   target = lambda_matrix_new (depth, depth);
 839   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 840
 841   target_nest = lambda_loopnest_new (depth, invariants);
 842
 843   for (i = 0; i < depth; i++)
 844     {
 845
 846       /* Get a new loop structure.  */
 847       target_loop = lambda_loop_new ();
 848       LN_LOOPS (target_nest)[i] = target_loop;
 849
 850       /* Computes the gcd of the coefficients of the linear part.  */
 851       gcd1 = gcd_vector (target[i], i);
 852
 853       /* Include the denominator in the GCD.  */
 854       gcd1 = gcd (gcd1, determinant);
 855
 856       /* Now divide through by the gcd.  */
 857       for (j = 0; j < i; j++)
 858         target[i][j] = target[i][j] / gcd1;
 859
 860       expression = lambda_linear_expression_new (depth, invariants);
 861       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 862       LLE_DENOMINATOR (expression) = determinant / gcd1;
 863       LLE_CONSTANT (expression) = 0;
 864       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 865                            invariants);
 866       LL_LINEAR_OFFSET (target_loop) = expression;
 867     }
 868
 869   /* For each loop, compute the new bounds from H.  */
 870   for (i = 0; i < depth; i++)
 871     {
 872       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 873       target_loop = LN_LOOPS (target_nest)[i];
 874       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 875       factor = LTM_MATRIX (H)[i][i];
 876
 877       /* First we do the lower bound.  */
 878       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 879
 880       for (; auxillary_expr != NULL;
 881            auxillary_expr = LLE_NEXT (auxillary_expr))
 882         {
 883           target_expr = lambda_linear_expression_new (depth, invariants);
 884           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 885                                      depth, inverse, depth,
 886                                      LLE_COEFFICIENTS (target_expr));
 887           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 888                                     LLE_COEFFICIENTS (target_expr), depth,
 889                                     factor);
 890
 891           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 892           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 893                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 894                               invariants);
 895           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 896                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     invariants, factor);
 898           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 899
 900           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 901             {
 902               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 903                 * determinant;
 904               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 905                                         (target_expr),
 906                                         LLE_INVARIANT_COEFFICIENTS
 907                                         (target_expr), invariants,
 908                                         determinant);
 909               LLE_DENOMINATOR (target_expr) =
 910                 LLE_DENOMINATOR (target_expr) * determinant;
 911             }
 912           /* Find the gcd and divide by it here, rather than doing it
 913              at the tree level.  */
 914           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 915           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 916                              invariants);
 917           gcd1 = gcd (gcd1, gcd2);
 918           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 919           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 920           for (j = 0; j < depth; j++)
 921             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 922           for (j = 0; j < invariants; j++)
 923             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 924           LLE_CONSTANT (target_expr) /= gcd1;
 925           LLE_DENOMINATOR (target_expr) /= gcd1;
 926           /* Ignore if identical to existing bound.  */
 927           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 928                           invariants))
 929             {
 930               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 931               LL_LOWER_BOUND (target_loop) = target_expr;
 932             }
 933         }
 934       /* Now do the upper bound.  */
 935       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 936
 937       for (; auxillary_expr != NULL;
 938            auxillary_expr = LLE_NEXT (auxillary_expr))
 939         {
 940           target_expr = lambda_linear_expression_new (depth, invariants);
 941           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 942                                      depth, inverse, depth,
 943                                      LLE_COEFFICIENTS (target_expr));
 944           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 945                                     LLE_COEFFICIENTS (target_expr), depth,
 946                                     factor);
 947           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 948           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 949                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 950                               invariants);
 951           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 952                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     invariants, factor);
 954           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 955
 956           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 957             {
 958               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 959                 * determinant;
 960               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 961                                         (target_expr),
 962                                         LLE_INVARIANT_COEFFICIENTS
 963                                         (target_expr), invariants,
 964                                         determinant);
 965               LLE_DENOMINATOR (target_expr) =
 966                 LLE_DENOMINATOR (target_expr) * determinant;
 967             }
 968           /* Find the gcd and divide by it here, instead of at the
 969              tree level.  */
 970           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 971           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 972                              invariants);
 973           gcd1 = gcd (gcd1, gcd2);
 974           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 975           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 976           for (j = 0; j < depth; j++)
 977             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 978           for (j = 0; j < invariants; j++)
 979             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 980           LLE_CONSTANT (target_expr) /= gcd1;
 981           LLE_DENOMINATOR (target_expr) /= gcd1;
 982           /* Ignore if equal to existing bound.  */
 983           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 984                           invariants))
 985             {
 986               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 987               LL_UPPER_BOUND (target_loop) = target_expr;
 988             }
 989         }
 990     }
 991   for (i = 0; i < depth; i++)
 992     {
 993       target_loop = LN_LOOPS (target_nest)[i];
 994       /* If necessary, exchange the upper and lower bounds and negate
 995          the step size.  */
 996       if (stepsigns[i] < 0)
 997         {
 998           LL_STEP (target_loop) *= -1;
 999           tmp_expr = LL_LOWER_BOUND (target_loop);
1000           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1001           LL_UPPER_BOUND (target_loop) = tmp_expr;
1002         }
1003     }
1004   return target_nest;
1005 }
1006
1007 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1008    result.  */
1009
1010 static lambda_vector
1011 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1012 {
1013   lambda_matrix matrix, H;
1014   int size;
1015   lambda_vector newsteps;
1016   int i, j, factor, minimum_column;
1017   int temp;
1018
1019   matrix = LTM_MATRIX (trans);
1020   size = LTM_ROWSIZE (trans);
1021   H = lambda_matrix_new (size, size);
1022
1023   newsteps = lambda_vector_new (size);
1024   lambda_vector_copy (stepsigns, newsteps, size);
1025
1026   lambda_matrix_copy (matrix, H, size, size);
1027
1028   for (j = 0; j < size; j++)
1029     {
1030       lambda_vector row;
1031       row = H[j];
1032       for (i = j; i < size; i++)
1033         if (row[i] < 0)
1034           lambda_matrix_col_negate (H, size, i);
1035       while (lambda_vector_first_nz (row, size, j + 1) < size)
1036         {
1037           minimum_column = lambda_vector_min_nz (row, size, j);
1038           lambda_matrix_col_exchange (H, size, j, minimum_column);
1039
1040           temp = newsteps[j];
1041           newsteps[j] = newsteps[minimum_column];
1042           newsteps[minimum_column] = temp;
1043
1044           for (i = j + 1; i < size; i++)
1045             {
1046               factor = row[i] / row[j];
1047               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1048             }
1049         }
1050     }
1051   return newsteps;
1052 }
1053
1054 /* Transform NEST according to TRANS, and return the new loopnest.
1055    This involves
1056    1. Computing a lattice base for the transformation
1057    2. Composing the dense base with the specified transformation (TRANS)
1058    3. Decomposing the combined transformation into a lower triangular portion,
1059    and a unimodular portion.
1060    4. Computing the auxiliary nest using the unimodular portion.
1061    5. Computing the target nest using the auxiliary nest and the lower
1062    triangular portion.  */
1063
1064 lambda_loopnest
1065 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1066 {
1067   lambda_loopnest auxillary_nest, target_nest;
1068
1069   int depth, invariants;
1070   int i, j;
1071   lambda_lattice lattice;
1072   lambda_trans_matrix trans1, H, U;
1073   lambda_loop loop;
1074   lambda_linear_expression expression;
1075   lambda_vector origin;
1076   lambda_matrix origin_invariants;
1077   lambda_vector stepsigns;
1078   int f;
1079
1080   depth = LN_DEPTH (nest);
1081   invariants = LN_INVARIANTS (nest);
1082
1083   /* Keep track of the signs of the loop steps.  */
1084   stepsigns = lambda_vector_new (depth);
1085   for (i = 0; i < depth; i++)
1086     {
1087       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1088         stepsigns[i] = 1;
1089       else
1090         stepsigns[i] = -1;
1091     }
1092
1093   /* Compute the lattice base.  */
1094   lattice = lambda_lattice_compute_base (nest);
1095   trans1 = lambda_trans_matrix_new (depth, depth);
1096
1097   /* Multiply the transformation matrix by the lattice base.  */
1098
1099   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1100                       LTM_MATRIX (trans1), depth, depth, depth);
1101
1102   /* Compute the Hermite normal form for the new transformation matrix.  */
1103   H = lambda_trans_matrix_new (depth, depth);
1104   U = lambda_trans_matrix_new (depth, depth);
1105   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1106                          LTM_MATRIX (U));
1107
1108   /* Compute the auxiliary loop nest's space from the unimodular
1109      portion.  */
1110   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1111
1112   /* Compute the loop step signs from the old step signs and the
1113      transformation matrix.  */
1114   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1115
1116   /* Compute the target loop nest space from the auxiliary nest and
1117      the lower triangular matrix H.  */
1118   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1119   origin = lambda_vector_new (depth);
1120   origin_invariants = lambda_matrix_new (depth, invariants);
1121   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1122                              LATTICE_ORIGIN (lattice), origin);
1123   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1124                       origin_invariants, depth, depth, invariants);
1125
1126   for (i = 0; i < depth; i++)
1127     {
1128       loop = LN_LOOPS (target_nest)[i];
1129       expression = LL_LINEAR_OFFSET (loop);
1130       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1131         f = 1;
1132       else
1133         f = LLE_DENOMINATOR (expression);
1134
1135       LLE_CONSTANT (expression) += f * origin[i];
1136
1137       for (j = 0; j < invariants; j++)
1138         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1139           f * origin_invariants[i][j];
1140     }
1141
1142   return target_nest;
1143
1144 }
1145
1146 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1147    return the new expression.  DEPTH is the depth of the loopnest.
1148    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1149    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1150    is the amount we have to add/subtract from the expression because of the
1151    type of comparison it is used in.  */
1152
1153 static lambda_linear_expression
1154 gcc_tree_to_linear_expression (int depth, tree expr,
1155                                VEC(tree,heap) *outerinductionvars,
1156                                VEC(tree,heap) *invariants, int extra)
1157 {
1158   lambda_linear_expression lle = NULL;
1159   switch (TREE_CODE (expr))
1160     {
1161     case INTEGER_CST:
1162       {
1163         lle = lambda_linear_expression_new (depth, 2 * depth);
1164         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1165         if (extra != 0)
1166           LLE_CONSTANT (lle) += extra;
1167
1168         LLE_DENOMINATOR (lle) = 1;
1169       }
1170       break;
1171     case SSA_NAME:
1172       {
1173         tree iv, invar;
1174         size_t i;
1175         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1176           if (iv != NULL)
1177             {
1178               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1179                 {
1180                   lle = lambda_linear_expression_new (depth, 2 * depth);
1181                   LLE_COEFFICIENTS (lle)[i] = 1;
1182                   if (extra != 0)
1183                     LLE_CONSTANT (lle) = extra;
1184
1185                   LLE_DENOMINATOR (lle) = 1;
1186                 }
1187             }
1188         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1189           if (invar != NULL)
1190             {
1191               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1192                 {
1193                   lle = lambda_linear_expression_new (depth, 2 * depth);
1194                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1195                   if (extra != 0)
1196                     LLE_CONSTANT (lle) = extra;
1197                   LLE_DENOMINATOR (lle) = 1;
1198                 }
1199             }
1200       }
1201       break;
1202     default:
1203       return NULL;
1204     }
1205
1206   return lle;
1207 }
1208
1209 /* Return the depth of the loopnest NEST */
1210
1211 static int
1212 depth_of_nest (struct loop *nest)
1213 {
1214   size_t depth = 0;
1215   while (nest)
1216     {
1217       depth++;
1218       nest = nest->inner;
1219     }
1220   return depth;
1221 }
1222
1223
1224 /* Return true if OP is invariant in LOOP and all outer loops.  */
1225
1226 static bool
1227 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1228 {
1229   if (is_gimple_min_invariant (op))
1230     return true;
1231   if (loop->depth == 0)
1232     return true;
1233   if (!expr_invariant_in_loop_p (loop, op))
1234     return false;
1235   if (loop->outer
1236       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1237     return false;
1238   return true;
1239 }
1240
1241 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1242    or NULL if it could not be converted.
1243    DEPTH is the depth of the loop.
1244    INVARIANTS is a pointer to the array of loop invariants.
1245    The induction variable for this loop should be stored in the parameter
1246    OURINDUCTIONVAR.
1247    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1248
1249 static lambda_loop
1250 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1251                          VEC(tree,heap) ** invariants,
1252                          tree * ourinductionvar,
1253                          VEC(tree,heap) * outerinductionvars,
1254                          VEC(tree,heap) ** lboundvars,
1255                          VEC(tree,heap) ** uboundvars,
1256                          VEC(int,heap) ** steps)
1257 {
1258   tree phi;
1259   tree exit_cond;
1260   tree access_fn, inductionvar;
1261   tree step;
1262   lambda_loop lloop = NULL;
1263   lambda_linear_expression lbound, ubound;
1264   tree test;
1265   int stepint;
1266   int extra = 0;
1267   tree lboundvar, uboundvar, uboundresult;
1268
1269   /* Find out induction var and exit condition.  */
1270   inductionvar = find_induction_var_from_exit_cond (loop);
1271   exit_cond = get_loop_exit_condition (loop);
1272
1273   if (inductionvar == NULL || exit_cond == NULL)
1274     {
1275       if (dump_file && (dump_flags & TDF_DETAILS))
1276         fprintf (dump_file,
1277                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1278       return NULL;
1279     }
1280
1281   test = TREE_OPERAND (exit_cond, 0);
1282
1283   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1284     {
1285
1286       if (dump_file && (dump_flags & TDF_DETAILS))
1287         fprintf (dump_file,
1288                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1289
1290       return NULL;
1291     }
1292
1293   phi = SSA_NAME_DEF_STMT (inductionvar);
1294   if (TREE_CODE (phi) != PHI_NODE)
1295     {
1296       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1297       if (!phi)
1298         {
1299
1300           if (dump_file && (dump_flags & TDF_DETAILS))
1301             fprintf (dump_file,
1302                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1303
1304           return NULL;
1305         }
1306
1307       phi = SSA_NAME_DEF_STMT (phi);
1308       if (TREE_CODE (phi) != PHI_NODE)
1309         {
1310
1311           if (dump_file && (dump_flags & TDF_DETAILS))
1312             fprintf (dump_file,
1313                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1314           return NULL;
1315         }
1316
1317     }
1318
1319   /* The induction variable name/version we want to put in the array is the
1320      result of the induction variable phi node.  */
1321   *ourinductionvar = PHI_RESULT (phi);
1322   access_fn = instantiate_parameters
1323     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1324   if (access_fn == chrec_dont_know)
1325     {
1326       if (dump_file && (dump_flags & TDF_DETAILS))
1327         fprintf (dump_file,
1328                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1329
1330       return NULL;
1331     }
1332
1333   step = evolution_part_in_loop_num (access_fn, loop->num);
1334   if (!step || step == chrec_dont_know)
1335     {
1336       if (dump_file && (dump_flags & TDF_DETAILS))
1337         fprintf (dump_file,
1338                  "Unable to convert loop: Cannot determine step of loop.\n");
1339
1340       return NULL;
1341     }
1342   if (TREE_CODE (step) != INTEGER_CST)
1343     {
1344
1345       if (dump_file && (dump_flags & TDF_DETAILS))
1346         fprintf (dump_file,
1347                  "Unable to convert loop: Step of loop is not integer.\n");
1348       return NULL;
1349     }
1350
1351   stepint = TREE_INT_CST_LOW (step);
1352
1353   /* Only want phis for induction vars, which will have two
1354      arguments.  */
1355   if (PHI_NUM_ARGS (phi) != 2)
1356     {
1357       if (dump_file && (dump_flags & TDF_DETAILS))
1358         fprintf (dump_file,
1359                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1360       return NULL;
1361     }
1362
1363   /* Another induction variable check. One argument's source should be
1364      in the loop, one outside the loop.  */
1365   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1366       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1367     {
1368
1369       if (dump_file && (dump_flags & TDF_DETAILS))
1370         fprintf (dump_file,
1371                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1372
1373       return NULL;
1374     }
1375
1376   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1377     {
1378       lboundvar = PHI_ARG_DEF (phi, 1);
1379       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1380                                               outerinductionvars, *invariants,
1381                                               0);
1382     }
1383   else
1384     {
1385       lboundvar = PHI_ARG_DEF (phi, 0);
1386       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1387                                               outerinductionvars, *invariants,
1388                                               0);
1389     }
1390
1391   if (!lbound)
1392     {
1393
1394       if (dump_file && (dump_flags & TDF_DETAILS))
1395         fprintf (dump_file,
1396                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1397
1398       return NULL;
1399     }
1400   /* One part of the test may be a loop invariant tree.  */
1401   VEC_reserve (tree, heap, *invariants, 1);
1402   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1403       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1404     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1405   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1406            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1407     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1408
1409   /* The non-induction variable part of the test is the upper bound variable.
1410    */
1411   if (TREE_OPERAND (test, 0) == inductionvar)
1412     uboundvar = TREE_OPERAND (test, 1);
1413   else
1414     uboundvar = TREE_OPERAND (test, 0);
1415
1416
1417   /* We only size the vectors assuming we have, at max, 2 times as many
1418      invariants as we do loops (one for each bound).
1419      This is just an arbitrary number, but it has to be matched against the
1420      code below.  */
1421   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1422
1423
1424   /* We might have some leftover.  */
1425   if (TREE_CODE (test) == LT_EXPR)
1426     extra = -1 * stepint;
1427   else if (TREE_CODE (test) == NE_EXPR)
1428     extra = -1 * stepint;
1429   else if (TREE_CODE (test) == GT_EXPR)
1430     extra = -1 * stepint;
1431   else if (TREE_CODE (test) == EQ_EXPR)
1432     extra = 1 * stepint;
1433
1434   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1435                                           outerinductionvars,
1436                                           *invariants, extra);
1437   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1438                         build_int_cst (TREE_TYPE (uboundvar), extra));
1439   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1440   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1441   VEC_safe_push (int, heap, *steps, stepint);
1442   if (!ubound)
1443     {
1444       if (dump_file && (dump_flags & TDF_DETAILS))
1445         fprintf (dump_file,
1446                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1447       return NULL;
1448     }
1449
1450   lloop = lambda_loop_new ();
1451   LL_STEP (lloop) = stepint;
1452   LL_LOWER_BOUND (lloop) = lbound;
1453   LL_UPPER_BOUND (lloop) = ubound;
1454   return lloop;
1455 }
1456
1457 /* Given a LOOP, find the induction variable it is testing against in the exit
1458    condition.  Return the induction variable if found, NULL otherwise.  */
1459
1460 static tree
1461 find_induction_var_from_exit_cond (struct loop *loop)
1462 {
1463   tree expr = get_loop_exit_condition (loop);
1464   tree ivarop;
1465   tree test;
1466   if (expr == NULL_TREE)
1467     return NULL_TREE;
1468   if (TREE_CODE (expr) != COND_EXPR)
1469     return NULL_TREE;
1470   test = TREE_OPERAND (expr, 0);
1471   if (!COMPARISON_CLASS_P (test))
1472     return NULL_TREE;
1473
1474   /* Find the side that is invariant in this loop. The ivar must be the other
1475      side.  */
1476
1477   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1478       ivarop = TREE_OPERAND (test, 1);
1479   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1480       ivarop = TREE_OPERAND (test, 0);
1481   else
1482     return NULL_TREE;
1483
1484   if (TREE_CODE (ivarop) != SSA_NAME)
1485     return NULL_TREE;
1486   return ivarop;
1487 }
1488
1489 DEF_VEC_P(lambda_loop);
1490 DEF_VEC_ALLOC_P(lambda_loop,heap);
1491
1492 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1493    Return the new loop nest.
1494    INDUCTIONVARS is a pointer to an array of induction variables for the
1495    loopnest that will be filled in during this process.
1496    INVARIANTS is a pointer to an array of invariants that will be filled in
1497    during this process.  */
1498
1499 lambda_loopnest
1500 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1501                                  struct loop * loop_nest,
1502                                  VEC(tree,heap) **inductionvars,
1503                                  VEC(tree,heap) **invariants,
1504                                  bool need_perfect_nest)
1505 {
1506   lambda_loopnest ret = NULL;
1507   struct loop *temp;
1508   int depth = 0;
1509   size_t i;
1510   VEC(lambda_loop,heap) *loops = NULL;
1511   VEC(tree,heap) *uboundvars = NULL;
1512   VEC(tree,heap) *lboundvars  = NULL;
1513   VEC(int,heap) *steps = NULL;
1514   lambda_loop newloop;
1515   tree inductionvar = NULL;
1516
1517   depth = depth_of_nest (loop_nest);
1518   temp = loop_nest;
1519   while (temp)
1520     {
1521       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1522                                          &inductionvar, *inductionvars,
1523                                          &lboundvars, &uboundvars,
1524                                          &steps);
1525       if (!newloop)
1526         return NULL;
1527       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1528       VEC_safe_push (lambda_loop, heap, loops, newloop);
1529       temp = temp->inner;
1530     }
1531   if (need_perfect_nest)
1532     {
1533       if (!perfect_nestify (currloops, loop_nest,
1534                             lboundvars, uboundvars, steps, *inductionvars))
1535         {
1536           if (dump_file)
1537             fprintf (dump_file,
1538                      "Not a perfect loop nest and couldn't convert to one.\n");
1539           goto fail;
1540         }
1541       else if (dump_file)
1542         fprintf (dump_file,
1543                  "Successfully converted loop nest to perfect loop nest.\n");
1544     }
1545   ret = lambda_loopnest_new (depth, 2 * depth);
1546   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1547     LN_LOOPS (ret)[i] = newloop;
1548  fail:
1549   VEC_free (lambda_loop, heap, loops);
1550   VEC_free (tree, heap, uboundvars);
1551   VEC_free (tree, heap, lboundvars);
1552   VEC_free (int, heap, steps);
1553
1554   return ret;
1555 }
1556
1557 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1558    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1559    inserted for us are stored.  INDUCTION_VARS is the array of induction
1560    variables for the loop this LBV is from.  TYPE is the tree type to use for
1561    the variables and trees involved.  */
1562
1563 static tree
1564 lbv_to_gcc_expression (lambda_body_vector lbv,
1565                        tree type, VEC(tree,heap) *induction_vars,
1566                        tree *stmts_to_insert)
1567 {
1568   tree stmts, stmt, resvar, name;
1569   tree iv;
1570   size_t i;
1571   tree_stmt_iterator tsi;
1572
1573   /* Create a statement list and a linear expression temporary.  */
1574   stmts = alloc_stmt_list ();
1575   resvar = create_tmp_var (type, "lbvtmp");
1576   add_referenced_tmp_var (resvar);
1577
1578   /* Start at 0.  */
1579   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1580   name = make_ssa_name (resvar, stmt);
1581   TREE_OPERAND (stmt, 0) = name;
1582   tsi = tsi_last (stmts);
1583   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1584
1585   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1586     {
1587       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1588         {
1589           tree newname;
1590           tree coeffmult;
1591
1592           /* newname = coefficient * induction_variable */
1593           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1594           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1595                         fold_build2 (MULT_EXPR, type, iv, coeffmult));
1596
1597           newname = make_ssa_name (resvar, stmt);
1598           TREE_OPERAND (stmt, 0) = newname;
1599           fold_stmt (&stmt);
1600           tsi = tsi_last (stmts);
1601           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1602
1603           /* name = name + newname */
1604           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1605                         build (PLUS_EXPR, type, name, newname));
1606           name = make_ssa_name (resvar, stmt);
1607           TREE_OPERAND (stmt, 0) = name;
1608           fold_stmt (&stmt);
1609           tsi = tsi_last (stmts);
1610           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1611
1612         }
1613     }
1614
1615   /* Handle any denominator that occurs.  */
1616   if (LBV_DENOMINATOR (lbv) != 1)
1617     {
1618       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1619       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1620                     build (CEIL_DIV_EXPR, type, name, denominator));
1621       name = make_ssa_name (resvar, stmt);
1622       TREE_OPERAND (stmt, 0) = name;
1623       fold_stmt (&stmt);
1624       tsi = tsi_last (stmts);
1625       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1626     }
1627   *stmts_to_insert = stmts;
1628   return name;
1629 }
1630
1631 /* Convert a linear expression from coefficient and constant form to a
1632    gcc tree.
1633    Return the tree that represents the final value of the expression.
1634    LLE is the linear expression to convert.
1635    OFFSET is the linear offset to apply to the expression.
1636    TYPE is the tree type to use for the variables and math.
1637    INDUCTION_VARS is a vector of induction variables for the loops.
1638    INVARIANTS is a vector of the loop nest invariants.
1639    WRAP specifies what tree code to wrap the results in, if there is more than
1640    one (it is either MAX_EXPR, or MIN_EXPR).
1641    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1642    statements that need to be inserted for the linear expression.  */
1643
1644 static tree
1645 lle_to_gcc_expression (lambda_linear_expression lle,
1646                        lambda_linear_expression offset,
1647                        tree type,
1648                        VEC(tree,heap) *induction_vars,
1649                        VEC(tree,heap) *invariants,
1650                        enum tree_code wrap, tree *stmts_to_insert)
1651 {
1652   tree stmts, stmt, resvar, name;
1653   size_t i;
1654   tree_stmt_iterator tsi;
1655   tree iv, invar;
1656   VEC(tree,heap) *results = NULL;
1657
1658   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1659   name = NULL_TREE;
1660   /* Create a statement list and a linear expression temporary.  */
1661   stmts = alloc_stmt_list ();
1662   resvar = create_tmp_var (type, "lletmp");
1663   add_referenced_tmp_var (resvar);
1664
1665   /* Build up the linear expressions, and put the variable representing the
1666      result in the results array.  */
1667   for (; lle != NULL; lle = LLE_NEXT (lle))
1668     {
1669       /* Start at name = 0.  */
1670       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1671       name = make_ssa_name (resvar, stmt);
1672       TREE_OPERAND (stmt, 0) = name;
1673       fold_stmt (&stmt);
1674       tsi = tsi_last (stmts);
1675       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1676
1677       /* First do the induction variables.
1678          at the end, name = name + all the induction variables added
1679          together.  */
1680       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1681         {
1682           if (LLE_COEFFICIENTS (lle)[i] != 0)
1683             {
1684               tree newname;
1685               tree mult;
1686               tree coeff;
1687
1688               /* mult = induction variable * coefficient.  */
1689               if (LLE_COEFFICIENTS (lle)[i] == 1)
1690                 {
1691                   mult = VEC_index (tree, induction_vars, i);
1692                 }
1693               else
1694                 {
1695                   coeff = build_int_cst (type,
1696                                          LLE_COEFFICIENTS (lle)[i]);
1697                   mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1698                 }
1699
1700               /* newname = mult */
1701               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1702               newname = make_ssa_name (resvar, stmt);
1703               TREE_OPERAND (stmt, 0) = newname;
1704               fold_stmt (&stmt);
1705               tsi = tsi_last (stmts);
1706               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1707
1708               /* name = name + newname */
1709               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1710                             build (PLUS_EXPR, type, name, newname));
1711               name = make_ssa_name (resvar, stmt);
1712               TREE_OPERAND (stmt, 0) = name;
1713               fold_stmt (&stmt);
1714               tsi = tsi_last (stmts);
1715               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1716             }
1717         }
1718
1719       /* Handle our invariants.
1720          At the end, we have name = name + result of adding all multiplied
1721          invariants.  */
1722       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1723         {
1724           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1725             {
1726               tree newname;
1727               tree mult;
1728               tree coeff;
1729               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1730               /* mult = invariant * coefficient  */
1731               if (invcoeff == 1)
1732                 {
1733                   mult = invar;
1734                 }
1735               else
1736                 {
1737                   coeff = build_int_cst (type, invcoeff);
1738                   mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1739                 }
1740
1741               /* newname = mult */
1742               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1743               newname = make_ssa_name (resvar, stmt);
1744               TREE_OPERAND (stmt, 0) = newname;
1745               fold_stmt (&stmt);
1746               tsi = tsi_last (stmts);
1747               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1748
1749               /* name = name + newname */
1750               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1751                             build (PLUS_EXPR, type, name, newname));
1752               name = make_ssa_name (resvar, stmt);
1753               TREE_OPERAND (stmt, 0) = name;
1754               fold_stmt (&stmt);
1755               tsi = tsi_last (stmts);
1756               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1757             }
1758         }
1759
1760       /* Now handle the constant.
1761          name = name + constant.  */
1762       if (LLE_CONSTANT (lle) != 0)
1763         {
1764           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1765                         build (PLUS_EXPR, type, name,
1766                                build_int_cst (type, LLE_CONSTANT (lle))));
1767           name = make_ssa_name (resvar, stmt);
1768           TREE_OPERAND (stmt, 0) = name;
1769           fold_stmt (&stmt);
1770           tsi = tsi_last (stmts);
1771           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1772         }
1773
1774       /* Now handle the offset.
1775          name = name + linear offset.  */
1776       if (LLE_CONSTANT (offset) != 0)
1777         {
1778           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1779                         build (PLUS_EXPR, type, name,
1780                                build_int_cst (type, LLE_CONSTANT (offset))));
1781           name = make_ssa_name (resvar, stmt);
1782           TREE_OPERAND (stmt, 0) = name;
1783           fold_stmt (&stmt);
1784           tsi = tsi_last (stmts);
1785           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1786         }
1787
1788       /* Handle any denominator that occurs.  */
1789       if (LLE_DENOMINATOR (lle) != 1)
1790         {
1791           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1792           stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1793                         type, name, stmt);
1794           stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1795
1796           /* name = {ceil, floor}(name/denominator) */
1797           name = make_ssa_name (resvar, stmt);
1798           TREE_OPERAND (stmt, 0) = name;
1799           tsi = tsi_last (stmts);
1800           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1801         }
1802       VEC_safe_push (tree, heap, results, name);
1803     }
1804
1805   /* Again, out of laziness, we don't handle this case yet.  It's not
1806      hard, it just hasn't occurred.  */
1807   gcc_assert (VEC_length (tree, results) <= 2);
1808
1809   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1810   if (VEC_length (tree, results) > 1)
1811     {
1812       tree op1 = VEC_index (tree, results, 0);
1813       tree op2 = VEC_index (tree, results, 1);
1814       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1815                     build (wrap, type, op1, op2));
1816       name = make_ssa_name (resvar, stmt);
1817       TREE_OPERAND (stmt, 0) = name;
1818       tsi = tsi_last (stmts);
1819       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1820     }
1821
1822   VEC_free (tree, heap, results);
1823
1824   *stmts_to_insert = stmts;
1825   return name;
1826 }
1827
1828 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1829    it, back into gcc code.  This changes the
1830    loops, their induction variables, and their bodies, so that they
1831    match the transformed loopnest.
1832    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1833    loopnest.
1834    OLD_IVS is a vector of induction variables from the old loopnest.
1835    INVARIANTS is a vector of loop invariants from the old loopnest.
1836    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1837    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1838    NEW_LOOPNEST.  */
1839
1840 void
1841 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1842                                  VEC(tree,heap) *old_ivs,
1843                                  VEC(tree,heap) *invariants,
1844                                  lambda_loopnest new_loopnest,
1845                                  lambda_trans_matrix transform)
1846 {
1847   struct loop *temp;
1848   size_t i = 0;
1849   size_t depth = 0;
1850   VEC(tree,heap) *new_ivs = NULL;
1851   tree oldiv;
1852
1853   block_stmt_iterator bsi;
1854
1855   if (dump_file)
1856     {
1857       transform = lambda_trans_matrix_inverse (transform);
1858       fprintf (dump_file, "Inverse of transformation matrix:\n");
1859       print_lambda_trans_matrix (dump_file, transform);
1860     }
1861   depth = depth_of_nest (old_loopnest);
1862   temp = old_loopnest;
1863
1864   while (temp)
1865     {
1866       lambda_loop newloop;
1867       basic_block bb;
1868       edge exit;
1869       tree ivvar, ivvarinced, exitcond, stmts;
1870       enum tree_code testtype;
1871       tree newupperbound, newlowerbound;
1872       lambda_linear_expression offset;
1873       tree type;
1874       bool insert_after;
1875       tree inc_stmt;
1876
1877       oldiv = VEC_index (tree, old_ivs, i);
1878       type = TREE_TYPE (oldiv);
1879
1880       /* First, build the new induction variable temporary  */
1881
1882       ivvar = create_tmp_var (type, "lnivtmp");
1883       add_referenced_tmp_var (ivvar);
1884
1885       VEC_safe_push (tree, heap, new_ivs, ivvar);
1886
1887       newloop = LN_LOOPS (new_loopnest)[i];
1888
1889       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1890          cases for now.  */
1891       offset = LL_LINEAR_OFFSET (newloop);
1892
1893       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1894                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1895
1896       /* Now build the  new lower bounds, and insert the statements
1897          necessary to generate it on the loop preheader.  */
1898       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1899                                              LL_LINEAR_OFFSET (newloop),
1900                                              type,
1901                                              new_ivs,
1902                                              invariants, MAX_EXPR, &stmts);
1903       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1904       bsi_commit_edge_inserts ();
1905       /* Build the new upper bound and insert its statements in the
1906          basic block of the exit condition */
1907       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1908                                              LL_LINEAR_OFFSET (newloop),
1909                                              type,
1910                                              new_ivs,
1911                                              invariants, MIN_EXPR, &stmts);
1912       exit = temp->single_exit;
1913       exitcond = get_loop_exit_condition (temp);
1914       bb = bb_for_stmt (exitcond);
1915       bsi = bsi_start (bb);
1916       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1917
1918       /* Create the new iv.  */
1919
1920       standard_iv_increment_position (temp, &bsi, &insert_after);
1921       create_iv (newlowerbound,
1922                  build_int_cst (type, LL_STEP (newloop)),
1923                  ivvar, temp, &bsi, insert_after, &ivvar,
1924                  NULL);
1925
1926       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1927          dominate the block containing the exit condition.
1928          So we simply create our own incremented iv to use in the new exit
1929          test,  and let redundancy elimination sort it out.  */
1930       inc_stmt = build (PLUS_EXPR, type,
1931                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1932       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1933                         inc_stmt);
1934       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1935       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1936       bsi = bsi_for_stmt (exitcond);
1937       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1938
1939       /* Replace the exit condition with the new upper bound
1940          comparison.  */
1941
1942       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1943
1944       /* We want to build a conditional where true means exit the loop, and
1945          false means continue the loop.
1946          So swap the testtype if this isn't the way things are.*/
1947
1948       if (exit->flags & EDGE_FALSE_VALUE)
1949         testtype = swap_tree_comparison (testtype);
1950
1951       COND_EXPR_COND (exitcond) = build (testtype,
1952                                          boolean_type_node,
1953                                          newupperbound, ivvarinced);
1954       update_stmt (exitcond);
1955       VEC_replace (tree, new_ivs, i, ivvar);
1956
1957       i++;
1958       temp = temp->inner;
1959     }
1960
1961   /* Rewrite uses of the old ivs so that they are now specified in terms of
1962      the new ivs.  */
1963
1964   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1965     {
1966       imm_use_iterator imm_iter;
1967       use_operand_p imm_use;
1968       tree oldiv_def;
1969       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1970
1971       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1972         oldiv_def = PHI_RESULT (oldiv_stmt);
1973       else
1974         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1975       gcc_assert (oldiv_def != NULL_TREE);
1976
1977       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1978         {
1979           tree stmt = USE_STMT (imm_use);
1980           use_operand_p use_p;
1981           ssa_op_iter iter;
1982           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1983           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1984             {
1985               if (USE_FROM_PTR (use_p) == oldiv)
1986                 {
1987                   tree newiv, stmts;
1988                   lambda_body_vector lbv, newlbv;
1989                   /* Compute the new expression for the induction
1990                      variable.  */
1991                   depth = VEC_length (tree, new_ivs);
1992                   lbv = lambda_body_vector_new (depth);
1993                   LBV_COEFFICIENTS (lbv)[i] = 1;
1994
1995                   newlbv = lambda_body_vector_compute_new (transform, lbv);
1996
1997                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1998                                                  new_ivs, &stmts);
1999                   bsi = bsi_for_stmt (stmt);
2000                   /* Insert the statements to build that
2001                      expression.  */
2002                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2003                   propagate_value (use_p, newiv);
2004                   update_stmt (stmt);
2005
2006                 }
2007             }
2008         }
2009     }
2010   VEC_free (tree, heap, new_ivs);
2011 }
2012
2013 /* Return TRUE if this is not interesting statement from the perspective of
2014    determining if we have a perfect loop nest.  */
2015
2016 static bool
2017 not_interesting_stmt (tree stmt)
2018 {
2019   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2020      loop, we would have already failed the number of exits tests.  */
2021   if (TREE_CODE (stmt) == LABEL_EXPR
2022       || TREE_CODE (stmt) == GOTO_EXPR
2023       || TREE_CODE (stmt) == COND_EXPR)
2024     return true;
2025   return false;
2026 }
2027
2028 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2029
2030 static bool
2031 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2032 {
2033   int i;
2034   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2035     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2036       if (PHI_ARG_DEF (phi, i) == def)
2037         return true;
2038   return false;
2039 }
2040
2041 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2042
2043 static bool
2044 stmt_uses_phi_result (tree stmt, tree phi_result)
2045 {
2046   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2047
2048   /* This is conservatively true, because we only want SIMPLE bumpers
2049      of the form x +- constant for our pass.  */
2050   return (use == phi_result);
2051 }
2052
2053 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2054    in-loop-edge in a phi node, and the operand it uses is the result of that
2055    phi node.
2056    I.E. i_29 = i_3 + 1
2057         i_3 = PHI (0, i_29);  */
2058
2059 static bool
2060 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2061 {
2062   tree use;
2063   tree def;
2064   imm_use_iterator iter;
2065   use_operand_p use_p;
2066
2067   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2068   if (!def)
2069     return false;
2070
2071   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2072     {
2073       use = USE_STMT (use_p);
2074       if (TREE_CODE (use) == PHI_NODE)
2075         {
2076           if (phi_loop_edge_uses_def (loop, use, def))
2077             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2078               return true;
2079         }
2080     }
2081   return false;
2082 }
2083
2084
2085 /* Return true if LOOP is a perfect loop nest.
2086    Perfect loop nests are those loop nests where all code occurs in the
2087    innermost loop body.
2088    If S is a program statement, then
2089
2090    i.e.
2091    DO I = 1, 20
2092        S1
2093        DO J = 1, 20
2094        ...
2095        END DO
2096    END DO
2097    is not a perfect loop nest because of S1.
2098
2099    DO I = 1, 20
2100       DO J = 1, 20
2101         S1
2102         ...
2103       END DO
2104    END DO
2105    is a perfect loop nest.
2106
2107    Since we don't have high level loops anymore, we basically have to walk our
2108    statements and ignore those that are there because the loop needs them (IE
2109    the induction variable increment, and jump back to the top of the loop).  */
2110
2111 bool
2112 perfect_nest_p (struct loop *loop)
2113 {
2114   basic_block *bbs;
2115   size_t i;
2116   tree exit_cond;
2117
2118   if (!loop->inner)
2119     return true;
2120   bbs = get_loop_body (loop);
2121   exit_cond = get_loop_exit_condition (loop);
2122   for (i = 0; i < loop->num_nodes; i++)
2123     {
2124       if (bbs[i]->loop_father == loop)
2125         {
2126           block_stmt_iterator bsi;
2127           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2128             {
2129               tree stmt = bsi_stmt (bsi);
2130               if (stmt == exit_cond
2131                   || not_interesting_stmt (stmt)
2132                   || stmt_is_bumper_for_loop (loop, stmt))
2133                 continue;
2134               free (bbs);
2135               return false;
2136             }
2137         }
2138     }
2139   free (bbs);
2140   /* See if the inner loops are perfectly nested as well.  */
2141   if (loop->inner)
2142     return perfect_nest_p (loop->inner);
2143   return true;
2144 }
2145
2146 /* Replace the USES of X in STMT, or uses with the same step as X  with Y.  */
2147
2148 static void
2149 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2150                                 int xstep, tree y)
2151 {
2152   ssa_op_iter iter;
2153   use_operand_p use_p;
2154
2155   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2156     {
2157       tree use = USE_FROM_PTR (use_p);
2158       tree step = NULL_TREE;
2159       tree access_fn = NULL_TREE;
2160
2161
2162       access_fn = instantiate_parameters
2163         (loop, analyze_scalar_evolution (loop, use));
2164       if (access_fn != NULL_TREE && access_fn != chrec_dont_know)
2165         step = evolution_part_in_loop_num (access_fn, loop->num);
2166       if ((step && step != chrec_dont_know
2167            && TREE_CODE (step) == INTEGER_CST
2168            && int_cst_value (step) == xstep)
2169           || USE_FROM_PTR (use_p) == x)
2170         SET_USE (use_p, y);
2171     }
2172 }
2173
2174 /* Return TRUE if STMT uses tree OP in it's uses.  */
2175
2176 static bool
2177 stmt_uses_op (tree stmt, tree op)
2178 {
2179   ssa_op_iter iter;
2180   tree use;
2181
2182   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2183     {
2184       if (use == op)
2185         return true;
2186     }
2187   return false;
2188 }
2189
2190 /* Return true if STMT is an exit PHI for LOOP */
2191
2192 static bool
2193 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2194 {
2195
2196   if (TREE_CODE (stmt) != PHI_NODE
2197       || PHI_NUM_ARGS (stmt) != 1
2198       || bb_for_stmt (stmt) != loop->single_exit->dest)
2199     return false;
2200
2201   return true;
2202 }
2203
2204 /* Return true if STMT can be put back into INNER, a loop by moving it to the
2205    beginning of that loop.  */
2206
2207 static bool
2208 can_put_in_inner_loop (struct loop *inner, tree stmt)
2209 {
2210   imm_use_iterator imm_iter;
2211   use_operand_p use_p;
2212   basic_block use_bb = NULL;
2213
2214   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2215   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2216       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2217     return false;
2218
2219   /* We require that the basic block of all uses be the same, or the use be an
2220      exit phi.  */
2221   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2222     {
2223       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2224         {
2225           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2226
2227           if (!flow_bb_inside_loop_p (inner, immbb))
2228             return false;
2229           if (use_bb == NULL)
2230             use_bb = immbb;
2231           else if (immbb != use_bb)
2232             return false;
2233         }
2234     }
2235   return true;
2236
2237 }
2238
2239
2240 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2241    one.  LOOPIVS is a vector of induction variables, one per loop.
2242    ATM, we only handle imperfect nests of depth 2, where all of the statements
2243    occur after the inner loop.  */
2244
2245 static bool
2246 can_convert_to_perfect_nest (struct loop *loop,
2247                              VEC(tree,heap) *loopivs)
2248 {
2249   basic_block *bbs;
2250   tree exit_condition, phi;
2251   size_t i;
2252   block_stmt_iterator bsi;
2253   basic_block exitdest;
2254
2255   /* Can't handle triply nested+ loops yet.  */
2256   if (!loop->inner || loop->inner->inner)
2257     return false;
2258
2259   bbs = get_loop_body (loop);
2260   exit_condition = get_loop_exit_condition (loop);
2261   for (i = 0; i < loop->num_nodes; i++)
2262     {
2263       if (bbs[i]->loop_father == loop)
2264         {
2265           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2266             {
2267               size_t j;
2268               tree stmt = bsi_stmt (bsi);
2269               tree iv;
2270
2271               if (stmt == exit_condition
2272                   || not_interesting_stmt (stmt)
2273                   || stmt_is_bumper_for_loop (loop, stmt))
2274                 continue;
2275               /* If the statement uses inner loop ivs, we == screwed.  */
2276               for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
2277                 if (stmt_uses_op (stmt, iv))
2278                   goto fail;
2279
2280               /* If this is a simple operation like a cast that is invariant
2281                  in the inner loop, only used there, and we can place it
2282                  there, then it's not going to hurt us.
2283                  This means that we will propagate casts and other cheap
2284                  invariant operations *back*
2285                  into the inner loop if we can interchange the loop, on the
2286                  theory that we are going to gain a lot more by interchanging
2287                  the loop than we are by leaving some invariant code there for
2288                  some other pass to clean up.  */
2289               if (TREE_CODE (stmt) == MODIFY_EXPR
2290                   && is_gimple_cast (TREE_OPERAND (stmt, 1))
2291                   && can_put_in_inner_loop (loop->inner, stmt))
2292                 continue;
2293
2294               /* Otherwise, if the bb of a statement we care about isn't
2295                  dominated by the header of the inner loop, then we can't
2296                  handle this case right now.  This test ensures that the
2297                  statement comes completely *after* the inner loop.  */
2298               if (!dominated_by_p (CDI_DOMINATORS,
2299                                    bb_for_stmt (stmt),
2300                                    loop->inner->header))
2301                 goto fail;
2302             }
2303         }
2304     }
2305
2306   /* We also need to make sure the loop exit only has simple copy phis in it,
2307      otherwise we don't know how to transform it into a perfect nest right
2308      now.  */
2309   exitdest = loop->single_exit->dest;
2310
2311   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2312     if (PHI_NUM_ARGS (phi) != 1)
2313       goto fail;
2314
2315   free (bbs);
2316   return true;
2317
2318  fail:
2319   free (bbs);
2320   return false;
2321 }
2322
2323 /* Transform the loop nest into a perfect nest, if possible.
2324    LOOPS is the current struct loops *
2325    LOOP is the loop nest to transform into a perfect nest
2326    LBOUNDS are the lower bounds for the loops to transform
2327    UBOUNDS are the upper bounds for the loops to transform
2328    STEPS is the STEPS for the loops to transform.
2329    LOOPIVS is the induction variables for the loops to transform.
2330
2331    Basically, for the case of
2332
2333    FOR (i = 0; i < 50; i++)
2334     {
2335      FOR (j =0; j < 50; j++)
2336      {
2337         <whatever>
2338      }
2339      <some code>
2340     }
2341
2342    This function will transform it into a perfect loop nest by splitting the
2343    outer loop into two loops, like so:
2344
2345    FOR (i = 0; i < 50; i++)
2346    {
2347      FOR (j = 0; j < 50; j++)
2348      {
2349          <whatever>
2350      }
2351    }
2352
2353    FOR (i = 0; i < 50; i ++)
2354    {
2355     <some code>
2356    }
2357
2358    Return FALSE if we can't make this loop into a perfect nest.  */
2359
2360 static bool
2361 perfect_nestify (struct loops *loops,
2362                  struct loop *loop,
2363                  VEC(tree,heap) *lbounds,
2364                  VEC(tree,heap) *ubounds,
2365                  VEC(int,heap) *steps,
2366                  VEC(tree,heap) *loopivs)
2367 {
2368   basic_block *bbs;
2369   tree exit_condition;
2370   tree then_label, else_label, cond_stmt;
2371   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2372   int i;
2373   block_stmt_iterator bsi;
2374   bool insert_after;
2375   edge e;
2376   struct loop *newloop;
2377   tree phi;
2378   tree uboundvar;
2379   tree stmt;
2380   tree oldivvar, ivvar, ivvarinced;
2381   VEC(tree,heap) *phis = NULL;
2382
2383   if (!can_convert_to_perfect_nest (loop, loopivs))
2384     return false;
2385
2386   /* Create the new loop */
2387
2388   olddest = loop->single_exit->dest;
2389   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2390   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2391
2392   /* Push the exit phi nodes that we are moving.  */
2393   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2394     {
2395       VEC_reserve (tree, heap, phis, 2);
2396       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2397       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2398     }
2399   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2400
2401   /* Remove the exit phis from the old basic block.  Make sure to set
2402      PHI_RESULT to null so it doesn't get released.  */
2403   while (phi_nodes (olddest) != NULL)
2404     {
2405       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2406       remove_phi_node (phi_nodes (olddest), NULL);
2407     }
2408
2409   /* and add them back to the new basic block.  */
2410   while (VEC_length (tree, phis) != 0)
2411     {
2412       tree def;
2413       tree phiname;
2414       def = VEC_pop (tree, phis);
2415       phiname = VEC_pop (tree, phis);
2416       phi = create_phi_node (phiname, preheaderbb);
2417       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2418     }
2419   flush_pending_stmts (e);
2420   VEC_free (tree, heap, phis);
2421
2422   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2423   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2424   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2425   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2426   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2427   cond_stmt = build (COND_EXPR, void_type_node,
2428                      build (NE_EXPR, boolean_type_node,
2429                             integer_one_node,
2430                             integer_zero_node),
2431                      then_label, else_label);
2432   bsi = bsi_start (bodybb);
2433   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2434   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2435   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2436   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2437
2438   /* Update the loop structures.  */
2439   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2440   newloop->header = headerbb;
2441   newloop->latch = latchbb;
2442   newloop->single_exit = e;
2443   add_bb_to_loop (latchbb, newloop);
2444   add_bb_to_loop (bodybb, newloop);
2445   add_bb_to_loop (headerbb, newloop);
2446   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2447   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2448   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2449                            loop->single_exit->src);
2450   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2451   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2452   /* Create the new iv.  */
2453   oldivvar = VEC_index (tree, loopivs, 0);
2454   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2455   add_referenced_tmp_var (ivvar);
2456   standard_iv_increment_position (newloop, &bsi, &insert_after);
2457   create_iv (VEC_index (tree, lbounds, 0),
2458              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2459              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2460
2461   /* Create the new upper bound.  This may be not just a variable, so we copy
2462      it to one just in case.  */
2463
2464   exit_condition = get_loop_exit_condition (newloop);
2465   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2466   add_referenced_tmp_var (uboundvar);
2467   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2468                 VEC_index (tree, ubounds, 0));
2469   uboundvar = make_ssa_name (uboundvar, stmt);
2470   TREE_OPERAND (stmt, 0) = uboundvar;
2471
2472   if (insert_after)
2473     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2474   else
2475     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2476   update_stmt (stmt);
2477   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2478                                            boolean_type_node,
2479                                            uboundvar,
2480                                            ivvarinced);
2481   update_stmt (exit_condition);
2482   bbs = get_loop_body_in_dom_order (loop);
2483   /* Now move the statements, and replace the induction variable in the moved
2484      statements with the correct loop induction variable.  */
2485   oldivvar = VEC_index (tree, loopivs, 0);
2486   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2487     {
2488       block_stmt_iterator tobsi = bsi_last (bodybb);
2489       if (bbs[i]->loop_father == loop)
2490         {
2491           /* If this is true, we are *before* the inner loop.
2492              If this isn't true, we are *after* it.
2493
2494              The only time can_convert_to_perfect_nest returns true when we
2495              have statements before the inner loop is if they can be moved
2496              into the inner loop.
2497
2498              The only time can_convert_to_perfect_nest returns true when we
2499              have statements after the inner loop is if they can be moved into
2500              the new split loop.  */
2501
2502           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2503             {
2504               for (bsi = bsi_last (bbs[i]); !bsi_end_p (bsi);)
2505                 {
2506                   use_operand_p use_p;
2507                   imm_use_iterator imm_iter;
2508                   tree stmt = bsi_stmt (bsi);
2509
2510                   if (stmt == exit_condition
2511                       || not_interesting_stmt (stmt)
2512                       || stmt_is_bumper_for_loop (loop, stmt))
2513                     {
2514                       if (!bsi_end_p (bsi))
2515                         bsi_prev (&bsi);
2516                       continue;
2517                     }
2518                   /* Move this statement back into the inner loop.
2519                      This looks a bit confusing, but we are really just
2520                      finding the first non-exit phi use and moving the
2521                      statement to the beginning of that use's basic
2522                      block.  */
2523                   FOR_EACH_IMM_USE_SAFE (use_p, imm_iter,
2524                                          TREE_OPERAND (stmt, 0))
2525                     {
2526                       tree imm_stmt = USE_STMT (use_p);
2527                       if (!exit_phi_for_loop_p (loop->inner, imm_stmt))
2528                         {
2529                           block_stmt_iterator tobsi = bsi_after_labels (bb_for_stmt (imm_stmt));
2530                           bsi_move_after (&bsi, &tobsi);
2531                           update_stmt (stmt);
2532                           BREAK_FROM_SAFE_IMM_USE (imm_iter);
2533                         }
2534                     }
2535                 }
2536             }
2537           else
2538             {
2539               /* Note that the bsi only needs to be explicitly incremented
2540                  when we don't move something, since it is automatically
2541                  incremented when we do.  */
2542               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2543                 {
2544                   ssa_op_iter i;
2545                   tree n, stmt = bsi_stmt (bsi);
2546
2547                   if (stmt == exit_condition
2548                       || not_interesting_stmt (stmt)
2549                       || stmt_is_bumper_for_loop (loop, stmt))
2550                     {
2551                       bsi_next (&bsi);
2552                       continue;
2553                     }
2554
2555                   replace_uses_equiv_to_x_with_y (loop, stmt,
2556                                                   oldivvar,
2557                                                   VEC_index (int, steps, 0),
2558                                                   ivvar);
2559                   bsi_move_before (&bsi, &tobsi);
2560
2561                   /* If the statement has any virtual operands, they may
2562                      need to be rewired because the original loop may
2563                      still reference them.  */
2564                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2565                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2566                 }
2567             }
2568
2569         }
2570     }
2571
2572   free (bbs);
2573   return perfect_nest_p (loop);
2574 }
2575
2576 /* Return true if TRANS is a legal transformation matrix that respects
2577    the dependence vectors in DISTS and DIRS.  The conservative answer
2578    is false.
2579
2580    "Wolfe proves that a unimodular transformation represented by the
2581    matrix T is legal when applied to a loop nest with a set of
2582    lexicographically non-negative distance vectors RDG if and only if
2583    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2584    i.e.: if and only if it transforms the lexicographically positive
2585    distance vectors to lexicographically positive vectors.  Note that
2586    a unimodular matrix must transform the zero vector (and only it) to
2587    the zero vector." S.Muchnick.  */
2588
2589 bool
2590 lambda_transform_legal_p (lambda_trans_matrix trans,
2591                           int nb_loops,
2592                           varray_type dependence_relations)
2593 {
2594   unsigned int i, j;
2595   lambda_vector distres;
2596   struct data_dependence_relation *ddr;
2597
2598   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2599               && LTM_ROWSIZE (trans) == nb_loops);
2600
2601   /* When there is an unknown relation in the dependence_relations, we
2602      know that it is no worth looking at this loop nest: give up.  */
2603   ddr = (struct data_dependence_relation *)
2604     VARRAY_GENERIC_PTR (dependence_relations, 0);
2605   if (ddr == NULL)
2606     return true;
2607   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2608     return false;
2609
2610   distres = lambda_vector_new (nb_loops);
2611
2612   /* For each distance vector in the dependence graph.  */
2613   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2614     {
2615       ddr = (struct data_dependence_relation *)
2616         VARRAY_GENERIC_PTR (dependence_relations, i);
2617
2618       /* Don't care about relations for which we know that there is no
2619          dependence, nor about read-read (aka. output-dependences):
2620          these data accesses can happen in any order.  */
2621       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2622           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2623         continue;
2624
2625       /* Conservatively answer: "this transformation is not valid".  */
2626       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2627         return false;
2628
2629       /* If the dependence could not be captured by a distance vector,
2630          conservatively answer that the transform is not valid.  */
2631       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2632         return false;
2633
2634       /* Compute trans.dist_vect */
2635       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2636         {
2637           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2638                                      DDR_DIST_VECT (ddr, j), distres);
2639
2640           if (!lambda_vector_lexico_pos (distres, nb_loops))
2641             return false;
2642         }
2643     }
2644   return true;
2645 }