gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "obstack.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44 #include "vecprim.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as an integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
  78  nest . U = auxiliary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 119                              VEC(tree,heap) *, VEC(int,heap) *,
 120                              VEC(tree,heap) *);
 121 /* Lattice stuff that is internal to the code generation algorithm.  */
 122
 123 typedef struct lambda_lattice_s
 124 {
 125   /* Lattice base matrix.  */
 126   lambda_matrix base;
 127   /* Lattice dimension.  */
 128   int dimension;
 129   /* Origin vector for the coefficients.  */
 130   lambda_vector origin;
 131   /* Origin matrix for the invariants.  */
 132   lambda_matrix origin_invariants;
 133   /* Number of invariants.  */
 134   int invariants;
 135 } *lambda_lattice;
 136
 137 #define LATTICE_BASE(T) ((T)->base)
 138 #define LATTICE_DIMENSION(T) ((T)->dimension)
 139 #define LATTICE_ORIGIN(T) ((T)->origin)
 140 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 141 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 142
 143 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 144                        int, int);
 145 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
 146 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
 147                                                    struct obstack *);
 148
 149 static tree find_induction_var_from_exit_cond (struct loop *);
 150 static bool can_convert_to_perfect_nest (struct loop *);
 151
 152 /* Create a new lambda body vector.  */
 153
 154 lambda_body_vector
 155 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
 156 {
 157   lambda_body_vector ret;
 158
 159   ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret));
 160   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 161   LBV_SIZE (ret) = size;
 162   LBV_DENOMINATOR (ret) = 1;
 163   return ret;
 164 }
 165
 166 /* Compute the new coefficients for the vector based on the
 167   *inverse* of the transformation matrix.  */
 168
 169 lambda_body_vector
 170 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 171                                 lambda_body_vector vect,
 172                                 struct obstack * lambda_obstack)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth, lambda_obstack);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants,
 229                               struct obstack * lambda_obstack)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
 234                                                  sizeof (*ret));
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants,
 332                      struct obstack * lambda_obstack)
 333 {
 334   lambda_loopnest ret;
 335   ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
 336
 337   LN_LOOPS (ret) = (lambda_loop *)
 338       obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
 339   LN_DEPTH (ret) = depth;
 340   LN_INVARIANTS (ret) = invariants;
 341
 342   return ret;
 343 }
 344
 345 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 346    character to use for loop names is given by START.  */
 347
 348 void
 349 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 350 {
 351   int i;
 352   for (i = 0; i < LN_DEPTH (nest); i++)
 353     {
 354       fprintf (outfile, "Loop %c\n", start + i);
 355       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 356                          LN_INVARIANTS (nest), 'i');
 357       fprintf (outfile, "\n");
 358     }
 359 }
 360
 361 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 362    of invariants.  */
 363
 364 static lambda_lattice
 365 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
 366 {
 367   lambda_lattice ret
 368       = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
 369   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 370   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 371   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 372   LATTICE_DIMENSION (ret) = depth;
 373   LATTICE_INVARIANTS (ret) = invariants;
 374   return ret;
 375 }
 376
 377 /* Compute the lattice base for NEST.  The lattice base is essentially a
 378    non-singular transform from a dense base space to a sparse iteration space.
 379    We use it so that we don't have to specially handle the case of a sparse
 380    iteration space in other parts of the algorithm.  As a result, this routine
 381    only does something interesting (IE produce a matrix that isn't the
 382    identity matrix) if NEST is a sparse space.  */
 383
 384 static lambda_lattice
 385 lambda_lattice_compute_base (lambda_loopnest nest,
 386                              struct obstack * lambda_obstack)
 387 {
 388   lambda_lattice ret;
 389   int depth, invariants;
 390   lambda_matrix base;
 391
 392   int i, j, step;
 393   lambda_loop loop;
 394   lambda_linear_expression expression;
 395
 396   depth = LN_DEPTH (nest);
 397   invariants = LN_INVARIANTS (nest);
 398
 399   ret = lambda_lattice_new (depth, invariants, lambda_obstack);
 400   base = LATTICE_BASE (ret);
 401   for (i = 0; i < depth; i++)
 402     {
 403       loop = LN_LOOPS (nest)[i];
 404       gcc_assert (loop);
 405       step = LL_STEP (loop);
 406       /* If we have a step of 1, then the base is one, and the
 407          origin and invariant coefficients are 0.  */
 408       if (step == 1)
 409         {
 410           for (j = 0; j < depth; j++)
 411             base[i][j] = 0;
 412           base[i][i] = 1;
 413           LATTICE_ORIGIN (ret)[i] = 0;
 414           for (j = 0; j < invariants; j++)
 415             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 416         }
 417       else
 418         {
 419           /* Otherwise, we need the lower bound expression (which must
 420              be an affine function)  to determine the base.  */
 421           expression = LL_LOWER_BOUND (loop);
 422           gcc_assert (expression && !LLE_NEXT (expression)
 423                       && LLE_DENOMINATOR (expression) == 1);
 424
 425           /* The lower triangular portion of the base is going to be the
 426              coefficient times the step */
 427           for (j = 0; j < i; j++)
 428             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 429               * LL_STEP (LN_LOOPS (nest)[j]);
 430           base[i][i] = step;
 431           for (j = i + 1; j < depth; j++)
 432             base[i][j] = 0;
 433
 434           /* Origin for this loop is the constant of the lower bound
 435              expression.  */
 436           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 437
 438           /* Coefficient for the invariants are equal to the invariant
 439              coefficients in the expression.  */
 440           for (j = 0; j < invariants; j++)
 441             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 442               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 443         }
 444     }
 445   return ret;
 446 }
 447
 448 /* Compute the least common multiple of two numbers A and B .  */
 449
 450 int
 451 least_common_multiple (int a, int b)
 452 {
 453   return (abs (a) * abs (b) / gcd (a, b));
 454 }
 455
 456 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 457    auxiliary nest.
 458    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 459    it is easy to calculate the answer and bounds.
 460    A sketch of how it works:
 461    Given a system of linear inequalities, ai * xj >= bk, you can always
 462    rewrite the constraints so they are all of the form
 463    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 464    in b1 ... bk, and some a in a1...ai)
 465    You can then eliminate this x from the non-constant inequalities by
 466    rewriting these as a <= b, x >= constant, and delete the x variable.
 467    You can then repeat this for any remaining x variables, and then we have
 468    an easy to use variable <= constant (or no variables at all) form that we
 469    can construct our bounds from.
 470
 471    In our case, each time we eliminate, we construct part of the bound from
 472    the ith variable, then delete the ith variable.
 473
 474    Remember the constant are in our vector a, our coefficient matrix is A,
 475    and our invariant coefficient matrix is B.
 476
 477    SIZE is the size of the matrices being passed.
 478    DEPTH is the loop nest depth.
 479    INVARIANTS is the number of loop invariants.
 480    A, B, and a are the coefficient matrix, invariant coefficient, and a
 481    vector of constants, respectively.  */
 482
 483 static lambda_loopnest
 484 compute_nest_using_fourier_motzkin (int size,
 485                                     int depth,
 486                                     int invariants,
 487                                     lambda_matrix A,
 488                                     lambda_matrix B,
 489                                     lambda_vector a,
 490                                     struct obstack * lambda_obstack)
 491 {
 492
 493   int multiple, f1, f2;
 494   int i, j, k;
 495   lambda_linear_expression expression;
 496   lambda_loop loop;
 497   lambda_loopnest auxillary_nest;
 498   lambda_matrix swapmatrix, A1, B1;
 499   lambda_vector swapvector, a1;
 500   int newsize;
 501
 502   A1 = lambda_matrix_new (128, depth);
 503   B1 = lambda_matrix_new (128, invariants);
 504   a1 = lambda_vector_new (128);
 505
 506   auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 507
 508   for (i = depth - 1; i >= 0; i--)
 509     {
 510       loop = lambda_loop_new ();
 511       LN_LOOPS (auxillary_nest)[i] = loop;
 512       LL_STEP (loop) = 1;
 513
 514       for (j = 0; j < size; j++)
 515         {
 516           if (A[j][i] < 0)
 517             {
 518               /* Any linear expression in the matrix with a coefficient less
 519                  than 0 becomes part of the new lower bound.  */
 520               expression = lambda_linear_expression_new (depth, invariants,
 521                                                          lambda_obstack);
 522
 523               for (k = 0; k < i; k++)
 524                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 525
 526               for (k = 0; k < invariants; k++)
 527                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 528
 529               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 530               LLE_CONSTANT (expression) = -1 * a[j];
 531
 532               /* Ignore if identical to the existing lower bound.  */
 533               if (!lle_equal (LL_LOWER_BOUND (loop),
 534                               expression, depth, invariants))
 535                 {
 536                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 537                   LL_LOWER_BOUND (loop) = expression;
 538                 }
 539
 540             }
 541           else if (A[j][i] > 0)
 542             {
 543               /* Any linear expression with a coefficient greater than 0
 544                  becomes part of the new upper bound.  */
 545               expression = lambda_linear_expression_new (depth, invariants,
 546                                                          lambda_obstack);
 547               for (k = 0; k < i; k++)
 548                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 549
 550               for (k = 0; k < invariants; k++)
 551                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 552
 553               LLE_DENOMINATOR (expression) = A[j][i];
 554               LLE_CONSTANT (expression) = a[j];
 555
 556               /* Ignore if identical to the existing upper bound.  */
 557               if (!lle_equal (LL_UPPER_BOUND (loop),
 558                               expression, depth, invariants))
 559                 {
 560                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 561                   LL_UPPER_BOUND (loop) = expression;
 562                 }
 563
 564             }
 565         }
 566
 567       /* This portion creates a new system of linear inequalities by deleting
 568          the i'th variable, reducing the system by one variable.  */
 569       newsize = 0;
 570       for (j = 0; j < size; j++)
 571         {
 572           /* If the coefficient for the i'th variable is 0, then we can just
 573              eliminate the variable straightaway.  Otherwise, we have to
 574              multiply through by the coefficients we are eliminating.  */
 575           if (A[j][i] == 0)
 576             {
 577               lambda_vector_copy (A[j], A1[newsize], depth);
 578               lambda_vector_copy (B[j], B1[newsize], invariants);
 579               a1[newsize] = a[j];
 580               newsize++;
 581             }
 582           else if (A[j][i] > 0)
 583             {
 584               for (k = 0; k < size; k++)
 585                 {
 586                   if (A[k][i] < 0)
 587                     {
 588                       multiple = least_common_multiple (A[j][i], A[k][i]);
 589                       f1 = multiple / A[j][i];
 590                       f2 = -1 * multiple / A[k][i];
 591
 592                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 593                                             A1[newsize], depth);
 594                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 595                                             B1[newsize], invariants);
 596                       a1[newsize] = f1 * a[j] + f2 * a[k];
 597                       newsize++;
 598                     }
 599                 }
 600             }
 601         }
 602
 603       swapmatrix = A;
 604       A = A1;
 605       A1 = swapmatrix;
 606
 607       swapmatrix = B;
 608       B = B1;
 609       B1 = swapmatrix;
 610
 611       swapvector = a;
 612       a = a1;
 613       a1 = swapvector;
 614
 615       size = newsize;
 616     }
 617
 618   return auxillary_nest;
 619 }
 620
 621 /* Compute the loop bounds for the auxiliary space NEST.
 622    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 623    Given the original nest, this function will
 624    1. Convert the nest into matrix form, which consists of a matrix for the
 625    coefficients, a matrix for the
 626    invariant coefficients, and a vector for the constants.
 627    2. Use the matrix form to calculate the lattice base for the nest (which is
 628    a dense space)
 629    3. Compose the dense space transform with the user specified transform, to
 630    get a transform we can easily calculate transformed bounds for.
 631    4. Multiply the composed transformation matrix times the matrix form of the
 632    loop.
 633    5. Transform the newly created matrix (from step 4) back into a loop nest
 634    using Fourier-Motzkin elimination to figure out the bounds.  */
 635
 636 static lambda_loopnest
 637 lambda_compute_auxillary_space (lambda_loopnest nest,
 638                                 lambda_trans_matrix trans,
 639                                 struct obstack * lambda_obstack)
 640 {
 641   lambda_matrix A, B, A1, B1;
 642   lambda_vector a, a1;
 643   lambda_matrix invertedtrans;
 644   int depth, invariants, size;
 645   int i, j;
 646   lambda_loop loop;
 647   lambda_linear_expression expression;
 648   lambda_lattice lattice;
 649
 650   depth = LN_DEPTH (nest);
 651   invariants = LN_INVARIANTS (nest);
 652
 653   /* Unfortunately, we can't know the number of constraints we'll have
 654      ahead of time, but this should be enough even in ridiculous loop nest
 655      cases. We must not go over this limit.  */
 656   A = lambda_matrix_new (128, depth);
 657   B = lambda_matrix_new (128, invariants);
 658   a = lambda_vector_new (128);
 659
 660   A1 = lambda_matrix_new (128, depth);
 661   B1 = lambda_matrix_new (128, invariants);
 662   a1 = lambda_vector_new (128);
 663
 664   /* Store the bounds in the equation matrix A, constant vector a, and
 665      invariant matrix B, so that we have Ax <= a + B.
 666      This requires a little equation rearranging so that everything is on the
 667      correct side of the inequality.  */
 668   size = 0;
 669   for (i = 0; i < depth; i++)
 670     {
 671       loop = LN_LOOPS (nest)[i];
 672
 673       /* First we do the lower bound.  */
 674       if (LL_STEP (loop) > 0)
 675         expression = LL_LOWER_BOUND (loop);
 676       else
 677         expression = LL_UPPER_BOUND (loop);
 678
 679       for (; expression != NULL; expression = LLE_NEXT (expression))
 680         {
 681           /* Fill in the coefficient.  */
 682           for (j = 0; j < i; j++)
 683             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 684
 685           /* And the invariant coefficient.  */
 686           for (j = 0; j < invariants; j++)
 687             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 688
 689           /* And the constant.  */
 690           a[size] = LLE_CONSTANT (expression);
 691
 692           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 693              constants and single variables on   */
 694           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 695           a[size] *= -1;
 696           for (j = 0; j < invariants; j++)
 697             B[size][j] *= -1;
 698
 699           size++;
 700           /* Need to increase matrix sizes above.  */
 701           gcc_assert (size <= 127);
 702
 703         }
 704
 705       /* Then do the exact same thing for the upper bounds.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_UPPER_BOUND (loop);
 708       else
 709         expression = LL_LOWER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 725           for (j = 0; j < i; j++)
 726             A[size][j] *= -1;
 727           A[size][i] = LLE_DENOMINATOR (expression);
 728           size++;
 729           /* Need to increase matrix sizes above.  */
 730           gcc_assert (size <= 127);
 731
 732         }
 733     }
 734
 735   /* Compute the lattice base x = base * y + origin, where y is the
 736      base space.  */
 737   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
 738
 739   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 740
 741   /* A1 = A * L */
 742   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 743
 744   /* a1 = a - A * origin constant.  */
 745   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 746   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 747
 748   /* B1 = B - A * origin invariant.  */
 749   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 750                       invariants);
 751   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 752
 753   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 754      it by A1, then performing Fourier-Motzkin.  */
 755
 756   invertedtrans = lambda_matrix_new (depth, depth);
 757
 758   /* Compute the inverse of U.  */
 759   lambda_matrix_inverse (LTM_MATRIX (trans),
 760                          invertedtrans, depth);
 761
 762   /* A = A1 inv(U).  */
 763   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 764
 765   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 766                                              A, B1, a1, lambda_obstack);
 767 }
 768
 769 /* Compute the loop bounds for the target space, using the bounds of
 770    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 771    The target space loop bounds are computed by multiplying the triangular
 772    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 773    the loop steps (positive or negative) is then used to swap the bounds if
 774    the loop counts downwards.
 775    Return the target loopnest.  */
 776
 777 static lambda_loopnest
 778 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 779                              lambda_trans_matrix H, lambda_vector stepsigns,
 780                              struct obstack * lambda_obstack)
 781 {
 782   lambda_matrix inverse, H1;
 783   int determinant, i, j;
 784   int gcd1, gcd2;
 785   int factor;
 786
 787   lambda_loopnest target_nest;
 788   int depth, invariants;
 789   lambda_matrix target;
 790
 791   lambda_loop auxillary_loop, target_loop;
 792   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 793
 794   depth = LN_DEPTH (auxillary_nest);
 795   invariants = LN_INVARIANTS (auxillary_nest);
 796
 797   inverse = lambda_matrix_new (depth, depth);
 798   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 799
 800   /* H1 is H excluding its diagonal.  */
 801   H1 = lambda_matrix_new (depth, depth);
 802   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 803
 804   for (i = 0; i < depth; i++)
 805     H1[i][i] = 0;
 806
 807   /* Computes the linear offsets of the loop bounds.  */
 808   target = lambda_matrix_new (depth, depth);
 809   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 810
 811   target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 812
 813   for (i = 0; i < depth; i++)
 814     {
 815
 816       /* Get a new loop structure.  */
 817       target_loop = lambda_loop_new ();
 818       LN_LOOPS (target_nest)[i] = target_loop;
 819
 820       /* Computes the gcd of the coefficients of the linear part.  */
 821       gcd1 = lambda_vector_gcd (target[i], i);
 822
 823       /* Include the denominator in the GCD.  */
 824       gcd1 = gcd (gcd1, determinant);
 825
 826       /* Now divide through by the gcd.  */
 827       for (j = 0; j < i; j++)
 828         target[i][j] = target[i][j] / gcd1;
 829
 830       expression = lambda_linear_expression_new (depth, invariants,
 831                                                  lambda_obstack);
 832       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 833       LLE_DENOMINATOR (expression) = determinant / gcd1;
 834       LLE_CONSTANT (expression) = 0;
 835       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 836                            invariants);
 837       LL_LINEAR_OFFSET (target_loop) = expression;
 838     }
 839
 840   /* For each loop, compute the new bounds from H.  */
 841   for (i = 0; i < depth; i++)
 842     {
 843       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 844       target_loop = LN_LOOPS (target_nest)[i];
 845       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 846       factor = LTM_MATRIX (H)[i][i];
 847
 848       /* First we do the lower bound.  */
 849       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 850
 851       for (; auxillary_expr != NULL;
 852            auxillary_expr = LLE_NEXT (auxillary_expr))
 853         {
 854           target_expr = lambda_linear_expression_new (depth, invariants,
 855                                                       lambda_obstack);
 856           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 857                                      depth, inverse, depth,
 858                                      LLE_COEFFICIENTS (target_expr));
 859           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 860                                     LLE_COEFFICIENTS (target_expr), depth,
 861                                     factor);
 862
 863           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 864           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 865                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 866                               invariants);
 867           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 868                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 869                                     invariants, factor);
 870           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 871
 872           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 873             {
 874               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 875                 * determinant;
 876               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 877                                         (target_expr),
 878                                         LLE_INVARIANT_COEFFICIENTS
 879                                         (target_expr), invariants,
 880                                         determinant);
 881               LLE_DENOMINATOR (target_expr) =
 882                 LLE_DENOMINATOR (target_expr) * determinant;
 883             }
 884           /* Find the gcd and divide by it here, rather than doing it
 885              at the tree level.  */
 886           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 887           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 888                                     invariants);
 889           gcd1 = gcd (gcd1, gcd2);
 890           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 891           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 892           for (j = 0; j < depth; j++)
 893             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 894           for (j = 0; j < invariants; j++)
 895             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 896           LLE_CONSTANT (target_expr) /= gcd1;
 897           LLE_DENOMINATOR (target_expr) /= gcd1;
 898           /* Ignore if identical to existing bound.  */
 899           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 900                           invariants))
 901             {
 902               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 903               LL_LOWER_BOUND (target_loop) = target_expr;
 904             }
 905         }
 906       /* Now do the upper bound.  */
 907       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 908
 909       for (; auxillary_expr != NULL;
 910            auxillary_expr = LLE_NEXT (auxillary_expr))
 911         {
 912           target_expr = lambda_linear_expression_new (depth, invariants,
 913                                                       lambda_obstack);
 914           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 915                                      depth, inverse, depth,
 916                                      LLE_COEFFICIENTS (target_expr));
 917           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 918                                     LLE_COEFFICIENTS (target_expr), depth,
 919                                     factor);
 920           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 921           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 922                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 923                               invariants);
 924           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 925                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 926                                     invariants, factor);
 927           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 928
 929           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 930             {
 931               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 932                 * determinant;
 933               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 934                                         (target_expr),
 935                                         LLE_INVARIANT_COEFFICIENTS
 936                                         (target_expr), invariants,
 937                                         determinant);
 938               LLE_DENOMINATOR (target_expr) =
 939                 LLE_DENOMINATOR (target_expr) * determinant;
 940             }
 941           /* Find the gcd and divide by it here, instead of at the
 942              tree level.  */
 943           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 944           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 945                                     invariants);
 946           gcd1 = gcd (gcd1, gcd2);
 947           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 948           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 949           for (j = 0; j < depth; j++)
 950             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 951           for (j = 0; j < invariants; j++)
 952             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 953           LLE_CONSTANT (target_expr) /= gcd1;
 954           LLE_DENOMINATOR (target_expr) /= gcd1;
 955           /* Ignore if equal to existing bound.  */
 956           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 957                           invariants))
 958             {
 959               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 960               LL_UPPER_BOUND (target_loop) = target_expr;
 961             }
 962         }
 963     }
 964   for (i = 0; i < depth; i++)
 965     {
 966       target_loop = LN_LOOPS (target_nest)[i];
 967       /* If necessary, exchange the upper and lower bounds and negate
 968          the step size.  */
 969       if (stepsigns[i] < 0)
 970         {
 971           LL_STEP (target_loop) *= -1;
 972           tmp_expr = LL_LOWER_BOUND (target_loop);
 973           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 974           LL_UPPER_BOUND (target_loop) = tmp_expr;
 975         }
 976     }
 977   return target_nest;
 978 }
 979
 980 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 981    result.  */
 982
 983 static lambda_vector
 984 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 985 {
 986   lambda_matrix matrix, H;
 987   int size;
 988   lambda_vector newsteps;
 989   int i, j, factor, minimum_column;
 990   int temp;
 991
 992   matrix = LTM_MATRIX (trans);
 993   size = LTM_ROWSIZE (trans);
 994   H = lambda_matrix_new (size, size);
 995
 996   newsteps = lambda_vector_new (size);
 997   lambda_vector_copy (stepsigns, newsteps, size);
 998
 999   lambda_matrix_copy (matrix, H, size, size);
1000
1001   for (j = 0; j < size; j++)
1002     {
1003       lambda_vector row;
1004       row = H[j];
1005       for (i = j; i < size; i++)
1006         if (row[i] < 0)
1007           lambda_matrix_col_negate (H, size, i);
1008       while (lambda_vector_first_nz (row, size, j + 1) < size)
1009         {
1010           minimum_column = lambda_vector_min_nz (row, size, j);
1011           lambda_matrix_col_exchange (H, size, j, minimum_column);
1012
1013           temp = newsteps[j];
1014           newsteps[j] = newsteps[minimum_column];
1015           newsteps[minimum_column] = temp;
1016
1017           for (i = j + 1; i < size; i++)
1018             {
1019               factor = row[i] / row[j];
1020               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1021             }
1022         }
1023     }
1024   return newsteps;
1025 }
1026
1027 /* Transform NEST according to TRANS, and return the new loopnest.
1028    This involves
1029    1. Computing a lattice base for the transformation
1030    2. Composing the dense base with the specified transformation (TRANS)
1031    3. Decomposing the combined transformation into a lower triangular portion,
1032    and a unimodular portion.
1033    4. Computing the auxiliary nest using the unimodular portion.
1034    5. Computing the target nest using the auxiliary nest and the lower
1035    triangular portion.  */
1036
1037 lambda_loopnest
1038 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1039                            struct obstack * lambda_obstack)
1040 {
1041   lambda_loopnest auxillary_nest, target_nest;
1042
1043   int depth, invariants;
1044   int i, j;
1045   lambda_lattice lattice;
1046   lambda_trans_matrix trans1, H, U;
1047   lambda_loop loop;
1048   lambda_linear_expression expression;
1049   lambda_vector origin;
1050   lambda_matrix origin_invariants;
1051   lambda_vector stepsigns;
1052   int f;
1053
1054   depth = LN_DEPTH (nest);
1055   invariants = LN_INVARIANTS (nest);
1056
1057   /* Keep track of the signs of the loop steps.  */
1058   stepsigns = lambda_vector_new (depth);
1059   for (i = 0; i < depth; i++)
1060     {
1061       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1062         stepsigns[i] = 1;
1063       else
1064         stepsigns[i] = -1;
1065     }
1066
1067   /* Compute the lattice base.  */
1068   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1069   trans1 = lambda_trans_matrix_new (depth, depth);
1070
1071   /* Multiply the transformation matrix by the lattice base.  */
1072
1073   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1074                       LTM_MATRIX (trans1), depth, depth, depth);
1075
1076   /* Compute the Hermite normal form for the new transformation matrix.  */
1077   H = lambda_trans_matrix_new (depth, depth);
1078   U = lambda_trans_matrix_new (depth, depth);
1079   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1080                          LTM_MATRIX (U));
1081
1082   /* Compute the auxiliary loop nest's space from the unimodular
1083      portion.  */
1084   auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack);
1085
1086   /* Compute the loop step signs from the old step signs and the
1087      transformation matrix.  */
1088   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1089
1090   /* Compute the target loop nest space from the auxiliary nest and
1091      the lower triangular matrix H.  */
1092   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1093                                              lambda_obstack);
1094   origin = lambda_vector_new (depth);
1095   origin_invariants = lambda_matrix_new (depth, invariants);
1096   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1097                              LATTICE_ORIGIN (lattice), origin);
1098   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1099                       origin_invariants, depth, depth, invariants);
1100
1101   for (i = 0; i < depth; i++)
1102     {
1103       loop = LN_LOOPS (target_nest)[i];
1104       expression = LL_LINEAR_OFFSET (loop);
1105       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1106         f = 1;
1107       else
1108         f = LLE_DENOMINATOR (expression);
1109
1110       LLE_CONSTANT (expression) += f * origin[i];
1111
1112       for (j = 0; j < invariants; j++)
1113         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1114           f * origin_invariants[i][j];
1115     }
1116
1117   return target_nest;
1118
1119 }
1120
1121 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1122    return the new expression.  DEPTH is the depth of the loopnest.
1123    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1124    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1125    is the amount we have to add/subtract from the expression because of the
1126    type of comparison it is used in.  */
1127
1128 static lambda_linear_expression
1129 gcc_tree_to_linear_expression (int depth, tree expr,
1130                                VEC(tree,heap) *outerinductionvars,
1131                                VEC(tree,heap) *invariants, int extra,
1132                                struct obstack * lambda_obstack)
1133 {
1134   lambda_linear_expression lle = NULL;
1135   switch (TREE_CODE (expr))
1136     {
1137     case INTEGER_CST:
1138       {
1139         lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1140         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1141         if (extra != 0)
1142           LLE_CONSTANT (lle) += extra;
1143
1144         LLE_DENOMINATOR (lle) = 1;
1145       }
1146       break;
1147     case SSA_NAME:
1148       {
1149         tree iv, invar;
1150         size_t i;
1151         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1152           if (iv != NULL)
1153             {
1154               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1155                 {
1156                   lle = lambda_linear_expression_new (depth, 2 * depth,
1157                                                       lambda_obstack);
1158                   LLE_COEFFICIENTS (lle)[i] = 1;
1159                   if (extra != 0)
1160                     LLE_CONSTANT (lle) = extra;
1161
1162                   LLE_DENOMINATOR (lle) = 1;
1163                 }
1164             }
1165         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1166           if (invar != NULL)
1167             {
1168               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1169                 {
1170                   lle = lambda_linear_expression_new (depth, 2 * depth,
1171                                                       lambda_obstack);
1172                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1173                   if (extra != 0)
1174                     LLE_CONSTANT (lle) = extra;
1175                   LLE_DENOMINATOR (lle) = 1;
1176                 }
1177             }
1178       }
1179       break;
1180     default:
1181       return NULL;
1182     }
1183
1184   return lle;
1185 }
1186
1187 /* Return the depth of the loopnest NEST */
1188
1189 static int
1190 depth_of_nest (struct loop *nest)
1191 {
1192   size_t depth = 0;
1193   while (nest)
1194     {
1195       depth++;
1196       nest = nest->inner;
1197     }
1198   return depth;
1199 }
1200
1201
1202 /* Return true if OP is invariant in LOOP and all outer loops.  */
1203
1204 static bool
1205 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1206 {
1207   if (is_gimple_min_invariant (op))
1208     return true;
1209   if (loop_depth (loop) == 0)
1210     return true;
1211   if (!expr_invariant_in_loop_p (loop, op))
1212     return false;
1213   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1214     return false;
1215   return true;
1216 }
1217
1218 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1219    or NULL if it could not be converted.
1220    DEPTH is the depth of the loop.
1221    INVARIANTS is a pointer to the array of loop invariants.
1222    The induction variable for this loop should be stored in the parameter
1223    OURINDUCTIONVAR.
1224    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1225
1226 static lambda_loop
1227 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1228                          VEC(tree,heap) ** invariants,
1229                          tree * ourinductionvar,
1230                          VEC(tree,heap) * outerinductionvars,
1231                          VEC(tree,heap) ** lboundvars,
1232                          VEC(tree,heap) ** uboundvars,
1233                          VEC(int,heap) ** steps,
1234                          struct obstack * lambda_obstack)
1235 {
1236   tree phi;
1237   tree exit_cond;
1238   tree access_fn, inductionvar;
1239   tree step;
1240   lambda_loop lloop = NULL;
1241   lambda_linear_expression lbound, ubound;
1242   tree test;
1243   int stepint;
1244   int extra = 0;
1245   tree lboundvar, uboundvar, uboundresult;
1246
1247   /* Find out induction var and exit condition.  */
1248   inductionvar = find_induction_var_from_exit_cond (loop);
1249   exit_cond = get_loop_exit_condition (loop);
1250
1251   if (inductionvar == NULL || exit_cond == NULL)
1252     {
1253       if (dump_file && (dump_flags & TDF_DETAILS))
1254         fprintf (dump_file,
1255                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1256       return NULL;
1257     }
1258
1259   test = TREE_OPERAND (exit_cond, 0);
1260
1261   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1262     {
1263
1264       if (dump_file && (dump_flags & TDF_DETAILS))
1265         fprintf (dump_file,
1266                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1267
1268       return NULL;
1269     }
1270
1271   phi = SSA_NAME_DEF_STMT (inductionvar);
1272   if (TREE_CODE (phi) != PHI_NODE)
1273     {
1274       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1275       if (!phi)
1276         {
1277
1278           if (dump_file && (dump_flags & TDF_DETAILS))
1279             fprintf (dump_file,
1280                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1281
1282           return NULL;
1283         }
1284
1285       phi = SSA_NAME_DEF_STMT (phi);
1286       if (TREE_CODE (phi) != PHI_NODE)
1287         {
1288
1289           if (dump_file && (dump_flags & TDF_DETAILS))
1290             fprintf (dump_file,
1291                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1292           return NULL;
1293         }
1294
1295     }
1296
1297   /* The induction variable name/version we want to put in the array is the
1298      result of the induction variable phi node.  */
1299   *ourinductionvar = PHI_RESULT (phi);
1300   access_fn = instantiate_parameters
1301     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1302   if (access_fn == chrec_dont_know)
1303     {
1304       if (dump_file && (dump_flags & TDF_DETAILS))
1305         fprintf (dump_file,
1306                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1307
1308       return NULL;
1309     }
1310
1311   step = evolution_part_in_loop_num (access_fn, loop->num);
1312   if (!step || step == chrec_dont_know)
1313     {
1314       if (dump_file && (dump_flags & TDF_DETAILS))
1315         fprintf (dump_file,
1316                  "Unable to convert loop: Cannot determine step of loop.\n");
1317
1318       return NULL;
1319     }
1320   if (TREE_CODE (step) != INTEGER_CST)
1321     {
1322
1323       if (dump_file && (dump_flags & TDF_DETAILS))
1324         fprintf (dump_file,
1325                  "Unable to convert loop: Step of loop is not integer.\n");
1326       return NULL;
1327     }
1328
1329   stepint = TREE_INT_CST_LOW (step);
1330
1331   /* Only want phis for induction vars, which will have two
1332      arguments.  */
1333   if (PHI_NUM_ARGS (phi) != 2)
1334     {
1335       if (dump_file && (dump_flags & TDF_DETAILS))
1336         fprintf (dump_file,
1337                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1338       return NULL;
1339     }
1340
1341   /* Another induction variable check. One argument's source should be
1342      in the loop, one outside the loop.  */
1343   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1344       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1345     {
1346
1347       if (dump_file && (dump_flags & TDF_DETAILS))
1348         fprintf (dump_file,
1349                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1350
1351       return NULL;
1352     }
1353
1354   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1355     {
1356       lboundvar = PHI_ARG_DEF (phi, 1);
1357       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1358                                               outerinductionvars, *invariants,
1359                                               0, lambda_obstack);
1360     }
1361   else
1362     {
1363       lboundvar = PHI_ARG_DEF (phi, 0);
1364       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1365                                               outerinductionvars, *invariants,
1366                                               0, lambda_obstack);
1367     }
1368
1369   if (!lbound)
1370     {
1371
1372       if (dump_file && (dump_flags & TDF_DETAILS))
1373         fprintf (dump_file,
1374                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1375
1376       return NULL;
1377     }
1378   /* One part of the test may be a loop invariant tree.  */
1379   VEC_reserve (tree, heap, *invariants, 1);
1380   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1381       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1382     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1383   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1384            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1385     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1386
1387   /* The non-induction variable part of the test is the upper bound variable.
1388    */
1389   if (TREE_OPERAND (test, 0) == inductionvar)
1390     uboundvar = TREE_OPERAND (test, 1);
1391   else
1392     uboundvar = TREE_OPERAND (test, 0);
1393
1394
1395   /* We only size the vectors assuming we have, at max, 2 times as many
1396      invariants as we do loops (one for each bound).
1397      This is just an arbitrary number, but it has to be matched against the
1398      code below.  */
1399   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1400
1401
1402   /* We might have some leftover.  */
1403   if (TREE_CODE (test) == LT_EXPR)
1404     extra = -1 * stepint;
1405   else if (TREE_CODE (test) == NE_EXPR)
1406     extra = -1 * stepint;
1407   else if (TREE_CODE (test) == GT_EXPR)
1408     extra = -1 * stepint;
1409   else if (TREE_CODE (test) == EQ_EXPR)
1410     extra = 1 * stepint;
1411
1412   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1413                                           outerinductionvars,
1414                                           *invariants, extra, lambda_obstack);
1415   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1416                          build_int_cst (TREE_TYPE (uboundvar), extra));
1417   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1418   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1419   VEC_safe_push (int, heap, *steps, stepint);
1420   if (!ubound)
1421     {
1422       if (dump_file && (dump_flags & TDF_DETAILS))
1423         fprintf (dump_file,
1424                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1425       return NULL;
1426     }
1427
1428   lloop = lambda_loop_new ();
1429   LL_STEP (lloop) = stepint;
1430   LL_LOWER_BOUND (lloop) = lbound;
1431   LL_UPPER_BOUND (lloop) = ubound;
1432   return lloop;
1433 }
1434
1435 /* Given a LOOP, find the induction variable it is testing against in the exit
1436    condition.  Return the induction variable if found, NULL otherwise.  */
1437
1438 static tree
1439 find_induction_var_from_exit_cond (struct loop *loop)
1440 {
1441   tree expr = get_loop_exit_condition (loop);
1442   tree ivarop;
1443   tree test;
1444   if (expr == NULL_TREE)
1445     return NULL_TREE;
1446   if (TREE_CODE (expr) != COND_EXPR)
1447     return NULL_TREE;
1448   test = TREE_OPERAND (expr, 0);
1449   if (!COMPARISON_CLASS_P (test))
1450     return NULL_TREE;
1451
1452   /* Find the side that is invariant in this loop. The ivar must be the other
1453      side.  */
1454
1455   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1456       ivarop = TREE_OPERAND (test, 1);
1457   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1458       ivarop = TREE_OPERAND (test, 0);
1459   else
1460     return NULL_TREE;
1461
1462   if (TREE_CODE (ivarop) != SSA_NAME)
1463     return NULL_TREE;
1464   return ivarop;
1465 }
1466
1467 DEF_VEC_P(lambda_loop);
1468 DEF_VEC_ALLOC_P(lambda_loop,heap);
1469
1470 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1471    Return the new loop nest.
1472    INDUCTIONVARS is a pointer to an array of induction variables for the
1473    loopnest that will be filled in during this process.
1474    INVARIANTS is a pointer to an array of invariants that will be filled in
1475    during this process.  */
1476
1477 lambda_loopnest
1478 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1479                                  VEC(tree,heap) **inductionvars,
1480                                  VEC(tree,heap) **invariants,
1481                                  struct obstack * lambda_obstack)
1482 {
1483   lambda_loopnest ret = NULL;
1484   struct loop *temp = loop_nest;
1485   int depth = depth_of_nest (loop_nest);
1486   size_t i;
1487   VEC(lambda_loop,heap) *loops = NULL;
1488   VEC(tree,heap) *uboundvars = NULL;
1489   VEC(tree,heap) *lboundvars  = NULL;
1490   VEC(int,heap) *steps = NULL;
1491   lambda_loop newloop;
1492   tree inductionvar = NULL;
1493   bool perfect_nest = perfect_nest_p (loop_nest);
1494
1495   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1496     goto fail;
1497
1498   while (temp)
1499     {
1500       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1501                                          &inductionvar, *inductionvars,
1502                                          &lboundvars, &uboundvars,
1503                                          &steps, lambda_obstack);
1504       if (!newloop)
1505         goto fail;
1506
1507       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1508       VEC_safe_push (lambda_loop, heap, loops, newloop);
1509       temp = temp->inner;
1510     }
1511
1512   if (!perfect_nest)
1513     {
1514       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1515                             *inductionvars))
1516         {
1517           if (dump_file)
1518             fprintf (dump_file,
1519                      "Not a perfect loop nest and couldn't convert to one.\n");
1520           goto fail;
1521         }
1522       else if (dump_file)
1523         fprintf (dump_file,
1524                  "Successfully converted loop nest to perfect loop nest.\n");
1525     }
1526
1527   ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1528
1529   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1530     LN_LOOPS (ret)[i] = newloop;
1531
1532  fail:
1533   VEC_free (lambda_loop, heap, loops);
1534   VEC_free (tree, heap, uboundvars);
1535   VEC_free (tree, heap, lboundvars);
1536   VEC_free (int, heap, steps);
1537
1538   return ret;
1539 }
1540
1541 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1542    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1543    inserted for us are stored.  INDUCTION_VARS is the array of induction
1544    variables for the loop this LBV is from.  TYPE is the tree type to use for
1545    the variables and trees involved.  */
1546
1547 static tree
1548 lbv_to_gcc_expression (lambda_body_vector lbv,
1549                        tree type, VEC(tree,heap) *induction_vars,
1550                        tree *stmts_to_insert)
1551 {
1552   int k;
1553   tree resvar;
1554   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1555
1556   k = LBV_DENOMINATOR (lbv);
1557   gcc_assert (k != 0);
1558   if (k != 1)
1559     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1560
1561   resvar = create_tmp_var (type, "lbvtmp");
1562   add_referenced_var (resvar);
1563   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1564 }
1565
1566 /* Convert a linear expression from coefficient and constant form to a
1567    gcc tree.
1568    Return the tree that represents the final value of the expression.
1569    LLE is the linear expression to convert.
1570    OFFSET is the linear offset to apply to the expression.
1571    TYPE is the tree type to use for the variables and math.
1572    INDUCTION_VARS is a vector of induction variables for the loops.
1573    INVARIANTS is a vector of the loop nest invariants.
1574    WRAP specifies what tree code to wrap the results in, if there is more than
1575    one (it is either MAX_EXPR, or MIN_EXPR).
1576    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1577    statements that need to be inserted for the linear expression.  */
1578
1579 static tree
1580 lle_to_gcc_expression (lambda_linear_expression lle,
1581                        lambda_linear_expression offset,
1582                        tree type,
1583                        VEC(tree,heap) *induction_vars,
1584                        VEC(tree,heap) *invariants,
1585                        enum tree_code wrap, tree *stmts_to_insert)
1586 {
1587   int k;
1588   tree resvar;
1589   tree expr = NULL_TREE;
1590   VEC(tree,heap) *results = NULL;
1591
1592   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1593
1594   /* Build up the linear expressions.  */
1595   for (; lle != NULL; lle = LLE_NEXT (lle))
1596     {
1597       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1598       expr = fold_build2 (PLUS_EXPR, type, expr,
1599                           build_linear_expr (type,
1600                                              LLE_INVARIANT_COEFFICIENTS (lle),
1601                                              invariants));
1602
1603       k = LLE_CONSTANT (lle);
1604       if (k)
1605         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1606
1607       k = LLE_CONSTANT (offset);
1608       if (k)
1609         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1610
1611       k = LLE_DENOMINATOR (lle);
1612       if (k != 1)
1613         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1614                             type, expr, build_int_cst (type, k));
1615
1616       expr = fold (expr);
1617       VEC_safe_push (tree, heap, results, expr);
1618     }
1619
1620   gcc_assert (expr);
1621
1622   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1623   if (VEC_length (tree, results) > 1)
1624     {
1625       size_t i;
1626       tree op;
1627
1628       expr = VEC_index (tree, results, 0);
1629       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1630         expr = fold_build2 (wrap, type, expr, op);
1631     }
1632
1633   VEC_free (tree, heap, results);
1634
1635   resvar = create_tmp_var (type, "lletmp");
1636   add_referenced_var (resvar);
1637   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1638 }
1639
1640 /* Remove the induction variable defined at IV_STMT.  */
1641
1642 void
1643 remove_iv (tree iv_stmt)
1644 {
1645   if (TREE_CODE (iv_stmt) == PHI_NODE)
1646     {
1647       int i;
1648
1649       for (i = 0; i < PHI_NUM_ARGS (iv_stmt); i++)
1650         {
1651           tree stmt;
1652           imm_use_iterator imm_iter;
1653           tree arg = PHI_ARG_DEF (iv_stmt, i);
1654           bool used = false;
1655
1656           if (TREE_CODE (arg) != SSA_NAME)
1657             continue;
1658
1659           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1660             if (stmt != iv_stmt)
1661               used = true;
1662
1663           if (!used)
1664             remove_iv (SSA_NAME_DEF_STMT (arg));
1665         }
1666
1667       remove_phi_node (iv_stmt, NULL_TREE, true);
1668     }
1669   else
1670     {
1671       block_stmt_iterator bsi = bsi_for_stmt (iv_stmt);
1672
1673       bsi_remove (&bsi, true);
1674       release_defs (iv_stmt);
1675     }
1676 }
1677
1678 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1679    it, back into gcc code.  This changes the
1680    loops, their induction variables, and their bodies, so that they
1681    match the transformed loopnest.
1682    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1683    loopnest.
1684    OLD_IVS is a vector of induction variables from the old loopnest.
1685    INVARIANTS is a vector of loop invariants from the old loopnest.
1686    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1687    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1688    NEW_LOOPNEST.  */
1689
1690 void
1691 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1692                                  VEC(tree,heap) *old_ivs,
1693                                  VEC(tree,heap) *invariants,
1694                                  VEC(tree,heap) **remove_ivs,
1695                                  lambda_loopnest new_loopnest,
1696                                  lambda_trans_matrix transform,
1697                                  struct obstack * lambda_obstack)
1698 {
1699   struct loop *temp;
1700   size_t i = 0;
1701   int j;
1702   size_t depth = 0;
1703   VEC(tree,heap) *new_ivs = NULL;
1704   tree oldiv;
1705   block_stmt_iterator bsi;
1706
1707   transform = lambda_trans_matrix_inverse (transform);
1708
1709   if (dump_file)
1710     {
1711       fprintf (dump_file, "Inverse of transformation matrix:\n");
1712       print_lambda_trans_matrix (dump_file, transform);
1713     }
1714   depth = depth_of_nest (old_loopnest);
1715   temp = old_loopnest;
1716
1717   while (temp)
1718     {
1719       lambda_loop newloop;
1720       basic_block bb;
1721       edge exit;
1722       tree ivvar, ivvarinced, exitcond, stmts;
1723       enum tree_code testtype;
1724       tree newupperbound, newlowerbound;
1725       lambda_linear_expression offset;
1726       tree type;
1727       bool insert_after;
1728       tree inc_stmt;
1729
1730       oldiv = VEC_index (tree, old_ivs, i);
1731       type = TREE_TYPE (oldiv);
1732
1733       /* First, build the new induction variable temporary  */
1734
1735       ivvar = create_tmp_var (type, "lnivtmp");
1736       add_referenced_var (ivvar);
1737
1738       VEC_safe_push (tree, heap, new_ivs, ivvar);
1739
1740       newloop = LN_LOOPS (new_loopnest)[i];
1741
1742       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1743          cases for now.  */
1744       offset = LL_LINEAR_OFFSET (newloop);
1745
1746       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1747                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1748
1749       /* Now build the  new lower bounds, and insert the statements
1750          necessary to generate it on the loop preheader.  */
1751       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1752                                              LL_LINEAR_OFFSET (newloop),
1753                                              type,
1754                                              new_ivs,
1755                                              invariants, MAX_EXPR, &stmts);
1756
1757       if (stmts)
1758         {
1759           bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1760           bsi_commit_edge_inserts ();
1761         }
1762       /* Build the new upper bound and insert its statements in the
1763          basic block of the exit condition */
1764       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1765                                              LL_LINEAR_OFFSET (newloop),
1766                                              type,
1767                                              new_ivs,
1768                                              invariants, MIN_EXPR, &stmts);
1769       exit = single_exit (temp);
1770       exitcond = get_loop_exit_condition (temp);
1771       bb = bb_for_stmt (exitcond);
1772       bsi = bsi_after_labels (bb);
1773       if (stmts)
1774         bsi_insert_before (&bsi, stmts, BSI_NEW_STMT);
1775
1776       /* Create the new iv.  */
1777
1778       standard_iv_increment_position (temp, &bsi, &insert_after);
1779       create_iv (newlowerbound,
1780                  build_int_cst (type, LL_STEP (newloop)),
1781                  ivvar, temp, &bsi, insert_after, &ivvar,
1782                  NULL);
1783
1784       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1785          dominate the block containing the exit condition.
1786          So we simply create our own incremented iv to use in the new exit
1787          test,  and let redundancy elimination sort it out.  */
1788       inc_stmt = build2 (PLUS_EXPR, type,
1789                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1790       inc_stmt = build_gimple_modify_stmt (SSA_NAME_VAR (ivvar), inc_stmt);
1791       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1792       GIMPLE_STMT_OPERAND (inc_stmt, 0) = ivvarinced;
1793       bsi = bsi_for_stmt (exitcond);
1794       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1795
1796       /* Replace the exit condition with the new upper bound
1797          comparison.  */
1798
1799       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1800
1801       /* We want to build a conditional where true means exit the loop, and
1802          false means continue the loop.
1803          So swap the testtype if this isn't the way things are.*/
1804
1805       if (exit->flags & EDGE_FALSE_VALUE)
1806         testtype = swap_tree_comparison (testtype);
1807
1808       COND_EXPR_COND (exitcond) = build2 (testtype,
1809                                           boolean_type_node,
1810                                           newupperbound, ivvarinced);
1811       update_stmt (exitcond);
1812       VEC_replace (tree, new_ivs, i, ivvar);
1813
1814       i++;
1815       temp = temp->inner;
1816     }
1817
1818   /* Rewrite uses of the old ivs so that they are now specified in terms of
1819      the new ivs.  */
1820
1821   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1822     {
1823       imm_use_iterator imm_iter;
1824       use_operand_p use_p;
1825       tree oldiv_def;
1826       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1827       tree stmt;
1828
1829       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1830         oldiv_def = PHI_RESULT (oldiv_stmt);
1831       else
1832         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1833       gcc_assert (oldiv_def != NULL_TREE);
1834
1835       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1836         {
1837           tree newiv, stmts;
1838           lambda_body_vector lbv, newlbv;
1839
1840           /* Compute the new expression for the induction
1841              variable.  */
1842           depth = VEC_length (tree, new_ivs);
1843           lbv = lambda_body_vector_new (depth, lambda_obstack);
1844           LBV_COEFFICIENTS (lbv)[i] = 1;
1845
1846           newlbv = lambda_body_vector_compute_new (transform, lbv,
1847                                                    lambda_obstack);
1848
1849           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1850                                          new_ivs, &stmts);
1851
1852           if (stmts && TREE_CODE (stmt) != PHI_NODE)
1853             {
1854               bsi = bsi_for_stmt (stmt);
1855               bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1856             }
1857
1858           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1859             propagate_value (use_p, newiv);
1860
1861           if (stmts && TREE_CODE (stmt) == PHI_NODE)
1862             for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
1863               if (PHI_ARG_DEF (stmt, j) == newiv)
1864                 bsi_insert_on_edge (PHI_ARG_EDGE (stmt, j), stmts);
1865
1866           update_stmt (stmt);
1867         }
1868
1869       /* Remove the now unused induction variable.  */
1870       VEC_safe_push (tree, heap, *remove_ivs, oldiv_stmt);
1871     }
1872   VEC_free (tree, heap, new_ivs);
1873 }
1874
1875 /* Return TRUE if this is not interesting statement from the perspective of
1876    determining if we have a perfect loop nest.  */
1877
1878 static bool
1879 not_interesting_stmt (tree stmt)
1880 {
1881   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1882      loop, we would have already failed the number of exits tests.  */
1883   if (TREE_CODE (stmt) == LABEL_EXPR
1884       || TREE_CODE (stmt) == GOTO_EXPR
1885       || TREE_CODE (stmt) == COND_EXPR)
1886     return true;
1887   return false;
1888 }
1889
1890 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1891
1892 static bool
1893 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1894 {
1895   int i;
1896   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1897     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1898       if (PHI_ARG_DEF (phi, i) == def)
1899         return true;
1900   return false;
1901 }
1902
1903 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1904
1905 static bool
1906 stmt_uses_phi_result (tree stmt, tree phi_result)
1907 {
1908   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1909
1910   /* This is conservatively true, because we only want SIMPLE bumpers
1911      of the form x +- constant for our pass.  */
1912   return (use == phi_result);
1913 }
1914
1915 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1916    in-loop-edge in a phi node, and the operand it uses is the result of that
1917    phi node.
1918    I.E. i_29 = i_3 + 1
1919         i_3 = PHI (0, i_29);  */
1920
1921 static bool
1922 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
1923 {
1924   tree use;
1925   tree def;
1926   imm_use_iterator iter;
1927   use_operand_p use_p;
1928
1929   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1930   if (!def)
1931     return false;
1932
1933   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1934     {
1935       use = USE_STMT (use_p);
1936       if (TREE_CODE (use) == PHI_NODE)
1937         {
1938           if (phi_loop_edge_uses_def (loop, use, def))
1939             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1940               return true;
1941         }
1942     }
1943   return false;
1944 }
1945
1946
1947 /* Return true if LOOP is a perfect loop nest.
1948    Perfect loop nests are those loop nests where all code occurs in the
1949    innermost loop body.
1950    If S is a program statement, then
1951
1952    i.e.
1953    DO I = 1, 20
1954        S1
1955        DO J = 1, 20
1956        ...
1957        END DO
1958    END DO
1959    is not a perfect loop nest because of S1.
1960
1961    DO I = 1, 20
1962       DO J = 1, 20
1963         S1
1964         ...
1965       END DO
1966    END DO
1967    is a perfect loop nest.
1968
1969    Since we don't have high level loops anymore, we basically have to walk our
1970    statements and ignore those that are there because the loop needs them (IE
1971    the induction variable increment, and jump back to the top of the loop).  */
1972
1973 bool
1974 perfect_nest_p (struct loop *loop)
1975 {
1976   basic_block *bbs;
1977   size_t i;
1978   tree exit_cond;
1979
1980   /* Loops at depth 0 are perfect nests.  */
1981   if (!loop->inner)
1982     return true;
1983
1984   bbs = get_loop_body (loop);
1985   exit_cond = get_loop_exit_condition (loop);
1986
1987   for (i = 0; i < loop->num_nodes; i++)
1988     {
1989       if (bbs[i]->loop_father == loop)
1990         {
1991           block_stmt_iterator bsi;
1992
1993           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1994             {
1995               tree stmt = bsi_stmt (bsi);
1996
1997               if (TREE_CODE (stmt) == COND_EXPR
1998                   && exit_cond != stmt)
1999                 goto non_perfectly_nested;
2000
2001               if (stmt == exit_cond
2002                   || not_interesting_stmt (stmt)
2003                   || stmt_is_bumper_for_loop (loop, stmt))
2004                 continue;
2005
2006             non_perfectly_nested:
2007               free (bbs);
2008               return false;
2009             }
2010         }
2011     }
2012
2013   free (bbs);
2014
2015   return perfect_nest_p (loop->inner);
2016 }
2017
2018 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2019    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2020    avoid creating duplicate temporaries and FIRSTBSI is statement
2021    iterator where new temporaries should be inserted at the beginning
2022    of body basic block.  */
2023
2024 static void
2025 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2026                                 int xstep, tree y, tree yinit,
2027                                 htab_t replacements,
2028                                 block_stmt_iterator *firstbsi)
2029 {
2030   ssa_op_iter iter;
2031   use_operand_p use_p;
2032
2033   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2034     {
2035       tree use = USE_FROM_PTR (use_p);
2036       tree step = NULL_TREE;
2037       tree scev, init, val, var, setstmt;
2038       struct tree_map *h, in;
2039       void **loc;
2040
2041       /* Replace uses of X with Y right away.  */
2042       if (use == x)
2043         {
2044           SET_USE (use_p, y);
2045           continue;
2046         }
2047
2048       scev = instantiate_parameters (loop,
2049                                      analyze_scalar_evolution (loop, use));
2050
2051       if (scev == NULL || scev == chrec_dont_know)
2052         continue;
2053
2054       step = evolution_part_in_loop_num (scev, loop->num);
2055       if (step == NULL
2056           || step == chrec_dont_know
2057           || TREE_CODE (step) != INTEGER_CST
2058           || int_cst_value (step) != xstep)
2059         continue;
2060
2061       /* Use REPLACEMENTS hash table to cache already created
2062          temporaries.  */
2063       in.hash = htab_hash_pointer (use);
2064       in.base.from = use;
2065       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2066       if (h != NULL)
2067         {
2068           SET_USE (use_p, h->to);
2069           continue;
2070         }
2071
2072       /* USE which has the same step as X should be replaced
2073          with a temporary set to Y + YINIT - INIT.  */
2074       init = initial_condition_in_loop_num (scev, loop->num);
2075       gcc_assert (init != NULL && init != chrec_dont_know);
2076       if (TREE_TYPE (use) == TREE_TYPE (y))
2077         {
2078           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2079           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2080           if (val == y)
2081             {
2082               /* If X has the same type as USE, the same step
2083                  and same initial value, it can be replaced by Y.  */
2084               SET_USE (use_p, y);
2085               continue;
2086             }
2087         }
2088       else
2089         {
2090           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2091           val = fold_convert (TREE_TYPE (use), val);
2092           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2093         }
2094
2095       /* Create a temporary variable and insert it at the beginning
2096          of the loop body basic block, right after the PHI node
2097          which sets Y.  */
2098       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2099       add_referenced_var (var);
2100       val = force_gimple_operand_bsi (firstbsi, val, false, NULL,
2101                                       true, BSI_SAME_STMT);
2102       setstmt = build_gimple_modify_stmt (var, val);
2103       var = make_ssa_name (var, setstmt);
2104       GIMPLE_STMT_OPERAND (setstmt, 0) = var;
2105       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2106       update_stmt (setstmt);
2107       SET_USE (use_p, var);
2108       h = GGC_NEW (struct tree_map);
2109       h->hash = in.hash;
2110       h->base.from = use;
2111       h->to = var;
2112       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2113       gcc_assert ((*(struct tree_map **)loc) == NULL);
2114       *(struct tree_map **) loc = h;
2115     }
2116 }
2117
2118 /* Return true if STMT is an exit PHI for LOOP */
2119
2120 static bool
2121 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2122 {
2123
2124   if (TREE_CODE (stmt) != PHI_NODE
2125       || PHI_NUM_ARGS (stmt) != 1
2126       || bb_for_stmt (stmt) != single_exit (loop)->dest)
2127     return false;
2128
2129   return true;
2130 }
2131
2132 /* Return true if STMT can be put back into the loop INNER, by
2133    copying it to the beginning of that loop and changing the uses.  */
2134
2135 static bool
2136 can_put_in_inner_loop (struct loop *inner, tree stmt)
2137 {
2138   imm_use_iterator imm_iter;
2139   use_operand_p use_p;
2140
2141   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2142   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2143       || !expr_invariant_in_loop_p (inner, GIMPLE_STMT_OPERAND (stmt, 1)))
2144     return false;
2145
2146   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2147     {
2148       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2149         {
2150           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2151
2152           if (!flow_bb_inside_loop_p (inner, immbb))
2153             return false;
2154         }
2155     }
2156   return true;
2157 }
2158
2159 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2160 static bool
2161 can_put_after_inner_loop (struct loop *loop, tree stmt)
2162 {
2163   imm_use_iterator imm_iter;
2164   use_operand_p use_p;
2165
2166   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2167     return false;
2168
2169   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2170     {
2171       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2172         {
2173           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2174
2175           if (!dominated_by_p (CDI_DOMINATORS,
2176                                immbb,
2177                                loop->inner->header)
2178               && !can_put_in_inner_loop (loop->inner, stmt))
2179             return false;
2180         }
2181     }
2182   return true;
2183 }
2184
2185 /* Return true when the induction variable IV is simple enough to be
2186    re-synthesized.  */
2187
2188 static bool
2189 can_duplicate_iv (tree iv, struct loop *loop)
2190 {
2191   tree scev = instantiate_parameters
2192     (loop, analyze_scalar_evolution (loop, iv));
2193
2194   if (!automatically_generated_chrec_p (scev))
2195     {
2196       tree step = evolution_part_in_loop_num (scev, loop->num);
2197
2198       if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2199         return true;
2200     }
2201
2202   return false;
2203 }
2204
2205 /* If this is a scalar operation that can be put back into the inner
2206    loop, or after the inner loop, through copying, then do so. This
2207    works on the theory that any amount of scalar code we have to
2208    reduplicate into or after the loops is less expensive that the win
2209    we get from rearranging the memory walk the loop is doing so that
2210    it has better cache behavior.  */
2211
2212 static bool
2213 cannot_convert_modify_to_perfect_nest (tree stmt, struct loop *loop)
2214 {
2215
2216   use_operand_p use_a, use_b;
2217   imm_use_iterator imm_iter;
2218   ssa_op_iter op_iter, op_iter1;
2219   tree op0 = GIMPLE_STMT_OPERAND (stmt, 0);
2220
2221   /* The statement should not define a variable used in the inner
2222      loop.  */
2223   if (TREE_CODE (op0) == SSA_NAME
2224       && !can_duplicate_iv (op0, loop))
2225     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2226       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2227           == loop->inner)
2228         return true;
2229
2230   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2231     {
2232       tree node, op = USE_FROM_PTR (use_a);
2233
2234       /* The variables should not be used in both loops.  */
2235       if (!can_duplicate_iv (op, loop))
2236         FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2237           if (bb_for_stmt (USE_STMT (use_b))->loop_father
2238               == loop->inner)
2239             return true;
2240
2241       /* The statement should not use the value of a scalar that was
2242          modified in the loop.  */
2243       node = SSA_NAME_DEF_STMT (op);
2244       if (TREE_CODE (node) == PHI_NODE)
2245         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2246         {
2247           tree arg = USE_FROM_PTR (use_b);
2248
2249           if (TREE_CODE (arg) == SSA_NAME)
2250             {
2251               tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2252
2253               if (bb_for_stmt (arg_stmt)
2254                   && (bb_for_stmt (arg_stmt)->loop_father
2255                       == loop->inner))
2256                 return true;
2257             }
2258         }
2259     }
2260
2261   return false;
2262 }
2263
2264 /* Return true when BB contains statements that can harm the transform
2265    to a perfect loop nest.  */
2266
2267 static bool
2268 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2269 {
2270   block_stmt_iterator bsi;
2271   tree exit_condition = get_loop_exit_condition (loop);
2272
2273   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
2274     {
2275       tree stmt = bsi_stmt (bsi);
2276
2277       if (stmt == exit_condition
2278           || not_interesting_stmt (stmt)
2279           || stmt_is_bumper_for_loop (loop, stmt))
2280         continue;
2281
2282       if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
2283         {
2284           if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2285             return true;
2286
2287           if (can_duplicate_iv (GIMPLE_STMT_OPERAND (stmt, 0), loop))
2288             continue;
2289
2290           if (can_put_in_inner_loop (loop->inner, stmt)
2291               || can_put_after_inner_loop (loop, stmt))
2292             continue;
2293         }
2294
2295       /* If the bb of a statement we care about isn't dominated by the
2296          header of the inner loop, then we can't handle this case
2297          right now.  This test ensures that the statement comes
2298          completely *after* the inner loop.  */
2299       if (!dominated_by_p (CDI_DOMINATORS,
2300                            bb_for_stmt (stmt),
2301                            loop->inner->header))
2302         return true;
2303     }
2304
2305   return false;
2306 }
2307
2308 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2309    perfect one.  At the moment, we only handle imperfect nests of
2310    depth 2, where all of the statements occur after the inner loop.  */
2311
2312 static bool
2313 can_convert_to_perfect_nest (struct loop *loop)
2314 {
2315   basic_block *bbs;
2316   tree phi;
2317   size_t i;
2318
2319   /* Can't handle triply nested+ loops yet.  */
2320   if (!loop->inner || loop->inner->inner)
2321     return false;
2322
2323   bbs = get_loop_body (loop);
2324   for (i = 0; i < loop->num_nodes; i++)
2325     if (bbs[i]->loop_father == loop
2326         && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2327       goto fail;
2328
2329   /* We also need to make sure the loop exit only has simple copy phis in it,
2330      otherwise we don't know how to transform it into a perfect nest.  */
2331   for (phi = phi_nodes (single_exit (loop)->dest); phi; phi = PHI_CHAIN (phi))
2332     if (PHI_NUM_ARGS (phi) != 1)
2333       goto fail;
2334
2335   free (bbs);
2336   return true;
2337
2338  fail:
2339   free (bbs);
2340   return false;
2341 }
2342
2343 /* Transform the loop nest into a perfect nest, if possible.
2344    LOOP is the loop nest to transform into a perfect nest
2345    LBOUNDS are the lower bounds for the loops to transform
2346    UBOUNDS are the upper bounds for the loops to transform
2347    STEPS is the STEPS for the loops to transform.
2348    LOOPIVS is the induction variables for the loops to transform.
2349
2350    Basically, for the case of
2351
2352    FOR (i = 0; i < 50; i++)
2353     {
2354      FOR (j =0; j < 50; j++)
2355      {
2356         <whatever>
2357      }
2358      <some code>
2359     }
2360
2361    This function will transform it into a perfect loop nest by splitting the
2362    outer loop into two loops, like so:
2363
2364    FOR (i = 0; i < 50; i++)
2365    {
2366      FOR (j = 0; j < 50; j++)
2367      {
2368          <whatever>
2369      }
2370    }
2371
2372    FOR (i = 0; i < 50; i ++)
2373    {
2374     <some code>
2375    }
2376
2377    Return FALSE if we can't make this loop into a perfect nest.  */
2378
2379 static bool
2380 perfect_nestify (struct loop *loop,
2381                  VEC(tree,heap) *lbounds,
2382                  VEC(tree,heap) *ubounds,
2383                  VEC(int,heap) *steps,
2384                  VEC(tree,heap) *loopivs)
2385 {
2386   basic_block *bbs;
2387   tree exit_condition;
2388   tree cond_stmt;
2389   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2390   int i;
2391   block_stmt_iterator bsi, firstbsi;
2392   bool insert_after;
2393   edge e;
2394   struct loop *newloop;
2395   tree phi;
2396   tree uboundvar;
2397   tree stmt;
2398   tree oldivvar, ivvar, ivvarinced;
2399   VEC(tree,heap) *phis = NULL;
2400   htab_t replacements = NULL;
2401
2402   /* Create the new loop.  */
2403   olddest = single_exit (loop)->dest;
2404   preheaderbb = split_edge (single_exit (loop));
2405   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2406
2407   /* Push the exit phi nodes that we are moving.  */
2408   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2409     {
2410       VEC_reserve (tree, heap, phis, 2);
2411       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2412       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2413     }
2414   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2415
2416   /* Remove the exit phis from the old basic block.  */
2417   while (phi_nodes (olddest) != NULL)
2418     remove_phi_node (phi_nodes (olddest), NULL, false);
2419
2420   /* and add them back to the new basic block.  */
2421   while (VEC_length (tree, phis) != 0)
2422     {
2423       tree def;
2424       tree phiname;
2425       def = VEC_pop (tree, phis);
2426       phiname = VEC_pop (tree, phis);
2427       phi = create_phi_node (phiname, preheaderbb);
2428       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2429     }
2430   flush_pending_stmts (e);
2431   VEC_free (tree, heap, phis);
2432
2433   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2434   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2435   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2436   cond_stmt = build3 (COND_EXPR, void_type_node,
2437                       build2 (NE_EXPR, boolean_type_node,
2438                               integer_one_node,
2439                               integer_zero_node),
2440                       NULL_TREE, NULL_TREE);
2441   bsi = bsi_start (bodybb);
2442   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2443   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2444   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2445   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2446
2447   /* Update the loop structures.  */
2448   newloop = duplicate_loop (loop, olddest->loop_father);
2449   newloop->header = headerbb;
2450   newloop->latch = latchbb;
2451   add_bb_to_loop (latchbb, newloop);
2452   add_bb_to_loop (bodybb, newloop);
2453   add_bb_to_loop (headerbb, newloop);
2454   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2455   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2456   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2457                            single_exit (loop)->src);
2458   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2459   set_immediate_dominator (CDI_DOMINATORS, olddest,
2460                            recompute_dominator (CDI_DOMINATORS, olddest));
2461   /* Create the new iv.  */
2462   oldivvar = VEC_index (tree, loopivs, 0);
2463   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2464   add_referenced_var (ivvar);
2465   standard_iv_increment_position (newloop, &bsi, &insert_after);
2466   create_iv (VEC_index (tree, lbounds, 0),
2467              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2468              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2469
2470   /* Create the new upper bound.  This may be not just a variable, so we copy
2471      it to one just in case.  */
2472
2473   exit_condition = get_loop_exit_condition (newloop);
2474   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2475   add_referenced_var (uboundvar);
2476   stmt = build_gimple_modify_stmt (uboundvar, VEC_index (tree, ubounds, 0));
2477   uboundvar = make_ssa_name (uboundvar, stmt);
2478   GIMPLE_STMT_OPERAND (stmt, 0) = uboundvar;
2479
2480   if (insert_after)
2481     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2482   else
2483     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2484   update_stmt (stmt);
2485   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2486                                             boolean_type_node,
2487                                             uboundvar,
2488                                             ivvarinced);
2489   update_stmt (exit_condition);
2490   replacements = htab_create_ggc (20, tree_map_hash,
2491                                   tree_map_eq, NULL);
2492   bbs = get_loop_body_in_dom_order (loop);
2493   /* Now move the statements, and replace the induction variable in the moved
2494      statements with the correct loop induction variable.  */
2495   oldivvar = VEC_index (tree, loopivs, 0);
2496   firstbsi = bsi_start (bodybb);
2497   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2498     {
2499       block_stmt_iterator tobsi = bsi_last (bodybb);
2500       if (bbs[i]->loop_father == loop)
2501         {
2502           /* If this is true, we are *before* the inner loop.
2503              If this isn't true, we are *after* it.
2504
2505              The only time can_convert_to_perfect_nest returns true when we
2506              have statements before the inner loop is if they can be moved
2507              into the inner loop.
2508
2509              The only time can_convert_to_perfect_nest returns true when we
2510              have statements after the inner loop is if they can be moved into
2511              the new split loop.  */
2512
2513           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2514             {
2515               block_stmt_iterator header_bsi
2516                 = bsi_after_labels (loop->inner->header);
2517
2518               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2519                 {
2520                   tree stmt = bsi_stmt (bsi);
2521
2522                   if (stmt == exit_condition
2523                       || not_interesting_stmt (stmt)
2524                       || stmt_is_bumper_for_loop (loop, stmt))
2525                     {
2526                       bsi_next (&bsi);
2527                       continue;
2528                     }
2529
2530                   bsi_move_before (&bsi, &header_bsi);
2531                 }
2532             }
2533           else
2534             {
2535               /* Note that the bsi only needs to be explicitly incremented
2536                  when we don't move something, since it is automatically
2537                  incremented when we do.  */
2538               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2539                 {
2540                   ssa_op_iter i;
2541                   tree n, stmt = bsi_stmt (bsi);
2542
2543                   if (stmt == exit_condition
2544                       || not_interesting_stmt (stmt)
2545                       || stmt_is_bumper_for_loop (loop, stmt))
2546                     {
2547                       bsi_next (&bsi);
2548                       continue;
2549                     }
2550
2551                   replace_uses_equiv_to_x_with_y
2552                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2553                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2554
2555                   bsi_move_before (&bsi, &tobsi);
2556
2557                   /* If the statement has any virtual operands, they may
2558                      need to be rewired because the original loop may
2559                      still reference them.  */
2560                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2561                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2562                 }
2563             }
2564
2565         }
2566     }
2567
2568   free (bbs);
2569   htab_delete (replacements);
2570   return perfect_nest_p (loop);
2571 }
2572
2573 /* Return true if TRANS is a legal transformation matrix that respects
2574    the dependence vectors in DISTS and DIRS.  The conservative answer
2575    is false.
2576
2577    "Wolfe proves that a unimodular transformation represented by the
2578    matrix T is legal when applied to a loop nest with a set of
2579    lexicographically non-negative distance vectors RDG if and only if
2580    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2581    i.e.: if and only if it transforms the lexicographically positive
2582    distance vectors to lexicographically positive vectors.  Note that
2583    a unimodular matrix must transform the zero vector (and only it) to
2584    the zero vector." S.Muchnick.  */
2585
2586 bool
2587 lambda_transform_legal_p (lambda_trans_matrix trans,
2588                           int nb_loops,
2589                           VEC (ddr_p, heap) *dependence_relations)
2590 {
2591   unsigned int i, j;
2592   lambda_vector distres;
2593   struct data_dependence_relation *ddr;
2594
2595   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2596               && LTM_ROWSIZE (trans) == nb_loops);
2597
2598   /* When there is an unknown relation in the dependence_relations, we
2599      know that it is no worth looking at this loop nest: give up.  */
2600   ddr = VEC_index (ddr_p, dependence_relations, 0);
2601   if (ddr == NULL)
2602     return true;
2603   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2604     return false;
2605
2606   distres = lambda_vector_new (nb_loops);
2607
2608   /* For each distance vector in the dependence graph.  */
2609   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2610     {
2611       /* Don't care about relations for which we know that there is no
2612          dependence, nor about read-read (aka. output-dependences):
2613          these data accesses can happen in any order.  */
2614       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2615           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2616         continue;
2617
2618       /* Conservatively answer: "this transformation is not valid".  */
2619       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2620         return false;
2621
2622       /* If the dependence could not be captured by a distance vector,
2623          conservatively answer that the transform is not valid.  */
2624       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2625         return false;
2626
2627       /* Compute trans.dist_vect */
2628       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2629         {
2630           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2631                                      DDR_DIST_VECT (ddr, j), distres);
2632
2633           if (!lambda_vector_lexico_pos (distres, nb_loops))
2634             return false;
2635         }
2636     }
2637   return true;
2638 }