gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "obstack.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44 #include "vecprim.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as an integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
  78  nest . U = auxiliary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 119                              VEC(tree,heap) *, VEC(int,heap) *,
 120                              VEC(tree,heap) *);
 121 /* Lattice stuff that is internal to the code generation algorithm.  */
 122
 123 typedef struct lambda_lattice_s
 124 {
 125   /* Lattice base matrix.  */
 126   lambda_matrix base;
 127   /* Lattice dimension.  */
 128   int dimension;
 129   /* Origin vector for the coefficients.  */
 130   lambda_vector origin;
 131   /* Origin matrix for the invariants.  */
 132   lambda_matrix origin_invariants;
 133   /* Number of invariants.  */
 134   int invariants;
 135 } *lambda_lattice;
 136
 137 #define LATTICE_BASE(T) ((T)->base)
 138 #define LATTICE_DIMENSION(T) ((T)->dimension)
 139 #define LATTICE_ORIGIN(T) ((T)->origin)
 140 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 141 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 142
 143 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 144                        int, int);
 145 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
 146 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
 147                                                    struct obstack *);
 148
 149 static tree find_induction_var_from_exit_cond (struct loop *);
 150 static bool can_convert_to_perfect_nest (struct loop *);
 151
 152 /* Create a new lambda body vector.  */
 153
 154 lambda_body_vector
 155 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
 156 {
 157   lambda_body_vector ret;
 158
 159   ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret));
 160   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 161   LBV_SIZE (ret) = size;
 162   LBV_DENOMINATOR (ret) = 1;
 163   return ret;
 164 }
 165
 166 /* Compute the new coefficients for the vector based on the
 167   *inverse* of the transformation matrix.  */
 168
 169 lambda_body_vector
 170 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 171                                 lambda_body_vector vect,
 172                                 struct obstack * lambda_obstack)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth, lambda_obstack);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants,
 229                               struct obstack * lambda_obstack)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
 234                                                  sizeof (*ret));
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants,
 332                      struct obstack * lambda_obstack)
 333 {
 334   lambda_loopnest ret;
 335   ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
 336
 337   LN_LOOPS (ret) = (lambda_loop *)
 338       obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
 339   LN_DEPTH (ret) = depth;
 340   LN_INVARIANTS (ret) = invariants;
 341
 342   return ret;
 343 }
 344
 345 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 346    character to use for loop names is given by START.  */
 347
 348 void
 349 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 350 {
 351   int i;
 352   for (i = 0; i < LN_DEPTH (nest); i++)
 353     {
 354       fprintf (outfile, "Loop %c\n", start + i);
 355       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 356                          LN_INVARIANTS (nest), 'i');
 357       fprintf (outfile, "\n");
 358     }
 359 }
 360
 361 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 362    of invariants.  */
 363
 364 static lambda_lattice
 365 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
 366 {
 367   lambda_lattice ret
 368       = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
 369   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 370   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 371   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 372   LATTICE_DIMENSION (ret) = depth;
 373   LATTICE_INVARIANTS (ret) = invariants;
 374   return ret;
 375 }
 376
 377 /* Compute the lattice base for NEST.  The lattice base is essentially a
 378    non-singular transform from a dense base space to a sparse iteration space.
 379    We use it so that we don't have to specially handle the case of a sparse
 380    iteration space in other parts of the algorithm.  As a result, this routine
 381    only does something interesting (IE produce a matrix that isn't the
 382    identity matrix) if NEST is a sparse space.  */
 383
 384 static lambda_lattice
 385 lambda_lattice_compute_base (lambda_loopnest nest,
 386                              struct obstack * lambda_obstack)
 387 {
 388   lambda_lattice ret;
 389   int depth, invariants;
 390   lambda_matrix base;
 391
 392   int i, j, step;
 393   lambda_loop loop;
 394   lambda_linear_expression expression;
 395
 396   depth = LN_DEPTH (nest);
 397   invariants = LN_INVARIANTS (nest);
 398
 399   ret = lambda_lattice_new (depth, invariants, lambda_obstack);
 400   base = LATTICE_BASE (ret);
 401   for (i = 0; i < depth; i++)
 402     {
 403       loop = LN_LOOPS (nest)[i];
 404       gcc_assert (loop);
 405       step = LL_STEP (loop);
 406       /* If we have a step of 1, then the base is one, and the
 407          origin and invariant coefficients are 0.  */
 408       if (step == 1)
 409         {
 410           for (j = 0; j < depth; j++)
 411             base[i][j] = 0;
 412           base[i][i] = 1;
 413           LATTICE_ORIGIN (ret)[i] = 0;
 414           for (j = 0; j < invariants; j++)
 415             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 416         }
 417       else
 418         {
 419           /* Otherwise, we need the lower bound expression (which must
 420              be an affine function)  to determine the base.  */
 421           expression = LL_LOWER_BOUND (loop);
 422           gcc_assert (expression && !LLE_NEXT (expression)
 423                       && LLE_DENOMINATOR (expression) == 1);
 424
 425           /* The lower triangular portion of the base is going to be the
 426              coefficient times the step */
 427           for (j = 0; j < i; j++)
 428             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 429               * LL_STEP (LN_LOOPS (nest)[j]);
 430           base[i][i] = step;
 431           for (j = i + 1; j < depth; j++)
 432             base[i][j] = 0;
 433
 434           /* Origin for this loop is the constant of the lower bound
 435              expression.  */
 436           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 437
 438           /* Coefficient for the invariants are equal to the invariant
 439              coefficients in the expression.  */
 440           for (j = 0; j < invariants; j++)
 441             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 442               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 443         }
 444     }
 445   return ret;
 446 }
 447
 448 /* Compute the least common multiple of two numbers A and B .  */
 449
 450 int
 451 least_common_multiple (int a, int b)
 452 {
 453   return (abs (a) * abs (b) / gcd (a, b));
 454 }
 455
 456 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 457    auxiliary nest.
 458    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 459    it is easy to calculate the answer and bounds.
 460    A sketch of how it works:
 461    Given a system of linear inequalities, ai * xj >= bk, you can always
 462    rewrite the constraints so they are all of the form
 463    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 464    in b1 ... bk, and some a in a1...ai)
 465    You can then eliminate this x from the non-constant inequalities by
 466    rewriting these as a <= b, x >= constant, and delete the x variable.
 467    You can then repeat this for any remaining x variables, and then we have
 468    an easy to use variable <= constant (or no variables at all) form that we
 469    can construct our bounds from.
 470
 471    In our case, each time we eliminate, we construct part of the bound from
 472    the ith variable, then delete the ith variable.
 473
 474    Remember the constant are in our vector a, our coefficient matrix is A,
 475    and our invariant coefficient matrix is B.
 476
 477    SIZE is the size of the matrices being passed.
 478    DEPTH is the loop nest depth.
 479    INVARIANTS is the number of loop invariants.
 480    A, B, and a are the coefficient matrix, invariant coefficient, and a
 481    vector of constants, respectively.  */
 482
 483 static lambda_loopnest
 484 compute_nest_using_fourier_motzkin (int size,
 485                                     int depth,
 486                                     int invariants,
 487                                     lambda_matrix A,
 488                                     lambda_matrix B,
 489                                     lambda_vector a,
 490                                     struct obstack * lambda_obstack)
 491 {
 492
 493   int multiple, f1, f2;
 494   int i, j, k;
 495   lambda_linear_expression expression;
 496   lambda_loop loop;
 497   lambda_loopnest auxillary_nest;
 498   lambda_matrix swapmatrix, A1, B1;
 499   lambda_vector swapvector, a1;
 500   int newsize;
 501
 502   A1 = lambda_matrix_new (128, depth);
 503   B1 = lambda_matrix_new (128, invariants);
 504   a1 = lambda_vector_new (128);
 505
 506   auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 507
 508   for (i = depth - 1; i >= 0; i--)
 509     {
 510       loop = lambda_loop_new ();
 511       LN_LOOPS (auxillary_nest)[i] = loop;
 512       LL_STEP (loop) = 1;
 513
 514       for (j = 0; j < size; j++)
 515         {
 516           if (A[j][i] < 0)
 517             {
 518               /* Any linear expression in the matrix with a coefficient less
 519                  than 0 becomes part of the new lower bound.  */
 520               expression = lambda_linear_expression_new (depth, invariants,
 521                                                          lambda_obstack);
 522
 523               for (k = 0; k < i; k++)
 524                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 525
 526               for (k = 0; k < invariants; k++)
 527                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 528
 529               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 530               LLE_CONSTANT (expression) = -1 * a[j];
 531
 532               /* Ignore if identical to the existing lower bound.  */
 533               if (!lle_equal (LL_LOWER_BOUND (loop),
 534                               expression, depth, invariants))
 535                 {
 536                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 537                   LL_LOWER_BOUND (loop) = expression;
 538                 }
 539
 540             }
 541           else if (A[j][i] > 0)
 542             {
 543               /* Any linear expression with a coefficient greater than 0
 544                  becomes part of the new upper bound.  */
 545               expression = lambda_linear_expression_new (depth, invariants,
 546                                                          lambda_obstack);
 547               for (k = 0; k < i; k++)
 548                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 549
 550               for (k = 0; k < invariants; k++)
 551                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 552
 553               LLE_DENOMINATOR (expression) = A[j][i];
 554               LLE_CONSTANT (expression) = a[j];
 555
 556               /* Ignore if identical to the existing upper bound.  */
 557               if (!lle_equal (LL_UPPER_BOUND (loop),
 558                               expression, depth, invariants))
 559                 {
 560                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 561                   LL_UPPER_BOUND (loop) = expression;
 562                 }
 563
 564             }
 565         }
 566
 567       /* This portion creates a new system of linear inequalities by deleting
 568          the i'th variable, reducing the system by one variable.  */
 569       newsize = 0;
 570       for (j = 0; j < size; j++)
 571         {
 572           /* If the coefficient for the i'th variable is 0, then we can just
 573              eliminate the variable straightaway.  Otherwise, we have to
 574              multiply through by the coefficients we are eliminating.  */
 575           if (A[j][i] == 0)
 576             {
 577               lambda_vector_copy (A[j], A1[newsize], depth);
 578               lambda_vector_copy (B[j], B1[newsize], invariants);
 579               a1[newsize] = a[j];
 580               newsize++;
 581             }
 582           else if (A[j][i] > 0)
 583             {
 584               for (k = 0; k < size; k++)
 585                 {
 586                   if (A[k][i] < 0)
 587                     {
 588                       multiple = least_common_multiple (A[j][i], A[k][i]);
 589                       f1 = multiple / A[j][i];
 590                       f2 = -1 * multiple / A[k][i];
 591
 592                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 593                                             A1[newsize], depth);
 594                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 595                                             B1[newsize], invariants);
 596                       a1[newsize] = f1 * a[j] + f2 * a[k];
 597                       newsize++;
 598                     }
 599                 }
 600             }
 601         }
 602
 603       swapmatrix = A;
 604       A = A1;
 605       A1 = swapmatrix;
 606
 607       swapmatrix = B;
 608       B = B1;
 609       B1 = swapmatrix;
 610
 611       swapvector = a;
 612       a = a1;
 613       a1 = swapvector;
 614
 615       size = newsize;
 616     }
 617
 618   return auxillary_nest;
 619 }
 620
 621 /* Compute the loop bounds for the auxiliary space NEST.
 622    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 623    Given the original nest, this function will
 624    1. Convert the nest into matrix form, which consists of a matrix for the
 625    coefficients, a matrix for the
 626    invariant coefficients, and a vector for the constants.
 627    2. Use the matrix form to calculate the lattice base for the nest (which is
 628    a dense space)
 629    3. Compose the dense space transform with the user specified transform, to
 630    get a transform we can easily calculate transformed bounds for.
 631    4. Multiply the composed transformation matrix times the matrix form of the
 632    loop.
 633    5. Transform the newly created matrix (from step 4) back into a loop nest
 634    using Fourier-Motzkin elimination to figure out the bounds.  */
 635
 636 static lambda_loopnest
 637 lambda_compute_auxillary_space (lambda_loopnest nest,
 638                                 lambda_trans_matrix trans,
 639                                 struct obstack * lambda_obstack)
 640 {
 641   lambda_matrix A, B, A1, B1;
 642   lambda_vector a, a1;
 643   lambda_matrix invertedtrans;
 644   int depth, invariants, size;
 645   int i, j;
 646   lambda_loop loop;
 647   lambda_linear_expression expression;
 648   lambda_lattice lattice;
 649
 650   depth = LN_DEPTH (nest);
 651   invariants = LN_INVARIANTS (nest);
 652
 653   /* Unfortunately, we can't know the number of constraints we'll have
 654      ahead of time, but this should be enough even in ridiculous loop nest
 655      cases. We must not go over this limit.  */
 656   A = lambda_matrix_new (128, depth);
 657   B = lambda_matrix_new (128, invariants);
 658   a = lambda_vector_new (128);
 659
 660   A1 = lambda_matrix_new (128, depth);
 661   B1 = lambda_matrix_new (128, invariants);
 662   a1 = lambda_vector_new (128);
 663
 664   /* Store the bounds in the equation matrix A, constant vector a, and
 665      invariant matrix B, so that we have Ax <= a + B.
 666      This requires a little equation rearranging so that everything is on the
 667      correct side of the inequality.  */
 668   size = 0;
 669   for (i = 0; i < depth; i++)
 670     {
 671       loop = LN_LOOPS (nest)[i];
 672
 673       /* First we do the lower bound.  */
 674       if (LL_STEP (loop) > 0)
 675         expression = LL_LOWER_BOUND (loop);
 676       else
 677         expression = LL_UPPER_BOUND (loop);
 678
 679       for (; expression != NULL; expression = LLE_NEXT (expression))
 680         {
 681           /* Fill in the coefficient.  */
 682           for (j = 0; j < i; j++)
 683             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 684
 685           /* And the invariant coefficient.  */
 686           for (j = 0; j < invariants; j++)
 687             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 688
 689           /* And the constant.  */
 690           a[size] = LLE_CONSTANT (expression);
 691
 692           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 693              constants and single variables on   */
 694           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 695           a[size] *= -1;
 696           for (j = 0; j < invariants; j++)
 697             B[size][j] *= -1;
 698
 699           size++;
 700           /* Need to increase matrix sizes above.  */
 701           gcc_assert (size <= 127);
 702
 703         }
 704
 705       /* Then do the exact same thing for the upper bounds.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_UPPER_BOUND (loop);
 708       else
 709         expression = LL_LOWER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 725           for (j = 0; j < i; j++)
 726             A[size][j] *= -1;
 727           A[size][i] = LLE_DENOMINATOR (expression);
 728           size++;
 729           /* Need to increase matrix sizes above.  */
 730           gcc_assert (size <= 127);
 731
 732         }
 733     }
 734
 735   /* Compute the lattice base x = base * y + origin, where y is the
 736      base space.  */
 737   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
 738
 739   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 740
 741   /* A1 = A * L */
 742   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 743
 744   /* a1 = a - A * origin constant.  */
 745   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 746   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 747
 748   /* B1 = B - A * origin invariant.  */
 749   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 750                       invariants);
 751   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 752
 753   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 754      it by A1, then performing Fourier-Motzkin.  */
 755
 756   invertedtrans = lambda_matrix_new (depth, depth);
 757
 758   /* Compute the inverse of U.  */
 759   lambda_matrix_inverse (LTM_MATRIX (trans),
 760                          invertedtrans, depth);
 761
 762   /* A = A1 inv(U).  */
 763   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 764
 765   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 766                                              A, B1, a1, lambda_obstack);
 767 }
 768
 769 /* Compute the loop bounds for the target space, using the bounds of
 770    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 771    The target space loop bounds are computed by multiplying the triangular
 772    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 773    the loop steps (positive or negative) is then used to swap the bounds if
 774    the loop counts downwards.
 775    Return the target loopnest.  */
 776
 777 static lambda_loopnest
 778 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 779                              lambda_trans_matrix H, lambda_vector stepsigns,
 780                              struct obstack * lambda_obstack)
 781 {
 782   lambda_matrix inverse, H1;
 783   int determinant, i, j;
 784   int gcd1, gcd2;
 785   int factor;
 786
 787   lambda_loopnest target_nest;
 788   int depth, invariants;
 789   lambda_matrix target;
 790
 791   lambda_loop auxillary_loop, target_loop;
 792   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 793
 794   depth = LN_DEPTH (auxillary_nest);
 795   invariants = LN_INVARIANTS (auxillary_nest);
 796
 797   inverse = lambda_matrix_new (depth, depth);
 798   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 799
 800   /* H1 is H excluding its diagonal.  */
 801   H1 = lambda_matrix_new (depth, depth);
 802   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 803
 804   for (i = 0; i < depth; i++)
 805     H1[i][i] = 0;
 806
 807   /* Computes the linear offsets of the loop bounds.  */
 808   target = lambda_matrix_new (depth, depth);
 809   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 810
 811   target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 812
 813   for (i = 0; i < depth; i++)
 814     {
 815
 816       /* Get a new loop structure.  */
 817       target_loop = lambda_loop_new ();
 818       LN_LOOPS (target_nest)[i] = target_loop;
 819
 820       /* Computes the gcd of the coefficients of the linear part.  */
 821       gcd1 = lambda_vector_gcd (target[i], i);
 822
 823       /* Include the denominator in the GCD.  */
 824       gcd1 = gcd (gcd1, determinant);
 825
 826       /* Now divide through by the gcd.  */
 827       for (j = 0; j < i; j++)
 828         target[i][j] = target[i][j] / gcd1;
 829
 830       expression = lambda_linear_expression_new (depth, invariants,
 831                                                  lambda_obstack);
 832       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 833       LLE_DENOMINATOR (expression) = determinant / gcd1;
 834       LLE_CONSTANT (expression) = 0;
 835       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 836                            invariants);
 837       LL_LINEAR_OFFSET (target_loop) = expression;
 838     }
 839
 840   /* For each loop, compute the new bounds from H.  */
 841   for (i = 0; i < depth; i++)
 842     {
 843       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 844       target_loop = LN_LOOPS (target_nest)[i];
 845       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 846       factor = LTM_MATRIX (H)[i][i];
 847
 848       /* First we do the lower bound.  */
 849       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 850
 851       for (; auxillary_expr != NULL;
 852            auxillary_expr = LLE_NEXT (auxillary_expr))
 853         {
 854           target_expr = lambda_linear_expression_new (depth, invariants,
 855                                                       lambda_obstack);
 856           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 857                                      depth, inverse, depth,
 858                                      LLE_COEFFICIENTS (target_expr));
 859           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 860                                     LLE_COEFFICIENTS (target_expr), depth,
 861                                     factor);
 862
 863           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 864           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 865                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 866                               invariants);
 867           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 868                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 869                                     invariants, factor);
 870           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 871
 872           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 873             {
 874               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 875                 * determinant;
 876               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 877                                         (target_expr),
 878                                         LLE_INVARIANT_COEFFICIENTS
 879                                         (target_expr), invariants,
 880                                         determinant);
 881               LLE_DENOMINATOR (target_expr) =
 882                 LLE_DENOMINATOR (target_expr) * determinant;
 883             }
 884           /* Find the gcd and divide by it here, rather than doing it
 885              at the tree level.  */
 886           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 887           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 888                                     invariants);
 889           gcd1 = gcd (gcd1, gcd2);
 890           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 891           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 892           for (j = 0; j < depth; j++)
 893             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 894           for (j = 0; j < invariants; j++)
 895             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 896           LLE_CONSTANT (target_expr) /= gcd1;
 897           LLE_DENOMINATOR (target_expr) /= gcd1;
 898           /* Ignore if identical to existing bound.  */
 899           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 900                           invariants))
 901             {
 902               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 903               LL_LOWER_BOUND (target_loop) = target_expr;
 904             }
 905         }
 906       /* Now do the upper bound.  */
 907       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 908
 909       for (; auxillary_expr != NULL;
 910            auxillary_expr = LLE_NEXT (auxillary_expr))
 911         {
 912           target_expr = lambda_linear_expression_new (depth, invariants,
 913                                                       lambda_obstack);
 914           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 915                                      depth, inverse, depth,
 916                                      LLE_COEFFICIENTS (target_expr));
 917           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 918                                     LLE_COEFFICIENTS (target_expr), depth,
 919                                     factor);
 920           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 921           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 922                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 923                               invariants);
 924           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 925                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 926                                     invariants, factor);
 927           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 928
 929           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 930             {
 931               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 932                 * determinant;
 933               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 934                                         (target_expr),
 935                                         LLE_INVARIANT_COEFFICIENTS
 936                                         (target_expr), invariants,
 937                                         determinant);
 938               LLE_DENOMINATOR (target_expr) =
 939                 LLE_DENOMINATOR (target_expr) * determinant;
 940             }
 941           /* Find the gcd and divide by it here, instead of at the
 942              tree level.  */
 943           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 944           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 945                                     invariants);
 946           gcd1 = gcd (gcd1, gcd2);
 947           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 948           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 949           for (j = 0; j < depth; j++)
 950             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 951           for (j = 0; j < invariants; j++)
 952             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 953           LLE_CONSTANT (target_expr) /= gcd1;
 954           LLE_DENOMINATOR (target_expr) /= gcd1;
 955           /* Ignore if equal to existing bound.  */
 956           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 957                           invariants))
 958             {
 959               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 960               LL_UPPER_BOUND (target_loop) = target_expr;
 961             }
 962         }
 963     }
 964   for (i = 0; i < depth; i++)
 965     {
 966       target_loop = LN_LOOPS (target_nest)[i];
 967       /* If necessary, exchange the upper and lower bounds and negate
 968          the step size.  */
 969       if (stepsigns[i] < 0)
 970         {
 971           LL_STEP (target_loop) *= -1;
 972           tmp_expr = LL_LOWER_BOUND (target_loop);
 973           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 974           LL_UPPER_BOUND (target_loop) = tmp_expr;
 975         }
 976     }
 977   return target_nest;
 978 }
 979
 980 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 981    result.  */
 982
 983 static lambda_vector
 984 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 985 {
 986   lambda_matrix matrix, H;
 987   int size;
 988   lambda_vector newsteps;
 989   int i, j, factor, minimum_column;
 990   int temp;
 991
 992   matrix = LTM_MATRIX (trans);
 993   size = LTM_ROWSIZE (trans);
 994   H = lambda_matrix_new (size, size);
 995
 996   newsteps = lambda_vector_new (size);
 997   lambda_vector_copy (stepsigns, newsteps, size);
 998
 999   lambda_matrix_copy (matrix, H, size, size);
1000
1001   for (j = 0; j < size; j++)
1002     {
1003       lambda_vector row;
1004       row = H[j];
1005       for (i = j; i < size; i++)
1006         if (row[i] < 0)
1007           lambda_matrix_col_negate (H, size, i);
1008       while (lambda_vector_first_nz (row, size, j + 1) < size)
1009         {
1010           minimum_column = lambda_vector_min_nz (row, size, j);
1011           lambda_matrix_col_exchange (H, size, j, minimum_column);
1012
1013           temp = newsteps[j];
1014           newsteps[j] = newsteps[minimum_column];
1015           newsteps[minimum_column] = temp;
1016
1017           for (i = j + 1; i < size; i++)
1018             {
1019               factor = row[i] / row[j];
1020               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1021             }
1022         }
1023     }
1024   return newsteps;
1025 }
1026
1027 /* Transform NEST according to TRANS, and return the new loopnest.
1028    This involves
1029    1. Computing a lattice base for the transformation
1030    2. Composing the dense base with the specified transformation (TRANS)
1031    3. Decomposing the combined transformation into a lower triangular portion,
1032    and a unimodular portion.
1033    4. Computing the auxiliary nest using the unimodular portion.
1034    5. Computing the target nest using the auxiliary nest and the lower
1035    triangular portion.  */
1036
1037 lambda_loopnest
1038 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1039                            struct obstack * lambda_obstack)
1040 {
1041   lambda_loopnest auxillary_nest, target_nest;
1042
1043   int depth, invariants;
1044   int i, j;
1045   lambda_lattice lattice;
1046   lambda_trans_matrix trans1, H, U;
1047   lambda_loop loop;
1048   lambda_linear_expression expression;
1049   lambda_vector origin;
1050   lambda_matrix origin_invariants;
1051   lambda_vector stepsigns;
1052   int f;
1053
1054   depth = LN_DEPTH (nest);
1055   invariants = LN_INVARIANTS (nest);
1056
1057   /* Keep track of the signs of the loop steps.  */
1058   stepsigns = lambda_vector_new (depth);
1059   for (i = 0; i < depth; i++)
1060     {
1061       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1062         stepsigns[i] = 1;
1063       else
1064         stepsigns[i] = -1;
1065     }
1066
1067   /* Compute the lattice base.  */
1068   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1069   trans1 = lambda_trans_matrix_new (depth, depth);
1070
1071   /* Multiply the transformation matrix by the lattice base.  */
1072
1073   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1074                       LTM_MATRIX (trans1), depth, depth, depth);
1075
1076   /* Compute the Hermite normal form for the new transformation matrix.  */
1077   H = lambda_trans_matrix_new (depth, depth);
1078   U = lambda_trans_matrix_new (depth, depth);
1079   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1080                          LTM_MATRIX (U));
1081
1082   /* Compute the auxiliary loop nest's space from the unimodular
1083      portion.  */
1084   auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack);
1085
1086   /* Compute the loop step signs from the old step signs and the
1087      transformation matrix.  */
1088   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1089
1090   /* Compute the target loop nest space from the auxiliary nest and
1091      the lower triangular matrix H.  */
1092   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1093                                              lambda_obstack);
1094   origin = lambda_vector_new (depth);
1095   origin_invariants = lambda_matrix_new (depth, invariants);
1096   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1097                              LATTICE_ORIGIN (lattice), origin);
1098   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1099                       origin_invariants, depth, depth, invariants);
1100
1101   for (i = 0; i < depth; i++)
1102     {
1103       loop = LN_LOOPS (target_nest)[i];
1104       expression = LL_LINEAR_OFFSET (loop);
1105       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1106         f = 1;
1107       else
1108         f = LLE_DENOMINATOR (expression);
1109
1110       LLE_CONSTANT (expression) += f * origin[i];
1111
1112       for (j = 0; j < invariants; j++)
1113         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1114           f * origin_invariants[i][j];
1115     }
1116
1117   return target_nest;
1118
1119 }
1120
1121 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1122    return the new expression.  DEPTH is the depth of the loopnest.
1123    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1124    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1125    is the amount we have to add/subtract from the expression because of the
1126    type of comparison it is used in.  */
1127
1128 static lambda_linear_expression
1129 gcc_tree_to_linear_expression (int depth, tree expr,
1130                                VEC(tree,heap) *outerinductionvars,
1131                                VEC(tree,heap) *invariants, int extra,
1132                                struct obstack * lambda_obstack)
1133 {
1134   lambda_linear_expression lle = NULL;
1135   switch (TREE_CODE (expr))
1136     {
1137     case INTEGER_CST:
1138       {
1139         lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1140         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1141         if (extra != 0)
1142           LLE_CONSTANT (lle) += extra;
1143
1144         LLE_DENOMINATOR (lle) = 1;
1145       }
1146       break;
1147     case SSA_NAME:
1148       {
1149         tree iv, invar;
1150         size_t i;
1151         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1152           if (iv != NULL)
1153             {
1154               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1155                 {
1156                   lle = lambda_linear_expression_new (depth, 2 * depth,
1157                                                       lambda_obstack);
1158                   LLE_COEFFICIENTS (lle)[i] = 1;
1159                   if (extra != 0)
1160                     LLE_CONSTANT (lle) = extra;
1161
1162                   LLE_DENOMINATOR (lle) = 1;
1163                 }
1164             }
1165         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1166           if (invar != NULL)
1167             {
1168               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1169                 {
1170                   lle = lambda_linear_expression_new (depth, 2 * depth,
1171                                                       lambda_obstack);
1172                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1173                   if (extra != 0)
1174                     LLE_CONSTANT (lle) = extra;
1175                   LLE_DENOMINATOR (lle) = 1;
1176                 }
1177             }
1178       }
1179       break;
1180     default:
1181       return NULL;
1182     }
1183
1184   return lle;
1185 }
1186
1187 /* Return the depth of the loopnest NEST */
1188
1189 static int
1190 depth_of_nest (struct loop *nest)
1191 {
1192   size_t depth = 0;
1193   while (nest)
1194     {
1195       depth++;
1196       nest = nest->inner;
1197     }
1198   return depth;
1199 }
1200
1201
1202 /* Return true if OP is invariant in LOOP and all outer loops.  */
1203
1204 static bool
1205 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1206 {
1207   if (is_gimple_min_invariant (op))
1208     return true;
1209   if (loop_depth (loop) == 0)
1210     return true;
1211   if (!expr_invariant_in_loop_p (loop, op))
1212     return false;
1213   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1214     return false;
1215   return true;
1216 }
1217
1218 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1219    or NULL if it could not be converted.
1220    DEPTH is the depth of the loop.
1221    INVARIANTS is a pointer to the array of loop invariants.
1222    The induction variable for this loop should be stored in the parameter
1223    OURINDUCTIONVAR.
1224    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1225
1226 static lambda_loop
1227 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1228                          VEC(tree,heap) ** invariants,
1229                          tree * ourinductionvar,
1230                          VEC(tree,heap) * outerinductionvars,
1231                          VEC(tree,heap) ** lboundvars,
1232                          VEC(tree,heap) ** uboundvars,
1233                          VEC(int,heap) ** steps,
1234                          struct obstack * lambda_obstack)
1235 {
1236   tree phi;
1237   tree exit_cond;
1238   tree access_fn, inductionvar;
1239   tree step;
1240   lambda_loop lloop = NULL;
1241   lambda_linear_expression lbound, ubound;
1242   tree test;
1243   int stepint;
1244   int extra = 0;
1245   tree lboundvar, uboundvar, uboundresult;
1246
1247   /* Find out induction var and exit condition.  */
1248   inductionvar = find_induction_var_from_exit_cond (loop);
1249   exit_cond = get_loop_exit_condition (loop);
1250
1251   if (inductionvar == NULL || exit_cond == NULL)
1252     {
1253       if (dump_file && (dump_flags & TDF_DETAILS))
1254         fprintf (dump_file,
1255                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1256       return NULL;
1257     }
1258
1259   test = TREE_OPERAND (exit_cond, 0);
1260
1261   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1262     {
1263
1264       if (dump_file && (dump_flags & TDF_DETAILS))
1265         fprintf (dump_file,
1266                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1267
1268       return NULL;
1269     }
1270
1271   phi = SSA_NAME_DEF_STMT (inductionvar);
1272   if (TREE_CODE (phi) != PHI_NODE)
1273     {
1274       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1275       if (!phi)
1276         {
1277
1278           if (dump_file && (dump_flags & TDF_DETAILS))
1279             fprintf (dump_file,
1280                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1281
1282           return NULL;
1283         }
1284
1285       phi = SSA_NAME_DEF_STMT (phi);
1286       if (TREE_CODE (phi) != PHI_NODE)
1287         {
1288
1289           if (dump_file && (dump_flags & TDF_DETAILS))
1290             fprintf (dump_file,
1291                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1292           return NULL;
1293         }
1294
1295     }
1296
1297   /* The induction variable name/version we want to put in the array is the
1298      result of the induction variable phi node.  */
1299   *ourinductionvar = PHI_RESULT (phi);
1300   access_fn = instantiate_parameters
1301     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1302   if (access_fn == chrec_dont_know)
1303     {
1304       if (dump_file && (dump_flags & TDF_DETAILS))
1305         fprintf (dump_file,
1306                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1307
1308       return NULL;
1309     }
1310
1311   step = evolution_part_in_loop_num (access_fn, loop->num);
1312   if (!step || step == chrec_dont_know)
1313     {
1314       if (dump_file && (dump_flags & TDF_DETAILS))
1315         fprintf (dump_file,
1316                  "Unable to convert loop: Cannot determine step of loop.\n");
1317
1318       return NULL;
1319     }
1320   if (TREE_CODE (step) != INTEGER_CST)
1321     {
1322
1323       if (dump_file && (dump_flags & TDF_DETAILS))
1324         fprintf (dump_file,
1325                  "Unable to convert loop: Step of loop is not integer.\n");
1326       return NULL;
1327     }
1328
1329   stepint = TREE_INT_CST_LOW (step);
1330
1331   /* Only want phis for induction vars, which will have two
1332      arguments.  */
1333   if (PHI_NUM_ARGS (phi) != 2)
1334     {
1335       if (dump_file && (dump_flags & TDF_DETAILS))
1336         fprintf (dump_file,
1337                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1338       return NULL;
1339     }
1340
1341   /* Another induction variable check. One argument's source should be
1342      in the loop, one outside the loop.  */
1343   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1344       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1345     {
1346
1347       if (dump_file && (dump_flags & TDF_DETAILS))
1348         fprintf (dump_file,
1349                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1350
1351       return NULL;
1352     }
1353
1354   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1355     {
1356       lboundvar = PHI_ARG_DEF (phi, 1);
1357       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1358                                               outerinductionvars, *invariants,
1359                                               0, lambda_obstack);
1360     }
1361   else
1362     {
1363       lboundvar = PHI_ARG_DEF (phi, 0);
1364       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1365                                               outerinductionvars, *invariants,
1366                                               0, lambda_obstack);
1367     }
1368
1369   if (!lbound)
1370     {
1371
1372       if (dump_file && (dump_flags & TDF_DETAILS))
1373         fprintf (dump_file,
1374                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1375
1376       return NULL;
1377     }
1378   /* One part of the test may be a loop invariant tree.  */
1379   VEC_reserve (tree, heap, *invariants, 1);
1380   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1381       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1382     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1383   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1384            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1385     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1386
1387   /* The non-induction variable part of the test is the upper bound variable.
1388    */
1389   if (TREE_OPERAND (test, 0) == inductionvar)
1390     uboundvar = TREE_OPERAND (test, 1);
1391   else
1392     uboundvar = TREE_OPERAND (test, 0);
1393
1394
1395   /* We only size the vectors assuming we have, at max, 2 times as many
1396      invariants as we do loops (one for each bound).
1397      This is just an arbitrary number, but it has to be matched against the
1398      code below.  */
1399   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1400
1401
1402   /* We might have some leftover.  */
1403   if (TREE_CODE (test) == LT_EXPR)
1404     extra = -1 * stepint;
1405   else if (TREE_CODE (test) == NE_EXPR)
1406     extra = -1 * stepint;
1407   else if (TREE_CODE (test) == GT_EXPR)
1408     extra = -1 * stepint;
1409   else if (TREE_CODE (test) == EQ_EXPR)
1410     extra = 1 * stepint;
1411
1412   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1413                                           outerinductionvars,
1414                                           *invariants, extra, lambda_obstack);
1415   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1416                          build_int_cst (TREE_TYPE (uboundvar), extra));
1417   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1418   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1419   VEC_safe_push (int, heap, *steps, stepint);
1420   if (!ubound)
1421     {
1422       if (dump_file && (dump_flags & TDF_DETAILS))
1423         fprintf (dump_file,
1424                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1425       return NULL;
1426     }
1427
1428   lloop = lambda_loop_new ();
1429   LL_STEP (lloop) = stepint;
1430   LL_LOWER_BOUND (lloop) = lbound;
1431   LL_UPPER_BOUND (lloop) = ubound;
1432   return lloop;
1433 }
1434
1435 /* Given a LOOP, find the induction variable it is testing against in the exit
1436    condition.  Return the induction variable if found, NULL otherwise.  */
1437
1438 static tree
1439 find_induction_var_from_exit_cond (struct loop *loop)
1440 {
1441   tree expr = get_loop_exit_condition (loop);
1442   tree ivarop;
1443   tree test;
1444   if (expr == NULL_TREE)
1445     return NULL_TREE;
1446   if (TREE_CODE (expr) != COND_EXPR)
1447     return NULL_TREE;
1448   test = TREE_OPERAND (expr, 0);
1449   if (!COMPARISON_CLASS_P (test))
1450     return NULL_TREE;
1451
1452   /* Find the side that is invariant in this loop. The ivar must be the other
1453      side.  */
1454
1455   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1456       ivarop = TREE_OPERAND (test, 1);
1457   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1458       ivarop = TREE_OPERAND (test, 0);
1459   else
1460     return NULL_TREE;
1461
1462   if (TREE_CODE (ivarop) != SSA_NAME)
1463     return NULL_TREE;
1464   return ivarop;
1465 }
1466
1467 DEF_VEC_P(lambda_loop);
1468 DEF_VEC_ALLOC_P(lambda_loop,heap);
1469
1470 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1471    Return the new loop nest.
1472    INDUCTIONVARS is a pointer to an array of induction variables for the
1473    loopnest that will be filled in during this process.
1474    INVARIANTS is a pointer to an array of invariants that will be filled in
1475    during this process.  */
1476
1477 lambda_loopnest
1478 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1479                                  VEC(tree,heap) **inductionvars,
1480                                  VEC(tree,heap) **invariants,
1481                                  struct obstack * lambda_obstack)
1482 {
1483   lambda_loopnest ret = NULL;
1484   struct loop *temp = loop_nest;
1485   int depth = depth_of_nest (loop_nest);
1486   size_t i;
1487   VEC(lambda_loop,heap) *loops = NULL;
1488   VEC(tree,heap) *uboundvars = NULL;
1489   VEC(tree,heap) *lboundvars  = NULL;
1490   VEC(int,heap) *steps = NULL;
1491   lambda_loop newloop;
1492   tree inductionvar = NULL;
1493   bool perfect_nest = perfect_nest_p (loop_nest);
1494
1495   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1496     goto fail;
1497
1498   while (temp)
1499     {
1500       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1501                                          &inductionvar, *inductionvars,
1502                                          &lboundvars, &uboundvars,
1503                                          &steps, lambda_obstack);
1504       if (!newloop)
1505         goto fail;
1506
1507       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1508       VEC_safe_push (lambda_loop, heap, loops, newloop);
1509       temp = temp->inner;
1510     }
1511
1512   if (!perfect_nest)
1513     {
1514       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1515                             *inductionvars))
1516         {
1517           if (dump_file)
1518             fprintf (dump_file,
1519                      "Not a perfect loop nest and couldn't convert to one.\n");
1520           goto fail;
1521         }
1522       else if (dump_file)
1523         fprintf (dump_file,
1524                  "Successfully converted loop nest to perfect loop nest.\n");
1525     }
1526
1527   ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1528
1529   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1530     LN_LOOPS (ret)[i] = newloop;
1531
1532  fail:
1533   VEC_free (lambda_loop, heap, loops);
1534   VEC_free (tree, heap, uboundvars);
1535   VEC_free (tree, heap, lboundvars);
1536   VEC_free (int, heap, steps);
1537
1538   return ret;
1539 }
1540
1541 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1542    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1543    inserted for us are stored.  INDUCTION_VARS is the array of induction
1544    variables for the loop this LBV is from.  TYPE is the tree type to use for
1545    the variables and trees involved.  */
1546
1547 static tree
1548 lbv_to_gcc_expression (lambda_body_vector lbv,
1549                        tree type, VEC(tree,heap) *induction_vars,
1550                        tree *stmts_to_insert)
1551 {
1552   int k;
1553   tree resvar;
1554   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1555
1556   k = LBV_DENOMINATOR (lbv);
1557   gcc_assert (k != 0);
1558   if (k != 1)
1559     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1560
1561   resvar = create_tmp_var (type, "lbvtmp");
1562   add_referenced_var (resvar);
1563   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1564 }
1565
1566 /* Convert a linear expression from coefficient and constant form to a
1567    gcc tree.
1568    Return the tree that represents the final value of the expression.
1569    LLE is the linear expression to convert.
1570    OFFSET is the linear offset to apply to the expression.
1571    TYPE is the tree type to use for the variables and math.
1572    INDUCTION_VARS is a vector of induction variables for the loops.
1573    INVARIANTS is a vector of the loop nest invariants.
1574    WRAP specifies what tree code to wrap the results in, if there is more than
1575    one (it is either MAX_EXPR, or MIN_EXPR).
1576    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1577    statements that need to be inserted for the linear expression.  */
1578
1579 static tree
1580 lle_to_gcc_expression (lambda_linear_expression lle,
1581                        lambda_linear_expression offset,
1582                        tree type,
1583                        VEC(tree,heap) *induction_vars,
1584                        VEC(tree,heap) *invariants,
1585                        enum tree_code wrap, tree *stmts_to_insert)
1586 {
1587   int k;
1588   tree resvar;
1589   tree expr = NULL_TREE;
1590   VEC(tree,heap) *results = NULL;
1591
1592   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1593
1594   /* Build up the linear expressions.  */
1595   for (; lle != NULL; lle = LLE_NEXT (lle))
1596     {
1597       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1598       expr = fold_build2 (PLUS_EXPR, type, expr,
1599                           build_linear_expr (type,
1600                                              LLE_INVARIANT_COEFFICIENTS (lle),
1601                                              invariants));
1602
1603       k = LLE_CONSTANT (lle);
1604       if (k)
1605         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1606
1607       k = LLE_CONSTANT (offset);
1608       if (k)
1609         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1610
1611       k = LLE_DENOMINATOR (lle);
1612       if (k != 1)
1613         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1614                             type, expr, build_int_cst (type, k));
1615
1616       expr = fold (expr);
1617       VEC_safe_push (tree, heap, results, expr);
1618     }
1619
1620   gcc_assert (expr);
1621
1622   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1623   if (VEC_length (tree, results) > 1)
1624     {
1625       size_t i;
1626       tree op;
1627
1628       expr = VEC_index (tree, results, 0);
1629       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1630         expr = fold_build2 (wrap, type, expr, op);
1631     }
1632
1633   VEC_free (tree, heap, results);
1634
1635   resvar = create_tmp_var (type, "lletmp");
1636   add_referenced_var (resvar);
1637   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1638 }
1639
1640 /* Remove the induction variable defined at IV_STMT.  */
1641
1642 void
1643 remove_iv (tree iv_stmt)
1644 {
1645   if (TREE_CODE (iv_stmt) == PHI_NODE)
1646     {
1647       int i;
1648
1649       for (i = 0; i < PHI_NUM_ARGS (iv_stmt); i++)
1650         {
1651           tree stmt;
1652           imm_use_iterator imm_iter;
1653           tree arg = PHI_ARG_DEF (iv_stmt, i);
1654           bool used = false;
1655
1656           if (TREE_CODE (arg) != SSA_NAME)
1657             continue;
1658
1659           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1660             if (stmt != iv_stmt)
1661               used = true;
1662
1663           if (!used)
1664             remove_iv (SSA_NAME_DEF_STMT (arg));
1665         }
1666
1667       remove_phi_node (iv_stmt, NULL_TREE, true);
1668     }
1669   else
1670     {
1671       block_stmt_iterator bsi = bsi_for_stmt (iv_stmt);
1672
1673       bsi_remove (&bsi, true);
1674       release_defs (iv_stmt);
1675     }
1676 }
1677
1678
1679 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1680    it, back into gcc code.  This changes the
1681    loops, their induction variables, and their bodies, so that they
1682    match the transformed loopnest.
1683    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1684    loopnest.
1685    OLD_IVS is a vector of induction variables from the old loopnest.
1686    INVARIANTS is a vector of loop invariants from the old loopnest.
1687    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1688    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1689    NEW_LOOPNEST.  */
1690
1691 void
1692 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1693                                  VEC(tree,heap) *old_ivs,
1694                                  VEC(tree,heap) *invariants,
1695                                  VEC(tree,heap) **remove_ivs,
1696                                  lambda_loopnest new_loopnest,
1697                                  lambda_trans_matrix transform,
1698                                  struct obstack * lambda_obstack)
1699 {
1700   struct loop *temp;
1701   size_t i = 0;
1702   size_t depth = 0;
1703   VEC(tree,heap) *new_ivs = NULL;
1704   tree oldiv;
1705
1706   block_stmt_iterator bsi;
1707
1708   if (dump_file)
1709     {
1710       transform = lambda_trans_matrix_inverse (transform);
1711       fprintf (dump_file, "Inverse of transformation matrix:\n");
1712       print_lambda_trans_matrix (dump_file, transform);
1713     }
1714   depth = depth_of_nest (old_loopnest);
1715   temp = old_loopnest;
1716
1717   while (temp)
1718     {
1719       lambda_loop newloop;
1720       basic_block bb;
1721       edge exit;
1722       tree ivvar, ivvarinced, exitcond, stmts;
1723       enum tree_code testtype;
1724       tree newupperbound, newlowerbound;
1725       lambda_linear_expression offset;
1726       tree type;
1727       bool insert_after;
1728       tree inc_stmt;
1729
1730       oldiv = VEC_index (tree, old_ivs, i);
1731       type = TREE_TYPE (oldiv);
1732
1733       /* First, build the new induction variable temporary  */
1734
1735       ivvar = create_tmp_var (type, "lnivtmp");
1736       add_referenced_var (ivvar);
1737
1738       VEC_safe_push (tree, heap, new_ivs, ivvar);
1739
1740       newloop = LN_LOOPS (new_loopnest)[i];
1741
1742       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1743          cases for now.  */
1744       offset = LL_LINEAR_OFFSET (newloop);
1745
1746       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1747                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1748
1749       /* Now build the  new lower bounds, and insert the statements
1750          necessary to generate it on the loop preheader.  */
1751       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1752                                              LL_LINEAR_OFFSET (newloop),
1753                                              type,
1754                                              new_ivs,
1755                                              invariants, MAX_EXPR, &stmts);
1756
1757       if (stmts)
1758         {
1759           bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1760           bsi_commit_edge_inserts ();
1761         }
1762       /* Build the new upper bound and insert its statements in the
1763          basic block of the exit condition */
1764       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1765                                              LL_LINEAR_OFFSET (newloop),
1766                                              type,
1767                                              new_ivs,
1768                                              invariants, MIN_EXPR, &stmts);
1769       exit = single_exit (temp);
1770       exitcond = get_loop_exit_condition (temp);
1771       bb = bb_for_stmt (exitcond);
1772       bsi = bsi_after_labels (bb);
1773       if (stmts)
1774         bsi_insert_before (&bsi, stmts, BSI_NEW_STMT);
1775
1776       /* Create the new iv.  */
1777
1778       standard_iv_increment_position (temp, &bsi, &insert_after);
1779       create_iv (newlowerbound,
1780                  build_int_cst (type, LL_STEP (newloop)),
1781                  ivvar, temp, &bsi, insert_after, &ivvar,
1782                  NULL);
1783
1784       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1785          dominate the block containing the exit condition.
1786          So we simply create our own incremented iv to use in the new exit
1787          test,  and let redundancy elimination sort it out.  */
1788       inc_stmt = build2 (PLUS_EXPR, type,
1789                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1790       inc_stmt = build_gimple_modify_stmt (SSA_NAME_VAR (ivvar), inc_stmt);
1791       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1792       GIMPLE_STMT_OPERAND (inc_stmt, 0) = ivvarinced;
1793       bsi = bsi_for_stmt (exitcond);
1794       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1795
1796       /* Replace the exit condition with the new upper bound
1797          comparison.  */
1798
1799       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1800
1801       /* We want to build a conditional where true means exit the loop, and
1802          false means continue the loop.
1803          So swap the testtype if this isn't the way things are.*/
1804
1805       if (exit->flags & EDGE_FALSE_VALUE)
1806         testtype = swap_tree_comparison (testtype);
1807
1808       COND_EXPR_COND (exitcond) = build2 (testtype,
1809                                           boolean_type_node,
1810                                           newupperbound, ivvarinced);
1811       update_stmt (exitcond);
1812       VEC_replace (tree, new_ivs, i, ivvar);
1813
1814       i++;
1815       temp = temp->inner;
1816     }
1817
1818   /* Rewrite uses of the old ivs so that they are now specified in terms of
1819      the new ivs.  */
1820
1821   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1822     {
1823       imm_use_iterator imm_iter;
1824       use_operand_p use_p;
1825       tree oldiv_def;
1826       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1827       tree stmt;
1828
1829       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1830         oldiv_def = PHI_RESULT (oldiv_stmt);
1831       else
1832         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1833       gcc_assert (oldiv_def != NULL_TREE);
1834
1835       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1836         {
1837           tree newiv, stmts;
1838           lambda_body_vector lbv, newlbv;
1839
1840           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1841
1842           /* Compute the new expression for the induction
1843              variable.  */
1844           depth = VEC_length (tree, new_ivs);
1845           lbv = lambda_body_vector_new (depth, lambda_obstack);
1846           LBV_COEFFICIENTS (lbv)[i] = 1;
1847
1848           newlbv = lambda_body_vector_compute_new (transform, lbv,
1849                                                    lambda_obstack);
1850
1851           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1852                                          new_ivs, &stmts);
1853           if (stmts)
1854             {
1855               bsi = bsi_for_stmt (stmt);
1856               bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1857             }
1858
1859           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1860             propagate_value (use_p, newiv);
1861           update_stmt (stmt);
1862         }
1863
1864       /* Remove the now unused induction variable.  */
1865       VEC_safe_push (tree, heap, *remove_ivs, oldiv_stmt);
1866     }
1867   VEC_free (tree, heap, new_ivs);
1868 }
1869
1870 /* Return TRUE if this is not interesting statement from the perspective of
1871    determining if we have a perfect loop nest.  */
1872
1873 static bool
1874 not_interesting_stmt (tree stmt)
1875 {
1876   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1877      loop, we would have already failed the number of exits tests.  */
1878   if (TREE_CODE (stmt) == LABEL_EXPR
1879       || TREE_CODE (stmt) == GOTO_EXPR
1880       || TREE_CODE (stmt) == COND_EXPR)
1881     return true;
1882   return false;
1883 }
1884
1885 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1886
1887 static bool
1888 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1889 {
1890   int i;
1891   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1892     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1893       if (PHI_ARG_DEF (phi, i) == def)
1894         return true;
1895   return false;
1896 }
1897
1898 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1899
1900 static bool
1901 stmt_uses_phi_result (tree stmt, tree phi_result)
1902 {
1903   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1904
1905   /* This is conservatively true, because we only want SIMPLE bumpers
1906      of the form x +- constant for our pass.  */
1907   return (use == phi_result);
1908 }
1909
1910 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1911    in-loop-edge in a phi node, and the operand it uses is the result of that
1912    phi node.
1913    I.E. i_29 = i_3 + 1
1914         i_3 = PHI (0, i_29);  */
1915
1916 static bool
1917 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
1918 {
1919   tree use;
1920   tree def;
1921   imm_use_iterator iter;
1922   use_operand_p use_p;
1923
1924   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1925   if (!def)
1926     return false;
1927
1928   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1929     {
1930       use = USE_STMT (use_p);
1931       if (TREE_CODE (use) == PHI_NODE)
1932         {
1933           if (phi_loop_edge_uses_def (loop, use, def))
1934             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1935               return true;
1936         }
1937     }
1938   return false;
1939 }
1940
1941
1942 /* Return true if LOOP is a perfect loop nest.
1943    Perfect loop nests are those loop nests where all code occurs in the
1944    innermost loop body.
1945    If S is a program statement, then
1946
1947    i.e.
1948    DO I = 1, 20
1949        S1
1950        DO J = 1, 20
1951        ...
1952        END DO
1953    END DO
1954    is not a perfect loop nest because of S1.
1955
1956    DO I = 1, 20
1957       DO J = 1, 20
1958         S1
1959         ...
1960       END DO
1961    END DO
1962    is a perfect loop nest.
1963
1964    Since we don't have high level loops anymore, we basically have to walk our
1965    statements and ignore those that are there because the loop needs them (IE
1966    the induction variable increment, and jump back to the top of the loop).  */
1967
1968 bool
1969 perfect_nest_p (struct loop *loop)
1970 {
1971   basic_block *bbs;
1972   size_t i;
1973   tree exit_cond;
1974
1975   /* Loops at depth 0 are perfect nests.  */
1976   if (!loop->inner)
1977     return true;
1978
1979   bbs = get_loop_body (loop);
1980   exit_cond = get_loop_exit_condition (loop);
1981
1982   for (i = 0; i < loop->num_nodes; i++)
1983     {
1984       if (bbs[i]->loop_father == loop)
1985         {
1986           block_stmt_iterator bsi;
1987
1988           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1989             {
1990               tree stmt = bsi_stmt (bsi);
1991
1992               if (TREE_CODE (stmt) == COND_EXPR
1993                   && exit_cond != stmt)
1994                 goto non_perfectly_nested;
1995
1996               if (stmt == exit_cond
1997                   || not_interesting_stmt (stmt)
1998                   || stmt_is_bumper_for_loop (loop, stmt))
1999                 continue;
2000
2001             non_perfectly_nested:
2002               free (bbs);
2003               return false;
2004             }
2005         }
2006     }
2007
2008   free (bbs);
2009
2010   return perfect_nest_p (loop->inner);
2011 }
2012
2013 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2014    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2015    avoid creating duplicate temporaries and FIRSTBSI is statement
2016    iterator where new temporaries should be inserted at the beginning
2017    of body basic block.  */
2018
2019 static void
2020 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2021                                 int xstep, tree y, tree yinit,
2022                                 htab_t replacements,
2023                                 block_stmt_iterator *firstbsi)
2024 {
2025   ssa_op_iter iter;
2026   use_operand_p use_p;
2027
2028   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2029     {
2030       tree use = USE_FROM_PTR (use_p);
2031       tree step = NULL_TREE;
2032       tree scev, init, val, var, setstmt;
2033       struct tree_map *h, in;
2034       void **loc;
2035
2036       /* Replace uses of X with Y right away.  */
2037       if (use == x)
2038         {
2039           SET_USE (use_p, y);
2040           continue;
2041         }
2042
2043       scev = instantiate_parameters (loop,
2044                                      analyze_scalar_evolution (loop, use));
2045
2046       if (scev == NULL || scev == chrec_dont_know)
2047         continue;
2048
2049       step = evolution_part_in_loop_num (scev, loop->num);
2050       if (step == NULL
2051           || step == chrec_dont_know
2052           || TREE_CODE (step) != INTEGER_CST
2053           || int_cst_value (step) != xstep)
2054         continue;
2055
2056       /* Use REPLACEMENTS hash table to cache already created
2057          temporaries.  */
2058       in.hash = htab_hash_pointer (use);
2059       in.base.from = use;
2060       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2061       if (h != NULL)
2062         {
2063           SET_USE (use_p, h->to);
2064           continue;
2065         }
2066
2067       /* USE which has the same step as X should be replaced
2068          with a temporary set to Y + YINIT - INIT.  */
2069       init = initial_condition_in_loop_num (scev, loop->num);
2070       gcc_assert (init != NULL && init != chrec_dont_know);
2071       if (TREE_TYPE (use) == TREE_TYPE (y))
2072         {
2073           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2074           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2075           if (val == y)
2076             {
2077               /* If X has the same type as USE, the same step
2078                  and same initial value, it can be replaced by Y.  */
2079               SET_USE (use_p, y);
2080               continue;
2081             }
2082         }
2083       else
2084         {
2085           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2086           val = fold_convert (TREE_TYPE (use), val);
2087           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2088         }
2089
2090       /* Create a temporary variable and insert it at the beginning
2091          of the loop body basic block, right after the PHI node
2092          which sets Y.  */
2093       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2094       add_referenced_var (var);
2095       val = force_gimple_operand_bsi (firstbsi, val, false, NULL,
2096                                       true, BSI_SAME_STMT);
2097       setstmt = build_gimple_modify_stmt (var, val);
2098       var = make_ssa_name (var, setstmt);
2099       GIMPLE_STMT_OPERAND (setstmt, 0) = var;
2100       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2101       update_stmt (setstmt);
2102       SET_USE (use_p, var);
2103       h = GGC_NEW (struct tree_map);
2104       h->hash = in.hash;
2105       h->base.from = use;
2106       h->to = var;
2107       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2108       gcc_assert ((*(struct tree_map **)loc) == NULL);
2109       *(struct tree_map **) loc = h;
2110     }
2111 }
2112
2113 /* Return true if STMT is an exit PHI for LOOP */
2114
2115 static bool
2116 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2117 {
2118
2119   if (TREE_CODE (stmt) != PHI_NODE
2120       || PHI_NUM_ARGS (stmt) != 1
2121       || bb_for_stmt (stmt) != single_exit (loop)->dest)
2122     return false;
2123
2124   return true;
2125 }
2126
2127 /* Return true if STMT can be put back into the loop INNER, by
2128    copying it to the beginning of that loop and changing the uses.  */
2129
2130 static bool
2131 can_put_in_inner_loop (struct loop *inner, tree stmt)
2132 {
2133   imm_use_iterator imm_iter;
2134   use_operand_p use_p;
2135
2136   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2137   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2138       || !expr_invariant_in_loop_p (inner, GIMPLE_STMT_OPERAND (stmt, 1)))
2139     return false;
2140
2141   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2142     {
2143       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2144         {
2145           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2146
2147           if (!flow_bb_inside_loop_p (inner, immbb))
2148             return false;
2149         }
2150     }
2151   return true;
2152 }
2153
2154 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2155 static bool
2156 can_put_after_inner_loop (struct loop *loop, tree stmt)
2157 {
2158   imm_use_iterator imm_iter;
2159   use_operand_p use_p;
2160
2161   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2162     return false;
2163
2164   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2165     {
2166       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2167         {
2168           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2169
2170           if (!dominated_by_p (CDI_DOMINATORS,
2171                                immbb,
2172                                loop->inner->header)
2173               && !can_put_in_inner_loop (loop->inner, stmt))
2174             return false;
2175         }
2176     }
2177   return true;
2178 }
2179
2180 /* Return true when the induction variable IV is simple enough to be
2181    re-synthesized.  */
2182
2183 static bool
2184 can_duplicate_iv (tree iv, struct loop *loop)
2185 {
2186   tree scev = instantiate_parameters
2187     (loop, analyze_scalar_evolution (loop, iv));
2188
2189   if (!automatically_generated_chrec_p (scev))
2190     {
2191       tree step = evolution_part_in_loop_num (scev, loop->num);
2192
2193       if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2194         return true;
2195     }
2196
2197   return false;
2198 }
2199
2200 /* If this is a scalar operation that can be put back into the inner
2201    loop, or after the inner loop, through copying, then do so. This
2202    works on the theory that any amount of scalar code we have to
2203    reduplicate into or after the loops is less expensive that the win
2204    we get from rearranging the memory walk the loop is doing so that
2205    it has better cache behavior.  */
2206
2207 static bool
2208 cannot_convert_modify_to_perfect_nest (tree stmt, struct loop *loop)
2209 {
2210
2211   use_operand_p use_a, use_b;
2212   imm_use_iterator imm_iter;
2213   ssa_op_iter op_iter, op_iter1;
2214   tree op0 = GIMPLE_STMT_OPERAND (stmt, 0);
2215
2216   /* The statement should not define a variable used in the inner
2217      loop.  */
2218   if (TREE_CODE (op0) == SSA_NAME
2219       && !can_duplicate_iv (op0, loop))
2220     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2221       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2222           == loop->inner)
2223         return true;
2224
2225   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2226     {
2227       tree node, op = USE_FROM_PTR (use_a);
2228
2229       /* The variables should not be used in both loops.  */
2230       if (!can_duplicate_iv (op, loop))
2231         FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2232           if (bb_for_stmt (USE_STMT (use_b))->loop_father
2233               == loop->inner)
2234             return true;
2235
2236       /* The statement should not use the value of a scalar that was
2237          modified in the loop.  */
2238       node = SSA_NAME_DEF_STMT (op);
2239       if (TREE_CODE (node) == PHI_NODE)
2240         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2241         {
2242           tree arg = USE_FROM_PTR (use_b);
2243
2244           if (TREE_CODE (arg) == SSA_NAME)
2245             {
2246               tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2247
2248               if (bb_for_stmt (arg_stmt)
2249                   && (bb_for_stmt (arg_stmt)->loop_father
2250                       == loop->inner))
2251                 return true;
2252             }
2253         }
2254     }
2255
2256   return false;
2257 }
2258
2259 /* Return true when BB contains statements that can harm the transform
2260    to a perfect loop nest.  */
2261
2262 static bool
2263 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2264 {
2265   block_stmt_iterator bsi;
2266   tree exit_condition = get_loop_exit_condition (loop);
2267
2268   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
2269     {
2270       tree stmt = bsi_stmt (bsi);
2271
2272       if (stmt == exit_condition
2273           || not_interesting_stmt (stmt)
2274           || stmt_is_bumper_for_loop (loop, stmt))
2275         continue;
2276
2277       if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
2278         {
2279           if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2280             return true;
2281
2282           if (can_duplicate_iv (GIMPLE_STMT_OPERAND (stmt, 0), loop))
2283             continue;
2284
2285           if (can_put_in_inner_loop (loop->inner, stmt)
2286               || can_put_after_inner_loop (loop, stmt))
2287             continue;
2288         }
2289
2290       /* If the bb of a statement we care about isn't dominated by the
2291          header of the inner loop, then we can't handle this case
2292          right now.  This test ensures that the statement comes
2293          completely *after* the inner loop.  */
2294       if (!dominated_by_p (CDI_DOMINATORS,
2295                            bb_for_stmt (stmt),
2296                            loop->inner->header))
2297         return true;
2298     }
2299
2300   return false;
2301 }
2302
2303 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2304    perfect one.  At the moment, we only handle imperfect nests of
2305    depth 2, where all of the statements occur after the inner loop.  */
2306
2307 static bool
2308 can_convert_to_perfect_nest (struct loop *loop)
2309 {
2310   basic_block *bbs;
2311   tree phi;
2312   size_t i;
2313
2314   /* Can't handle triply nested+ loops yet.  */
2315   if (!loop->inner || loop->inner->inner)
2316     return false;
2317
2318   bbs = get_loop_body (loop);
2319   for (i = 0; i < loop->num_nodes; i++)
2320     if (bbs[i]->loop_father == loop
2321         && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2322       goto fail;
2323
2324   /* We also need to make sure the loop exit only has simple copy phis in it,
2325      otherwise we don't know how to transform it into a perfect nest.  */
2326   for (phi = phi_nodes (single_exit (loop)->dest); phi; phi = PHI_CHAIN (phi))
2327     if (PHI_NUM_ARGS (phi) != 1)
2328       goto fail;
2329
2330   free (bbs);
2331   return true;
2332
2333  fail:
2334   free (bbs);
2335   return false;
2336 }
2337
2338 /* Transform the loop nest into a perfect nest, if possible.
2339    LOOP is the loop nest to transform into a perfect nest
2340    LBOUNDS are the lower bounds for the loops to transform
2341    UBOUNDS are the upper bounds for the loops to transform
2342    STEPS is the STEPS for the loops to transform.
2343    LOOPIVS is the induction variables for the loops to transform.
2344
2345    Basically, for the case of
2346
2347    FOR (i = 0; i < 50; i++)
2348     {
2349      FOR (j =0; j < 50; j++)
2350      {
2351         <whatever>
2352      }
2353      <some code>
2354     }
2355
2356    This function will transform it into a perfect loop nest by splitting the
2357    outer loop into two loops, like so:
2358
2359    FOR (i = 0; i < 50; i++)
2360    {
2361      FOR (j = 0; j < 50; j++)
2362      {
2363          <whatever>
2364      }
2365    }
2366
2367    FOR (i = 0; i < 50; i ++)
2368    {
2369     <some code>
2370    }
2371
2372    Return FALSE if we can't make this loop into a perfect nest.  */
2373
2374 static bool
2375 perfect_nestify (struct loop *loop,
2376                  VEC(tree,heap) *lbounds,
2377                  VEC(tree,heap) *ubounds,
2378                  VEC(int,heap) *steps,
2379                  VEC(tree,heap) *loopivs)
2380 {
2381   basic_block *bbs;
2382   tree exit_condition;
2383   tree cond_stmt;
2384   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2385   int i;
2386   block_stmt_iterator bsi, firstbsi;
2387   bool insert_after;
2388   edge e;
2389   struct loop *newloop;
2390   tree phi;
2391   tree uboundvar;
2392   tree stmt;
2393   tree oldivvar, ivvar, ivvarinced;
2394   VEC(tree,heap) *phis = NULL;
2395   htab_t replacements = NULL;
2396
2397   /* Create the new loop.  */
2398   olddest = single_exit (loop)->dest;
2399   preheaderbb = split_edge (single_exit (loop));
2400   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2401
2402   /* Push the exit phi nodes that we are moving.  */
2403   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2404     {
2405       VEC_reserve (tree, heap, phis, 2);
2406       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2407       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2408     }
2409   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2410
2411   /* Remove the exit phis from the old basic block.  */
2412   while (phi_nodes (olddest) != NULL)
2413     remove_phi_node (phi_nodes (olddest), NULL, false);
2414
2415   /* and add them back to the new basic block.  */
2416   while (VEC_length (tree, phis) != 0)
2417     {
2418       tree def;
2419       tree phiname;
2420       def = VEC_pop (tree, phis);
2421       phiname = VEC_pop (tree, phis);
2422       phi = create_phi_node (phiname, preheaderbb);
2423       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2424     }
2425   flush_pending_stmts (e);
2426   VEC_free (tree, heap, phis);
2427
2428   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2429   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2430   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2431   cond_stmt = build3 (COND_EXPR, void_type_node,
2432                       build2 (NE_EXPR, boolean_type_node,
2433                               integer_one_node,
2434                               integer_zero_node),
2435                       NULL_TREE, NULL_TREE);
2436   bsi = bsi_start (bodybb);
2437   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2438   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2439   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2440   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2441
2442   /* Update the loop structures.  */
2443   newloop = duplicate_loop (loop, olddest->loop_father);
2444   newloop->header = headerbb;
2445   newloop->latch = latchbb;
2446   add_bb_to_loop (latchbb, newloop);
2447   add_bb_to_loop (bodybb, newloop);
2448   add_bb_to_loop (headerbb, newloop);
2449   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2450   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2451   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2452                            single_exit (loop)->src);
2453   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2454   set_immediate_dominator (CDI_DOMINATORS, olddest,
2455                            recompute_dominator (CDI_DOMINATORS, olddest));
2456   /* Create the new iv.  */
2457   oldivvar = VEC_index (tree, loopivs, 0);
2458   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2459   add_referenced_var (ivvar);
2460   standard_iv_increment_position (newloop, &bsi, &insert_after);
2461   create_iv (VEC_index (tree, lbounds, 0),
2462              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2463              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2464
2465   /* Create the new upper bound.  This may be not just a variable, so we copy
2466      it to one just in case.  */
2467
2468   exit_condition = get_loop_exit_condition (newloop);
2469   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2470   add_referenced_var (uboundvar);
2471   stmt = build_gimple_modify_stmt (uboundvar, VEC_index (tree, ubounds, 0));
2472   uboundvar = make_ssa_name (uboundvar, stmt);
2473   GIMPLE_STMT_OPERAND (stmt, 0) = uboundvar;
2474
2475   if (insert_after)
2476     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2477   else
2478     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2479   update_stmt (stmt);
2480   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2481                                             boolean_type_node,
2482                                             uboundvar,
2483                                             ivvarinced);
2484   update_stmt (exit_condition);
2485   replacements = htab_create_ggc (20, tree_map_hash,
2486                                   tree_map_eq, NULL);
2487   bbs = get_loop_body_in_dom_order (loop);
2488   /* Now move the statements, and replace the induction variable in the moved
2489      statements with the correct loop induction variable.  */
2490   oldivvar = VEC_index (tree, loopivs, 0);
2491   firstbsi = bsi_start (bodybb);
2492   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2493     {
2494       block_stmt_iterator tobsi = bsi_last (bodybb);
2495       if (bbs[i]->loop_father == loop)
2496         {
2497           /* If this is true, we are *before* the inner loop.
2498              If this isn't true, we are *after* it.
2499
2500              The only time can_convert_to_perfect_nest returns true when we
2501              have statements before the inner loop is if they can be moved
2502              into the inner loop.
2503
2504              The only time can_convert_to_perfect_nest returns true when we
2505              have statements after the inner loop is if they can be moved into
2506              the new split loop.  */
2507
2508           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2509             {
2510               block_stmt_iterator header_bsi
2511                 = bsi_after_labels (loop->inner->header);
2512
2513               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2514                 {
2515                   tree stmt = bsi_stmt (bsi);
2516
2517                   if (stmt == exit_condition
2518                       || not_interesting_stmt (stmt)
2519                       || stmt_is_bumper_for_loop (loop, stmt))
2520                     {
2521                       bsi_next (&bsi);
2522                       continue;
2523                     }
2524
2525                   bsi_move_before (&bsi, &header_bsi);
2526                 }
2527             }
2528           else
2529             {
2530               /* Note that the bsi only needs to be explicitly incremented
2531                  when we don't move something, since it is automatically
2532                  incremented when we do.  */
2533               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2534                 {
2535                   ssa_op_iter i;
2536                   tree n, stmt = bsi_stmt (bsi);
2537
2538                   if (stmt == exit_condition
2539                       || not_interesting_stmt (stmt)
2540                       || stmt_is_bumper_for_loop (loop, stmt))
2541                     {
2542                       bsi_next (&bsi);
2543                       continue;
2544                     }
2545
2546                   replace_uses_equiv_to_x_with_y
2547                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2548                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2549
2550                   bsi_move_before (&bsi, &tobsi);
2551
2552                   /* If the statement has any virtual operands, they may
2553                      need to be rewired because the original loop may
2554                      still reference them.  */
2555                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2556                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2557                 }
2558             }
2559
2560         }
2561     }
2562
2563   free (bbs);
2564   htab_delete (replacements);
2565   return perfect_nest_p (loop);
2566 }
2567
2568 /* Return true if TRANS is a legal transformation matrix that respects
2569    the dependence vectors in DISTS and DIRS.  The conservative answer
2570    is false.
2571
2572    "Wolfe proves that a unimodular transformation represented by the
2573    matrix T is legal when applied to a loop nest with a set of
2574    lexicographically non-negative distance vectors RDG if and only if
2575    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2576    i.e.: if and only if it transforms the lexicographically positive
2577    distance vectors to lexicographically positive vectors.  Note that
2578    a unimodular matrix must transform the zero vector (and only it) to
2579    the zero vector." S.Muchnick.  */
2580
2581 bool
2582 lambda_transform_legal_p (lambda_trans_matrix trans,
2583                           int nb_loops,
2584                           VEC (ddr_p, heap) *dependence_relations)
2585 {
2586   unsigned int i, j;
2587   lambda_vector distres;
2588   struct data_dependence_relation *ddr;
2589
2590   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2591               && LTM_ROWSIZE (trans) == nb_loops);
2592
2593   /* When there is an unknown relation in the dependence_relations, we
2594      know that it is no worth looking at this loop nest: give up.  */
2595   ddr = VEC_index (ddr_p, dependence_relations, 0);
2596   if (ddr == NULL)
2597     return true;
2598   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2599     return false;
2600
2601   distres = lambda_vector_new (nb_loops);
2602
2603   /* For each distance vector in the dependence graph.  */
2604   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2605     {
2606       /* Don't care about relations for which we know that there is no
2607          dependence, nor about read-read (aka. output-dependences):
2608          these data accesses can happen in any order.  */
2609       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2610           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2611         continue;
2612
2613       /* Conservatively answer: "this transformation is not valid".  */
2614       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2615         return false;
2616
2617       /* If the dependence could not be captured by a distance vector,
2618          conservatively answer that the transform is not valid.  */
2619       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2620         return false;
2621
2622       /* Compute trans.dist_vect */
2623       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2624         {
2625           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2626                                      DDR_DIST_VECT (ddr, j), distres);
2627
2628           if (!lambda_vector_lexico_pos (distres, nb_loops))
2629             return false;
2630         }
2631     }
2632   return true;
2633 }