gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20     02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "errors.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "target.h"
  30 #include "rtl.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "timevar.h"
  36 #include "cfgloop.h"
  37 #include "expr.h"
  38 #include "optabs.h"
  39 #include "tree-chrec.h"
  40 #include "tree-data-ref.h"
  41 #include "tree-pass.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "vec.h"
  44 #include "lambda.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as an integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
  78  nest . U = auxiliary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 DEF_VEC_P(int);
 119 DEF_VEC_ALLOC_P(int,heap);
 120
 121 static bool perfect_nestify (struct loops *,
 122                              struct loop *, VEC(tree,heap) *,
 123                              VEC(tree,heap) *, VEC(int,heap) *,
 124                              VEC(tree,heap) *);
 125 /* Lattice stuff that is internal to the code generation algorithm.  */
 126
 127 typedef struct
 128 {
 129   /* Lattice base matrix.  */
 130   lambda_matrix base;
 131   /* Lattice dimension.  */
 132   int dimension;
 133   /* Origin vector for the coefficients.  */
 134   lambda_vector origin;
 135   /* Origin matrix for the invariants.  */
 136   lambda_matrix origin_invariants;
 137   /* Number of invariants.  */
 138   int invariants;
 139 } *lambda_lattice;
 140
 141 #define LATTICE_BASE(T) ((T)->base)
 142 #define LATTICE_DIMENSION(T) ((T)->dimension)
 143 #define LATTICE_ORIGIN(T) ((T)->origin)
 144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 146
 147 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 148                        int, int);
 149 static lambda_lattice lambda_lattice_new (int, int);
 150 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 151
 152 static tree find_induction_var_from_exit_cond (struct loop *);
 153
 154 /* Create a new lambda body vector.  */
 155
 156 lambda_body_vector
 157 lambda_body_vector_new (int size)
 158 {
 159   lambda_body_vector ret;
 160
 161   ret = ggc_alloc (sizeof (*ret));
 162   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 163   LBV_SIZE (ret) = size;
 164   LBV_DENOMINATOR (ret) = 1;
 165   return ret;
 166 }
 167
 168 /* Compute the new coefficients for the vector based on the
 169   *inverse* of the transformation matrix.  */
 170
 171 lambda_body_vector
 172 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 173                                 lambda_body_vector vect)
 174 {
 175   lambda_body_vector temp;
 176   int depth;
 177
 178   /* Make sure the matrix is square.  */
 179   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 180
 181   depth = LTM_ROWSIZE (transform);
 182
 183   temp = lambda_body_vector_new (depth);
 184   LBV_DENOMINATOR (temp) =
 185     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 186   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 187                              LTM_MATRIX (transform), depth,
 188                              LBV_COEFFICIENTS (temp));
 189   LBV_SIZE (temp) = LBV_SIZE (vect);
 190   return temp;
 191 }
 192
 193 /* Print out a lambda body vector.  */
 194
 195 void
 196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 197 {
 198   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 199 }
 200
 201 /* Return TRUE if two linear expressions are equal.  */
 202
 203 static bool
 204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 205            int depth, int invariants)
 206 {
 207   int i;
 208
 209   if (lle1 == NULL || lle2 == NULL)
 210     return false;
 211   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 212     return false;
 213   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 214     return false;
 215   for (i = 0; i < depth; i++)
 216     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 217       return false;
 218   for (i = 0; i < invariants; i++)
 219     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 220         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 221       return false;
 222   return true;
 223 }
 224
 225 /* Create a new linear expression with dimension DIM, and total number
 226    of invariants INVARIANTS.  */
 227
 228 lambda_linear_expression
 229 lambda_linear_expression_new (int dim, int invariants)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = ggc_alloc_cleared (sizeof (*ret));
 234
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants)
 332 {
 333   lambda_loopnest ret;
 334   ret = ggc_alloc (sizeof (*ret));
 335
 336   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 337   LN_DEPTH (ret) = depth;
 338   LN_INVARIANTS (ret) = invariants;
 339
 340   return ret;
 341 }
 342
 343 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 344    character to use for loop names is given by START.  */
 345
 346 void
 347 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 348 {
 349   int i;
 350   for (i = 0; i < LN_DEPTH (nest); i++)
 351     {
 352       fprintf (outfile, "Loop %c\n", start + i);
 353       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 354                          LN_INVARIANTS (nest), 'i');
 355       fprintf (outfile, "\n");
 356     }
 357 }
 358
 359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 360    of invariants.  */
 361
 362 static lambda_lattice
 363 lambda_lattice_new (int depth, int invariants)
 364 {
 365   lambda_lattice ret;
 366   ret = ggc_alloc (sizeof (*ret));
 367   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 368   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 369   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 370   LATTICE_DIMENSION (ret) = depth;
 371   LATTICE_INVARIANTS (ret) = invariants;
 372   return ret;
 373 }
 374
 375 /* Compute the lattice base for NEST.  The lattice base is essentially a
 376    non-singular transform from a dense base space to a sparse iteration space.
 377    We use it so that we don't have to specially handle the case of a sparse
 378    iteration space in other parts of the algorithm.  As a result, this routine
 379    only does something interesting (IE produce a matrix that isn't the
 380    identity matrix) if NEST is a sparse space.  */
 381
 382 static lambda_lattice
 383 lambda_lattice_compute_base (lambda_loopnest nest)
 384 {
 385   lambda_lattice ret;
 386   int depth, invariants;
 387   lambda_matrix base;
 388
 389   int i, j, step;
 390   lambda_loop loop;
 391   lambda_linear_expression expression;
 392
 393   depth = LN_DEPTH (nest);
 394   invariants = LN_INVARIANTS (nest);
 395
 396   ret = lambda_lattice_new (depth, invariants);
 397   base = LATTICE_BASE (ret);
 398   for (i = 0; i < depth; i++)
 399     {
 400       loop = LN_LOOPS (nest)[i];
 401       gcc_assert (loop);
 402       step = LL_STEP (loop);
 403       /* If we have a step of 1, then the base is one, and the
 404          origin and invariant coefficients are 0.  */
 405       if (step == 1)
 406         {
 407           for (j = 0; j < depth; j++)
 408             base[i][j] = 0;
 409           base[i][i] = 1;
 410           LATTICE_ORIGIN (ret)[i] = 0;
 411           for (j = 0; j < invariants; j++)
 412             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 413         }
 414       else
 415         {
 416           /* Otherwise, we need the lower bound expression (which must
 417              be an affine function)  to determine the base.  */
 418           expression = LL_LOWER_BOUND (loop);
 419           gcc_assert (expression && !LLE_NEXT (expression)
 420                       && LLE_DENOMINATOR (expression) == 1);
 421
 422           /* The lower triangular portion of the base is going to be the
 423              coefficient times the step */
 424           for (j = 0; j < i; j++)
 425             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 426               * LL_STEP (LN_LOOPS (nest)[j]);
 427           base[i][i] = step;
 428           for (j = i + 1; j < depth; j++)
 429             base[i][j] = 0;
 430
 431           /* Origin for this loop is the constant of the lower bound
 432              expression.  */
 433           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 434
 435           /* Coefficient for the invariants are equal to the invariant
 436              coefficients in the expression.  */
 437           for (j = 0; j < invariants; j++)
 438             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 439               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 440         }
 441     }
 442   return ret;
 443 }
 444
 445 /* Compute the greatest common denominator of two numbers (A and B) using
 446    Euclid's algorithm.  */
 447
 448 static int
 449 gcd (int a, int b)
 450 {
 451
 452   int x, y, z;
 453
 454   x = abs (a);
 455   y = abs (b);
 456
 457   while (x > 0)
 458     {
 459       z = y % x;
 460       y = x;
 461       x = z;
 462     }
 463
 464   return (y);
 465 }
 466
 467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 468
 469 static int
 470 gcd_vector (lambda_vector vector, int size)
 471 {
 472   int i;
 473   int gcd1 = 0;
 474
 475   if (size > 0)
 476     {
 477       gcd1 = vector[0];
 478       for (i = 1; i < size; i++)
 479         gcd1 = gcd (gcd1, vector[i]);
 480     }
 481   return gcd1;
 482 }
 483
 484 /* Compute the least common multiple of two numbers A and B .  */
 485
 486 static int
 487 lcm (int a, int b)
 488 {
 489   return (abs (a) * abs (b) / gcd (a, b));
 490 }
 491
 492 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 493    auxiliary nest.
 494    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 495    it is easy to calculate the answer and bounds.
 496    A sketch of how it works:
 497    Given a system of linear inequalities, ai * xj >= bk, you can always
 498    rewrite the constraints so they are all of the form
 499    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 500    in b1 ... bk, and some a in a1...ai)
 501    You can then eliminate this x from the non-constant inequalities by
 502    rewriting these as a <= b, x >= constant, and delete the x variable.
 503    You can then repeat this for any remaining x variables, and then we have
 504    an easy to use variable <= constant (or no variables at all) form that we
 505    can construct our bounds from.
 506
 507    In our case, each time we eliminate, we construct part of the bound from
 508    the ith variable, then delete the ith variable.
 509
 510    Remember the constant are in our vector a, our coefficient matrix is A,
 511    and our invariant coefficient matrix is B.
 512
 513    SIZE is the size of the matrices being passed.
 514    DEPTH is the loop nest depth.
 515    INVARIANTS is the number of loop invariants.
 516    A, B, and a are the coefficient matrix, invariant coefficient, and a
 517    vector of constants, respectively.  */
 518
 519 static lambda_loopnest
 520 compute_nest_using_fourier_motzkin (int size,
 521                                     int depth,
 522                                     int invariants,
 523                                     lambda_matrix A,
 524                                     lambda_matrix B,
 525                                     lambda_vector a)
 526 {
 527
 528   int multiple, f1, f2;
 529   int i, j, k;
 530   lambda_linear_expression expression;
 531   lambda_loop loop;
 532   lambda_loopnest auxillary_nest;
 533   lambda_matrix swapmatrix, A1, B1;
 534   lambda_vector swapvector, a1;
 535   int newsize;
 536
 537   A1 = lambda_matrix_new (128, depth);
 538   B1 = lambda_matrix_new (128, invariants);
 539   a1 = lambda_vector_new (128);
 540
 541   auxillary_nest = lambda_loopnest_new (depth, invariants);
 542
 543   for (i = depth - 1; i >= 0; i--)
 544     {
 545       loop = lambda_loop_new ();
 546       LN_LOOPS (auxillary_nest)[i] = loop;
 547       LL_STEP (loop) = 1;
 548
 549       for (j = 0; j < size; j++)
 550         {
 551           if (A[j][i] < 0)
 552             {
 553               /* Any linear expression in the matrix with a coefficient less
 554                  than 0 becomes part of the new lower bound.  */
 555               expression = lambda_linear_expression_new (depth, invariants);
 556
 557               for (k = 0; k < i; k++)
 558                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 559
 560               for (k = 0; k < invariants; k++)
 561                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 562
 563               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 564               LLE_CONSTANT (expression) = -1 * a[j];
 565
 566               /* Ignore if identical to the existing lower bound.  */
 567               if (!lle_equal (LL_LOWER_BOUND (loop),
 568                               expression, depth, invariants))
 569                 {
 570                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 571                   LL_LOWER_BOUND (loop) = expression;
 572                 }
 573
 574             }
 575           else if (A[j][i] > 0)
 576             {
 577               /* Any linear expression with a coefficient greater than 0
 578                  becomes part of the new upper bound.  */
 579               expression = lambda_linear_expression_new (depth, invariants);
 580               for (k = 0; k < i; k++)
 581                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 582
 583               for (k = 0; k < invariants; k++)
 584                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 585
 586               LLE_DENOMINATOR (expression) = A[j][i];
 587               LLE_CONSTANT (expression) = a[j];
 588
 589               /* Ignore if identical to the existing upper bound.  */
 590               if (!lle_equal (LL_UPPER_BOUND (loop),
 591                               expression, depth, invariants))
 592                 {
 593                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 594                   LL_UPPER_BOUND (loop) = expression;
 595                 }
 596
 597             }
 598         }
 599
 600       /* This portion creates a new system of linear inequalities by deleting
 601          the i'th variable, reducing the system by one variable.  */
 602       newsize = 0;
 603       for (j = 0; j < size; j++)
 604         {
 605           /* If the coefficient for the i'th variable is 0, then we can just
 606              eliminate the variable straightaway.  Otherwise, we have to
 607              multiply through by the coefficients we are eliminating.  */
 608           if (A[j][i] == 0)
 609             {
 610               lambda_vector_copy (A[j], A1[newsize], depth);
 611               lambda_vector_copy (B[j], B1[newsize], invariants);
 612               a1[newsize] = a[j];
 613               newsize++;
 614             }
 615           else if (A[j][i] > 0)
 616             {
 617               for (k = 0; k < size; k++)
 618                 {
 619                   if (A[k][i] < 0)
 620                     {
 621                       multiple = lcm (A[j][i], A[k][i]);
 622                       f1 = multiple / A[j][i];
 623                       f2 = -1 * multiple / A[k][i];
 624
 625                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 626                                             A1[newsize], depth);
 627                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 628                                             B1[newsize], invariants);
 629                       a1[newsize] = f1 * a[j] + f2 * a[k];
 630                       newsize++;
 631                     }
 632                 }
 633             }
 634         }
 635
 636       swapmatrix = A;
 637       A = A1;
 638       A1 = swapmatrix;
 639
 640       swapmatrix = B;
 641       B = B1;
 642       B1 = swapmatrix;
 643
 644       swapvector = a;
 645       a = a1;
 646       a1 = swapvector;
 647
 648       size = newsize;
 649     }
 650
 651   return auxillary_nest;
 652 }
 653
 654 /* Compute the loop bounds for the auxiliary space NEST.
 655    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 656    Given the original nest, this function will
 657    1. Convert the nest into matrix form, which consists of a matrix for the
 658    coefficients, a matrix for the
 659    invariant coefficients, and a vector for the constants.
 660    2. Use the matrix form to calculate the lattice base for the nest (which is
 661    a dense space)
 662    3. Compose the dense space transform with the user specified transform, to
 663    get a transform we can easily calculate transformed bounds for.
 664    4. Multiply the composed transformation matrix times the matrix form of the
 665    loop.
 666    5. Transform the newly created matrix (from step 4) back into a loop nest
 667    using fourier motzkin elimination to figure out the bounds.  */
 668
 669 static lambda_loopnest
 670 lambda_compute_auxillary_space (lambda_loopnest nest,
 671                                 lambda_trans_matrix trans)
 672 {
 673   lambda_matrix A, B, A1, B1;
 674   lambda_vector a, a1;
 675   lambda_matrix invertedtrans;
 676   int depth, invariants, size;
 677   int i, j;
 678   lambda_loop loop;
 679   lambda_linear_expression expression;
 680   lambda_lattice lattice;
 681
 682   depth = LN_DEPTH (nest);
 683   invariants = LN_INVARIANTS (nest);
 684
 685   /* Unfortunately, we can't know the number of constraints we'll have
 686      ahead of time, but this should be enough even in ridiculous loop nest
 687      cases. We must not go over this limit.  */
 688   A = lambda_matrix_new (128, depth);
 689   B = lambda_matrix_new (128, invariants);
 690   a = lambda_vector_new (128);
 691
 692   A1 = lambda_matrix_new (128, depth);
 693   B1 = lambda_matrix_new (128, invariants);
 694   a1 = lambda_vector_new (128);
 695
 696   /* Store the bounds in the equation matrix A, constant vector a, and
 697      invariant matrix B, so that we have Ax <= a + B.
 698      This requires a little equation rearranging so that everything is on the
 699      correct side of the inequality.  */
 700   size = 0;
 701   for (i = 0; i < depth; i++)
 702     {
 703       loop = LN_LOOPS (nest)[i];
 704
 705       /* First we do the lower bound.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_LOWER_BOUND (loop);
 708       else
 709         expression = LL_UPPER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 725              constants and single variables on   */
 726           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 727           a[size] *= -1;
 728           for (j = 0; j < invariants; j++)
 729             B[size][j] *= -1;
 730
 731           size++;
 732           /* Need to increase matrix sizes above.  */
 733           gcc_assert (size <= 127);
 734
 735         }
 736
 737       /* Then do the exact same thing for the upper bounds.  */
 738       if (LL_STEP (loop) > 0)
 739         expression = LL_UPPER_BOUND (loop);
 740       else
 741         expression = LL_LOWER_BOUND (loop);
 742
 743       for (; expression != NULL; expression = LLE_NEXT (expression))
 744         {
 745           /* Fill in the coefficient.  */
 746           for (j = 0; j < i; j++)
 747             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 748
 749           /* And the invariant coefficient.  */
 750           for (j = 0; j < invariants; j++)
 751             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 752
 753           /* And the constant.  */
 754           a[size] = LLE_CONSTANT (expression);
 755
 756           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 757           for (j = 0; j < i; j++)
 758             A[size][j] *= -1;
 759           A[size][i] = LLE_DENOMINATOR (expression);
 760           size++;
 761           /* Need to increase matrix sizes above.  */
 762           gcc_assert (size <= 127);
 763
 764         }
 765     }
 766
 767   /* Compute the lattice base x = base * y + origin, where y is the
 768      base space.  */
 769   lattice = lambda_lattice_compute_base (nest);
 770
 771   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 772
 773   /* A1 = A * L */
 774   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 775
 776   /* a1 = a - A * origin constant.  */
 777   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 778   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 779
 780   /* B1 = B - A * origin invariant.  */
 781   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 782                       invariants);
 783   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 784
 785   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 786      it by A1, then performing fourier motzkin.  */
 787
 788   invertedtrans = lambda_matrix_new (depth, depth);
 789
 790   /* Compute the inverse of U.  */
 791   lambda_matrix_inverse (LTM_MATRIX (trans),
 792                          invertedtrans, depth);
 793
 794   /* A = A1 inv(U).  */
 795   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 796
 797   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 798                                              A, B1, a1);
 799 }
 800
 801 /* Compute the loop bounds for the target space, using the bounds of
 802    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 803    The target space loop bounds are computed by multiplying the triangular
 804    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 805    the loop steps (positive or negative) is then used to swap the bounds if
 806    the loop counts downwards.
 807    Return the target loopnest.  */
 808
 809 static lambda_loopnest
 810 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 811                              lambda_trans_matrix H, lambda_vector stepsigns)
 812 {
 813   lambda_matrix inverse, H1;
 814   int determinant, i, j;
 815   int gcd1, gcd2;
 816   int factor;
 817
 818   lambda_loopnest target_nest;
 819   int depth, invariants;
 820   lambda_matrix target;
 821
 822   lambda_loop auxillary_loop, target_loop;
 823   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 824
 825   depth = LN_DEPTH (auxillary_nest);
 826   invariants = LN_INVARIANTS (auxillary_nest);
 827
 828   inverse = lambda_matrix_new (depth, depth);
 829   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 830
 831   /* H1 is H excluding its diagonal.  */
 832   H1 = lambda_matrix_new (depth, depth);
 833   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 834
 835   for (i = 0; i < depth; i++)
 836     H1[i][i] = 0;
 837
 838   /* Computes the linear offsets of the loop bounds.  */
 839   target = lambda_matrix_new (depth, depth);
 840   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 841
 842   target_nest = lambda_loopnest_new (depth, invariants);
 843
 844   for (i = 0; i < depth; i++)
 845     {
 846
 847       /* Get a new loop structure.  */
 848       target_loop = lambda_loop_new ();
 849       LN_LOOPS (target_nest)[i] = target_loop;
 850
 851       /* Computes the gcd of the coefficients of the linear part.  */
 852       gcd1 = gcd_vector (target[i], i);
 853
 854       /* Include the denominator in the GCD.  */
 855       gcd1 = gcd (gcd1, determinant);
 856
 857       /* Now divide through by the gcd.  */
 858       for (j = 0; j < i; j++)
 859         target[i][j] = target[i][j] / gcd1;
 860
 861       expression = lambda_linear_expression_new (depth, invariants);
 862       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 863       LLE_DENOMINATOR (expression) = determinant / gcd1;
 864       LLE_CONSTANT (expression) = 0;
 865       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 866                            invariants);
 867       LL_LINEAR_OFFSET (target_loop) = expression;
 868     }
 869
 870   /* For each loop, compute the new bounds from H.  */
 871   for (i = 0; i < depth; i++)
 872     {
 873       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 874       target_loop = LN_LOOPS (target_nest)[i];
 875       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 876       factor = LTM_MATRIX (H)[i][i];
 877
 878       /* First we do the lower bound.  */
 879       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 880
 881       for (; auxillary_expr != NULL;
 882            auxillary_expr = LLE_NEXT (auxillary_expr))
 883         {
 884           target_expr = lambda_linear_expression_new (depth, invariants);
 885           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 886                                      depth, inverse, depth,
 887                                      LLE_COEFFICIENTS (target_expr));
 888           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 889                                     LLE_COEFFICIENTS (target_expr), depth,
 890                                     factor);
 891
 892           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 893           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 894                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 895                               invariants);
 896           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 898                                     invariants, factor);
 899           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 900
 901           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 902             {
 903               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 904                 * determinant;
 905               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 906                                         (target_expr),
 907                                         LLE_INVARIANT_COEFFICIENTS
 908                                         (target_expr), invariants,
 909                                         determinant);
 910               LLE_DENOMINATOR (target_expr) =
 911                 LLE_DENOMINATOR (target_expr) * determinant;
 912             }
 913           /* Find the gcd and divide by it here, rather than doing it
 914              at the tree level.  */
 915           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 916           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 917                              invariants);
 918           gcd1 = gcd (gcd1, gcd2);
 919           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 920           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 921           for (j = 0; j < depth; j++)
 922             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 923           for (j = 0; j < invariants; j++)
 924             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 925           LLE_CONSTANT (target_expr) /= gcd1;
 926           LLE_DENOMINATOR (target_expr) /= gcd1;
 927           /* Ignore if identical to existing bound.  */
 928           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 929                           invariants))
 930             {
 931               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 932               LL_LOWER_BOUND (target_loop) = target_expr;
 933             }
 934         }
 935       /* Now do the upper bound.  */
 936       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 937
 938       for (; auxillary_expr != NULL;
 939            auxillary_expr = LLE_NEXT (auxillary_expr))
 940         {
 941           target_expr = lambda_linear_expression_new (depth, invariants);
 942           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 943                                      depth, inverse, depth,
 944                                      LLE_COEFFICIENTS (target_expr));
 945           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 946                                     LLE_COEFFICIENTS (target_expr), depth,
 947                                     factor);
 948           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 949           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 950                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 951                               invariants);
 952           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 954                                     invariants, factor);
 955           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 956
 957           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 958             {
 959               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 960                 * determinant;
 961               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 962                                         (target_expr),
 963                                         LLE_INVARIANT_COEFFICIENTS
 964                                         (target_expr), invariants,
 965                                         determinant);
 966               LLE_DENOMINATOR (target_expr) =
 967                 LLE_DENOMINATOR (target_expr) * determinant;
 968             }
 969           /* Find the gcd and divide by it here, instead of at the
 970              tree level.  */
 971           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 972           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 973                              invariants);
 974           gcd1 = gcd (gcd1, gcd2);
 975           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 976           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 977           for (j = 0; j < depth; j++)
 978             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 979           for (j = 0; j < invariants; j++)
 980             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 981           LLE_CONSTANT (target_expr) /= gcd1;
 982           LLE_DENOMINATOR (target_expr) /= gcd1;
 983           /* Ignore if equal to existing bound.  */
 984           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 985                           invariants))
 986             {
 987               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 988               LL_UPPER_BOUND (target_loop) = target_expr;
 989             }
 990         }
 991     }
 992   for (i = 0; i < depth; i++)
 993     {
 994       target_loop = LN_LOOPS (target_nest)[i];
 995       /* If necessary, exchange the upper and lower bounds and negate
 996          the step size.  */
 997       if (stepsigns[i] < 0)
 998         {
 999           LL_STEP (target_loop) *= -1;
1000           tmp_expr = LL_LOWER_BOUND (target_loop);
1001           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1002           LL_UPPER_BOUND (target_loop) = tmp_expr;
1003         }
1004     }
1005   return target_nest;
1006 }
1007
1008 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1009    result.  */
1010
1011 static lambda_vector
1012 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1013 {
1014   lambda_matrix matrix, H;
1015   int size;
1016   lambda_vector newsteps;
1017   int i, j, factor, minimum_column;
1018   int temp;
1019
1020   matrix = LTM_MATRIX (trans);
1021   size = LTM_ROWSIZE (trans);
1022   H = lambda_matrix_new (size, size);
1023
1024   newsteps = lambda_vector_new (size);
1025   lambda_vector_copy (stepsigns, newsteps, size);
1026
1027   lambda_matrix_copy (matrix, H, size, size);
1028
1029   for (j = 0; j < size; j++)
1030     {
1031       lambda_vector row;
1032       row = H[j];
1033       for (i = j; i < size; i++)
1034         if (row[i] < 0)
1035           lambda_matrix_col_negate (H, size, i);
1036       while (lambda_vector_first_nz (row, size, j + 1) < size)
1037         {
1038           minimum_column = lambda_vector_min_nz (row, size, j);
1039           lambda_matrix_col_exchange (H, size, j, minimum_column);
1040
1041           temp = newsteps[j];
1042           newsteps[j] = newsteps[minimum_column];
1043           newsteps[minimum_column] = temp;
1044
1045           for (i = j + 1; i < size; i++)
1046             {
1047               factor = row[i] / row[j];
1048               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1049             }
1050         }
1051     }
1052   return newsteps;
1053 }
1054
1055 /* Transform NEST according to TRANS, and return the new loopnest.
1056    This involves
1057    1. Computing a lattice base for the transformation
1058    2. Composing the dense base with the specified transformation (TRANS)
1059    3. Decomposing the combined transformation into a lower triangular portion,
1060    and a unimodular portion.
1061    4. Computing the auxiliary nest using the unimodular portion.
1062    5. Computing the target nest using the auxiliary nest and the lower
1063    triangular portion.  */
1064
1065 lambda_loopnest
1066 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1067 {
1068   lambda_loopnest auxillary_nest, target_nest;
1069
1070   int depth, invariants;
1071   int i, j;
1072   lambda_lattice lattice;
1073   lambda_trans_matrix trans1, H, U;
1074   lambda_loop loop;
1075   lambda_linear_expression expression;
1076   lambda_vector origin;
1077   lambda_matrix origin_invariants;
1078   lambda_vector stepsigns;
1079   int f;
1080
1081   depth = LN_DEPTH (nest);
1082   invariants = LN_INVARIANTS (nest);
1083
1084   /* Keep track of the signs of the loop steps.  */
1085   stepsigns = lambda_vector_new (depth);
1086   for (i = 0; i < depth; i++)
1087     {
1088       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1089         stepsigns[i] = 1;
1090       else
1091         stepsigns[i] = -1;
1092     }
1093
1094   /* Compute the lattice base.  */
1095   lattice = lambda_lattice_compute_base (nest);
1096   trans1 = lambda_trans_matrix_new (depth, depth);
1097
1098   /* Multiply the transformation matrix by the lattice base.  */
1099
1100   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1101                       LTM_MATRIX (trans1), depth, depth, depth);
1102
1103   /* Compute the Hermite normal form for the new transformation matrix.  */
1104   H = lambda_trans_matrix_new (depth, depth);
1105   U = lambda_trans_matrix_new (depth, depth);
1106   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1107                          LTM_MATRIX (U));
1108
1109   /* Compute the auxiliary loop nest's space from the unimodular
1110      portion.  */
1111   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1112
1113   /* Compute the loop step signs from the old step signs and the
1114      transformation matrix.  */
1115   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1116
1117   /* Compute the target loop nest space from the auxiliary nest and
1118      the lower triangular matrix H.  */
1119   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1120   origin = lambda_vector_new (depth);
1121   origin_invariants = lambda_matrix_new (depth, invariants);
1122   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1123                              LATTICE_ORIGIN (lattice), origin);
1124   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1125                       origin_invariants, depth, depth, invariants);
1126
1127   for (i = 0; i < depth; i++)
1128     {
1129       loop = LN_LOOPS (target_nest)[i];
1130       expression = LL_LINEAR_OFFSET (loop);
1131       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1132         f = 1;
1133       else
1134         f = LLE_DENOMINATOR (expression);
1135
1136       LLE_CONSTANT (expression) += f * origin[i];
1137
1138       for (j = 0; j < invariants; j++)
1139         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1140           f * origin_invariants[i][j];
1141     }
1142
1143   return target_nest;
1144
1145 }
1146
1147 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1148    return the new expression.  DEPTH is the depth of the loopnest.
1149    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1150    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1151    is the amount we have to add/subtract from the expression because of the
1152    type of comparison it is used in.  */
1153
1154 static lambda_linear_expression
1155 gcc_tree_to_linear_expression (int depth, tree expr,
1156                                VEC(tree,heap) *outerinductionvars,
1157                                VEC(tree,heap) *invariants, int extra)
1158 {
1159   lambda_linear_expression lle = NULL;
1160   switch (TREE_CODE (expr))
1161     {
1162     case INTEGER_CST:
1163       {
1164         lle = lambda_linear_expression_new (depth, 2 * depth);
1165         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1166         if (extra != 0)
1167           LLE_CONSTANT (lle) += extra;
1168
1169         LLE_DENOMINATOR (lle) = 1;
1170       }
1171       break;
1172     case SSA_NAME:
1173       {
1174         tree iv, invar;
1175         size_t i;
1176         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1177           if (iv != NULL)
1178             {
1179               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1180                 {
1181                   lle = lambda_linear_expression_new (depth, 2 * depth);
1182                   LLE_COEFFICIENTS (lle)[i] = 1;
1183                   if (extra != 0)
1184                     LLE_CONSTANT (lle) = extra;
1185
1186                   LLE_DENOMINATOR (lle) = 1;
1187                 }
1188             }
1189         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1190           if (invar != NULL)
1191             {
1192               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1193                 {
1194                   lle = lambda_linear_expression_new (depth, 2 * depth);
1195                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1196                   if (extra != 0)
1197                     LLE_CONSTANT (lle) = extra;
1198                   LLE_DENOMINATOR (lle) = 1;
1199                 }
1200             }
1201       }
1202       break;
1203     default:
1204       return NULL;
1205     }
1206
1207   return lle;
1208 }
1209
1210 /* Return the depth of the loopnest NEST */
1211
1212 static int
1213 depth_of_nest (struct loop *nest)
1214 {
1215   size_t depth = 0;
1216   while (nest)
1217     {
1218       depth++;
1219       nest = nest->inner;
1220     }
1221   return depth;
1222 }
1223
1224
1225 /* Return true if OP is invariant in LOOP and all outer loops.  */
1226
1227 static bool
1228 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1229 {
1230   if (is_gimple_min_invariant (op))
1231     return true;
1232   if (loop->depth == 0)
1233     return true;
1234   if (!expr_invariant_in_loop_p (loop, op))
1235     return false;
1236   if (loop->outer
1237       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1238     return false;
1239   return true;
1240 }
1241
1242 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1243    or NULL if it could not be converted.
1244    DEPTH is the depth of the loop.
1245    INVARIANTS is a pointer to the array of loop invariants.
1246    The induction variable for this loop should be stored in the parameter
1247    OURINDUCTIONVAR.
1248    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1249
1250 static lambda_loop
1251 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1252                          VEC(tree,heap) ** invariants,
1253                          tree * ourinductionvar,
1254                          VEC(tree,heap) * outerinductionvars,
1255                          VEC(tree,heap) ** lboundvars,
1256                          VEC(tree,heap) ** uboundvars,
1257                          VEC(int,heap) ** steps)
1258 {
1259   tree phi;
1260   tree exit_cond;
1261   tree access_fn, inductionvar;
1262   tree step;
1263   lambda_loop lloop = NULL;
1264   lambda_linear_expression lbound, ubound;
1265   tree test;
1266   int stepint;
1267   int extra = 0;
1268   tree lboundvar, uboundvar, uboundresult;
1269   use_optype uses;
1270
1271   /* Find out induction var and exit condition.  */
1272   inductionvar = find_induction_var_from_exit_cond (loop);
1273   exit_cond = get_loop_exit_condition (loop);
1274
1275   if (inductionvar == NULL || exit_cond == NULL)
1276     {
1277       if (dump_file && (dump_flags & TDF_DETAILS))
1278         fprintf (dump_file,
1279                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1280       return NULL;
1281     }
1282
1283   test = TREE_OPERAND (exit_cond, 0);
1284
1285   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1286     {
1287
1288       if (dump_file && (dump_flags & TDF_DETAILS))
1289         fprintf (dump_file,
1290                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1291
1292       return NULL;
1293     }
1294
1295   phi = SSA_NAME_DEF_STMT (inductionvar);
1296   if (TREE_CODE (phi) != PHI_NODE)
1297     {
1298       uses = STMT_USE_OPS (phi);
1299
1300       if (!uses)
1301         {
1302
1303           if (dump_file && (dump_flags & TDF_DETAILS))
1304             fprintf (dump_file,
1305                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1306
1307           return NULL;
1308         }
1309
1310       phi = USE_OP (uses, 0);
1311       phi = SSA_NAME_DEF_STMT (phi);
1312       if (TREE_CODE (phi) != PHI_NODE)
1313         {
1314
1315           if (dump_file && (dump_flags & TDF_DETAILS))
1316             fprintf (dump_file,
1317                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1318           return NULL;
1319         }
1320
1321     }
1322
1323   /* The induction variable name/version we want to put in the array is the
1324      result of the induction variable phi node.  */
1325   *ourinductionvar = PHI_RESULT (phi);
1326   access_fn = instantiate_parameters
1327     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1328   if (access_fn == chrec_dont_know)
1329     {
1330       if (dump_file && (dump_flags & TDF_DETAILS))
1331         fprintf (dump_file,
1332                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1333
1334       return NULL;
1335     }
1336
1337   step = evolution_part_in_loop_num (access_fn, loop->num);
1338   if (!step || step == chrec_dont_know)
1339     {
1340       if (dump_file && (dump_flags & TDF_DETAILS))
1341         fprintf (dump_file,
1342                  "Unable to convert loop: Cannot determine step of loop.\n");
1343
1344       return NULL;
1345     }
1346   if (TREE_CODE (step) != INTEGER_CST)
1347     {
1348
1349       if (dump_file && (dump_flags & TDF_DETAILS))
1350         fprintf (dump_file,
1351                  "Unable to convert loop: Step of loop is not integer.\n");
1352       return NULL;
1353     }
1354
1355   stepint = TREE_INT_CST_LOW (step);
1356
1357   /* Only want phis for induction vars, which will have two
1358      arguments.  */
1359   if (PHI_NUM_ARGS (phi) != 2)
1360     {
1361       if (dump_file && (dump_flags & TDF_DETAILS))
1362         fprintf (dump_file,
1363                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1364       return NULL;
1365     }
1366
1367   /* Another induction variable check. One argument's source should be
1368      in the loop, one outside the loop.  */
1369   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1370       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1371     {
1372
1373       if (dump_file && (dump_flags & TDF_DETAILS))
1374         fprintf (dump_file,
1375                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1376
1377       return NULL;
1378     }
1379
1380   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1381     {
1382       lboundvar = PHI_ARG_DEF (phi, 1);
1383       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1384                                               outerinductionvars, *invariants,
1385                                               0);
1386     }
1387   else
1388     {
1389       lboundvar = PHI_ARG_DEF (phi, 0);
1390       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1391                                               outerinductionvars, *invariants,
1392                                               0);
1393     }
1394
1395   if (!lbound)
1396     {
1397
1398       if (dump_file && (dump_flags & TDF_DETAILS))
1399         fprintf (dump_file,
1400                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1401
1402       return NULL;
1403     }
1404   /* One part of the test may be a loop invariant tree.  */
1405   VEC_reserve (tree, heap, *invariants, 1);
1406   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1407       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1408     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1409   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1410            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1411     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1412
1413   /* The non-induction variable part of the test is the upper bound variable.
1414    */
1415   if (TREE_OPERAND (test, 0) == inductionvar)
1416     uboundvar = TREE_OPERAND (test, 1);
1417   else
1418     uboundvar = TREE_OPERAND (test, 0);
1419
1420
1421   /* We only size the vectors assuming we have, at max, 2 times as many
1422      invariants as we do loops (one for each bound).
1423      This is just an arbitrary number, but it has to be matched against the
1424      code below.  */
1425   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1426
1427
1428   /* We might have some leftover.  */
1429   if (TREE_CODE (test) == LT_EXPR)
1430     extra = -1 * stepint;
1431   else if (TREE_CODE (test) == NE_EXPR)
1432     extra = -1 * stepint;
1433   else if (TREE_CODE (test) == GT_EXPR)
1434     extra = -1 * stepint;
1435   else if (TREE_CODE (test) == EQ_EXPR)
1436     extra = 1 * stepint;
1437
1438   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1439                                           outerinductionvars,
1440                                           *invariants, extra);
1441   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1442                         build_int_cst (TREE_TYPE (uboundvar), extra));
1443   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1444   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1445   VEC_safe_push (int, heap, *steps, stepint);
1446   if (!ubound)
1447     {
1448       if (dump_file && (dump_flags & TDF_DETAILS))
1449         fprintf (dump_file,
1450                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1451       return NULL;
1452     }
1453
1454   lloop = lambda_loop_new ();
1455   LL_STEP (lloop) = stepint;
1456   LL_LOWER_BOUND (lloop) = lbound;
1457   LL_UPPER_BOUND (lloop) = ubound;
1458   return lloop;
1459 }
1460
1461 /* Given a LOOP, find the induction variable it is testing against in the exit
1462    condition.  Return the induction variable if found, NULL otherwise.  */
1463
1464 static tree
1465 find_induction_var_from_exit_cond (struct loop *loop)
1466 {
1467   tree expr = get_loop_exit_condition (loop);
1468   tree ivarop;
1469   tree test;
1470   if (expr == NULL_TREE)
1471     return NULL_TREE;
1472   if (TREE_CODE (expr) != COND_EXPR)
1473     return NULL_TREE;
1474   test = TREE_OPERAND (expr, 0);
1475   if (!COMPARISON_CLASS_P (test))
1476     return NULL_TREE;
1477
1478   /* Find the side that is invariant in this loop. The ivar must be the other
1479      side.  */
1480
1481   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1482       ivarop = TREE_OPERAND (test, 1);
1483   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1484       ivarop = TREE_OPERAND (test, 0);
1485   else
1486     return NULL_TREE;
1487
1488   if (TREE_CODE (ivarop) != SSA_NAME)
1489     return NULL_TREE;
1490   return ivarop;
1491 }
1492
1493 DEF_VEC_P(lambda_loop);
1494 DEF_VEC_ALLOC_P(lambda_loop,heap);
1495
1496 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1497    Return the new loop nest.
1498    INDUCTIONVARS is a pointer to an array of induction variables for the
1499    loopnest that will be filled in during this process.
1500    INVARIANTS is a pointer to an array of invariants that will be filled in
1501    during this process.  */
1502
1503 lambda_loopnest
1504 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1505                                  struct loop * loop_nest,
1506                                  VEC(tree,heap) **inductionvars,
1507                                  VEC(tree,heap) **invariants,
1508                                  bool need_perfect_nest)
1509 {
1510   lambda_loopnest ret = NULL;
1511   struct loop *temp;
1512   int depth = 0;
1513   size_t i;
1514   VEC(lambda_loop,heap) *loops = NULL;
1515   VEC(tree,heap) *uboundvars = NULL;
1516   VEC(tree,heap) *lboundvars  = NULL;
1517   VEC(int,heap) *steps = NULL;
1518   lambda_loop newloop;
1519   tree inductionvar = NULL;
1520
1521   depth = depth_of_nest (loop_nest);
1522   temp = loop_nest;
1523   while (temp)
1524     {
1525       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1526                                          &inductionvar, *inductionvars,
1527                                          &lboundvars, &uboundvars,
1528                                          &steps);
1529       if (!newloop)
1530         return NULL;
1531       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1532       VEC_safe_push (lambda_loop, heap, loops, newloop);
1533       temp = temp->inner;
1534     }
1535   if (need_perfect_nest)
1536     {
1537       if (!perfect_nestify (currloops, loop_nest,
1538                             lboundvars, uboundvars, steps, *inductionvars))
1539         {
1540           if (dump_file)
1541             fprintf (dump_file,
1542                      "Not a perfect loop nest and couldn't convert to one.\n");
1543           goto fail;
1544         }
1545       else if (dump_file)
1546         fprintf (dump_file,
1547                  "Successfully converted loop nest to perfect loop nest.\n");
1548     }
1549   ret = lambda_loopnest_new (depth, 2 * depth);
1550   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1551     LN_LOOPS (ret)[i] = newloop;
1552  fail:
1553   VEC_free (lambda_loop, heap, loops);
1554   VEC_free (tree, heap, uboundvars);
1555   VEC_free (tree, heap, lboundvars);
1556   VEC_free (int, heap, steps);
1557
1558   return ret;
1559 }
1560
1561 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1562    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1563    inserted for us are stored.  INDUCTION_VARS is the array of induction
1564    variables for the loop this LBV is from.  TYPE is the tree type to use for
1565    the variables and trees involved.  */
1566
1567 static tree
1568 lbv_to_gcc_expression (lambda_body_vector lbv,
1569                        tree type, VEC(tree,heap) *induction_vars,
1570                        tree *stmts_to_insert)
1571 {
1572   tree stmts, stmt, resvar, name;
1573   tree iv;
1574   size_t i;
1575   tree_stmt_iterator tsi;
1576
1577   /* Create a statement list and a linear expression temporary.  */
1578   stmts = alloc_stmt_list ();
1579   resvar = create_tmp_var (type, "lbvtmp");
1580   add_referenced_tmp_var (resvar);
1581
1582   /* Start at 0.  */
1583   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1584   name = make_ssa_name (resvar, stmt);
1585   TREE_OPERAND (stmt, 0) = name;
1586   tsi = tsi_last (stmts);
1587   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1588
1589   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1590     {
1591       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1592         {
1593           tree newname;
1594           tree coeffmult;
1595
1596           /* newname = coefficient * induction_variable */
1597           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1598           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1599                         fold (build (MULT_EXPR, type, iv, coeffmult)));
1600
1601           newname = make_ssa_name (resvar, stmt);
1602           TREE_OPERAND (stmt, 0) = newname;
1603           fold_stmt (&stmt);
1604           tsi = tsi_last (stmts);
1605           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1606
1607           /* name = name + newname */
1608           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1609                         build (PLUS_EXPR, type, name, newname));
1610           name = make_ssa_name (resvar, stmt);
1611           TREE_OPERAND (stmt, 0) = name;
1612           fold_stmt (&stmt);
1613           tsi = tsi_last (stmts);
1614           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1615
1616         }
1617     }
1618
1619   /* Handle any denominator that occurs.  */
1620   if (LBV_DENOMINATOR (lbv) != 1)
1621     {
1622       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1623       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1624                     build (CEIL_DIV_EXPR, type, name, denominator));
1625       name = make_ssa_name (resvar, stmt);
1626       TREE_OPERAND (stmt, 0) = name;
1627       fold_stmt (&stmt);
1628       tsi = tsi_last (stmts);
1629       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1630     }
1631   *stmts_to_insert = stmts;
1632   return name;
1633 }
1634
1635 /* Convert a linear expression from coefficient and constant form to a
1636    gcc tree.
1637    Return the tree that represents the final value of the expression.
1638    LLE is the linear expression to convert.
1639    OFFSET is the linear offset to apply to the expression.
1640    TYPE is the tree type to use for the variables and math.
1641    INDUCTION_VARS is a vector of induction variables for the loops.
1642    INVARIANTS is a vector of the loop nest invariants.
1643    WRAP specifies what tree code to wrap the results in, if there is more than
1644    one (it is either MAX_EXPR, or MIN_EXPR).
1645    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1646    statements that need to be inserted for the linear expression.  */
1647
1648 static tree
1649 lle_to_gcc_expression (lambda_linear_expression lle,
1650                        lambda_linear_expression offset,
1651                        tree type,
1652                        VEC(tree,heap) *induction_vars,
1653                        VEC(tree,heap) *invariants,
1654                        enum tree_code wrap, tree *stmts_to_insert)
1655 {
1656   tree stmts, stmt, resvar, name;
1657   size_t i;
1658   tree_stmt_iterator tsi;
1659   tree iv, invar;
1660   VEC(tree,heap) *results = NULL;
1661
1662   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1663   name = NULL_TREE;
1664   /* Create a statement list and a linear expression temporary.  */
1665   stmts = alloc_stmt_list ();
1666   resvar = create_tmp_var (type, "lletmp");
1667   add_referenced_tmp_var (resvar);
1668
1669   /* Build up the linear expressions, and put the variable representing the
1670      result in the results array.  */
1671   for (; lle != NULL; lle = LLE_NEXT (lle))
1672     {
1673       /* Start at name = 0.  */
1674       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1675       name = make_ssa_name (resvar, stmt);
1676       TREE_OPERAND (stmt, 0) = name;
1677       fold_stmt (&stmt);
1678       tsi = tsi_last (stmts);
1679       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1680
1681       /* First do the induction variables.
1682          at the end, name = name + all the induction variables added
1683          together.  */
1684       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1685         {
1686           if (LLE_COEFFICIENTS (lle)[i] != 0)
1687             {
1688               tree newname;
1689               tree mult;
1690               tree coeff;
1691
1692               /* mult = induction variable * coefficient.  */
1693               if (LLE_COEFFICIENTS (lle)[i] == 1)
1694                 {
1695                   mult = VEC_index (tree, induction_vars, i);
1696                 }
1697               else
1698                 {
1699                   coeff = build_int_cst (type,
1700                                          LLE_COEFFICIENTS (lle)[i]);
1701                   mult = fold (build (MULT_EXPR, type, iv, coeff));
1702                 }
1703
1704               /* newname = mult */
1705               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1706               newname = make_ssa_name (resvar, stmt);
1707               TREE_OPERAND (stmt, 0) = newname;
1708               fold_stmt (&stmt);
1709               tsi = tsi_last (stmts);
1710               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1711
1712               /* name = name + newname */
1713               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1714                             build (PLUS_EXPR, type, name, newname));
1715               name = make_ssa_name (resvar, stmt);
1716               TREE_OPERAND (stmt, 0) = name;
1717               fold_stmt (&stmt);
1718               tsi = tsi_last (stmts);
1719               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1720             }
1721         }
1722
1723       /* Handle our invariants.
1724          At the end, we have name = name + result of adding all multiplied
1725          invariants.  */
1726       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1727         {
1728           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1729             {
1730               tree newname;
1731               tree mult;
1732               tree coeff;
1733               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1734               /* mult = invariant * coefficient  */
1735               if (invcoeff == 1)
1736                 {
1737                   mult = invar;
1738                 }
1739               else
1740                 {
1741                   coeff = build_int_cst (type, invcoeff);
1742                   mult = fold (build (MULT_EXPR, type, invar, coeff));
1743                 }
1744
1745               /* newname = mult */
1746               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1747               newname = make_ssa_name (resvar, stmt);
1748               TREE_OPERAND (stmt, 0) = newname;
1749               fold_stmt (&stmt);
1750               tsi = tsi_last (stmts);
1751               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1752
1753               /* name = name + newname */
1754               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1755                             build (PLUS_EXPR, type, name, newname));
1756               name = make_ssa_name (resvar, stmt);
1757               TREE_OPERAND (stmt, 0) = name;
1758               fold_stmt (&stmt);
1759               tsi = tsi_last (stmts);
1760               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1761             }
1762         }
1763
1764       /* Now handle the constant.
1765          name = name + constant.  */
1766       if (LLE_CONSTANT (lle) != 0)
1767         {
1768           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1769                         build (PLUS_EXPR, type, name,
1770                                build_int_cst (type, LLE_CONSTANT (lle))));
1771           name = make_ssa_name (resvar, stmt);
1772           TREE_OPERAND (stmt, 0) = name;
1773           fold_stmt (&stmt);
1774           tsi = tsi_last (stmts);
1775           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1776         }
1777
1778       /* Now handle the offset.
1779          name = name + linear offset.  */
1780       if (LLE_CONSTANT (offset) != 0)
1781         {
1782           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1783                         build (PLUS_EXPR, type, name,
1784                                build_int_cst (type, LLE_CONSTANT (offset))));
1785           name = make_ssa_name (resvar, stmt);
1786           TREE_OPERAND (stmt, 0) = name;
1787           fold_stmt (&stmt);
1788           tsi = tsi_last (stmts);
1789           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1790         }
1791
1792       /* Handle any denominator that occurs.  */
1793       if (LLE_DENOMINATOR (lle) != 1)
1794         {
1795           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1796           stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1797                         type, name, stmt);
1798           stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1799
1800           /* name = {ceil, floor}(name/denominator) */
1801           name = make_ssa_name (resvar, stmt);
1802           TREE_OPERAND (stmt, 0) = name;
1803           tsi = tsi_last (stmts);
1804           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1805         }
1806       VEC_safe_push (tree, heap, results, name);
1807     }
1808
1809   /* Again, out of laziness, we don't handle this case yet.  It's not
1810      hard, it just hasn't occurred.  */
1811   gcc_assert (VEC_length (tree, results) <= 2);
1812
1813   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1814   if (VEC_length (tree, results) > 1)
1815     {
1816       tree op1 = VEC_index (tree, results, 0);
1817       tree op2 = VEC_index (tree, results, 1);
1818       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1819                     build (wrap, type, op1, op2));
1820       name = make_ssa_name (resvar, stmt);
1821       TREE_OPERAND (stmt, 0) = name;
1822       tsi = tsi_last (stmts);
1823       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1824     }
1825
1826   VEC_free (tree, heap, results);
1827
1828   *stmts_to_insert = stmts;
1829   return name;
1830 }
1831
1832 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1833    it, back into gcc code.  This changes the
1834    loops, their induction variables, and their bodies, so that they
1835    match the transformed loopnest.
1836    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1837    loopnest.
1838    OLD_IVS is a vector of induction variables from the old loopnest.
1839    INVARIANTS is a vector of loop invariants from the old loopnest.
1840    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1841    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1842    NEW_LOOPNEST.  */
1843
1844 void
1845 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1846                                  VEC(tree,heap) *old_ivs,
1847                                  VEC(tree,heap) *invariants,
1848                                  lambda_loopnest new_loopnest,
1849                                  lambda_trans_matrix transform)
1850 {
1851   struct loop *temp;
1852   size_t i = 0;
1853   size_t depth = 0;
1854   VEC(tree,heap) *new_ivs = NULL;
1855   tree oldiv;
1856
1857   block_stmt_iterator bsi;
1858
1859   if (dump_file)
1860     {
1861       transform = lambda_trans_matrix_inverse (transform);
1862       fprintf (dump_file, "Inverse of transformation matrix:\n");
1863       print_lambda_trans_matrix (dump_file, transform);
1864     }
1865   depth = depth_of_nest (old_loopnest);
1866   temp = old_loopnest;
1867
1868   while (temp)
1869     {
1870       lambda_loop newloop;
1871       basic_block bb;
1872       edge exit;
1873       tree ivvar, ivvarinced, exitcond, stmts;
1874       enum tree_code testtype;
1875       tree newupperbound, newlowerbound;
1876       lambda_linear_expression offset;
1877       tree type;
1878       bool insert_after;
1879       tree inc_stmt;
1880
1881       oldiv = VEC_index (tree, old_ivs, i);
1882       type = TREE_TYPE (oldiv);
1883
1884       /* First, build the new induction variable temporary  */
1885
1886       ivvar = create_tmp_var (type, "lnivtmp");
1887       add_referenced_tmp_var (ivvar);
1888
1889       VEC_safe_push (tree, heap, new_ivs, ivvar);
1890
1891       newloop = LN_LOOPS (new_loopnest)[i];
1892
1893       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1894          cases for now.  */
1895       offset = LL_LINEAR_OFFSET (newloop);
1896
1897       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1898                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1899
1900       /* Now build the  new lower bounds, and insert the statements
1901          necessary to generate it on the loop preheader.  */
1902       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1903                                              LL_LINEAR_OFFSET (newloop),
1904                                              type,
1905                                              new_ivs,
1906                                              invariants, MAX_EXPR, &stmts);
1907       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1908       bsi_commit_edge_inserts ();
1909       /* Build the new upper bound and insert its statements in the
1910          basic block of the exit condition */
1911       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1912                                              LL_LINEAR_OFFSET (newloop),
1913                                              type,
1914                                              new_ivs,
1915                                              invariants, MIN_EXPR, &stmts);
1916       exit = temp->single_exit;
1917       exitcond = get_loop_exit_condition (temp);
1918       bb = bb_for_stmt (exitcond);
1919       bsi = bsi_start (bb);
1920       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1921
1922       /* Create the new iv.  */
1923
1924       standard_iv_increment_position (temp, &bsi, &insert_after);
1925       create_iv (newlowerbound,
1926                  build_int_cst (type, LL_STEP (newloop)),
1927                  ivvar, temp, &bsi, insert_after, &ivvar,
1928                  NULL);
1929
1930       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1931          dominate the block containing the exit condition.
1932          So we simply create our own incremented iv to use in the new exit
1933          test,  and let redundancy elimination sort it out.  */
1934       inc_stmt = build (PLUS_EXPR, type,
1935                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1936       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1937                         inc_stmt);
1938       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1939       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1940       bsi = bsi_for_stmt (exitcond);
1941       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1942
1943       /* Replace the exit condition with the new upper bound
1944          comparison.  */
1945
1946       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1947
1948       /* We want to build a conditional where true means exit the loop, and
1949          false means continue the loop.
1950          So swap the testtype if this isn't the way things are.*/
1951
1952       if (exit->flags & EDGE_FALSE_VALUE)
1953         testtype = swap_tree_comparison (testtype);
1954
1955       COND_EXPR_COND (exitcond) = build (testtype,
1956                                          boolean_type_node,
1957                                          newupperbound, ivvarinced);
1958       update_stmt (exitcond);
1959       VEC_replace (tree, new_ivs, i, ivvar);
1960
1961       i++;
1962       temp = temp->inner;
1963     }
1964
1965   /* Rewrite uses of the old ivs so that they are now specified in terms of
1966      the new ivs.  */
1967
1968   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1969     {
1970       imm_use_iterator imm_iter;
1971       use_operand_p imm_use;
1972       tree oldiv_def;
1973       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1974
1975       gcc_assert (TREE_CODE (oldiv_stmt) == PHI_NODE
1976                   || NUM_DEFS (STMT_DEF_OPS (oldiv_stmt)) == 1);
1977       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1978         oldiv_def = PHI_RESULT (oldiv_stmt);
1979       else
1980         oldiv_def = DEF_OP (STMT_DEF_OPS (oldiv_stmt), 0);
1981
1982       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1983         {
1984           tree stmt = USE_STMT (imm_use);
1985           use_operand_p use_p;
1986           ssa_op_iter iter;
1987           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1988           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1989             {
1990               if (USE_FROM_PTR (use_p) == oldiv)
1991                 {
1992                   tree newiv, stmts;
1993                   lambda_body_vector lbv, newlbv;
1994                   /* Compute the new expression for the induction
1995                      variable.  */
1996                   depth = VEC_length (tree, new_ivs);
1997                   lbv = lambda_body_vector_new (depth);
1998                   LBV_COEFFICIENTS (lbv)[i] = 1;
1999
2000                   newlbv = lambda_body_vector_compute_new (transform, lbv);
2001
2002                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
2003                                                  new_ivs, &stmts);
2004                   bsi = bsi_for_stmt (stmt);
2005                   /* Insert the statements to build that
2006                      expression.  */
2007                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2008                   propagate_value (use_p, newiv);
2009                   update_stmt (stmt);
2010
2011                 }
2012             }
2013         }
2014     }
2015   VEC_free (tree, heap, new_ivs);
2016 }
2017
2018 /* Returns true when the vector V is lexicographically positive, in
2019    other words, when the first nonzero element is positive.  */
2020
2021 static bool
2022 lambda_vector_lexico_pos (lambda_vector v,
2023                           unsigned n)
2024 {
2025   unsigned i;
2026   for (i = 0; i < n; i++)
2027     {
2028       if (v[i] == 0)
2029         continue;
2030       if (v[i] < 0)
2031         return false;
2032       if (v[i] > 0)
2033         return true;
2034     }
2035   return true;
2036 }
2037
2038
2039 /* Return TRUE if this is not interesting statement from the perspective of
2040    determining if we have a perfect loop nest.  */
2041
2042 static bool
2043 not_interesting_stmt (tree stmt)
2044 {
2045   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2046      loop, we would have already failed the number of exits tests.  */
2047   if (TREE_CODE (stmt) == LABEL_EXPR
2048       || TREE_CODE (stmt) == GOTO_EXPR
2049       || TREE_CODE (stmt) == COND_EXPR)
2050     return true;
2051   return false;
2052 }
2053
2054 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2055
2056 static bool
2057 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2058 {
2059   int i;
2060   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2061     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2062       if (PHI_ARG_DEF (phi, i) == def)
2063         return true;
2064   return false;
2065 }
2066
2067 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2068
2069 static bool
2070 stmt_uses_phi_result (tree stmt, tree phi_result)
2071 {
2072   use_optype uses = STMT_USE_OPS (stmt);
2073
2074   /* This is conservatively true, because we only want SIMPLE bumpers
2075      of the form x +- constant for our pass.  */
2076   if (NUM_USES (uses) != 1)
2077     return false;
2078   if (USE_OP (uses, 0) == phi_result)
2079     return true;
2080
2081   return false;
2082 }
2083
2084 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2085    in-loop-edge in a phi node, and the operand it uses is the result of that
2086    phi node.
2087    I.E. i_29 = i_3 + 1
2088         i_3 = PHI (0, i_29);  */
2089
2090 static bool
2091 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2092 {
2093   tree use;
2094   tree def;
2095   def_optype defs = STMT_DEF_OPS (stmt);
2096   imm_use_iterator iter;
2097   use_operand_p use_p;
2098
2099   if (NUM_DEFS (defs) != 1)
2100     return false;
2101   def = DEF_OP (defs, 0);
2102   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2103     {
2104       use = USE_STMT (use_p);
2105       if (TREE_CODE (use) == PHI_NODE)
2106         {
2107           if (phi_loop_edge_uses_def (loop, use, def))
2108             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2109               return true;
2110         }
2111     }
2112   return false;
2113 }
2114
2115
2116 /* Return true if LOOP is a perfect loop nest.
2117    Perfect loop nests are those loop nests where all code occurs in the
2118    innermost loop body.
2119    If S is a program statement, then
2120
2121    i.e.
2122    DO I = 1, 20
2123        S1
2124        DO J = 1, 20
2125        ...
2126        END DO
2127    END DO
2128    is not a perfect loop nest because of S1.
2129
2130    DO I = 1, 20
2131       DO J = 1, 20
2132         S1
2133         ...
2134       END DO
2135    END DO
2136    is a perfect loop nest.
2137
2138    Since we don't have high level loops anymore, we basically have to walk our
2139    statements and ignore those that are there because the loop needs them (IE
2140    the induction variable increment, and jump back to the top of the loop).  */
2141
2142 bool
2143 perfect_nest_p (struct loop *loop)
2144 {
2145   basic_block *bbs;
2146   size_t i;
2147   tree exit_cond;
2148
2149   if (!loop->inner)
2150     return true;
2151   bbs = get_loop_body (loop);
2152   exit_cond = get_loop_exit_condition (loop);
2153   for (i = 0; i < loop->num_nodes; i++)
2154     {
2155       if (bbs[i]->loop_father == loop)
2156         {
2157           block_stmt_iterator bsi;
2158           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2159             {
2160               tree stmt = bsi_stmt (bsi);
2161               if (stmt == exit_cond
2162                   || not_interesting_stmt (stmt)
2163                   || stmt_is_bumper_for_loop (loop, stmt))
2164                 continue;
2165               free (bbs);
2166               return false;
2167             }
2168         }
2169     }
2170   free (bbs);
2171   /* See if the inner loops are perfectly nested as well.  */
2172   if (loop->inner)
2173     return perfect_nest_p (loop->inner);
2174   return true;
2175 }
2176
2177 /* Replace the USES of tree X in STMT with tree Y */
2178
2179 static void
2180 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2181 {
2182   use_optype uses = STMT_USE_OPS (stmt);
2183   size_t i;
2184   for (i = 0; i < NUM_USES (uses); i++)
2185     {
2186       if (USE_OP (uses, i) == x)
2187         SET_USE_OP (uses, i, y);
2188     }
2189 }
2190
2191 /* Return TRUE if STMT uses tree OP in it's uses.  */
2192
2193 static bool
2194 stmt_uses_op (tree stmt, tree op)
2195 {
2196   use_optype uses = STMT_USE_OPS (stmt);
2197   size_t i;
2198   for (i = 0; i < NUM_USES (uses); i++)
2199     {
2200       if (USE_OP (uses, i) == op)
2201         return true;
2202     }
2203   return false;
2204 }
2205
2206 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2207    one.  LOOPIVS is a vector of induction variables, one per loop.
2208    ATM, we only handle imperfect nests of depth 2, where all of the statements
2209    occur after the inner loop.  */
2210
2211 static bool
2212 can_convert_to_perfect_nest (struct loop *loop,
2213                              VEC(tree,heap) *loopivs)
2214 {
2215   basic_block *bbs;
2216   tree exit_condition, phi;
2217   size_t i;
2218   block_stmt_iterator bsi;
2219   basic_block exitdest;
2220
2221   /* Can't handle triply nested+ loops yet.  */
2222   if (!loop->inner || loop->inner->inner)
2223     return false;
2224
2225   /* We only handle moving the after-inner-body statements right now, so make
2226      sure all the statements we need to move are located in that position.  */
2227   bbs = get_loop_body (loop);
2228   exit_condition = get_loop_exit_condition (loop);
2229   for (i = 0; i < loop->num_nodes; i++)
2230     {
2231       if (bbs[i]->loop_father == loop)
2232         {
2233           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2234             {
2235               size_t j;
2236               tree stmt = bsi_stmt (bsi);
2237               tree iv;
2238
2239               if (stmt == exit_condition
2240                   || not_interesting_stmt (stmt)
2241                   || stmt_is_bumper_for_loop (loop, stmt))
2242                 continue;
2243               /* If the statement uses inner loop ivs, we == screwed.  */
2244               for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
2245                 if (stmt_uses_op (stmt, iv))
2246                   goto fail;
2247
2248               /* If the bb of a statement we care about isn't dominated by
2249                  the header of the inner loop, then we are also screwed.  */
2250               if (!dominated_by_p (CDI_DOMINATORS,
2251                                    bb_for_stmt (stmt),
2252                                    loop->inner->header))
2253                 goto fail;
2254             }
2255         }
2256     }
2257
2258   /* We also need to make sure the loop exit only has simple copy phis in it,
2259      otherwise we don't know how to transform it into a perfect nest right
2260      now.  */
2261   exitdest = loop->single_exit->dest;
2262
2263   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2264     if (PHI_NUM_ARGS (phi) != 1)
2265       goto fail;
2266
2267   free (bbs);
2268   return true;
2269
2270  fail:
2271   free (bbs);
2272   return false;
2273 }
2274
2275 /* Transform the loop nest into a perfect nest, if possible.
2276    LOOPS is the current struct loops *
2277    LOOP is the loop nest to transform into a perfect nest
2278    LBOUNDS are the lower bounds for the loops to transform
2279    UBOUNDS are the upper bounds for the loops to transform
2280    STEPS is the STEPS for the loops to transform.
2281    LOOPIVS is the induction variables for the loops to transform.
2282
2283    Basically, for the case of
2284
2285    FOR (i = 0; i < 50; i++)
2286     {
2287      FOR (j =0; j < 50; j++)
2288      {
2289         <whatever>
2290      }
2291      <some code>
2292     }
2293
2294    This function will transform it into a perfect loop nest by splitting the
2295    outer loop into two loops, like so:
2296
2297    FOR (i = 0; i < 50; i++)
2298    {
2299      FOR (j = 0; j < 50; j++)
2300      {
2301          <whatever>
2302      }
2303    }
2304
2305    FOR (i = 0; i < 50; i ++)
2306    {
2307     <some code>
2308    }
2309
2310    Return FALSE if we can't make this loop into a perfect nest.  */
2311 static bool
2312 perfect_nestify (struct loops *loops,
2313                  struct loop *loop,
2314                  VEC(tree,heap) *lbounds,
2315                  VEC(tree,heap) *ubounds,
2316                  VEC(int,heap) *steps,
2317                  VEC(tree,heap) *loopivs)
2318 {
2319   basic_block *bbs;
2320   tree exit_condition;
2321   tree then_label, else_label, cond_stmt;
2322   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2323   size_t i;
2324   block_stmt_iterator bsi;
2325   bool insert_after;
2326   edge e;
2327   struct loop *newloop;
2328   tree phi;
2329   tree uboundvar;
2330   tree stmt;
2331   tree oldivvar, ivvar, ivvarinced;
2332   VEC(tree,heap) *phis = NULL;
2333
2334   if (!can_convert_to_perfect_nest (loop, loopivs))
2335     return false;
2336
2337   /* Create the new loop */
2338
2339   olddest = loop->single_exit->dest;
2340   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2341   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2342
2343   /* Push the exit phi nodes that we are moving.  */
2344   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2345     {
2346       VEC_reserve (tree, heap, phis, 2);
2347       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2348       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2349     }
2350   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2351
2352   /* Remove the exit phis from the old basic block.  Make sure to set
2353      PHI_RESULT to null so it doesn't get released.  */
2354   while (phi_nodes (olddest) != NULL)
2355     {
2356       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2357       remove_phi_node (phi_nodes (olddest), NULL);
2358     }
2359
2360   /* and add them back to the new basic block.  */
2361   while (VEC_length (tree, phis) != 0)
2362     {
2363       tree def;
2364       tree phiname;
2365       def = VEC_pop (tree, phis);
2366       phiname = VEC_pop (tree, phis);
2367       phi = create_phi_node (phiname, preheaderbb);
2368       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2369     }
2370   flush_pending_stmts (e);
2371   VEC_free (tree, heap, phis);
2372
2373   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2374   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2375   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2376   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2377   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2378   cond_stmt = build (COND_EXPR, void_type_node,
2379                      build (NE_EXPR, boolean_type_node,
2380                             integer_one_node,
2381                             integer_zero_node),
2382                      then_label, else_label);
2383   bsi = bsi_start (bodybb);
2384   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2385   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2386   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2387   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2388
2389   /* Update the loop structures.  */
2390   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2391   newloop->header = headerbb;
2392   newloop->latch = latchbb;
2393   newloop->single_exit = e;
2394   add_bb_to_loop (latchbb, newloop);
2395   add_bb_to_loop (bodybb, newloop);
2396   add_bb_to_loop (headerbb, newloop);
2397   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2398   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2399   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2400                            loop->single_exit->src);
2401   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2402   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2403   /* Create the new iv.  */
2404   ivvar = create_tmp_var (integer_type_node, "perfectiv");
2405   add_referenced_tmp_var (ivvar);
2406   standard_iv_increment_position (newloop, &bsi, &insert_after);
2407   create_iv (VEC_index (tree, lbounds, 0),
2408              build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
2409              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2410
2411   /* Create the new upper bound.  This may be not just a variable, so we copy
2412      it to one just in case.  */
2413
2414   exit_condition = get_loop_exit_condition (newloop);
2415   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2416   add_referenced_tmp_var (uboundvar);
2417   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2418                 VEC_index (tree, ubounds, 0));
2419   uboundvar = make_ssa_name (uboundvar, stmt);
2420   TREE_OPERAND (stmt, 0) = uboundvar;
2421
2422   if (insert_after)
2423     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2424   else
2425     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2426
2427   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2428                                            boolean_type_node,
2429                                            uboundvar,
2430                                            ivvarinced);
2431
2432   bbs = get_loop_body (loop);
2433   /* Now replace the induction variable in the moved statements with the
2434      correct loop induction variable.  */
2435   oldivvar = VEC_index (tree, loopivs, 0);
2436   for (i = 0; i < loop->num_nodes; i++)
2437     {
2438       block_stmt_iterator tobsi = bsi_last (bodybb);
2439       if (bbs[i]->loop_father == loop)
2440         {
2441           /* Note that the bsi only needs to be explicitly incremented
2442              when we don't move something, since it is automatically
2443              incremented when we do.  */
2444           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2445             {
2446               ssa_op_iter i;
2447               tree n, stmt = bsi_stmt (bsi);
2448
2449               if (stmt == exit_condition
2450                   || not_interesting_stmt (stmt)
2451                   || stmt_is_bumper_for_loop (loop, stmt))
2452                 {
2453                   bsi_next (&bsi);
2454                   continue;
2455                 }
2456
2457               replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
2458               bsi_move_before (&bsi, &tobsi);
2459
2460               /* If the statement has any virtual operands, they may
2461                  need to be rewired because the original loop may
2462                  still reference them.  */
2463               FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2464                 mark_sym_for_renaming (SSA_NAME_VAR (n));
2465             }
2466         }
2467     }
2468
2469   free (bbs);
2470   return perfect_nest_p (loop);
2471 }
2472
2473 /* Return true if TRANS is a legal transformation matrix that respects
2474    the dependence vectors in DISTS and DIRS.  The conservative answer
2475    is false.
2476
2477    "Wolfe proves that a unimodular transformation represented by the
2478    matrix T is legal when applied to a loop nest with a set of
2479    lexicographically non-negative distance vectors RDG if and only if
2480    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2481    i.e.: if and only if it transforms the lexicographically positive
2482    distance vectors to lexicographically positive vectors.  Note that
2483    a unimodular matrix must transform the zero vector (and only it) to
2484    the zero vector." S.Muchnick.  */
2485
2486 bool
2487 lambda_transform_legal_p (lambda_trans_matrix trans,
2488                           int nb_loops,
2489                           varray_type dependence_relations)
2490 {
2491   unsigned int i;
2492   lambda_vector distres;
2493   struct data_dependence_relation *ddr;
2494
2495   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2496               && LTM_ROWSIZE (trans) == nb_loops);
2497
2498   /* When there is an unknown relation in the dependence_relations, we
2499      know that it is no worth looking at this loop nest: give up.  */
2500   ddr = (struct data_dependence_relation *)
2501     VARRAY_GENERIC_PTR (dependence_relations, 0);
2502   if (ddr == NULL)
2503     return true;
2504   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2505     return false;
2506
2507   distres = lambda_vector_new (nb_loops);
2508
2509   /* For each distance vector in the dependence graph.  */
2510   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2511     {
2512       ddr = (struct data_dependence_relation *)
2513         VARRAY_GENERIC_PTR (dependence_relations, i);
2514
2515       /* Don't care about relations for which we know that there is no
2516          dependence, nor about read-read (aka. output-dependences):
2517          these data accesses can happen in any order.  */
2518       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2519           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2520         continue;
2521
2522       /* Conservatively answer: "this transformation is not valid".  */
2523       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2524         return false;
2525
2526       /* If the dependence could not be captured by a distance vector,
2527          conservatively answer that the transform is not valid.  */
2528       if (DDR_DIST_VECT (ddr) == NULL)
2529         return false;
2530
2531       /* Compute trans.dist_vect */
2532       lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2533                                  DDR_DIST_VECT (ddr), distres);
2534
2535       if (!lambda_vector_lexico_pos (distres, nb_loops))
2536         return false;
2537     }
2538   return true;
2539 }