gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /* This pass walks a given loop structure searching for array
  23    references.  The information about the array accesses is recorded
  24    in DATA_REFERENCE structures.
  25
  26    The basic test for determining the dependences is:
  27    given two access functions chrec1 and chrec2 to a same array, and
  28    x and y two vectors from the iteration domain, the same element of
  29    the array is accessed twice at iterations x and y if and only if:
  30    |             chrec1 (x) == chrec2 (y).
  31
  32    The goals of this analysis are:
  33
  34    - to determine the independence: the relation between two
  35      independent accesses is qualified with the chrec_known (this
  36      information allows a loop parallelization),
  37
  38    - when two data references access the same data, to qualify the
  39      dependence relation with classic dependence representations:
  40
  41        - distance vectors
  42        - direction vectors
  43        - loop carried level dependence
  44        - polyhedron dependence
  45      or with the chains of recurrences based representation,
  46
  47    - to define a knowledge base for storing the data dependence
  48      information,
  49
  50    - to define an interface to access this data.
  51
  52
  53    Definitions:
  54
  55    - subscript: given two array accesses a subscript is the tuple
  56    composed of the access functions for a given dimension.  Example:
  57    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  58    (f1, g1), (f2, g2), (f3, g3).
  59
  60    - Diophantine equation: an equation whose coefficients and
  61    solutions are integer constants, for example the equation
  62    |   3*x + 2*y = 1
  63    has an integer solution x = 1 and y = -1.
  64
  65    References:
  66
  67    - "Advanced Compilation for High Performance Computing" by Randy
  68    Allen and Ken Kennedy.
  69    http://citeseer.ist.psu.edu/goff91practical.html
  70
  71    - "Loop Transformations for Restructuring Compilers - The Foundations"
  72    by Utpal Banerjee.
  73
  74
  75 */
  76
  77 #include "config.h"
  78 #include "system.h"
  79 #include "coretypes.h"
  80 #include "gimple-pretty-print.h"
  81 #include "tree-flow.h"
  82 #include "cfgloop.h"
  83 #include "tree-data-ref.h"
  84 #include "tree-scalar-evolution.h"
  85 #include "tree-pass.h"
  86 #include "langhooks.h"
  87 #include "tree-affine.h"
  88
  89 static struct datadep_stats
  90 {
  91   int num_dependence_tests;
  92   int num_dependence_dependent;
  93   int num_dependence_independent;
  94   int num_dependence_undetermined;
  95
  96   int num_subscript_tests;
  97   int num_subscript_undetermined;
  98   int num_same_subscript_function;
  99
 100   int num_ziv;
 101   int num_ziv_independent;
 102   int num_ziv_dependent;
 103   int num_ziv_unimplemented;
 104
 105   int num_siv;
 106   int num_siv_independent;
 107   int num_siv_dependent;
 108   int num_siv_unimplemented;
 109
 110   int num_miv;
 111   int num_miv_independent;
 112   int num_miv_dependent;
 113   int num_miv_unimplemented;
 114 } dependence_stats;
 115
 116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 117                                            struct data_reference *,
 118                                            struct data_reference *,
 119                                            struct loop *);
 120 /* Returns true iff A divides B.  */
 121
 122 static inline bool
 123 tree_fold_divides_p (const_tree a, const_tree b)
 124 {
 125   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 126   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 127   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 128 }
 129
 130 /* Returns true iff A divides B.  */
 131
 132 static inline bool
 133 int_divides_p (int a, int b)
 134 {
 135   return ((b % a) == 0);
 136 }
 137
 138 \f
 139
 140 /* Dump into FILE all the data references from DATAREFS.  */
 141
 142 void
 143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
 144 {
 145   unsigned int i;
 146   struct data_reference *dr;
 147
 148   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
 149     dump_data_reference (file, dr);
 150 }
 151
 152 /* Dump into STDERR all the data references from DATAREFS.  */
 153
 154 DEBUG_FUNCTION void
 155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
 156 {
 157   dump_data_references (stderr, datarefs);
 158 }
 159
 160 /* Dump to STDERR all the dependence relations from DDRS.  */
 161
 162 DEBUG_FUNCTION void
 163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
 164 {
 165   dump_data_dependence_relations (stderr, ddrs);
 166 }
 167
 168 /* Dump into FILE all the dependence relations from DDRS.  */
 169
 170 void
 171 dump_data_dependence_relations (FILE *file,
 172                                 VEC (ddr_p, heap) *ddrs)
 173 {
 174   unsigned int i;
 175   struct data_dependence_relation *ddr;
 176
 177   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 178     dump_data_dependence_relation (file, ddr);
 179 }
 180
 181 /* Print to STDERR the data_reference DR.  */
 182
 183 DEBUG_FUNCTION void
 184 debug_data_reference (struct data_reference *dr)
 185 {
 186   dump_data_reference (stderr, dr);
 187 }
 188
 189 /* Dump function for a DATA_REFERENCE structure.  */
 190
 191 void
 192 dump_data_reference (FILE *outf,
 193                      struct data_reference *dr)
 194 {
 195   unsigned int i;
 196
 197   fprintf (outf, "#(Data Ref: \n");
 198   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 199   fprintf (outf, "#  stmt: ");
 200   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
 201   fprintf (outf, "#  ref: ");
 202   print_generic_stmt (outf, DR_REF (dr), 0);
 203   fprintf (outf, "#  base_object: ");
 204   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
 205
 206   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 207     {
 208       fprintf (outf, "#  Access function %d: ", i);
 209       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
 210     }
 211   fprintf (outf, "#)\n");
 212 }
 213
 214 /* Dumps the affine function described by FN to the file OUTF.  */
 215
 216 static void
 217 dump_affine_function (FILE *outf, affine_fn fn)
 218 {
 219   unsigned i;
 220   tree coef;
 221
 222   print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
 223   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
 224     {
 225       fprintf (outf, " + ");
 226       print_generic_expr (outf, coef, TDF_SLIM);
 227       fprintf (outf, " * x_%u", i);
 228     }
 229 }
 230
 231 /* Dumps the conflict function CF to the file OUTF.  */
 232
 233 static void
 234 dump_conflict_function (FILE *outf, conflict_function *cf)
 235 {
 236   unsigned i;
 237
 238   if (cf->n == NO_DEPENDENCE)
 239     fprintf (outf, "no dependence\n");
 240   else if (cf->n == NOT_KNOWN)
 241     fprintf (outf, "not known\n");
 242   else
 243     {
 244       for (i = 0; i < cf->n; i++)
 245         {
 246           fprintf (outf, "[");
 247           dump_affine_function (outf, cf->fns[i]);
 248           fprintf (outf, "]\n");
 249         }
 250     }
 251 }
 252
 253 /* Dump function for a SUBSCRIPT structure.  */
 254
 255 void
 256 dump_subscript (FILE *outf, struct subscript *subscript)
 257 {
 258   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 259
 260   fprintf (outf, "\n (subscript \n");
 261   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 262   dump_conflict_function (outf, cf);
 263   if (CF_NONTRIVIAL_P (cf))
 264     {
 265       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 266       fprintf (outf, "  last_conflict: ");
 267       print_generic_stmt (outf, last_iteration, 0);
 268     }
 269
 270   cf = SUB_CONFLICTS_IN_B (subscript);
 271   fprintf (outf, "  iterations_that_access_an_element_twice_in_B: ");
 272   dump_conflict_function (outf, cf);
 273   if (CF_NONTRIVIAL_P (cf))
 274     {
 275       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 276       fprintf (outf, "  last_conflict: ");
 277       print_generic_stmt (outf, last_iteration, 0);
 278     }
 279
 280   fprintf (outf, "  (Subscript distance: ");
 281   print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
 282   fprintf (outf, "  )\n");
 283   fprintf (outf, " )\n");
 284 }
 285
 286 /* Print the classic direction vector DIRV to OUTF.  */
 287
 288 void
 289 print_direction_vector (FILE *outf,
 290                         lambda_vector dirv,
 291                         int length)
 292 {
 293   int eq;
 294
 295   for (eq = 0; eq < length; eq++)
 296     {
 297       enum data_dependence_direction dir = ((enum data_dependence_direction)
 298                                             dirv[eq]);
 299
 300       switch (dir)
 301         {
 302         case dir_positive:
 303           fprintf (outf, "    +");
 304           break;
 305         case dir_negative:
 306           fprintf (outf, "    -");
 307           break;
 308         case dir_equal:
 309           fprintf (outf, "    =");
 310           break;
 311         case dir_positive_or_equal:
 312           fprintf (outf, "   +=");
 313           break;
 314         case dir_positive_or_negative:
 315           fprintf (outf, "   +-");
 316           break;
 317         case dir_negative_or_equal:
 318           fprintf (outf, "   -=");
 319           break;
 320         case dir_star:
 321           fprintf (outf, "    *");
 322           break;
 323         default:
 324           fprintf (outf, "indep");
 325           break;
 326         }
 327     }
 328   fprintf (outf, "\n");
 329 }
 330
 331 /* Print a vector of direction vectors.  */
 332
 333 void
 334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
 335                    int length)
 336 {
 337   unsigned j;
 338   lambda_vector v;
 339
 340   FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
 341     print_direction_vector (outf, v, length);
 342 }
 343
 344 /* Print out a vector VEC of length N to OUTFILE.  */
 345
 346 static inline void
 347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 348 {
 349   int i;
 350
 351   for (i = 0; i < n; i++)
 352     fprintf (outfile, "%3d ", vector[i]);
 353   fprintf (outfile, "\n");
 354 }
 355
 356 /* Print a vector of distance vectors.  */
 357
 358 void
 359 print_dist_vectors  (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
 360                      int length)
 361 {
 362   unsigned j;
 363   lambda_vector v;
 364
 365   FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
 366     print_lambda_vector (outf, v, length);
 367 }
 368
 369 /* Debug version.  */
 370
 371 DEBUG_FUNCTION void
 372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 373 {
 374   dump_data_dependence_relation (stderr, ddr);
 375 }
 376
 377 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 378
 379 void
 380 dump_data_dependence_relation (FILE *outf,
 381                                struct data_dependence_relation *ddr)
 382 {
 383   struct data_reference *dra, *drb;
 384
 385   fprintf (outf, "(Data Dep: \n");
 386
 387   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 388     {
 389       if (ddr)
 390         {
 391           dra = DDR_A (ddr);
 392           drb = DDR_B (ddr);
 393           if (dra)
 394             dump_data_reference (outf, dra);
 395           else
 396             fprintf (outf, "    (nil)\n");
 397           if (drb)
 398             dump_data_reference (outf, drb);
 399           else
 400             fprintf (outf, "    (nil)\n");
 401         }
 402       fprintf (outf, "    (don't know)\n)\n");
 403       return;
 404     }
 405
 406   dra = DDR_A (ddr);
 407   drb = DDR_B (ddr);
 408   dump_data_reference (outf, dra);
 409   dump_data_reference (outf, drb);
 410
 411   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 412     fprintf (outf, "    (no dependence)\n");
 413
 414   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 415     {
 416       unsigned int i;
 417       struct loop *loopi;
 418
 419       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 420         {
 421           fprintf (outf, "  access_fn_A: ");
 422           print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
 423           fprintf (outf, "  access_fn_B: ");
 424           print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
 425           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 426         }
 427
 428       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 429       fprintf (outf, "  loop nest: (");
 430       FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
 431         fprintf (outf, "%d ", loopi->num);
 432       fprintf (outf, ")\n");
 433
 434       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 435         {
 436           fprintf (outf, "  distance_vector: ");
 437           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 438                                DDR_NB_LOOPS (ddr));
 439         }
 440
 441       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 442         {
 443           fprintf (outf, "  direction_vector: ");
 444           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 445                                   DDR_NB_LOOPS (ddr));
 446         }
 447     }
 448
 449   fprintf (outf, ")\n");
 450 }
 451
 452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure.  */
 453
 454 void
 455 dump_data_dependence_direction (FILE *file,
 456                                 enum data_dependence_direction dir)
 457 {
 458   switch (dir)
 459     {
 460     case dir_positive:
 461       fprintf (file, "+");
 462       break;
 463
 464     case dir_negative:
 465       fprintf (file, "-");
 466       break;
 467
 468     case dir_equal:
 469       fprintf (file, "=");
 470       break;
 471
 472     case dir_positive_or_negative:
 473       fprintf (file, "+-");
 474       break;
 475
 476     case dir_positive_or_equal:
 477       fprintf (file, "+=");
 478       break;
 479
 480     case dir_negative_or_equal:
 481       fprintf (file, "-=");
 482       break;
 483
 484     case dir_star:
 485       fprintf (file, "*");
 486       break;
 487
 488     default:
 489       break;
 490     }
 491 }
 492
 493 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 494    the dependence relations, and VECT_SIZE is the size of the
 495    dependence vectors, or in other words the number of loops in the
 496    considered nest.  */
 497
 498 void
 499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
 500 {
 501   unsigned int i, j;
 502   struct data_dependence_relation *ddr;
 503   lambda_vector v;
 504
 505   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 506     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 507       {
 508         FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
 509           {
 510             fprintf (file, "DISTANCE_V (");
 511             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 512             fprintf (file, ")\n");
 513           }
 514
 515         FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
 516           {
 517             fprintf (file, "DIRECTION_V (");
 518             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 519             fprintf (file, ")\n");
 520           }
 521       }
 522
 523   fprintf (file, "\n\n");
 524 }
 525
 526 /* Dumps the data dependence relations DDRS in FILE.  */
 527
 528 void
 529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
 530 {
 531   unsigned int i;
 532   struct data_dependence_relation *ddr;
 533
 534   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 535     dump_data_dependence_relation (file, ddr);
 536
 537   fprintf (file, "\n\n");
 538 }
 539
 540 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 541    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 542    constant of type ssizetype, and returns true.  If we cannot do this
 543    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 544    is returned.  */
 545
 546 static bool
 547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 548                          tree *var, tree *off)
 549 {
 550   tree var0, var1;
 551   tree off0, off1;
 552   enum tree_code ocode = code;
 553
 554   *var = NULL_TREE;
 555   *off = NULL_TREE;
 556
 557   switch (code)
 558     {
 559     case INTEGER_CST:
 560       *var = build_int_cst (type, 0);
 561       *off = fold_convert (ssizetype, op0);
 562       return true;
 563
 564     case POINTER_PLUS_EXPR:
 565       ocode = PLUS_EXPR;
 566       /* FALLTHROUGH */
 567     case PLUS_EXPR:
 568     case MINUS_EXPR:
 569       split_constant_offset (op0, &var0, &off0);
 570       split_constant_offset (op1, &var1, &off1);
 571       *var = fold_build2 (code, type, var0, var1);
 572       *off = size_binop (ocode, off0, off1);
 573       return true;
 574
 575     case MULT_EXPR:
 576       if (TREE_CODE (op1) != INTEGER_CST)
 577         return false;
 578
 579       split_constant_offset (op0, &var0, &off0);
 580       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 581       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 582       return true;
 583
 584     case ADDR_EXPR:
 585       {
 586         tree base, poffset;
 587         HOST_WIDE_INT pbitsize, pbitpos;
 588         enum machine_mode pmode;
 589         int punsignedp, pvolatilep;
 590
 591         op0 = TREE_OPERAND (op0, 0);
 592         base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
 593                                     &pmode, &punsignedp, &pvolatilep, false);
 594
 595         if (pbitpos % BITS_PER_UNIT != 0)
 596           return false;
 597         base = build_fold_addr_expr (base);
 598         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 599
 600         if (poffset)
 601           {
 602             split_constant_offset (poffset, &poffset, &off1);
 603             off0 = size_binop (PLUS_EXPR, off0, off1);
 604             if (POINTER_TYPE_P (TREE_TYPE (base)))
 605               base = fold_build_pointer_plus (base, poffset);
 606             else
 607               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 608                                   fold_convert (TREE_TYPE (base), poffset));
 609           }
 610
 611         var0 = fold_convert (type, base);
 612
 613         /* If variable length types are involved, punt, otherwise casts
 614            might be converted into ARRAY_REFs in gimplify_conversion.
 615            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 616            possibly no longer appears in current GIMPLE, might resurface.
 617            This perhaps could run
 618            if (CONVERT_EXPR_P (var0))
 619              {
 620                gimplify_conversion (&var0);
 621                // Attempt to fill in any within var0 found ARRAY_REF's
 622                // element size from corresponding op embedded ARRAY_REF,
 623                // if unsuccessful, just punt.
 624              }  */
 625         while (POINTER_TYPE_P (type))
 626           type = TREE_TYPE (type);
 627         if (int_size_in_bytes (type) < 0)
 628           return false;
 629
 630         *var = var0;
 631         *off = off0;
 632         return true;
 633       }
 634
 635     case SSA_NAME:
 636       {
 637         gimple def_stmt = SSA_NAME_DEF_STMT (op0);
 638         enum tree_code subcode;
 639
 640         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 641           return false;
 642
 643         var0 = gimple_assign_rhs1 (def_stmt);
 644         subcode = gimple_assign_rhs_code (def_stmt);
 645         var1 = gimple_assign_rhs2 (def_stmt);
 646
 647         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 648       }
 649     CASE_CONVERT:
 650       {
 651         /* We must not introduce undefined overflow, and we must not change the value.
 652            Hence we're okay if the inner type doesn't overflow to start with
 653            (pointer or signed), the outer type also is an integer or pointer
 654            and the outer precision is at least as large as the inner.  */
 655         tree itype = TREE_TYPE (op0);
 656         if ((POINTER_TYPE_P (itype)
 657              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 658             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 659             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 660           {
 661             split_constant_offset (op0, &var0, off);
 662             *var = fold_convert (type, var0);
 663             return true;
 664           }
 665         return false;
 666       }
 667
 668     default:
 669       return false;
 670     }
 671 }
 672
 673 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 674    will be ssizetype.  */
 675
 676 void
 677 split_constant_offset (tree exp, tree *var, tree *off)
 678 {
 679   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 680   enum tree_code code;
 681
 682   *var = exp;
 683   *off = ssize_int (0);
 684   STRIP_NOPS (exp);
 685
 686   if (tree_is_chrec (exp)
 687       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 688     return;
 689
 690   otype = TREE_TYPE (exp);
 691   code = TREE_CODE (exp);
 692   extract_ops_from_tree (exp, &code, &op0, &op1);
 693   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 694     {
 695       *var = fold_convert (type, e);
 696       *off = o;
 697     }
 698 }
 699
 700 /* Returns the address ADDR of an object in a canonical shape (without nop
 701    casts, and with type of pointer to the object).  */
 702
 703 static tree
 704 canonicalize_base_object_address (tree addr)
 705 {
 706   tree orig = addr;
 707
 708   STRIP_NOPS (addr);
 709
 710   /* The base address may be obtained by casting from integer, in that case
 711      keep the cast.  */
 712   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 713     return orig;
 714
 715   if (TREE_CODE (addr) != ADDR_EXPR)
 716     return addr;
 717
 718   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 719 }
 720
 721 /* Analyzes the behavior of the memory reference DR in the innermost loop or
 722    basic block that contains it.  Returns true if analysis succeed or false
 723    otherwise.  */
 724
 725 bool
 726 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 727 {
 728   gimple stmt = DR_STMT (dr);
 729   struct loop *loop = loop_containing_stmt (stmt);
 730   tree ref = DR_REF (dr);
 731   HOST_WIDE_INT pbitsize, pbitpos;
 732   tree base, poffset;
 733   enum machine_mode pmode;
 734   int punsignedp, pvolatilep;
 735   affine_iv base_iv, offset_iv;
 736   tree init, dinit, step;
 737   bool in_loop = (loop && loop->num);
 738
 739   if (dump_file && (dump_flags & TDF_DETAILS))
 740     fprintf (dump_file, "analyze_innermost: ");
 741
 742   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
 743                               &pmode, &punsignedp, &pvolatilep, false);
 744   gcc_assert (base != NULL_TREE);
 745
 746   if (pbitpos % BITS_PER_UNIT != 0)
 747     {
 748       if (dump_file && (dump_flags & TDF_DETAILS))
 749         fprintf (dump_file, "failed: bit offset alignment.\n");
 750       return false;
 751     }
 752
 753   if (TREE_CODE (base) == MEM_REF)
 754     {
 755       if (!integer_zerop (TREE_OPERAND (base, 1)))
 756         {
 757           if (!poffset)
 758             {
 759               double_int moff = mem_ref_offset (base);
 760               poffset = double_int_to_tree (sizetype, moff);
 761             }
 762           else
 763             poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
 764         }
 765       base = TREE_OPERAND (base, 0);
 766     }
 767   else
 768     base = build_fold_addr_expr (base);
 769
 770   if (in_loop)
 771     {
 772       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
 773                       false))
 774         {
 775           if (nest)
 776             {
 777               if (dump_file && (dump_flags & TDF_DETAILS))
 778                 fprintf (dump_file, "failed: evolution of base is not"
 779                                     " affine.\n");
 780               return false;
 781             }
 782           else
 783             {
 784               base_iv.base = base;
 785               base_iv.step = ssize_int (0);
 786               base_iv.no_overflow = true;
 787             }
 788         }
 789     }
 790   else
 791     {
 792       base_iv.base = base;
 793       base_iv.step = ssize_int (0);
 794       base_iv.no_overflow = true;
 795     }
 796
 797   if (!poffset)
 798     {
 799       offset_iv.base = ssize_int (0);
 800       offset_iv.step = ssize_int (0);
 801     }
 802   else
 803     {
 804       if (!in_loop)
 805         {
 806           offset_iv.base = poffset;
 807           offset_iv.step = ssize_int (0);
 808         }
 809       else if (!simple_iv (loop, loop_containing_stmt (stmt),
 810                            poffset, &offset_iv, false))
 811         {
 812           if (nest)
 813             {
 814               if (dump_file && (dump_flags & TDF_DETAILS))
 815                 fprintf (dump_file, "failed: evolution of offset is not"
 816                                     " affine.\n");
 817               return false;
 818             }
 819           else
 820             {
 821               offset_iv.base = poffset;
 822               offset_iv.step = ssize_int (0);
 823             }
 824         }
 825     }
 826
 827   init = ssize_int (pbitpos / BITS_PER_UNIT);
 828   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 829   init =  size_binop (PLUS_EXPR, init, dinit);
 830   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 831   init =  size_binop (PLUS_EXPR, init, dinit);
 832
 833   step = size_binop (PLUS_EXPR,
 834                      fold_convert (ssizetype, base_iv.step),
 835                      fold_convert (ssizetype, offset_iv.step));
 836
 837   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 838
 839   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 840   DR_INIT (dr) = init;
 841   DR_STEP (dr) = step;
 842
 843   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 844
 845   if (dump_file && (dump_flags & TDF_DETAILS))
 846     fprintf (dump_file, "success.\n");
 847
 848   return true;
 849 }
 850
 851 /* Determines the base object and the list of indices of memory reference
 852    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
 853
 854 static void
 855 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
 856 {
 857   VEC (tree, heap) *access_fns = NULL;
 858   tree ref, aref, op;
 859   tree base, off, access_fn;
 860   basic_block before_loop;
 861
 862   /* If analyzing a basic-block there are no indices to analyze
 863      and thus no access functions.  */
 864   if (!nest)
 865     {
 866       DR_BASE_OBJECT (dr) = DR_REF (dr);
 867       DR_ACCESS_FNS (dr) = NULL;
 868       return;
 869     }
 870
 871   ref = unshare_expr (DR_REF (dr));
 872   before_loop = block_before_loop (nest);
 873
 874   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 875      into a two element array with a constant index.  The base is
 876      then just the immediate underlying object.  */
 877   if (TREE_CODE (ref) == REALPART_EXPR)
 878     {
 879       ref = TREE_OPERAND (ref, 0);
 880       VEC_safe_push (tree, heap, access_fns, integer_zero_node);
 881     }
 882   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 883     {
 884       ref = TREE_OPERAND (ref, 0);
 885       VEC_safe_push (tree, heap, access_fns, integer_one_node);
 886     }
 887
 888   /* Analyze access functions of dimensions we know to be independent.  */
 889   aref = ref;
 890   while (handled_component_p (aref))
 891     {
 892       if (TREE_CODE (aref) == ARRAY_REF)
 893         {
 894           op = TREE_OPERAND (aref, 1);
 895           access_fn = analyze_scalar_evolution (loop, op);
 896           access_fn = instantiate_scev (before_loop, loop, access_fn);
 897           VEC_safe_push (tree, heap, access_fns, access_fn);
 898           /* For ARRAY_REFs the base is the reference with the index replaced
 899              by zero if we can not strip it as the outermost component.  */
 900           if (aref == ref)
 901             ref = TREE_OPERAND (ref, 0);
 902           else
 903             TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
 904         }
 905
 906       aref = TREE_OPERAND (aref, 0);
 907     }
 908
 909   /* If the address operand of a MEM_REF base has an evolution in the
 910      analyzed nest, add it as an additional independent access-function.  */
 911   if (TREE_CODE (aref) == MEM_REF)
 912     {
 913       op = TREE_OPERAND (aref, 0);
 914       access_fn = analyze_scalar_evolution (loop, op);
 915       access_fn = instantiate_scev (before_loop, loop, access_fn);
 916       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 917         {
 918           base = initial_condition (access_fn);
 919           split_constant_offset (base, &base, &off);
 920           /* Fold the MEM_REF offset into the evolutions initial
 921              value to make more bases comparable.  */
 922           if (!integer_zerop (TREE_OPERAND (aref, 1)))
 923             {
 924               off = size_binop (PLUS_EXPR, off,
 925                                 fold_convert (ssizetype,
 926                                               TREE_OPERAND (aref, 1)));
 927               TREE_OPERAND (aref, 1)
 928                 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
 929             }
 930           access_fn = chrec_replace_initial_condition
 931               (access_fn, fold_convert (TREE_TYPE (base), off));
 932           TREE_OPERAND (aref, 0) = base;
 933           VEC_safe_push (tree, heap, access_fns, access_fn);
 934         }
 935     }
 936
 937   DR_BASE_OBJECT (dr) = ref;
 938   DR_ACCESS_FNS (dr) = access_fns;
 939 }
 940
 941 /* Extracts the alias analysis information from the memory reference DR.  */
 942
 943 static void
 944 dr_analyze_alias (struct data_reference *dr)
 945 {
 946   tree ref = DR_REF (dr);
 947   tree base = get_base_address (ref), addr;
 948
 949   if (INDIRECT_REF_P (base)
 950       || TREE_CODE (base) == MEM_REF)
 951     {
 952       addr = TREE_OPERAND (base, 0);
 953       if (TREE_CODE (addr) == SSA_NAME)
 954         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
 955     }
 956 }
 957
 958 /* Frees data reference DR.  */
 959
 960 void
 961 free_data_ref (data_reference_p dr)
 962 {
 963   VEC_free (tree, heap, DR_ACCESS_FNS (dr));
 964   free (dr);
 965 }
 966
 967 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
 968    is read if IS_READ is true, write otherwise.  Returns the
 969    data_reference description of MEMREF.  NEST is the outermost loop
 970    in which the reference should be instantiated, LOOP is the loop in
 971    which the data reference should be analyzed.  */
 972
 973 struct data_reference *
 974 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
 975                  bool is_read)
 976 {
 977   struct data_reference *dr;
 978
 979   if (dump_file && (dump_flags & TDF_DETAILS))
 980     {
 981       fprintf (dump_file, "Creating dr for ");
 982       print_generic_expr (dump_file, memref, TDF_SLIM);
 983       fprintf (dump_file, "\n");
 984     }
 985
 986   dr = XCNEW (struct data_reference);
 987   DR_STMT (dr) = stmt;
 988   DR_REF (dr) = memref;
 989   DR_IS_READ (dr) = is_read;
 990
 991   dr_analyze_innermost (dr, nest);
 992   dr_analyze_indices (dr, nest, loop);
 993   dr_analyze_alias (dr);
 994
 995   if (dump_file && (dump_flags & TDF_DETAILS))
 996     {
 997       unsigned i;
 998       fprintf (dump_file, "\tbase_address: ");
 999       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1000       fprintf (dump_file, "\n\toffset from base address: ");
1001       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1002       fprintf (dump_file, "\n\tconstant offset from base address: ");
1003       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1004       fprintf (dump_file, "\n\tstep: ");
1005       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1006       fprintf (dump_file, "\n\taligned to: ");
1007       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1008       fprintf (dump_file, "\n\tbase_object: ");
1009       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1010       fprintf (dump_file, "\n");
1011       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1012         {
1013           fprintf (dump_file, "\tAccess function %d: ", i);
1014           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1015         }
1016     }
1017
1018   return dr;
1019 }
1020
1021 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1022    expressions.  */
1023 static bool
1024 dr_equal_offsets_p1 (tree offset1, tree offset2)
1025 {
1026   bool res;
1027
1028   STRIP_NOPS (offset1);
1029   STRIP_NOPS (offset2);
1030
1031   if (offset1 == offset2)
1032     return true;
1033
1034   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1035       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1036     return false;
1037
1038   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1039                              TREE_OPERAND (offset2, 0));
1040
1041   if (!res || !BINARY_CLASS_P (offset1))
1042     return res;
1043
1044   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1045                              TREE_OPERAND (offset2, 1));
1046
1047   return res;
1048 }
1049
1050 /* Check if DRA and DRB have equal offsets.  */
1051 bool
1052 dr_equal_offsets_p (struct data_reference *dra,
1053                     struct data_reference *drb)
1054 {
1055   tree offset1, offset2;
1056
1057   offset1 = DR_OFFSET (dra);
1058   offset2 = DR_OFFSET (drb);
1059
1060   return dr_equal_offsets_p1 (offset1, offset2);
1061 }
1062
1063 /* Returns true if FNA == FNB.  */
1064
1065 static bool
1066 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1067 {
1068   unsigned i, n = VEC_length (tree, fna);
1069
1070   if (n != VEC_length (tree, fnb))
1071     return false;
1072
1073   for (i = 0; i < n; i++)
1074     if (!operand_equal_p (VEC_index (tree, fna, i),
1075                           VEC_index (tree, fnb, i), 0))
1076       return false;
1077
1078   return true;
1079 }
1080
1081 /* If all the functions in CF are the same, returns one of them,
1082    otherwise returns NULL.  */
1083
1084 static affine_fn
1085 common_affine_function (conflict_function *cf)
1086 {
1087   unsigned i;
1088   affine_fn comm;
1089
1090   if (!CF_NONTRIVIAL_P (cf))
1091     return NULL;
1092
1093   comm = cf->fns[0];
1094
1095   for (i = 1; i < cf->n; i++)
1096     if (!affine_function_equal_p (comm, cf->fns[i]))
1097       return NULL;
1098
1099   return comm;
1100 }
1101
1102 /* Returns the base of the affine function FN.  */
1103
1104 static tree
1105 affine_function_base (affine_fn fn)
1106 {
1107   return VEC_index (tree, fn, 0);
1108 }
1109
1110 /* Returns true if FN is a constant.  */
1111
1112 static bool
1113 affine_function_constant_p (affine_fn fn)
1114 {
1115   unsigned i;
1116   tree coef;
1117
1118   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1119     if (!integer_zerop (coef))
1120       return false;
1121
1122   return true;
1123 }
1124
1125 /* Returns true if FN is the zero constant function.  */
1126
1127 static bool
1128 affine_function_zero_p (affine_fn fn)
1129 {
1130   return (integer_zerop (affine_function_base (fn))
1131           && affine_function_constant_p (fn));
1132 }
1133
1134 /* Returns a signed integer type with the largest precision from TA
1135    and TB.  */
1136
1137 static tree
1138 signed_type_for_types (tree ta, tree tb)
1139 {
1140   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1141     return signed_type_for (ta);
1142   else
1143     return signed_type_for (tb);
1144 }
1145
1146 /* Applies operation OP on affine functions FNA and FNB, and returns the
1147    result.  */
1148
1149 static affine_fn
1150 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1151 {
1152   unsigned i, n, m;
1153   affine_fn ret;
1154   tree coef;
1155
1156   if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1157     {
1158       n = VEC_length (tree, fna);
1159       m = VEC_length (tree, fnb);
1160     }
1161   else
1162     {
1163       n = VEC_length (tree, fnb);
1164       m = VEC_length (tree, fna);
1165     }
1166
1167   ret = VEC_alloc (tree, heap, m);
1168   for (i = 0; i < n; i++)
1169     {
1170       tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1171                                          TREE_TYPE (VEC_index (tree, fnb, i)));
1172
1173       VEC_quick_push (tree, ret,
1174                       fold_build2 (op, type,
1175                                    VEC_index (tree, fna, i),
1176                                    VEC_index (tree, fnb, i)));
1177     }
1178
1179   for (; VEC_iterate (tree, fna, i, coef); i++)
1180     VEC_quick_push (tree, ret,
1181                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1182                                  coef, integer_zero_node));
1183   for (; VEC_iterate (tree, fnb, i, coef); i++)
1184     VEC_quick_push (tree, ret,
1185                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1186                                  integer_zero_node, coef));
1187
1188   return ret;
1189 }
1190
1191 /* Returns the sum of affine functions FNA and FNB.  */
1192
1193 static affine_fn
1194 affine_fn_plus (affine_fn fna, affine_fn fnb)
1195 {
1196   return affine_fn_op (PLUS_EXPR, fna, fnb);
1197 }
1198
1199 /* Returns the difference of affine functions FNA and FNB.  */
1200
1201 static affine_fn
1202 affine_fn_minus (affine_fn fna, affine_fn fnb)
1203 {
1204   return affine_fn_op (MINUS_EXPR, fna, fnb);
1205 }
1206
1207 /* Frees affine function FN.  */
1208
1209 static void
1210 affine_fn_free (affine_fn fn)
1211 {
1212   VEC_free (tree, heap, fn);
1213 }
1214
1215 /* Determine for each subscript in the data dependence relation DDR
1216    the distance.  */
1217
1218 static void
1219 compute_subscript_distance (struct data_dependence_relation *ddr)
1220 {
1221   conflict_function *cf_a, *cf_b;
1222   affine_fn fn_a, fn_b, diff;
1223
1224   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1225     {
1226       unsigned int i;
1227
1228       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1229         {
1230           struct subscript *subscript;
1231
1232           subscript = DDR_SUBSCRIPT (ddr, i);
1233           cf_a = SUB_CONFLICTS_IN_A (subscript);
1234           cf_b = SUB_CONFLICTS_IN_B (subscript);
1235
1236           fn_a = common_affine_function (cf_a);
1237           fn_b = common_affine_function (cf_b);
1238           if (!fn_a || !fn_b)
1239             {
1240               SUB_DISTANCE (subscript) = chrec_dont_know;
1241               return;
1242             }
1243           diff = affine_fn_minus (fn_a, fn_b);
1244
1245           if (affine_function_constant_p (diff))
1246             SUB_DISTANCE (subscript) = affine_function_base (diff);
1247           else
1248             SUB_DISTANCE (subscript) = chrec_dont_know;
1249
1250           affine_fn_free (diff);
1251         }
1252     }
1253 }
1254
1255 /* Returns the conflict function for "unknown".  */
1256
1257 static conflict_function *
1258 conflict_fn_not_known (void)
1259 {
1260   conflict_function *fn = XCNEW (conflict_function);
1261   fn->n = NOT_KNOWN;
1262
1263   return fn;
1264 }
1265
1266 /* Returns the conflict function for "independent".  */
1267
1268 static conflict_function *
1269 conflict_fn_no_dependence (void)
1270 {
1271   conflict_function *fn = XCNEW (conflict_function);
1272   fn->n = NO_DEPENDENCE;
1273
1274   return fn;
1275 }
1276
1277 /* Returns true if the address of OBJ is invariant in LOOP.  */
1278
1279 static bool
1280 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1281 {
1282   while (handled_component_p (obj))
1283     {
1284       if (TREE_CODE (obj) == ARRAY_REF)
1285         {
1286           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1287              need to check the stride and the lower bound of the reference.  */
1288           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1289                                                       loop->num)
1290               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1291                                                          loop->num))
1292             return false;
1293         }
1294       else if (TREE_CODE (obj) == COMPONENT_REF)
1295         {
1296           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1297                                                       loop->num))
1298             return false;
1299         }
1300       obj = TREE_OPERAND (obj, 0);
1301     }
1302
1303   if (!INDIRECT_REF_P (obj)
1304       && TREE_CODE (obj) != MEM_REF)
1305     return true;
1306
1307   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1308                                                   loop->num);
1309 }
1310
1311 /* Returns false if we can prove that data references A and B do not alias,
1312    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1313    considered.  */
1314
1315 bool
1316 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1317                 bool loop_nest)
1318 {
1319   tree addr_a = DR_BASE_OBJECT (a);
1320   tree addr_b = DR_BASE_OBJECT (b);
1321
1322   /* If we are not processing a loop nest but scalar code we
1323      do not need to care about possible cross-iteration dependences
1324      and thus can process the full original reference.  Do so,
1325      similar to how loop invariant motion applies extra offset-based
1326      disambiguation.  */
1327   if (!loop_nest)
1328     {
1329       aff_tree off1, off2;
1330       double_int size1, size2;
1331       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1332       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1333       aff_combination_scale (&off1, double_int_minus_one);
1334       aff_combination_add (&off2, &off1);
1335       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1336         return false;
1337     }
1338
1339   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1340     return refs_output_dependent_p (addr_a, addr_b);
1341   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1342     return refs_anti_dependent_p (addr_a, addr_b);
1343   return refs_may_alias_p (addr_a, addr_b);
1344 }
1345
1346 static void compute_self_dependence (struct data_dependence_relation *);
1347
1348 /* Initialize a data dependence relation between data accesses A and
1349    B.  NB_LOOPS is the number of loops surrounding the references: the
1350    size of the classic distance/direction vectors.  */
1351
1352 static struct data_dependence_relation *
1353 initialize_data_dependence_relation (struct data_reference *a,
1354                                      struct data_reference *b,
1355                                      VEC (loop_p, heap) *loop_nest)
1356 {
1357   struct data_dependence_relation *res;
1358   unsigned int i;
1359
1360   res = XNEW (struct data_dependence_relation);
1361   DDR_A (res) = a;
1362   DDR_B (res) = b;
1363   DDR_LOOP_NEST (res) = NULL;
1364   DDR_REVERSED_P (res) = false;
1365   DDR_SUBSCRIPTS (res) = NULL;
1366   DDR_DIR_VECTS (res) = NULL;
1367   DDR_DIST_VECTS (res) = NULL;
1368
1369   if (a == NULL || b == NULL)
1370     {
1371       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1372       return res;
1373     }
1374
1375   /* If the data references do not alias, then they are independent.  */
1376   if (!dr_may_alias_p (a, b, loop_nest != NULL))
1377     {
1378       DDR_ARE_DEPENDENT (res) = chrec_known;
1379       return res;
1380     }
1381
1382   /* When the references are exactly the same, don't spend time doing
1383      the data dependence tests, just initialize the ddr and return.  */
1384   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1385     {
1386       DDR_AFFINE_P (res) = true;
1387       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1388       DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1389       DDR_LOOP_NEST (res) = loop_nest;
1390       DDR_INNER_LOOP (res) = 0;
1391       DDR_SELF_REFERENCE (res) = true;
1392       compute_self_dependence (res);
1393       return res;
1394     }
1395
1396   /* If the references do not access the same object, we do not know
1397      whether they alias or not.  */
1398   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1399     {
1400       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1401       return res;
1402     }
1403
1404   /* If the base of the object is not invariant in the loop nest, we cannot
1405      analyze it.  TODO -- in fact, it would suffice to record that there may
1406      be arbitrary dependences in the loops where the base object varies.  */
1407   if (loop_nest
1408       && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1409                                               DR_BASE_OBJECT (a)))
1410     {
1411       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1412       return res;
1413     }
1414
1415   /* If the number of dimensions of the access to not agree we can have
1416      a pointer access to a component of the array element type and an
1417      array access while the base-objects are still the same.  Punt.  */
1418   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1419     {
1420       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1421       return res;
1422     }
1423
1424   DDR_AFFINE_P (res) = true;
1425   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1426   DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1427   DDR_LOOP_NEST (res) = loop_nest;
1428   DDR_INNER_LOOP (res) = 0;
1429   DDR_SELF_REFERENCE (res) = false;
1430
1431   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1432     {
1433       struct subscript *subscript;
1434
1435       subscript = XNEW (struct subscript);
1436       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1437       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1438       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1439       SUB_DISTANCE (subscript) = chrec_dont_know;
1440       VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1441     }
1442
1443   return res;
1444 }
1445
1446 /* Frees memory used by the conflict function F.  */
1447
1448 static void
1449 free_conflict_function (conflict_function *f)
1450 {
1451   unsigned i;
1452
1453   if (CF_NONTRIVIAL_P (f))
1454     {
1455       for (i = 0; i < f->n; i++)
1456         affine_fn_free (f->fns[i]);
1457     }
1458   free (f);
1459 }
1460
1461 /* Frees memory used by SUBSCRIPTS.  */
1462
1463 static void
1464 free_subscripts (VEC (subscript_p, heap) *subscripts)
1465 {
1466   unsigned i;
1467   subscript_p s;
1468
1469   FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1470     {
1471       free_conflict_function (s->conflicting_iterations_in_a);
1472       free_conflict_function (s->conflicting_iterations_in_b);
1473       free (s);
1474     }
1475   VEC_free (subscript_p, heap, subscripts);
1476 }
1477
1478 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1479    description.  */
1480
1481 static inline void
1482 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1483                         tree chrec)
1484 {
1485   if (dump_file && (dump_flags & TDF_DETAILS))
1486     {
1487       fprintf (dump_file, "(dependence classified: ");
1488       print_generic_expr (dump_file, chrec, 0);
1489       fprintf (dump_file, ")\n");
1490     }
1491
1492   DDR_ARE_DEPENDENT (ddr) = chrec;
1493   free_subscripts (DDR_SUBSCRIPTS (ddr));
1494   DDR_SUBSCRIPTS (ddr) = NULL;
1495 }
1496
1497 /* The dependence relation DDR cannot be represented by a distance
1498    vector.  */
1499
1500 static inline void
1501 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1502 {
1503   if (dump_file && (dump_flags & TDF_DETAILS))
1504     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1505
1506   DDR_AFFINE_P (ddr) = false;
1507 }
1508
1509 \f
1510
1511 /* This section contains the classic Banerjee tests.  */
1512
1513 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1514    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1515
1516 static inline bool
1517 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1518 {
1519   return (evolution_function_is_constant_p (chrec_a)
1520           && evolution_function_is_constant_p (chrec_b));
1521 }
1522
1523 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1524    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1525
1526 static bool
1527 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1528 {
1529   if ((evolution_function_is_constant_p (chrec_a)
1530        && evolution_function_is_univariate_p (chrec_b))
1531       || (evolution_function_is_constant_p (chrec_b)
1532           && evolution_function_is_univariate_p (chrec_a)))
1533     return true;
1534
1535   if (evolution_function_is_univariate_p (chrec_a)
1536       && evolution_function_is_univariate_p (chrec_b))
1537     {
1538       switch (TREE_CODE (chrec_a))
1539         {
1540         case POLYNOMIAL_CHREC:
1541           switch (TREE_CODE (chrec_b))
1542             {
1543             case POLYNOMIAL_CHREC:
1544               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1545                 return false;
1546
1547             default:
1548               return true;
1549             }
1550
1551         default:
1552           return true;
1553         }
1554     }
1555
1556   return false;
1557 }
1558
1559 /* Creates a conflict function with N dimensions.  The affine functions
1560    in each dimension follow.  */
1561
1562 static conflict_function *
1563 conflict_fn (unsigned n, ...)
1564 {
1565   unsigned i;
1566   conflict_function *ret = XCNEW (conflict_function);
1567   va_list ap;
1568
1569   gcc_assert (0 < n && n <= MAX_DIM);
1570   va_start(ap, n);
1571
1572   ret->n = n;
1573   for (i = 0; i < n; i++)
1574     ret->fns[i] = va_arg (ap, affine_fn);
1575   va_end(ap);
1576
1577   return ret;
1578 }
1579
1580 /* Returns constant affine function with value CST.  */
1581
1582 static affine_fn
1583 affine_fn_cst (tree cst)
1584 {
1585   affine_fn fn = VEC_alloc (tree, heap, 1);
1586   VEC_quick_push (tree, fn, cst);
1587   return fn;
1588 }
1589
1590 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1591
1592 static affine_fn
1593 affine_fn_univar (tree cst, unsigned dim, tree coef)
1594 {
1595   affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1596   unsigned i;
1597
1598   gcc_assert (dim > 0);
1599   VEC_quick_push (tree, fn, cst);
1600   for (i = 1; i < dim; i++)
1601     VEC_quick_push (tree, fn, integer_zero_node);
1602   VEC_quick_push (tree, fn, coef);
1603   return fn;
1604 }
1605
1606 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1607    *OVERLAPS_B are initialized to the functions that describe the
1608    relation between the elements accessed twice by CHREC_A and
1609    CHREC_B.  For k >= 0, the following property is verified:
1610
1611    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1612
1613 static void
1614 analyze_ziv_subscript (tree chrec_a,
1615                        tree chrec_b,
1616                        conflict_function **overlaps_a,
1617                        conflict_function **overlaps_b,
1618                        tree *last_conflicts)
1619 {
1620   tree type, difference;
1621   dependence_stats.num_ziv++;
1622
1623   if (dump_file && (dump_flags & TDF_DETAILS))
1624     fprintf (dump_file, "(analyze_ziv_subscript \n");
1625
1626   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1627   chrec_a = chrec_convert (type, chrec_a, NULL);
1628   chrec_b = chrec_convert (type, chrec_b, NULL);
1629   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1630
1631   switch (TREE_CODE (difference))
1632     {
1633     case INTEGER_CST:
1634       if (integer_zerop (difference))
1635         {
1636           /* The difference is equal to zero: the accessed index
1637              overlaps for each iteration in the loop.  */
1638           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1639           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1640           *last_conflicts = chrec_dont_know;
1641           dependence_stats.num_ziv_dependent++;
1642         }
1643       else
1644         {
1645           /* The accesses do not overlap.  */
1646           *overlaps_a = conflict_fn_no_dependence ();
1647           *overlaps_b = conflict_fn_no_dependence ();
1648           *last_conflicts = integer_zero_node;
1649           dependence_stats.num_ziv_independent++;
1650         }
1651       break;
1652
1653     default:
1654       /* We're not sure whether the indexes overlap.  For the moment,
1655          conservatively answer "don't know".  */
1656       if (dump_file && (dump_flags & TDF_DETAILS))
1657         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1658
1659       *overlaps_a = conflict_fn_not_known ();
1660       *overlaps_b = conflict_fn_not_known ();
1661       *last_conflicts = chrec_dont_know;
1662       dependence_stats.num_ziv_unimplemented++;
1663       break;
1664     }
1665
1666   if (dump_file && (dump_flags & TDF_DETAILS))
1667     fprintf (dump_file, ")\n");
1668 }
1669
1670 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1671    and only if it fits to the int type.  If this is not the case, or the
1672    bound  on the number of iterations of LOOP could not be derived, returns
1673    chrec_dont_know.  */
1674
1675 static tree
1676 max_stmt_executions_tree (struct loop *loop)
1677 {
1678   double_int nit;
1679
1680   if (!max_stmt_executions (loop, true, &nit))
1681     return chrec_dont_know;
1682
1683   if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1684     return chrec_dont_know;
1685
1686   return double_int_to_tree (unsigned_type_node, nit);
1687 }
1688
1689 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1690    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1691    *OVERLAPS_B are initialized to the functions that describe the
1692    relation between the elements accessed twice by CHREC_A and
1693    CHREC_B.  For k >= 0, the following property is verified:
1694
1695    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1696
1697 static void
1698 analyze_siv_subscript_cst_affine (tree chrec_a,
1699                                   tree chrec_b,
1700                                   conflict_function **overlaps_a,
1701                                   conflict_function **overlaps_b,
1702                                   tree *last_conflicts)
1703 {
1704   bool value0, value1, value2;
1705   tree type, difference, tmp;
1706
1707   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1708   chrec_a = chrec_convert (type, chrec_a, NULL);
1709   chrec_b = chrec_convert (type, chrec_b, NULL);
1710   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1711
1712   if (!chrec_is_positive (initial_condition (difference), &value0))
1713     {
1714       if (dump_file && (dump_flags & TDF_DETAILS))
1715         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1716
1717       dependence_stats.num_siv_unimplemented++;
1718       *overlaps_a = conflict_fn_not_known ();
1719       *overlaps_b = conflict_fn_not_known ();
1720       *last_conflicts = chrec_dont_know;
1721       return;
1722     }
1723   else
1724     {
1725       if (value0 == false)
1726         {
1727           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1728             {
1729               if (dump_file && (dump_flags & TDF_DETAILS))
1730                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1731
1732               *overlaps_a = conflict_fn_not_known ();
1733               *overlaps_b = conflict_fn_not_known ();
1734               *last_conflicts = chrec_dont_know;
1735               dependence_stats.num_siv_unimplemented++;
1736               return;
1737             }
1738           else
1739             {
1740               if (value1 == true)
1741                 {
1742                   /* Example:
1743                      chrec_a = 12
1744                      chrec_b = {10, +, 1}
1745                   */
1746
1747                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1748                     {
1749                       HOST_WIDE_INT numiter;
1750                       struct loop *loop = get_chrec_loop (chrec_b);
1751
1752                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1753                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
1754                                          fold_build1 (ABS_EXPR, type, difference),
1755                                          CHREC_RIGHT (chrec_b));
1756                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1757                       *last_conflicts = integer_one_node;
1758
1759
1760                       /* Perform weak-zero siv test to see if overlap is
1761                          outside the loop bounds.  */
1762                       numiter = max_stmt_executions_int (loop, true);
1763
1764                       if (numiter >= 0
1765                           && compare_tree_int (tmp, numiter) > 0)
1766                         {
1767                           free_conflict_function (*overlaps_a);
1768                           free_conflict_function (*overlaps_b);
1769                           *overlaps_a = conflict_fn_no_dependence ();
1770                           *overlaps_b = conflict_fn_no_dependence ();
1771                           *last_conflicts = integer_zero_node;
1772                           dependence_stats.num_siv_independent++;
1773                           return;
1774                         }
1775                       dependence_stats.num_siv_dependent++;
1776                       return;
1777                     }
1778
1779                   /* When the step does not divide the difference, there are
1780                      no overlaps.  */
1781                   else
1782                     {
1783                       *overlaps_a = conflict_fn_no_dependence ();
1784                       *overlaps_b = conflict_fn_no_dependence ();
1785                       *last_conflicts = integer_zero_node;
1786                       dependence_stats.num_siv_independent++;
1787                       return;
1788                     }
1789                 }
1790
1791               else
1792                 {
1793                   /* Example:
1794                      chrec_a = 12
1795                      chrec_b = {10, +, -1}
1796
1797                      In this case, chrec_a will not overlap with chrec_b.  */
1798                   *overlaps_a = conflict_fn_no_dependence ();
1799                   *overlaps_b = conflict_fn_no_dependence ();
1800                   *last_conflicts = integer_zero_node;
1801                   dependence_stats.num_siv_independent++;
1802                   return;
1803                 }
1804             }
1805         }
1806       else
1807         {
1808           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1809             {
1810               if (dump_file && (dump_flags & TDF_DETAILS))
1811                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1812
1813               *overlaps_a = conflict_fn_not_known ();
1814               *overlaps_b = conflict_fn_not_known ();
1815               *last_conflicts = chrec_dont_know;
1816               dependence_stats.num_siv_unimplemented++;
1817               return;
1818             }
1819           else
1820             {
1821               if (value2 == false)
1822                 {
1823                   /* Example:
1824                      chrec_a = 3
1825                      chrec_b = {10, +, -1}
1826                   */
1827                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1828                     {
1829                       HOST_WIDE_INT numiter;
1830                       struct loop *loop = get_chrec_loop (chrec_b);
1831
1832                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1833                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1834                                          CHREC_RIGHT (chrec_b));
1835                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1836                       *last_conflicts = integer_one_node;
1837
1838                       /* Perform weak-zero siv test to see if overlap is
1839                          outside the loop bounds.  */
1840                       numiter = max_stmt_executions_int (loop, true);
1841
1842                       if (numiter >= 0
1843                           && compare_tree_int (tmp, numiter) > 0)
1844                         {
1845                           free_conflict_function (*overlaps_a);
1846                           free_conflict_function (*overlaps_b);
1847                           *overlaps_a = conflict_fn_no_dependence ();
1848                           *overlaps_b = conflict_fn_no_dependence ();
1849                           *last_conflicts = integer_zero_node;
1850                           dependence_stats.num_siv_independent++;
1851                           return;
1852                         }
1853                       dependence_stats.num_siv_dependent++;
1854                       return;
1855                     }
1856
1857                   /* When the step does not divide the difference, there
1858                      are no overlaps.  */
1859                   else
1860                     {
1861                       *overlaps_a = conflict_fn_no_dependence ();
1862                       *overlaps_b = conflict_fn_no_dependence ();
1863                       *last_conflicts = integer_zero_node;
1864                       dependence_stats.num_siv_independent++;
1865                       return;
1866                     }
1867                 }
1868               else
1869                 {
1870                   /* Example:
1871                      chrec_a = 3
1872                      chrec_b = {4, +, 1}
1873
1874                      In this case, chrec_a will not overlap with chrec_b.  */
1875                   *overlaps_a = conflict_fn_no_dependence ();
1876                   *overlaps_b = conflict_fn_no_dependence ();
1877                   *last_conflicts = integer_zero_node;
1878                   dependence_stats.num_siv_independent++;
1879                   return;
1880                 }
1881             }
1882         }
1883     }
1884 }
1885
1886 /* Helper recursive function for initializing the matrix A.  Returns
1887    the initial value of CHREC.  */
1888
1889 static tree
1890 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1891 {
1892   gcc_assert (chrec);
1893
1894   switch (TREE_CODE (chrec))
1895     {
1896     case POLYNOMIAL_CHREC:
1897       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1898
1899       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1900       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1901
1902     case PLUS_EXPR:
1903     case MULT_EXPR:
1904     case MINUS_EXPR:
1905       {
1906         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1907         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1908
1909         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1910       }
1911
1912     case NOP_EXPR:
1913       {
1914         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1915         return chrec_convert (chrec_type (chrec), op, NULL);
1916       }
1917
1918     case BIT_NOT_EXPR:
1919       {
1920         /* Handle ~X as -1 - X.  */
1921         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1922         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1923                               build_int_cst (TREE_TYPE (chrec), -1), op);
1924       }
1925
1926     case INTEGER_CST:
1927       return chrec;
1928
1929     default:
1930       gcc_unreachable ();
1931       return NULL_TREE;
1932     }
1933 }
1934
1935 #define FLOOR_DIV(x,y) ((x) / (y))
1936
1937 /* Solves the special case of the Diophantine equation:
1938    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1939
1940    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
1941    number of iterations that loops X and Y run.  The overlaps will be
1942    constructed as evolutions in dimension DIM.  */
1943
1944 static void
1945 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1946                                          affine_fn *overlaps_a,
1947                                          affine_fn *overlaps_b,
1948                                          tree *last_conflicts, int dim)
1949 {
1950   if (((step_a > 0 && step_b > 0)
1951        || (step_a < 0 && step_b < 0)))
1952     {
1953       int step_overlaps_a, step_overlaps_b;
1954       int gcd_steps_a_b, last_conflict, tau2;
1955
1956       gcd_steps_a_b = gcd (step_a, step_b);
1957       step_overlaps_a = step_b / gcd_steps_a_b;
1958       step_overlaps_b = step_a / gcd_steps_a_b;
1959
1960       if (niter > 0)
1961         {
1962           tau2 = FLOOR_DIV (niter, step_overlaps_a);
1963           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1964           last_conflict = tau2;
1965           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1966         }
1967       else
1968         *last_conflicts = chrec_dont_know;
1969
1970       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1971                                       build_int_cst (NULL_TREE,
1972                                                      step_overlaps_a));
1973       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1974                                       build_int_cst (NULL_TREE,
1975                                                      step_overlaps_b));
1976     }
1977
1978   else
1979     {
1980       *overlaps_a = affine_fn_cst (integer_zero_node);
1981       *overlaps_b = affine_fn_cst (integer_zero_node);
1982       *last_conflicts = integer_zero_node;
1983     }
1984 }
1985
1986 /* Solves the special case of a Diophantine equation where CHREC_A is
1987    an affine bivariate function, and CHREC_B is an affine univariate
1988    function.  For example,
1989
1990    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1991
1992    has the following overlapping functions:
1993
1994    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1995    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1996    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1997
1998    FORNOW: This is a specialized implementation for a case occurring in
1999    a common benchmark.  Implement the general algorithm.  */
2000
2001 static void
2002 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2003                                       conflict_function **overlaps_a,
2004                                       conflict_function **overlaps_b,
2005                                       tree *last_conflicts)
2006 {
2007   bool xz_p, yz_p, xyz_p;
2008   int step_x, step_y, step_z;
2009   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2010   affine_fn overlaps_a_xz, overlaps_b_xz;
2011   affine_fn overlaps_a_yz, overlaps_b_yz;
2012   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2013   affine_fn ova1, ova2, ovb;
2014   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2015
2016   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2017   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2018   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2019
2020   niter_x =
2021     max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2022   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2023   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2024
2025   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2026     {
2027       if (dump_file && (dump_flags & TDF_DETAILS))
2028         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2029
2030       *overlaps_a = conflict_fn_not_known ();
2031       *overlaps_b = conflict_fn_not_known ();
2032       *last_conflicts = chrec_dont_know;
2033       return;
2034     }
2035
2036   niter = MIN (niter_x, niter_z);
2037   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2038                                            &overlaps_a_xz,
2039                                            &overlaps_b_xz,
2040                                            &last_conflicts_xz, 1);
2041   niter = MIN (niter_y, niter_z);
2042   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2043                                            &overlaps_a_yz,
2044                                            &overlaps_b_yz,
2045                                            &last_conflicts_yz, 2);
2046   niter = MIN (niter_x, niter_z);
2047   niter = MIN (niter_y, niter);
2048   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2049                                            &overlaps_a_xyz,
2050                                            &overlaps_b_xyz,
2051                                            &last_conflicts_xyz, 3);
2052
2053   xz_p = !integer_zerop (last_conflicts_xz);
2054   yz_p = !integer_zerop (last_conflicts_yz);
2055   xyz_p = !integer_zerop (last_conflicts_xyz);
2056
2057   if (xz_p || yz_p || xyz_p)
2058     {
2059       ova1 = affine_fn_cst (integer_zero_node);
2060       ova2 = affine_fn_cst (integer_zero_node);
2061       ovb = affine_fn_cst (integer_zero_node);
2062       if (xz_p)
2063         {
2064           affine_fn t0 = ova1;
2065           affine_fn t2 = ovb;
2066
2067           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2068           ovb = affine_fn_plus (ovb, overlaps_b_xz);
2069           affine_fn_free (t0);
2070           affine_fn_free (t2);
2071           *last_conflicts = last_conflicts_xz;
2072         }
2073       if (yz_p)
2074         {
2075           affine_fn t0 = ova2;
2076           affine_fn t2 = ovb;
2077
2078           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2079           ovb = affine_fn_plus (ovb, overlaps_b_yz);
2080           affine_fn_free (t0);
2081           affine_fn_free (t2);
2082           *last_conflicts = last_conflicts_yz;
2083         }
2084       if (xyz_p)
2085         {
2086           affine_fn t0 = ova1;
2087           affine_fn t2 = ova2;
2088           affine_fn t4 = ovb;
2089
2090           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2091           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2092           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2093           affine_fn_free (t0);
2094           affine_fn_free (t2);
2095           affine_fn_free (t4);
2096           *last_conflicts = last_conflicts_xyz;
2097         }
2098       *overlaps_a = conflict_fn (2, ova1, ova2);
2099       *overlaps_b = conflict_fn (1, ovb);
2100     }
2101   else
2102     {
2103       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2104       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2105       *last_conflicts = integer_zero_node;
2106     }
2107
2108   affine_fn_free (overlaps_a_xz);
2109   affine_fn_free (overlaps_b_xz);
2110   affine_fn_free (overlaps_a_yz);
2111   affine_fn_free (overlaps_b_yz);
2112   affine_fn_free (overlaps_a_xyz);
2113   affine_fn_free (overlaps_b_xyz);
2114 }
2115
2116 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2117
2118 static void
2119 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2120                     int size)
2121 {
2122   memcpy (vec2, vec1, size * sizeof (*vec1));
2123 }
2124
2125 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2126
2127 static void
2128 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2129                     int m, int n)
2130 {
2131   int i;
2132
2133   for (i = 0; i < m; i++)
2134     lambda_vector_copy (mat1[i], mat2[i], n);
2135 }
2136
2137 /* Store the N x N identity matrix in MAT.  */
2138
2139 static void
2140 lambda_matrix_id (lambda_matrix mat, int size)
2141 {
2142   int i, j;
2143
2144   for (i = 0; i < size; i++)
2145     for (j = 0; j < size; j++)
2146       mat[i][j] = (i == j) ? 1 : 0;
2147 }
2148
2149 /* Return the first nonzero element of vector VEC1 between START and N.
2150    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2151
2152 static int
2153 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2154 {
2155   int j = start;
2156   while (j < n && vec1[j] == 0)
2157     j++;
2158   return j;
2159 }
2160
2161 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2162    R2 = R2 + CONST1 * R1.  */
2163
2164 static void
2165 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2166 {
2167   int i;
2168
2169   if (const1 == 0)
2170     return;
2171
2172   for (i = 0; i < n; i++)
2173     mat[r2][i] += const1 * mat[r1][i];
2174 }
2175
2176 /* Swap rows R1 and R2 in matrix MAT.  */
2177
2178 static void
2179 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2180 {
2181   lambda_vector row;
2182
2183   row = mat[r1];
2184   mat[r1] = mat[r2];
2185   mat[r2] = row;
2186 }
2187
2188 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2189    and store the result in VEC2.  */
2190
2191 static void
2192 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2193                           int size, int const1)
2194 {
2195   int i;
2196
2197   if (const1 == 0)
2198     lambda_vector_clear (vec2, size);
2199   else
2200     for (i = 0; i < size; i++)
2201       vec2[i] = const1 * vec1[i];
2202 }
2203
2204 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2205
2206 static void
2207 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2208                       int size)
2209 {
2210   lambda_vector_mult_const (vec1, vec2, size, -1);
2211 }
2212
2213 /* Negate row R1 of matrix MAT which has N columns.  */
2214
2215 static void
2216 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2217 {
2218   lambda_vector_negate (mat[r1], mat[r1], n);
2219 }
2220
2221 /* Return true if two vectors are equal.  */
2222
2223 static bool
2224 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2225 {
2226   int i;
2227   for (i = 0; i < size; i++)
2228     if (vec1[i] != vec2[i])
2229       return false;
2230   return true;
2231 }
2232
2233 /* Given an M x N integer matrix A, this function determines an M x
2234    M unimodular matrix U, and an M x N echelon matrix S such that
2235    "U.A = S".  This decomposition is also known as "right Hermite".
2236
2237    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2238    Restructuring Compilers" Utpal Banerjee.  */
2239
2240 static void
2241 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2242                              lambda_matrix S, lambda_matrix U)
2243 {
2244   int i, j, i0 = 0;
2245
2246   lambda_matrix_copy (A, S, m, n);
2247   lambda_matrix_id (U, m);
2248
2249   for (j = 0; j < n; j++)
2250     {
2251       if (lambda_vector_first_nz (S[j], m, i0) < m)
2252         {
2253           ++i0;
2254           for (i = m - 1; i >= i0; i--)
2255             {
2256               while (S[i][j] != 0)
2257                 {
2258                   int sigma, factor, a, b;
2259
2260                   a = S[i-1][j];
2261                   b = S[i][j];
2262                   sigma = (a * b < 0) ? -1: 1;
2263                   a = abs (a);
2264                   b = abs (b);
2265                   factor = sigma * (a / b);
2266
2267                   lambda_matrix_row_add (S, n, i, i-1, -factor);
2268                   lambda_matrix_row_exchange (S, i, i-1);
2269
2270                   lambda_matrix_row_add (U, m, i, i-1, -factor);
2271                   lambda_matrix_row_exchange (U, i, i-1);
2272                 }
2273             }
2274         }
2275     }
2276 }
2277
2278 /* Determines the overlapping elements due to accesses CHREC_A and
2279    CHREC_B, that are affine functions.  This function cannot handle
2280    symbolic evolution functions, ie. when initial conditions are
2281    parameters, because it uses lambda matrices of integers.  */
2282
2283 static void
2284 analyze_subscript_affine_affine (tree chrec_a,
2285                                  tree chrec_b,
2286                                  conflict_function **overlaps_a,
2287                                  conflict_function **overlaps_b,
2288                                  tree *last_conflicts)
2289 {
2290   unsigned nb_vars_a, nb_vars_b, dim;
2291   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2292   lambda_matrix A, U, S;
2293   struct obstack scratch_obstack;
2294
2295   if (eq_evolutions_p (chrec_a, chrec_b))
2296     {
2297       /* The accessed index overlaps for each iteration in the
2298          loop.  */
2299       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2300       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2301       *last_conflicts = chrec_dont_know;
2302       return;
2303     }
2304   if (dump_file && (dump_flags & TDF_DETAILS))
2305     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2306
2307   /* For determining the initial intersection, we have to solve a
2308      Diophantine equation.  This is the most time consuming part.
2309
2310      For answering to the question: "Is there a dependence?" we have
2311      to prove that there exists a solution to the Diophantine
2312      equation, and that the solution is in the iteration domain,
2313      i.e. the solution is positive or zero, and that the solution
2314      happens before the upper bound loop.nb_iterations.  Otherwise
2315      there is no dependence.  This function outputs a description of
2316      the iterations that hold the intersections.  */
2317
2318   nb_vars_a = nb_vars_in_chrec (chrec_a);
2319   nb_vars_b = nb_vars_in_chrec (chrec_b);
2320
2321   gcc_obstack_init (&scratch_obstack);
2322
2323   dim = nb_vars_a + nb_vars_b;
2324   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2325   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2326   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2327
2328   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2329   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2330   gamma = init_b - init_a;
2331
2332   /* Don't do all the hard work of solving the Diophantine equation
2333      when we already know the solution: for example,
2334      | {3, +, 1}_1
2335      | {3, +, 4}_2
2336      | gamma = 3 - 3 = 0.
2337      Then the first overlap occurs during the first iterations:
2338      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2339   */
2340   if (gamma == 0)
2341     {
2342       if (nb_vars_a == 1 && nb_vars_b == 1)
2343         {
2344           HOST_WIDE_INT step_a, step_b;
2345           HOST_WIDE_INT niter, niter_a, niter_b;
2346           affine_fn ova, ovb;
2347
2348           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2349           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2350           niter = MIN (niter_a, niter_b);
2351           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2352           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2353
2354           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2355                                                    &ova, &ovb,
2356                                                    last_conflicts, 1);
2357           *overlaps_a = conflict_fn (1, ova);
2358           *overlaps_b = conflict_fn (1, ovb);
2359         }
2360
2361       else if (nb_vars_a == 2 && nb_vars_b == 1)
2362         compute_overlap_steps_for_affine_1_2
2363           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2364
2365       else if (nb_vars_a == 1 && nb_vars_b == 2)
2366         compute_overlap_steps_for_affine_1_2
2367           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2368
2369       else
2370         {
2371           if (dump_file && (dump_flags & TDF_DETAILS))
2372             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2373           *overlaps_a = conflict_fn_not_known ();
2374           *overlaps_b = conflict_fn_not_known ();
2375           *last_conflicts = chrec_dont_know;
2376         }
2377       goto end_analyze_subs_aa;
2378     }
2379
2380   /* U.A = S */
2381   lambda_matrix_right_hermite (A, dim, 1, S, U);
2382
2383   if (S[0][0] < 0)
2384     {
2385       S[0][0] *= -1;
2386       lambda_matrix_row_negate (U, dim, 0);
2387     }
2388   gcd_alpha_beta = S[0][0];
2389
2390   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2391      but that is a quite strange case.  Instead of ICEing, answer
2392      don't know.  */
2393   if (gcd_alpha_beta == 0)
2394     {
2395       *overlaps_a = conflict_fn_not_known ();
2396       *overlaps_b = conflict_fn_not_known ();
2397       *last_conflicts = chrec_dont_know;
2398       goto end_analyze_subs_aa;
2399     }
2400
2401   /* The classic "gcd-test".  */
2402   if (!int_divides_p (gcd_alpha_beta, gamma))
2403     {
2404       /* The "gcd-test" has determined that there is no integer
2405          solution, i.e. there is no dependence.  */
2406       *overlaps_a = conflict_fn_no_dependence ();
2407       *overlaps_b = conflict_fn_no_dependence ();
2408       *last_conflicts = integer_zero_node;
2409     }
2410
2411   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2412   else if (nb_vars_a == 1 && nb_vars_b == 1)
2413     {
2414       /* Both functions should have the same evolution sign.  */
2415       if (((A[0][0] > 0 && -A[1][0] > 0)
2416            || (A[0][0] < 0 && -A[1][0] < 0)))
2417         {
2418           /* The solutions are given by:
2419              |
2420              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2421              |                           [u21 u22]    [y0]
2422
2423              For a given integer t.  Using the following variables,
2424
2425              | i0 = u11 * gamma / gcd_alpha_beta
2426              | j0 = u12 * gamma / gcd_alpha_beta
2427              | i1 = u21
2428              | j1 = u22
2429
2430              the solutions are:
2431
2432              | x0 = i0 + i1 * t,
2433              | y0 = j0 + j1 * t.  */
2434           HOST_WIDE_INT i0, j0, i1, j1;
2435
2436           i0 = U[0][0] * gamma / gcd_alpha_beta;
2437           j0 = U[0][1] * gamma / gcd_alpha_beta;
2438           i1 = U[1][0];
2439           j1 = U[1][1];
2440
2441           if ((i1 == 0 && i0 < 0)
2442               || (j1 == 0 && j0 < 0))
2443             {
2444               /* There is no solution.
2445                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2446                  falls in here, but for the moment we don't look at the
2447                  upper bound of the iteration domain.  */
2448               *overlaps_a = conflict_fn_no_dependence ();
2449               *overlaps_b = conflict_fn_no_dependence ();
2450               *last_conflicts = integer_zero_node;
2451               goto end_analyze_subs_aa;
2452             }
2453
2454           if (i1 > 0 && j1 > 0)
2455             {
2456               HOST_WIDE_INT niter_a = max_stmt_executions_int
2457                 (get_chrec_loop (chrec_a), true);
2458               HOST_WIDE_INT niter_b = max_stmt_executions_int
2459                 (get_chrec_loop (chrec_b), true);
2460               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2461
2462               /* (X0, Y0) is a solution of the Diophantine equation:
2463                  "chrec_a (X0) = chrec_b (Y0)".  */
2464               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2465                                         CEIL (-j0, j1));
2466               HOST_WIDE_INT x0 = i1 * tau1 + i0;
2467               HOST_WIDE_INT y0 = j1 * tau1 + j0;
2468
2469               /* (X1, Y1) is the smallest positive solution of the eq
2470                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2471                  first conflict occurs.  */
2472               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2473               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2474               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2475
2476               if (niter > 0)
2477                 {
2478                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2479                                             FLOOR_DIV (niter - j0, j1));
2480                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2481
2482                   /* If the overlap occurs outside of the bounds of the
2483                      loop, there is no dependence.  */
2484                   if (x1 >= niter || y1 >= niter)
2485                     {
2486                       *overlaps_a = conflict_fn_no_dependence ();
2487                       *overlaps_b = conflict_fn_no_dependence ();
2488                       *last_conflicts = integer_zero_node;
2489                       goto end_analyze_subs_aa;
2490                     }
2491                   else
2492                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2493                 }
2494               else
2495                 *last_conflicts = chrec_dont_know;
2496
2497               *overlaps_a
2498                 = conflict_fn (1,
2499                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
2500                                                  1,
2501                                                  build_int_cst (NULL_TREE, i1)));
2502               *overlaps_b
2503                 = conflict_fn (1,
2504                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
2505                                                  1,
2506                                                  build_int_cst (NULL_TREE, j1)));
2507             }
2508           else
2509             {
2510               /* FIXME: For the moment, the upper bound of the
2511                  iteration domain for i and j is not checked.  */
2512               if (dump_file && (dump_flags & TDF_DETAILS))
2513                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2514               *overlaps_a = conflict_fn_not_known ();
2515               *overlaps_b = conflict_fn_not_known ();
2516               *last_conflicts = chrec_dont_know;
2517             }
2518         }
2519       else
2520         {
2521           if (dump_file && (dump_flags & TDF_DETAILS))
2522             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2523           *overlaps_a = conflict_fn_not_known ();
2524           *overlaps_b = conflict_fn_not_known ();
2525           *last_conflicts = chrec_dont_know;
2526         }
2527     }
2528   else
2529     {
2530       if (dump_file && (dump_flags & TDF_DETAILS))
2531         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2532       *overlaps_a = conflict_fn_not_known ();
2533       *overlaps_b = conflict_fn_not_known ();
2534       *last_conflicts = chrec_dont_know;
2535     }
2536
2537 end_analyze_subs_aa:
2538   obstack_free (&scratch_obstack, NULL);
2539   if (dump_file && (dump_flags & TDF_DETAILS))
2540     {
2541       fprintf (dump_file, "  (overlaps_a = ");
2542       dump_conflict_function (dump_file, *overlaps_a);
2543       fprintf (dump_file, ")\n  (overlaps_b = ");
2544       dump_conflict_function (dump_file, *overlaps_b);
2545       fprintf (dump_file, ")\n");
2546       fprintf (dump_file, ")\n");
2547     }
2548 }
2549
2550 /* Returns true when analyze_subscript_affine_affine can be used for
2551    determining the dependence relation between chrec_a and chrec_b,
2552    that contain symbols.  This function modifies chrec_a and chrec_b
2553    such that the analysis result is the same, and such that they don't
2554    contain symbols, and then can safely be passed to the analyzer.
2555
2556    Example: The analysis of the following tuples of evolutions produce
2557    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2558    vs. {0, +, 1}_1
2559
2560    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2561    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2562 */
2563
2564 static bool
2565 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2566 {
2567   tree diff, type, left_a, left_b, right_b;
2568
2569   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2570       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2571     /* FIXME: For the moment not handled.  Might be refined later.  */
2572     return false;
2573
2574   type = chrec_type (*chrec_a);
2575   left_a = CHREC_LEFT (*chrec_a);
2576   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2577   diff = chrec_fold_minus (type, left_a, left_b);
2578
2579   if (!evolution_function_is_constant_p (diff))
2580     return false;
2581
2582   if (dump_file && (dump_flags & TDF_DETAILS))
2583     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2584
2585   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2586                                      diff, CHREC_RIGHT (*chrec_a));
2587   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2588   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2589                                      build_int_cst (type, 0),
2590                                      right_b);
2591   return true;
2592 }
2593
2594 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2595    *OVERLAPS_B are initialized to the functions that describe the
2596    relation between the elements accessed twice by CHREC_A and
2597    CHREC_B.  For k >= 0, the following property is verified:
2598
2599    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2600
2601 static void
2602 analyze_siv_subscript (tree chrec_a,
2603                        tree chrec_b,
2604                        conflict_function **overlaps_a,
2605                        conflict_function **overlaps_b,
2606                        tree *last_conflicts,
2607                        int loop_nest_num)
2608 {
2609   dependence_stats.num_siv++;
2610
2611   if (dump_file && (dump_flags & TDF_DETAILS))
2612     fprintf (dump_file, "(analyze_siv_subscript \n");
2613
2614   if (evolution_function_is_constant_p (chrec_a)
2615       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2616     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2617                                       overlaps_a, overlaps_b, last_conflicts);
2618
2619   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2620            && evolution_function_is_constant_p (chrec_b))
2621     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2622                                       overlaps_b, overlaps_a, last_conflicts);
2623
2624   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2625            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2626     {
2627       if (!chrec_contains_symbols (chrec_a)
2628           && !chrec_contains_symbols (chrec_b))
2629         {
2630           analyze_subscript_affine_affine (chrec_a, chrec_b,
2631                                            overlaps_a, overlaps_b,
2632                                            last_conflicts);
2633
2634           if (CF_NOT_KNOWN_P (*overlaps_a)
2635               || CF_NOT_KNOWN_P (*overlaps_b))
2636             dependence_stats.num_siv_unimplemented++;
2637           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2638                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2639             dependence_stats.num_siv_independent++;
2640           else
2641             dependence_stats.num_siv_dependent++;
2642         }
2643       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2644                                                         &chrec_b))
2645         {
2646           analyze_subscript_affine_affine (chrec_a, chrec_b,
2647                                            overlaps_a, overlaps_b,
2648                                            last_conflicts);
2649
2650           if (CF_NOT_KNOWN_P (*overlaps_a)
2651               || CF_NOT_KNOWN_P (*overlaps_b))
2652             dependence_stats.num_siv_unimplemented++;
2653           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2654                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2655             dependence_stats.num_siv_independent++;
2656           else
2657             dependence_stats.num_siv_dependent++;
2658         }
2659       else
2660         goto siv_subscript_dontknow;
2661     }
2662
2663   else
2664     {
2665     siv_subscript_dontknow:;
2666       if (dump_file && (dump_flags & TDF_DETAILS))
2667         fprintf (dump_file, "siv test failed: unimplemented.\n");
2668       *overlaps_a = conflict_fn_not_known ();
2669       *overlaps_b = conflict_fn_not_known ();
2670       *last_conflicts = chrec_dont_know;
2671       dependence_stats.num_siv_unimplemented++;
2672     }
2673
2674   if (dump_file && (dump_flags & TDF_DETAILS))
2675     fprintf (dump_file, ")\n");
2676 }
2677
2678 /* Returns false if we can prove that the greatest common divisor of the steps
2679    of CHREC does not divide CST, false otherwise.  */
2680
2681 static bool
2682 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2683 {
2684   HOST_WIDE_INT cd = 0, val;
2685   tree step;
2686
2687   if (!host_integerp (cst, 0))
2688     return true;
2689   val = tree_low_cst (cst, 0);
2690
2691   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2692     {
2693       step = CHREC_RIGHT (chrec);
2694       if (!host_integerp (step, 0))
2695         return true;
2696       cd = gcd (cd, tree_low_cst (step, 0));
2697       chrec = CHREC_LEFT (chrec);
2698     }
2699
2700   return val % cd == 0;
2701 }
2702
2703 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2704    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2705    functions that describe the relation between the elements accessed
2706    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2707    is verified:
2708
2709    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2710
2711 static void
2712 analyze_miv_subscript (tree chrec_a,
2713                        tree chrec_b,
2714                        conflict_function **overlaps_a,
2715                        conflict_function **overlaps_b,
2716                        tree *last_conflicts,
2717                        struct loop *loop_nest)
2718 {
2719   tree type, difference;
2720
2721   dependence_stats.num_miv++;
2722   if (dump_file && (dump_flags & TDF_DETAILS))
2723     fprintf (dump_file, "(analyze_miv_subscript \n");
2724
2725   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2726   chrec_a = chrec_convert (type, chrec_a, NULL);
2727   chrec_b = chrec_convert (type, chrec_b, NULL);
2728   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2729
2730   if (eq_evolutions_p (chrec_a, chrec_b))
2731     {
2732       /* Access functions are the same: all the elements are accessed
2733          in the same order.  */
2734       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2735       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2736       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2737       dependence_stats.num_miv_dependent++;
2738     }
2739
2740   else if (evolution_function_is_constant_p (difference)
2741            /* For the moment, the following is verified:
2742               evolution_function_is_affine_multivariate_p (chrec_a,
2743               loop_nest->num) */
2744            && !gcd_of_steps_may_divide_p (chrec_a, difference))
2745     {
2746       /* testsuite/.../ssa-chrec-33.c
2747          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2748
2749          The difference is 1, and all the evolution steps are multiples
2750          of 2, consequently there are no overlapping elements.  */
2751       *overlaps_a = conflict_fn_no_dependence ();
2752       *overlaps_b = conflict_fn_no_dependence ();
2753       *last_conflicts = integer_zero_node;
2754       dependence_stats.num_miv_independent++;
2755     }
2756
2757   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2758            && !chrec_contains_symbols (chrec_a)
2759            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2760            && !chrec_contains_symbols (chrec_b))
2761     {
2762       /* testsuite/.../ssa-chrec-35.c
2763          {0, +, 1}_2  vs.  {0, +, 1}_3
2764          the overlapping elements are respectively located at iterations:
2765          {0, +, 1}_x and {0, +, 1}_x,
2766          in other words, we have the equality:
2767          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2768
2769          Other examples:
2770          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2771          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2772
2773          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2774          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2775       */
2776       analyze_subscript_affine_affine (chrec_a, chrec_b,
2777                                        overlaps_a, overlaps_b, last_conflicts);
2778
2779       if (CF_NOT_KNOWN_P (*overlaps_a)
2780           || CF_NOT_KNOWN_P (*overlaps_b))
2781         dependence_stats.num_miv_unimplemented++;
2782       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2783                || CF_NO_DEPENDENCE_P (*overlaps_b))
2784         dependence_stats.num_miv_independent++;
2785       else
2786         dependence_stats.num_miv_dependent++;
2787     }
2788
2789   else
2790     {
2791       /* When the analysis is too difficult, answer "don't know".  */
2792       if (dump_file && (dump_flags & TDF_DETAILS))
2793         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2794
2795       *overlaps_a = conflict_fn_not_known ();
2796       *overlaps_b = conflict_fn_not_known ();
2797       *last_conflicts = chrec_dont_know;
2798       dependence_stats.num_miv_unimplemented++;
2799     }
2800
2801   if (dump_file && (dump_flags & TDF_DETAILS))
2802     fprintf (dump_file, ")\n");
2803 }
2804
2805 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2806    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2807    OVERLAP_ITERATIONS_B are initialized with two functions that
2808    describe the iterations that contain conflicting elements.
2809
2810    Remark: For an integer k >= 0, the following equality is true:
2811
2812    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2813 */
2814
2815 static void
2816 analyze_overlapping_iterations (tree chrec_a,
2817                                 tree chrec_b,
2818                                 conflict_function **overlap_iterations_a,
2819                                 conflict_function **overlap_iterations_b,
2820                                 tree *last_conflicts, struct loop *loop_nest)
2821 {
2822   unsigned int lnn = loop_nest->num;
2823
2824   dependence_stats.num_subscript_tests++;
2825
2826   if (dump_file && (dump_flags & TDF_DETAILS))
2827     {
2828       fprintf (dump_file, "(analyze_overlapping_iterations \n");
2829       fprintf (dump_file, "  (chrec_a = ");
2830       print_generic_expr (dump_file, chrec_a, 0);
2831       fprintf (dump_file, ")\n  (chrec_b = ");
2832       print_generic_expr (dump_file, chrec_b, 0);
2833       fprintf (dump_file, ")\n");
2834     }
2835
2836   if (chrec_a == NULL_TREE
2837       || chrec_b == NULL_TREE
2838       || chrec_contains_undetermined (chrec_a)
2839       || chrec_contains_undetermined (chrec_b))
2840     {
2841       dependence_stats.num_subscript_undetermined++;
2842
2843       *overlap_iterations_a = conflict_fn_not_known ();
2844       *overlap_iterations_b = conflict_fn_not_known ();
2845     }
2846
2847   /* If they are the same chrec, and are affine, they overlap
2848      on every iteration.  */
2849   else if (eq_evolutions_p (chrec_a, chrec_b)
2850            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2851                || operand_equal_p (chrec_a, chrec_b, 0)))
2852     {
2853       dependence_stats.num_same_subscript_function++;
2854       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2855       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2856       *last_conflicts = chrec_dont_know;
2857     }
2858
2859   /* If they aren't the same, and aren't affine, we can't do anything
2860      yet.  */
2861   else if ((chrec_contains_symbols (chrec_a)
2862             || chrec_contains_symbols (chrec_b))
2863            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2864                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2865     {
2866       dependence_stats.num_subscript_undetermined++;
2867       *overlap_iterations_a = conflict_fn_not_known ();
2868       *overlap_iterations_b = conflict_fn_not_known ();
2869     }
2870
2871   else if (ziv_subscript_p (chrec_a, chrec_b))
2872     analyze_ziv_subscript (chrec_a, chrec_b,
2873                            overlap_iterations_a, overlap_iterations_b,
2874                            last_conflicts);
2875
2876   else if (siv_subscript_p (chrec_a, chrec_b))
2877     analyze_siv_subscript (chrec_a, chrec_b,
2878                            overlap_iterations_a, overlap_iterations_b,
2879                            last_conflicts, lnn);
2880
2881   else
2882     analyze_miv_subscript (chrec_a, chrec_b,
2883                            overlap_iterations_a, overlap_iterations_b,
2884                            last_conflicts, loop_nest);
2885
2886   if (dump_file && (dump_flags & TDF_DETAILS))
2887     {
2888       fprintf (dump_file, "  (overlap_iterations_a = ");
2889       dump_conflict_function (dump_file, *overlap_iterations_a);
2890       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
2891       dump_conflict_function (dump_file, *overlap_iterations_b);
2892       fprintf (dump_file, ")\n");
2893       fprintf (dump_file, ")\n");
2894     }
2895 }
2896
2897 /* Helper function for uniquely inserting distance vectors.  */
2898
2899 static void
2900 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2901 {
2902   unsigned i;
2903   lambda_vector v;
2904
2905   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2906     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2907       return;
2908
2909   VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2910 }
2911
2912 /* Helper function for uniquely inserting direction vectors.  */
2913
2914 static void
2915 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2916 {
2917   unsigned i;
2918   lambda_vector v;
2919
2920   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2921     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2922       return;
2923
2924   VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2925 }
2926
2927 /* Add a distance of 1 on all the loops outer than INDEX.  If we
2928    haven't yet determined a distance for this outer loop, push a new
2929    distance vector composed of the previous distance, and a distance
2930    of 1 for this outer loop.  Example:
2931
2932    | loop_1
2933    |   loop_2
2934    |     A[10]
2935    |   endloop_2
2936    | endloop_1
2937
2938    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
2939    save (0, 1), then we have to save (1, 0).  */
2940
2941 static void
2942 add_outer_distances (struct data_dependence_relation *ddr,
2943                      lambda_vector dist_v, int index)
2944 {
2945   /* For each outer loop where init_v is not set, the accesses are
2946      in dependence of distance 1 in the loop.  */
2947   while (--index >= 0)
2948     {
2949       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2950       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2951       save_v[index] = 1;
2952       save_dist_v (ddr, save_v);
2953     }
2954 }
2955
2956 /* Return false when fail to represent the data dependence as a
2957    distance vector.  INIT_B is set to true when a component has been
2958    added to the distance vector DIST_V.  INDEX_CARRY is then set to
2959    the index in DIST_V that carries the dependence.  */
2960
2961 static bool
2962 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2963                              struct data_reference *ddr_a,
2964                              struct data_reference *ddr_b,
2965                              lambda_vector dist_v, bool *init_b,
2966                              int *index_carry)
2967 {
2968   unsigned i;
2969   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2970
2971   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2972     {
2973       tree access_fn_a, access_fn_b;
2974       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2975
2976       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2977         {
2978           non_affine_dependence_relation (ddr);
2979           return false;
2980         }
2981
2982       access_fn_a = DR_ACCESS_FN (ddr_a, i);
2983       access_fn_b = DR_ACCESS_FN (ddr_b, i);
2984
2985       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2986           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2987         {
2988           int dist, index;
2989           int var_a = CHREC_VARIABLE (access_fn_a);
2990           int var_b = CHREC_VARIABLE (access_fn_b);
2991
2992           if (var_a != var_b
2993               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2994             {
2995               non_affine_dependence_relation (ddr);
2996               return false;
2997             }
2998
2999           dist = int_cst_value (SUB_DISTANCE (subscript));
3000           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3001           *index_carry = MIN (index, *index_carry);
3002
3003           /* This is the subscript coupling test.  If we have already
3004              recorded a distance for this loop (a distance coming from
3005              another subscript), it should be the same.  For example,
3006              in the following code, there is no dependence:
3007
3008              | loop i = 0, N, 1
3009              |   T[i+1][i] = ...
3010              |   ... = T[i][i]
3011              | endloop
3012           */
3013           if (init_v[index] != 0 && dist_v[index] != dist)
3014             {
3015               finalize_ddr_dependent (ddr, chrec_known);
3016               return false;
3017             }
3018
3019           dist_v[index] = dist;
3020           init_v[index] = 1;
3021           *init_b = true;
3022         }
3023       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3024         {
3025           /* This can be for example an affine vs. constant dependence
3026              (T[i] vs. T[3]) that is not an affine dependence and is
3027              not representable as a distance vector.  */
3028           non_affine_dependence_relation (ddr);
3029           return false;
3030         }
3031     }
3032
3033   return true;
3034 }
3035
3036 /* Return true when the DDR contains only constant access functions.  */
3037
3038 static bool
3039 constant_access_functions (const struct data_dependence_relation *ddr)
3040 {
3041   unsigned i;
3042
3043   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3044     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3045         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3046       return false;
3047
3048   return true;
3049 }
3050
3051 /* Helper function for the case where DDR_A and DDR_B are the same
3052    multivariate access function with a constant step.  For an example
3053    see pr34635-1.c.  */
3054
3055 static void
3056 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3057 {
3058   int x_1, x_2;
3059   tree c_1 = CHREC_LEFT (c_2);
3060   tree c_0 = CHREC_LEFT (c_1);
3061   lambda_vector dist_v;
3062   int v1, v2, cd;
3063
3064   /* Polynomials with more than 2 variables are not handled yet.  When
3065      the evolution steps are parameters, it is not possible to
3066      represent the dependence using classical distance vectors.  */
3067   if (TREE_CODE (c_0) != INTEGER_CST
3068       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3069       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3070     {
3071       DDR_AFFINE_P (ddr) = false;
3072       return;
3073     }
3074
3075   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3076   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3077
3078   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3079   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3080   v1 = int_cst_value (CHREC_RIGHT (c_1));
3081   v2 = int_cst_value (CHREC_RIGHT (c_2));
3082   cd = gcd (v1, v2);
3083   v1 /= cd;
3084   v2 /= cd;
3085
3086   if (v2 < 0)
3087     {
3088       v2 = -v2;
3089       v1 = -v1;
3090     }
3091
3092   dist_v[x_1] = v2;
3093   dist_v[x_2] = -v1;
3094   save_dist_v (ddr, dist_v);
3095
3096   add_outer_distances (ddr, dist_v, x_1);
3097 }
3098
3099 /* Helper function for the case where DDR_A and DDR_B are the same
3100    access functions.  */
3101
3102 static void
3103 add_other_self_distances (struct data_dependence_relation *ddr)
3104 {
3105   lambda_vector dist_v;
3106   unsigned i;
3107   int index_carry = DDR_NB_LOOPS (ddr);
3108
3109   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3110     {
3111       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3112
3113       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3114         {
3115           if (!evolution_function_is_univariate_p (access_fun))
3116             {
3117               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3118                 {
3119                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3120                   return;
3121                 }
3122
3123               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3124
3125               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3126                 add_multivariate_self_dist (ddr, access_fun);
3127               else
3128                 /* The evolution step is not constant: it varies in
3129                    the outer loop, so this cannot be represented by a
3130                    distance vector.  For example in pr34635.c the
3131                    evolution is {0, +, {0, +, 4}_1}_2.  */
3132                 DDR_AFFINE_P (ddr) = false;
3133
3134               return;
3135             }
3136
3137           index_carry = MIN (index_carry,
3138                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
3139                                                  DDR_LOOP_NEST (ddr)));
3140         }
3141     }
3142
3143   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3144   add_outer_distances (ddr, dist_v, index_carry);
3145 }
3146
3147 static void
3148 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3149 {
3150   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3151
3152   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3153   save_dist_v (ddr, dist_v);
3154 }
3155
3156 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3157    is the case for example when access functions are the same and
3158    equal to a constant, as in:
3159
3160    | loop_1
3161    |   A[3] = ...
3162    |   ... = A[3]
3163    | endloop_1
3164
3165    in which case the distance vectors are (0) and (1).  */
3166
3167 static void
3168 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3169 {
3170   unsigned i, j;
3171
3172   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3173     {
3174       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3175       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3176       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3177
3178       for (j = 0; j < ca->n; j++)
3179         if (affine_function_zero_p (ca->fns[j]))
3180           {
3181             insert_innermost_unit_dist_vector (ddr);
3182             return;
3183           }
3184
3185       for (j = 0; j < cb->n; j++)
3186         if (affine_function_zero_p (cb->fns[j]))
3187           {
3188             insert_innermost_unit_dist_vector (ddr);
3189             return;
3190           }
3191     }
3192 }
3193
3194 /* Compute the classic per loop distance vector.  DDR is the data
3195    dependence relation to build a vector from.  Return false when fail
3196    to represent the data dependence as a distance vector.  */
3197
3198 static bool
3199 build_classic_dist_vector (struct data_dependence_relation *ddr,
3200                            struct loop *loop_nest)
3201 {
3202   bool init_b = false;
3203   int index_carry = DDR_NB_LOOPS (ddr);
3204   lambda_vector dist_v;
3205
3206   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3207     return false;
3208
3209   if (same_access_functions (ddr))
3210     {
3211       /* Save the 0 vector.  */
3212       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3213       save_dist_v (ddr, dist_v);
3214
3215       if (constant_access_functions (ddr))
3216         add_distance_for_zero_overlaps (ddr);
3217
3218       if (DDR_NB_LOOPS (ddr) > 1)
3219         add_other_self_distances (ddr);
3220
3221       return true;
3222     }
3223
3224   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3225   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3226                                     dist_v, &init_b, &index_carry))
3227     return false;
3228
3229   /* Save the distance vector if we initialized one.  */
3230   if (init_b)
3231     {
3232       /* Verify a basic constraint: classic distance vectors should
3233          always be lexicographically positive.
3234
3235          Data references are collected in the order of execution of
3236          the program, thus for the following loop
3237
3238          | for (i = 1; i < 100; i++)
3239          |   for (j = 1; j < 100; j++)
3240          |     {
3241          |       t = T[j+1][i-1];  // A
3242          |       T[j][i] = t + 2;  // B
3243          |     }
3244
3245          references are collected following the direction of the wind:
3246          A then B.  The data dependence tests are performed also
3247          following this order, such that we're looking at the distance
3248          separating the elements accessed by A from the elements later
3249          accessed by B.  But in this example, the distance returned by
3250          test_dep (A, B) is lexicographically negative (-1, 1), that
3251          means that the access A occurs later than B with respect to
3252          the outer loop, ie. we're actually looking upwind.  In this
3253          case we solve test_dep (B, A) looking downwind to the
3254          lexicographically positive solution, that returns the
3255          distance vector (1, -1).  */
3256       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3257         {
3258           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3259           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3260                                               loop_nest))
3261             return false;
3262           compute_subscript_distance (ddr);
3263           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3264                                             save_v, &init_b, &index_carry))
3265             return false;
3266           save_dist_v (ddr, save_v);
3267           DDR_REVERSED_P (ddr) = true;
3268
3269           /* In this case there is a dependence forward for all the
3270              outer loops:
3271
3272              | for (k = 1; k < 100; k++)
3273              |  for (i = 1; i < 100; i++)
3274              |   for (j = 1; j < 100; j++)
3275              |     {
3276              |       t = T[j+1][i-1];  // A
3277              |       T[j][i] = t + 2;  // B
3278              |     }
3279
3280              the vectors are:
3281              (0,  1, -1)
3282              (1,  1, -1)
3283              (1, -1,  1)
3284           */
3285           if (DDR_NB_LOOPS (ddr) > 1)
3286             {
3287               add_outer_distances (ddr, save_v, index_carry);
3288               add_outer_distances (ddr, dist_v, index_carry);
3289             }
3290         }
3291       else
3292         {
3293           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3294           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3295
3296           if (DDR_NB_LOOPS (ddr) > 1)
3297             {
3298               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3299
3300               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3301                                                   DDR_A (ddr), loop_nest))
3302                 return false;
3303               compute_subscript_distance (ddr);
3304               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3305                                                 opposite_v, &init_b,
3306                                                 &index_carry))
3307                 return false;
3308
3309               save_dist_v (ddr, save_v);
3310               add_outer_distances (ddr, dist_v, index_carry);
3311               add_outer_distances (ddr, opposite_v, index_carry);
3312             }
3313           else
3314             save_dist_v (ddr, save_v);
3315         }
3316     }
3317   else
3318     {
3319       /* There is a distance of 1 on all the outer loops: Example:
3320          there is a dependence of distance 1 on loop_1 for the array A.
3321
3322          | loop_1
3323          |   A[5] = ...
3324          | endloop
3325       */
3326       add_outer_distances (ddr, dist_v,
3327                            lambda_vector_first_nz (dist_v,
3328                                                    DDR_NB_LOOPS (ddr), 0));
3329     }
3330
3331   if (dump_file && (dump_flags & TDF_DETAILS))
3332     {
3333       unsigned i;
3334
3335       fprintf (dump_file, "(build_classic_dist_vector\n");
3336       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3337         {
3338           fprintf (dump_file, "  dist_vector = (");
3339           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3340                                DDR_NB_LOOPS (ddr));
3341           fprintf (dump_file, "  )\n");
3342         }
3343       fprintf (dump_file, ")\n");
3344     }
3345
3346   return true;
3347 }
3348
3349 /* Return the direction for a given distance.
3350    FIXME: Computing dir this way is suboptimal, since dir can catch
3351    cases that dist is unable to represent.  */
3352
3353 static inline enum data_dependence_direction
3354 dir_from_dist (int dist)
3355 {
3356   if (dist > 0)
3357     return dir_positive;
3358   else if (dist < 0)
3359     return dir_negative;
3360   else
3361     return dir_equal;
3362 }
3363
3364 /* Compute the classic per loop direction vector.  DDR is the data
3365    dependence relation to build a vector from.  */
3366
3367 static void
3368 build_classic_dir_vector (struct data_dependence_relation *ddr)
3369 {
3370   unsigned i, j;
3371   lambda_vector dist_v;
3372
3373   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3374     {
3375       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3376
3377       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3378         dir_v[j] = dir_from_dist (dist_v[j]);
3379
3380       save_dir_v (ddr, dir_v);
3381     }
3382 }
3383
3384 /* Helper function.  Returns true when there is a dependence between
3385    data references DRA and DRB.  */
3386
3387 static bool
3388 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3389                                struct data_reference *dra,
3390                                struct data_reference *drb,
3391                                struct loop *loop_nest)
3392 {
3393   unsigned int i;
3394   tree last_conflicts;
3395   struct subscript *subscript;
3396
3397   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3398        i++)
3399     {
3400       conflict_function *overlaps_a, *overlaps_b;
3401
3402       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3403                                       DR_ACCESS_FN (drb, i),
3404                                       &overlaps_a, &overlaps_b,
3405                                       &last_conflicts, loop_nest);
3406
3407       if (CF_NOT_KNOWN_P (overlaps_a)
3408           || CF_NOT_KNOWN_P (overlaps_b))
3409         {
3410           finalize_ddr_dependent (ddr, chrec_dont_know);
3411           dependence_stats.num_dependence_undetermined++;
3412           free_conflict_function (overlaps_a);
3413           free_conflict_function (overlaps_b);
3414           return false;
3415         }
3416
3417       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3418                || CF_NO_DEPENDENCE_P (overlaps_b))
3419         {
3420           finalize_ddr_dependent (ddr, chrec_known);
3421           dependence_stats.num_dependence_independent++;
3422           free_conflict_function (overlaps_a);
3423           free_conflict_function (overlaps_b);
3424           return false;
3425         }
3426
3427       else
3428         {
3429           if (SUB_CONFLICTS_IN_A (subscript))
3430             free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3431           if (SUB_CONFLICTS_IN_B (subscript))
3432             free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3433
3434           SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3435           SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3436           SUB_LAST_CONFLICT (subscript) = last_conflicts;
3437         }
3438     }
3439
3440   return true;
3441 }
3442
3443 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3444
3445 static void
3446 subscript_dependence_tester (struct data_dependence_relation *ddr,
3447                              struct loop *loop_nest)
3448 {
3449
3450   if (dump_file && (dump_flags & TDF_DETAILS))
3451     fprintf (dump_file, "(subscript_dependence_tester \n");
3452
3453   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3454     dependence_stats.num_dependence_dependent++;
3455
3456   compute_subscript_distance (ddr);
3457   if (build_classic_dist_vector (ddr, loop_nest))
3458     build_classic_dir_vector (ddr);
3459
3460   if (dump_file && (dump_flags & TDF_DETAILS))
3461     fprintf (dump_file, ")\n");
3462 }
3463
3464 /* Returns true when all the access functions of A are affine or
3465    constant with respect to LOOP_NEST.  */
3466
3467 static bool
3468 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3469                                            const struct loop *loop_nest)
3470 {
3471   unsigned int i;
3472   VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3473   tree t;
3474
3475   FOR_EACH_VEC_ELT (tree, fns, i, t)
3476     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3477         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3478       return false;
3479
3480   return true;
3481 }
3482
3483 /* Initializes an equation for an OMEGA problem using the information
3484    contained in the ACCESS_FUN.  Returns true when the operation
3485    succeeded.
3486
3487    PB is the omega constraint system.
3488    EQ is the number of the equation to be initialized.
3489    OFFSET is used for shifting the variables names in the constraints:
3490    a constrain is composed of 2 * the number of variables surrounding
3491    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3492    then it is set to n.
3493    ACCESS_FUN is expected to be an affine chrec.  */
3494
3495 static bool
3496 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3497                        unsigned int offset, tree access_fun,
3498                        struct data_dependence_relation *ddr)
3499 {
3500   switch (TREE_CODE (access_fun))
3501     {
3502     case POLYNOMIAL_CHREC:
3503       {
3504         tree left = CHREC_LEFT (access_fun);
3505         tree right = CHREC_RIGHT (access_fun);
3506         int var = CHREC_VARIABLE (access_fun);
3507         unsigned var_idx;
3508
3509         if (TREE_CODE (right) != INTEGER_CST)
3510           return false;
3511
3512         var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3513         pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3514
3515         /* Compute the innermost loop index.  */
3516         DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3517
3518         if (offset == 0)
3519           pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3520             += int_cst_value (right);
3521
3522         switch (TREE_CODE (left))
3523           {
3524           case POLYNOMIAL_CHREC:
3525             return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3526
3527           case INTEGER_CST:
3528             pb->eqs[eq].coef[0] += int_cst_value (left);
3529             return true;
3530
3531           default:
3532             return false;
3533           }
3534       }
3535
3536     case INTEGER_CST:
3537       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3538       return true;
3539
3540     default:
3541       return false;
3542     }
3543 }
3544
3545 /* As explained in the comments preceding init_omega_for_ddr, we have
3546    to set up a system for each loop level, setting outer loops
3547    variation to zero, and current loop variation to positive or zero.
3548    Save each lexico positive distance vector.  */
3549
3550 static void
3551 omega_extract_distance_vectors (omega_pb pb,
3552                                 struct data_dependence_relation *ddr)
3553 {
3554   int eq, geq;
3555   unsigned i, j;
3556   struct loop *loopi, *loopj;
3557   enum omega_result res;
3558
3559   /* Set a new problem for each loop in the nest.  The basis is the
3560      problem that we have initialized until now.  On top of this we
3561      add new constraints.  */
3562   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3563          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3564     {
3565       int dist = 0;
3566       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3567                                            DDR_NB_LOOPS (ddr));
3568
3569       omega_copy_problem (copy, pb);
3570
3571       /* For all the outer loops "loop_j", add "dj = 0".  */
3572       for (j = 0;
3573            j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3574         {
3575           eq = omega_add_zero_eq (copy, omega_black);
3576           copy->eqs[eq].coef[j + 1] = 1;
3577         }
3578
3579       /* For "loop_i", add "0 <= di".  */
3580       geq = omega_add_zero_geq (copy, omega_black);
3581       copy->geqs[geq].coef[i + 1] = 1;
3582
3583       /* Reduce the constraint system, and test that the current
3584          problem is feasible.  */
3585       res = omega_simplify_problem (copy);
3586       if (res == omega_false
3587           || res == omega_unknown
3588           || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3589         goto next_problem;
3590
3591       for (eq = 0; eq < copy->num_subs; eq++)
3592         if (copy->subs[eq].key == (int) i + 1)
3593           {
3594             dist = copy->subs[eq].coef[0];
3595             goto found_dist;
3596           }
3597
3598       if (dist == 0)
3599         {
3600           /* Reinitialize problem...  */
3601           omega_copy_problem (copy, pb);
3602           for (j = 0;
3603                j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3604             {
3605               eq = omega_add_zero_eq (copy, omega_black);
3606               copy->eqs[eq].coef[j + 1] = 1;
3607             }
3608
3609           /* ..., but this time "di = 1".  */
3610           eq = omega_add_zero_eq (copy, omega_black);
3611           copy->eqs[eq].coef[i + 1] = 1;
3612           copy->eqs[eq].coef[0] = -1;
3613
3614           res = omega_simplify_problem (copy);
3615           if (res == omega_false
3616               || res == omega_unknown
3617               || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3618             goto next_problem;
3619
3620           for (eq = 0; eq < copy->num_subs; eq++)
3621             if (copy->subs[eq].key == (int) i + 1)
3622               {
3623                 dist = copy->subs[eq].coef[0];
3624                 goto found_dist;
3625               }
3626         }
3627
3628     found_dist:;
3629       /* Save the lexicographically positive distance vector.  */
3630       if (dist >= 0)
3631         {
3632           lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3633           lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3634
3635           dist_v[i] = dist;
3636
3637           for (eq = 0; eq < copy->num_subs; eq++)
3638             if (copy->subs[eq].key > 0)
3639               {
3640                 dist = copy->subs[eq].coef[0];
3641                 dist_v[copy->subs[eq].key - 1] = dist;
3642               }
3643
3644           for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3645             dir_v[j] = dir_from_dist (dist_v[j]);
3646
3647           save_dist_v (ddr, dist_v);
3648           save_dir_v (ddr, dir_v);
3649         }
3650
3651     next_problem:;
3652       omega_free_problem (copy);
3653     }
3654 }
3655
3656 /* This is called for each subscript of a tuple of data references:
3657    insert an equality for representing the conflicts.  */
3658
3659 static bool
3660 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3661                        struct data_dependence_relation *ddr,
3662                        omega_pb pb, bool *maybe_dependent)
3663 {
3664   int eq;
3665   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3666                                      TREE_TYPE (access_fun_b));
3667   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3668   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3669   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3670   tree minus_one;
3671
3672   /* When the fun_a - fun_b is not constant, the dependence is not
3673      captured by the classic distance vector representation.  */
3674   if (TREE_CODE (difference) != INTEGER_CST)
3675     return false;
3676
3677   /* ZIV test.  */
3678   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3679     {
3680       /* There is no dependence.  */
3681       *maybe_dependent = false;
3682       return true;
3683     }
3684
3685   minus_one = build_int_cst (type, -1);
3686   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3687
3688   eq = omega_add_zero_eq (pb, omega_black);
3689   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3690       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3691     /* There is probably a dependence, but the system of
3692        constraints cannot be built: answer "don't know".  */
3693     return false;
3694
3695   /* GCD test.  */
3696   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3697       && !int_divides_p (lambda_vector_gcd
3698                          ((lambda_vector) &(pb->eqs[eq].coef[1]),
3699                           2 * DDR_NB_LOOPS (ddr)),
3700                          pb->eqs[eq].coef[0]))
3701     {
3702       /* There is no dependence.  */
3703       *maybe_dependent = false;
3704       return true;
3705     }
3706
3707   return true;
3708 }
3709
3710 /* Helper function, same as init_omega_for_ddr but specialized for
3711    data references A and B.  */
3712
3713 static bool
3714 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3715                       struct data_dependence_relation *ddr,
3716                       omega_pb pb, bool *maybe_dependent)
3717 {
3718   unsigned i;
3719   int ineq;
3720   struct loop *loopi;
3721   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3722
3723   /* Insert an equality per subscript.  */
3724   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3725     {
3726       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3727                                   ddr, pb, maybe_dependent))
3728         return false;
3729       else if (*maybe_dependent == false)
3730         {
3731           /* There is no dependence.  */
3732           DDR_ARE_DEPENDENT (ddr) = chrec_known;
3733           return true;
3734         }
3735     }
3736
3737   /* Insert inequalities: constraints corresponding to the iteration
3738      domain, i.e. the loops surrounding the references "loop_x" and
3739      the distance variables "dx".  The layout of the OMEGA
3740      representation is as follows:
3741      - coef[0] is the constant
3742      - coef[1..nb_loops] are the protected variables that will not be
3743      removed by the solver: the "dx"
3744      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3745   */
3746   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3747          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3748     {
3749       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3750
3751       /* 0 <= loop_x */
3752       ineq = omega_add_zero_geq (pb, omega_black);
3753       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3754
3755       /* 0 <= loop_x + dx */
3756       ineq = omega_add_zero_geq (pb, omega_black);
3757       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3758       pb->geqs[ineq].coef[i + 1] = 1;
3759
3760       if (nbi != -1)
3761         {
3762           /* loop_x <= nb_iters */
3763           ineq = omega_add_zero_geq (pb, omega_black);
3764           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3765           pb->geqs[ineq].coef[0] = nbi;
3766
3767           /* loop_x + dx <= nb_iters */
3768           ineq = omega_add_zero_geq (pb, omega_black);
3769           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3770           pb->geqs[ineq].coef[i + 1] = -1;
3771           pb->geqs[ineq].coef[0] = nbi;
3772
3773           /* A step "dx" bigger than nb_iters is not feasible, so
3774              add "0 <= nb_iters + dx",  */
3775           ineq = omega_add_zero_geq (pb, omega_black);
3776           pb->geqs[ineq].coef[i + 1] = 1;
3777           pb->geqs[ineq].coef[0] = nbi;
3778           /* and "dx <= nb_iters".  */
3779           ineq = omega_add_zero_geq (pb, omega_black);
3780           pb->geqs[ineq].coef[i + 1] = -1;
3781           pb->geqs[ineq].coef[0] = nbi;
3782         }
3783     }
3784
3785   omega_extract_distance_vectors (pb, ddr);
3786
3787   return true;
3788 }
3789
3790 /* Sets up the Omega dependence problem for the data dependence
3791    relation DDR.  Returns false when the constraint system cannot be
3792    built, ie. when the test answers "don't know".  Returns true
3793    otherwise, and when independence has been proved (using one of the
3794    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3795    set MAYBE_DEPENDENT to true.
3796
3797    Example: for setting up the dependence system corresponding to the
3798    conflicting accesses
3799
3800    | loop_i
3801    |   loop_j
3802    |     A[i, i+1] = ...
3803    |     ... A[2*j, 2*(i + j)]
3804    |   endloop_j
3805    | endloop_i
3806
3807    the following constraints come from the iteration domain:
3808
3809    0 <= i <= Ni
3810    0 <= i + di <= Ni
3811    0 <= j <= Nj
3812    0 <= j + dj <= Nj
3813
3814    where di, dj are the distance variables.  The constraints
3815    representing the conflicting elements are:
3816
3817    i = 2 * (j + dj)
3818    i + 1 = 2 * (i + di + j + dj)
3819
3820    For asking that the resulting distance vector (di, dj) be
3821    lexicographically positive, we insert the constraint "di >= 0".  If
3822    "di = 0" in the solution, we fix that component to zero, and we
3823    look at the inner loops: we set a new problem where all the outer
3824    loop distances are zero, and fix this inner component to be
3825    positive.  When one of the components is positive, we save that
3826    distance, and set a new problem where the distance on this loop is
3827    zero, searching for other distances in the inner loops.  Here is
3828    the classic example that illustrates that we have to set for each
3829    inner loop a new problem:
3830
3831    | loop_1
3832    |   loop_2
3833    |     A[10]
3834    |   endloop_2
3835    | endloop_1
3836
3837    we have to save two distances (1, 0) and (0, 1).
3838
3839    Given two array references, refA and refB, we have to set the
3840    dependence problem twice, refA vs. refB and refB vs. refA, and we
3841    cannot do a single test, as refB might occur before refA in the
3842    inner loops, and the contrary when considering outer loops: ex.
3843
3844    | loop_0
3845    |   loop_1
3846    |     loop_2
3847    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3848    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3849    |     endloop_2
3850    |   endloop_1
3851    | endloop_0
3852
3853    refB touches the elements in T before refA, and thus for the same
3854    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3855    but for successive loop_0 iterations, we have (1, -1, 1)
3856
3857    The Omega solver expects the distance variables ("di" in the
3858    previous example) to come first in the constraint system (as
3859    variables to be protected, or "safe" variables), the constraint
3860    system is built using the following layout:
3861
3862    "cst | distance vars | index vars".
3863 */
3864
3865 static bool
3866 init_omega_for_ddr (struct data_dependence_relation *ddr,
3867                     bool *maybe_dependent)
3868 {
3869   omega_pb pb;
3870   bool res = false;
3871
3872   *maybe_dependent = true;
3873
3874   if (same_access_functions (ddr))
3875     {
3876       unsigned j;
3877       lambda_vector dir_v;
3878
3879       /* Save the 0 vector.  */
3880       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3881       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3882       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3883         dir_v[j] = dir_equal;
3884       save_dir_v (ddr, dir_v);
3885
3886       /* Save the dependences carried by outer loops.  */
3887       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3888       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3889                                   maybe_dependent);
3890       omega_free_problem (pb);
3891       return res;
3892     }
3893
3894   /* Omega expects the protected variables (those that have to be kept
3895      after elimination) to appear first in the constraint system.
3896      These variables are the distance variables.  In the following
3897      initialization we declare NB_LOOPS safe variables, and the total
3898      number of variables for the constraint system is 2*NB_LOOPS.  */
3899   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3900   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3901                               maybe_dependent);
3902   omega_free_problem (pb);
3903
3904   /* Stop computation if not decidable, or no dependence.  */
3905   if (res == false || *maybe_dependent == false)
3906     return res;
3907
3908   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3909   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3910                               maybe_dependent);
3911   omega_free_problem (pb);
3912
3913   return res;
3914 }
3915
3916 /* Return true when DDR contains the same information as that stored
3917    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
3918
3919 static bool
3920 ddr_consistent_p (FILE *file,
3921                   struct data_dependence_relation *ddr,
3922                   VEC (lambda_vector, heap) *dist_vects,
3923                   VEC (lambda_vector, heap) *dir_vects)
3924 {
3925   unsigned int i, j;
3926
3927   /* If dump_file is set, output there.  */
3928   if (dump_file && (dump_flags & TDF_DETAILS))
3929     file = dump_file;
3930
3931   if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3932     {
3933       lambda_vector b_dist_v;
3934       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3935                VEC_length (lambda_vector, dist_vects),
3936                DDR_NUM_DIST_VECTS (ddr));
3937
3938       fprintf (file, "Banerjee dist vectors:\n");
3939       FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3940         print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3941
3942       fprintf (file, "Omega dist vectors:\n");
3943       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3944         print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3945
3946       fprintf (file, "data dependence relation:\n");
3947       dump_data_dependence_relation (file, ddr);
3948
3949       fprintf (file, ")\n");
3950       return false;
3951     }
3952
3953   if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3954     {
3955       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3956                VEC_length (lambda_vector, dir_vects),
3957                DDR_NUM_DIR_VECTS (ddr));
3958       return false;
3959     }
3960
3961   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3962     {
3963       lambda_vector a_dist_v;
3964       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3965
3966       /* Distance vectors are not ordered in the same way in the DDR
3967          and in the DIST_VECTS: search for a matching vector.  */
3968       FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3969         if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3970           break;
3971
3972       if (j == VEC_length (lambda_vector, dist_vects))
3973         {
3974           fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3975           print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3976           fprintf (file, "not found in Omega dist vectors:\n");
3977           print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3978           fprintf (file, "data dependence relation:\n");
3979           dump_data_dependence_relation (file, ddr);
3980           fprintf (file, ")\n");
3981         }
3982     }
3983
3984   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3985     {
3986       lambda_vector a_dir_v;
3987       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3988
3989       /* Direction vectors are not ordered in the same way in the DDR
3990          and in the DIR_VECTS: search for a matching vector.  */
3991       FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3992         if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3993           break;
3994
3995       if (j == VEC_length (lambda_vector, dist_vects))
3996         {
3997           fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3998           print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3999           fprintf (file, "not found in Omega dir vectors:\n");
4000           print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4001           fprintf (file, "data dependence relation:\n");
4002           dump_data_dependence_relation (file, ddr);
4003           fprintf (file, ")\n");
4004         }
4005     }
4006
4007   return true;
4008 }
4009
4010 /* This computes the affine dependence relation between A and B with
4011    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4012    independence between two accesses, while CHREC_DONT_KNOW is used
4013    for representing the unknown relation.
4014
4015    Note that it is possible to stop the computation of the dependence
4016    relation the first time we detect a CHREC_KNOWN element for a given
4017    subscript.  */
4018
4019 static void
4020 compute_affine_dependence (struct data_dependence_relation *ddr,
4021                            struct loop *loop_nest)
4022 {
4023   struct data_reference *dra = DDR_A (ddr);
4024   struct data_reference *drb = DDR_B (ddr);
4025
4026   if (dump_file && (dump_flags & TDF_DETAILS))
4027     {
4028       fprintf (dump_file, "(compute_affine_dependence\n");
4029       fprintf (dump_file, "  (stmt_a = \n");
4030       print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4031       fprintf (dump_file, ")\n  (stmt_b = \n");
4032       print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4033       fprintf (dump_file, ")\n");
4034     }
4035
4036   /* Analyze only when the dependence relation is not yet known.  */
4037   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4038       && !DDR_SELF_REFERENCE (ddr))
4039     {
4040       dependence_stats.num_dependence_tests++;
4041
4042       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4043           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4044         {
4045           if (flag_check_data_deps)
4046             {
4047               /* Compute the dependences using the first algorithm.  */
4048               subscript_dependence_tester (ddr, loop_nest);
4049
4050               if (dump_file && (dump_flags & TDF_DETAILS))
4051                 {
4052                   fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4053                   dump_data_dependence_relation (dump_file, ddr);
4054                 }
4055
4056               if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4057                 {
4058                   bool maybe_dependent;
4059                   VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4060
4061                   /* Save the result of the first DD analyzer.  */
4062                   dist_vects = DDR_DIST_VECTS (ddr);
4063                   dir_vects = DDR_DIR_VECTS (ddr);
4064
4065                   /* Reset the information.  */
4066                   DDR_DIST_VECTS (ddr) = NULL;
4067                   DDR_DIR_VECTS (ddr) = NULL;
4068
4069                   /* Compute the same information using Omega.  */
4070                   if (!init_omega_for_ddr (ddr, &maybe_dependent))
4071                     goto csys_dont_know;
4072
4073                   if (dump_file && (dump_flags & TDF_DETAILS))
4074                     {
4075                       fprintf (dump_file, "Omega Analyzer\n");
4076                       dump_data_dependence_relation (dump_file, ddr);
4077                     }
4078
4079                   /* Check that we get the same information.  */
4080                   if (maybe_dependent)
4081                     gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4082                                                   dir_vects));
4083                 }
4084             }
4085           else
4086             subscript_dependence_tester (ddr, loop_nest);
4087         }
4088
4089       /* As a last case, if the dependence cannot be determined, or if
4090          the dependence is considered too difficult to determine, answer
4091          "don't know".  */
4092       else
4093         {
4094         csys_dont_know:;
4095           dependence_stats.num_dependence_undetermined++;
4096
4097           if (dump_file && (dump_flags & TDF_DETAILS))
4098             {
4099               fprintf (dump_file, "Data ref a:\n");
4100               dump_data_reference (dump_file, dra);
4101               fprintf (dump_file, "Data ref b:\n");
4102               dump_data_reference (dump_file, drb);
4103               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4104             }
4105           finalize_ddr_dependent (ddr, chrec_dont_know);
4106         }
4107     }
4108
4109   if (dump_file && (dump_flags & TDF_DETAILS))
4110     fprintf (dump_file, ")\n");
4111 }
4112
4113 /* This computes the dependence relation for the same data
4114    reference into DDR.  */
4115
4116 static void
4117 compute_self_dependence (struct data_dependence_relation *ddr)
4118 {
4119   unsigned int i;
4120   struct subscript *subscript;
4121
4122   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4123     return;
4124
4125   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4126        i++)
4127     {
4128       if (SUB_CONFLICTS_IN_A (subscript))
4129         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4130       if (SUB_CONFLICTS_IN_B (subscript))
4131         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4132
4133       /* The accessed index overlaps for each iteration.  */
4134       SUB_CONFLICTS_IN_A (subscript)
4135         = conflict_fn (1, affine_fn_cst (integer_zero_node));
4136       SUB_CONFLICTS_IN_B (subscript)
4137         = conflict_fn (1, affine_fn_cst (integer_zero_node));
4138       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4139     }
4140
4141   /* The distance vector is the zero vector.  */
4142   save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4143   save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4144 }
4145
4146 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4147    the data references in DATAREFS, in the LOOP_NEST.  When
4148    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4149    relations.  */
4150
4151 void
4152 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4153                          VEC (ddr_p, heap) **dependence_relations,
4154                          VEC (loop_p, heap) *loop_nest,
4155                          bool compute_self_and_rr)
4156 {
4157   struct data_dependence_relation *ddr;
4158   struct data_reference *a, *b;
4159   unsigned int i, j;
4160
4161   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4162     for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4163       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4164         {
4165           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4166           VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4167           if (loop_nest)
4168             compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4169         }
4170
4171   if (compute_self_and_rr)
4172     FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4173       {
4174         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4175         VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4176         compute_self_dependence (ddr);
4177       }
4178 }
4179
4180 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4181    true if STMT clobbers memory, false otherwise.  */
4182
4183 bool
4184 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4185 {
4186   bool clobbers_memory = false;
4187   data_ref_loc *ref;
4188   tree *op0, *op1;
4189   enum gimple_code stmt_code = gimple_code (stmt);
4190
4191   *references = NULL;
4192
4193   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4194      Calls have side-effects, except those to const or pure
4195      functions.  */
4196   if ((stmt_code == GIMPLE_CALL
4197        && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4198       || (stmt_code == GIMPLE_ASM
4199           && gimple_asm_volatile_p (stmt)))
4200     clobbers_memory = true;
4201
4202   if (!gimple_vuse (stmt))
4203     return clobbers_memory;
4204
4205   if (stmt_code == GIMPLE_ASSIGN)
4206     {
4207       tree base;
4208       op0 = gimple_assign_lhs_ptr (stmt);
4209       op1 = gimple_assign_rhs1_ptr (stmt);
4210
4211       if (DECL_P (*op1)
4212           || (REFERENCE_CLASS_P (*op1)
4213               && (base = get_base_address (*op1))
4214               && TREE_CODE (base) != SSA_NAME))
4215         {
4216           ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4217           ref->pos = op1;
4218           ref->is_read = true;
4219         }
4220     }
4221   else if (stmt_code == GIMPLE_CALL)
4222     {
4223       unsigned i, n;
4224
4225       op0 = gimple_call_lhs_ptr (stmt);
4226       n = gimple_call_num_args (stmt);
4227       for (i = 0; i < n; i++)
4228         {
4229           op1 = gimple_call_arg_ptr (stmt, i);
4230
4231           if (DECL_P (*op1)
4232               || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4233             {
4234               ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4235               ref->pos = op1;
4236               ref->is_read = true;
4237             }
4238         }
4239     }
4240   else
4241     return clobbers_memory;
4242
4243   if (*op0
4244       && (DECL_P (*op0)
4245           || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4246     {
4247       ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4248       ref->pos = op0;
4249       ref->is_read = false;
4250     }
4251   return clobbers_memory;
4252 }
4253
4254 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4255    reference, returns false, otherwise returns true.  NEST is the outermost
4256    loop of the loop nest in which the references should be analyzed.  */
4257
4258 bool
4259 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4260                               VEC (data_reference_p, heap) **datarefs)
4261 {
4262   unsigned i;
4263   VEC (data_ref_loc, heap) *references;
4264   data_ref_loc *ref;
4265   bool ret = true;
4266   data_reference_p dr;
4267
4268   if (get_references_in_stmt (stmt, &references))
4269     {
4270       VEC_free (data_ref_loc, heap, references);
4271       return false;
4272     }
4273
4274   FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4275     {
4276       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4277                             *ref->pos, stmt, ref->is_read);
4278       gcc_assert (dr != NULL);
4279       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4280     }
4281   VEC_free (data_ref_loc, heap, references);
4282   return ret;
4283 }
4284
4285 /* Stores the data references in STMT to DATAREFS.  If there is an
4286    unanalyzable reference, returns false, otherwise returns true.
4287    NEST is the outermost loop of the loop nest in which the references
4288    should be instantiated, LOOP is the loop in which the references
4289    should be analyzed.  */
4290
4291 bool
4292 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4293                                        VEC (data_reference_p, heap) **datarefs)
4294 {
4295   unsigned i;
4296   VEC (data_ref_loc, heap) *references;
4297   data_ref_loc *ref;
4298   bool ret = true;
4299   data_reference_p dr;
4300
4301   if (get_references_in_stmt (stmt, &references))
4302     {
4303       VEC_free (data_ref_loc, heap, references);
4304       return false;
4305     }
4306
4307   FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4308     {
4309       dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4310       gcc_assert (dr != NULL);
4311       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4312     }
4313
4314   VEC_free (data_ref_loc, heap, references);
4315   return ret;
4316 }
4317
4318 /* Search the data references in LOOP, and record the information into
4319    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4320    difficult case, returns NULL_TREE otherwise.  */
4321
4322 tree
4323 find_data_references_in_bb (struct loop *loop, basic_block bb,
4324                             VEC (data_reference_p, heap) **datarefs)
4325 {
4326   gimple_stmt_iterator bsi;
4327
4328   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4329     {
4330       gimple stmt = gsi_stmt (bsi);
4331
4332       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4333         {
4334           struct data_reference *res;
4335           res = XCNEW (struct data_reference);
4336           VEC_safe_push (data_reference_p, heap, *datarefs, res);
4337
4338           return chrec_dont_know;
4339         }
4340     }
4341
4342   return NULL_TREE;
4343 }
4344
4345 /* Search the data references in LOOP, and record the information into
4346    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4347    difficult case, returns NULL_TREE otherwise.
4348
4349    TODO: This function should be made smarter so that it can handle address
4350    arithmetic as if they were array accesses, etc.  */
4351
4352 tree
4353 find_data_references_in_loop (struct loop *loop,
4354                               VEC (data_reference_p, heap) **datarefs)
4355 {
4356   basic_block bb, *bbs;
4357   unsigned int i;
4358
4359   bbs = get_loop_body_in_dom_order (loop);
4360
4361   for (i = 0; i < loop->num_nodes; i++)
4362     {
4363       bb = bbs[i];
4364
4365       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4366         {
4367           free (bbs);
4368           return chrec_dont_know;
4369         }
4370     }
4371   free (bbs);
4372
4373   return NULL_TREE;
4374 }
4375
4376 /* Recursive helper function.  */
4377
4378 static bool
4379 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4380 {
4381   /* Inner loops of the nest should not contain siblings.  Example:
4382      when there are two consecutive loops,
4383
4384      | loop_0
4385      |   loop_1
4386      |     A[{0, +, 1}_1]
4387      |   endloop_1
4388      |   loop_2
4389      |     A[{0, +, 1}_2]
4390      |   endloop_2
4391      | endloop_0
4392
4393      the dependence relation cannot be captured by the distance
4394      abstraction.  */
4395   if (loop->next)
4396     return false;
4397
4398   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4399   if (loop->inner)
4400     return find_loop_nest_1 (loop->inner, loop_nest);
4401   return true;
4402 }
4403
4404 /* Return false when the LOOP is not well nested.  Otherwise return
4405    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4406    contain the loops from the outermost to the innermost, as they will
4407    appear in the classic distance vector.  */
4408
4409 bool
4410 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4411 {
4412   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4413   if (loop->inner)
4414     return find_loop_nest_1 (loop->inner, loop_nest);
4415   return true;
4416 }
4417
4418 /* Returns true when the data dependences have been computed, false otherwise.
4419    Given a loop nest LOOP, the following vectors are returned:
4420    DATAREFS is initialized to all the array elements contained in this loop,
4421    DEPENDENCE_RELATIONS contains the relations between the data references.
4422    Compute read-read and self relations if
4423    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4424
4425 bool
4426 compute_data_dependences_for_loop (struct loop *loop,
4427                                    bool compute_self_and_read_read_dependences,
4428                                    VEC (loop_p, heap) **loop_nest,
4429                                    VEC (data_reference_p, heap) **datarefs,
4430                                    VEC (ddr_p, heap) **dependence_relations)
4431 {
4432   bool res = true;
4433
4434   memset (&dependence_stats, 0, sizeof (dependence_stats));
4435
4436   /* If the loop nest is not well formed, or one of the data references
4437      is not computable, give up without spending time to compute other
4438      dependences.  */
4439   if (!loop
4440       || !find_loop_nest (loop, loop_nest)
4441       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4442     {
4443       struct data_dependence_relation *ddr;
4444
4445       /* Insert a single relation into dependence_relations:
4446          chrec_dont_know.  */
4447       ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4448       VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4449       res = false;
4450     }
4451   else
4452     compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4453                              compute_self_and_read_read_dependences);
4454
4455   if (dump_file && (dump_flags & TDF_STATS))
4456     {
4457       fprintf (dump_file, "Dependence tester statistics:\n");
4458
4459       fprintf (dump_file, "Number of dependence tests: %d\n",
4460                dependence_stats.num_dependence_tests);
4461       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4462                dependence_stats.num_dependence_dependent);
4463       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4464                dependence_stats.num_dependence_independent);
4465       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4466                dependence_stats.num_dependence_undetermined);
4467
4468       fprintf (dump_file, "Number of subscript tests: %d\n",
4469                dependence_stats.num_subscript_tests);
4470       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4471                dependence_stats.num_subscript_undetermined);
4472       fprintf (dump_file, "Number of same subscript function: %d\n",
4473                dependence_stats.num_same_subscript_function);
4474
4475       fprintf (dump_file, "Number of ziv tests: %d\n",
4476                dependence_stats.num_ziv);
4477       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4478                dependence_stats.num_ziv_dependent);
4479       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4480                dependence_stats.num_ziv_independent);
4481       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4482                dependence_stats.num_ziv_unimplemented);
4483
4484       fprintf (dump_file, "Number of siv tests: %d\n",
4485                dependence_stats.num_siv);
4486       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4487                dependence_stats.num_siv_dependent);
4488       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4489                dependence_stats.num_siv_independent);
4490       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4491                dependence_stats.num_siv_unimplemented);
4492
4493       fprintf (dump_file, "Number of miv tests: %d\n",
4494                dependence_stats.num_miv);
4495       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4496                dependence_stats.num_miv_dependent);
4497       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4498                dependence_stats.num_miv_independent);
4499       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4500                dependence_stats.num_miv_unimplemented);
4501     }
4502
4503   return res;
4504 }
4505
4506 /* Returns true when the data dependences for the basic block BB have been
4507    computed, false otherwise.
4508    DATAREFS is initialized to all the array elements contained in this basic
4509    block, DEPENDENCE_RELATIONS contains the relations between the data
4510    references. Compute read-read and self relations if
4511    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4512 bool
4513 compute_data_dependences_for_bb (basic_block bb,
4514                                  bool compute_self_and_read_read_dependences,
4515                                  VEC (data_reference_p, heap) **datarefs,
4516                                  VEC (ddr_p, heap) **dependence_relations)
4517 {
4518   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4519     return false;
4520
4521   compute_all_dependences (*datarefs, dependence_relations, NULL,
4522                            compute_self_and_read_read_dependences);
4523   return true;
4524 }
4525
4526 /* Entry point (for testing only).  Analyze all the data references
4527    and the dependence relations in LOOP.
4528
4529    The data references are computed first.
4530
4531    A relation on these nodes is represented by a complete graph.  Some
4532    of the relations could be of no interest, thus the relations can be
4533    computed on demand.
4534
4535    In the following function we compute all the relations.  This is
4536    just a first implementation that is here for:
4537    - for showing how to ask for the dependence relations,
4538    - for the debugging the whole dependence graph,
4539    - for the dejagnu testcases and maintenance.
4540
4541    It is possible to ask only for a part of the graph, avoiding to
4542    compute the whole dependence graph.  The computed dependences are
4543    stored in a knowledge base (KB) such that later queries don't
4544    recompute the same information.  The implementation of this KB is
4545    transparent to the optimizer, and thus the KB can be changed with a
4546    more efficient implementation, or the KB could be disabled.  */
4547 static void
4548 analyze_all_data_dependences (struct loop *loop)
4549 {
4550   unsigned int i;
4551   int nb_data_refs = 10;
4552   VEC (data_reference_p, heap) *datarefs =
4553     VEC_alloc (data_reference_p, heap, nb_data_refs);
4554   VEC (ddr_p, heap) *dependence_relations =
4555     VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4556   VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4557
4558   /* Compute DDs on the whole function.  */
4559   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4560                                      &dependence_relations);
4561
4562   if (dump_file)
4563     {
4564       dump_data_dependence_relations (dump_file, dependence_relations);
4565       fprintf (dump_file, "\n\n");
4566
4567       if (dump_flags & TDF_DETAILS)
4568         dump_dist_dir_vectors (dump_file, dependence_relations);
4569
4570       if (dump_flags & TDF_STATS)
4571         {
4572           unsigned nb_top_relations = 0;
4573           unsigned nb_bot_relations = 0;
4574           unsigned nb_chrec_relations = 0;
4575           struct data_dependence_relation *ddr;
4576
4577           FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4578             {
4579               if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4580                 nb_top_relations++;
4581
4582               else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4583                 nb_bot_relations++;
4584
4585               else
4586                 nb_chrec_relations++;
4587             }
4588
4589           gather_stats_on_scev_database ();
4590         }
4591     }
4592
4593   VEC_free (loop_p, heap, loop_nest);
4594   free_dependence_relations (dependence_relations);
4595   free_data_refs (datarefs);
4596 }
4597
4598 /* Computes all the data dependences and check that the results of
4599    several analyzers are the same.  */
4600
4601 void
4602 tree_check_data_deps (void)
4603 {
4604   loop_iterator li;
4605   struct loop *loop_nest;
4606
4607   FOR_EACH_LOOP (li, loop_nest, 0)
4608     analyze_all_data_dependences (loop_nest);
4609 }
4610
4611 /* Free the memory used by a data dependence relation DDR.  */
4612
4613 void
4614 free_dependence_relation (struct data_dependence_relation *ddr)
4615 {
4616   if (ddr == NULL)
4617     return;
4618
4619   if (DDR_SUBSCRIPTS (ddr))
4620     free_subscripts (DDR_SUBSCRIPTS (ddr));
4621   if (DDR_DIST_VECTS (ddr))
4622     VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4623   if (DDR_DIR_VECTS (ddr))
4624     VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4625
4626   free (ddr);
4627 }
4628
4629 /* Free the memory used by the data dependence relations from
4630    DEPENDENCE_RELATIONS.  */
4631
4632 void
4633 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4634 {
4635   unsigned int i;
4636   struct data_dependence_relation *ddr;
4637
4638   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4639     if (ddr)
4640       free_dependence_relation (ddr);
4641
4642   VEC_free (ddr_p, heap, dependence_relations);
4643 }
4644
4645 /* Free the memory used by the data references from DATAREFS.  */
4646
4647 void
4648 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4649 {
4650   unsigned int i;
4651   struct data_reference *dr;
4652
4653   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4654     free_data_ref (dr);
4655   VEC_free (data_reference_p, heap, datarefs);
4656 }
4657
4658 \f
4659
4660 /* Dump vertex I in RDG to FILE.  */
4661
4662 void
4663 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4664 {
4665   struct vertex *v = &(rdg->vertices[i]);
4666   struct graph_edge *e;
4667
4668   fprintf (file, "(vertex %d: (%s%s) (in:", i,
4669            RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4670            RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4671
4672   if (v->pred)
4673     for (e = v->pred; e; e = e->pred_next)
4674       fprintf (file, " %d", e->src);
4675
4676   fprintf (file, ") (out:");
4677
4678   if (v->succ)
4679     for (e = v->succ; e; e = e->succ_next)
4680       fprintf (file, " %d", e->dest);
4681
4682   fprintf (file, ")\n");
4683   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4684   fprintf (file, ")\n");
4685 }
4686
4687 /* Call dump_rdg_vertex on stderr.  */
4688
4689 DEBUG_FUNCTION void
4690 debug_rdg_vertex (struct graph *rdg, int i)
4691 {
4692   dump_rdg_vertex (stderr, rdg, i);
4693 }
4694
4695 /* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4696    dumped vertices to that bitmap.  */
4697
4698 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4699 {
4700   int i;
4701
4702   fprintf (file, "(%d\n", c);
4703
4704   for (i = 0; i < rdg->n_vertices; i++)
4705     if (rdg->vertices[i].component == c)
4706       {
4707         if (dumped)
4708           bitmap_set_bit (dumped, i);
4709
4710         dump_rdg_vertex (file, rdg, i);
4711       }
4712
4713   fprintf (file, ")\n");
4714 }
4715
4716 /* Call dump_rdg_vertex on stderr.  */
4717
4718 DEBUG_FUNCTION void
4719 debug_rdg_component (struct graph *rdg, int c)
4720 {
4721   dump_rdg_component (stderr, rdg, c, NULL);
4722 }
4723
4724 /* Dump the reduced dependence graph RDG to FILE.  */
4725
4726 void
4727 dump_rdg (FILE *file, struct graph *rdg)
4728 {
4729   int i;
4730   bitmap dumped = BITMAP_ALLOC (NULL);
4731
4732   fprintf (file, "(rdg\n");
4733
4734   for (i = 0; i < rdg->n_vertices; i++)
4735     if (!bitmap_bit_p (dumped, i))
4736       dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4737
4738   fprintf (file, ")\n");
4739   BITMAP_FREE (dumped);
4740 }
4741
4742 /* Call dump_rdg on stderr.  */
4743
4744 DEBUG_FUNCTION void
4745 debug_rdg (struct graph *rdg)
4746 {
4747   dump_rdg (stderr, rdg);
4748 }
4749
4750 static void
4751 dot_rdg_1 (FILE *file, struct graph *rdg)
4752 {
4753   int i;
4754
4755   fprintf (file, "digraph RDG {\n");
4756
4757   for (i = 0; i < rdg->n_vertices; i++)
4758     {
4759       struct vertex *v = &(rdg->vertices[i]);
4760       struct graph_edge *e;
4761
4762       /* Highlight reads from memory.  */
4763       if (RDG_MEM_READS_STMT (rdg, i))
4764        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4765
4766       /* Highlight stores to memory.  */
4767       if (RDG_MEM_WRITE_STMT (rdg, i))
4768        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4769
4770       if (v->succ)
4771        for (e = v->succ; e; e = e->succ_next)
4772          switch (RDGE_TYPE (e))
4773            {
4774            case input_dd:
4775              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4776              break;
4777
4778            case output_dd:
4779              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4780              break;
4781
4782            case flow_dd:
4783              /* These are the most common dependences: don't print these. */
4784              fprintf (file, "%d -> %d \n", i, e->dest);
4785              break;
4786
4787            case anti_dd:
4788              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4789              break;
4790
4791            default:
4792              gcc_unreachable ();
4793            }
4794     }
4795
4796   fprintf (file, "}\n\n");
4797 }
4798
4799 /* Display the Reduced Dependence Graph using dotty.  */
4800 extern void dot_rdg (struct graph *);
4801
4802 DEBUG_FUNCTION void
4803 dot_rdg (struct graph *rdg)
4804 {
4805   /* When debugging, enable the following code.  This cannot be used
4806      in production compilers because it calls "system".  */
4807 #if 0
4808   FILE *file = fopen ("/tmp/rdg.dot", "w");
4809   gcc_assert (file != NULL);
4810
4811   dot_rdg_1 (file, rdg);
4812   fclose (file);
4813
4814   system ("dotty /tmp/rdg.dot &");
4815 #else
4816   dot_rdg_1 (stderr, rdg);
4817 #endif
4818 }
4819
4820 /* This structure is used for recording the mapping statement index in
4821    the RDG.  */
4822
4823 struct GTY(()) rdg_vertex_info
4824 {
4825   gimple stmt;
4826   int index;
4827 };
4828
4829 /* Returns the index of STMT in RDG.  */
4830
4831 int
4832 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4833 {
4834   struct rdg_vertex_info rvi, *slot;
4835
4836   rvi.stmt = stmt;
4837   slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4838
4839   if (!slot)
4840     return -1;
4841
4842   return slot->index;
4843 }
4844
4845 /* Creates an edge in RDG for each distance vector from DDR.  The
4846    order that we keep track of in the RDG is the order in which
4847    statements have to be executed.  */
4848
4849 static void
4850 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4851 {
4852   struct graph_edge *e;
4853   int va, vb;
4854   data_reference_p dra = DDR_A (ddr);
4855   data_reference_p drb = DDR_B (ddr);
4856   unsigned level = ddr_dependence_level (ddr);
4857
4858   /* For non scalar dependences, when the dependence is REVERSED,
4859      statement B has to be executed before statement A.  */
4860   if (level > 0
4861       && !DDR_REVERSED_P (ddr))
4862     {
4863       data_reference_p tmp = dra;
4864       dra = drb;
4865       drb = tmp;
4866     }
4867
4868   va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4869   vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4870
4871   if (va < 0 || vb < 0)
4872     return;
4873
4874   e = add_edge (rdg, va, vb);
4875   e->data = XNEW (struct rdg_edge);
4876
4877   RDGE_LEVEL (e) = level;
4878   RDGE_RELATION (e) = ddr;
4879
4880   /* Determines the type of the data dependence.  */
4881   if (DR_IS_READ (dra) && DR_IS_READ (drb))
4882     RDGE_TYPE (e) = input_dd;
4883   else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4884     RDGE_TYPE (e) = output_dd;
4885   else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4886     RDGE_TYPE (e) = flow_dd;
4887   else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4888     RDGE_TYPE (e) = anti_dd;
4889 }
4890
4891 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
4892    the index of DEF in RDG.  */
4893
4894 static void
4895 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4896 {
4897   use_operand_p imm_use_p;
4898   imm_use_iterator iterator;
4899
4900   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4901     {
4902       struct graph_edge *e;
4903       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4904
4905       if (use < 0)
4906         continue;
4907
4908       e = add_edge (rdg, idef, use);
4909       e->data = XNEW (struct rdg_edge);
4910       RDGE_TYPE (e) = flow_dd;
4911       RDGE_RELATION (e) = NULL;
4912     }
4913 }
4914
4915 /* Creates the edges of the reduced dependence graph RDG.  */
4916
4917 static void
4918 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4919 {
4920   int i;
4921   struct data_dependence_relation *ddr;
4922   def_operand_p def_p;
4923   ssa_op_iter iter;
4924
4925   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4926     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4927       create_rdg_edge_for_ddr (rdg, ddr);
4928
4929   for (i = 0; i < rdg->n_vertices; i++)
4930     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4931                               iter, SSA_OP_DEF)
4932       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4933 }
4934
4935 /* Build the vertices of the reduced dependence graph RDG.  */
4936
4937 void
4938 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4939 {
4940   int i, j;
4941   gimple stmt;
4942
4943   FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4944     {
4945       VEC (data_ref_loc, heap) *references;
4946       data_ref_loc *ref;
4947       struct vertex *v = &(rdg->vertices[i]);
4948       struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4949       struct rdg_vertex_info **slot;
4950
4951       rvi->stmt = stmt;
4952       rvi->index = i;
4953       slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4954
4955       if (!*slot)
4956         *slot = rvi;
4957       else
4958         free (rvi);
4959
4960       v->data = XNEW (struct rdg_vertex);
4961       RDG_STMT (rdg, i) = stmt;
4962
4963       RDG_MEM_WRITE_STMT (rdg, i) = false;
4964       RDG_MEM_READS_STMT (rdg, i) = false;
4965       if (gimple_code (stmt) == GIMPLE_PHI)
4966         continue;
4967
4968       get_references_in_stmt (stmt, &references);
4969       FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4970         if (!ref->is_read)
4971           RDG_MEM_WRITE_STMT (rdg, i) = true;
4972         else
4973           RDG_MEM_READS_STMT (rdg, i) = true;
4974
4975       VEC_free (data_ref_loc, heap, references);
4976     }
4977 }
4978
4979 /* Initialize STMTS with all the statements of LOOP.  When
4980    INCLUDE_PHIS is true, include also the PHI nodes.  The order in
4981    which we discover statements is important as
4982    generate_loops_for_partition is using the same traversal for
4983    identifying statements. */
4984
4985 static void
4986 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4987 {
4988   unsigned int i;
4989   basic_block *bbs = get_loop_body_in_dom_order (loop);
4990
4991   for (i = 0; i < loop->num_nodes; i++)
4992     {
4993       basic_block bb = bbs[i];
4994       gimple_stmt_iterator bsi;
4995       gimple stmt;
4996
4997       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4998         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4999
5000       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5001         {
5002           stmt = gsi_stmt (bsi);
5003           if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5004             VEC_safe_push (gimple, heap, *stmts, stmt);
5005         }
5006     }
5007
5008   free (bbs);
5009 }
5010
5011 /* Returns true when all the dependences are computable.  */
5012
5013 static bool
5014 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5015 {
5016   ddr_p ddr;
5017   unsigned int i;
5018
5019   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5020     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5021       return false;
5022
5023   return true;
5024 }
5025
5026 /* Computes a hash function for element ELT.  */
5027
5028 static hashval_t
5029 hash_stmt_vertex_info (const void *elt)
5030 {
5031   const struct rdg_vertex_info *const rvi =
5032     (const struct rdg_vertex_info *) elt;
5033   gimple stmt = rvi->stmt;
5034
5035   return htab_hash_pointer (stmt);
5036 }
5037
5038 /* Compares database elements E1 and E2.  */
5039
5040 static int
5041 eq_stmt_vertex_info (const void *e1, const void *e2)
5042 {
5043   const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5044   const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5045
5046   return elt1->stmt == elt2->stmt;
5047 }
5048
5049 /* Free the element E.  */
5050
5051 static void
5052 hash_stmt_vertex_del (void *e)
5053 {
5054   free (e);
5055 }
5056
5057 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5058    statement of the loop nest, and one edge per data dependence or
5059    scalar dependence.  */
5060
5061 struct graph *
5062 build_empty_rdg (int n_stmts)
5063 {
5064   int nb_data_refs = 10;
5065   struct graph *rdg = new_graph (n_stmts);
5066
5067   rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5068                               eq_stmt_vertex_info, hash_stmt_vertex_del);
5069   return rdg;
5070 }
5071
5072 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5073    statement of the loop nest, and one edge per data dependence or
5074    scalar dependence.  */
5075
5076 struct graph *
5077 build_rdg (struct loop *loop,
5078            VEC (loop_p, heap) **loop_nest,
5079            VEC (ddr_p, heap) **dependence_relations,
5080            VEC (data_reference_p, heap) **datarefs)
5081 {
5082   struct graph *rdg = NULL;
5083   VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5084
5085   compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5086                                      dependence_relations);
5087
5088   if (known_dependences_p (*dependence_relations))
5089     {
5090       stmts_from_loop (loop, &stmts);
5091       rdg = build_empty_rdg (VEC_length (gimple, stmts));
5092       create_rdg_vertices (rdg, stmts);
5093       create_rdg_edges (rdg, *dependence_relations);
5094     }
5095
5096   VEC_free (gimple, heap, stmts);
5097   return rdg;
5098 }
5099
5100 /* Free the reduced dependence graph RDG.  */
5101
5102 void
5103 free_rdg (struct graph *rdg)
5104 {
5105   int i;
5106
5107   for (i = 0; i < rdg->n_vertices; i++)
5108     {
5109       struct vertex *v = &(rdg->vertices[i]);
5110       struct graph_edge *e;
5111
5112       for (e = v->succ; e; e = e->succ_next)
5113         free (e->data);
5114
5115       free (v->data);
5116     }
5117
5118   htab_delete (rdg->indices);
5119   free_graph (rdg);
5120 }
5121
5122 /* Initialize STMTS with all the statements of LOOP that contain a
5123    store to memory.  */
5124
5125 void
5126 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5127 {
5128   unsigned int i;
5129   basic_block *bbs = get_loop_body_in_dom_order (loop);
5130
5131   for (i = 0; i < loop->num_nodes; i++)
5132     {
5133       basic_block bb = bbs[i];
5134       gimple_stmt_iterator bsi;
5135
5136       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5137         if (gimple_vdef (gsi_stmt (bsi)))
5138           VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5139     }
5140
5141   free (bbs);
5142 }
5143
5144 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5145    that contains a data reference on its LHS with a stride of the same
5146    size as its unit type.  */
5147
5148 bool
5149 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5150 {
5151   tree op0, op1;
5152   bool res;
5153   struct data_reference *dr;
5154
5155   if (!stmt
5156       || !gimple_vdef (stmt)
5157       || !is_gimple_assign (stmt)
5158       || !gimple_assign_single_p (stmt)
5159       || !(op1 = gimple_assign_rhs1 (stmt))
5160       || !(integer_zerop (op1) || real_zerop (op1)))
5161     return false;
5162
5163   dr = XCNEW (struct data_reference);
5164   op0 = gimple_assign_lhs (stmt);
5165
5166   DR_STMT (dr) = stmt;
5167   DR_REF (dr) = op0;
5168
5169   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5170     && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5171
5172   free_data_ref (dr);
5173   return res;
5174 }
5175
5176 /* Initialize STMTS with all the statements of LOOP that contain a
5177    store to memory of the form "A[i] = 0".  */
5178
5179 void
5180 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5181 {
5182   unsigned int i;
5183   basic_block bb;
5184   gimple_stmt_iterator si;
5185   gimple stmt;
5186   basic_block *bbs = get_loop_body_in_dom_order (loop);
5187
5188   for (i = 0; i < loop->num_nodes; i++)
5189     for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5190       if ((stmt = gsi_stmt (si))
5191           && stmt_with_adjacent_zero_store_dr_p (stmt))
5192         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5193
5194   free (bbs);
5195 }
5196
5197 /* For a data reference REF, return the declaration of its base
5198    address or NULL_TREE if the base is not determined.  */
5199
5200 static inline tree
5201 ref_base_address (gimple stmt, data_ref_loc *ref)
5202 {
5203   tree base = NULL_TREE;
5204   tree base_address;
5205   struct data_reference *dr = XCNEW (struct data_reference);
5206
5207   DR_STMT (dr) = stmt;
5208   DR_REF (dr) = *ref->pos;
5209   dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5210   base_address = DR_BASE_ADDRESS (dr);
5211
5212   if (!base_address)
5213     goto end;
5214
5215   switch (TREE_CODE (base_address))
5216     {
5217     case ADDR_EXPR:
5218       base = TREE_OPERAND (base_address, 0);
5219       break;
5220
5221     default:
5222       base = base_address;
5223       break;
5224     }
5225
5226  end:
5227   free_data_ref (dr);
5228   return base;
5229 }
5230
5231 /* Determines whether the statement from vertex V of the RDG has a
5232    definition used outside the loop that contains this statement.  */
5233
5234 bool
5235 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5236 {
5237   gimple stmt = RDG_STMT (rdg, v);
5238   struct loop *loop = loop_containing_stmt (stmt);
5239   use_operand_p imm_use_p;
5240   imm_use_iterator iterator;
5241   ssa_op_iter it;
5242   def_operand_p def_p;
5243
5244   if (!loop)
5245     return true;
5246
5247   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5248     {
5249       FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5250         {
5251           if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5252             return true;
5253         }
5254     }
5255
5256   return false;
5257 }
5258
5259 /* Determines whether statements S1 and S2 access to similar memory
5260    locations.  Two memory accesses are considered similar when they
5261    have the same base address declaration, i.e. when their
5262    ref_base_address is the same.  */
5263
5264 bool
5265 have_similar_memory_accesses (gimple s1, gimple s2)
5266 {
5267   bool res = false;
5268   unsigned i, j;
5269   VEC (data_ref_loc, heap) *refs1, *refs2;
5270   data_ref_loc *ref1, *ref2;
5271
5272   get_references_in_stmt (s1, &refs1);
5273   get_references_in_stmt (s2, &refs2);
5274
5275   FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5276     {
5277       tree base1 = ref_base_address (s1, ref1);
5278
5279       if (base1)
5280         FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5281           if (base1 == ref_base_address (s2, ref2))
5282             {
5283               res = true;
5284               goto end;
5285             }
5286     }
5287
5288  end:
5289   VEC_free (data_ref_loc, heap, refs1);
5290   VEC_free (data_ref_loc, heap, refs2);
5291   return res;
5292 }
5293
5294 /* Helper function for the hashtab.  */
5295
5296 static int
5297 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5298 {
5299   return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5300                                        CONST_CAST_GIMPLE ((const_gimple) s2));
5301 }
5302
5303 /* Helper function for the hashtab.  */
5304
5305 static hashval_t
5306 ref_base_address_1 (const void *s)
5307 {
5308   gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5309   unsigned i;
5310   VEC (data_ref_loc, heap) *refs;
5311   data_ref_loc *ref;
5312   hashval_t res = 0;
5313
5314   get_references_in_stmt (stmt, &refs);
5315
5316   FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5317     if (!ref->is_read)
5318       {
5319         res = htab_hash_pointer (ref_base_address (stmt, ref));
5320         break;
5321       }
5322
5323   VEC_free (data_ref_loc, heap, refs);
5324   return res;
5325 }
5326
5327 /* Try to remove duplicated write data references from STMTS.  */
5328
5329 void
5330 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5331 {
5332   unsigned i;
5333   gimple stmt;
5334   htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5335                              have_similar_memory_accesses_1, NULL);
5336
5337   for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5338     {
5339       void **slot;
5340
5341       slot = htab_find_slot (seen, stmt, INSERT);
5342
5343       if (*slot)
5344         VEC_ordered_remove (gimple, *stmts, i);
5345       else
5346         {
5347           *slot = (void *) stmt;
5348           i++;
5349         }
5350     }
5351
5352   htab_delete (seen);
5353 }
5354
5355 /* Returns the index of PARAMETER in the parameters vector of the
5356    ACCESS_MATRIX.  If PARAMETER does not exist return -1.  */
5357
5358 int
5359 access_matrix_get_index_for_parameter (tree parameter,
5360                                        struct access_matrix *access_matrix)
5361 {
5362   int i;
5363   VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5364   tree lambda_parameter;
5365
5366   FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5367     if (lambda_parameter == parameter)
5368       return i + AM_NB_INDUCTION_VARS (access_matrix);
5369
5370   return -1;
5371 }