gcc/tree-predcom.c

   1 /* Predictive commoning.
   2    Copyright (C) 2005, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This file implements the predictive commoning optimization.  Predictive
  21    commoning can be viewed as CSE around a loop, and with some improvements,
  22    as generalized strength reduction-- i.e., reusing values computed in
  23    earlier iterations of a loop in the later ones.  So far, the pass only
  24    handles the most useful case, that is, reusing values of memory references.
  25    If you think this is all just a special case of PRE, you are sort of right;
  26    however, concentrating on loops is simpler, and makes it possible to
  27    incorporate data dependence analysis to detect the opportunities, perform
  28    loop unrolling to avoid copies together with renaming immediately,
  29    and if needed, we could also take register pressure into account.
  30
  31    Let us demonstrate what is done on an example:
  32
  33    for (i = 0; i < 100; i++)
  34      {
  35        a[i+2] = a[i] + a[i+1];
  36        b[10] = b[10] + i;
  37        c[i] = c[99 - i];
  38        d[i] = d[i + 1];
  39      }
  40
  41    1) We find data references in the loop, and split them to mutually
  42       independent groups (i.e., we find components of a data dependence
  43       graph).  We ignore read-read dependences whose distance is not constant.
  44       (TODO -- we could also ignore antidependences).  In this example, we
  45       find the following groups:
  46
  47       a[i]{read}, a[i+1]{read}, a[i+2]{write}
  48       b[10]{read}, b[10]{write}
  49       c[99 - i]{read}, c[i]{write}
  50       d[i + 1]{read}, d[i]{write}
  51
  52    2) Inside each of the group, we verify several conditions:
  53       a) all the references must differ in indices only, and the indices
  54          must all have the same step
  55       b) the references must dominate loop latch (and thus, they must be
  56          ordered by dominance relation).
  57       c) the distance of the indices must be a small multiple of the step
  58       We are then able to compute the difference of the references (# of
  59       iterations before they point to the same place as the first of them).
  60       Also, in case there are writes in the loop, we split the groups into
  61       chains whose head is the write whose values are used by the reads in
  62       the same chain.  The chains are then processed independently,
  63       making the further transformations simpler.  Also, the shorter chains
  64       need the same number of registers, but may require lower unrolling
  65       factor in order to get rid of the copies on the loop latch.
  66
  67       In our example, we get the following chains (the chain for c is invalid).
  68
  69       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
  70       b[10]{read,+0}, b[10]{write,+0}
  71       d[i + 1]{read,+0}, d[i]{write,+1}
  72
  73    3) For each read, we determine the read or write whose value it reuses,
  74       together with the distance of this reuse.  I.e. we take the last
  75       reference before it with distance 0, or the last of the references
  76       with the smallest positive distance to the read.  Then, we remove
  77       the references that are not used in any of these chains, discard the
  78       empty groups, and propagate all the links so that they point to the
  79       single root reference of the chain (adjusting their distance
  80       appropriately).  Some extra care needs to be taken for references with
  81       step 0.  In our example (the numbers indicate the distance of the
  82       reuse),
  83
  84       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
  85       b[10] --> (*) 1, b[10] (*)
  86
  87    4) The chains are combined together if possible.  If the corresponding
  88       elements of two chains are always combined together with the same
  89       operator, we remember just the result of this combination, instead
  90       of remembering the values separately.  We may need to perform
  91       reassociation to enable combining, for example
  92
  93       e[i] + f[i+1] + e[i+1] + f[i]
  94
  95       can be reassociated as
  96
  97       (e[i] + f[i]) + (e[i+1] + f[i+1])
  98
  99       and we can combine the chains for e and f into one chain.
 100
 101    5) For each root reference (end of the chain) R, let N be maximum distance
 102       of a reference reusing its value.  Variables R0 upto RN are created,
 103       together with phi nodes that transfer values from R1 .. RN to
 104       R0 .. R(N-1).
 105       Initial values are loaded to R0..R(N-1) (in case not all references
 106       must necessarily be accessed and they may trap, we may fail here;
 107       TODO sometimes, the loads could be guarded by a check for the number
 108       of iterations).  Values loaded/stored in roots are also copied to
 109       RN.  Other reads are replaced with the appropriate variable Ri.
 110       Everything is put to SSA form.
 111
 112       As a small improvement, if R0 is dead after the root (i.e., all uses of
 113       the value with the maximum distance dominate the root), we can avoid
 114       creating RN and use R0 instead of it.
 115
 116       In our example, we get (only the parts concerning a and b are shown):
 117       for (i = 0; i < 100; i++)
 118         {
 119           f = phi (a[0], s);
 120           s = phi (a[1], f);
 121           x = phi (b[10], x);
 122
 123           f = f + s;
 124           a[i+2] = f;
 125           x = x + i;
 126           b[10] = x;
 127         }
 128
 129    6) Factor F for unrolling is determined as the smallest common multiple of
 130       (N + 1) for each root reference (N for references for that we avoided
 131       creating RN).  If F and the loop is small enough, loop is unrolled F
 132       times.  The stores to RN (R0) in the copies of the loop body are
 133       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
 134       be coalesced and the copies can be eliminated.
 135
 136       TODO -- copy propagation and other optimizations may change the live
 137       ranges of the temporary registers and prevent them from being coalesced;
 138       this may increase the register pressure.
 139
 140       In our case, F = 2 and the (main loop of the) result is
 141
 142       for (i = 0; i < ...; i += 2)
 143         {
 144           f = phi (a[0], f);
 145           s = phi (a[1], s);
 146           x = phi (b[10], x);
 147
 148           f = f + s;
 149           a[i+2] = f;
 150           x = x + i;
 151           b[10] = x;
 152
 153           s = s + f;
 154           a[i+3] = s;
 155           x = x + i;
 156           b[10] = x;
 157        }
 158
 159    TODO -- stores killing other stores can be taken into account, e.g.,
 160    for (i = 0; i < n; i++)
 161      {
 162        a[i] = 1;
 163        a[i+2] = 2;
 164      }
 165
 166    can be replaced with
 167
 168    t0 = a[0];
 169    t1 = a[1];
 170    for (i = 0; i < n; i++)
 171      {
 172        a[i] = 1;
 173        t2 = 2;
 174        t0 = t1;
 175        t1 = t2;
 176      }
 177    a[n] = t0;
 178    a[n+1] = t1;
 179
 180    The interesting part is that this would generalize store motion; still, since
 181    sm is performed elsewhere, it does not seem that important.
 182
 183    Predictive commoning can be generalized for arbitrary computations (not
 184    just memory loads), and also nontrivial transfer functions (e.g., replacing
 185    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
 186
 187 #include "config.h"
 188 #include "system.h"
 189 #include "coretypes.h"
 190 #include "tm.h"
 191 #include "tree.h"
 192 #include "tm_p.h"
 193 #include "cfgloop.h"
 194 #include "tree-flow.h"
 195 #include "ggc.h"
 196 #include "tree-data-ref.h"
 197 #include "tree-scalar-evolution.h"
 198 #include "tree-chrec.h"
 199 #include "params.h"
 200 #include "diagnostic.h"
 201 #include "tree-pass.h"
 202 #include "tree-affine.h"
 203 #include "tree-inline.h"
 204
 205 /* The maximum number of iterations between the considered memory
 206    references.  */
 207
 208 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
 209
 210 /* Data references.  */
 211
 212 typedef struct dref
 213 {
 214   /* The reference itself.  */
 215   struct data_reference *ref;
 216
 217   /* The statement in that the reference appears.  */
 218   tree stmt;
 219
 220   /* Distance of the reference from the root of the chain (in number of
 221      iterations of the loop).  */
 222   unsigned distance;
 223
 224   /* Number of iterations offset from the first reference in the component.  */
 225   double_int offset;
 226
 227   /* Number of the reference in a component, in dominance ordering.  */
 228   unsigned pos;
 229
 230   /* True if the memory reference is always accessed when the loop is
 231      entered.  */
 232   unsigned always_accessed : 1;
 233 } *dref;
 234
 235 DEF_VEC_P (dref);
 236 DEF_VEC_ALLOC_P (dref, heap);
 237
 238 /* Type of the chain of the references.  */
 239
 240 enum chain_type
 241 {
 242   /* The addresses of the references in the chain are constant.  */
 243   CT_INVARIANT,
 244
 245   /* There are only loads in the chain.  */
 246   CT_LOAD,
 247
 248   /* Root of the chain is store, the rest are loads.  */
 249   CT_STORE_LOAD,
 250
 251   /* A combination of two chains.  */
 252   CT_COMBINATION
 253 };
 254
 255 /* Chains of data references.  */
 256
 257 typedef struct chain
 258 {
 259   /* Type of the chain.  */
 260   enum chain_type type;
 261
 262   /* For combination chains, the operator and the two chains that are
 263      combined, and the type of the result.  */
 264   enum tree_code operator;
 265   tree rslt_type;
 266   struct chain *ch1, *ch2;
 267
 268   /* The references in the chain.  */
 269   VEC(dref,heap) *refs;
 270
 271   /* The maximum distance of the reference in the chain from the root.  */
 272   unsigned length;
 273
 274   /* The variables used to copy the value throughout iterations.  */
 275   VEC(tree,heap) *vars;
 276
 277   /* Initializers for the variables.  */
 278   VEC(tree,heap) *inits;
 279
 280   /* True if there is a use of a variable with the maximal distance
 281      that comes after the root in the loop.  */
 282   unsigned has_max_use_after : 1;
 283
 284   /* True if all the memory references in the chain are always accessed.  */
 285   unsigned all_always_accessed : 1;
 286
 287   /* True if this chain was combined together with some other chain.  */
 288   unsigned combined : 1;
 289 } *chain_p;
 290
 291 DEF_VEC_P (chain_p);
 292 DEF_VEC_ALLOC_P (chain_p, heap);
 293
 294 /* Describes the knowledge about the step of the memory references in
 295    the component.  */
 296
 297 enum ref_step_type
 298 {
 299   /* The step is zero.  */
 300   RS_INVARIANT,
 301
 302   /* The step is nonzero.  */
 303   RS_NONZERO,
 304
 305   /* The step may or may not be nonzero.  */
 306   RS_ANY
 307 };
 308
 309 /* Components of the data dependence graph.  */
 310
 311 struct component
 312 {
 313   /* The references in the component.  */
 314   VEC(dref,heap) *refs;
 315
 316   /* What we know about the step of the references in the component.  */
 317   enum ref_step_type comp_step;
 318
 319   /* Next component in the list.  */
 320   struct component *next;
 321 };
 322
 323 /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
 324
 325 static bitmap looparound_phis;
 326
 327 /* Cache used by tree_to_aff_combination_expand.  */
 328
 329 static struct pointer_map_t *name_expansions;
 330
 331 /* Dumps data reference REF to FILE.  */
 332
 333 extern void dump_dref (FILE *, dref);
 334 void
 335 dump_dref (FILE *file, dref ref)
 336 {
 337   if (ref->ref)
 338     {
 339       fprintf (file, "    ");
 340       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
 341       fprintf (file, " (id %u%s)\n", ref->pos,
 342                DR_IS_READ (ref->ref) ? "" : ", write");
 343
 344       fprintf (file, "      offset ");
 345       dump_double_int (file, ref->offset, false);
 346       fprintf (file, "\n");
 347
 348       fprintf (file, "      distance %u\n", ref->distance);
 349     }
 350   else
 351     {
 352       if (TREE_CODE (ref->stmt) == PHI_NODE)
 353         fprintf (file, "    looparound ref\n");
 354       else
 355         fprintf (file, "    combination ref\n");
 356       fprintf (file, "      in statement ");
 357       print_generic_expr (file, ref->stmt, TDF_SLIM);
 358       fprintf (file, "\n");
 359       fprintf (file, "      distance %u\n", ref->distance);
 360     }
 361
 362 }
 363
 364 /* Dumps CHAIN to FILE.  */
 365
 366 extern void dump_chain (FILE *, chain_p);
 367 void
 368 dump_chain (FILE *file, chain_p chain)
 369 {
 370   dref a;
 371   const char *chain_type;
 372   unsigned i;
 373   tree var;
 374
 375   switch (chain->type)
 376     {
 377     case CT_INVARIANT:
 378       chain_type = "Load motion";
 379       break;
 380
 381     case CT_LOAD:
 382       chain_type = "Loads-only";
 383       break;
 384
 385     case CT_STORE_LOAD:
 386       chain_type = "Store-loads";
 387       break;
 388
 389     case CT_COMBINATION:
 390       chain_type = "Combination";
 391       break;
 392
 393     default:
 394       gcc_unreachable ();
 395     }
 396
 397   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
 398            chain->combined ? " (combined)" : "");
 399   if (chain->type != CT_INVARIANT)
 400     fprintf (file, "  max distance %u%s\n", chain->length,
 401              chain->has_max_use_after ? "" : ", may reuse first");
 402
 403   if (chain->type == CT_COMBINATION)
 404     {
 405       fprintf (file, "  equal to %p %s %p in type ",
 406                (void *) chain->ch1, op_symbol_code (chain->operator),
 407                (void *) chain->ch2);
 408       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (chain->vars)
 413     {
 414       fprintf (file, "  vars");
 415       for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
 416         {
 417           fprintf (file, " ");
 418           print_generic_expr (file, var, TDF_SLIM);
 419         }
 420       fprintf (file, "\n");
 421     }
 422
 423   if (chain->inits)
 424     {
 425       fprintf (file, "  inits");
 426       for (i = 0; VEC_iterate (tree, chain->inits, i, var); i++)
 427         {
 428           fprintf (file, " ");
 429           print_generic_expr (file, var, TDF_SLIM);
 430         }
 431       fprintf (file, "\n");
 432     }
 433
 434   fprintf (file, "  references:\n");
 435   for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
 436     dump_dref (file, a);
 437
 438   fprintf (file, "\n");
 439 }
 440
 441 /* Dumps CHAINS to FILE.  */
 442
 443 extern void dump_chains (FILE *, VEC (chain_p, heap) *);
 444 void
 445 dump_chains (FILE *file, VEC (chain_p, heap) *chains)
 446 {
 447   chain_p chain;
 448   unsigned i;
 449
 450   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
 451     dump_chain (file, chain);
 452 }
 453
 454 /* Dumps COMP to FILE.  */
 455
 456 extern void dump_component (FILE *, struct component *);
 457 void
 458 dump_component (FILE *file, struct component *comp)
 459 {
 460   dref a;
 461   unsigned i;
 462
 463   fprintf (file, "Component%s:\n",
 464            comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
 465   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
 466     dump_dref (file, a);
 467   fprintf (file, "\n");
 468 }
 469
 470 /* Dumps COMPS to FILE.  */
 471
 472 extern void dump_components (FILE *, struct component *);
 473 void
 474 dump_components (FILE *file, struct component *comps)
 475 {
 476   struct component *comp;
 477
 478   for (comp = comps; comp; comp = comp->next)
 479     dump_component (file, comp);
 480 }
 481
 482 /* Frees a chain CHAIN.  */
 483
 484 static void
 485 release_chain (chain_p chain)
 486 {
 487   dref ref;
 488   unsigned i;
 489
 490   if (chain == NULL)
 491     return;
 492
 493   for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
 494     free (ref);
 495
 496   VEC_free (dref, heap, chain->refs);
 497   VEC_free (tree, heap, chain->vars);
 498   VEC_free (tree, heap, chain->inits);
 499
 500   free (chain);
 501 }
 502
 503 /* Frees CHAINS.  */
 504
 505 static void
 506 release_chains (VEC (chain_p, heap) *chains)
 507 {
 508   unsigned i;
 509   chain_p chain;
 510
 511   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
 512     release_chain (chain);
 513   VEC_free (chain_p, heap, chains);
 514 }
 515
 516 /* Frees a component COMP.  */
 517
 518 static void
 519 release_component (struct component *comp)
 520 {
 521   VEC_free (dref, heap, comp->refs);
 522   free (comp);
 523 }
 524
 525 /* Frees list of components COMPS.  */
 526
 527 static void
 528 release_components (struct component *comps)
 529 {
 530   struct component *act, *next;
 531
 532   for (act = comps; act; act = next)
 533     {
 534       next = act->next;
 535       release_component (act);
 536     }
 537 }
 538
 539 /* Finds a root of tree given by FATHERS containing A, and performs path
 540    shortening.  */
 541
 542 static unsigned
 543 component_of (unsigned fathers[], unsigned a)
 544 {
 545   unsigned root, n;
 546
 547   for (root = a; root != fathers[root]; root = fathers[root])
 548     continue;
 549
 550   for (; a != root; a = n)
 551     {
 552       n = fathers[a];
 553       fathers[a] = root;
 554     }
 555
 556   return root;
 557 }
 558
 559 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
 560    components, A and B are components to merge.  */
 561
 562 static void
 563 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
 564 {
 565   unsigned ca = component_of (fathers, a);
 566   unsigned cb = component_of (fathers, b);
 567
 568   if (ca == cb)
 569     return;
 570
 571   if (sizes[ca] < sizes[cb])
 572     {
 573       sizes[cb] += sizes[ca];
 574       fathers[ca] = cb;
 575     }
 576   else
 577     {
 578       sizes[ca] += sizes[cb];
 579       fathers[cb] = ca;
 580     }
 581 }
 582
 583 /* Returns true if A is a reference that is suitable for predictive commoning
 584    in the innermost loop that contains it.  REF_STEP is set according to the
 585    step of the reference A.  */
 586
 587 static bool
 588 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
 589 {
 590   tree ref = DR_REF (a), step = DR_STEP (a);
 591
 592   if (!step
 593       || !is_gimple_reg_type (TREE_TYPE (ref)))
 594     return false;
 595
 596   if (integer_zerop (step))
 597     *ref_step = RS_INVARIANT;
 598   else if (integer_nonzerop (step))
 599     *ref_step = RS_NONZERO;
 600   else
 601     *ref_step = RS_ANY;
 602
 603   return true;
 604 }
 605
 606 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
 607
 608 static void
 609 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
 610 {
 611   aff_tree delta;
 612
 613   tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset,
 614                                   &name_expansions);
 615   aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr)));
 616   aff_combination_add (offset, &delta);
 617 }
 618
 619 /* Determines number of iterations of the innermost enclosing loop before B
 620    refers to exactly the same location as A and stores it to OFF.  If A and
 621    B do not have the same step, they never meet, or anything else fails,
 622    returns false, otherwise returns true.  Both A and B are assumed to
 623    satisfy suitable_reference_p.  */
 624
 625 static bool
 626 determine_offset (struct data_reference *a, struct data_reference *b,
 627                   double_int *off)
 628 {
 629   aff_tree diff, baseb, step;
 630   tree typea, typeb;
 631
 632   /* Check that both the references access the location in the same type.  */
 633   typea = TREE_TYPE (DR_REF (a));
 634   typeb = TREE_TYPE (DR_REF (b));
 635   if (!useless_type_conversion_p (typeb, typea))
 636     return false;
 637
 638   /* Check whether the base address and the step of both references is the
 639      same.  */
 640   if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
 641       || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
 642     return false;
 643
 644   if (integer_zerop (DR_STEP (a)))
 645     {
 646       /* If the references have loop invariant address, check that they access
 647          exactly the same location.  */
 648       *off = double_int_zero;
 649       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
 650               && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
 651     }
 652
 653   /* Compare the offsets of the addresses, and check whether the difference
 654      is a multiple of step.  */
 655   aff_combination_dr_offset (a, &diff);
 656   aff_combination_dr_offset (b, &baseb);
 657   aff_combination_scale (&baseb, double_int_minus_one);
 658   aff_combination_add (&diff, &baseb);
 659
 660   tree_to_aff_combination_expand (DR_STEP (a), sizetype,
 661                                   &step, &name_expansions);
 662   return aff_combination_constant_multiple_p (&diff, &step, off);
 663 }
 664
 665 /* Returns the last basic block in LOOP for that we are sure that
 666    it is executed whenever the loop is entered.  */
 667
 668 static basic_block
 669 last_always_executed_block (struct loop *loop)
 670 {
 671   unsigned i;
 672   VEC (edge, heap) *exits = get_loop_exit_edges (loop);
 673   edge ex;
 674   basic_block last = loop->latch;
 675
 676   for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
 677     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
 678   VEC_free (edge, heap, exits);
 679
 680   return last;
 681 }
 682
 683 /* Splits dependence graph on DATAREFS described by DEPENDS to components.  */
 684
 685 static struct component *
 686 split_data_refs_to_components (struct loop *loop,
 687                                VEC (data_reference_p, heap) *datarefs,
 688                                VEC (ddr_p, heap) *depends)
 689 {
 690   unsigned i, n = VEC_length (data_reference_p, datarefs);
 691   unsigned ca, ia, ib, bad;
 692   unsigned *comp_father = XNEWVEC (unsigned, n + 1);
 693   unsigned *comp_size = XNEWVEC (unsigned, n + 1);
 694   struct component **comps;
 695   struct data_reference *dr, *dra, *drb;
 696   struct data_dependence_relation *ddr;
 697   struct component *comp_list = NULL, *comp;
 698   dref dataref;
 699   basic_block last_always_executed = last_always_executed_block (loop);
 700
 701   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 702     {
 703       if (!DR_REF (dr))
 704         {
 705           /* A fake reference for call or asm_expr that may clobber memory;
 706              just fail.  */
 707           goto end;
 708         }
 709       dr->aux = (void *) (size_t) i;
 710       comp_father[i] = i;
 711       comp_size[i] = 1;
 712     }
 713
 714   /* A component reserved for the "bad" data references.  */
 715   comp_father[n] = n;
 716   comp_size[n] = 1;
 717
 718   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 719     {
 720       enum ref_step_type dummy;
 721
 722       if (!suitable_reference_p (dr, &dummy))
 723         {
 724           ia = (unsigned) (size_t) dr->aux;
 725           merge_comps (comp_father, comp_size, n, ia);
 726         }
 727     }
 728
 729   for (i = 0; VEC_iterate (ddr_p, depends, i, ddr); i++)
 730     {
 731       double_int dummy_off;
 732
 733       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 734         continue;
 735
 736       dra = DDR_A (ddr);
 737       drb = DDR_B (ddr);
 738       ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
 739       ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
 740       if (ia == ib)
 741         continue;
 742
 743       bad = component_of (comp_father, n);
 744
 745       /* If both A and B are reads, we may ignore unsuitable dependences.  */
 746       if (DR_IS_READ (dra) && DR_IS_READ (drb)
 747           && (ia == bad || ib == bad
 748               || !determine_offset (dra, drb, &dummy_off)))
 749         continue;
 750
 751       merge_comps (comp_father, comp_size, ia, ib);
 752     }
 753
 754   comps = XCNEWVEC (struct component *, n);
 755   bad = component_of (comp_father, n);
 756   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 757     {
 758       ia = (unsigned) (size_t) dr->aux;
 759       ca = component_of (comp_father, ia);
 760       if (ca == bad)
 761         continue;
 762
 763       comp = comps[ca];
 764       if (!comp)
 765         {
 766           comp = XCNEW (struct component);
 767           comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
 768           comps[ca] = comp;
 769         }
 770
 771       dataref = XCNEW (struct dref);
 772       dataref->ref = dr;
 773       dataref->stmt = DR_STMT (dr);
 774       dataref->offset = double_int_zero;
 775       dataref->distance = 0;
 776
 777       dataref->always_accessed
 778               = dominated_by_p (CDI_DOMINATORS, last_always_executed,
 779                                 bb_for_stmt (dataref->stmt));
 780       dataref->pos = VEC_length (dref, comp->refs);
 781       VEC_quick_push (dref, comp->refs, dataref);
 782     }
 783
 784   for (i = 0; i < n; i++)
 785     {
 786       comp = comps[i];
 787       if (comp)
 788         {
 789           comp->next = comp_list;
 790           comp_list = comp;
 791         }
 792     }
 793   free (comps);
 794
 795 end:
 796   free (comp_father);
 797   free (comp_size);
 798   return comp_list;
 799 }
 800
 801 /* Returns true if the component COMP satisfies the conditions
 802    described in 2) at the beginning of this file.  LOOP is the current
 803    loop.  */
 804
 805 static bool
 806 suitable_component_p (struct loop *loop, struct component *comp)
 807 {
 808   unsigned i;
 809   dref a, first;
 810   basic_block ba, bp = loop->header;
 811   bool ok, has_write = false;
 812
 813   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
 814     {
 815       ba = bb_for_stmt (a->stmt);
 816
 817       if (!just_once_each_iteration_p (loop, ba))
 818         return false;
 819
 820       gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
 821       bp = ba;
 822
 823       if (!DR_IS_READ (a->ref))
 824         has_write = true;
 825     }
 826
 827   first = VEC_index (dref, comp->refs, 0);
 828   ok = suitable_reference_p (first->ref, &comp->comp_step);
 829   gcc_assert (ok);
 830   first->offset = double_int_zero;
 831
 832   for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++)
 833     {
 834       if (!determine_offset (first->ref, a->ref, &a->offset))
 835         return false;
 836
 837 #ifdef ENABLE_CHECKING
 838       {
 839         enum ref_step_type a_step;
 840         ok = suitable_reference_p (a->ref, &a_step);
 841         gcc_assert (ok && a_step == comp->comp_step);
 842       }
 843 #endif
 844     }
 845
 846   /* If there is a write inside the component, we must know whether the
 847      step is nonzero or not -- we would not otherwise be able to recognize
 848      whether the value accessed by reads comes from the OFFSET-th iteration
 849      or the previous one.  */
 850   if (has_write && comp->comp_step == RS_ANY)
 851     return false;
 852
 853   return true;
 854 }
 855
 856 /* Check the conditions on references inside each of components COMPS,
 857    and remove the unsuitable components from the list.  The new list
 858    of components is returned.  The conditions are described in 2) at
 859    the beginning of this file.  LOOP is the current loop.  */
 860
 861 static struct component *
 862 filter_suitable_components (struct loop *loop, struct component *comps)
 863 {
 864   struct component **comp, *act;
 865
 866   for (comp = &comps; *comp; )
 867     {
 868       act = *comp;
 869       if (suitable_component_p (loop, act))
 870         comp = &act->next;
 871       else
 872         {
 873           *comp = act->next;
 874           release_component (act);
 875         }
 876     }
 877
 878   return comps;
 879 }
 880
 881 /* Compares two drefs A and B by their offset and position.  Callback for
 882    qsort.  */
 883
 884 static int
 885 order_drefs (const void *a, const void *b)
 886 {
 887   const dref *da = a;
 888   const dref *db = b;
 889   int offcmp = double_int_scmp ((*da)->offset, (*db)->offset);
 890
 891   if (offcmp != 0)
 892     return offcmp;
 893
 894   return (*da)->pos - (*db)->pos;
 895 }
 896
 897 /* Returns root of the CHAIN.  */
 898
 899 static inline dref
 900 get_chain_root (chain_p chain)
 901 {
 902   return VEC_index (dref, chain->refs, 0);
 903 }
 904
 905 /* Adds REF to the chain CHAIN.  */
 906
 907 static void
 908 add_ref_to_chain (chain_p chain, dref ref)
 909 {
 910   dref root = get_chain_root (chain);
 911   double_int dist;
 912
 913   gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0);
 914   dist = double_int_add (ref->offset, double_int_neg (root->offset));
 915   if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
 916     return;
 917   gcc_assert (double_int_fits_in_uhwi_p (dist));
 918
 919   VEC_safe_push (dref, heap, chain->refs, ref);
 920
 921   ref->distance = double_int_to_uhwi (dist);
 922
 923   if (ref->distance >= chain->length)
 924     {
 925       chain->length = ref->distance;
 926       chain->has_max_use_after = false;
 927     }
 928
 929   if (ref->distance == chain->length
 930       && ref->pos > root->pos)
 931     chain->has_max_use_after = true;
 932
 933   chain->all_always_accessed &= ref->always_accessed;
 934 }
 935
 936 /* Returns the chain for invariant component COMP.  */
 937
 938 static chain_p
 939 make_invariant_chain (struct component *comp)
 940 {
 941   chain_p chain = XCNEW (struct chain);
 942   unsigned i;
 943   dref ref;
 944
 945   chain->type = CT_INVARIANT;
 946
 947   chain->all_always_accessed = true;
 948
 949   for (i = 0; VEC_iterate (dref, comp->refs, i, ref); i++)
 950     {
 951       VEC_safe_push (dref, heap, chain->refs, ref);
 952       chain->all_always_accessed &= ref->always_accessed;
 953     }
 954
 955   return chain;
 956 }
 957
 958 /* Make a new chain rooted at REF.  */
 959
 960 static chain_p
 961 make_rooted_chain (dref ref)
 962 {
 963   chain_p chain = XCNEW (struct chain);
 964
 965   chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
 966
 967   VEC_safe_push (dref, heap, chain->refs, ref);
 968   chain->all_always_accessed = ref->always_accessed;
 969
 970   ref->distance = 0;
 971
 972   return chain;
 973 }
 974
 975 /* Returns true if CHAIN is not trivial.  */
 976
 977 static bool
 978 nontrivial_chain_p (chain_p chain)
 979 {
 980   return chain != NULL && VEC_length (dref, chain->refs) > 1;
 981 }
 982
 983 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
 984    is no such name.  */
 985
 986 static tree
 987 name_for_ref (dref ref)
 988 {
 989   tree name;
 990
 991   if (TREE_CODE (ref->stmt) == GIMPLE_MODIFY_STMT)
 992     {
 993       if (!ref->ref || DR_IS_READ (ref->ref))
 994         name = GIMPLE_STMT_OPERAND (ref->stmt, 0);
 995       else
 996         name = GIMPLE_STMT_OPERAND (ref->stmt, 1);
 997     }
 998   else
 999     name = PHI_RESULT (ref->stmt);
1000
1001   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1002 }
1003
1004 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1005    iterations of the innermost enclosing loop).  */
1006
1007 static bool
1008 valid_initializer_p (struct data_reference *ref,
1009                      unsigned distance, struct data_reference *root)
1010 {
1011   aff_tree diff, base, step;
1012   double_int off;
1013
1014   if (!DR_BASE_ADDRESS (ref))
1015     return false;
1016
1017   /* Both REF and ROOT must be accessing the same object.  */
1018   if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1019     return false;
1020
1021   /* The initializer is defined outside of loop, hence its address must be
1022      invariant inside the loop.  */
1023   gcc_assert (integer_zerop (DR_STEP (ref)));
1024
1025   /* If the address of the reference is invariant, initializer must access
1026      exactly the same location.  */
1027   if (integer_zerop (DR_STEP (root)))
1028     return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1029             && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1030
1031   /* Verify that this index of REF is equal to the root's index at
1032      -DISTANCE-th iteration.  */
1033   aff_combination_dr_offset (root, &diff);
1034   aff_combination_dr_offset (ref, &base);
1035   aff_combination_scale (&base, double_int_minus_one);
1036   aff_combination_add (&diff, &base);
1037
1038   tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step,
1039                                   &name_expansions);
1040   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1041     return false;
1042
1043   if (!double_int_equal_p (off, uhwi_to_double_int (distance)))
1044     return false;
1045
1046   return true;
1047 }
1048
1049 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1050    initial value is correct (equal to initial value of REF shifted by one
1051    iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
1052    is the root of the current chain.  */
1053
1054 static tree
1055 find_looparound_phi (struct loop *loop, dref ref, dref root)
1056 {
1057   tree name, phi, init, init_stmt, init_ref;
1058   edge latch = loop_latch_edge (loop);
1059   struct data_reference init_dr;
1060
1061   if (TREE_CODE (ref->stmt) == GIMPLE_MODIFY_STMT)
1062     {
1063       if (DR_IS_READ (ref->ref))
1064         name = GIMPLE_STMT_OPERAND (ref->stmt, 0);
1065       else
1066         name = GIMPLE_STMT_OPERAND (ref->stmt, 1);
1067     }
1068   else
1069     name = PHI_RESULT (ref->stmt);
1070   if (!name)
1071     return NULL_TREE;
1072
1073   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1074     if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1075       break;
1076
1077   if (!phi)
1078     return NULL_TREE;
1079
1080   init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1081   if (TREE_CODE (init) != SSA_NAME)
1082     return NULL_TREE;
1083   init_stmt = SSA_NAME_DEF_STMT (init);
1084   if (TREE_CODE (init_stmt) != GIMPLE_MODIFY_STMT)
1085     return NULL_TREE;
1086   gcc_assert (GIMPLE_STMT_OPERAND (init_stmt, 0) == init);
1087
1088   init_ref = GIMPLE_STMT_OPERAND (init_stmt, 1);
1089   if (!REFERENCE_CLASS_P (init_ref)
1090       && !DECL_P (init_ref))
1091     return NULL_TREE;
1092
1093   /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1094      loop enclosing PHI).  */
1095   memset (&init_dr, 0, sizeof (struct data_reference));
1096   DR_REF (&init_dr) = init_ref;
1097   DR_STMT (&init_dr) = phi;
1098   dr_analyze_innermost (&init_dr);
1099
1100   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1101     return NULL_TREE;
1102
1103   return phi;
1104 }
1105
1106 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1107
1108 static void
1109 insert_looparound_copy (chain_p chain, dref ref, tree phi)
1110 {
1111   dref nw = XCNEW (struct dref), aref;
1112   unsigned i;
1113
1114   nw->stmt = phi;
1115   nw->distance = ref->distance + 1;
1116   nw->always_accessed = 1;
1117
1118   for (i = 0; VEC_iterate (dref, chain->refs, i, aref); i++)
1119     if (aref->distance >= nw->distance)
1120       break;
1121   VEC_safe_insert (dref, heap, chain->refs, i, nw);
1122
1123   if (nw->distance > chain->length)
1124     {
1125       chain->length = nw->distance;
1126       chain->has_max_use_after = false;
1127     }
1128 }
1129
1130 /* For references in CHAIN that are copied around the LOOP (created previously
1131    by PRE, or by user), add the results of such copies to the chain.  This
1132    enables us to remove the copies by unrolling, and may need less registers
1133    (also, it may allow us to combine chains together).  */
1134
1135 static void
1136 add_looparound_copies (struct loop *loop, chain_p chain)
1137 {
1138   unsigned i;
1139   dref ref, root = get_chain_root (chain);
1140   tree phi;
1141
1142   for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
1143     {
1144       phi = find_looparound_phi (loop, ref, root);
1145       if (!phi)
1146         continue;
1147
1148       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1149       insert_looparound_copy (chain, ref, phi);
1150     }
1151 }
1152
1153 /* Find roots of the values and determine distances in the component COMP.
1154    The references are redistributed into CHAINS.  LOOP is the current
1155    loop.  */
1156
1157 static void
1158 determine_roots_comp (struct loop *loop,
1159                       struct component *comp,
1160                       VEC (chain_p, heap) **chains)
1161 {
1162   unsigned i;
1163   dref a;
1164   chain_p chain = NULL;
1165
1166   /* Invariants are handled specially.  */
1167   if (comp->comp_step == RS_INVARIANT)
1168     {
1169       chain = make_invariant_chain (comp);
1170       VEC_safe_push (chain_p, heap, *chains, chain);
1171       return;
1172     }
1173
1174   qsort (VEC_address (dref, comp->refs), VEC_length (dref, comp->refs),
1175          sizeof (dref), order_drefs);
1176
1177   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
1178     {
1179       if (!chain || !DR_IS_READ (a->ref))
1180         {
1181           if (nontrivial_chain_p (chain))
1182             VEC_safe_push (chain_p, heap, *chains, chain);
1183           else
1184             release_chain (chain);
1185           chain = make_rooted_chain (a);
1186           continue;
1187         }
1188
1189       add_ref_to_chain (chain, a);
1190     }
1191
1192   if (nontrivial_chain_p (chain))
1193     {
1194       add_looparound_copies (loop, chain);
1195       VEC_safe_push (chain_p, heap, *chains, chain);
1196     }
1197   else
1198     release_chain (chain);
1199 }
1200
1201 /* Find roots of the values and determine distances in components COMPS, and
1202    separates the references to CHAINS.  LOOP is the current loop.  */
1203
1204 static void
1205 determine_roots (struct loop *loop,
1206                  struct component *comps, VEC (chain_p, heap) **chains)
1207 {
1208   struct component *comp;
1209
1210   for (comp = comps; comp; comp = comp->next)
1211     determine_roots_comp (loop, comp, chains);
1212 }
1213
1214 /* Replace the reference in statement STMT with temporary variable
1215    NEW.  If SET is true, NEW is instead initialized to the value of
1216    the reference in the statement.  IN_LHS is true if the reference
1217    is in the lhs of STMT, false if it is in rhs.  */
1218
1219 static void
1220 replace_ref_with (tree stmt, tree new, bool set, bool in_lhs)
1221 {
1222   tree val, new_stmt;
1223   block_stmt_iterator bsi;
1224
1225   if (TREE_CODE (stmt) == PHI_NODE)
1226     {
1227       gcc_assert (!in_lhs && !set);
1228
1229       val = PHI_RESULT (stmt);
1230       bsi = bsi_after_labels (bb_for_stmt (stmt));
1231       remove_phi_node (stmt, NULL_TREE, false);
1232
1233       /* Turn the phi node into GIMPLE_MODIFY_STMT.  */
1234       new_stmt = build_gimple_modify_stmt (val, new);
1235       SSA_NAME_DEF_STMT (val) = new_stmt;
1236       bsi_insert_before (&bsi, new_stmt, BSI_NEW_STMT);
1237       return;
1238     }
1239
1240   /* Since the reference is of gimple_reg type, it should only
1241      appear as lhs or rhs of modify statement.  */
1242   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1243
1244   /* If we do not need to initialize NEW, just replace the use of OLD.  */
1245   if (!set)
1246     {
1247       gcc_assert (!in_lhs);
1248       GIMPLE_STMT_OPERAND (stmt, 1) = new;
1249       update_stmt (stmt);
1250       return;
1251     }
1252
1253   bsi = bsi_for_stmt (stmt);
1254   if (in_lhs)
1255     {
1256       val = GIMPLE_STMT_OPERAND (stmt, 1);
1257
1258       /* OLD = VAL
1259
1260          is transformed to
1261
1262          OLD = VAL
1263          NEW = VAL
1264
1265          (since the reference is of gimple_reg type, VAL is either gimple
1266          invariant or ssa name).  */
1267     }
1268   else
1269     {
1270       val = GIMPLE_STMT_OPERAND (stmt, 0);
1271
1272       /* VAL = OLD
1273
1274          is transformed to
1275
1276          VAL = OLD
1277          NEW = VAL  */
1278     }
1279
1280   new_stmt = build_gimple_modify_stmt (new, unshare_expr (val));
1281   bsi_insert_after (&bsi, new_stmt, BSI_NEW_STMT);
1282   SSA_NAME_DEF_STMT (new) = new_stmt;
1283 }
1284
1285 /* Returns the reference to the address of REF in the ITER-th iteration of
1286    LOOP, or NULL if we fail to determine it (ITER may be negative).  We
1287    try to preserve the original shape of the reference (not rewrite it
1288    as an indirect ref to the address), to make tree_could_trap_p in
1289    prepare_initializers_chain return false more often.  */
1290
1291 static tree
1292 ref_at_iteration (struct loop *loop, tree ref, int iter)
1293 {
1294   tree idx, *idx_p, type, val, op0 = NULL_TREE, ret;
1295   affine_iv iv;
1296   bool ok;
1297
1298   if (handled_component_p (ref))
1299     {
1300       op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter);
1301       if (!op0)
1302         return NULL_TREE;
1303     }
1304   else if (!INDIRECT_REF_P (ref))
1305     return unshare_expr (ref);
1306
1307   if (TREE_CODE (ref) == INDIRECT_REF)
1308     {
1309       ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
1310       idx = TREE_OPERAND (ref, 0);
1311       idx_p = &TREE_OPERAND (ret, 0);
1312     }
1313   else if (TREE_CODE (ref) == COMPONENT_REF)
1314     {
1315       /* Check that the offset is loop invariant.  */
1316       if (TREE_OPERAND (ref, 2)
1317           && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1318         return NULL_TREE;
1319
1320       return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
1321                      unshare_expr (TREE_OPERAND (ref, 1)),
1322                      unshare_expr (TREE_OPERAND (ref, 2)));
1323     }
1324   else if (TREE_CODE (ref) == ARRAY_REF)
1325     {
1326       /* Check that the lower bound and the step are loop invariant.  */
1327       if (TREE_OPERAND (ref, 2)
1328           && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1329         return NULL_TREE;
1330       if (TREE_OPERAND (ref, 3)
1331           && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
1332         return NULL_TREE;
1333
1334       ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
1335                     unshare_expr (TREE_OPERAND (ref, 2)),
1336                     unshare_expr (TREE_OPERAND (ref, 3)));
1337       idx = TREE_OPERAND (ref, 1);
1338       idx_p = &TREE_OPERAND (ret, 1);
1339     }
1340   else
1341     return NULL_TREE;
1342
1343   ok = simple_iv (loop, first_stmt (loop->header), idx, &iv, true);
1344   if (!ok)
1345     return NULL_TREE;
1346   iv.base = expand_simple_operations (iv.base);
1347   if (integer_zerop (iv.step))
1348     *idx_p = unshare_expr (iv.base);
1349   else
1350     {
1351       type = TREE_TYPE (iv.base);
1352       if (POINTER_TYPE_P (type))
1353         {
1354           val = fold_build2 (MULT_EXPR, sizetype, iv.step,
1355                              size_int (iter));
1356           val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val);
1357         }
1358       else
1359         {
1360           val = fold_build2 (MULT_EXPR, type, iv.step,
1361                              build_int_cst_type (type, iter));
1362           val = fold_build2 (PLUS_EXPR, type, iv.base, val);
1363         }
1364       *idx_p = unshare_expr (val);
1365     }
1366
1367   return ret;
1368 }
1369
1370 /* Get the initialization expression for the INDEX-th temporary variable
1371    of CHAIN.  */
1372
1373 static tree
1374 get_init_expr (chain_p chain, unsigned index)
1375 {
1376   if (chain->type == CT_COMBINATION)
1377     {
1378       tree e1 = get_init_expr (chain->ch1, index);
1379       tree e2 = get_init_expr (chain->ch2, index);
1380
1381       return fold_build2 (chain->operator, chain->rslt_type, e1, e2);
1382     }
1383   else
1384     return VEC_index (tree, chain->inits, index);
1385 }
1386
1387 /* Marks all virtual operands of statement STMT for renaming.  */
1388
1389 void
1390 mark_virtual_ops_for_renaming (tree stmt)
1391 {
1392   ssa_op_iter iter;
1393   tree var;
1394
1395   if (TREE_CODE (stmt) == PHI_NODE)
1396     return;
1397
1398   update_stmt (stmt);
1399
1400   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_VIRTUALS)
1401     {
1402       if (TREE_CODE (var) == SSA_NAME)
1403         var = SSA_NAME_VAR (var);
1404       mark_sym_for_renaming (var);
1405     }
1406 }
1407
1408 /* Calls mark_virtual_ops_for_renaming for all members of LIST.  */
1409
1410 static void
1411 mark_virtual_ops_for_renaming_list (tree list)
1412 {
1413   tree_stmt_iterator tsi;
1414
1415   for (tsi = tsi_start (list); !tsi_end_p (tsi); tsi_next (&tsi))
1416     mark_virtual_ops_for_renaming (tsi_stmt (tsi));
1417 }
1418
1419 /* Returns a new temporary variable used for the I-th variable carrying
1420    value of REF.  The variable's uid is marked in TMP_VARS.  */
1421
1422 static tree
1423 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1424 {
1425   tree type = TREE_TYPE (ref);
1426   tree var = create_tmp_var (type, get_lsm_tmp_name (ref, i));
1427
1428   /* We never access the components of the temporary variable in predictive
1429      commoning.  */
1430   if (TREE_CODE (type) == COMPLEX_TYPE
1431       || TREE_CODE (type) == VECTOR_TYPE)
1432     DECL_GIMPLE_REG_P (var) = 1;
1433
1434   add_referenced_var (var);
1435   bitmap_set_bit (tmp_vars, DECL_UID (var));
1436   return var;
1437 }
1438
1439 /* Creates the variables for CHAIN, as well as phi nodes for them and
1440    initialization on entry to LOOP.  Uids of the newly created
1441    temporary variables are marked in TMP_VARS.  */
1442
1443 static void
1444 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1445 {
1446   unsigned i;
1447   unsigned n = chain->length;
1448   dref root = get_chain_root (chain);
1449   bool reuse_first = !chain->has_max_use_after;
1450   tree ref, init, var, next, stmts;
1451   tree phi;
1452   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1453
1454   /* If N == 0, then all the references are within the single iteration.  And
1455      since this is an nonempty chain, reuse_first cannot be true.  */
1456   gcc_assert (n > 0 || !reuse_first);
1457
1458   chain->vars = VEC_alloc (tree, heap, n + 1);
1459
1460   if (chain->type == CT_COMBINATION)
1461     ref = GIMPLE_STMT_OPERAND (root->stmt, 0);
1462   else
1463     ref = DR_REF (root->ref);
1464
1465   for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1466     {
1467       var = predcom_tmp_var (ref, i, tmp_vars);
1468       VEC_quick_push (tree, chain->vars, var);
1469     }
1470   if (reuse_first)
1471     VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));
1472
1473   for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
1474     VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL_TREE));
1475
1476   for (i = 0; i < n; i++)
1477     {
1478       var = VEC_index (tree, chain->vars, i);
1479       next = VEC_index (tree, chain->vars, i + 1);
1480       init = get_init_expr (chain, i);
1481
1482       init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1483       if (stmts)
1484         {
1485           mark_virtual_ops_for_renaming_list (stmts);
1486           bsi_insert_on_edge_immediate (entry, stmts);
1487         }
1488
1489       phi = create_phi_node (var, loop->header);
1490       SSA_NAME_DEF_STMT (var) = phi;
1491       add_phi_arg (phi, init, entry);
1492       add_phi_arg (phi, next, latch);
1493     }
1494 }
1495
1496 /* Create the variables and initialization statement for root of chain
1497    CHAIN.  Uids of the newly created temporary variables are marked
1498    in TMP_VARS.  */
1499
1500 static void
1501 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1502 {
1503   dref root = get_chain_root (chain);
1504   bool in_lhs = (chain->type == CT_STORE_LOAD
1505                  || chain->type == CT_COMBINATION);
1506
1507   initialize_root_vars (loop, chain, tmp_vars);
1508   replace_ref_with (root->stmt,
1509                     VEC_index (tree, chain->vars, chain->length),
1510                     true, in_lhs);
1511 }
1512
1513 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1514    initialization on entry to LOOP if necessary.  The ssa name for the variable
1515    is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
1516    around the loop is created.  Uid of the newly created temporary variable
1517    is marked in TMP_VARS.  INITS is the list containing the (single)
1518    initializer.  */
1519
1520 static void
1521 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1522                          VEC(tree, heap) **vars, VEC(tree, heap) *inits,
1523                          bitmap tmp_vars)
1524 {
1525   unsigned i;
1526   tree ref = DR_REF (root->ref), init, var, next, stmts;
1527   tree phi;
1528   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1529
1530   /* Find the initializer for the variable, and check that it cannot
1531      trap.  */
1532   init = VEC_index (tree, inits, 0);
1533
1534   *vars = VEC_alloc (tree, heap, written ? 2 : 1);
1535   var = predcom_tmp_var (ref, 0, tmp_vars);
1536   VEC_quick_push (tree, *vars, var);
1537   if (written)
1538     VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));
1539
1540   for (i = 0; VEC_iterate (tree, *vars, i, var); i++)
1541     VEC_replace (tree, *vars, i, make_ssa_name (var, NULL_TREE));
1542
1543   var = VEC_index (tree, *vars, 0);
1544
1545   init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1546   if (stmts)
1547     {
1548       mark_virtual_ops_for_renaming_list (stmts);
1549       bsi_insert_on_edge_immediate (entry, stmts);
1550     }
1551
1552   if (written)
1553     {
1554       next = VEC_index (tree, *vars, 1);
1555       phi = create_phi_node (var, loop->header);
1556       SSA_NAME_DEF_STMT (var) = phi;
1557       add_phi_arg (phi, init, entry);
1558       add_phi_arg (phi, next, latch);
1559     }
1560   else
1561     {
1562       init = build_gimple_modify_stmt (var, init);
1563       SSA_NAME_DEF_STMT (var) = init;
1564       mark_virtual_ops_for_renaming (init);
1565       bsi_insert_on_edge_immediate (entry, init);
1566     }
1567 }
1568
1569
1570 /* Execute load motion for references in chain CHAIN.  Uids of the newly
1571    created temporary variables are marked in TMP_VARS.  */
1572
1573 static void
1574 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1575 {
1576   VEC (tree, heap) *vars;
1577   dref a;
1578   unsigned n_writes = 0, ridx, i;
1579   tree var;
1580
1581   gcc_assert (chain->type == CT_INVARIANT);
1582   gcc_assert (!chain->combined);
1583   for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1584     if (!DR_IS_READ (a->ref))
1585       n_writes++;
1586
1587   /* If there are no reads in the loop, there is nothing to do.  */
1588   if (n_writes == VEC_length (dref, chain->refs))
1589     return;
1590
1591   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1592                            &vars, chain->inits, tmp_vars);
1593
1594   ridx = 0;
1595   for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1596     {
1597       bool is_read = DR_IS_READ (a->ref);
1598       mark_virtual_ops_for_renaming (a->stmt);
1599
1600       if (!DR_IS_READ (a->ref))
1601         {
1602           n_writes--;
1603           if (n_writes)
1604             {
1605               var = VEC_index (tree, vars, 0);
1606               var = make_ssa_name (SSA_NAME_VAR (var), NULL_TREE);
1607               VEC_replace (tree, vars, 0, var);
1608             }
1609           else
1610             ridx = 1;
1611         }
1612
1613       replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
1614                         !is_read, !is_read);
1615     }
1616
1617   VEC_free (tree, heap, vars);
1618 }
1619
1620 /* Returns the single statement in that NAME is used, excepting
1621    the looparound phi nodes contained in one of the chains.  If there is no
1622    such statement, or more statements, NULL_TREE is returned.  */
1623
1624 static tree
1625 single_nonlooparound_use (tree name)
1626 {
1627   use_operand_p use;
1628   imm_use_iterator it;
1629   tree stmt, ret = NULL_TREE;
1630
1631   FOR_EACH_IMM_USE_FAST (use, it, name)
1632     {
1633       stmt = USE_STMT (use);
1634
1635       if (TREE_CODE (stmt) == PHI_NODE)
1636         {
1637           /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
1638              could not be processed anyway, so just fail for them.  */
1639           if (bitmap_bit_p (looparound_phis,
1640                             SSA_NAME_VERSION (PHI_RESULT (stmt))))
1641             continue;
1642
1643           return NULL_TREE;
1644         }
1645       else if (ret != NULL_TREE)
1646         return NULL_TREE;
1647       else
1648         ret = stmt;
1649     }
1650
1651   return ret;
1652 }
1653
1654 /* Remove statement STMT, as well as the chain of assignments in that it is
1655    used.  */
1656
1657 static void
1658 remove_stmt (tree stmt)
1659 {
1660   tree next, name;
1661
1662   if (TREE_CODE (stmt) == PHI_NODE)
1663     {
1664       name = PHI_RESULT (stmt);
1665       next = single_nonlooparound_use (name);
1666       remove_phi_node (stmt, NULL_TREE, true);
1667
1668       if (!next
1669           || TREE_CODE (next) != GIMPLE_MODIFY_STMT
1670           || GIMPLE_STMT_OPERAND (next, 1) != name)
1671         return;
1672
1673       stmt = next;
1674     }
1675
1676   while (1)
1677     {
1678       block_stmt_iterator bsi;
1679
1680       bsi = bsi_for_stmt (stmt);
1681
1682       name = GIMPLE_STMT_OPERAND (stmt, 0);
1683       gcc_assert (TREE_CODE (name) == SSA_NAME);
1684
1685       next = single_nonlooparound_use (name);
1686
1687       mark_virtual_ops_for_renaming (stmt);
1688       bsi_remove (&bsi, true);
1689
1690       if (!next
1691           || TREE_CODE (next) != GIMPLE_MODIFY_STMT
1692           || GIMPLE_STMT_OPERAND (next, 1) != name)
1693         return;
1694
1695       stmt = next;
1696     }
1697 }
1698
1699 /* Perform the predictive commoning optimization for a chain CHAIN.
1700    Uids of the newly created temporary variables are marked in TMP_VARS.*/
1701
1702 static void
1703 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1704                              bitmap tmp_vars)
1705 {
1706   unsigned i;
1707   dref a, root;
1708   tree var;
1709
1710   if (chain->combined)
1711     {
1712       /* For combined chains, just remove the statements that are used to
1713          compute the values of the expression (except for the root one).  */
1714       for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1715         remove_stmt (a->stmt);
1716     }
1717   else
1718     {
1719       /* For non-combined chains, set up the variables that hold its value,
1720          and replace the uses of the original references by these
1721          variables.  */
1722       root = get_chain_root (chain);
1723       mark_virtual_ops_for_renaming (root->stmt);
1724
1725       initialize_root (loop, chain, tmp_vars);
1726       for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1727         {
1728           mark_virtual_ops_for_renaming (a->stmt);
1729           var = VEC_index (tree, chain->vars, chain->length - a->distance);
1730           replace_ref_with (a->stmt, var, false, false);
1731         }
1732     }
1733 }
1734
1735 /* Determines the unroll factor necessary to remove as many temporary variable
1736    copies as possible.  CHAINS is the list of chains that will be
1737    optimized.  */
1738
1739 static unsigned
1740 determine_unroll_factor (VEC (chain_p, heap) *chains)
1741 {
1742   chain_p chain;
1743   unsigned factor = 1, af, nfactor, i;
1744   unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1745
1746   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1747     {
1748       if (chain->type == CT_INVARIANT || chain->combined)
1749         continue;
1750
1751       /* The best unroll factor for this chain is equal to the number of
1752          temporary variables that we create for it.  */
1753       af = chain->length;
1754       if (chain->has_max_use_after)
1755         af++;
1756
1757       nfactor = factor * af / gcd (factor, af);
1758       if (nfactor <= max)
1759         factor = nfactor;
1760     }
1761
1762   return factor;
1763 }
1764
1765 /* Perform the predictive commoning optimization for CHAINS.
1766    Uids of the newly created temporary variables are marked in TMP_VARS.  */
1767
1768 static void
1769 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains,
1770                         bitmap tmp_vars)
1771 {
1772   chain_p chain;
1773   unsigned i;
1774
1775   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1776     {
1777       if (chain->type == CT_INVARIANT)
1778         execute_load_motion (loop, chain, tmp_vars);
1779       else
1780         execute_pred_commoning_chain (loop, chain, tmp_vars);
1781     }
1782
1783   update_ssa (TODO_update_ssa_only_virtuals);
1784 }
1785
1786 /* For each reference in CHAINS, if its defining statement is
1787    ssa name, set it to phi node that defines it.  */
1788
1789 static void
1790 replace_phis_by_defined_names (VEC (chain_p, heap) *chains)
1791 {
1792   chain_p chain;
1793   dref a;
1794   unsigned i, j;
1795
1796   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1797     for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1798       {
1799         gcc_assert (TREE_CODE (a->stmt) != SSA_NAME);
1800         if (TREE_CODE (a->stmt) == PHI_NODE)
1801           a->stmt = PHI_RESULT (a->stmt);
1802       }
1803 }
1804
1805 /* For each reference in CHAINS, if its defining statement is
1806    phi node, set it to the ssa name that is defined by it.  */
1807
1808 static void
1809 replace_names_by_phis (VEC (chain_p, heap) *chains)
1810 {
1811   chain_p chain;
1812   dref a;
1813   unsigned i, j;
1814
1815   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1816     for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1817       if (TREE_CODE (a->stmt) == SSA_NAME)
1818         {
1819           a->stmt = SSA_NAME_DEF_STMT (a->stmt);
1820           gcc_assert (TREE_CODE (a->stmt) == PHI_NODE);
1821         }
1822 }
1823
1824 /* Wrapper over execute_pred_commoning, to pass it as a callback
1825    to tree_transform_and_unroll_loop.  */
1826
1827 struct epcc_data
1828 {
1829   VEC (chain_p, heap) *chains;
1830   bitmap tmp_vars;
1831 };
1832
1833 static void
1834 execute_pred_commoning_cbck (struct loop *loop, void *data)
1835 {
1836   struct epcc_data *dta = data;
1837
1838   /* Restore phi nodes that were replaced by ssa names before
1839      tree_transform_and_unroll_loop (see detailed description in
1840      tree_predictive_commoning_loop).  */
1841   replace_names_by_phis (dta->chains);
1842   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1843 }
1844
1845 /* Returns true if we can and should unroll LOOP FACTOR times.  Number
1846    of iterations of the loop is returned in NITER.  */
1847
1848 static bool
1849 should_unroll_loop_p (struct loop *loop, unsigned factor,
1850                       struct tree_niter_desc *niter)
1851 {
1852   edge exit;
1853
1854   if (factor == 1)
1855     return false;
1856
1857   /* Check whether unrolling is possible.  We only want to unroll loops
1858      for that we are able to determine number of iterations.  We also
1859      want to split the extra iterations of the loop from its end,
1860      therefore we require that the loop has precisely one
1861      exit.  */
1862
1863   exit = single_dom_exit (loop);
1864   if (!exit)
1865     return false;
1866
1867   if (!number_of_iterations_exit (loop, exit, niter, false))
1868     return false;
1869
1870   /* And of course, we must be able to duplicate the loop.  */
1871   if (!can_duplicate_loop_p (loop))
1872     return false;
1873
1874   /* The final loop should be small enough.  */
1875   if (tree_num_loop_insns (loop, &eni_size_weights) * factor
1876       > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
1877     return false;
1878
1879   return true;
1880 }
1881
1882 /* Base NAME and all the names in the chain of phi nodes that use it
1883    on variable VAR.  The phi nodes are recognized by being in the copies of
1884    the header of the LOOP.  */
1885
1886 static void
1887 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1888 {
1889   tree stmt, phi;
1890   imm_use_iterator iter;
1891   edge e;
1892
1893   SSA_NAME_VAR (name) = var;
1894
1895   while (1)
1896     {
1897       phi = NULL;
1898       FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1899         {
1900           if (TREE_CODE (stmt) == PHI_NODE
1901               && flow_bb_inside_loop_p (loop, bb_for_stmt (stmt)))
1902             {
1903               phi = stmt;
1904               BREAK_FROM_IMM_USE_STMT (iter);
1905             }
1906         }
1907       if (!phi)
1908         return;
1909
1910       if (bb_for_stmt (phi) == loop->header)
1911         e = loop_latch_edge (loop);
1912       else
1913         e = single_pred_edge (bb_for_stmt (stmt));
1914
1915       name = PHI_RESULT (phi);
1916       SSA_NAME_VAR (name) = var;
1917     }
1918 }
1919
1920 /* Given an unrolled LOOP after predictive commoning, remove the
1921    register copies arising from phi nodes by changing the base
1922    variables of SSA names.  TMP_VARS is the set of the temporary variables
1923    for those we want to perform this.  */
1924
1925 static void
1926 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1927 {
1928   edge e;
1929   tree phi, name, use, var, stmt;
1930
1931   e = loop_latch_edge (loop);
1932   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1933     {
1934       name = PHI_RESULT (phi);
1935       var = SSA_NAME_VAR (name);
1936       if (!bitmap_bit_p (tmp_vars, DECL_UID (var)))
1937         continue;
1938       use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1939       gcc_assert (TREE_CODE (use) == SSA_NAME);
1940
1941       /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
1942       stmt = SSA_NAME_DEF_STMT (use);
1943       while (TREE_CODE (stmt) == PHI_NODE
1944              /* In case we could not unroll the loop enough to eliminate
1945                 all copies, we may reach the loop header before the defining
1946                 statement (in that case, some register copies will be present
1947                 in loop latch in the final code, corresponding to the newly
1948                 created looparound phi nodes).  */
1949              && bb_for_stmt (stmt) != loop->header)
1950         {
1951           gcc_assert (single_pred_p (bb_for_stmt (stmt)));
1952           use = PHI_ARG_DEF (stmt, 0);
1953           stmt = SSA_NAME_DEF_STMT (use);
1954         }
1955
1956       base_names_in_chain_on (loop, use, var);
1957     }
1958 }
1959
1960 /* Returns true if CHAIN is suitable to be combined.  */
1961
1962 static bool
1963 chain_can_be_combined_p (chain_p chain)
1964 {
1965   return (!chain->combined
1966           && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1967 }
1968
1969 /* Returns the modify statement that uses NAME.  Skips over assignment
1970    statements, NAME is replaced with the actual name used in the returned
1971    statement.  */
1972
1973 static tree
1974 find_use_stmt (tree *name)
1975 {
1976   tree stmt, rhs, lhs;
1977
1978   /* Skip over assignments.  */
1979   while (1)
1980     {
1981       stmt = single_nonlooparound_use (*name);
1982       if (!stmt)
1983         return NULL_TREE;
1984
1985       if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1986         return NULL_TREE;
1987
1988       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1989       if (TREE_CODE (lhs) != SSA_NAME)
1990         return NULL_TREE;
1991
1992       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1993       if (rhs != *name)
1994         break;
1995
1996       *name = lhs;
1997     }
1998
1999   if (!EXPR_P (rhs)
2000       || REFERENCE_CLASS_P (rhs)
2001       || TREE_CODE_LENGTH (TREE_CODE (rhs)) != 2)
2002     return NULL_TREE;
2003
2004   return stmt;
2005 }
2006
2007 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2008
2009 static bool
2010 may_reassociate_p (tree type, enum tree_code code)
2011 {
2012   if (FLOAT_TYPE_P (type)
2013       && !flag_unsafe_math_optimizations)
2014     return false;
2015
2016   return (commutative_tree_code (code)
2017           && associative_tree_code (code));
2018 }
2019
2020 /* If the operation used in STMT is associative and commutative, go through the
2021    tree of the same operations and returns its root.  Distance to the root
2022    is stored in DISTANCE.  */
2023
2024 static tree
2025 find_associative_operation_root (tree stmt, unsigned *distance)
2026 {
2027   tree rhs = GIMPLE_STMT_OPERAND (stmt, 1), lhs, next;
2028   enum tree_code code = TREE_CODE (rhs);
2029   unsigned dist = 0;
2030
2031   if (!may_reassociate_p (TREE_TYPE (rhs), code))
2032     return NULL_TREE;
2033
2034   while (1)
2035     {
2036       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2037       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2038
2039       next = find_use_stmt (&lhs);
2040       if (!next)
2041         break;
2042
2043       rhs = GIMPLE_STMT_OPERAND (next, 1);
2044       if (TREE_CODE (rhs) != code)
2045         break;
2046
2047       stmt = next;
2048       dist++;
2049     }
2050
2051   if (distance)
2052     *distance = dist;
2053   return stmt;
2054 }
2055
2056 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2057    is no such statement, returns NULL_TREE.  In case the operation used on
2058    NAME1 and NAME2 is associative and commutative, returns the root of the
2059    tree formed by this operation instead of the statement that uses NAME1 or
2060    NAME2.  */
2061
2062 static tree
2063 find_common_use_stmt (tree *name1, tree *name2)
2064 {
2065   tree stmt1, stmt2;
2066
2067   stmt1 = find_use_stmt (name1);
2068   if (!stmt1)
2069     return NULL_TREE;
2070
2071   stmt2 = find_use_stmt (name2);
2072   if (!stmt2)
2073     return NULL_TREE;
2074
2075   if (stmt1 == stmt2)
2076     return stmt1;
2077
2078   stmt1 = find_associative_operation_root (stmt1, NULL);
2079   if (!stmt1)
2080     return NULL_TREE;
2081   stmt2 = find_associative_operation_root (stmt2, NULL);
2082   if (!stmt2)
2083     return NULL_TREE;
2084
2085   return (stmt1 == stmt2 ? stmt1 : NULL_TREE);
2086 }
2087
2088 /* Checks whether R1 and R2 are combined together using CODE, with the result
2089    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2090    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2091
2092 static bool
2093 combinable_refs_p (dref r1, dref r2,
2094                    enum tree_code *code, bool *swap, tree *rslt_type)
2095 {
2096   enum tree_code acode;
2097   bool aswap;
2098   tree atype;
2099   tree name1, name2, stmt, rhs;
2100
2101   name1 = name_for_ref (r1);
2102   name2 = name_for_ref (r2);
2103   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2104
2105   stmt = find_common_use_stmt (&name1, &name2);
2106
2107   if (!stmt)
2108     return false;
2109
2110   rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2111   acode = TREE_CODE (rhs);
2112   aswap = (!commutative_tree_code (acode)
2113            && TREE_OPERAND (rhs, 0) != name1);
2114   atype = TREE_TYPE (rhs);
2115
2116   if (*code == ERROR_MARK)
2117     {
2118       *code = acode;
2119       *swap = aswap;
2120       *rslt_type = atype;
2121       return true;
2122     }
2123
2124   return (*code == acode
2125           && *swap == aswap
2126           && *rslt_type == atype);
2127 }
2128
2129 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2130    an assignment of the remaining operand.  */
2131
2132 static void
2133 remove_name_from_operation (tree stmt, tree op)
2134 {
2135   tree *rhs;
2136
2137   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2138
2139   rhs = &GIMPLE_STMT_OPERAND (stmt, 1);
2140   if (TREE_OPERAND (*rhs, 0) == op)
2141     *rhs = TREE_OPERAND (*rhs, 1);
2142   else if (TREE_OPERAND (*rhs, 1) == op)
2143     *rhs = TREE_OPERAND (*rhs, 0);
2144   else
2145     gcc_unreachable ();
2146   update_stmt (stmt);
2147 }
2148
2149 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2150    are combined in a single statement, and returns this statement.  */
2151
2152 static tree
2153 reassociate_to_the_same_stmt (tree name1, tree name2)
2154 {
2155   tree stmt1, stmt2, root1, root2, r1, r2, s1, s2;
2156   tree new_stmt, tmp_stmt, new_name, tmp_name, var;
2157   unsigned dist1, dist2;
2158   enum tree_code code;
2159   tree type = TREE_TYPE (name1);
2160   block_stmt_iterator bsi;
2161
2162   stmt1 = find_use_stmt (&name1);
2163   stmt2 = find_use_stmt (&name2);
2164   root1 = find_associative_operation_root (stmt1, &dist1);
2165   root2 = find_associative_operation_root (stmt2, &dist2);
2166   code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt1, 1));
2167
2168   gcc_assert (root1 && root2 && root1 == root2
2169               && code == TREE_CODE (GIMPLE_STMT_OPERAND (stmt2, 1)));
2170
2171   /* Find the root of the nearest expression in that both NAME1 and NAME2
2172      are used.  */
2173   r1 = name1;
2174   s1 = stmt1;
2175   r2 = name2;
2176   s2 = stmt2;
2177
2178   while (dist1 > dist2)
2179     {
2180       s1 = find_use_stmt (&r1);
2181       r1 = GIMPLE_STMT_OPERAND (s1, 0);
2182       dist1--;
2183     }
2184   while (dist2 > dist1)
2185     {
2186       s2 = find_use_stmt (&r2);
2187       r2 = GIMPLE_STMT_OPERAND (s2, 0);
2188       dist2--;
2189     }
2190
2191   while (s1 != s2)
2192     {
2193       s1 = find_use_stmt (&r1);
2194       r1 = GIMPLE_STMT_OPERAND (s1, 0);
2195       s2 = find_use_stmt (&r2);
2196       r2 = GIMPLE_STMT_OPERAND (s2, 0);
2197     }
2198
2199   /* Remove NAME1 and NAME2 from the statements in that they are used
2200      currently.  */
2201   remove_name_from_operation (stmt1, name1);
2202   remove_name_from_operation (stmt2, name2);
2203
2204   /* Insert the new statement combining NAME1 and NAME2 before S1, and
2205      combine it with the rhs of S1.  */
2206   var = create_tmp_var (type, "predreastmp");
2207   add_referenced_var (var);
2208   new_name = make_ssa_name (var, NULL_TREE);
2209   new_stmt = build_gimple_modify_stmt (new_name,
2210                             fold_build2 (code, type, name1, name2));
2211   SSA_NAME_DEF_STMT (new_name) = new_stmt;
2212
2213   var = create_tmp_var (type, "predreastmp");
2214   add_referenced_var (var);
2215   tmp_name = make_ssa_name (var, NULL_TREE);
2216   tmp_stmt = build_gimple_modify_stmt (tmp_name,
2217                                             GIMPLE_STMT_OPERAND (s1, 1));
2218   SSA_NAME_DEF_STMT (tmp_name) = tmp_stmt;
2219
2220   GIMPLE_STMT_OPERAND (s1, 1) = fold_build2 (code, type, new_name, tmp_name);
2221   update_stmt (s1);
2222
2223   bsi = bsi_for_stmt (s1);
2224   bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
2225   bsi_insert_before (&bsi, tmp_stmt, BSI_SAME_STMT);
2226
2227   return new_stmt;
2228 }
2229
2230 /* Returns the statement that combines references R1 and R2.  In case R1
2231    and R2 are not used in the same statement, but they are used with an
2232    associative and commutative operation in the same expression, reassociate
2233    the expression so that they are used in the same statement.  */
2234
2235 static tree
2236 stmt_combining_refs (dref r1, dref r2)
2237 {
2238   tree stmt1, stmt2;
2239   tree name1 = name_for_ref (r1);
2240   tree name2 = name_for_ref (r2);
2241
2242   stmt1 = find_use_stmt (&name1);
2243   stmt2 = find_use_stmt (&name2);
2244   if (stmt1 == stmt2)
2245     return stmt1;
2246
2247   return reassociate_to_the_same_stmt (name1, name2);
2248 }
2249
2250 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2251    description of the new chain is returned, otherwise we return NULL.  */
2252
2253 static chain_p
2254 combine_chains (chain_p ch1, chain_p ch2)
2255 {
2256   dref r1, r2, nw;
2257   enum tree_code op = ERROR_MARK;
2258   bool swap = false;
2259   chain_p new_chain;
2260   unsigned i;
2261   tree root_stmt;
2262   tree rslt_type = NULL_TREE;
2263
2264   if (ch1 == ch2)
2265     return false;
2266   if (ch1->length != ch2->length)
2267     return NULL;
2268
2269   if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs))
2270     return NULL;
2271
2272   for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2273                && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2274     {
2275       if (r1->distance != r2->distance)
2276         return NULL;
2277
2278       if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2279         return NULL;
2280     }
2281
2282   if (swap)
2283     {
2284       chain_p tmp = ch1;
2285       ch1 = ch2;
2286       ch2 = tmp;
2287     }
2288
2289   new_chain = XCNEW (struct chain);
2290   new_chain->type = CT_COMBINATION;
2291   new_chain->operator = op;
2292   new_chain->ch1 = ch1;
2293   new_chain->ch2 = ch2;
2294   new_chain->rslt_type = rslt_type;
2295   new_chain->length = ch1->length;
2296
2297   for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2298                && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2299     {
2300       nw = XCNEW (struct dref);
2301       nw->stmt = stmt_combining_refs (r1, r2);
2302       nw->distance = r1->distance;
2303
2304       VEC_safe_push (dref, heap, new_chain->refs, nw);
2305     }
2306
2307   new_chain->has_max_use_after = false;
2308   root_stmt = get_chain_root (new_chain)->stmt;
2309   for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++)
2310     {
2311       if (nw->distance == new_chain->length
2312           && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2313         {
2314           new_chain->has_max_use_after = true;
2315           break;
2316         }
2317     }
2318
2319   ch1->combined = true;
2320   ch2->combined = true;
2321   return new_chain;
2322 }
2323
2324 /* Try to combine the CHAINS.  */
2325
2326 static void
2327 try_combine_chains (VEC (chain_p, heap) **chains)
2328 {
2329   unsigned i, j;
2330   chain_p ch1, ch2, cch;
2331   VEC (chain_p, heap) *worklist = NULL;
2332
2333   for (i = 0; VEC_iterate (chain_p, *chains, i, ch1); i++)
2334     if (chain_can_be_combined_p (ch1))
2335       VEC_safe_push (chain_p, heap, worklist, ch1);
2336
2337   while (!VEC_empty (chain_p, worklist))
2338     {
2339       ch1 = VEC_pop (chain_p, worklist);
2340       if (!chain_can_be_combined_p (ch1))
2341         continue;
2342
2343       for (j = 0; VEC_iterate (chain_p, *chains, j, ch2); j++)
2344         {
2345           if (!chain_can_be_combined_p (ch2))
2346             continue;
2347
2348           cch = combine_chains (ch1, ch2);
2349           if (cch)
2350             {
2351               VEC_safe_push (chain_p, heap, worklist, cch);
2352               VEC_safe_push (chain_p, heap, *chains, cch);
2353               break;
2354             }
2355         }
2356     }
2357 }
2358
2359 /* Sets alias information based on data reference DR for REF,
2360    if necessary.  */
2361
2362 static void
2363 set_alias_info (tree ref, struct data_reference *dr)
2364 {
2365   tree var;
2366   tree tag = DR_SYMBOL_TAG (dr);
2367
2368   gcc_assert (tag != NULL_TREE);
2369
2370   ref = get_base_address (ref);
2371   if (!ref || !INDIRECT_REF_P (ref))
2372     return;
2373
2374   var = SSA_NAME_VAR (TREE_OPERAND (ref, 0));
2375   if (var_ann (var)->symbol_mem_tag)
2376     return;
2377
2378   if (!MTAG_P (tag))
2379     new_type_alias (var, tag, ref);
2380   else
2381     var_ann (var)->symbol_mem_tag = tag;
2382
2383   var_ann (var)->subvars = DR_SUBVARS (dr);
2384 }
2385
2386 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
2387    impossible because one of these initializers may trap, true otherwise.  */
2388
2389 static bool
2390 prepare_initializers_chain (struct loop *loop, chain_p chain)
2391 {
2392   unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2393   struct data_reference *dr = get_chain_root (chain)->ref;
2394   tree init, stmts;
2395   dref laref;
2396   edge entry = loop_preheader_edge (loop);
2397
2398   /* Find the initializers for the variables, and check that they cannot
2399      trap.  */
2400   chain->inits = VEC_alloc (tree, heap, n);
2401   for (i = 0; i < n; i++)
2402     VEC_quick_push (tree, chain->inits, NULL_TREE);
2403
2404   /* If we have replaced some looparound phi nodes, use their initializers
2405      instead of creating our own.  */
2406   for (i = 0; VEC_iterate (dref, chain->refs, i, laref); i++)
2407     {
2408       if (TREE_CODE (laref->stmt) != PHI_NODE)
2409         continue;
2410
2411       gcc_assert (laref->distance > 0);
2412       VEC_replace (tree, chain->inits, n - laref->distance,
2413                    PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry));
2414     }
2415
2416   for (i = 0; i < n; i++)
2417     {
2418       if (VEC_index (tree, chain->inits, i) != NULL_TREE)
2419         continue;
2420
2421       init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
2422       if (!init)
2423         return false;
2424
2425       if (!chain->all_always_accessed && tree_could_trap_p (init))
2426         return false;
2427
2428       init = force_gimple_operand (init, &stmts, false, NULL_TREE);
2429       if (stmts)
2430         {
2431           mark_virtual_ops_for_renaming_list (stmts);
2432           bsi_insert_on_edge_immediate (entry, stmts);
2433         }
2434       set_alias_info (init, dr);
2435
2436       VEC_replace (tree, chain->inits, i, init);
2437     }
2438
2439   return true;
2440 }
2441
2442 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2443    be used because the initializers might trap.  */
2444
2445 static void
2446 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains)
2447 {
2448   chain_p chain;
2449   unsigned i;
2450
2451   for (i = 0; i < VEC_length (chain_p, chains); )
2452     {
2453       chain = VEC_index (chain_p, chains, i);
2454       if (prepare_initializers_chain (loop, chain))
2455         i++;
2456       else
2457         {
2458           release_chain (chain);
2459           VEC_unordered_remove (chain_p, chains, i);
2460         }
2461     }
2462 }
2463
2464 /* Performs predictive commoning for LOOP.  Returns true if LOOP was
2465    unrolled.  */
2466
2467 static bool
2468 tree_predictive_commoning_loop (struct loop *loop)
2469 {
2470   VEC (data_reference_p, heap) *datarefs;
2471   VEC (ddr_p, heap) *dependences;
2472   struct component *components;
2473   VEC (chain_p, heap) *chains = NULL;
2474   unsigned unroll_factor;
2475   struct tree_niter_desc desc;
2476   bool unroll = false;
2477   edge exit;
2478   bitmap tmp_vars;
2479
2480   if (dump_file && (dump_flags & TDF_DETAILS))
2481     fprintf (dump_file, "Processing loop %d\n",  loop->num);
2482
2483   /* Find the data references and split them into components according to their
2484      dependence relations.  */
2485   datarefs = VEC_alloc (data_reference_p, heap, 10);
2486   dependences = VEC_alloc (ddr_p, heap, 10);
2487   compute_data_dependences_for_loop (loop, true, &datarefs, &dependences);
2488   if (dump_file && (dump_flags & TDF_DETAILS))
2489     dump_data_dependence_relations (dump_file, dependences);
2490
2491   components = split_data_refs_to_components (loop, datarefs, dependences);
2492   free_dependence_relations (dependences);
2493   if (!components)
2494     {
2495       free_data_refs (datarefs);
2496       return false;
2497     }
2498
2499   if (dump_file && (dump_flags & TDF_DETAILS))
2500     {
2501       fprintf (dump_file, "Initial state:\n\n");
2502       dump_components (dump_file, components);
2503     }
2504
2505   /* Find the suitable components and split them into chains.  */
2506   components = filter_suitable_components (loop, components);
2507
2508   tmp_vars = BITMAP_ALLOC (NULL);
2509   looparound_phis = BITMAP_ALLOC (NULL);
2510   determine_roots (loop, components, &chains);
2511   release_components (components);
2512
2513   if (!chains)
2514     {
2515       if (dump_file && (dump_flags & TDF_DETAILS))
2516         fprintf (dump_file,
2517                  "Predictive commoning failed: no suitable chains\n");
2518       goto end;
2519     }
2520   prepare_initializers (loop, chains);
2521
2522   /* Try to combine the chains that are always worked with together.  */
2523   try_combine_chains (&chains);
2524
2525   if (dump_file && (dump_flags & TDF_DETAILS))
2526     {
2527       fprintf (dump_file, "Before commoning:\n\n");
2528       dump_chains (dump_file, chains);
2529     }
2530
2531   /* Determine the unroll factor, and if the loop should be unrolled, ensure
2532      that its number of iterations is divisible by the factor.  */
2533   unroll_factor = determine_unroll_factor (chains);
2534   scev_reset ();
2535   unroll = should_unroll_loop_p (loop, unroll_factor, &desc);
2536   exit = single_dom_exit (loop);
2537
2538   /* Execute the predictive commoning transformations, and possibly unroll the
2539      loop.  */
2540   if (unroll)
2541     {
2542       struct epcc_data dta;
2543
2544       if (dump_file && (dump_flags & TDF_DETAILS))
2545         fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2546
2547       dta.chains = chains;
2548       dta.tmp_vars = tmp_vars;
2549
2550       update_ssa (TODO_update_ssa_only_virtuals);
2551
2552       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2553          execute_pred_commoning_cbck is called may cause phi nodes to be
2554          reallocated, which is a problem since CHAINS may point to these
2555          statements.  To fix this, we store the ssa names defined by the
2556          phi nodes here instead of the phi nodes themselves, and restore
2557          the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
2558       replace_phis_by_defined_names (chains);
2559
2560       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2561                                       execute_pred_commoning_cbck, &dta);
2562       eliminate_temp_copies (loop, tmp_vars);
2563     }
2564   else
2565     {
2566       if (dump_file && (dump_flags & TDF_DETAILS))
2567         fprintf (dump_file,
2568                  "Executing predictive commoning without unrolling.\n");
2569       execute_pred_commoning (loop, chains, tmp_vars);
2570     }
2571
2572 end: ;
2573   release_chains (chains);
2574   free_data_refs (datarefs);
2575   BITMAP_FREE (tmp_vars);
2576   BITMAP_FREE (looparound_phis);
2577
2578   free_affine_expand_cache (&name_expansions);
2579
2580   return unroll;
2581 }
2582
2583 /* Runs predictive commoning.  */
2584
2585 unsigned
2586 tree_predictive_commoning (void)
2587 {
2588   bool unrolled = false;
2589   struct loop *loop;
2590   loop_iterator li;
2591   unsigned ret = 0;
2592
2593   initialize_original_copy_tables ();
2594   FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
2595     {
2596       unrolled |= tree_predictive_commoning_loop (loop);
2597     }
2598
2599   if (unrolled)
2600     {
2601       scev_reset ();
2602       ret = TODO_cleanup_cfg;
2603     }
2604   free_original_copy_tables ();
2605
2606   return ret;
2607 }