gcc/tree-predcom.c

   1 /* Predictive commoning.
   2    Copyright (C) 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This file implements the predictive commoning optimization.  Predictive
  22    commoning can be viewed as CSE around a loop, and with some improvements,
  23    as generalized strength reduction-- i.e., reusing values computed in
  24    earlier iterations of a loop in the later ones.  So far, the pass only
  25    handles the most useful case, that is, reusing values of memory references.
  26    If you think this is all just a special case of PRE, you are sort of right;
  27    however, concentrating on loops is simpler, and makes it possible to
  28    incorporate data dependence analysis to detect the opportunities, perform
  29    loop unrolling to avoid copies together with renaming immediately,
  30    and if needed, we could also take register pressure into account.
  31
  32    Let us demonstrate what is done on an example:
  33
  34    for (i = 0; i < 100; i++)
  35      {
  36        a[i+2] = a[i] + a[i+1];
  37        b[10] = b[10] + i;
  38        c[i] = c[99 - i];
  39        d[i] = d[i + 1];
  40      }
  41
  42    1) We find data references in the loop, and split them to mutually
  43       independent groups (i.e., we find components of a data dependence
  44       graph).  We ignore read-read dependences whose distance is not constant.
  45       (TODO -- we could also ignore antidependences).  In this example, we
  46       find the following groups:
  47
  48       a[i]{read}, a[i+1]{read}, a[i+2]{write}
  49       b[10]{read}, b[10]{write}
  50       c[99 - i]{read}, c[i]{write}
  51       d[i + 1]{read}, d[i]{write}
  52
  53    2) Inside each of the group, we verify several conditions:
  54       a) all the references must differ in indices only, and the indices
  55          must all have the same step
  56       b) the references must dominate loop latch (and thus, they must be
  57          ordered by dominance relation).
  58       c) the distance of the indices must be a small multiple of the step
  59       We are then able to compute the difference of the references (# of
  60       iterations before they point to the same place as the first of them).
  61       Also, in case there are writes in the loop, we split the groups into
  62       chains whose head is the write whose values are used by the reads in
  63       the same chain.  The chains are then processed independently,
  64       making the further transformations simpler.  Also, the shorter chains
  65       need the same number of registers, but may require lower unrolling
  66       factor in order to get rid of the copies on the loop latch.
  67
  68       In our example, we get the following chains (the chain for c is invalid).
  69
  70       a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
  71       b[10]{read,+0}, b[10]{write,+0}
  72       d[i + 1]{read,+0}, d[i]{write,+1}
  73
  74    3) For each read, we determine the read or write whose value it reuses,
  75       together with the distance of this reuse.  I.e. we take the last
  76       reference before it with distance 0, or the last of the references
  77       with the smallest positive distance to the read.  Then, we remove
  78       the references that are not used in any of these chains, discard the
  79       empty groups, and propagate all the links so that they point to the
  80       single root reference of the chain (adjusting their distance
  81       appropriately).  Some extra care needs to be taken for references with
  82       step 0.  In our example (the numbers indicate the distance of the
  83       reuse),
  84
  85       a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
  86       b[10] --> (*) 1, b[10] (*)
  87
  88    4) The chains are combined together if possible.  If the corresponding
  89       elements of two chains are always combined together with the same
  90       operator, we remember just the result of this combination, instead
  91       of remembering the values separately.  We may need to perform
  92       reassociation to enable combining, for example
  93
  94       e[i] + f[i+1] + e[i+1] + f[i]
  95
  96       can be reassociated as
  97
  98       (e[i] + f[i]) + (e[i+1] + f[i+1])
  99
 100       and we can combine the chains for e and f into one chain.
 101
 102    5) For each root reference (end of the chain) R, let N be maximum distance
 103       of a reference reusing its value.  Variables R0 upto RN are created,
 104       together with phi nodes that transfer values from R1 .. RN to
 105       R0 .. R(N-1).
 106       Initial values are loaded to R0..R(N-1) (in case not all references
 107       must necessarily be accessed and they may trap, we may fail here;
 108       TODO sometimes, the loads could be guarded by a check for the number
 109       of iterations).  Values loaded/stored in roots are also copied to
 110       RN.  Other reads are replaced with the appropriate variable Ri.
 111       Everything is put to SSA form.
 112
 113       As a small improvement, if R0 is dead after the root (i.e., all uses of
 114       the value with the maximum distance dominate the root), we can avoid
 115       creating RN and use R0 instead of it.
 116
 117       In our example, we get (only the parts concerning a and b are shown):
 118       for (i = 0; i < 100; i++)
 119         {
 120           f = phi (a[0], s);
 121           s = phi (a[1], f);
 122           x = phi (b[10], x);
 123
 124           f = f + s;
 125           a[i+2] = f;
 126           x = x + i;
 127           b[10] = x;
 128         }
 129
 130    6) Factor F for unrolling is determined as the smallest common multiple of
 131       (N + 1) for each root reference (N for references for that we avoided
 132       creating RN).  If F and the loop is small enough, loop is unrolled F
 133       times.  The stores to RN (R0) in the copies of the loop body are
 134       periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
 135       be coalesced and the copies can be eliminated.
 136
 137       TODO -- copy propagation and other optimizations may change the live
 138       ranges of the temporary registers and prevent them from being coalesced;
 139       this may increase the register pressure.
 140
 141       In our case, F = 2 and the (main loop of the) result is
 142
 143       for (i = 0; i < ...; i += 2)
 144         {
 145           f = phi (a[0], f);
 146           s = phi (a[1], s);
 147           x = phi (b[10], x);
 148
 149           f = f + s;
 150           a[i+2] = f;
 151           x = x + i;
 152           b[10] = x;
 153
 154           s = s + f;
 155           a[i+3] = s;
 156           x = x + i;
 157           b[10] = x;
 158        }
 159
 160    TODO -- stores killing other stores can be taken into account, e.g.,
 161    for (i = 0; i < n; i++)
 162      {
 163        a[i] = 1;
 164        a[i+2] = 2;
 165      }
 166
 167    can be replaced with
 168
 169    t0 = a[0];
 170    t1 = a[1];
 171    for (i = 0; i < n; i++)
 172      {
 173        a[i] = 1;
 174        t2 = 2;
 175        t0 = t1;
 176        t1 = t2;
 177      }
 178    a[n] = t0;
 179    a[n+1] = t1;
 180
 181    The interesting part is that this would generalize store motion; still, since
 182    sm is performed elsewhere, it does not seem that important.
 183
 184    Predictive commoning can be generalized for arbitrary computations (not
 185    just memory loads), and also nontrivial transfer functions (e.g., replacing
 186    i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
 187
 188 #include "config.h"
 189 #include "system.h"
 190 #include "coretypes.h"
 191 #include "tm.h"
 192 #include "tree.h"
 193 #include "tm_p.h"
 194 #include "cfgloop.h"
 195 #include "tree-flow.h"
 196 #include "ggc.h"
 197 #include "tree-data-ref.h"
 198 #include "tree-scalar-evolution.h"
 199 #include "tree-chrec.h"
 200 #include "params.h"
 201 #include "diagnostic.h"
 202 #include "tree-pass.h"
 203 #include "tree-affine.h"
 204 #include "tree-inline.h"
 205
 206 /* The maximum number of iterations between the considered memory
 207    references.  */
 208
 209 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
 210
 211 /* Data references.  */
 212
 213 typedef struct dref
 214 {
 215   /* The reference itself.  */
 216   struct data_reference *ref;
 217
 218   /* The statement in that the reference appears.  */
 219   tree stmt;
 220
 221   /* Distance of the reference from the root of the chain (in number of
 222      iterations of the loop).  */
 223   unsigned distance;
 224
 225   /* Number of iterations offset from the first reference in the component.  */
 226   double_int offset;
 227
 228   /* Number of the reference in a component, in dominance ordering.  */
 229   unsigned pos;
 230
 231   /* True if the memory reference is always accessed when the loop is
 232      entered.  */
 233   unsigned always_accessed : 1;
 234 } *dref;
 235
 236 DEF_VEC_P (dref);
 237 DEF_VEC_ALLOC_P (dref, heap);
 238
 239 /* Type of the chain of the references.  */
 240
 241 enum chain_type
 242 {
 243   /* The addresses of the references in the chain are constant.  */
 244   CT_INVARIANT,
 245
 246   /* There are only loads in the chain.  */
 247   CT_LOAD,
 248
 249   /* Root of the chain is store, the rest are loads.  */
 250   CT_STORE_LOAD,
 251
 252   /* A combination of two chains.  */
 253   CT_COMBINATION
 254 };
 255
 256 /* Chains of data references.  */
 257
 258 typedef struct chain
 259 {
 260   /* Type of the chain.  */
 261   enum chain_type type;
 262
 263   /* For combination chains, the operator and the two chains that are
 264      combined, and the type of the result.  */
 265   enum tree_code operator;
 266   tree rslt_type;
 267   struct chain *ch1, *ch2;
 268
 269   /* The references in the chain.  */
 270   VEC(dref,heap) *refs;
 271
 272   /* The maximum distance of the reference in the chain from the root.  */
 273   unsigned length;
 274
 275   /* The variables used to copy the value throughout iterations.  */
 276   VEC(tree,heap) *vars;
 277
 278   /* Initializers for the variables.  */
 279   VEC(tree,heap) *inits;
 280
 281   /* True if there is a use of a variable with the maximal distance
 282      that comes after the root in the loop.  */
 283   unsigned has_max_use_after : 1;
 284
 285   /* True if all the memory references in the chain are always accessed.  */
 286   unsigned all_always_accessed : 1;
 287
 288   /* True if this chain was combined together with some other chain.  */
 289   unsigned combined : 1;
 290 } *chain_p;
 291
 292 DEF_VEC_P (chain_p);
 293 DEF_VEC_ALLOC_P (chain_p, heap);
 294
 295 /* Describes the knowledge about the step of the memory references in
 296    the component.  */
 297
 298 enum ref_step_type
 299 {
 300   /* The step is zero.  */
 301   RS_INVARIANT,
 302
 303   /* The step is nonzero.  */
 304   RS_NONZERO,
 305
 306   /* The step may or may not be nonzero.  */
 307   RS_ANY
 308 };
 309
 310 /* Components of the data dependence graph.  */
 311
 312 struct component
 313 {
 314   /* The references in the component.  */
 315   VEC(dref,heap) *refs;
 316
 317   /* What we know about the step of the references in the component.  */
 318   enum ref_step_type comp_step;
 319
 320   /* Next component in the list.  */
 321   struct component *next;
 322 };
 323
 324 /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
 325
 326 static bitmap looparound_phis;
 327
 328 /* Cache used by tree_to_aff_combination_expand.  */
 329
 330 static struct pointer_map_t *name_expansions;
 331
 332 /* Dumps data reference REF to FILE.  */
 333
 334 extern void dump_dref (FILE *, dref);
 335 void
 336 dump_dref (FILE *file, dref ref)
 337 {
 338   if (ref->ref)
 339     {
 340       fprintf (file, "    ");
 341       print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
 342       fprintf (file, " (id %u%s)\n", ref->pos,
 343                DR_IS_READ (ref->ref) ? "" : ", write");
 344
 345       fprintf (file, "      offset ");
 346       dump_double_int (file, ref->offset, false);
 347       fprintf (file, "\n");
 348
 349       fprintf (file, "      distance %u\n", ref->distance);
 350     }
 351   else
 352     {
 353       if (TREE_CODE (ref->stmt) == PHI_NODE)
 354         fprintf (file, "    looparound ref\n");
 355       else
 356         fprintf (file, "    combination ref\n");
 357       fprintf (file, "      in statement ");
 358       print_generic_expr (file, ref->stmt, TDF_SLIM);
 359       fprintf (file, "\n");
 360       fprintf (file, "      distance %u\n", ref->distance);
 361     }
 362
 363 }
 364
 365 /* Dumps CHAIN to FILE.  */
 366
 367 extern void dump_chain (FILE *, chain_p);
 368 void
 369 dump_chain (FILE *file, chain_p chain)
 370 {
 371   dref a;
 372   const char *chain_type;
 373   unsigned i;
 374   tree var;
 375
 376   switch (chain->type)
 377     {
 378     case CT_INVARIANT:
 379       chain_type = "Load motion";
 380       break;
 381
 382     case CT_LOAD:
 383       chain_type = "Loads-only";
 384       break;
 385
 386     case CT_STORE_LOAD:
 387       chain_type = "Store-loads";
 388       break;
 389
 390     case CT_COMBINATION:
 391       chain_type = "Combination";
 392       break;
 393
 394     default:
 395       gcc_unreachable ();
 396     }
 397
 398   fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
 399            chain->combined ? " (combined)" : "");
 400   if (chain->type != CT_INVARIANT)
 401     fprintf (file, "  max distance %u%s\n", chain->length,
 402              chain->has_max_use_after ? "" : ", may reuse first");
 403
 404   if (chain->type == CT_COMBINATION)
 405     {
 406       fprintf (file, "  equal to %p %s %p in type ",
 407                (void *) chain->ch1, op_symbol_code (chain->operator),
 408                (void *) chain->ch2);
 409       print_generic_expr (file, chain->rslt_type, TDF_SLIM);
 410       fprintf (file, "\n");
 411     }
 412
 413   if (chain->vars)
 414     {
 415       fprintf (file, "  vars");
 416       for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
 417         {
 418           fprintf (file, " ");
 419           print_generic_expr (file, var, TDF_SLIM);
 420         }
 421       fprintf (file, "\n");
 422     }
 423
 424   if (chain->inits)
 425     {
 426       fprintf (file, "  inits");
 427       for (i = 0; VEC_iterate (tree, chain->inits, i, var); i++)
 428         {
 429           fprintf (file, " ");
 430           print_generic_expr (file, var, TDF_SLIM);
 431         }
 432       fprintf (file, "\n");
 433     }
 434
 435   fprintf (file, "  references:\n");
 436   for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
 437     dump_dref (file, a);
 438
 439   fprintf (file, "\n");
 440 }
 441
 442 /* Dumps CHAINS to FILE.  */
 443
 444 extern void dump_chains (FILE *, VEC (chain_p, heap) *);
 445 void
 446 dump_chains (FILE *file, VEC (chain_p, heap) *chains)
 447 {
 448   chain_p chain;
 449   unsigned i;
 450
 451   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
 452     dump_chain (file, chain);
 453 }
 454
 455 /* Dumps COMP to FILE.  */
 456
 457 extern void dump_component (FILE *, struct component *);
 458 void
 459 dump_component (FILE *file, struct component *comp)
 460 {
 461   dref a;
 462   unsigned i;
 463
 464   fprintf (file, "Component%s:\n",
 465            comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
 466   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
 467     dump_dref (file, a);
 468   fprintf (file, "\n");
 469 }
 470
 471 /* Dumps COMPS to FILE.  */
 472
 473 extern void dump_components (FILE *, struct component *);
 474 void
 475 dump_components (FILE *file, struct component *comps)
 476 {
 477   struct component *comp;
 478
 479   for (comp = comps; comp; comp = comp->next)
 480     dump_component (file, comp);
 481 }
 482
 483 /* Frees a chain CHAIN.  */
 484
 485 static void
 486 release_chain (chain_p chain)
 487 {
 488   dref ref;
 489   unsigned i;
 490
 491   if (chain == NULL)
 492     return;
 493
 494   for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
 495     free (ref);
 496
 497   VEC_free (dref, heap, chain->refs);
 498   VEC_free (tree, heap, chain->vars);
 499   VEC_free (tree, heap, chain->inits);
 500
 501   free (chain);
 502 }
 503
 504 /* Frees CHAINS.  */
 505
 506 static void
 507 release_chains (VEC (chain_p, heap) *chains)
 508 {
 509   unsigned i;
 510   chain_p chain;
 511
 512   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
 513     release_chain (chain);
 514   VEC_free (chain_p, heap, chains);
 515 }
 516
 517 /* Frees a component COMP.  */
 518
 519 static void
 520 release_component (struct component *comp)
 521 {
 522   VEC_free (dref, heap, comp->refs);
 523   free (comp);
 524 }
 525
 526 /* Frees list of components COMPS.  */
 527
 528 static void
 529 release_components (struct component *comps)
 530 {
 531   struct component *act, *next;
 532
 533   for (act = comps; act; act = next)
 534     {
 535       next = act->next;
 536       release_component (act);
 537     }
 538 }
 539
 540 /* Finds a root of tree given by FATHERS containing A, and performs path
 541    shortening.  */
 542
 543 static unsigned
 544 component_of (unsigned fathers[], unsigned a)
 545 {
 546   unsigned root, n;
 547
 548   for (root = a; root != fathers[root]; root = fathers[root])
 549     continue;
 550
 551   for (; a != root; a = n)
 552     {
 553       n = fathers[a];
 554       fathers[a] = root;
 555     }
 556
 557   return root;
 558 }
 559
 560 /* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
 561    components, A and B are components to merge.  */
 562
 563 static void
 564 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
 565 {
 566   unsigned ca = component_of (fathers, a);
 567   unsigned cb = component_of (fathers, b);
 568
 569   if (ca == cb)
 570     return;
 571
 572   if (sizes[ca] < sizes[cb])
 573     {
 574       sizes[cb] += sizes[ca];
 575       fathers[ca] = cb;
 576     }
 577   else
 578     {
 579       sizes[ca] += sizes[cb];
 580       fathers[cb] = ca;
 581     }
 582 }
 583
 584 /* Returns true if A is a reference that is suitable for predictive commoning
 585    in the innermost loop that contains it.  REF_STEP is set according to the
 586    step of the reference A.  */
 587
 588 static bool
 589 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
 590 {
 591   tree ref = DR_REF (a), step = DR_STEP (a);
 592
 593   if (!step
 594       || !is_gimple_reg_type (TREE_TYPE (ref)))
 595     return false;
 596
 597   if (integer_zerop (step))
 598     *ref_step = RS_INVARIANT;
 599   else if (integer_nonzerop (step))
 600     *ref_step = RS_NONZERO;
 601   else
 602     *ref_step = RS_ANY;
 603
 604   return true;
 605 }
 606
 607 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
 608
 609 static void
 610 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
 611 {
 612   aff_tree delta;
 613
 614   tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset,
 615                                   &name_expansions);
 616   aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr)));
 617   aff_combination_add (offset, &delta);
 618 }
 619
 620 /* Determines number of iterations of the innermost enclosing loop before B
 621    refers to exactly the same location as A and stores it to OFF.  If A and
 622    B do not have the same step, they never meet, or anything else fails,
 623    returns false, otherwise returns true.  Both A and B are assumed to
 624    satisfy suitable_reference_p.  */
 625
 626 static bool
 627 determine_offset (struct data_reference *a, struct data_reference *b,
 628                   double_int *off)
 629 {
 630   aff_tree diff, baseb, step;
 631   tree typea, typeb;
 632
 633   /* Check that both the references access the location in the same type.  */
 634   typea = TREE_TYPE (DR_REF (a));
 635   typeb = TREE_TYPE (DR_REF (b));
 636   if (!tree_ssa_useless_type_conversion_1 (typeb, typea))
 637     return false;
 638
 639   /* Check whether the base address and the step of both references is the
 640      same.  */
 641   if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
 642       || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
 643     return false;
 644
 645   if (integer_zerop (DR_STEP (a)))
 646     {
 647       /* If the references have loop invariant address, check that they access
 648          exactly the same location.  */
 649       *off = double_int_zero;
 650       return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
 651               && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
 652     }
 653
 654   /* Compare the offsets of the addresses, and check whether the difference
 655      is a multiple of step.  */
 656   aff_combination_dr_offset (a, &diff);
 657   aff_combination_dr_offset (b, &baseb);
 658   aff_combination_scale (&baseb, double_int_minus_one);
 659   aff_combination_add (&diff, &baseb);
 660
 661   tree_to_aff_combination_expand (DR_STEP (a), sizetype,
 662                                   &step, &name_expansions);
 663   return aff_combination_constant_multiple_p (&diff, &step, off);
 664 }
 665
 666 /* Returns the last basic block in LOOP for that we are sure that
 667    it is executed whenever the loop is entered.  */
 668
 669 static basic_block
 670 last_always_executed_block (struct loop *loop)
 671 {
 672   unsigned i;
 673   VEC (edge, heap) *exits = get_loop_exit_edges (loop);
 674   edge ex;
 675   basic_block last = loop->latch;
 676
 677   for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
 678     last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
 679   VEC_free (edge, heap, exits);
 680
 681   return last;
 682 }
 683
 684 /* Splits dependence graph on DATAREFS described by DEPENDS to components.  */
 685
 686 static struct component *
 687 split_data_refs_to_components (struct loop *loop,
 688                                VEC (data_reference_p, heap) *datarefs,
 689                                VEC (ddr_p, heap) *depends)
 690 {
 691   unsigned i, n = VEC_length (data_reference_p, datarefs);
 692   unsigned ca, ia, ib, bad;
 693   unsigned *comp_father = XNEWVEC (unsigned, n + 1);
 694   unsigned *comp_size = XNEWVEC (unsigned, n + 1);
 695   struct component **comps;
 696   struct data_reference *dr, *dra, *drb;
 697   struct data_dependence_relation *ddr;
 698   struct component *comp_list = NULL, *comp;
 699   dref dataref;
 700   basic_block last_always_executed = last_always_executed_block (loop);
 701
 702   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 703     {
 704       if (!DR_REF (dr))
 705         {
 706           /* A fake reference for call or asm_expr that may clobber memory;
 707              just fail.  */
 708           goto end;
 709         }
 710       dr->aux = (void *) (size_t) i;
 711       comp_father[i] = i;
 712       comp_size[i] = 1;
 713     }
 714
 715   /* A component reserved for the "bad" data references.  */
 716   comp_father[n] = n;
 717   comp_size[n] = 1;
 718
 719   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 720     {
 721       enum ref_step_type dummy;
 722
 723       if (!suitable_reference_p (dr, &dummy))
 724         {
 725           ia = (unsigned) (size_t) dr->aux;
 726           merge_comps (comp_father, comp_size, n, ia);
 727         }
 728     }
 729
 730   for (i = 0; VEC_iterate (ddr_p, depends, i, ddr); i++)
 731     {
 732       double_int dummy_off;
 733
 734       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 735         continue;
 736
 737       dra = DDR_A (ddr);
 738       drb = DDR_B (ddr);
 739       ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
 740       ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
 741       if (ia == ib)
 742         continue;
 743
 744       bad = component_of (comp_father, n);
 745
 746       /* If both A and B are reads, we may ignore unsuitable dependences.  */
 747       if (DR_IS_READ (dra) && DR_IS_READ (drb)
 748           && (ia == bad || ib == bad
 749               || !determine_offset (dra, drb, &dummy_off)))
 750         continue;
 751
 752       merge_comps (comp_father, comp_size, ia, ib);
 753     }
 754
 755   comps = XCNEWVEC (struct component *, n);
 756   bad = component_of (comp_father, n);
 757   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
 758     {
 759       ia = (unsigned) (size_t) dr->aux;
 760       ca = component_of (comp_father, ia);
 761       if (ca == bad)
 762         continue;
 763
 764       comp = comps[ca];
 765       if (!comp)
 766         {
 767           comp = XCNEW (struct component);
 768           comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
 769           comps[ca] = comp;
 770         }
 771
 772       dataref = XCNEW (struct dref);
 773       dataref->ref = dr;
 774       dataref->stmt = DR_STMT (dr);
 775       dataref->offset = double_int_zero;
 776       dataref->distance = 0;
 777
 778       dataref->always_accessed
 779               = dominated_by_p (CDI_DOMINATORS, last_always_executed,
 780                                 bb_for_stmt (dataref->stmt));
 781       dataref->pos = VEC_length (dref, comp->refs);
 782       VEC_quick_push (dref, comp->refs, dataref);
 783     }
 784
 785   for (i = 0; i < n; i++)
 786     {
 787       comp = comps[i];
 788       if (comp)
 789         {
 790           comp->next = comp_list;
 791           comp_list = comp;
 792         }
 793     }
 794   free (comps);
 795
 796 end:
 797   free (comp_father);
 798   free (comp_size);
 799   return comp_list;
 800 }
 801
 802 /* Returns true if the component COMP satisfies the conditions
 803    described in 2) at the beginning of this file.  LOOP is the current
 804    loop.  */
 805
 806 static bool
 807 suitable_component_p (struct loop *loop, struct component *comp)
 808 {
 809   unsigned i;
 810   dref a, first;
 811   basic_block ba, bp = loop->header;
 812   bool ok, has_write = false;
 813
 814   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
 815     {
 816       ba = bb_for_stmt (a->stmt);
 817
 818       if (!just_once_each_iteration_p (loop, ba))
 819         return false;
 820
 821       gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
 822       bp = ba;
 823
 824       if (!DR_IS_READ (a->ref))
 825         has_write = true;
 826     }
 827
 828   first = VEC_index (dref, comp->refs, 0);
 829   ok = suitable_reference_p (first->ref, &comp->comp_step);
 830   gcc_assert (ok);
 831   first->offset = double_int_zero;
 832
 833   for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++)
 834     {
 835       if (!determine_offset (first->ref, a->ref, &a->offset))
 836         return false;
 837
 838 #ifdef ENABLE_CHECKING
 839       {
 840         enum ref_step_type a_step;
 841         ok = suitable_reference_p (a->ref, &a_step);
 842         gcc_assert (ok && a_step == comp->comp_step);
 843       }
 844 #endif
 845     }
 846
 847   /* If there is a write inside the component, we must know whether the
 848      step is nonzero or not -- we would not otherwise be able to recognize
 849      whether the value accessed by reads comes from the OFFSET-th iteration
 850      or the previous one.  */
 851   if (has_write && comp->comp_step == RS_ANY)
 852     return false;
 853
 854   return true;
 855 }
 856
 857 /* Check the conditions on references inside each of components COMPS,
 858    and remove the unsuitable components from the list.  The new list
 859    of components is returned.  The conditions are described in 2) at
 860    the beginning of this file.  LOOP is the current loop.  */
 861
 862 static struct component *
 863 filter_suitable_components (struct loop *loop, struct component *comps)
 864 {
 865   struct component **comp, *act;
 866
 867   for (comp = &comps; *comp; )
 868     {
 869       act = *comp;
 870       if (suitable_component_p (loop, act))
 871         comp = &act->next;
 872       else
 873         {
 874           *comp = act->next;
 875           release_component (act);
 876         }
 877     }
 878
 879   return comps;
 880 }
 881
 882 /* Compares two drefs A and B by their offset and position.  Callback for
 883    qsort.  */
 884
 885 static int
 886 order_drefs (const void *a, const void *b)
 887 {
 888   const dref *da = a;
 889   const dref *db = b;
 890   int offcmp = double_int_scmp ((*da)->offset, (*db)->offset);
 891
 892   if (offcmp != 0)
 893     return offcmp;
 894
 895   return (*da)->pos - (*db)->pos;
 896 }
 897
 898 /* Returns root of the CHAIN.  */
 899
 900 static inline dref
 901 get_chain_root (chain_p chain)
 902 {
 903   return VEC_index (dref, chain->refs, 0);
 904 }
 905
 906 /* Adds REF to the chain CHAIN.  */
 907
 908 static void
 909 add_ref_to_chain (chain_p chain, dref ref)
 910 {
 911   dref root = get_chain_root (chain);
 912   double_int dist;
 913
 914   gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0);
 915   dist = double_int_add (ref->offset, double_int_neg (root->offset));
 916   if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
 917     return;
 918   gcc_assert (double_int_fits_in_uhwi_p (dist));
 919
 920   VEC_safe_push (dref, heap, chain->refs, ref);
 921
 922   ref->distance = double_int_to_uhwi (dist);
 923
 924   if (ref->distance >= chain->length)
 925     {
 926       chain->length = ref->distance;
 927       chain->has_max_use_after = false;
 928     }
 929
 930   if (ref->distance == chain->length
 931       && ref->pos > root->pos)
 932     chain->has_max_use_after = true;
 933
 934   chain->all_always_accessed &= ref->always_accessed;
 935 }
 936
 937 /* Returns the chain for invariant component COMP.  */
 938
 939 static chain_p
 940 make_invariant_chain (struct component *comp)
 941 {
 942   chain_p chain = XCNEW (struct chain);
 943   unsigned i;
 944   dref ref;
 945
 946   chain->type = CT_INVARIANT;
 947
 948   chain->all_always_accessed = true;
 949
 950   for (i = 0; VEC_iterate (dref, comp->refs, i, ref); i++)
 951     {
 952       VEC_safe_push (dref, heap, chain->refs, ref);
 953       chain->all_always_accessed &= ref->always_accessed;
 954     }
 955
 956   return chain;
 957 }
 958
 959 /* Make a new chain rooted at REF.  */
 960
 961 static chain_p
 962 make_rooted_chain (dref ref)
 963 {
 964   chain_p chain = XCNEW (struct chain);
 965
 966   chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
 967
 968   VEC_safe_push (dref, heap, chain->refs, ref);
 969   chain->all_always_accessed = ref->always_accessed;
 970
 971   ref->distance = 0;
 972
 973   return chain;
 974 }
 975
 976 /* Returns true if CHAIN is not trivial.  */
 977
 978 static bool
 979 nontrivial_chain_p (chain_p chain)
 980 {
 981   return chain != NULL && VEC_length (dref, chain->refs) > 1;
 982 }
 983
 984 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
 985    is no such name.  */
 986
 987 static tree
 988 name_for_ref (dref ref)
 989 {
 990   tree name;
 991
 992   if (TREE_CODE (ref->stmt) == GIMPLE_MODIFY_STMT)
 993     {
 994       if (!ref->ref || DR_IS_READ (ref->ref))
 995         name = GIMPLE_STMT_OPERAND (ref->stmt, 0);
 996       else
 997         name = GIMPLE_STMT_OPERAND (ref->stmt, 1);
 998     }
 999   else
1000     name = PHI_RESULT (ref->stmt);
1001
1002   return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1003 }
1004
1005 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1006    iterations of the innermost enclosing loop).  */
1007
1008 static bool
1009 valid_initializer_p (struct data_reference *ref,
1010                      unsigned distance, struct data_reference *root)
1011 {
1012   aff_tree diff, base, step;
1013   double_int off;
1014
1015   if (!DR_BASE_ADDRESS (ref))
1016     return false;
1017
1018   /* Both REF and ROOT must be accessing the same object.  */
1019   if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1020     return false;
1021
1022   /* The initializer is defined outside of loop, hence its address must be
1023      invariant inside the loop.  */
1024   gcc_assert (integer_zerop (DR_STEP (ref)));
1025
1026   /* If the address of the reference is invariant, initializer must access
1027      exactly the same location.  */
1028   if (integer_zerop (DR_STEP (root)))
1029     return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1030             && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1031
1032   /* Verify that this index of REF is equal to the root's index at
1033      -DISTANCE-th iteration.  */
1034   aff_combination_dr_offset (root, &diff);
1035   aff_combination_dr_offset (ref, &base);
1036   aff_combination_scale (&base, double_int_minus_one);
1037   aff_combination_add (&diff, &base);
1038
1039   tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step,
1040                                   &name_expansions);
1041   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1042     return false;
1043
1044   if (!double_int_equal_p (off, uhwi_to_double_int (distance)))
1045     return false;
1046
1047   return true;
1048 }
1049
1050 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1051    initial value is correct (equal to initial value of REF shifted by one
1052    iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
1053    is the root of the current chain.  */
1054
1055 static tree
1056 find_looparound_phi (struct loop *loop, dref ref, dref root)
1057 {
1058   tree name, phi, init, init_stmt, init_ref;
1059   edge latch = loop_latch_edge (loop);
1060   struct data_reference init_dr;
1061
1062   if (TREE_CODE (ref->stmt) == GIMPLE_MODIFY_STMT)
1063     {
1064       if (DR_IS_READ (ref->ref))
1065         name = GIMPLE_STMT_OPERAND (ref->stmt, 0);
1066       else
1067         name = GIMPLE_STMT_OPERAND (ref->stmt, 1);
1068     }
1069   else
1070     name = PHI_RESULT (ref->stmt);
1071   if (!name)
1072     return NULL_TREE;
1073
1074   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1075     if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1076       break;
1077
1078   if (!phi)
1079     return NULL_TREE;
1080
1081   init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1082   if (TREE_CODE (init) != SSA_NAME)
1083     return NULL_TREE;
1084   init_stmt = SSA_NAME_DEF_STMT (init);
1085   if (TREE_CODE (init_stmt) != GIMPLE_MODIFY_STMT)
1086     return NULL_TREE;
1087   gcc_assert (GIMPLE_STMT_OPERAND (init_stmt, 0) == init);
1088
1089   init_ref = GIMPLE_STMT_OPERAND (init_stmt, 1);
1090   if (!REFERENCE_CLASS_P (init_ref)
1091       && !DECL_P (init_ref))
1092     return NULL_TREE;
1093
1094   /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1095      loop enclosing PHI).  */
1096   memset (&init_dr, 0, sizeof (struct data_reference));
1097   DR_REF (&init_dr) = init_ref;
1098   DR_STMT (&init_dr) = phi;
1099   dr_analyze_innermost (&init_dr);
1100
1101   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1102     return NULL_TREE;
1103
1104   return phi;
1105 }
1106
1107 /* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1108
1109 static void
1110 insert_looparound_copy (chain_p chain, dref ref, tree phi)
1111 {
1112   dref nw = XCNEW (struct dref), aref;
1113   unsigned i;
1114
1115   nw->stmt = phi;
1116   nw->distance = ref->distance + 1;
1117   nw->always_accessed = 1;
1118
1119   for (i = 0; VEC_iterate (dref, chain->refs, i, aref); i++)
1120     if (aref->distance >= nw->distance)
1121       break;
1122   VEC_safe_insert (dref, heap, chain->refs, i, nw);
1123
1124   if (nw->distance > chain->length)
1125     {
1126       chain->length = nw->distance;
1127       chain->has_max_use_after = false;
1128     }
1129 }
1130
1131 /* For references in CHAIN that are copied around the LOOP (created previously
1132    by PRE, or by user), add the results of such copies to the chain.  This
1133    enables us to remove the copies by unrolling, and may need less registers
1134    (also, it may allow us to combine chains together).  */
1135
1136 static void
1137 add_looparound_copies (struct loop *loop, chain_p chain)
1138 {
1139   unsigned i;
1140   dref ref, root = get_chain_root (chain);
1141   tree phi;
1142
1143   for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
1144     {
1145       phi = find_looparound_phi (loop, ref, root);
1146       if (!phi)
1147         continue;
1148
1149       bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1150       insert_looparound_copy (chain, ref, phi);
1151     }
1152 }
1153
1154 /* Find roots of the values and determine distances in the component COMP.
1155    The references are redistributed into CHAINS.  LOOP is the current
1156    loop.  */
1157
1158 static void
1159 determine_roots_comp (struct loop *loop,
1160                       struct component *comp,
1161                       VEC (chain_p, heap) **chains)
1162 {
1163   unsigned i;
1164   dref a;
1165   chain_p chain = NULL;
1166
1167   /* Invariants are handled specially.  */
1168   if (comp->comp_step == RS_INVARIANT)
1169     {
1170       chain = make_invariant_chain (comp);
1171       VEC_safe_push (chain_p, heap, *chains, chain);
1172       return;
1173     }
1174
1175   qsort (VEC_address (dref, comp->refs), VEC_length (dref, comp->refs),
1176          sizeof (dref), order_drefs);
1177
1178   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
1179     {
1180       if (!chain || !DR_IS_READ (a->ref))
1181         {
1182           if (nontrivial_chain_p (chain))
1183             VEC_safe_push (chain_p, heap, *chains, chain);
1184           else
1185             release_chain (chain);
1186           chain = make_rooted_chain (a);
1187           continue;
1188         }
1189
1190       add_ref_to_chain (chain, a);
1191     }
1192
1193   if (nontrivial_chain_p (chain))
1194     {
1195       add_looparound_copies (loop, chain);
1196       VEC_safe_push (chain_p, heap, *chains, chain);
1197     }
1198   else
1199     release_chain (chain);
1200 }
1201
1202 /* Find roots of the values and determine distances in components COMPS, and
1203    separates the references to CHAINS.  LOOP is the current loop.  */
1204
1205 static void
1206 determine_roots (struct loop *loop,
1207                  struct component *comps, VEC (chain_p, heap) **chains)
1208 {
1209   struct component *comp;
1210
1211   for (comp = comps; comp; comp = comp->next)
1212     determine_roots_comp (loop, comp, chains);
1213 }
1214
1215 /* Replace the reference in statement STMT with temporary variable
1216    NEW.  If SET is true, NEW is instead initialized to the value of
1217    the reference in the statement.  IN_LHS is true if the reference
1218    is in the lhs of STMT, false if it is in rhs.  */
1219
1220 static void
1221 replace_ref_with (tree stmt, tree new, bool set, bool in_lhs)
1222 {
1223   tree val, new_stmt;
1224   block_stmt_iterator bsi;
1225
1226   if (TREE_CODE (stmt) == PHI_NODE)
1227     {
1228       gcc_assert (!in_lhs && !set);
1229
1230       val = PHI_RESULT (stmt);
1231       bsi = bsi_after_labels (bb_for_stmt (stmt));
1232       remove_phi_node (stmt, NULL_TREE, false);
1233
1234       /* Turn the phi node into GIMPLE_MODIFY_STMT.  */
1235       new_stmt = build_gimple_modify_stmt (val, new);
1236       SSA_NAME_DEF_STMT (val) = new_stmt;
1237       bsi_insert_before (&bsi, new_stmt, BSI_NEW_STMT);
1238       return;
1239     }
1240
1241   /* Since the reference is of gimple_reg type, it should only
1242      appear as lhs or rhs of modify statement.  */
1243   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1244
1245   /* If we do not need to initialize NEW, just replace the use of OLD.  */
1246   if (!set)
1247     {
1248       gcc_assert (!in_lhs);
1249       GIMPLE_STMT_OPERAND (stmt, 1) = new;
1250       update_stmt (stmt);
1251       return;
1252     }
1253
1254   bsi = bsi_for_stmt (stmt);
1255   if (in_lhs)
1256     {
1257       val = GIMPLE_STMT_OPERAND (stmt, 1);
1258
1259       /* OLD = VAL
1260
1261          is transformed to
1262
1263          OLD = VAL
1264          NEW = VAL
1265
1266          (since the reference is of gimple_reg type, VAL is either gimple
1267          invariant or ssa name).  */
1268     }
1269   else
1270     {
1271       val = GIMPLE_STMT_OPERAND (stmt, 0);
1272
1273       /* VAL = OLD
1274
1275          is transformed to
1276
1277          VAL = OLD
1278          NEW = VAL  */
1279     }
1280
1281   new_stmt = build_gimple_modify_stmt (new, unshare_expr (val));
1282   bsi_insert_after (&bsi, new_stmt, BSI_NEW_STMT);
1283   SSA_NAME_DEF_STMT (new) = new_stmt;
1284 }
1285
1286 /* Returns the reference to the address of REF in the ITER-th iteration of
1287    LOOP, or NULL if we fail to determine it (ITER may be negative).  We
1288    try to preserve the original shape of the reference (not rewrite it
1289    as an indirect ref to the address), to make tree_could_trap_p in
1290    prepare_initializers_chain return false more often.  */
1291
1292 static tree
1293 ref_at_iteration (struct loop *loop, tree ref, int iter)
1294 {
1295   tree idx, *idx_p, type, val, op0 = NULL_TREE, ret;
1296   affine_iv iv;
1297   bool ok;
1298
1299   if (handled_component_p (ref))
1300     {
1301       op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter);
1302       if (!op0)
1303         return NULL_TREE;
1304     }
1305   else if (!INDIRECT_REF_P (ref))
1306     return unshare_expr (ref);
1307
1308   if (TREE_CODE (ref) == INDIRECT_REF)
1309     {
1310       ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
1311       idx = TREE_OPERAND (ref, 0);
1312       idx_p = &TREE_OPERAND (ret, 0);
1313     }
1314   else if (TREE_CODE (ref) == COMPONENT_REF)
1315     {
1316       /* Check that the offset is loop invariant.  */
1317       if (TREE_OPERAND (ref, 2)
1318           && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1319         return NULL_TREE;
1320
1321       return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
1322                      unshare_expr (TREE_OPERAND (ref, 1)),
1323                      unshare_expr (TREE_OPERAND (ref, 2)));
1324     }
1325   else if (TREE_CODE (ref) == ARRAY_REF)
1326     {
1327       /* Check that the lower bound and the step are loop invariant.  */
1328       if (TREE_OPERAND (ref, 2)
1329           && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1330         return NULL_TREE;
1331       if (TREE_OPERAND (ref, 3)
1332           && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
1333         return NULL_TREE;
1334
1335       ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
1336                     unshare_expr (TREE_OPERAND (ref, 2)),
1337                     unshare_expr (TREE_OPERAND (ref, 3)));
1338       idx = TREE_OPERAND (ref, 1);
1339       idx_p = &TREE_OPERAND (ret, 1);
1340     }
1341   else
1342     return NULL_TREE;
1343
1344   ok = simple_iv (loop, first_stmt (loop->header), idx, &iv, true);
1345   if (!ok)
1346     return NULL_TREE;
1347   iv.base = expand_simple_operations (iv.base);
1348   if (integer_zerop (iv.step))
1349     *idx_p = unshare_expr (iv.base);
1350   else
1351     {
1352       type = TREE_TYPE (iv.base);
1353       val = fold_build2 (MULT_EXPR, type, iv.step,
1354                          build_int_cst_type (type, iter));
1355       val = fold_build2 (PLUS_EXPR, type, iv.base, val);
1356       *idx_p = unshare_expr (val);
1357     }
1358
1359   return ret;
1360 }
1361
1362 /* Get the initialization expression for the INDEX-th temporary variable
1363    of CHAIN.  */
1364
1365 static tree
1366 get_init_expr (chain_p chain, unsigned index)
1367 {
1368   if (chain->type == CT_COMBINATION)
1369     {
1370       tree e1 = get_init_expr (chain->ch1, index);
1371       tree e2 = get_init_expr (chain->ch2, index);
1372
1373       return fold_build2 (chain->operator, chain->rslt_type, e1, e2);
1374     }
1375   else
1376     return VEC_index (tree, chain->inits, index);
1377 }
1378
1379 /* Marks all virtual operands of statement STMT for renaming.  */
1380
1381 static void
1382 mark_virtual_ops_for_renaming (tree stmt)
1383 {
1384   ssa_op_iter iter;
1385   tree var;
1386
1387   if (TREE_CODE (stmt) == PHI_NODE)
1388     return;
1389
1390   update_stmt (stmt);
1391
1392   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_VIRTUALS)
1393     {
1394       if (TREE_CODE (var) == SSA_NAME)
1395         var = SSA_NAME_VAR (var);
1396       mark_sym_for_renaming (var);
1397     }
1398 }
1399
1400 /* Calls mark_virtual_ops_for_renaming for all members of LIST.  */
1401
1402 static void
1403 mark_virtual_ops_for_renaming_list (tree list)
1404 {
1405   tree_stmt_iterator tsi;
1406
1407   for (tsi = tsi_start (list); !tsi_end_p (tsi); tsi_next (&tsi))
1408     mark_virtual_ops_for_renaming (tsi_stmt (tsi));
1409 }
1410
1411 /* Returns a new temporary variable used for the I-th variable carrying
1412    value of REF.  The variable's uid is marked in TMP_VARS.  */
1413
1414 static tree
1415 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1416 {
1417   tree type = TREE_TYPE (ref);
1418   tree var = create_tmp_var (type, get_lsm_tmp_name (ref, i));
1419
1420   /* We never access the components of the temporary variable in predictive
1421      commoning.  */
1422   if (TREE_CODE (type) == COMPLEX_TYPE
1423       || TREE_CODE (type) == VECTOR_TYPE)
1424     DECL_GIMPLE_REG_P (var) = 1;
1425
1426   add_referenced_var (var);
1427   bitmap_set_bit (tmp_vars, DECL_UID (var));
1428   return var;
1429 }
1430
1431 /* Creates the variables for CHAIN, as well as phi nodes for them and
1432    initialization on entry to LOOP.  Uids of the newly created
1433    temporary variables are marked in TMP_VARS.  */
1434
1435 static void
1436 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1437 {
1438   unsigned i;
1439   unsigned n = chain->length;
1440   dref root = get_chain_root (chain);
1441   bool reuse_first = !chain->has_max_use_after;
1442   tree ref, init, var, next, stmts;
1443   tree phi;
1444   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1445
1446   /* If N == 0, then all the references are within the single iteration.  And
1447      since this is an nonempty chain, reuse_first cannot be true.  */
1448   gcc_assert (n > 0 || !reuse_first);
1449
1450   chain->vars = VEC_alloc (tree, heap, n + 1);
1451
1452   if (chain->type == CT_COMBINATION)
1453     ref = GIMPLE_STMT_OPERAND (root->stmt, 0);
1454   else
1455     ref = DR_REF (root->ref);
1456
1457   for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1458     {
1459       var = predcom_tmp_var (ref, i, tmp_vars);
1460       VEC_quick_push (tree, chain->vars, var);
1461     }
1462   if (reuse_first)
1463     VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));
1464
1465   for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
1466     VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL_TREE));
1467
1468   for (i = 0; i < n; i++)
1469     {
1470       var = VEC_index (tree, chain->vars, i);
1471       next = VEC_index (tree, chain->vars, i + 1);
1472       init = get_init_expr (chain, i);
1473
1474       init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1475       if (stmts)
1476         {
1477           mark_virtual_ops_for_renaming_list (stmts);
1478           bsi_insert_on_edge_immediate (entry, stmts);
1479         }
1480
1481       phi = create_phi_node (var, loop->header);
1482       SSA_NAME_DEF_STMT (var) = phi;
1483       add_phi_arg (phi, init, entry);
1484       add_phi_arg (phi, next, latch);
1485     }
1486 }
1487
1488 /* Create the variables and initialization statement for root of chain
1489    CHAIN.  Uids of the newly created temporary variables are marked
1490    in TMP_VARS.  */
1491
1492 static void
1493 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1494 {
1495   dref root = get_chain_root (chain);
1496   bool in_lhs = (chain->type == CT_STORE_LOAD
1497                  || chain->type == CT_COMBINATION);
1498
1499   initialize_root_vars (loop, chain, tmp_vars);
1500   replace_ref_with (root->stmt,
1501                     VEC_index (tree, chain->vars, chain->length),
1502                     true, in_lhs);
1503 }
1504
1505 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1506    initialization on entry to LOOP if necessary.  The ssa name for the variable
1507    is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
1508    around the loop is created.  Uid of the newly created temporary variable
1509    is marked in TMP_VARS.  INITS is the list containing the (single)
1510    initializer.  */
1511
1512 static void
1513 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1514                          VEC(tree, heap) **vars, VEC(tree, heap) *inits,
1515                          bitmap tmp_vars)
1516 {
1517   unsigned i;
1518   tree ref = DR_REF (root->ref), init, var, next, stmts;
1519   tree phi;
1520   edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1521
1522   /* Find the initializer for the variable, and check that it cannot
1523      trap.  */
1524   init = VEC_index (tree, inits, 0);
1525
1526   *vars = VEC_alloc (tree, heap, written ? 2 : 1);
1527   var = predcom_tmp_var (ref, 0, tmp_vars);
1528   VEC_quick_push (tree, *vars, var);
1529   if (written)
1530     VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));
1531
1532   for (i = 0; VEC_iterate (tree, *vars, i, var); i++)
1533     VEC_replace (tree, *vars, i, make_ssa_name (var, NULL_TREE));
1534
1535   var = VEC_index (tree, *vars, 0);
1536
1537   init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1538   if (stmts)
1539     {
1540       mark_virtual_ops_for_renaming_list (stmts);
1541       bsi_insert_on_edge_immediate (entry, stmts);
1542     }
1543
1544   if (written)
1545     {
1546       next = VEC_index (tree, *vars, 1);
1547       phi = create_phi_node (var, loop->header);
1548       SSA_NAME_DEF_STMT (var) = phi;
1549       add_phi_arg (phi, init, entry);
1550       add_phi_arg (phi, next, latch);
1551     }
1552   else
1553     {
1554       init = build_gimple_modify_stmt (var, init);
1555       SSA_NAME_DEF_STMT (var) = init;
1556       mark_virtual_ops_for_renaming (init);
1557       bsi_insert_on_edge_immediate (entry, init);
1558     }
1559 }
1560
1561
1562 /* Execute load motion for references in chain CHAIN.  Uids of the newly
1563    created temporary variables are marked in TMP_VARS.  */
1564
1565 static void
1566 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1567 {
1568   VEC (tree, heap) *vars;
1569   dref a;
1570   unsigned n_writes = 0, ridx, i;
1571   tree var;
1572
1573   gcc_assert (chain->type == CT_INVARIANT);
1574   gcc_assert (!chain->combined);
1575   for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1576     if (!DR_IS_READ (a->ref))
1577       n_writes++;
1578
1579   /* If there are no reads in the loop, there is nothing to do.  */
1580   if (n_writes == VEC_length (dref, chain->refs))
1581     return;
1582
1583   initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1584                            &vars, chain->inits, tmp_vars);
1585
1586   ridx = 0;
1587   for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1588     {
1589       bool is_read = DR_IS_READ (a->ref);
1590       mark_virtual_ops_for_renaming (a->stmt);
1591
1592       if (!DR_IS_READ (a->ref))
1593         {
1594           n_writes--;
1595           if (n_writes)
1596             {
1597               var = VEC_index (tree, vars, 0);
1598               var = make_ssa_name (SSA_NAME_VAR (var), NULL_TREE);
1599               VEC_replace (tree, vars, 0, var);
1600             }
1601           else
1602             ridx = 1;
1603         }
1604
1605       replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
1606                         !is_read, !is_read);
1607     }
1608
1609   VEC_free (tree, heap, vars);
1610 }
1611
1612 /* Returns the single statement in that NAME is used, excepting
1613    the looparound phi nodes contained in one of the chains.  If there is no
1614    such statement, or more statements, NULL_TREE is returned.  */
1615
1616 static tree
1617 single_nonlooparound_use (tree name)
1618 {
1619   use_operand_p use;
1620   imm_use_iterator it;
1621   tree stmt, ret = NULL_TREE;
1622
1623   FOR_EACH_IMM_USE_FAST (use, it, name)
1624     {
1625       stmt = USE_STMT (use);
1626
1627       if (TREE_CODE (stmt) == PHI_NODE)
1628         {
1629           /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
1630              could not be processed anyway, so just fail for them.  */
1631           if (bitmap_bit_p (looparound_phis,
1632                             SSA_NAME_VERSION (PHI_RESULT (stmt))))
1633             continue;
1634
1635           return NULL_TREE;
1636         }
1637       else if (ret != NULL_TREE)
1638         return NULL_TREE;
1639       else
1640         ret = stmt;
1641     }
1642
1643   return ret;
1644 }
1645
1646 /* Remove statement STMT, as well as the chain of assignments in that it is
1647    used.  */
1648
1649 static void
1650 remove_stmt (tree stmt)
1651 {
1652   tree next, name;
1653
1654   if (TREE_CODE (stmt) == PHI_NODE)
1655     {
1656       name = PHI_RESULT (stmt);
1657       next = single_nonlooparound_use (name);
1658       remove_phi_node (stmt, NULL_TREE, true);
1659
1660       if (!next
1661           || TREE_CODE (next) != GIMPLE_MODIFY_STMT
1662           || GIMPLE_STMT_OPERAND (next, 1) != name)
1663         return;
1664
1665       stmt = next;
1666     }
1667
1668   while (1)
1669     {
1670       block_stmt_iterator bsi;
1671
1672       bsi = bsi_for_stmt (stmt);
1673
1674       name = GIMPLE_STMT_OPERAND (stmt, 0);
1675       gcc_assert (TREE_CODE (name) == SSA_NAME);
1676
1677       next = single_nonlooparound_use (name);
1678
1679       mark_virtual_ops_for_renaming (stmt);
1680       bsi_remove (&bsi, true);
1681
1682       if (!next
1683           || TREE_CODE (next) != GIMPLE_MODIFY_STMT
1684           || GIMPLE_STMT_OPERAND (next, 1) != name)
1685         return;
1686
1687       stmt = next;
1688     }
1689 }
1690
1691 /* Perform the predictive commoning optimization for a chain CHAIN.
1692    Uids of the newly created temporary variables are marked in TMP_VARS.*/
1693
1694 static void
1695 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1696                              bitmap tmp_vars)
1697 {
1698   unsigned i;
1699   dref a, root;
1700   tree var;
1701
1702   if (chain->combined)
1703     {
1704       /* For combined chains, just remove the statements that are used to
1705          compute the values of the expression (except for the root one).  */
1706       for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1707         remove_stmt (a->stmt);
1708     }
1709   else
1710     {
1711       /* For non-combined chains, set up the variables that hold its value,
1712          and replace the uses of the original references by these
1713          variables.  */
1714       root = get_chain_root (chain);
1715       mark_virtual_ops_for_renaming (root->stmt);
1716
1717       initialize_root (loop, chain, tmp_vars);
1718       for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1719         {
1720           mark_virtual_ops_for_renaming (a->stmt);
1721           var = VEC_index (tree, chain->vars, chain->length - a->distance);
1722           replace_ref_with (a->stmt, var, false, false);
1723         }
1724     }
1725 }
1726
1727 /* Determines the unroll factor necessary to remove as many temporary variable
1728    copies as possible.  CHAINS is the list of chains that will be
1729    optimized.  */
1730
1731 static unsigned
1732 determine_unroll_factor (VEC (chain_p, heap) *chains)
1733 {
1734   chain_p chain;
1735   unsigned factor = 1, af, nfactor, i;
1736   unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1737
1738   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1739     {
1740       if (chain->type == CT_INVARIANT || chain->combined)
1741         continue;
1742
1743       /* The best unroll factor for this chain is equal to the number of
1744          temporary variables that we create for it.  */
1745       af = chain->length;
1746       if (chain->has_max_use_after)
1747         af++;
1748
1749       nfactor = factor * af / gcd (factor, af);
1750       if (nfactor <= max)
1751         factor = nfactor;
1752     }
1753
1754   return factor;
1755 }
1756
1757 /* Perform the predictive commoning optimization for CHAINS.
1758    Uids of the newly created temporary variables are marked in TMP_VARS.  */
1759
1760 static void
1761 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains,
1762                         bitmap tmp_vars)
1763 {
1764   chain_p chain;
1765   unsigned i;
1766
1767   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1768     {
1769       if (chain->type == CT_INVARIANT)
1770         execute_load_motion (loop, chain, tmp_vars);
1771       else
1772         execute_pred_commoning_chain (loop, chain, tmp_vars);
1773     }
1774
1775   update_ssa (TODO_update_ssa_only_virtuals);
1776 }
1777
1778 /* For each reference in CHAINS, if its defining statement is
1779    ssa name, set it to phi node that defines it.  */
1780
1781 static void
1782 replace_phis_by_defined_names (VEC (chain_p, heap) *chains)
1783 {
1784   chain_p chain;
1785   dref a;
1786   unsigned i, j;
1787
1788   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1789     for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1790       {
1791         gcc_assert (TREE_CODE (a->stmt) != SSA_NAME);
1792         if (TREE_CODE (a->stmt) == PHI_NODE)
1793           a->stmt = PHI_RESULT (a->stmt);
1794       }
1795 }
1796
1797 /* For each reference in CHAINS, if its defining statement is
1798    phi node, set it to the ssa name that is defined by it.  */
1799
1800 static void
1801 replace_names_by_phis (VEC (chain_p, heap) *chains)
1802 {
1803   chain_p chain;
1804   dref a;
1805   unsigned i, j;
1806
1807   for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1808     for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1809       if (TREE_CODE (a->stmt) == SSA_NAME)
1810         {
1811           a->stmt = SSA_NAME_DEF_STMT (a->stmt);
1812           gcc_assert (TREE_CODE (a->stmt) == PHI_NODE);
1813         }
1814 }
1815
1816 /* Wrapper over execute_pred_commoning, to pass it as a callback
1817    to tree_transform_and_unroll_loop.  */
1818
1819 struct epcc_data
1820 {
1821   VEC (chain_p, heap) *chains;
1822   bitmap tmp_vars;
1823 };
1824
1825 static void
1826 execute_pred_commoning_cbck (struct loop *loop, void *data)
1827 {
1828   struct epcc_data *dta = data;
1829
1830   /* Restore phi nodes that were replaced by ssa names before
1831      tree_transform_and_unroll_loop (see detailed description in
1832      tree_predictive_commoning_loop).  */
1833   replace_names_by_phis (dta->chains);
1834   execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1835 }
1836
1837 /* Returns true if we can and should unroll LOOP FACTOR times.  Number
1838    of iterations of the loop is returned in NITER.  */
1839
1840 static bool
1841 should_unroll_loop_p (struct loop *loop, unsigned factor,
1842                       struct tree_niter_desc *niter)
1843 {
1844   edge exit;
1845
1846   if (factor == 1)
1847     return false;
1848
1849   /* Check whether unrolling is possible.  We only want to unroll loops
1850      for that we are able to determine number of iterations.  We also
1851      want to split the extra iterations of the loop from its end,
1852      therefore we require that the loop has precisely one
1853      exit.  */
1854
1855   exit = single_dom_exit (loop);
1856   if (!exit)
1857     return false;
1858
1859   if (!number_of_iterations_exit (loop, exit, niter, false))
1860     return false;
1861
1862   /* And of course, we must be able to duplicate the loop.  */
1863   if (!can_duplicate_loop_p (loop))
1864     return false;
1865
1866   /* The final loop should be small enough.  */
1867   if (tree_num_loop_insns (loop, &eni_size_weights) * factor
1868       > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
1869     return false;
1870
1871   return true;
1872 }
1873
1874 /* Base NAME and all the names in the chain of phi nodes that use it
1875    on variable VAR.  The phi nodes are recognized by being in the copies of
1876    the header of the LOOP.  */
1877
1878 static void
1879 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1880 {
1881   tree stmt, phi;
1882   imm_use_iterator iter;
1883   edge e;
1884
1885   SSA_NAME_VAR (name) = var;
1886
1887   while (1)
1888     {
1889       phi = NULL;
1890       FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1891         {
1892           if (TREE_CODE (stmt) == PHI_NODE
1893               && flow_bb_inside_loop_p (loop, bb_for_stmt (stmt)))
1894             {
1895               phi = stmt;
1896               BREAK_FROM_IMM_USE_STMT (iter);
1897             }
1898         }
1899       if (!phi)
1900         return;
1901
1902       if (bb_for_stmt (phi) == loop->header)
1903         e = loop_latch_edge (loop);
1904       else
1905         e = single_pred_edge (bb_for_stmt (stmt));
1906
1907       name = PHI_RESULT (phi);
1908       SSA_NAME_VAR (name) = var;
1909     }
1910 }
1911
1912 /* Given an unrolled LOOP after predictive commoning, remove the
1913    register copies arising from phi nodes by changing the base
1914    variables of SSA names.  TMP_VARS is the set of the temporary variables
1915    for those we want to perform this.  */
1916
1917 static void
1918 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1919 {
1920   edge e;
1921   tree phi, name, use, var, stmt;
1922
1923   e = loop_latch_edge (loop);
1924   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1925     {
1926       name = PHI_RESULT (phi);
1927       var = SSA_NAME_VAR (name);
1928       if (!bitmap_bit_p (tmp_vars, DECL_UID (var)))
1929         continue;
1930       use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1931       gcc_assert (TREE_CODE (use) == SSA_NAME);
1932
1933       /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
1934       stmt = SSA_NAME_DEF_STMT (use);
1935       while (TREE_CODE (stmt) == PHI_NODE
1936              /* In case we could not unroll the loop enough to eliminate
1937                 all copies, we may reach the loop header before the defining
1938                 statement (in that case, some register copies will be present
1939                 in loop latch in the final code, corresponding to the newly
1940                 created looparound phi nodes).  */
1941              && bb_for_stmt (stmt) != loop->header)
1942         {
1943           gcc_assert (single_pred_p (bb_for_stmt (stmt)));
1944           use = PHI_ARG_DEF (stmt, 0);
1945           stmt = SSA_NAME_DEF_STMT (use);
1946         }
1947
1948       base_names_in_chain_on (loop, use, var);
1949     }
1950 }
1951
1952 /* Returns true if CHAIN is suitable to be combined.  */
1953
1954 static bool
1955 chain_can_be_combined_p (chain_p chain)
1956 {
1957   return (!chain->combined
1958           && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1959 }
1960
1961 /* Returns the modify statement that uses NAME.  Skips over assignment
1962    statements, NAME is replaced with the actual name used in the returned
1963    statement.  */
1964
1965 static tree
1966 find_use_stmt (tree *name)
1967 {
1968   tree stmt, rhs, lhs;
1969
1970   /* Skip over assignments.  */
1971   while (1)
1972     {
1973       stmt = single_nonlooparound_use (*name);
1974       if (!stmt)
1975         return NULL_TREE;
1976
1977       if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1978         return NULL_TREE;
1979
1980       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1981       if (TREE_CODE (lhs) != SSA_NAME)
1982         return NULL_TREE;
1983
1984       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1985       if (rhs != *name)
1986         break;
1987
1988       *name = lhs;
1989     }
1990
1991   if (!EXPR_P (rhs)
1992       || REFERENCE_CLASS_P (rhs)
1993       || TREE_CODE_LENGTH (TREE_CODE (rhs)) != 2)
1994     return NULL_TREE;
1995
1996   return stmt;
1997 }
1998
1999 /* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2000
2001 static bool
2002 may_reassociate_p (tree type, enum tree_code code)
2003 {
2004   if (FLOAT_TYPE_P (type)
2005       && !flag_unsafe_math_optimizations)
2006     return false;
2007
2008   return (commutative_tree_code (code)
2009           && associative_tree_code (code));
2010 }
2011
2012 /* If the operation used in STMT is associative and commutative, go through the
2013    tree of the same operations and returns its root.  Distance to the root
2014    is stored in DISTANCE.  */
2015
2016 static tree
2017 find_associative_operation_root (tree stmt, unsigned *distance)
2018 {
2019   tree rhs = GIMPLE_STMT_OPERAND (stmt, 1), lhs, next;
2020   enum tree_code code = TREE_CODE (rhs);
2021   unsigned dist = 0;
2022
2023   if (!may_reassociate_p (TREE_TYPE (rhs), code))
2024     return NULL_TREE;
2025
2026   while (1)
2027     {
2028       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2029       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2030
2031       next = find_use_stmt (&lhs);
2032       if (!next)
2033         break;
2034
2035       rhs = GIMPLE_STMT_OPERAND (next, 1);
2036       if (TREE_CODE (rhs) != code)
2037         break;
2038
2039       stmt = next;
2040       dist++;
2041     }
2042
2043   if (distance)
2044     *distance = dist;
2045   return stmt;
2046 }
2047
2048 /* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2049    is no such statement, returns NULL_TREE.  In case the operation used on
2050    NAME1 and NAME2 is associative and commutative, returns the root of the
2051    tree formed by this operation instead of the statement that uses NAME1 or
2052    NAME2.  */
2053
2054 static tree
2055 find_common_use_stmt (tree *name1, tree *name2)
2056 {
2057   tree stmt1, stmt2;
2058
2059   stmt1 = find_use_stmt (name1);
2060   if (!stmt1)
2061     return NULL_TREE;
2062
2063   stmt2 = find_use_stmt (name2);
2064   if (!stmt2)
2065     return NULL_TREE;
2066
2067   if (stmt1 == stmt2)
2068     return stmt1;
2069
2070   stmt1 = find_associative_operation_root (stmt1, NULL);
2071   if (!stmt1)
2072     return NULL_TREE;
2073   stmt2 = find_associative_operation_root (stmt2, NULL);
2074   if (!stmt2)
2075     return NULL_TREE;
2076
2077   return (stmt1 == stmt2 ? stmt1 : NULL_TREE);
2078 }
2079
2080 /* Checks whether R1 and R2 are combined together using CODE, with the result
2081    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2082    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2083
2084 static bool
2085 combinable_refs_p (dref r1, dref r2,
2086                    enum tree_code *code, bool *swap, tree *rslt_type)
2087 {
2088   enum tree_code acode;
2089   bool aswap;
2090   tree atype;
2091   tree name1, name2, stmt, rhs;
2092
2093   name1 = name_for_ref (r1);
2094   name2 = name_for_ref (r2);
2095   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2096
2097   stmt = find_common_use_stmt (&name1, &name2);
2098
2099   if (!stmt)
2100     return false;
2101
2102   rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2103   acode = TREE_CODE (rhs);
2104   aswap = (!commutative_tree_code (acode)
2105            && TREE_OPERAND (rhs, 0) != name1);
2106   atype = TREE_TYPE (rhs);
2107
2108   if (*code == ERROR_MARK)
2109     {
2110       *code = acode;
2111       *swap = aswap;
2112       *rslt_type = atype;
2113       return true;
2114     }
2115
2116   return (*code == acode
2117           && *swap == aswap
2118           && *rslt_type == atype);
2119 }
2120
2121 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2122    an assignment of the remaining operand.  */
2123
2124 static void
2125 remove_name_from_operation (tree stmt, tree op)
2126 {
2127   tree *rhs;
2128
2129   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2130
2131   rhs = &GIMPLE_STMT_OPERAND (stmt, 1);
2132   if (TREE_OPERAND (*rhs, 0) == op)
2133     *rhs = TREE_OPERAND (*rhs, 1);
2134   else if (TREE_OPERAND (*rhs, 1) == op)
2135     *rhs = TREE_OPERAND (*rhs, 0);
2136   else
2137     gcc_unreachable ();
2138   update_stmt (stmt);
2139 }
2140
2141 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2142    are combined in a single statement, and returns this statement.  */
2143
2144 static tree
2145 reassociate_to_the_same_stmt (tree name1, tree name2)
2146 {
2147   tree stmt1, stmt2, root1, root2, r1, r2, s1, s2;
2148   tree new_stmt, tmp_stmt, new_name, tmp_name, var;
2149   unsigned dist1, dist2;
2150   enum tree_code code;
2151   tree type = TREE_TYPE (name1);
2152   block_stmt_iterator bsi;
2153
2154   stmt1 = find_use_stmt (&name1);
2155   stmt2 = find_use_stmt (&name2);
2156   root1 = find_associative_operation_root (stmt1, &dist1);
2157   root2 = find_associative_operation_root (stmt2, &dist2);
2158   code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt1, 1));
2159
2160   gcc_assert (root1 && root2 && root1 == root2
2161               && code == TREE_CODE (GIMPLE_STMT_OPERAND (stmt2, 1)));
2162
2163   /* Find the root of the nearest expression in that both NAME1 and NAME2
2164      are used.  */
2165   r1 = name1;
2166   s1 = stmt1;
2167   r2 = name2;
2168   s2 = stmt2;
2169
2170   while (dist1 > dist2)
2171     {
2172       s1 = find_use_stmt (&r1);
2173       r1 = GIMPLE_STMT_OPERAND (s1, 0);
2174       dist1--;
2175     }
2176   while (dist2 > dist1)
2177     {
2178       s2 = find_use_stmt (&r2);
2179       r2 = GIMPLE_STMT_OPERAND (s2, 0);
2180       dist2--;
2181     }
2182
2183   while (s1 != s2)
2184     {
2185       s1 = find_use_stmt (&r1);
2186       r1 = GIMPLE_STMT_OPERAND (s1, 0);
2187       s2 = find_use_stmt (&r2);
2188       r2 = GIMPLE_STMT_OPERAND (s2, 0);
2189     }
2190
2191   /* Remove NAME1 and NAME2 from the statements in that they are used
2192      currently.  */
2193   remove_name_from_operation (stmt1, name1);
2194   remove_name_from_operation (stmt2, name2);
2195
2196   /* Insert the new statement combining NAME1 and NAME2 before S1, and
2197      combine it with the rhs of S1.  */
2198   var = create_tmp_var (type, "predreastmp");
2199   add_referenced_var (var);
2200   new_name = make_ssa_name (var, NULL_TREE);
2201   new_stmt = build_gimple_modify_stmt (new_name,
2202                             fold_build2 (code, type, name1, name2));
2203   SSA_NAME_DEF_STMT (new_name) = new_stmt;
2204
2205   var = create_tmp_var (type, "predreastmp");
2206   add_referenced_var (var);
2207   tmp_name = make_ssa_name (var, NULL_TREE);
2208   tmp_stmt = build_gimple_modify_stmt (tmp_name,
2209                                             GIMPLE_STMT_OPERAND (s1, 1));
2210   SSA_NAME_DEF_STMT (tmp_name) = tmp_stmt;
2211
2212   GIMPLE_STMT_OPERAND (s1, 1) = fold_build2 (code, type, new_name, tmp_name);
2213   update_stmt (s1);
2214
2215   bsi = bsi_for_stmt (s1);
2216   bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
2217   bsi_insert_before (&bsi, tmp_stmt, BSI_SAME_STMT);
2218
2219   return new_stmt;
2220 }
2221
2222 /* Returns the statement that combines references R1 and R2.  In case R1
2223    and R2 are not used in the same statement, but they are used with an
2224    associative and commutative operation in the same expression, reassociate
2225    the expression so that they are used in the same statement.  */
2226
2227 static tree
2228 stmt_combining_refs (dref r1, dref r2)
2229 {
2230   tree stmt1, stmt2;
2231   tree name1 = name_for_ref (r1);
2232   tree name2 = name_for_ref (r2);
2233
2234   stmt1 = find_use_stmt (&name1);
2235   stmt2 = find_use_stmt (&name2);
2236   if (stmt1 == stmt2)
2237     return stmt1;
2238
2239   return reassociate_to_the_same_stmt (name1, name2);
2240 }
2241
2242 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2243    description of the new chain is returned, otherwise we return NULL.  */
2244
2245 static chain_p
2246 combine_chains (chain_p ch1, chain_p ch2)
2247 {
2248   dref r1, r2, nw;
2249   enum tree_code op = ERROR_MARK;
2250   bool swap = false;
2251   chain_p new_chain;
2252   unsigned i;
2253   tree root_stmt;
2254   tree rslt_type = NULL_TREE;
2255
2256   if (ch1 == ch2)
2257     return false;
2258   if (ch1->length != ch2->length)
2259     return NULL;
2260
2261   if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs))
2262     return NULL;
2263
2264   for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2265                && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2266     {
2267       if (r1->distance != r2->distance)
2268         return NULL;
2269
2270       if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2271         return NULL;
2272     }
2273
2274   if (swap)
2275     {
2276       chain_p tmp = ch1;
2277       ch1 = ch2;
2278       ch2 = tmp;
2279     }
2280
2281   new_chain = XCNEW (struct chain);
2282   new_chain->type = CT_COMBINATION;
2283   new_chain->operator = op;
2284   new_chain->ch1 = ch1;
2285   new_chain->ch2 = ch2;
2286   new_chain->rslt_type = rslt_type;
2287   new_chain->length = ch1->length;
2288
2289   for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2290                && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2291     {
2292       nw = XCNEW (struct dref);
2293       nw->stmt = stmt_combining_refs (r1, r2);
2294       nw->distance = r1->distance;
2295
2296       VEC_safe_push (dref, heap, new_chain->refs, nw);
2297     }
2298
2299   new_chain->has_max_use_after = false;
2300   root_stmt = get_chain_root (new_chain)->stmt;
2301   for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++)
2302     {
2303       if (nw->distance == new_chain->length
2304           && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2305         {
2306           new_chain->has_max_use_after = true;
2307           break;
2308         }
2309     }
2310
2311   ch1->combined = true;
2312   ch2->combined = true;
2313   return new_chain;
2314 }
2315
2316 /* Try to combine the CHAINS.  */
2317
2318 static void
2319 try_combine_chains (VEC (chain_p, heap) **chains)
2320 {
2321   unsigned i, j;
2322   chain_p ch1, ch2, cch;
2323   VEC (chain_p, heap) *worklist = NULL;
2324
2325   for (i = 0; VEC_iterate (chain_p, *chains, i, ch1); i++)
2326     if (chain_can_be_combined_p (ch1))
2327       VEC_safe_push (chain_p, heap, worklist, ch1);
2328
2329   while (!VEC_empty (chain_p, worklist))
2330     {
2331       ch1 = VEC_pop (chain_p, worklist);
2332       if (!chain_can_be_combined_p (ch1))
2333         continue;
2334
2335       for (j = 0; VEC_iterate (chain_p, *chains, j, ch2); j++)
2336         {
2337           if (!chain_can_be_combined_p (ch2))
2338             continue;
2339
2340           cch = combine_chains (ch1, ch2);
2341           if (cch)
2342             {
2343               VEC_safe_push (chain_p, heap, worklist, cch);
2344               VEC_safe_push (chain_p, heap, *chains, cch);
2345               break;
2346             }
2347         }
2348     }
2349 }
2350
2351 /* Sets alias information based on data reference DR for REF,
2352    if necessary.  */
2353
2354 static void
2355 set_alias_info (tree ref, struct data_reference *dr)
2356 {
2357   tree var;
2358   tree tag = DR_SYMBOL_TAG (dr);
2359
2360   gcc_assert (tag != NULL_TREE);
2361
2362   ref = get_base_address (ref);
2363   if (!ref || !INDIRECT_REF_P (ref))
2364     return;
2365
2366   var = SSA_NAME_VAR (TREE_OPERAND (ref, 0));
2367   if (var_ann (var)->symbol_mem_tag)
2368     return;
2369
2370   if (!MTAG_P (tag))
2371     new_type_alias (var, tag, ref);
2372   else
2373     var_ann (var)->symbol_mem_tag = tag;
2374
2375   var_ann (var)->subvars = DR_SUBVARS (dr);
2376 }
2377
2378 /* Prepare initializers for CHAIN in LOOP.  Returns false if this is
2379    impossible because one of these initializers may trap, true otherwise.  */
2380
2381 static bool
2382 prepare_initializers_chain (struct loop *loop, chain_p chain)
2383 {
2384   unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2385   struct data_reference *dr = get_chain_root (chain)->ref;
2386   tree init, stmts;
2387   dref laref;
2388   edge entry = loop_preheader_edge (loop);
2389
2390   /* Find the initializers for the variables, and check that they cannot
2391      trap.  */
2392   chain->inits = VEC_alloc (tree, heap, n);
2393   for (i = 0; i < n; i++)
2394     VEC_quick_push (tree, chain->inits, NULL_TREE);
2395
2396   /* If we have replaced some looparound phi nodes, use their initializers
2397      instead of creating our own.  */
2398   for (i = 0; VEC_iterate (dref, chain->refs, i, laref); i++)
2399     {
2400       if (TREE_CODE (laref->stmt) != PHI_NODE)
2401         continue;
2402
2403       gcc_assert (laref->distance > 0);
2404       VEC_replace (tree, chain->inits, n - laref->distance,
2405                    PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry));
2406     }
2407
2408   for (i = 0; i < n; i++)
2409     {
2410       if (VEC_index (tree, chain->inits, i) != NULL_TREE)
2411         continue;
2412
2413       init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
2414       if (!init)
2415         return false;
2416
2417       if (!chain->all_always_accessed && tree_could_trap_p (init))
2418         return false;
2419
2420       init = force_gimple_operand (init, &stmts, false, NULL_TREE);
2421       if (stmts)
2422         {
2423           mark_virtual_ops_for_renaming_list (stmts);
2424           bsi_insert_on_edge_immediate (entry, stmts);
2425         }
2426       set_alias_info (init, dr);
2427
2428       VEC_replace (tree, chain->inits, i, init);
2429     }
2430
2431   return true;
2432 }
2433
2434 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2435    be used because the initializers might trap.  */
2436
2437 static void
2438 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains)
2439 {
2440   chain_p chain;
2441   unsigned i;
2442
2443   for (i = 0; i < VEC_length (chain_p, chains); )
2444     {
2445       chain = VEC_index (chain_p, chains, i);
2446       if (prepare_initializers_chain (loop, chain))
2447         i++;
2448       else
2449         {
2450           release_chain (chain);
2451           VEC_unordered_remove (chain_p, chains, i);
2452         }
2453     }
2454 }
2455
2456 /* Performs predictive commoning for LOOP.  Returns true if LOOP was
2457    unrolled.  */
2458
2459 static bool
2460 tree_predictive_commoning_loop (struct loop *loop)
2461 {
2462   VEC (data_reference_p, heap) *datarefs;
2463   VEC (ddr_p, heap) *dependences;
2464   struct component *components;
2465   VEC (chain_p, heap) *chains = NULL;
2466   unsigned unroll_factor;
2467   struct tree_niter_desc desc;
2468   bool unroll = false;
2469   edge exit;
2470   bitmap tmp_vars;
2471
2472   if (dump_file && (dump_flags & TDF_DETAILS))
2473     fprintf (dump_file, "Processing loop %d\n",  loop->num);
2474
2475   /* Find the data references and split them into components according to their
2476      dependence relations.  */
2477   datarefs = VEC_alloc (data_reference_p, heap, 10);
2478   dependences = VEC_alloc (ddr_p, heap, 10);
2479   compute_data_dependences_for_loop (loop, true, &datarefs, &dependences);
2480   if (dump_file && (dump_flags & TDF_DETAILS))
2481     dump_data_dependence_relations (dump_file, dependences);
2482
2483   components = split_data_refs_to_components (loop, datarefs, dependences);
2484   free_dependence_relations (dependences);
2485   if (!components)
2486     {
2487       free_data_refs (datarefs);
2488       return false;
2489     }
2490
2491   if (dump_file && (dump_flags & TDF_DETAILS))
2492     {
2493       fprintf (dump_file, "Initial state:\n\n");
2494       dump_components (dump_file, components);
2495     }
2496
2497   /* Find the suitable components and split them into chains.  */
2498   components = filter_suitable_components (loop, components);
2499
2500   tmp_vars = BITMAP_ALLOC (NULL);
2501   looparound_phis = BITMAP_ALLOC (NULL);
2502   determine_roots (loop, components, &chains);
2503   release_components (components);
2504
2505   if (!chains)
2506     {
2507       if (dump_file && (dump_flags & TDF_DETAILS))
2508         fprintf (dump_file,
2509                  "Predictive commoning failed: no suitable chains\n");
2510       goto end;
2511     }
2512   prepare_initializers (loop, chains);
2513
2514   /* Try to combine the chains that are always worked with together.  */
2515   try_combine_chains (&chains);
2516
2517   if (dump_file && (dump_flags & TDF_DETAILS))
2518     {
2519       fprintf (dump_file, "Before commoning:\n\n");
2520       dump_chains (dump_file, chains);
2521     }
2522
2523   /* Determine the unroll factor, and if the loop should be unrolled, ensure
2524      that its number of iterations is divisible by the factor.  */
2525   unroll_factor = determine_unroll_factor (chains);
2526   scev_reset ();
2527   unroll = should_unroll_loop_p (loop, unroll_factor, &desc);
2528   exit = single_dom_exit (loop);
2529
2530   /* Execute the predictive commoning transformations, and possibly unroll the
2531      loop.  */
2532   if (unroll)
2533     {
2534       struct epcc_data dta;
2535
2536       if (dump_file && (dump_flags & TDF_DETAILS))
2537         fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2538
2539       dta.chains = chains;
2540       dta.tmp_vars = tmp_vars;
2541
2542       update_ssa (TODO_update_ssa_only_virtuals);
2543
2544       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2545          execute_pred_commoning_cbck is called may cause phi nodes to be
2546          reallocated, which is a problem since CHAINS may point to these
2547          statements.  To fix this, we store the ssa names defined by the
2548          phi nodes here instead of the phi nodes themselves, and restore
2549          the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
2550       replace_phis_by_defined_names (chains);
2551
2552       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2553                                       execute_pred_commoning_cbck, &dta);
2554       eliminate_temp_copies (loop, tmp_vars);
2555     }
2556   else
2557     {
2558       if (dump_file && (dump_flags & TDF_DETAILS))
2559         fprintf (dump_file,
2560                  "Executing predictive commoning without unrolling.\n");
2561       execute_pred_commoning (loop, chains, tmp_vars);
2562     }
2563
2564 end: ;
2565   release_chains (chains);
2566   free_data_refs (datarefs);
2567   BITMAP_FREE (tmp_vars);
2568   BITMAP_FREE (looparound_phis);
2569
2570   free_affine_expand_cache (&name_expansions);
2571
2572   return unroll;
2573 }
2574
2575 /* Runs predictive commoning.  */
2576
2577 void
2578 tree_predictive_commoning (void)
2579 {
2580   bool unrolled = false;
2581   struct loop *loop;
2582   loop_iterator li;
2583
2584   initialize_original_copy_tables ();
2585   FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
2586     {
2587       unrolled |= tree_predictive_commoning_loop (loop);
2588     }
2589
2590   if (unrolled)
2591     {
2592       scev_reset ();
2593       cleanup_tree_cfg_loop ();
2594     }
2595   free_original_copy_tables ();
2596 }