1 /* Predictive commoning.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This file implements the predictive commoning optimization. Predictive
22 commoning can be viewed as CSE around a loop, and with some improvements,
23 as generalized strength reduction-- i.e., reusing values computed in
24 earlier iterations of a loop in the later ones. So far, the pass only
25 handles the most useful case, that is, reusing values of memory references.
26 If you think this is all just a special case of PRE, you are sort of right;
27 however, concentrating on loops is simpler, and makes it possible to
28 incorporate data dependence analysis to detect the opportunities, perform
29 loop unrolling to avoid copies together with renaming immediately,
30 and if needed, we could also take register pressure into account.
32 Let us demonstrate what is done on an example:
34 for (i = 0; i < 100; i++)
36 a[i+2] = a[i] + a[i+1];
42 1) We find data references in the loop, and split them to mutually
43 independent groups (i.e., we find components of a data dependence
44 graph). We ignore read-read dependences whose distance is not constant.
45 (TODO -- we could also ignore antidependences). In this example, we
46 find the following groups:
48 a[i]{read}, a[i+1]{read}, a[i+2]{write}
49 b[10]{read}, b[10]{write}
50 c[99 - i]{read}, c[i]{write}
51 d[i + 1]{read}, d[i]{write}
53 2) Inside each of the group, we verify several conditions:
54 a) all the references must differ in indices only, and the indices
55 must all have the same step
56 b) the references must dominate loop latch (and thus, they must be
57 ordered by dominance relation).
58 c) the distance of the indices must be a small multiple of the step
59 We are then able to compute the difference of the references (# of
60 iterations before they point to the same place as the first of them).
61 Also, in case there are writes in the loop, we split the groups into
62 chains whose head is the write whose values are used by the reads in
63 the same chain. The chains are then processed independently,
64 making the further transformations simpler. Also, the shorter chains
65 need the same number of registers, but may require lower unrolling
66 factor in order to get rid of the copies on the loop latch.
68 In our example, we get the following chains (the chain for c is invalid).
70 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
71 b[10]{read,+0}, b[10]{write,+0}
72 d[i + 1]{read,+0}, d[i]{write,+1}
74 3) For each read, we determine the read or write whose value it reuses,
75 together with the distance of this reuse. I.e. we take the last
76 reference before it with distance 0, or the last of the references
77 with the smallest positive distance to the read. Then, we remove
78 the references that are not used in any of these chains, discard the
79 empty groups, and propagate all the links so that they point to the
80 single root reference of the chain (adjusting their distance
81 appropriately). Some extra care needs to be taken for references with
82 step 0. In our example (the numbers indicate the distance of the
85 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
86 b[10] --> (*) 1, b[10] (*)
88 4) The chains are combined together if possible. If the corresponding
89 elements of two chains are always combined together with the same
90 operator, we remember just the result of this combination, instead
91 of remembering the values separately. We may need to perform
92 reassociation to enable combining, for example
94 e[i] + f[i+1] + e[i+1] + f[i]
96 can be reassociated as
98 (e[i] + f[i]) + (e[i+1] + f[i+1])
100 and we can combine the chains for e and f into one chain.
102 5) For each root reference (end of the chain) R, let N be maximum distance
103 of a reference reusing its value. Variables R0 upto RN are created,
104 together with phi nodes that transfer values from R1 .. RN to
106 Initial values are loaded to R0..R(N-1) (in case not all references
107 must necessarily be accessed and they may trap, we may fail here;
108 TODO sometimes, the loads could be guarded by a check for the number
109 of iterations). Values loaded/stored in roots are also copied to
110 RN. Other reads are replaced with the appropriate variable Ri.
111 Everything is put to SSA form.
113 As a small improvement, if R0 is dead after the root (i.e., all uses of
114 the value with the maximum distance dominate the root), we can avoid
115 creating RN and use R0 instead of it.
117 In our example, we get (only the parts concerning a and b are shown):
118 for (i = 0; i < 100; i++)
130 6) Factor F for unrolling is determined as the smallest common multiple of
131 (N + 1) for each root reference (N for references for that we avoided
132 creating RN). If F and the loop is small enough, loop is unrolled F
133 times. The stores to RN (R0) in the copies of the loop body are
134 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
135 be coalesced and the copies can be eliminated.
137 TODO -- copy propagation and other optimizations may change the live
138 ranges of the temporary registers and prevent them from being coalesced;
139 this may increase the register pressure.
141 In our case, F = 2 and the (main loop of the) result is
143 for (i = 0; i < ...; i += 2)
160 TODO -- stores killing other stores can be taken into account, e.g.,
161 for (i = 0; i < n; i++)
171 for (i = 0; i < n; i++)
181 The interesting part is that this would generalize store motion; still, since
182 sm is performed elsewhere, it does not seem that important.
184 Predictive commoning can be generalized for arbitrary computations (not
185 just memory loads), and also nontrivial transfer functions (e.g., replacing
186 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
190 #include "coretypes.h"
195 #include "tree-flow.h"
197 #include "tree-data-ref.h"
198 #include "tree-scalar-evolution.h"
199 #include "tree-chrec.h"
201 #include "diagnostic.h"
202 #include "tree-pass.h"
203 #include "tree-affine.h"
204 #include "tree-inline.h"
206 /* The maximum number of iterations between the considered memory
   references.  (NOTE(review): the closing of this comment was missing
   from the elided listing.)  */
209 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
211 /* Data references (or phi nodes that carry data reference values across
   loop iterations).  */
214 typedef struct dref_d
216 /* The reference itself. */
217 struct data_reference *ref;
219 /* The statement in that the reference appears. */
/* NOTE(review): the declaration of the statement field and of the
   offset/distance/pos fields described by the comments below are missing
   from this elided listing -- confirm against the original source.  */
222 /* In case that STMT is a phi node, this field is set to the SSA name
223 defined by it in replace_phis_by_defined_names (in order to avoid
224 pointing to phi node that got reallocated in the meantime). */
225 tree name_defined_by_phi;
227 /* Distance of the reference from the root of the chain (in number of
228 iterations of the loop). */
231 /* Number of iterations offset from the first reference in the component. */
234 /* Number of the reference in a component, in dominance ordering. */
237 /* True if the memory reference is always accessed when the loop is
   entered.  */
239 unsigned always_accessed : 1;
243 DEF_VEC_ALLOC_P (dref, heap);
245 /* Type of the chain of the references. */
/* NOTE(review): the enum constant names are elided here; uses elsewhere in
   this file show them to be CT_INVARIANT, CT_LOAD, CT_STORE_LOAD and
   CT_COMBINATION.  */
249 /* The addresses of the references in the chain are constant. */
252 /* There are only loads in the chain. */
255 /* Root of the chain is store, the rest are loads. */
258 /* A combination of two chains. */
262 /* Chains of data references. */
/* NOTE(review): the "struct chain" header line, and the op/rslt_type/length
   field declarations referenced by dump_chain, are missing from this
   elided listing.  */
266 /* Type of the chain. */
267 enum chain_type type;
269 /* For combination chains, the operator and the two chains that are
270 combined, and the type of the result. */
273 struct chain *ch1, *ch2;
275 /* The references in the chain. */
276 VEC(dref,heap) *refs;
278 /* The maximum distance of the reference in the chain from the root. */
281 /* The variables used to copy the value throughout iterations. */
282 VEC(tree,heap) *vars;
284 /* Initializers for the variables. */
285 VEC(tree,heap) *inits;
287 /* True if there is a use of a variable with the maximal distance
288 that comes after the root in the loop. */
289 unsigned has_max_use_after : 1;
291 /* True if all the memory references in the chain are always accessed. */
292 unsigned all_always_accessed : 1;
294 /* True if this chain was combined together with some other chain. */
295 unsigned combined : 1;
299 DEF_VEC_ALLOC_P (chain_p, heap);
301 /* Describes the knowledge about the step of the memory references in
   the component.  */
/* NOTE(review): the enum constant names are elided; uses elsewhere in this
   file show them to be RS_INVARIANT, RS_NONZERO and RS_ANY.  */
306 /* The step is zero. */
309 /* The step is nonzero. */
312 /* The step may or may not be nonzero. */
316 /* Components of the data dependence graph. */
320 /* The references in the component. */
321 VEC(dref,heap) *refs;
323 /* What we know about the step of the references in the component. */
324 enum ref_step_type comp_step;
326 /* Next component in the list. */
327 struct component *next;
330 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
332 static bitmap looparound_phis;
334 /* Cache used by tree_to_aff_combination_expand. */
336 static struct pointer_map_t *name_expansions;
338 /* Dumps data reference REF to FILE. */
340 extern void dump_dref (FILE *, dref);
/* Prints the reference expression, its id/read-write kind, offset and
   distance, and the statement it appears in.  NOTE(review): elided
   listing -- the return type line, braces and the branch structure
   (memory ref vs. looparound/combination ref) are partly missing.  */
342 dump_dref (FILE *file, dref ref)
347 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
348 fprintf (file, " (id %u%s)\n", ref->pos,
349 DR_IS_READ (ref->ref) ? "" : ", write");
351 fprintf (file, " offset ");
352 dump_double_int (file, ref->offset, false);
353 fprintf (file, "\n");
355 fprintf (file, " distance %u\n", ref->distance);
359 if (gimple_code (ref->stmt) == GIMPLE_PHI)
360 fprintf (file, " looparound ref\n");
362 fprintf (file, " combination ref\n");
363 fprintf (file, " in statement ");
364 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
365 fprintf (file, "\n");
366 fprintf (file, " distance %u\n", ref->distance);
371 /* Dumps CHAIN to FILE. */
373 extern void dump_chain (FILE *, chain_p);
/* NOTE(review): elided listing -- the switch on chain->type that selects
   CHAIN_TYPE, and the enclosing braces, are missing from view.  */
375 dump_chain (FILE *file, chain_p chain)
378 const char *chain_type;
385 chain_type = "Load motion";
389 chain_type = "Loads-only";
393 chain_type = "Store-loads";
397 chain_type = "Combination";
404 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
405 chain->combined ? " (combined)" : "");
406 if (chain->type != CT_INVARIANT)
407 fprintf (file, " max distance %u%s\n", chain->length,
408 chain->has_max_use_after ? "" : ", may reuse first");
410 if (chain->type == CT_COMBINATION)
412 fprintf (file, " equal to %p %s %p in type ",
413 (void *) chain->ch1, op_symbol_code (chain->op),
414 (void *) chain->ch2);
415 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
416 fprintf (file, "\n");
421 fprintf (file, " vars");
422 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
425 print_generic_expr (file, var, TDF_SLIM);
427 fprintf (file, "\n");
432 fprintf (file, " inits");
433 for (i = 0; VEC_iterate (tree, chain->inits, i, var); i++)
436 print_generic_expr (file, var, TDF_SLIM);
438 fprintf (file, "\n");
441 fprintf (file, " references:\n");
442 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
445 fprintf (file, "\n");
448 /* Dumps CHAINS to FILE. */
450 extern void dump_chains (FILE *, VEC (chain_p, heap) *);
/* Iterates the vector and dumps each chain via dump_chain.  */
452 dump_chains (FILE *file, VEC (chain_p, heap) *chains)
457 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
458 dump_chain (file, chain);
461 /* Dumps COMP to FILE. */
463 extern void dump_component (FILE *, struct component *);
/* Prints the component header (marking invariant-step components) and
   each reference in it.  */
465 dump_component (FILE *file, struct component *comp)
470 fprintf (file, "Component%s:\n",
471 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
472 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
474 fprintf (file, "\n");
477 /* Dumps COMPS to FILE. */
479 extern void dump_components (FILE *, struct component *);
/* Walks the singly-linked component list and dumps each entry.  */
481 dump_components (FILE *file, struct component *comps)
483 struct component *comp;
485 for (comp = comps; comp; comp = comp->next)
486 dump_component (file, comp);
489 /* Frees a chain CHAIN. */
/* Releases the per-reference dref_d structures and the refs/vars/inits
   vectors.  NOTE(review): the free of the dref elements and of the chain
   itself is on elided lines.  */
492 release_chain (chain_p chain)
500 for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
503 VEC_free (dref, heap, chain->refs);
504 VEC_free (tree, heap, chain->vars);
505 VEC_free (tree, heap, chain->inits);
/* Frees all chains in the vector CHAINS, then the vector itself.  */
513 release_chains (VEC (chain_p, heap) *chains)
518 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
519 release_chain (chain);
520 VEC_free (chain_p, heap, chains);
523 /* Frees a component COMP. */
/* NOTE(review): freeing of COMP itself is on an elided line.  */
526 release_component (struct component *comp)
528 VEC_free (dref, heap, comp->refs);
532 /* Frees list of components COMPS. */
/* Standard list-walk with NEXT saved before each node is released.  */
535 release_components (struct component *comps)
537 struct component *act, *next;
539 for (act = comps; act; act = next)
542 release_component (act);
546 /* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */
/* Union-find "find" with path compression: first walk up to the root,
   then relink each visited node directly to it.  NOTE(review): the
   body of the second loop and the return are on elided lines.  */
550 component_of (unsigned fathers[], unsigned a)
554 for (root = a; root != fathers[root]; root = fathers[root])
557 for (; a != root; a = n)
566 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
567 components, A and B are components to merge. */
/* Union-by-size: the smaller component is attached under the larger one.
   NOTE(review): the father-pointer updates are on elided lines.  */
570 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
572 unsigned ca = component_of (fathers, a);
573 unsigned cb = component_of (fathers, b);
578 if (sizes[ca] < sizes[cb])
580 sizes[cb] += sizes[ca];
585 sizes[ca] += sizes[cb];
590 /* Returns true if A is a reference that is suitable for predictive commoning
591 in the innermost loop that contains it. REF_STEP is set according to the
592 step of the reference A. */
/* Rejects non-register-typed and possibly-trapping references; classifies
   the step as RS_INVARIANT / RS_NONZERO (RS_ANY on an elided else branch,
   presumably -- confirm against the original).  */
595 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
597 tree ref = DR_REF (a), step = DR_STEP (a);
600 || !is_gimple_reg_type (TREE_TYPE (ref))
601 || tree_could_throw_p (ref))
604 if (integer_zerop (step))
605 *ref_step = RS_INVARIANT;
606 else if (integer_nonzerop (step))
607 *ref_step = RS_NONZERO;
614 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
/* Expands DR_OFFSET to an affine combination, then adds the constant
   DR_INIT part.  */
617 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
621 tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset,
623 aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr)));
624 aff_combination_add (offset, &delta);
627 /* Determines number of iterations of the innermost enclosing loop before B
628 refers to exactly the same location as A and stores it to OFF. If A and
629 B do not have the same step, they never meet, or anything else fails,
630 returns false, otherwise returns true. Both A and B are assumed to
631 satisfy suitable_reference_p. */
634 determine_offset (struct data_reference *a, struct data_reference *b,
637 aff_tree diff, baseb, step;
640 /* Check that both the references access the location in the same type. */
641 typea = TREE_TYPE (DR_REF (a));
642 typeb = TREE_TYPE (DR_REF (b));
643 if (!useless_type_conversion_p (typeb, typea))
646 /* Check whether the base address and the step of both references is the
   same.  */
648 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
649 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
652 if (integer_zerop (DR_STEP (a)))
654 /* If the references have loop invariant address, check that they access
655 exactly the same location. */
656 *off = double_int_zero;
657 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
658 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
661 /* Compare the offsets of the addresses, and check whether the difference
662 is a multiple of step. */
663 aff_combination_dr_offset (a, &diff);
664 aff_combination_dr_offset (b, &baseb);
665 aff_combination_scale (&baseb, double_int_minus_one);
666 aff_combination_add (&diff, &baseb);
668 tree_to_aff_combination_expand (DR_STEP (a), sizetype,
669 &step, &name_expansions);
670 return aff_combination_constant_multiple_p (&diff, &step, off);
673 /* Returns the last basic block in LOOP for that we are sure that
674 it is executed whenever the loop is entered. */
/* Starts from the latch and intersects (via nearest common dominator)
   with the source of every exit edge.  NOTE(review): the return of LAST
   is on an elided line.  */
677 last_always_executed_block (struct loop *loop)
680 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
682 basic_block last = loop->latch;
684 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
685 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
686 VEC_free (edge, heap, exits);
691 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
693 static struct component *
/* Union-find over the data references: index N is a reserved "bad"
   component that collects unsuitable references; dependences merge the
   components of their endpoints; finally the surviving components are
   materialized as struct component lists.  NOTE(review): elided listing --
   braces, several guard statements and the function's return are
   missing from view.  */
694 split_data_refs_to_components (struct loop *loop,
695 VEC (data_reference_p, heap) *datarefs,
696 VEC (ddr_p, heap) *depends)
698 unsigned i, n = VEC_length (data_reference_p, datarefs);
699 unsigned ca, ia, ib, bad;
700 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
701 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
702 struct component **comps;
703 struct data_reference *dr, *dra, *drb;
704 struct data_dependence_relation *ddr;
705 struct component *comp_list = NULL, *comp;
707 basic_block last_always_executed = last_always_executed_block (loop);
709 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
713 /* A fake reference for call or asm_expr that may clobber memory;
   [rest of comment elided].  */
717 dr->aux = (void *) (size_t) i;
722 /* A component reserved for the "bad" data references. */
726 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
728 enum ref_step_type dummy;
730 if (!suitable_reference_p (dr, &dummy))
732 ia = (unsigned) (size_t) dr->aux;
733 merge_comps (comp_father, comp_size, n, ia);
737 for (i = 0; VEC_iterate (ddr_p, depends, i, ddr); i++)
739 double_int dummy_off;
741 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
746 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
747 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
751 bad = component_of (comp_father, n);
753 /* If both A and B are reads, we may ignore unsuitable dependences. */
754 if (DR_IS_READ (dra) && DR_IS_READ (drb)
755 && (ia == bad || ib == bad
756 || !determine_offset (dra, drb, &dummy_off)))
759 merge_comps (comp_father, comp_size, ia, ib);
762 comps = XCNEWVEC (struct component *, n);
763 bad = component_of (comp_father, n);
764 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
766 ia = (unsigned) (size_t) dr->aux;
767 ca = component_of (comp_father, ia);
774 comp = XCNEW (struct component);
775 comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
779 dataref = XCNEW (struct dref_d);
781 dataref->stmt = DR_STMT (dr);
782 dataref->offset = double_int_zero;
783 dataref->distance = 0;
785 dataref->always_accessed
786 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
787 gimple_bb (dataref->stmt));
788 dataref->pos = VEC_length (dref, comp->refs);
789 VEC_quick_push (dref, comp->refs, dataref);
792 for (i = 0; i < n; i++)
797 comp->next = comp_list;
809 /* Returns true if the component COMP satisfies the conditions
810 described in 2) at the beginning of this file. LOOP is the current
   loop.  */
814 suitable_component_p (struct loop *loop, struct component *comp)
818 basic_block ba, bp = loop->header;
819 bool ok, has_write = false;
/* First pass: every reference must execute exactly once per iteration and
   be dominance-ordered; writes are noted in HAS_WRITE.  */
821 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
823 ba = gimple_bb (a->stmt);
825 if (!just_once_each_iteration_p (loop, ba))
828 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
831 if (!DR_IS_READ (a->ref))
/* Second pass: all references must share the step classification of the
   first one, and their offsets relative to it must be computable.  */
835 first = VEC_index (dref, comp->refs, 0);
836 ok = suitable_reference_p (first->ref, &comp->comp_step);
838 first->offset = double_int_zero;
840 for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++)
842 if (!determine_offset (first->ref, a->ref, &a->offset))
845 #ifdef ENABLE_CHECKING
847 enum ref_step_type a_step;
848 ok = suitable_reference_p (a->ref, &a_step);
849 gcc_assert (ok && a_step == comp->comp_step);
854 /* If there is a write inside the component, we must know whether the
855 step is nonzero or not -- we would not otherwise be able to recognize
856 whether the value accessed by reads comes from the OFFSET-th iteration
857 or the previous one. */
858 if (has_write && comp->comp_step == RS_ANY)
864 /* Check the conditions on references inside each of components COMPS,
865 and remove the unsuitable components from the list. The new list
866 of components is returned. The conditions are described in 2) at
867 the beginning of this file. LOOP is the current loop. */
869 static struct component *
/* Classic unlink-while-walking over a pointer-to-pointer; unsuitable
   components have their drefs freed and the component released.
   NOTE(review): list relinking and the final return are on elided
   lines.  */
870 filter_suitable_components (struct loop *loop, struct component *comps)
872 struct component **comp, *act;
874 for (comp = &comps; *comp; )
877 if (suitable_component_p (loop, act))
885 for (i = 0; VEC_iterate (dref, act->refs, i, ref); i++)
887 release_component (act);
894 /* Compares two drefs A and B by their offset and position. Callback for
   qsort.  */
898 order_drefs (const void *a, const void *b)
900 const dref *const da = (const dref *) a;
901 const dref *const db = (const dref *) b;
/* Primary key: signed offset comparison (the non-zero OFFCMP return is on
   an elided line); secondary key: dominance position.  */
902 int offcmp = double_int_scmp ((*da)->offset, (*db)->offset);
907 return (*da)->pos - (*db)->pos;
910 /* Returns root of the CHAIN. */
/* The root is by construction the first element of the refs vector.  */
913 get_chain_root (chain_p chain)
915 return VEC_index (dref, chain->refs, 0);
918 /* Adds REF to the chain CHAIN. */
/* DIST = REF->offset - root->offset must be non-negative and at most
   MAX_DISTANCE; the chain length and has_max_use_after/all_always_accessed
   flags are kept up to date.  NOTE(review): the early-return for
   too-distant refs is on an elided line.  */
921 add_ref_to_chain (chain_p chain, dref ref)
923 dref root = get_chain_root (chain);
926 gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0);
927 dist = double_int_add (ref->offset, double_int_neg (root->offset));
928 if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
933 gcc_assert (double_int_fits_in_uhwi_p (dist));
935 VEC_safe_push (dref, heap, chain->refs, ref);
937 ref->distance = double_int_to_uhwi (dist);
939 if (ref->distance >= chain->length)
941 chain->length = ref->distance;
942 chain->has_max_use_after = false;
945 if (ref->distance == chain->length
946 && ref->pos > root->pos)
947 chain->has_max_use_after = true;
949 chain->all_always_accessed &= ref->always_accessed;
952 /* Returns the chain for invariant component COMP. */
/* All refs of the component go into one CT_INVARIANT chain;
   all_always_accessed is the conjunction over the refs.  NOTE(review):
   the return of CHAIN is on an elided line.  */
955 make_invariant_chain (struct component *comp)
957 chain_p chain = XCNEW (struct chain);
961 chain->type = CT_INVARIANT;
963 chain->all_always_accessed = true;
965 for (i = 0; VEC_iterate (dref, comp->refs, i, ref); i++)
967 VEC_safe_push (dref, heap, chain->refs, ref);
968 chain->all_always_accessed &= ref->always_accessed;
974 /* Make a new chain rooted at REF. */
/* Chain type is CT_LOAD when the root is a read, CT_STORE_LOAD when it is
   a write.  NOTE(review): the return of CHAIN is on an elided line.  */
977 make_rooted_chain (dref ref)
979 chain_p chain = XCNEW (struct chain);
981 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
983 VEC_safe_push (dref, heap, chain->refs, ref);
984 chain->all_always_accessed = ref->always_accessed;
991 /* Returns true if CHAIN is not trivial. */
/* Trivial means NULL or containing at most one reference.  */
994 nontrivial_chain_p (chain_p chain)
996 return chain != NULL && VEC_length (dref, chain->refs) > 1;
999 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
   is none.  */
/* For an assignment: the lhs when REF is a read (or has no data ref),
   the rhs1 when it is a write; for a phi node: the phi result.  */
1003 name_for_ref (dref ref)
1007 if (is_gimple_assign (ref->stmt))
1009 if (!ref->ref || DR_IS_READ (ref->ref))
1010 name = gimple_assign_lhs (ref->stmt);
1012 name = gimple_assign_rhs1 (ref->stmt);
1015 name = PHI_RESULT (ref->stmt);
1017 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1020 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1021 iterations of the innermost enclosing loop). */
1024 valid_initializer_p (struct data_reference *ref,
1025 unsigned distance, struct data_reference *root)
1027 aff_tree diff, base, step;
1030 /* Both REF and ROOT must be accessing the same object. */
1031 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1034 /* The initializer is defined outside of loop, hence its address must be
1035 invariant inside the loop. */
1036 gcc_assert (integer_zerop (DR_STEP (ref)));
1038 /* If the address of the reference is invariant, initializer must access
1039 exactly the same location. */
1040 if (integer_zerop (DR_STEP (root)))
1041 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1042 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1044 /* Verify that this index of REF is equal to the root's index at
1045 -DISTANCE-th iteration. */
1046 aff_combination_dr_offset (root, &diff);
1047 aff_combination_dr_offset (ref, &base);
1048 aff_combination_scale (&base, double_int_minus_one);
1049 aff_combination_add (&diff, &base);
1051 tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step,
1053 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1056 if (!double_int_equal_p (off, uhwi_to_double_int (distance)))
/* NOTE(review): the final "return true;" is on an elided line.  */
1062 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1063 initial value is correct (equal to initial value of REF shifted by one
1064 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1065 is the root of the current chain. */
1068 find_looparound_phi (struct loop *loop, dref ref, dref root)
1070 tree name, init, init_ref;
1071 gimple phi = NULL, init_stmt;
1072 edge latch = loop_latch_edge (loop);
1073 struct data_reference init_dr;
1074 gimple_stmt_iterator psi;
/* Obtain the SSA name carrying REF's value (lhs of a load, rhs of a
   store, or the phi result).  */
1076 if (is_gimple_assign (ref->stmt))
1078 if (DR_IS_READ (ref->ref))
1079 name = gimple_assign_lhs (ref->stmt)
1081 name = gimple_assign_rhs1 (ref->stmt);
1084 name = PHI_RESULT (ref->stmt);
/* Look for a header phi whose latch argument is NAME.  */
1088 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1090 phi = gsi_stmt (psi);
1091 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1095 if (gsi_end_p (psi))
/* The preheader value must come from a simple load of a memory
   reference or declaration.  */
1098 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1099 if (TREE_CODE (init) != SSA_NAME)
1101 init_stmt = SSA_NAME_DEF_STMT (init);
1102 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1104 gcc_assert (gimple_assign_lhs (init_stmt) == init);
1106 init_ref = gimple_assign_rhs1 (init_stmt);
1107 if (!REFERENCE_CLASS_P (init_ref)
1108 && !DECL_P (init_ref))
1111 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1112 loop enclosing PHI). */
1113 memset (&init_dr, 0, sizeof (struct data_reference));
1114 DR_REF (&init_dr) = init_ref;
1115 DR_STMT (&init_dr) = phi;
1116 if (!dr_analyze_innermost (&init_dr))
1119 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
/* NOTE(review): the return of PHI is on an elided line.  */
1125 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
/* The new dref has distance REF->distance + 1 and is inserted keeping the
   refs vector sorted by distance; chain length/has_max_use_after are
   updated when the new distance exceeds the old maximum.  */
1128 insert_looparound_copy (chain_p chain, dref ref, gimple phi)
1130 dref nw = XCNEW (struct dref_d), aref;
1134 nw->distance = ref->distance + 1;
1135 nw->always_accessed = 1;
1137 for (i = 0; VEC_iterate (dref, chain->refs, i, aref); i++)
1138 if (aref->distance >= nw->distance)
1140 VEC_safe_insert (dref, heap, chain->refs, i, nw);
1142 if (nw->distance > chain->length)
1144 chain->length = nw->distance;
1145 chain->has_max_use_after = false;
1149 /* For references in CHAIN that are copied around the LOOP (created previously
1150 by PRE, or by user), add the results of such copies to the chain. This
1151 enables us to remove the copies by unrolling, and may need less registers
1152 (also, it may allow us to combine chains together). */
1155 add_looparound_copies (struct loop *loop, chain_p chain)
1158 dref ref, root = get_chain_root (chain);
1161 for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
1163 phi = find_looparound_phi (loop, ref, root);
/* Record the phi in LOOPAROUND_PHIS and append the looparound copy
   (the "no phi found" continue is on an elided line).  */
1167 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1168 insert_looparound_copy (chain, ref, phi);
1172 /* Find roots of the values and determine distances in the component COMP.
1173 The references are redistributed into CHAINS. LOOP is the current
   loop.  */
1177 determine_roots_comp (struct loop *loop,
1178 struct component *comp,
1179 VEC (chain_p, heap) **chains)
1183 chain_p chain = NULL;
1184 double_int last_ofs = double_int_zero;
1186 /* Invariants are handled specially. */
1187 if (comp->comp_step == RS_INVARIANT)
1189 chain = make_invariant_chain (comp);
1190 VEC_safe_push (chain_p, heap, *chains, chain);
/* Sort refs by (offset, position) so each chain is a maximal run of reads
   within MAX_DISTANCE of the last chain root.  */
1194 qsort (VEC_address (dref, comp->refs), VEC_length (dref, comp->refs),
1195 sizeof (dref), order_drefs);
1197 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
1199 if (!chain || !DR_IS_READ (a->ref)
1200 || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE),
1201 double_int_add (a->offset,
1202 double_int_neg (last_ofs))) <= 0)
/* Close the current chain (keep it only if nontrivial) and start a new
   one rooted at A.  */
1204 if (nontrivial_chain_p (chain))
1206 add_looparound_copies (loop, chain);
1207 VEC_safe_push (chain_p, heap, *chains, chain);
1210 release_chain (chain);
1211 chain = make_rooted_chain (a);
1212 last_ofs = a->offset;
1216 add_ref_to_chain (chain, a);
1219 if (nontrivial_chain_p (chain))
1221 add_looparound_copies (loop, chain);
1222 VEC_safe_push (chain_p, heap, *chains, chain);
1225 release_chain (chain);
1228 /* Find roots of the values and determine distances in components COMPS, and
1229 separates the references to CHAINS. LOOP is the current loop. */
/* Simple driver over the component list.  */
1232 determine_roots (struct loop *loop,
1233 struct component *comps, VEC (chain_p, heap) **chains)
1235 struct component *comp;
1237 for (comp = comps; comp; comp = comp->next)
1238 determine_roots_comp (loop, comp, chains);
1241 /* Replace the reference in statement STMT with temporary variable
1242 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1243 the reference in the statement. IN_LHS is true if the reference
1244 is in the lhs of STMT, false if it is in rhs. */
1247 replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
1251 gimple_stmt_iterator bsi, psi;
1253 if (gimple_code (stmt) == GIMPLE_PHI)
1255 gcc_assert (!in_lhs && !set);
1257 val = PHI_RESULT (stmt);
1258 bsi = gsi_after_labels (gimple_bb (stmt));
1259 psi = gsi_for_stmt (stmt);
1260 remove_phi_node (&psi, false);
1262 /* Turn the phi node into GIMPLE_ASSIGN. */
1263 new_stmt = gimple_build_assign (val, new_tree);
1264 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1268 /* Since the reference is of gimple_reg type, it should only
1269 appear as lhs or rhs of modify statement. */
1270 gcc_assert (is_gimple_assign (stmt));
1272 bsi = gsi_for_stmt (stmt);
1274 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1277 gcc_assert (!in_lhs);
1278 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1279 stmt = gsi_stmt (bsi);
1286 /* We have statement
1290 If OLD is a memory reference, then VAL is gimple_val, and we transform
1296 Otherwise, we are replacing a combination chain,
1297 VAL is the expression that performs the combination, and OLD is an
1298 SSA name. In this case, we transform the assignment to
   NEW_TREE = VAL.  (NOTE(review): the closing of this comment was
   missing from the elided listing.)  */
1305 val = gimple_assign_lhs (stmt);
1306 if (TREE_CODE (val) != SSA_NAME)
1308 gcc_assert (gimple_assign_copy_p (stmt));
1309 val = gimple_assign_rhs1 (stmt);
1321 val = gimple_assign_lhs (stmt);
1324 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1325 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1328 /* Returns the reference to the address of REF in the ITER-th iteration of
1329 LOOP, or NULL if we fail to determine it (ITER may be negative). We
1330 try to preserve the original shape of the reference (not rewrite it
1331 as an indirect ref to the address), to make tree_could_trap_p in
1332 prepare_initializers_chain return false more often. */
1335 ref_at_iteration (struct loop *loop, tree ref, int iter)
1337 tree idx, *idx_p, type, val, op0 = NULL_TREE, ret;
/* Recurse on the inner object of handled components; loop-invariant
   references are returned unshared as-is.  */
1341 if (handled_component_p (ref))
1343 op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter);
1347 else if (!INDIRECT_REF_P (ref))
1348 return unshare_expr (ref);
1350 if (INDIRECT_REF_P (ref))
1352 /* Take care for INDIRECT_REF and MISALIGNED_INDIRECT_REF at
   [rest of comment elided].  */
1354 ret = copy_node (ref);
1355 idx = TREE_OPERAND (ref, 0);
1356 idx_p = &TREE_OPERAND (ret, 0);
1358 else if (TREE_CODE (ref) == COMPONENT_REF)
1360 /* Check that the offset is loop invariant. */
1361 if (TREE_OPERAND (ref, 2)
1362 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1365 return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
1366 unshare_expr (TREE_OPERAND (ref, 1)),
1367 unshare_expr (TREE_OPERAND (ref, 2)));
1369 else if (TREE_CODE (ref) == ARRAY_REF)
1371 /* Check that the lower bound and the step are loop invariant. */
1372 if (TREE_OPERAND (ref, 2)
1373 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1375 if (TREE_OPERAND (ref, 3)
1376 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
1379 ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
1380 unshare_expr (TREE_OPERAND (ref, 2)),
1381 unshare_expr (TREE_OPERAND (ref, 3)));
1382 idx = TREE_OPERAND (ref, 1);
1383 idx_p = &TREE_OPERAND (ret, 1);
/* Evolve the index by ITER iterations: base + iter * step, with
   POINTER_PLUS_EXPR for pointer-typed bases.  */
1388 ok = simple_iv (loop, loop, idx, &iv, true);
1391 iv.base = expand_simple_operations (iv.base);
1392 if (integer_zerop (iv.step))
1393 *idx_p = unshare_expr (iv.base);
1396 type = TREE_TYPE (iv.base);
1397 if (POINTER_TYPE_P (type))
1399 val = fold_build2 (MULT_EXPR, sizetype, iv.step,
1401 val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val);
1405 val = fold_build2 (MULT_EXPR, type, iv.step,
1406 build_int_cst_type (type, iter));
1407 val = fold_build2 (PLUS_EXPR, type, iv.base, val);
1409 *idx_p = unshare_expr (val);
/* NOTE(review): the return of RET is on an elided line.  */
1415 /* Get the initialization expression for the INDEX-th temporary variable
/* For a combination chain the initializer is built recursively from the
   initializers of the two combined sub-chains; otherwise it is looked up
   directly in the chain's INITS vector.  */
1419 get_init_expr (chain_p chain, unsigned index)
1421 if (chain->type == CT_COMBINATION)
1423 tree e1 = get_init_expr (chain->ch1, index);
1424 tree e2 = get_init_expr (chain->ch2, index);
1426 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1429 return VEC_index (tree, chain->inits, index);
1432 /* Marks all virtual operands of statement STMT for renaming. */
1435 mark_virtual_ops_for_renaming (gimple stmt)
/* For a PHI node, renaming is only needed when the result is a virtual
   (non-register) name; strip the SSA_NAME wrapper to get at the symbol.  */
1439 if (gimple_code (stmt) == GIMPLE_PHI)
1441 var = PHI_RESULT (stmt);
1442 if (is_gimple_reg (var))
1445 if (TREE_CODE (var) == SSA_NAME)
1446 var = SSA_NAME_VAR (var);
1447 mark_sym_for_renaming (var);
/* Non-PHI: any statement with a VUSE touches the single virtual operand
   of the function, so mark that.  */
1452 if (gimple_vuse (stmt))
1453 mark_sym_for_renaming (gimple_vop (cfun));
1456 /* Returns a new temporary variable used for the I-th variable carrying
1457 value of REF. The variable's uid is marked in TMP_VARS. */
1460 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1462 tree type = TREE_TYPE (ref);
1463 tree var = create_tmp_var (type, get_lsm_tmp_name (ref, i));
1465 /* We never access the components of the temporary variable in predictive
/* ... so complex/vector temporaries can be treated as gimple registers,
   avoiding virtual operands for them.  */
1467 if (TREE_CODE (type) == COMPLEX_TYPE
1468 || TREE_CODE (type) == VECTOR_TYPE)
1469 DECL_GIMPLE_REG_P (var) = 1;
1471 add_referenced_var (var);
/* Record the uid so eliminate_temp_copies can later recognize this var.  */
1472 bitmap_set_bit (tmp_vars, DECL_UID (var));
1476 /* Creates the variables for CHAIN, as well as phi nodes for them and
1477 initialization on entry to LOOP. Uids of the newly created
1478 temporary variables are marked in TMP_VARS. */
1481 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1484 unsigned n = chain->length;
1485 dref root = get_chain_root (chain);
/* When no use appears at the maximum distance, the last variable can be
   reused as the first one of the next iteration (one fewer temporary).  */
1486 bool reuse_first = !chain->has_max_use_after;
1487 tree ref, init, var, next;
1490 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1492 /* If N == 0, then all the references are within the single iteration. And
1493 since this is an nonempty chain, reuse_first cannot be true. */
1494 gcc_assert (n > 0 || !reuse_first);
1496 chain->vars = VEC_alloc (tree, heap, n + 1);
/* The root value is an lhs for combination chains, a memory reference
   otherwise.  */
1498 if (chain->type == CT_COMBINATION)
1499 ref = gimple_assign_lhs (root->stmt);
1501 ref = DR_REF (root->ref);
1503 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1505 var = predcom_tmp_var (ref, i, tmp_vars);
1506 VEC_quick_push (tree, chain->vars, var);
/* With reuse, the (n+1)-st slot aliases slot 0.  */
1509 VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));
1511 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
1512 VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL));
/* Build one loop-header phi per carried value: initializer on the entry
   edge, the next variable in the chain on the latch edge.  */
1514 for (i = 0; i < n; i++)
1516 var = VEC_index (tree, chain->vars, i);
1517 next = VEC_index (tree, chain->vars, i + 1);
1518 init = get_init_expr (chain, i);
1520 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1522 gsi_insert_seq_on_edge_immediate (entry, stmts);
1524 phi = create_phi_node (var, loop->header);
1525 SSA_NAME_DEF_STMT (var) = phi;
1526 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1527 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1531 /* Create the variables and initialization statement for root of chain
1532 CHAIN. Uids of the newly created temporary variables are marked
1536 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1538 dref root = get_chain_root (chain);
/* For store-load and combination chains the root reference occurs on the
   lhs of its statement; replace_ref_with needs to know that.  */
1539 bool in_lhs = (chain->type == CT_STORE_LOAD
1540 || chain->type == CT_COMBINATION);
1542 initialize_root_vars (loop, chain, tmp_vars);
/* The root is replaced by the variable carrying the value at the maximum
   distance (index chain->length).  */
1543 replace_ref_with (root->stmt,
1544 VEC_index (tree, chain->vars, chain->length),
1548 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1549 initialization on entry to LOOP if necessary. The ssa name for the variable
1550 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1551 around the loop is created. Uid of the newly created temporary variable
1552 is marked in TMP_VARS. INITS is the list containing the (single)
1556 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1557 VEC(tree, heap) **vars, VEC(tree, heap) *inits,
1561 tree ref = DR_REF (root->ref), init, var, next;
1564 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1566 /* Find the initializer for the variable, and check that it cannot
1568 init = VEC_index (tree, inits, 0);
/* One variable suffices for a read-only invariant; a written one needs a
   second slot for the value carried around the loop.  */
1570 *vars = VEC_alloc (tree, heap, written ? 2 : 1);
1571 var = predcom_tmp_var (ref, 0, tmp_vars);
1572 VEC_quick_push (tree, *vars, var);
1574 VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));
1576 for (i = 0; VEC_iterate (tree, *vars, i, var); i++)
1577 VEC_replace (tree, *vars, i, make_ssa_name (var, NULL));
1579 var = VEC_index (tree, *vars, 0);
/* When WRITTEN, the initializer must be a simple gimple value so it can
   feed a phi argument (hence the "written" flag to force_gimple_operand).  */
1581 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1583 gsi_insert_seq_on_edge_immediate (entry, stmts);
/* Written case: create the loop-carried phi (init on entry, copy on
   latch).  */
1587 next = VEC_index (tree, *vars, 1);
1588 phi = create_phi_node (var, loop->header);
1589 SSA_NAME_DEF_STMT (var) = phi;
1590 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1591 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
/* Read-only case: a plain assignment on the preheader edge is enough.  */
1595 gimple init_stmt = gimple_build_assign (var, init);
1596 mark_virtual_ops_for_renaming (init_stmt);
1597 gsi_insert_on_edge_immediate (entry, init_stmt);
1602 /* Execute load motion for references in chain CHAIN. Uids of the newly
1603 created temporary variables are marked in TMP_VARS. */
1606 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1608 VEC (tree, heap) *vars;
1610 unsigned n_writes = 0, ridx, i;
/* Load motion only applies to invariant, uncombined chains.  */
1613 gcc_assert (chain->type == CT_INVARIANT);
1614 gcc_assert (!chain->combined);
1615 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1616 if (!DR_IS_READ (a->ref))
1619 /* If there are no reads in the loop, there is nothing to do. */
1620 if (n_writes == VEC_length (dref, chain->refs))
1623 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1624 &vars, chain->inits, tmp_vars);
1627 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
1629 bool is_read = DR_IS_READ (a->ref);
1630 mark_virtual_ops_for_renaming (a->stmt);
/* Each write starts a new ssa version of the variable; subsequent reads
   then use that fresh version (slot 0 of VARS).  */
1632 if (!DR_IS_READ (a->ref))
1637 var = VEC_index (tree, vars, 0);
1638 var = make_ssa_name (SSA_NAME_VAR (var), NULL);
1639 VEC_replace (tree, vars, 0, var);
1645 replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
1646 !is_read, !is_read);
1649 VEC_free (tree, heap, vars);
1652 /* Returns the single statement in that NAME is used, excepting
1653 the looparound phi nodes contained in one of the chains. If there is no
1654 such statement, or more statements, NULL is returned. */
1657 single_nonlooparound_use (tree name)
1660 imm_use_iterator it;
1661 gimple stmt, ret = NULL;
1663 FOR_EACH_IMM_USE_FAST (use, it, name)
1665 stmt = USE_STMT (use);
1667 if (gimple_code (stmt) == GIMPLE_PHI)
1669 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
1670 could not be processed anyway, so just fail for them. */
1671 if (bitmap_bit_p (looparound_phis,
1672 SSA_NAME_VERSION (PHI_RESULT (stmt))))
/* A second non-looparound use makes the answer ambiguous -> NULL.  */
1677 else if (ret != NULL)
1686 /* Remove statement STMT, as well as the chain of assignments in that it is
1690 remove_stmt (gimple stmt)
1694 gimple_stmt_iterator psi;
/* A phi node is removed via its own iterator; then walk forward to the
   single copy-use and keep deleting the copy chain.  */
1696 if (gimple_code (stmt) == GIMPLE_PHI)
1698 name = PHI_RESULT (stmt);
1699 next = single_nonlooparound_use (name);
1700 psi = gsi_for_stmt (stmt);
1701 remove_phi_node (&psi, true);
/* Stop the walk when the use is not a plain ssa-name copy of NAME.  */
1704 || !gimple_assign_ssa_name_copy_p (next)
1705 || gimple_assign_rhs1 (next) != name)
1713 gimple_stmt_iterator bsi;
1715 bsi = gsi_for_stmt (stmt);
1717 name = gimple_assign_lhs (stmt);
1718 gcc_assert (TREE_CODE (name) == SSA_NAME);
/* Find the continuation of the copy chain before deleting STMT.  */
1720 next = single_nonlooparound_use (name);
1722 mark_virtual_ops_for_renaming (stmt);
1723 gsi_remove (&bsi, true);
1724 release_defs (stmt);
1727 || !gimple_assign_ssa_name_copy_p (next)
1728 || gimple_assign_rhs1 (next) != name)
1735 /* Perform the predictive commoning optimization for a chain CHAIN.
1736 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1739 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1746 if (chain->combined)
1748 /* For combined chains, just remove the statements that are used to
1749 compute the values of the expression (except for the root one). */
1750 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
1751 remove_stmt (a->stmt);
1755 /* For non-combined chains, set up the variables that hold its value,
1756 and replace the uses of the original references by these
1758 root = get_chain_root (chain);
1759 mark_virtual_ops_for_renaming (root->stmt);
1761 initialize_root (loop, chain, tmp_vars);
1762 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
/* A use at distance D reads the variable created for that distance:
   index chain->length - D in the vars vector.  */
1764 mark_virtual_ops_for_renaming (a->stmt);
1765 var = VEC_index (tree, chain->vars, chain->length - a->distance);
1766 replace_ref_with (a->stmt, var, false, false);
1771 /* Determines the unroll factor necessary to remove as many temporary variable
1772 copies as possible. CHAINS is the list of chains that will be
1776 determine_unroll_factor (VEC (chain_p, heap) *chains)
1779 unsigned factor = 1, af, nfactor, i;
1780 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1782 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
/* Invariant and combined chains do not constrain the unroll factor.  */
1784 if (chain->type == CT_INVARIANT || chain->combined)
1787 /* The best unroll factor for this chain is equal to the number of
1788 temporary variables that we create for it. */
1790 if (chain->has_max_use_after)
/* Combine with the running factor via lcm = a*b/gcd(a,b); presumably the
   result is accepted only while it stays <= MAX (check elided here).  */
1793 nfactor = factor * af / gcd (factor, af);
1801 /* Perform the predictive commoning optimization for CHAINS.
1802 Uids of the newly created temporary variables are marked in TMP_VARS. */
1805 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains,
/* Dispatch each chain: invariant chains get load motion, all others the
   generic chain transformation.  */
1811 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1813 if (chain->type == CT_INVARIANT)
1814 execute_load_motion (loop, chain, tmp_vars)
1816 execute_pred_commoning_chain (loop, chain, tmp_vars);
/* The transformations above touch virtual operands; refresh virtual SSA.  */
1819 update_ssa (TODO_update_ssa_only_virtuals);
1822 /* For each reference in CHAINS, if its defining statement is
1823 phi node, record the ssa name that is defined by it. */
1826 replace_phis_by_defined_names (VEC (chain_p, heap) *chains)
1832 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1833 for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1835 if (gimple_code (a->stmt) == GIMPLE_PHI)
/* Stash the ssa name; the stmt pointer itself may be invalidated by cfg
   manipulation during unrolling (see tree_predictive_commoning_loop).  */
1837 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1843 /* For each reference in CHAINS, if name_defined_by_phi is not
1844 NULL, use it to set the stmt field. */
1847 replace_names_by_phis (VEC (chain_p, heap) *chains)
1853 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
1854 for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
1855 if (a->stmt == NULL)
/* Inverse of replace_phis_by_defined_names: recover the (possibly
   reallocated) phi from the saved ssa name.  */
1857 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1858 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1859 a->name_defined_by_phi = NULL_TREE;
1863 /* Wrapper over execute_pred_commoning, to pass it as a callback
1864 to tree_transform_and_unroll_loop. */
/* (Field of struct epcc_data; the struct's other members are elided in
   this listing.)  */
1868 VEC (chain_p, heap) *chains;
1873 execute_pred_commoning_cbck (struct loop *loop, void *data)
1875 struct epcc_data *const dta = (struct epcc_data *) data;
1877 /* Restore phi nodes that were replaced by ssa names before
1878 tree_transform_and_unroll_loop (see detailed description in
1879 tree_predictive_commoning_loop). */
1880 replace_names_by_phis (dta->chains);
1881 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1884 /* Base NAME and all the names in the chain of phi nodes that use it
1885 on variable VAR. The phi nodes are recognized by being in the copies of
1886 the header of the LOOP. */
1889 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1892 imm_use_iterator iter;
1894 SSA_NAME_VAR (name) = var;
/* Follow NAME through single-phi use chains inside LOOP, rebasing each
   phi result on VAR, until no in-loop phi use is found.  */
1899 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1901 if (gimple_code (stmt) == GIMPLE_PHI
1902 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1905 BREAK_FROM_IMM_USE_STMT (iter);
1911 name = PHI_RESULT (phi);
1912 SSA_NAME_VAR (name) = var;
1916 /* Given an unrolled LOOP after predictive commoning, remove the
1917 register copies arising from phi nodes by changing the base
1918 variables of SSA names. TMP_VARS is the set of the temporary variables
1919 for those we want to perform this. */
1922 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1926 tree name, use, var;
1927 gimple_stmt_iterator psi;
1929 e = loop_latch_edge (loop);
1930 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1932 phi = gsi_stmt (psi);
1933 name = PHI_RESULT (phi);
1934 var = SSA_NAME_VAR (name);
/* Only process phis for the temporaries created by this pass.  */
1935 if (!bitmap_bit_p (tmp_vars, DECL_UID (var)))
1937 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1938 gcc_assert (TREE_CODE (use) == SSA_NAME);
1940 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1941 stmt = SSA_NAME_DEF_STMT (use);
1942 while (gimple_code (stmt) == GIMPLE_PHI
1943 /* In case we could not unroll the loop enough to eliminate
1944 all copies, we may reach the loop header before the defining
1945 statement (in that case, some register copies will be present
1946 in loop latch in the final code, corresponding to the newly
1947 created looparound phi nodes). */
1948 && gimple_bb (stmt) != loop->header)
/* Phis in unrolled copies of the header have a single predecessor, so
   argument 0 is the only incoming value.  */
1950 gcc_assert (single_pred_p (gimple_bb (stmt)));
1951 use = PHI_ARG_DEF (stmt, 0);
1952 stmt = SSA_NAME_DEF_STMT (use);
1955 base_names_in_chain_on (loop, use, var);
1959 /* Returns true if CHAIN is suitable to be combined. */
/* Only load and already-combined (CT_COMBINATION) chains that have not
   themselves been merged into another chain qualify.  */
1962 chain_can_be_combined_p (chain_p chain)
1964 return (!chain->combined
1965 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1968 /* Returns the modify statement that uses NAME. Skips over assignment
1969 statements, NAME is replaced with the actual name used in the returned
1973 find_use_stmt (tree *name)
1978 /* Skip over assignments. */
/* NOTE(review): the skip loop's surrounding control flow is elided in
   this listing; NAME is updated through the copy chain as we go.  */
1981 stmt = single_nonlooparound_use (*name);
1985 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1988 lhs = gimple_assign_lhs (stmt);
1989 if (TREE_CODE (lhs) != SSA_NAME)
1992 if (gimple_assign_copy_p (stmt))
1994 rhs = gimple_assign_rhs1 (stmt);
/* Only binary-rhs assignments are acceptable "use" statements.  */
2000 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2001 == GIMPLE_BINARY_RHS)
2008 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2011 may_reassociate_p (tree type, enum tree_code code)
2013 if (FLOAT_TYPE_P (type)
2014 && !flag_unsafe_math_optimizations)
2017 return (commutative_tree_code (code)
2018 && associative_tree_code (code));
2021 /* If the operation used in STMT is associative and commutative, go through the
2022 tree of the same operations and returns its root. Distance to the root
2023 is stored in DISTANCE. */
2026 find_associative_operation_root (gimple stmt, unsigned *distance)
2030 enum tree_code code = gimple_assign_rhs_code (stmt);
2031 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2034 if (!may_reassociate_p (type, code))
2039 lhs = gimple_assign_lhs (stmt);
2040 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2042 next = find_use_stmt (&lhs);
2044 || gimple_assign_rhs_code (next) != code)
2056 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2057 is no such statement, returns NULL_TREE. In case the operation used on
2058 NAME1 and NAME2 is associative and commutative, returns the root of the
2059 tree formed by this operation instead of the statement that uses NAME1 or
2063 find_common_use_stmt (tree *name1, tree *name2)
2065 gimple stmt1, stmt2;
2067 stmt1 = find_use_stmt (name1);
2071 stmt2 = find_use_stmt (name2);
/* Normalize both sides to the root of their reassociable expression tree
   so that uses in different sub-expressions can still match.  */
2078 stmt1 = find_associative_operation_root (stmt1, NULL);
2081 stmt2 = find_associative_operation_root (stmt2, NULL);
2085 return (stmt1 == stmt2 ? stmt1 : NULL);
2088 /* Checks whether R1 and R2 are combined together using CODE, with the result
2089 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2090 if it is true. If CODE is ERROR_MARK, set these values instead. */
2093 combinable_refs_p (dref r1, dref r2,
2094 enum tree_code *code, bool *swap, tree *rslt_type)
2096 enum tree_code acode;
2102 name1 = name_for_ref (r1);
2103 name2 = name_for_ref (r2);
2104 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2106 stmt = find_common_use_stmt (&name1, &name2);
/* Derive the actual operation from the common statement; operand order
   only matters for non-commutative codes.  */
2111 acode = gimple_assign_rhs_code (stmt);
2112 aswap = (!commutative_tree_code (acode)
2113 && gimple_assign_rhs1 (stmt) != name1);
2114 atype = TREE_TYPE (gimple_assign_lhs (stmt));
/* First pair seen: record CODE/SWAP/RSLT_TYPE; later pairs must match.  */
2116 if (*code == ERROR_MARK)
2124 return (*code == acode
2126 && *rslt_type == atype);
2129 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2130 an assignment of the remaining operand. */
2133 remove_name_from_operation (gimple stmt, tree op)
2136 gimple_stmt_iterator si;
2138 gcc_assert (is_gimple_assign (stmt));
/* Pick whichever binary operand is NOT OP as the surviving rhs.  */
2140 if (gimple_assign_rhs1 (stmt) == op)
2141 other_op = gimple_assign_rhs2 (stmt);
2143 other_op = gimple_assign_rhs1 (stmt);
2145 si = gsi_for_stmt (stmt);
2146 gimple_assign_set_rhs_from_tree (&si, other_op);
2148 /* We should not have reallocated STMT. */
2149 gcc_assert (gsi_stmt (si) == stmt);
2154 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2155 are combined in a single statement, and returns this statement. */
2158 reassociate_to_the_same_stmt (tree name1, tree name2)
2160 gimple stmt1, stmt2, root1, root2, s1, s2;
2161 gimple new_stmt, tmp_stmt;
2162 tree new_name, tmp_name, var, r1, r2;
2163 unsigned dist1, dist2;
2164 enum tree_code code;
2165 tree type = TREE_TYPE (name1);
2166 gimple_stmt_iterator bsi;
2168 stmt1 = find_use_stmt (&name1);
2169 stmt2 = find_use_stmt (&name2);
2170 root1 = find_associative_operation_root (stmt1, &dist1);
2171 root2 = find_associative_operation_root (stmt2, &dist2);
2172 code = gimple_assign_rhs_code (stmt1);
/* Callers guarantee both names live in the same reassociable tree with
   a single operation code.  */
2174 gcc_assert (root1 && root2 && root1 == root2
2175 && code == gimple_assign_rhs_code (stmt2));
2177 /* Find the root of the nearest expression in that both NAME1 and NAME2
/* Equalize the depths first, then climb both sides in lockstep until the
   paths meet (classic lowest-common-ancestor walk).  */
2184 while (dist1 > dist2)
2186 s1 = find_use_stmt (&r1);
2187 r1 = gimple_assign_lhs (s1);
2190 while (dist2 > dist1)
2192 s2 = find_use_stmt (&r2);
2193 r2 = gimple_assign_lhs (s2);
2199 s1 = find_use_stmt (&r1);
2200 r1 = gimple_assign_lhs (s1);
2201 s2 = find_use_stmt (&r2);
2202 r2 = gimple_assign_lhs (s2);
2205 /* Remove NAME1 and NAME2 from the statements in that they are used
2207 remove_name_from_operation (stmt1, name1);
2208 remove_name_from_operation (stmt2, name2);
2210 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2211 combine it with the rhs of S1. */
2212 var = create_tmp_var (type, "predreastmp");
2213 if (TREE_CODE (type) == COMPLEX_TYPE
2214 || TREE_CODE (type) == VECTOR_TYPE)
2215 DECL_GIMPLE_REG_P (var) = 1;
2216 add_referenced_var (var);
2217 new_name = make_ssa_name (var, NULL);
2218 new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
/* A second temporary holds S1's old rhs so S1 can become
   NEW_NAME CODE TMP_NAME.  */
2220 var = create_tmp_var (type, "predreastmp");
2221 if (TREE_CODE (type) == COMPLEX_TYPE
2222 || TREE_CODE (type) == VECTOR_TYPE)
2223 DECL_GIMPLE_REG_P (var) = 1;
2224 add_referenced_var (var);
2225 tmp_name = make_ssa_name (var, NULL);
2227 /* Rhs of S1 may now be either a binary expression with operation
2228 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2229 so that name1 or name2 was removed from it). */
2230 tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1),
2232 gimple_assign_rhs1 (s1),
2233 gimple_assign_rhs2 (s1));
2235 bsi = gsi_for_stmt (s1);
2236 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2237 s1 = gsi_stmt (bsi);
/* Insert the two helper statements before the rewritten S1 so their
   defs dominate their uses.  */
2240 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2241 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2246 /* Returns the statement that combines references R1 and R2. In case R1
2247 and R2 are not used in the same statement, but they are used with an
2248 associative and commutative operation in the same expression, reassociate
2249 the expression so that they are used in the same statement. */
2252 stmt_combining_refs (dref r1, dref r2)
2254 gimple stmt1, stmt2;
2255 tree name1 = name_for_ref (r1);
2256 tree name2 = name_for_ref (r2);
2258 stmt1 = find_use_stmt (&name1);
2259 stmt2 = find_use_stmt (&name2);
/* Different use statements: force them together by reassociation.  */
2263 return reassociate_to_the_same_stmt (name1, name2);
2266 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2267 description of the new chain is returned, otherwise we return NULL. */
2270 combine_chains (chain_p ch1, chain_p ch2)
2273 enum tree_code op = ERROR_MARK;
2278 tree rslt_type = NULL_TREE;
/* The chains must run in lockstep: same length, same number of refs,
   matching distances, and every pair combinable with one operation.  */
2282 if (ch1->length != ch2->length)
2285 if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs))
2288 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2289 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2291 if (r1->distance != r2->distance)
2294 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2305 new_chain = XCNEW (struct chain);
2306 new_chain->type = CT_COMBINATION;
2308 new_chain->ch1 = ch1;
2309 new_chain->ch2 = ch2;
2310 new_chain->rslt_type = rslt_type;
2311 new_chain->length = ch1->length;
2313 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
2314 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
2316 nw = XCNEW (struct dref_d);
/* Each ref of the new chain is the statement that combines the pair.  */
2317 nw->stmt = stmt_combining_refs (r1, r2);
2318 nw->distance = r1->distance;
2320 VEC_safe_push (dref, heap, new_chain->refs, nw);
/* Recompute has_max_use_after: true if some max-distance use does not
   dominate... er, is not dominated by the root statement.  */
2323 new_chain->has_max_use_after = false;
2324 root_stmt = get_chain_root (new_chain)->stmt;
2325 for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++)
2327 if (nw->distance == new_chain->length
2328 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2330 new_chain->has_max_use_after = true;
/* Mark the sources as consumed so they are skipped from now on.  */
2335 ch1->combined = true;
2336 ch2->combined = true;
2340 /* Try to combine the CHAINS. */
2343 try_combine_chains (VEC (chain_p, heap) **chains)
2346 chain_p ch1, ch2, cch;
2347 VEC (chain_p, heap) *worklist = NULL;
/* Seed the worklist with every currently combinable chain.  */
2349 for (i = 0; VEC_iterate (chain_p, *chains, i, ch1); i++)
2350 if (chain_can_be_combined_p (ch1))
2351 VEC_safe_push (chain_p, heap, worklist, ch1);
2353 while (!VEC_empty (chain_p, worklist))
2355 ch1 = VEC_pop (chain_p, worklist);
2356 if (!chain_can_be_combined_p (ch1))
2359 for (j = 0; VEC_iterate (chain_p, *chains, j, ch2); j++)
2361 if (!chain_can_be_combined_p (ch2))
2364 cch = combine_chains (ch1, ch2);
/* A successful combination is itself a candidate for further combining,
   so it goes both on the worklist and into CHAINS.  */
2367 VEC_safe_push (chain_p, heap, worklist, cch);
2368 VEC_safe_push (chain_p, heap, *chains, cch);
2375 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2376 impossible because one of these initializers may trap, true otherwise. */
2379 prepare_initializers_chain (struct loop *loop, chain_p chain)
2381 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2382 struct data_reference *dr = get_chain_root (chain)->ref;
2386 edge entry = loop_preheader_edge (loop);
2388 /* Find the initializers for the variables, and check that they cannot
2390 chain->inits = VEC_alloc (tree, heap, n);
2391 for (i = 0; i < n; i++)
2392 VEC_quick_push (tree, chain->inits, NULL_TREE);
2394 /* If we have replaced some looparound phi nodes, use their initializers
2395 instead of creating our own. */
2396 for (i = 0; VEC_iterate (dref, chain->refs, i, laref); i++)
2398 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2401 gcc_assert (laref->distance > 0)
2402 VEC_replace (tree, chain->inits, n - laref->distance,
2403 PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry));
2406 for (i = 0; i < n; i++)
2408 if (VEC_index (tree, chain->inits, i) != NULL_TREE)
/* Remaining slots: evaluate the reference at the (i - n)-th (negative)
   iteration, i.e. before the loop is entered.  */
2411 init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
/* Refuse initializers that could trap unless every iteration is known to
   access the reference anyway.  */
2415 if (!chain->all_always_accessed && tree_could_trap_p (init))
2418 init = force_gimple_operand (init, &stmts, false, NULL_TREE);
2420 gsi_insert_seq_on_edge_immediate (entry, stmts);
2422 VEC_replace (tree, chain->inits, i, init);
2428 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2429 be used because the initializers might trap. */
2432 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains)
/* Note: I is only advanced on success; on failure the chain is removed
   by unordered swap-and-pop, so the same index is revisited.  */
2437 for (i = 0; i < VEC_length (chain_p, chains); )
2439 chain = VEC_index (chain_p, chains, i);
2440 if (prepare_initializers_chain (loop, chain))
2444 release_chain (chain);
2445 VEC_unordered_remove (chain_p, chains, i);
2450 /* Performs predictive commoning for LOOP. Returns true if LOOP was
2454 tree_predictive_commoning_loop (struct loop *loop)
2456 VEC (data_reference_p, heap) *datarefs;
2457 VEC (ddr_p, heap) *dependences;
2458 struct component *components;
2459 VEC (chain_p, heap) *chains = NULL;
2460 unsigned unroll_factor;
2461 struct tree_niter_desc desc;
2462 bool unroll = false;
2466 if (dump_file && (dump_flags & TDF_DETAILS))
2467 fprintf (dump_file, "Processing loop %d\n", loop->num);
2469 /* Find the data references and split them into components according to their
2470 dependence relations. */
2471 datarefs = VEC_alloc (data_reference_p, heap, 10);
2472 dependences = VEC_alloc (ddr_p, heap, 10);
2473 compute_data_dependences_for_loop (loop, true, &datarefs, &dependences);
2474 if (dump_file && (dump_flags & TDF_DETAILS))
2475 dump_data_dependence_relations (dump_file, dependences);
2477 components = split_data_refs_to_components (loop, datarefs, dependences);
2478 free_dependence_relations (dependences);
/* Early-exit path (condition elided in this listing) releases DATAREFS.  */
2481 free_data_refs (datarefs);
2485 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "Initial state:\n\n");
2488 dump_components (dump_file, components);
2491 /* Find the suitable components and split them into chains. */
2492 components = filter_suitable_components (loop, components);
2494 tmp_vars = BITMAP_ALLOC (NULL);
2495 looparound_phis = BITMAP_ALLOC (NULL);
2496 determine_roots (loop, components, &chains);
2497 release_components (components);
2501 if (dump_file && (dump_flags & TDF_DETAILS))
2503 "Predictive commoning failed: no suitable chains\n");
2506 prepare_initializers (loop, chains);
2508 /* Try to combine the chains that are always worked with together. */
2509 try_combine_chains (&chains);
2511 if (dump_file && (dump_flags & TDF_DETAILS))
2513 fprintf (dump_file, "Before commoning:\n\n");
2514 dump_chains (dump_file, chains);
2517 /* Determine the unroll factor, and if the loop should be unrolled, ensure
2518 that its number of iterations is divisible by the factor. */
2519 unroll_factor = determine_unroll_factor (chains);
2521 unroll = (unroll_factor > 1
2522 && can_unroll_loop_p (loop, unroll_factor, &desc));
2523 exit = single_dom_exit (loop);
2525 /* Execute the predictive commoning transformations, and possibly unroll the
2529 struct epcc_data dta;
2531 if (dump_file && (dump_flags & TDF_DETAILS))
2532 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
2534 dta.chains = chains;
2535 dta.tmp_vars = tmp_vars;
2537 update_ssa (TODO_update_ssa_only_virtuals);
2539 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
2540 execute_pred_commoning_cbck is called may cause phi nodes to be
2541 reallocated, which is a problem since CHAINS may point to these
2542 statements. To fix this, we store the ssa names defined by the
2543 phi nodes here instead of the phi nodes themselves, and restore
2544 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
2545 replace_phis_by_defined_names (chains);
2547 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
2548 execute_pred_commoning_cbck, &dta);
2549 eliminate_temp_copies (loop, tmp_vars);
/* Non-unrolled path: run the commoning transformation directly.  */
2553 if (dump_file && (dump_flags & TDF_DETAILS))
2555 "Executing predictive commoning without unrolling.\n");
2556 execute_pred_commoning (loop, chains, tmp_vars);
/* Common cleanup for all paths.  */
2560 release_chains (chains);
2561 free_data_refs (datarefs);
2562 BITMAP_FREE (tmp_vars);
2563 BITMAP_FREE (looparound_phis);
2565 free_affine_expand_cache (&name_expansions);
2570 /* Runs predictive commoning. */
2573 tree_predictive_commoning (void)
2575 bool unrolled = false;
2580 initialize_original_copy_tables ();
2581 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
2582 if (optimize_loop_for_speed_p (loop))
2584 unrolled |= tree_predictive_commoning_loop (loop);
2590 ret = TODO_cleanup_cfg;
2592 free_original_copy_tables ();