1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "cfglayout.h"
34 static void duplicate_subloops (struct loop *, struct loop *);
35 static void copy_loops_to (struct loop **, int,
37 static void loop_redirect_edge (edge, basic_block);
38 static bool loop_delete_branch_edge (edge, int);
39 static void remove_bbs (basic_block *, int);
40 static bool rpe_enum_p (basic_block, void *);
41 static int find_path (edge, basic_block **);
42 static bool alp_enum_p (basic_block, void *);
43 static void fix_loop_placements (struct loop *, bool *);
44 static bool fix_bb_placement (basic_block);
45 static void fix_bb_placements (basic_block, bool *);
46 static void place_new_loop (struct loop *);
47 static basic_block create_preheader (struct loop *, int);
48 static void unloop (struct loop *, bool *);
50 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
52 /* Checks whether basic block BB is dominated by DATA. */
54 rpe_enum_p (basic_block bb, void *data)
56 return dominated_by_p (CDI_DOMINATORS, bb, data);
59 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
62 remove_bbs (basic_block *bbs, int nbbs)
66 for (i = 0; i < nbbs; i++)
67 delete_basic_block (bbs[i]);
70 /* Find path -- i.e. the basic blocks dominated by edge E and put them
71 into array BBS, that will be allocated large enough to contain them.
72 E->dest must have exactly one predecessor for this to work (it is
73 easy to achieve and we do not put it here because we do not want to
74 alter anything by this function). The number of basic blocks in the
77 find_path (edge e, basic_block **bbs)
79 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
81 /* Find bbs in the path. */
82 *bbs = XCNEWVEC (basic_block, n_basic_blocks);
83 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
84 n_basic_blocks, e->dest);
87 /* Fix placement of basic block BB inside loop hierarchy --
88 Let L be a loop to that BB belongs. Then every successor of BB must either
89 1) belong to some superloop of loop L, or
90 2) be a header of loop K such that K->outer is superloop of L
91 Returns true if we had to move BB into other loop to enforce this condition,
92 false if the placement of BB was already correct (provided that placements
93 of its successors are correct). */
95 fix_bb_placement (basic_block bb)
99 struct loop *loop = current_loops->tree_root, *act;
101 FOR_EACH_EDGE (e, ei, bb->succs)
103 if (e->dest == EXIT_BLOCK_PTR)
106 act = e->dest->loop_father;
107 if (act->header == e->dest)
110 if (flow_loop_nested_p (loop, act))
114 if (loop == bb->loop_father)
117 remove_bb_from_loops (bb);
118 add_bb_to_loop (bb, loop);
123 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
124 enforce condition condition stated in description of fix_bb_placement. We
125 start from basic block FROM that had some of its successors removed, so that
126 his placement no longer has to be correct, and iteratively fix placement of
127 its predecessors that may change if placement of FROM changed. Also fix
128 placement of subloops of FROM->loop_father, that might also be altered due
129 to this change; the condition for them is similar, except that instead of
130 successors we consider edges coming out of the loops.
132 If the changes may invalidate the information about irreducible regions,
133 IRRED_INVALIDATED is set to true. */
136 fix_bb_placements (basic_block from,
137 bool *irred_invalidated)
140 basic_block *queue, *qtop, *qbeg, *qend;
141 struct loop *base_loop;
144 /* We pass through blocks back-reachable from FROM, testing whether some
145 of their successors moved to outer loop. It may be necessary to
146 iterate several times, but it is finite, as we stop unless we move
147 the basic block up the loop structure. The whole story is a bit
148 more complicated due to presence of subloops, those are moved using
149 fix_loop_placement. */
151 base_loop = from->loop_father;
152 if (base_loop == current_loops->tree_root)
155 in_queue = sbitmap_alloc (last_basic_block);
156 sbitmap_zero (in_queue);
157 SET_BIT (in_queue, from->index);
158 /* Prevent us from going out of the base_loop. */
159 SET_BIT (in_queue, base_loop->header->index);
161 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
162 qtop = queue + base_loop->num_nodes + 1;
174 RESET_BIT (in_queue, from->index);
176 if (from->loop_father->header == from)
178 /* Subloop header, maybe move the loop upward. */
179 if (!fix_loop_placement (from->loop_father))
184 /* Ordinary basic block. */
185 if (!fix_bb_placement (from))
189 FOR_EACH_EDGE (e, ei, from->succs)
191 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
192 *irred_invalidated = true;
195 /* Something has changed, insert predecessors into queue. */
196 FOR_EACH_EDGE (e, ei, from->preds)
198 basic_block pred = e->src;
201 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
202 *irred_invalidated = true;
204 if (TEST_BIT (in_queue, pred->index))
207 /* If it is subloop, then it either was not moved, or
208 the path up the loop tree from base_loop do not contain
210 nca = find_common_loop (pred->loop_father, base_loop);
211 if (pred->loop_father != base_loop
213 || nca != pred->loop_father))
214 pred = pred->loop_father->header;
215 else if (!flow_loop_nested_p (from->loop_father, pred->loop_father))
217 /* No point in processing it. */
221 if (TEST_BIT (in_queue, pred->index))
224 /* Schedule the basic block. */
229 SET_BIT (in_queue, pred->index);
236 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
237 and update loop structures and dominators. Return true if we were able
238 to remove the path, false otherwise (and nothing is affected then). */
243 basic_block *rem_bbs, *bord_bbs, *dom_bbs, from, bb;
244 int i, nrem, n_bord_bbs, n_dom_bbs, nreml;
246 bool deleted, irred_invalidated = false;
247 struct loop **deleted_loop;
249 if (!loop_delete_branch_edge (e, 0))
252 /* Keep track of whether we need to update information about irreducible
253 regions. This is the case if the removed area is a part of the
254 irreducible region, or if the set of basic blocks that belong to a loop
255 that is inside an irreducible region is changed, or if such a loop is
257 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
258 irred_invalidated = true;
260 /* We need to check whether basic blocks are dominated by the edge
261 e, but we only have basic block dominators. This is easy to
262 fix -- when e->dest has exactly one predecessor, this corresponds
263 to blocks dominated by e->dest, if not, split the edge. */
264 if (!single_pred_p (e->dest))
265 e = single_pred_edge (split_edge (e));
267 /* It may happen that by removing path we remove one or more loops
268 we belong to. In this case first unloop the loops, then proceed
269 normally. We may assume that e->dest is not a header of any loop,
270 as it now has exactly one predecessor. */
271 while (e->src->loop_father->outer
272 && dominated_by_p (CDI_DOMINATORS,
273 e->src->loop_father->latch, e->dest))
274 unloop (e->src->loop_father, &irred_invalidated);
276 /* Identify the path. */
277 nrem = find_path (e, &rem_bbs);
280 bord_bbs = XCNEWVEC (basic_block, n_basic_blocks);
281 seen = sbitmap_alloc (last_basic_block);
284 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
285 for (i = 0; i < nrem; i++)
286 SET_BIT (seen, rem_bbs[i]->index);
287 for (i = 0; i < nrem; i++)
291 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
292 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index))
294 SET_BIT (seen, ae->dest->index);
295 bord_bbs[n_bord_bbs++] = ae->dest;
297 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
298 irred_invalidated = true;
302 /* Remove the path. */
304 deleted = loop_delete_branch_edge (e, 1);
305 gcc_assert (deleted);
306 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
308 /* Cancel loops contained in the path. */
309 deleted_loop = XNEWVEC (struct loop *, nrem);
311 for (i = 0; i < nrem; i++)
312 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
313 deleted_loop[nreml++] = rem_bbs[i]->loop_father;
315 remove_bbs (rem_bbs, nrem);
318 for (i = 0; i < nreml; i++)
319 cancel_loop_tree (deleted_loop[i]);
322 /* Find blocks whose dominators may be affected. */
325 for (i = 0; i < n_bord_bbs; i++)
329 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
330 if (TEST_BIT (seen, bb->index))
332 SET_BIT (seen, bb->index);
334 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
336 ldom = next_dom_son (CDI_DOMINATORS, ldom))
337 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
338 dom_bbs[n_dom_bbs++] = ldom;
343 /* Recount dominators. */
344 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
348 /* Fix placements of basic blocks inside loops and the placement of
349 loops in the loop tree. */
350 fix_bb_placements (from, &irred_invalidated);
351 fix_loop_placements (from->loop_father, &irred_invalidated);
353 if (irred_invalidated
354 && (current_loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS) != 0)
355 mark_irreducible_loops ();
360 /* Predicate for enumeration in add_loop. */
362 alp_enum_p (basic_block bb, void *alp_header)
364 return bb != (basic_block) alp_header;
367 /* Given LOOP structure with filled header and latch, find the body of the
368 corresponding loop and add it to loops tree. Insert the LOOP as a son of
372 add_loop (struct loop *loop, struct loop *outer)
377 /* Add it to loop structure. */
378 place_new_loop (loop);
379 flow_loop_tree_node_add (outer, loop);
381 /* Find its nodes. */
382 bbs = XCNEWVEC (basic_block, n_basic_blocks);
383 n = dfs_enumerate_from (loop->latch, 1, alp_enum_p,
384 bbs, n_basic_blocks, loop->header);
386 for (i = 0; i < n; i++)
388 remove_bb_from_loops (bbs[i]);
389 add_bb_to_loop (bbs[i], loop);
391 remove_bb_from_loops (loop->header);
392 add_bb_to_loop (loop->header, loop);
397 /* Multiply all frequencies in LOOP by NUM/DEN. */
399 scale_loop_frequencies (struct loop *loop, int num, int den)
403 bbs = get_loop_body (loop);
404 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
408 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
409 latch to header and update loop tree and dominators
410 accordingly. Everything between them plus LATCH_EDGE destination must
411 be dominated by HEADER_EDGE destination, and back-reachable from
412 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
413 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
414 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
415 Returns the newly created loop. Frequencies and counts in the new loop
416 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
419 loopify (edge latch_edge, edge header_edge,
420 basic_block switch_bb, edge true_edge, edge false_edge,
421 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
423 basic_block succ_bb = latch_edge->dest;
424 basic_block pred_bb = header_edge->src;
425 basic_block *dom_bbs, *body;
426 unsigned n_dom_bbs, i;
428 struct loop *loop = alloc_loop ();
429 struct loop *outer = succ_bb->loop_father->outer;
435 loop->header = header_edge->dest;
436 loop->latch = latch_edge->src;
438 freq = EDGE_FREQUENCY (header_edge);
439 cnt = header_edge->count;
441 /* Redirect edges. */
442 loop_redirect_edge (latch_edge, loop->header);
443 loop_redirect_edge (true_edge, succ_bb);
445 /* During loop versioning, one of the switch_bb edge is already properly
446 set. Do not redirect it again unless redirect_all_edges is true. */
447 if (redirect_all_edges)
449 loop_redirect_edge (header_edge, switch_bb);
450 loop_redirect_edge (false_edge, loop->header);
452 /* Update dominators. */
453 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
454 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
457 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
459 /* Compute new loop. */
460 add_loop (loop, outer);
462 /* Add switch_bb to appropriate loop. */
463 if (switch_bb->loop_father)
464 remove_bb_from_loops (switch_bb);
465 add_bb_to_loop (switch_bb, outer);
467 /* Fix frequencies. */
468 if (redirect_all_edges)
470 switch_bb->frequency = freq;
471 switch_bb->count = cnt;
472 FOR_EACH_EDGE (e, ei, switch_bb->succs)
474 e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
477 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
478 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
480 /* Update dominators of blocks outside of LOOP. */
481 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
483 seen = sbitmap_alloc (last_basic_block);
485 body = get_loop_body (loop);
487 for (i = 0; i < loop->num_nodes; i++)
488 SET_BIT (seen, body[i]->index);
490 for (i = 0; i < loop->num_nodes; i++)
494 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
496 ldom = next_dom_son (CDI_DOMINATORS, ldom))
497 if (!TEST_BIT (seen, ldom->index))
499 SET_BIT (seen, ldom->index);
500 dom_bbs[n_dom_bbs++] = ldom;
504 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
513 /* Remove the latch edge of a LOOP and update loops to indicate that
514 the LOOP was removed. After this function, original loop latch will
515 have no successor, which caller is expected to fix somehow.
517 If this may cause the information about irreducible regions to become
518 invalid, IRRED_INVALIDATED is set to true. */
521 unloop (struct loop *loop, bool *irred_invalidated)
526 basic_block latch = loop->latch;
529 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
530 *irred_invalidated = true;
532 /* This is relatively straightforward. The dominators are unchanged, as
533 loop header dominates loop latch, so the only thing we have to care of
534 is the placement of loops and basic blocks inside the loop tree. We
535 move them all to the loop->outer, and then let fix_bb_placements do
538 body = get_loop_body (loop);
540 for (i = 0; i < n; i++)
541 if (body[i]->loop_father == loop)
543 remove_bb_from_loops (body[i]);
544 add_bb_to_loop (body[i], loop->outer);
551 flow_loop_tree_node_remove (ploop);
552 flow_loop_tree_node_add (loop->outer, ploop);
555 /* Remove the loop and free its data. */
558 remove_edge (single_succ_edge (latch));
560 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
561 there is an irreducible region inside the cancelled loop, the flags will
563 fix_bb_placements (latch, &dummy);
566 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
567 FATHER of LOOP such that all of the edges coming out of LOOP belong to
568 FATHER, and set it as outer loop of LOOP. Return true if placement of
572 fix_loop_placement (struct loop *loop)
578 struct loop *father = loop->pred[0], *act;
580 body = get_loop_body (loop);
581 for (i = 0; i < loop->num_nodes; i++)
582 FOR_EACH_EDGE (e, ei, body[i]->succs)
583 if (!flow_bb_inside_loop_p (loop, e->dest))
585 act = find_common_loop (loop, e->dest->loop_father);
586 if (flow_loop_nested_p (father, act))
591 if (father != loop->outer)
593 for (act = loop->outer; act != father; act = act->outer)
594 act->num_nodes -= loop->num_nodes;
595 flow_loop_tree_node_remove (loop);
596 flow_loop_tree_node_add (father, loop);
602 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
603 condition stated in description of fix_loop_placement holds for them.
604 It is used in case when we removed some edges coming out of LOOP, which
605 may cause the right placement of LOOP inside loop tree to change.
607 IRRED_INVALIDATED is set to true if a change in the loop structures might
608 invalidate the information about irreducible regions. */
611 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
618 if (!fix_loop_placement (loop))
621 /* Changing the placement of a loop in the loop tree may alter the
622 validity of condition 2) of the description of fix_bb_placement
623 for its preheader, because the successor is the header and belongs
624 to the loop. So call fix_bb_placements to fix up the placement
625 of the preheader and (possibly) of its predecessors. */
626 fix_bb_placements (loop_preheader_edge (loop)->src,
632 /* Creates place for a new LOOP in loops structure. */
634 place_new_loop (struct loop *loop)
636 loop->num = number_of_loops ();
637 VEC_safe_push (loop_p, heap, current_loops->larray, loop);
640 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
641 created loop into loops structure. */
643 duplicate_loop (struct loop *loop, struct loop *target)
646 cloop = alloc_loop ();
647 place_new_loop (cloop);
649 /* Mark the new loop as copy of LOOP. */
652 /* Add it to target. */
653 flow_loop_tree_node_add (target, cloop);
658 /* Copies structure of subloops of LOOP into TARGET loop, placing
659 newly created loops into loop tree. */
661 duplicate_subloops (struct loop *loop, struct loop *target)
663 struct loop *aloop, *cloop;
665 for (aloop = loop->inner; aloop; aloop = aloop->next)
667 cloop = duplicate_loop (aloop, target);
668 duplicate_subloops (aloop, cloop);
672 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
673 into TARGET loop, placing newly created loops into loop tree. */
675 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
680 for (i = 0; i < n; i++)
682 aloop = duplicate_loop (copied_loops[i], target);
683 duplicate_subloops (copied_loops[i], aloop);
687 /* Redirects edge E to basic block DEST. */
689 loop_redirect_edge (edge e, basic_block dest)
694 redirect_edge_and_branch_force (e, dest);
697 /* Deletes edge E from a branch if possible. Unless REALLY_DELETE is set,
698 just test whether it is possible to remove the edge. */
700 loop_delete_branch_edge (edge e, int really_delete)
702 basic_block src = e->src;
707 gcc_assert (EDGE_COUNT (src->succs) > 1);
709 /* Cannot handle more than two exit edges. */
710 if (EDGE_COUNT (src->succs) > 2)
712 /* And it must be just a simple branch. */
713 if (!any_condjump_p (BB_END (src)))
716 snd = e == EDGE_SUCC (src, 0) ? EDGE_SUCC (src, 1) : EDGE_SUCC (src, 0);
718 if (newdest == EXIT_BLOCK_PTR)
721 /* Hopefully the above conditions should suffice. */
725 /* Redirecting behaves wrongly wrto this flag. */
726 irr = snd->flags & EDGE_IRREDUCIBLE_LOOP;
728 if (!redirect_edge_and_branch (e, newdest))
730 single_succ_edge (src)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
731 single_succ_edge (src)->flags |= irr;
736 /* Check whether LOOP's body can be duplicated. */
738 can_duplicate_loop_p (struct loop *loop)
741 basic_block *bbs = get_loop_body (loop);
743 ret = can_copy_bbs_p (bbs, loop->num_nodes);
749 /* Sets probability and count of edge E to zero. The probability and count
750 is redistributed evenly to the remaining edges coming from E->src. */
753 set_zero_probability (edge e)
755 basic_block bb = e->src;
757 edge ae, last = NULL;
758 unsigned n = EDGE_COUNT (bb->succs);
759 gcov_type cnt = e->count, cnt1;
760 unsigned prob = e->probability, prob1;
763 cnt1 = cnt / (n - 1);
764 prob1 = prob / (n - 1);
766 FOR_EACH_EDGE (ae, ei, bb->succs)
771 ae->probability += prob1;
776 /* Move the rest to one of the edges. */
777 last->probability += prob % (n - 1);
778 last->count += cnt % (n - 1);
784 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
785 loop structure and dominators. E's destination must be LOOP header for
786 this to work, i.e. it must be entry or latch edge of this loop; these are
787 unique, as the loops must have preheaders for this function to work
788 correctly (in case E is latch, the function unrolls the loop, if E is entry
789 edge, it peels the loop). Store edges created by copying ORIG edge from
790 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
791 original LOOP body, the other copies are numbered in order given by control
792 flow through them) into TO_REMOVE array. Returns false if duplication is
796 duplicate_loop_to_header_edge (struct loop *loop, edge e,
797 unsigned int ndupl, sbitmap wont_exit,
798 edge orig, VEC (edge, heap) **to_remove,
801 struct loop *target, *aloop;
802 struct loop **orig_loops;
803 unsigned n_orig_loops;
804 basic_block header = loop->header, latch = loop->latch;
805 basic_block *new_bbs, *bbs, *first_active;
806 basic_block new_bb, bb, first_active_latch = NULL;
808 edge spec_edges[2], new_spec_edges[2];
812 int is_latch = (latch == e->src);
813 int scale_act = 0, *scale_step = NULL, scale_main = 0;
814 int scale_after_exit = 0;
815 int p, freq_in, freq_le, freq_out_orig;
816 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
817 int add_irreducible_flag;
818 basic_block place_after;
819 bitmap bbs_to_scale = NULL;
822 gcc_assert (e->dest == loop->header);
823 gcc_assert (ndupl > 0);
827 /* Orig must be edge out of the loop. */
828 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
829 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
833 bbs = get_loop_body_in_dom_order (loop);
834 gcc_assert (bbs[0] == loop->header);
835 gcc_assert (bbs[n - 1] == loop->latch);
837 /* Check whether duplication is possible. */
838 if (!can_copy_bbs_p (bbs, loop->num_nodes))
843 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
845 /* In case we are doing loop peeling and the loop is in the middle of
846 irreducible region, the peeled copies will be inside it too. */
847 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
848 gcc_assert (!is_latch || !add_irreducible_flag);
850 /* Find edge from latch. */
851 latch_edge = loop_latch_edge (loop);
853 if (flags & DLTHE_FLAG_UPDATE_FREQ)
855 /* Calculate coefficients by that we have to scale frequencies
856 of duplicated loop bodies. */
857 freq_in = header->frequency;
858 freq_le = EDGE_FREQUENCY (latch_edge);
861 if (freq_in < freq_le)
863 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
864 if (freq_out_orig > freq_in - freq_le)
865 freq_out_orig = freq_in - freq_le;
866 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
867 prob_pass_wont_exit =
868 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
871 && REG_BR_PROB_BASE - orig->probability != 0)
873 /* The blocks that are dominated by a removed exit edge ORIG have
874 frequencies scaled by this. */
875 scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
876 REG_BR_PROB_BASE - orig->probability);
877 bbs_to_scale = BITMAP_ALLOC (NULL);
878 for (i = 0; i < n; i++)
880 if (bbs[i] != orig->src
881 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
882 bitmap_set_bit (bbs_to_scale, i);
886 scale_step = XNEWVEC (int, ndupl);
888 for (i = 1; i <= ndupl; i++)
889 scale_step[i - 1] = TEST_BIT (wont_exit, i)
890 ? prob_pass_wont_exit
893 /* Complete peeling is special as the probability of exit in last
895 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
897 int wanted_freq = EDGE_FREQUENCY (e);
899 if (wanted_freq > freq_in)
900 wanted_freq = freq_in;
902 gcc_assert (!is_latch);
903 /* First copy has frequency of incoming edge. Each subsequent
904 frequency should be reduced by prob_pass_wont_exit. Caller
905 should've managed the flags so all except for original loop
906 has won't exist set. */
907 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
908 /* Now simulate the duplication adjustments and compute header
909 frequency of the last copy. */
910 for (i = 0; i < ndupl; i++)
911 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
912 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
916 prob_pass_main = TEST_BIT (wont_exit, 0)
917 ? prob_pass_wont_exit
920 scale_main = REG_BR_PROB_BASE;
921 for (i = 0; i < ndupl; i++)
924 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
926 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
927 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
931 scale_main = REG_BR_PROB_BASE;
932 for (i = 0; i < ndupl; i++)
933 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
934 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
936 for (i = 0; i < ndupl; i++)
937 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
938 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
939 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
942 /* Loop the new bbs will belong to. */
943 target = e->src->loop_father;
945 /* Original loops. */
947 for (aloop = loop->inner; aloop; aloop = aloop->next)
949 orig_loops = XCNEWVEC (struct loop *, n_orig_loops);
950 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
951 orig_loops[i] = aloop;
955 first_active = XNEWVEC (basic_block, n);
958 memcpy (first_active, bbs, n * sizeof (basic_block));
959 first_active_latch = latch;
962 spec_edges[SE_ORIG] = orig;
963 spec_edges[SE_LATCH] = latch_edge;
965 place_after = e->src;
966 for (j = 0; j < ndupl; j++)
969 copy_loops_to (orig_loops, n_orig_loops, target);
972 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
974 place_after = new_spec_edges[SE_LATCH]->src;
976 if (flags & DLTHE_RECORD_COPY_NUMBER)
977 for (i = 0; i < n; i++)
979 gcc_assert (!new_bbs[i]->aux);
980 new_bbs[i]->aux = (void *)(size_t)(j + 1);
983 /* Note whether the blocks and edges belong to an irreducible loop. */
984 if (add_irreducible_flag)
986 for (i = 0; i < n; i++)
987 new_bbs[i]->flags |= BB_DUPLICATED;
988 for (i = 0; i < n; i++)
992 if (new_bb->loop_father == target)
993 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
995 FOR_EACH_EDGE (ae, ei, new_bb->succs)
996 if ((ae->dest->flags & BB_DUPLICATED)
997 && (ae->src->loop_father == target
998 || ae->dest->loop_father == target))
999 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1001 for (i = 0; i < n; i++)
1002 new_bbs[i]->flags &= ~BB_DUPLICATED;
1005 /* Redirect the special edges. */
1008 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1009 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1011 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1012 latch = loop->latch = new_bbs[n - 1];
1013 e = latch_edge = new_spec_edges[SE_LATCH];
1017 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1019 redirect_edge_and_branch_force (e, new_bbs[0]);
1020 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1021 e = new_spec_edges[SE_LATCH];
1024 /* Record exit edge in this copy. */
1025 if (orig && TEST_BIT (wont_exit, j + 1))
1028 VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
1029 set_zero_probability (new_spec_edges[SE_ORIG]);
1031 /* Scale the frequencies of the blocks dominated by the exit. */
1034 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1036 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1042 /* Record the first copy in the control flow order if it is not
1043 the original loop (i.e. in case of peeling). */
1044 if (!first_active_latch)
1046 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1047 first_active_latch = new_bbs[n - 1];
1050 /* Set counts and frequencies. */
1051 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1053 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1054 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
1060 /* Record the exit edge in the original loop body, and update the frequencies. */
1061 if (orig && TEST_BIT (wont_exit, 0))
1064 VEC_safe_push (edge, heap, *to_remove, orig);
1065 set_zero_probability (orig);
1067 /* Scale the frequencies of the blocks dominated by the exit. */
1070 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1072 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1078 /* Update the original loop. */
1080 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1081 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1083 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1087 /* Update dominators of outer blocks if affected. */
1088 for (i = 0; i < n; i++)
1090 basic_block dominated, dom_bb, *dom_bbs;
1096 n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
1097 for (j = 0; j < n_dom_bbs; j++)
1099 dominated = dom_bbs[j];
1100 if (flow_bb_inside_loop_p (loop, dominated))
1102 dom_bb = nearest_common_dominator (
1103 CDI_DOMINATORS, first_active[i], first_active_latch);
1104 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1108 free (first_active);
1111 BITMAP_FREE (bbs_to_scale);
1116 /* A callback for make_forwarder block, to redirect all edges except for
1117 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1118 whether to redirect it. */
1120 static edge mfb_kj_edge;
1122 mfb_keep_just (edge e)
1124 return e != mfb_kj_edge;
1127 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1128 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1129 entry; otherwise we also force preheader block to have only one successor.
1130 The function also updates dominators. */
1133 create_preheader (struct loop *loop, int flags)
1139 bool latch_edge_was_fallthru;
1140 edge one_succ_pred = 0;
1143 FOR_EACH_EDGE (e, ei, loop->header->preds)
1145 if (e->src == loop->latch)
1147 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1149 if (single_succ_p (e->src))
1152 gcc_assert (nentry);
1155 /* Get an edge that is different from the one from loop->latch
1157 e = EDGE_PRED (loop->header,
1158 EDGE_PRED (loop->header, 0)->src == loop->latch);
1160 if (!(flags & CP_SIMPLE_PREHEADERS) || single_succ_p (e->src))
1164 mfb_kj_edge = loop_latch_edge (loop);
1165 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1166 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1167 dummy = fallthru->src;
1168 loop->header = fallthru->dest;
1170 /* Try to be clever in placing the newly created preheader. The idea is to
1171 avoid breaking any "fallthruness" relationship between blocks.
1173 The preheader was created just before the header and all incoming edges
1174 to the header were redirected to the preheader, except the latch edge.
1175 So the only problematic case is when this latch edge was a fallthru
1176 edge: it is not anymore after the preheader creation so we have broken
1177 the fallthruness. We're therefore going to look for a better place. */
1178 if (latch_edge_was_fallthru)
1183 e = EDGE_PRED (dummy, 0);
1185 move_block_after (dummy, e->src);
1190 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1191 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1195 fprintf (dump_file, "Created preheader block for loop %i\n",
1201 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1204 create_preheaders (int flags)
1209 FOR_EACH_LOOP (li, loop, 0)
1210 create_preheader (loop, flags);
1211 current_loops->state |= LOOPS_HAVE_PREHEADERS;
1214 /* Forces all loop latches to have only single successor. */
1217 force_single_succ_latches (void)
1223 FOR_EACH_LOOP (li, loop, 0)
1225 if (loop->latch != loop->header && single_succ_p (loop->latch))
1228 e = find_edge (loop->latch, loop->header);
1232 current_loops->state |= LOOPS_HAVE_SIMPLE_LATCHES;
1235 /* This function is called from loop_version. It splits the entry edge
1236 of the loop we want to version, adds the versioning condition, and
1237 adjust the edges to the two versions of the loop appropriately.
1238 e is an incoming edge. Returns the basic block containing the
1241 --- edge e ---- > [second_head]
1243 Split it and insert new conditional expression and adjust edges.
1245 --- edge e ---> [cond expr] ---> [first_head]
1247 +---------> [second_head]
1249 THEN_PROB is the probability of then branch of the condition. */
1252 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1253 edge e, void *cond_expr, unsigned then_prob)
1255 basic_block new_head = NULL;
1258 gcc_assert (e->dest == second_head);
1260 /* Split edge 'e'. This will create a new basic block, where we can
1261 insert conditional expr. */
1262 new_head = split_edge (e);
1264 lv_add_condition_to_bb (first_head, second_head, new_head,
1267 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1268 e = single_succ_edge (new_head);
1269 e1 = make_edge (new_head, first_head,
1270 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1271 e1->probability = then_prob;
1272 e->probability = REG_BR_PROB_BASE - then_prob;
1273 e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
1274 e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
1276 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1277 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1279 /* Adjust loop header phi nodes. */
1280 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1285 /* Main entry point for Loop Versioning transformation.
1287 This transformation given a condition and a loop, creates
1288 -if (condition) { loop_copy1 } else { loop_copy2 },
1289 where loop_copy1 is the loop transformed in one way, and loop_copy2
1290 is the loop transformed in another way (or unchanged). 'condition'
1291 may be a run time test for things that were not resolved by static
1292 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1294 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1295 is the ratio by that the frequencies in the original loop should
1296 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1297 new loop should be scaled.
1299 If PLACE_AFTER is true, we place the new loop after LOOP in the
1300 instruction stream, otherwise it is placed before LOOP. */
1303 loop_version (struct loop *loop,
1304 void *cond_expr, basic_block *condition_bb,
1305 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1308 basic_block first_head, second_head;
1309 edge entry, latch_edge, true_edge, false_edge;
1312 basic_block cond_bb;
1314 /* CHECKME: Loop versioning does not handle nested loop at this point. */
1318 /* Record entry and latch edges for the loop */
1319 entry = loop_preheader_edge (loop);
1320 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1321 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1323 /* Note down head of loop as first_head. */
1324 first_head = entry->dest;
1326 /* Duplicate loop. */
1327 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1328 NULL, NULL, NULL, 0))
1331 /* After duplication entry edge now points to new loop head block.
1332 Note down new head as second_head. */
1333 second_head = entry->dest;
1335 /* Split loop entry edge and insert new block with cond expr. */
1336 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1337 entry, cond_expr, then_prob);
1339 *condition_bb = cond_bb;
1343 entry->flags |= irred_flag;
1347 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1349 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1350 nloop = loopify (latch_edge,
1351 single_pred_edge (get_bb_copy (loop->header)),
1352 cond_bb, true_edge, false_edge,
1353 false /* Do not redirect all edges. */,
1354 then_scale, else_scale);
1356 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1357 lv_flush_pending_stmts (latch_edge);
1359 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1360 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1361 lv_flush_pending_stmts (false_edge);
1362 /* Adjust irreducible flag. */
1365 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1366 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1367 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1368 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1373 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1376 after = loop->latch;
1378 for (i = 0; i < nloop->num_nodes; i++)
1380 move_block_after (bbs[i], after);
1386 /* At this point condition_bb is loop predheader with two successors,
1387 first_head and second_head. Make sure that loop predheader has only
1389 split_edge (loop_preheader_edge (loop));
1390 split_edge (loop_preheader_edge (nloop));
1395 /* The structure of loops might have changed. Some loops might get removed
1396 (and their headers and latches were set to NULL), loop exists might get
1397 removed (thus the loop nesting may be wrong), and some blocks and edges
1398 were changed (so the information about bb --> loop mapping does not have
1399 to be correct). But still for the remaining loops the header dominates
1400 the latch, and loops did not get new subloobs (new loops might possibly
1401 get created, but we are not interested in them). Fix up the mess.
1403 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
1407 fix_loop_structure (bitmap changed_bbs)
1410 struct loop *loop, *ploop;
1413 /* Remove the old bb -> loop mapping. */
1416 bb->aux = (void *) (size_t) bb->loop_father->depth;
1417 bb->loop_father = current_loops->tree_root;
1420 /* Remove the dead loops from structures. */
1421 current_loops->tree_root->num_nodes = n_basic_blocks;
1422 FOR_EACH_LOOP (li, loop, 0)
1424 loop->num_nodes = 0;
1430 ploop = loop->inner;
1431 flow_loop_tree_node_remove (ploop);
1432 flow_loop_tree_node_add (loop->outer, ploop);
1435 /* Remove the loop and free its data. */
1439 /* Rescan the bodies of loops, starting from the outermost. */
1440 FOR_EACH_LOOP (li, loop, 0)
1442 loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
1445 /* Now fix the loop nesting. */
1446 FOR_EACH_LOOP (li, loop, 0)
1448 bb = loop_preheader_edge (loop)->src;
1449 if (bb->loop_father != loop->outer)
1451 flow_loop_tree_node_remove (loop);
1452 flow_loop_tree_node_add (bb->loop_father, loop);
1456 /* Mark the blocks whose loop has changed. */
1460 && (void *) (size_t) bb->loop_father->depth != bb->aux)
1461 bitmap_set_bit (changed_bbs, bb->index);
1466 if (current_loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
1467 mark_irreducible_loops ();
1469 if (current_loops->state & LOOPS_HAVE_RECORDED_EXITS)
1471 release_recorded_exits ();
1472 record_loop_exits ();