1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "cfglayout.h"
34 static void duplicate_subloops (struct loop *, struct loop *);
35 static void copy_loops_to (struct loop **, int,
37 static void loop_redirect_edge (edge, basic_block);
38 static bool loop_delete_branch_edge (edge, int);
39 static void remove_bbs (basic_block *, int);
40 static bool rpe_enum_p (basic_block, void *);
41 static int find_path (edge, basic_block **);
42 static bool alp_enum_p (basic_block, void *);
43 static void fix_loop_placements (struct loop *, bool *);
44 static bool fix_bb_placement (basic_block);
45 static void fix_bb_placements (basic_block, bool *);
46 static void place_new_loop (struct loop *);
47 static void scale_loop_frequencies (struct loop *, int, int);
48 static basic_block create_preheader (struct loop *, int);
49 static void unloop (struct loop *, bool *);
51 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) /* Divide X by Y, adding half of Y first so that the quotient is rounded to nearest for non-negative operands. Y is evaluated twice, so it must be free of side effects. */
53 /* Checks whether basic block BB is dominated by DATA. */
55 rpe_enum_p (basic_block bb, void *data)
57 return dominated_by_p (CDI_DOMINATORS, bb, data);
60 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
63 remove_bbs (basic_block *bbs, int nbbs)
67 for (i = 0; i < nbbs; i++)
68 delete_basic_block (bbs[i]);
71 /* Find path -- i.e. the basic blocks dominated by edge E -- and put them
72 into array BBS, which will be allocated large enough to contain them.
73 E->dest must have exactly one predecessor for this to work (it is
74 easy to achieve and we do not put it here because we do not want to
75 alter anything by this function). The number of basic blocks in the
76 path is returned. */
78 find_path (edge e, basic_block **bbs)
80 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
82 /* Find bbs in the path. */
83 *bbs = XCNEWVEC (basic_block, n_basic_blocks);
84 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
85 n_basic_blocks, e->dest);
88 /* Fix placement of basic block BB inside loop hierarchy --
89 Let L be the loop to which BB belongs. Then every successor of BB must either
90 1) belong to some superloop of loop L, or
91 2) be a header of loop K such that K->outer is superloop of L
92 Returns true if we had to move BB into another loop to enforce this condition,
93 false if the placement of BB was already correct (provided that placements
94 of its successors are correct). */
96 fix_bb_placement (basic_block bb)
100 struct loop *loop = current_loops->tree_root, *act;
102 FOR_EACH_EDGE (e, ei, bb->succs)
104 if (e->dest == EXIT_BLOCK_PTR)
107 act = e->dest->loop_father;
108 if (act->header == e->dest)
111 if (flow_loop_nested_p (loop, act))
115 if (loop == bb->loop_father)
118 remove_bb_from_loops (bb);
119 add_bb_to_loop (bb, loop);
124 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
125 enforce the condition stated in the description of fix_bb_placement. We
126 start from basic block FROM that had some of its successors removed, so that
127 its placement no longer has to be correct, and iteratively fix placement of
128 its predecessors that may change if placement of FROM changed. Also fix
129 placement of subloops of FROM->loop_father, that might also be altered due
130 to this change; the condition for them is similar, except that instead of
131 successors we consider edges coming out of the loops.
133 If the changes may invalidate the information about irreducible regions,
134 IRRED_INVALIDATED is set to true. */
137 fix_bb_placements (basic_block from,
138 bool *irred_invalidated)
141 basic_block *queue, *qtop, *qbeg, *qend;
142 struct loop *base_loop;
145 /* We pass through blocks back-reachable from FROM, testing whether some
146 of their successors moved to outer loop. It may be necessary to
147 iterate several times, but it is finite, as we stop unless we move
148 the basic block up the loop structure. The whole story is a bit
149 more complicated due to presence of subloops, those are moved using
150 fix_loop_placement. */
152 base_loop = from->loop_father;
153 if (base_loop == current_loops->tree_root)
156 in_queue = sbitmap_alloc (last_basic_block);
157 sbitmap_zero (in_queue);
158 SET_BIT (in_queue, from->index);
159 /* Prevent us from going out of the base_loop. */
160 SET_BIT (in_queue, base_loop->header->index);
162 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
163 qtop = queue + base_loop->num_nodes + 1;
175 RESET_BIT (in_queue, from->index);
177 if (from->loop_father->header == from)
179 /* Subloop header, maybe move the loop upward. */
180 if (!fix_loop_placement (from->loop_father))
185 /* Ordinary basic block. */
186 if (!fix_bb_placement (from))
190 FOR_EACH_EDGE (e, ei, from->succs)
192 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
193 *irred_invalidated = true;
196 /* Something has changed, insert predecessors into queue. */
197 FOR_EACH_EDGE (e, ei, from->preds)
199 basic_block pred = e->src;
202 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
203 *irred_invalidated = true;
205 if (TEST_BIT (in_queue, pred->index))
208 /* If it is a subloop, then it either was not moved, or
209 the path up the loop tree from base_loop does not contain
210 it. */
211 nca = find_common_loop (pred->loop_father, base_loop);
212 if (pred->loop_father != base_loop
214 || nca != pred->loop_father))
215 pred = pred->loop_father->header;
216 else if (!flow_loop_nested_p (from->loop_father, pred->loop_father))
218 /* No point in processing it. */
222 if (TEST_BIT (in_queue, pred->index))
225 /* Schedule the basic block. */
230 SET_BIT (in_queue, pred->index);
237 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
238 and update loop structures and dominators. Return true if we were able
239 to remove the path, false otherwise (and nothing is affected then). */
244 basic_block *rem_bbs, *bord_bbs, *dom_bbs, from, bb;
245 int i, nrem, n_bord_bbs, n_dom_bbs, nreml;
247 bool deleted, irred_invalidated = false;
248 struct loop **deleted_loop;
250 if (!loop_delete_branch_edge (e, 0))
253 /* Keep track of whether we need to update information about irreducible
254 regions. This is the case if the removed area is a part of the
255 irreducible region, or if the set of basic blocks that belong to a loop
256 that is inside an irreducible region is changed, or if such a loop is
257 removed. */
258 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
259 irred_invalidated = true;
261 /* We need to check whether basic blocks are dominated by the edge
262 e, but we only have basic block dominators. This is easy to
263 fix -- when e->dest has exactly one predecessor, this corresponds
264 to blocks dominated by e->dest, if not, split the edge. */
265 if (!single_pred_p (e->dest))
266 e = single_pred_edge (split_edge (e));
268 /* It may happen that by removing path we remove one or more loops
269 we belong to. In this case first unloop the loops, then proceed
270 normally. We may assume that e->dest is not a header of any loop,
271 as it now has exactly one predecessor. */
272 while (e->src->loop_father->outer
273 && dominated_by_p (CDI_DOMINATORS,
274 e->src->loop_father->latch, e->dest))
275 unloop (e->src->loop_father, &irred_invalidated);
277 /* Identify the path. */
278 nrem = find_path (e, &rem_bbs);
281 bord_bbs = XCNEWVEC (basic_block, n_basic_blocks);
282 seen = sbitmap_alloc (last_basic_block);
285 /* Find "border" blocks -- i.e. those with a predecessor in the removed path. */
286 for (i = 0; i < nrem; i++)
287 SET_BIT (seen, rem_bbs[i]->index);
288 for (i = 0; i < nrem; i++)
292 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
293 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index))
295 SET_BIT (seen, ae->dest->index);
296 bord_bbs[n_bord_bbs++] = ae->dest;
298 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
299 irred_invalidated = true;
303 /* Remove the path. */
305 deleted = loop_delete_branch_edge (e, 1);
306 gcc_assert (deleted);
307 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
309 /* Cancel loops contained in the path. */
310 deleted_loop = XNEWVEC (struct loop *, nrem);
312 for (i = 0; i < nrem; i++)
313 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
314 deleted_loop[nreml++] = rem_bbs[i]->loop_father;
316 remove_bbs (rem_bbs, nrem);
319 for (i = 0; i < nreml; i++)
320 cancel_loop_tree (deleted_loop[i]);
323 /* Find blocks whose dominators may be affected. */
326 for (i = 0; i < n_bord_bbs; i++)
330 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
331 if (TEST_BIT (seen, bb->index))
333 SET_BIT (seen, bb->index);
335 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
337 ldom = next_dom_son (CDI_DOMINATORS, ldom))
338 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
339 dom_bbs[n_dom_bbs++] = ldom;
344 /* Recount dominators. */
345 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
349 /* Fix placements of basic blocks inside loops and the placement of
350 loops in the loop tree. */
351 fix_bb_placements (from, &irred_invalidated);
352 fix_loop_placements (from->loop_father, &irred_invalidated);
354 if (irred_invalidated
355 && (current_loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS) != 0)
356 mark_irreducible_loops ();
361 /* Predicate for enumeration in add_loop. */
363 alp_enum_p (basic_block bb, void *alp_header)
365 return bb != (basic_block) alp_header;
368 /* Given LOOP structure with filled header and latch, find the body of the
369 corresponding loop and add it to the loop tree. Insert LOOP as a son of
370 OUTER. */
373 add_loop (struct loop *loop, struct loop *outer)
378 /* Add it to loop structure. */
379 place_new_loop (loop);
380 flow_loop_tree_node_add (outer, loop);
382 /* Find its nodes. */
383 bbs = XCNEWVEC (basic_block, n_basic_blocks);
384 n = dfs_enumerate_from (loop->latch, 1, alp_enum_p,
385 bbs, n_basic_blocks, loop->header);
387 for (i = 0; i < n; i++)
389 remove_bb_from_loops (bbs[i]);
390 add_bb_to_loop (bbs[i], loop);
392 remove_bb_from_loops (loop->header);
393 add_bb_to_loop (loop->header, loop);
398 /* Multiply all frequencies in LOOP by NUM/DEN. */
400 scale_loop_frequencies (struct loop *loop, int num, int den)
404 bbs = get_loop_body (loop);
405 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
409 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
410 latch to header and update loop tree and dominators
411 accordingly. Everything between them plus LATCH_EDGE destination must
412 be dominated by HEADER_EDGE destination, and back-reachable from
413 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
414 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
415 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
416 Returns newly created loop. */
419 loopify (edge latch_edge, edge header_edge,
420 basic_block switch_bb, edge true_edge, edge false_edge,
421 bool redirect_all_edges)
423 basic_block succ_bb = latch_edge->dest;
424 basic_block pred_bb = header_edge->src;
425 basic_block *dom_bbs, *body;
426 unsigned n_dom_bbs, i;
428 struct loop *loop = XCNEW (struct loop);
429 struct loop *outer = succ_bb->loop_father->outer;
430 int freq, prob, tot_prob;
435 loop->header = header_edge->dest;
436 loop->latch = latch_edge->src;
438 freq = EDGE_FREQUENCY (header_edge);
439 cnt = header_edge->count;
440 prob = EDGE_SUCC (switch_bb, 0)->probability;
441 tot_prob = prob + EDGE_SUCC (switch_bb, 1)->probability;
445 /* Redirect edges. */
446 loop_redirect_edge (latch_edge, loop->header);
447 loop_redirect_edge (true_edge, succ_bb);
449 /* During loop versioning, one of the switch_bb edge is already properly
450 set. Do not redirect it again unless redirect_all_edges is true. */
451 if (redirect_all_edges)
453 loop_redirect_edge (header_edge, switch_bb);
454 loop_redirect_edge (false_edge, loop->header);
456 /* Update dominators. */
457 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
458 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
461 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
463 /* Compute new loop. */
464 add_loop (loop, outer);
466 /* Add switch_bb to appropriate loop. */
467 if (switch_bb->loop_father)
468 remove_bb_from_loops (switch_bb);
469 add_bb_to_loop (switch_bb, outer);
471 /* Fix frequencies. */
472 switch_bb->frequency = freq;
473 switch_bb->count = cnt;
474 FOR_EACH_EDGE (e, ei, switch_bb->succs)
475 e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
476 scale_loop_frequencies (loop, prob, tot_prob);
477 scale_loop_frequencies (succ_bb->loop_father, tot_prob - prob, tot_prob);
479 /* Update dominators of blocks outside of LOOP. */
480 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
482 seen = sbitmap_alloc (last_basic_block);
484 body = get_loop_body (loop);
486 for (i = 0; i < loop->num_nodes; i++)
487 SET_BIT (seen, body[i]->index);
489 for (i = 0; i < loop->num_nodes; i++)
493 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
495 ldom = next_dom_son (CDI_DOMINATORS, ldom))
496 if (!TEST_BIT (seen, ldom->index))
498 SET_BIT (seen, ldom->index);
499 dom_bbs[n_dom_bbs++] = ldom;
503 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
512 /* Remove the latch edge of a LOOP and update loops to indicate that
513 the LOOP was removed. After this function, original loop latch will
514 have no successor, which caller is expected to fix somehow.
516 If this may cause the information about irreducible regions to become
517 invalid, IRRED_INVALIDATED is set to true. */
520 unloop (struct loop *loop, bool *irred_invalidated)
525 basic_block latch = loop->latch;
528 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
529 *irred_invalidated = true;
531 /* This is relatively straightforward. The dominators are unchanged, as
532 loop header dominates loop latch, so the only thing we have to take care of
533 is the placement of loops and basic blocks inside the loop tree. We
534 move them all to the loop->outer, and then let fix_bb_placements do
535 its work. */
537 body = get_loop_body (loop);
539 for (i = 0; i < n; i++)
540 if (body[i]->loop_father == loop)
542 remove_bb_from_loops (body[i]);
543 add_bb_to_loop (body[i], loop->outer);
550 flow_loop_tree_node_remove (ploop);
551 flow_loop_tree_node_add (loop->outer, ploop);
554 /* Remove the loop and free its data. */
555 flow_loop_tree_node_remove (loop);
556 current_loops->parray[loop->num] = NULL;
557 flow_loop_free (loop);
559 remove_edge (single_succ_edge (latch));
561 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
562 there is an irreducible region inside the cancelled loop, the flags will
563 still be correct. */
564 fix_bb_placements (latch, &dummy);
567 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
568 FATHER of LOOP such that all of the edges coming out of LOOP belong to
569 FATHER, and set it as the outer loop of LOOP. Return true if the placement of
570 LOOP changed. */
573 fix_loop_placement (struct loop *loop)
579 struct loop *father = loop->pred[0], *act;
581 body = get_loop_body (loop);
582 for (i = 0; i < loop->num_nodes; i++)
583 FOR_EACH_EDGE (e, ei, body[i]->succs)
584 if (!flow_bb_inside_loop_p (loop, e->dest))
586 act = find_common_loop (loop, e->dest->loop_father);
587 if (flow_loop_nested_p (father, act))
592 if (father != loop->outer)
594 for (act = loop->outer; act != father; act = act->outer)
595 act->num_nodes -= loop->num_nodes;
596 flow_loop_tree_node_remove (loop);
597 flow_loop_tree_node_add (father, loop);
603 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
604 condition stated in description of fix_loop_placement holds for them.
605 It is used in case when we removed some edges coming out of LOOP, which
606 may cause the right placement of LOOP inside loop tree to change.
608 IRRED_INVALIDATED is set to true if a change in the loop structures might
609 invalidate the information about irreducible regions. */
612 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
619 if (!fix_loop_placement (loop))
622 /* Changing the placement of a loop in the loop tree may alter the
623 validity of condition 2) of the description of fix_bb_placement
624 for its preheader, because the successor is the header and belongs
625 to the loop. So call fix_bb_placements to fix up the placement
626 of the preheader and (possibly) of its predecessors. */
627 fix_bb_placements (loop_preheader_edge (loop)->src,
633 /* Creates place for a new LOOP in loops structure. */
635 place_new_loop (struct loop *loop)
637 current_loops->parray =
638 xrealloc (current_loops->parray, (current_loops->num + 1) * sizeof (struct loop *));
639 current_loops->parray[current_loops->num] = loop;
641 loop->num = current_loops->num++;
644 /* Creates a copy of LOOP as a subloop of the TARGET loop, placing the newly
645 created loop into the loops structure. */
647 duplicate_loop (struct loop *loop, struct loop *target)
650 cloop = XCNEW (struct loop);
651 place_new_loop (cloop);
653 /* Mark the new loop as copy of LOOP. */
656 /* Add it to target. */
657 flow_loop_tree_node_add (target, cloop);
662 /* Copies structure of subloops of LOOP into TARGET loop, placing
663 newly created loops into loop tree. */
665 duplicate_subloops (struct loop *loop, struct loop *target)
667 struct loop *aloop, *cloop;
669 for (aloop = loop->inner; aloop; aloop = aloop->next)
671 cloop = duplicate_loop (aloop, target);
672 duplicate_subloops (aloop, cloop);
676 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
677 into TARGET loop, placing newly created loops into loop tree. */
679 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
684 for (i = 0; i < n; i++)
686 aloop = duplicate_loop (copied_loops[i], target);
687 duplicate_subloops (copied_loops[i], aloop);
691 /* Redirects edge E to basic block DEST. */
693 loop_redirect_edge (edge e, basic_block dest)
698 redirect_edge_and_branch_force (e, dest);
701 /* Deletes edge E from a branch if possible. Unless REALLY_DELETE is set,
702 just test whether it is possible to remove the edge. */
704 loop_delete_branch_edge (edge e, int really_delete)
706 basic_block src = e->src;
711 gcc_assert (EDGE_COUNT (src->succs) > 1);
713 /* Cannot handle more than two exit edges. */
714 if (EDGE_COUNT (src->succs) > 2)
716 /* And it must be just a simple branch. */
717 if (!any_condjump_p (BB_END (src)))
720 snd = e == EDGE_SUCC (src, 0) ? EDGE_SUCC (src, 1) : EDGE_SUCC (src, 0);
722 if (newdest == EXIT_BLOCK_PTR)
725 /* Hopefully the above conditions should suffice. */
729 /* Redirecting behaves wrongly with respect to this flag. */
730 irr = snd->flags & EDGE_IRREDUCIBLE_LOOP;
732 if (!redirect_edge_and_branch (e, newdest))
734 single_succ_edge (src)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
735 single_succ_edge (src)->flags |= irr;
740 /* Check whether LOOP's body can be duplicated. */
742 can_duplicate_loop_p (struct loop *loop)
745 basic_block *bbs = get_loop_body (loop);
747 ret = can_copy_bbs_p (bbs, loop->num_nodes);
753 /* The NBBS blocks in BBS will get duplicated and the copies will be placed
754 to LOOP. Update the single_exit information in superloops of LOOP. */
757 update_single_exits_after_duplication (basic_block *bbs, unsigned nbbs,
762 for (i = 0; i < nbbs; i++)
763 bbs[i]->flags |= BB_DUPLICATED;
765 for (; loop->outer; loop = loop->outer)
767 if (!single_exit (loop))
770 if (single_exit (loop)->src->flags & BB_DUPLICATED)
771 set_single_exit (loop, NULL);
774 for (i = 0; i < nbbs; i++)
775 bbs[i]->flags &= ~BB_DUPLICATED;
778 /* Updates single exit information for the copy of LOOP. */
781 update_single_exit_for_duplicated_loop (struct loop *loop)
783 struct loop *copy = loop->copy;
784 basic_block src, dest;
785 edge exit = single_exit (loop);
790 src = get_bb_copy (exit->src);
792 if (dest->flags & BB_DUPLICATED)
793 dest = get_bb_copy (dest);
795 exit = find_edge (src, dest);
796 gcc_assert (exit != NULL);
797 set_single_exit (copy, exit);
800 /* Updates single exit information for copies of ORIG_LOOPS and their subloops.
801 N is the number of the loops in the ORIG_LOOPS array. */
804 update_single_exit_for_duplicated_loops (struct loop *orig_loops[], unsigned n)
808 for (i = 0; i < n; i++)
809 update_single_exit_for_duplicated_loop (orig_loops[i]);
812 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
813 loop structure and dominators. E's destination must be LOOP header for
814 this to work, i.e. it must be entry or latch edge of this loop; these are
815 unique, as the loops must have preheaders for this function to work
816 correctly (in case E is latch, the function unrolls the loop, if E is entry
817 edge, it peels the loop). Store edges created by copying ORIG edge from
818 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
819 original LOOP body, the other copies are numbered in order given by control
820 flow through them) into TO_REMOVE array. Returns false if duplication is
821 impossible. */
823 duplicate_loop_to_header_edge (struct loop *loop, edge e,
824 unsigned int ndupl, sbitmap wont_exit,
825 edge orig, edge *to_remove,
826 unsigned int *n_to_remove, int flags)
828 struct loop *target, *aloop;
829 struct loop **orig_loops;
830 unsigned n_orig_loops;
831 basic_block header = loop->header, latch = loop->latch;
832 basic_block *new_bbs, *bbs, *first_active;
833 basic_block new_bb, bb, first_active_latch = NULL;
835 edge spec_edges[2], new_spec_edges[2];
839 int is_latch = (latch == e->src);
840 int scale_act = 0, *scale_step = NULL, scale_main = 0;
841 int p, freq_in, freq_le, freq_out_orig;
842 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
843 int add_irreducible_flag;
844 basic_block place_after;
846 gcc_assert (e->dest == loop->header);
847 gcc_assert (ndupl > 0);
851 /* Orig must be edge out of the loop. */
852 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
853 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
857 bbs = get_loop_body_in_dom_order (loop);
858 gcc_assert (bbs[0] == loop->header);
859 gcc_assert (bbs[n - 1] == loop->latch);
861 /* Check whether duplication is possible. */
862 if (!can_copy_bbs_p (bbs, loop->num_nodes))
867 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
869 /* In case we are doing loop peeling and the loop is in the middle of
870 irreducible region, the peeled copies will be inside it too. */
871 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
872 gcc_assert (!is_latch || !add_irreducible_flag);
874 /* Find edge from latch. */
875 latch_edge = loop_latch_edge (loop);
877 if (flags & DLTHE_FLAG_UPDATE_FREQ)
879 /* Calculate the coefficients by which we have to scale the frequencies
880 of the duplicated loop bodies. */
881 freq_in = header->frequency;
882 freq_le = EDGE_FREQUENCY (latch_edge);
885 if (freq_in < freq_le)
887 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
888 if (freq_out_orig > freq_in - freq_le)
889 freq_out_orig = freq_in - freq_le;
890 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
891 prob_pass_wont_exit =
892 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
894 scale_step = XNEWVEC (int, ndupl);
896 for (i = 1; i <= ndupl; i++)
897 scale_step[i - 1] = TEST_BIT (wont_exit, i)
898 ? prob_pass_wont_exit
901 /* Complete peeling is special as the probability of exit in last
903 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
905 int wanted_freq = EDGE_FREQUENCY (e);
907 if (wanted_freq > freq_in)
908 wanted_freq = freq_in;
910 gcc_assert (!is_latch);
911 /* First copy has frequency of incoming edge. Each subsequent
912 frequency should be reduced by prob_pass_wont_exit. Caller
913 should've managed the flags so that every copy except the original loop
914 has its wont_exit bit set. */
915 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
916 /* Now simulate the duplication adjustments and compute header
917 frequency of the last copy. */
918 for (i = 0; i < ndupl; i++)
919 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
920 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
924 prob_pass_main = TEST_BIT (wont_exit, 0)
925 ? prob_pass_wont_exit
928 scale_main = REG_BR_PROB_BASE;
929 for (i = 0; i < ndupl; i++)
932 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
934 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
935 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
939 scale_main = REG_BR_PROB_BASE;
940 for (i = 0; i < ndupl; i++)
941 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
942 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
944 for (i = 0; i < ndupl; i++)
945 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
946 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
947 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
950 /* Loop the new bbs will belong to. */
951 target = e->src->loop_father;
953 /* Original loops. */
955 for (aloop = loop->inner; aloop; aloop = aloop->next)
957 orig_loops = XCNEWVEC (struct loop *, n_orig_loops);
958 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
959 orig_loops[i] = aloop;
963 first_active = XNEWVEC (basic_block, n);
966 memcpy (first_active, bbs, n * sizeof (basic_block));
967 first_active_latch = latch;
970 /* Update the information about single exits. */
971 if (current_loops->state & LOOPS_HAVE_MARKED_SINGLE_EXITS)
972 update_single_exits_after_duplication (bbs, n, target);
974 /* Record exit edge in original loop body. */
975 if (orig && TEST_BIT (wont_exit, 0))
976 to_remove[(*n_to_remove)++] = orig;
978 spec_edges[SE_ORIG] = orig;
979 spec_edges[SE_LATCH] = latch_edge;
981 place_after = e->src;
982 for (j = 0; j < ndupl; j++)
985 copy_loops_to (orig_loops, n_orig_loops, target);
988 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
990 place_after = new_spec_edges[SE_LATCH]->src;
992 if (current_loops->state & LOOPS_HAVE_MARKED_SINGLE_EXITS)
994 for (i = 0; i < n; i++)
995 bbs[i]->flags |= BB_DUPLICATED;
996 update_single_exit_for_duplicated_loops (orig_loops, n_orig_loops);
997 for (i = 0; i < n; i++)
998 bbs[i]->flags &= ~BB_DUPLICATED;
1001 if (flags & DLTHE_RECORD_COPY_NUMBER)
1002 for (i = 0; i < n; i++)
1004 gcc_assert (!new_bbs[i]->aux);
1005 new_bbs[i]->aux = (void *)(size_t)(j + 1);
1008 /* Note whether the blocks and edges belong to an irreducible loop. */
1009 if (add_irreducible_flag)
1011 for (i = 0; i < n; i++)
1012 new_bbs[i]->flags |= BB_DUPLICATED;
1013 for (i = 0; i < n; i++)
1016 new_bb = new_bbs[i];
1017 if (new_bb->loop_father == target)
1018 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1020 FOR_EACH_EDGE (ae, ei, new_bb->succs)
1021 if ((ae->dest->flags & BB_DUPLICATED)
1022 && (ae->src->loop_father == target
1023 || ae->dest->loop_father == target))
1024 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1026 for (i = 0; i < n; i++)
1027 new_bbs[i]->flags &= ~BB_DUPLICATED;
1030 /* Redirect the special edges. */
1033 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1034 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1036 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1037 latch = loop->latch = new_bbs[n - 1];
1038 e = latch_edge = new_spec_edges[SE_LATCH];
1042 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1044 redirect_edge_and_branch_force (e, new_bbs[0]);
1045 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1046 e = new_spec_edges[SE_LATCH];
1049 /* Record exit edge in this copy. */
1050 if (orig && TEST_BIT (wont_exit, j + 1))
1051 to_remove[(*n_to_remove)++] = new_spec_edges[SE_ORIG];
1053 /* Record the first copy in the control flow order if it is not
1054 the original loop (i.e. in case of peeling). */
1055 if (!first_active_latch)
1057 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1058 first_active_latch = new_bbs[n - 1];
1061 /* Set counts and frequencies. */
1062 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1064 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1065 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
1071 /* Update the original loop. */
1073 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1074 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1076 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1080 /* Update dominators of outer blocks if affected. */
1081 for (i = 0; i < n; i++)
1083 basic_block dominated, dom_bb, *dom_bbs;
1089 n_dom_bbs = get_dominated_by (CDI_DOMINATORS, bb, &dom_bbs);
1090 for (j = 0; j < n_dom_bbs; j++)
1092 dominated = dom_bbs[j];
1093 if (flow_bb_inside_loop_p (loop, dominated))
1095 dom_bb = nearest_common_dominator (
1096 CDI_DOMINATORS, first_active[i], first_active_latch);
1097 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1101 free (first_active);
1108 /* A callback for make_forwarder_block, to redirect all edges except for
1109 MFB_KJ_EDGE to the entry part. E is the edge for which we should decide
1110 whether to redirect it. */
1112 static edge mfb_kj_edge;
1114 mfb_keep_just (edge e)
1116 return e != mfb_kj_edge;
1119 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1120 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1121 entry; otherwise we also force preheader block to have only one successor.
1122 The function also updates dominators. */
1125 create_preheader (struct loop *loop, int flags)
1131 bool latch_edge_was_fallthru;
1132 edge one_succ_pred = 0;
1135 FOR_EACH_EDGE (e, ei, loop->header->preds)
1137 if (e->src == loop->latch)
1139 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1141 if (single_succ_p (e->src))
1144 gcc_assert (nentry);
1147 /* Get an edge that is different from the one from loop->latch
1148 to loop->header. */
1149 e = EDGE_PRED (loop->header,
1150 EDGE_PRED (loop->header, 0)->src == loop->latch);
1152 if (!(flags & CP_SIMPLE_PREHEADERS) || single_succ_p (e->src))
1156 mfb_kj_edge = loop_latch_edge (loop);
1157 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1158 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1159 dummy = fallthru->src;
1160 loop->header = fallthru->dest;
1162 /* Try to be clever in placing the newly created preheader. The idea is to
1163 avoid breaking any "fallthruness" relationship between blocks.
1165 The preheader was created just before the header and all incoming edges
1166 to the header were redirected to the preheader, except the latch edge.
1167 So the only problematic case is when this latch edge was a fallthru
1168 edge: it is not anymore after the preheader creation so we have broken
1169 the fallthruness. We're therefore going to look for a better place. */
1170 if (latch_edge_was_fallthru)
1175 e = EDGE_PRED (dummy, 0);
1177 move_block_after (dummy, e->src);
1182 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1183 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1187 fprintf (dump_file, "Created preheader block for loop %i\n",
1193 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1196 create_preheaders (int flags)
1199 for (i = 1; i < current_loops->num; i++)
1200 create_preheader (current_loops->parray[i], flags);
1201 current_loops->state |= LOOPS_HAVE_PREHEADERS;
1204 /* Forces all loop latches to have only a single successor. */
1207 force_single_succ_latches (void)
1213 for (i = 1; i < current_loops->num; i++)
1215 loop = current_loops->parray[i];
1216 if (loop->latch != loop->header && single_succ_p (loop->latch))
1219 e = find_edge (loop->latch, loop->header);
1223 current_loops->state |= LOOPS_HAVE_SIMPLE_LATCHES;
1226 /* This function is called from loop_version. It splits the entry edge
1227 of the loop we want to version, adds the versioning condition, and
1228 adjusts the edges to the two versions of the loop appropriately.
1229 E is an incoming edge. Returns the basic block containing the condition.
1232 --- edge e ---- > [second_head]
1234 Split it, insert the new conditional expression and adjust the edges.
1236 --- edge e ---> [cond expr] ---> [first_head]
1238 +---------> [second_head] */
1242 lv_adjust_loop_entry_edge (basic_block first_head,
1243 basic_block second_head,
1247 basic_block new_head = NULL;
/* The edge that gets split has to enter SECOND_HEAD. */
1250 gcc_assert (e->dest == second_head);
1252 /* Split edge 'e'. This will create a new basic block, where we can
1253 insert conditional expr.  The new block is kept in NEW_HEAD. */
1254 new_head = split_edge (e);
/* Add the condition to NEW_HEAD. */
1257 lv_add_condition_to_bb (first_head, second_head, new_head,
1260 /* Create the edge from NEW_HEAD to FIRST_HEAD.
   Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1261 e1 = make_edge (new_head, first_head,
1262 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
/* NEW_HEAD becomes the immediate dominator of both FIRST_HEAD and
   SECOND_HEAD. */
1263 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1264 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1266 /* Adjust loop header phi nodes.  E1 is the edge created above. */
1267 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1272 /* Main entry point for Loop Versioning transformation.
1274 Given a condition and a loop, this transformation creates
1275 -if (condition) { loop_copy1 } else { loop_copy2 },
1276 where loop_copy1 is the loop transformed in one way, and loop_copy2
1277 is the loop transformed in another way (or unchanged). 'condition'
1278 may be a run time test for things that were not resolved by static
1279 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
   The basic block created for the condition is stored through CONDITION_BB.
1281 If PLACE_AFTER is true, we place the new loop after LOOP in the
1282 instruction stream, otherwise it is placed before LOOP. */
1285 loop_version (struct loop *loop,
1286 void *cond_expr, basic_block *condition_bb,
1289 basic_block first_head, second_head;
1290 edge entry, latch_edge, exit, true_edge, false_edge;
1293 basic_block cond_bb;
1295 /* CHECKME: Loop versioning does not handle nested loops at this point. */
1299 /* Record entry and latch edges for the loop.  The entry edge is the
   preheader edge of LOOP. */
1300 entry = loop_preheader_edge (loop);
/* Save the EDGE_IRREDUCIBLE_LOOP bit of the entry edge in IRRED_FLAG and
   clear it on the edge itself. */
1301 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1302 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1304 /* Note down head of loop as first_head. */
1305 first_head = entry->dest;
1307 /* Duplicate loop.  The duplication is done on the entry edge. */
1308 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1309 NULL, NULL, NULL, NULL, 0))
1312 /* After duplication entry edge now points to new loop head block.
1313 Note down new head as second_head. */
1314 second_head = entry->dest;
1316 /* Split loop entry edge and insert new block with cond expr.  The new
   block is kept in COND_BB. */
1317 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
/* Hand the condition block back to the caller. */
1320 *condition_bb = cond_bb;
/* OR the saved irreducible bit back into the flags of the entry edge. */
1324 entry->flags |= irred_flag;
/* The latch edge of the copy is the single successor edge of the copy of
   LOOP's latch. */
1328 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
/* Fetch the two outgoing edges of COND_BB and build the loop structure for
   the copy; the result is NLOOP. */
1330 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1331 nloop = loopify (latch_edge,
1332 single_pred_edge (get_bb_copy (loop->header)),
1333 cond_bb, true_edge, false_edge,
1334 false /* Do not redirect all edges. */);
/* The single exit of NLOOP is the edge that leaves the copy of the exit's
   source block and enters the same destination as the exit of LOOP. */
1336 exit = single_exit (loop);
1338 set_single_exit (nloop, find_edge (get_bb_copy (exit->src), exit->dest));
1340 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1341 lv_flush_pending_stmts (latch_edge);
1343 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1344 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1345 lv_flush_pending_stmts (false_edge);
1346 /* Adjust irreducible flag.  COND_BB, the preheader edges of both loops and
   the single predecessor edge of COND_BB get marked. */
1349 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1350 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1351 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1352 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
/* Lay out the blocks of NLOOP, taken in dominance order, with
   move_block_after; the starting position is the latch of LOOP. */
1357 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1360 after = loop->latch;
1362 for (i = 0; i < nloop->num_nodes; i++)
1364 move_block_after (bbs[i], after);
1370 /* At this point condition_bb is loop predheader with two successors,
1371 first_head and second_head. Make sure that loop predheader has only
1373 split_edge (loop_preheader_edge (loop));
1374 split_edge (loop_preheader_edge (nloop));
1379 /* The structure of loops might have changed. Some loops might get removed
1380 (and their headers and latches were set to NULL), loop exits might get
1381 removed (thus the loop nesting may be wrong), and some blocks and edges
1382 were changed (so the information about bb --> loop mapping does not have
1383 to be correct). But still for the remaining loops the header dominates
1384 the latch, and loops did not get new subloops (new loops might possibly
1385 get created, but we are not interested in them). Fix up the mess.
1387 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are marked in it. */
1391 fix_loop_structure (bitmap changed_bbs)
1394 struct loop *loop, *ploop;
1397 /* Remove the old bb -> loop mapping.  The depth of the loop a block used
   to belong to is stashed in bb->aux (it is compared against the new depth
   at the end), and the block is assigned to the tree root for now. */
1400 bb->aux = (void *) (size_t) bb->loop_father->depth;
1401 bb->loop_father = current_loops->tree_root;
1404 /* Remove the dead loops from structures.  The tree root starts out owning
   all n_basic_blocks blocks. */
1405 current_loops->tree_root->num_nodes = n_basic_blocks;
1406 for (i = 1; i < current_loops->num; i++)
1408 loop = current_loops->parray[i];
/* Reset the block count of this loop. */
1412 loop->num_nodes = 0;
/* Detach an inner loop of LOOP and re-attach it under LOOP's outer loop. */
1418 ploop = loop->inner;
1419 flow_loop_tree_node_remove (ploop);
1420 flow_loop_tree_node_add (loop->outer, ploop);
1423 /* Remove the loop and free its data.  Its slot in current_loops->parray
   is set to NULL. */
1424 flow_loop_tree_node_remove (loop);
1425 current_loops->parray[loop->num] = NULL;
1426 flow_loop_free (loop);
1429 /* Rescan the bodies of loops, starting from the outermost. */
1430 loop = current_loops->tree_root;
1438 && loop != current_loops->tree_root)
1440 if (loop == current_loops->tree_root)
/* Set the block count of LOOP from the result of flow_loop_nodes_find,
   called on its header. */
1446 loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
1449 /* Now fix the loop nesting. */
1450 for (i = 1; i < current_loops->num; i++)
1452 loop = current_loops->parray[i];
/* The loop that owns the source block of the preheader edge is the parent
   LOOP should have; move LOOP in the loop tree when loop->outer differs. */
1456 bb = loop_preheader_edge (loop)->src;
1457 if (bb->loop_father != loop->outer)
1459 flow_loop_tree_node_remove (loop);
1460 flow_loop_tree_node_add (bb->loop_father, loop);
1464 /* Mark the blocks whose loop has changed.  The depth of the block's
   current loop is compared with the value saved in bb->aux above; the
   block's index is set in CHANGED_BBS when the two differ. */
1468 && (void *) (size_t) bb->loop_father->depth != bb->aux)
1469 bitmap_set_bit (changed_bbs, bb->index);
/* Redo the single-exit and the irreducible-region marking when the
   corresponding bits are set in current_loops->state. */
1474 if (current_loops->state & LOOPS_HAVE_MARKED_SINGLE_EXITS)
1475 mark_single_exit_loops ();
1476 if (current_loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
1477 mark_irreducible_loops ();