1 /* Loop unrolling and peeling.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
29 #include "cfglayout.h"
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimizations on innermost loops (with single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
38 of code, better code to optimize for further passes and in some cases
39 by fewer testings of exit conditions. The main problem is code growth,
40 that impacts performance negatively due to effect of caches.
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
61 There is a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
69 static void decide_unrolling_and_peeling (struct loops *, int);
70 static void peel_loops_completely (struct loops *, int);
71 static void decide_peel_simple (struct loop *, int);
72 static void decide_peel_once_rolling (struct loop *, int);
73 static void decide_peel_completely (struct loop *, int);
74 static void decide_unroll_stupid (struct loop *, int);
75 static void decide_unroll_constant_iterations (struct loop *, int);
76 static void decide_unroll_runtime_iterations (struct loop *, int);
77 static void peel_loop_simple (struct loops *, struct loop *);
78 static void peel_loop_completely (struct loops *, struct loop *);
79 static void unroll_loop_stupid (struct loops *, struct loop *);
80 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
81 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
83 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
85 unroll_and_peel_loops (struct loops *loops, int flags)
87 struct loop *loop, *next;
90 /* First perform complete loop peeling (it is almost surely a win,
91 and affects parameters for further decision a lot). */
92 peel_loops_completely (loops, flags);
94 /* Now decide rest of unrolling and peeling. */
95 decide_unrolling_and_peeling (loops, flags);
97 loop = loops->tree_root;
101 /* Scan the loops, inner ones first. */
102 while (loop != loops->tree_root)
114 /* And perform the appropriate transformations. */
115 switch (loop->lpt_decision.decision)
117 case LPT_PEEL_COMPLETELY:
120 case LPT_PEEL_SIMPLE:
121 peel_loop_simple (loops, loop);
123 case LPT_UNROLL_CONSTANT:
124 unroll_loop_constant_iterations (loops, loop);
126 case LPT_UNROLL_RUNTIME:
127 unroll_loop_runtime_iterations (loops, loop);
129 case LPT_UNROLL_STUPID:
130 unroll_loop_stupid (loops, loop);
140 #ifdef ENABLE_CHECKING
141 verify_dominators (CDI_DOMINATORS);
142 verify_loop_structure (loops);
151 /* Check whether exit of the LOOP is at the end of loop body. */
154 loop_exit_at_end_p (struct loop *loop)
156 struct niter_desc *desc = get_simple_loop_desc (loop);
159 if (desc->in_edge->dest != loop->latch)
162 /* Check that the latch is empty. */
163 FOR_BB_INSNS (loop->latch, insn)
172 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
174 peel_loops_completely (struct loops *loops, int flags)
176 struct loop *loop, *next;
178 loop = loops->tree_root;
182 while (loop != loops->tree_root)
193 loop->lpt_decision.decision = LPT_NONE;
197 "\n;; *** Considering loop %d for complete peeling ***\n",
200 loop->ninsns = num_loop_insns (loop);
202 decide_peel_once_rolling (loop, flags);
203 if (loop->lpt_decision.decision == LPT_NONE)
204 decide_peel_completely (loop, flags);
206 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
208 peel_loop_completely (loops, loop);
209 #ifdef ENABLE_CHECKING
210 verify_dominators (CDI_DOMINATORS);
211 verify_loop_structure (loops);
218 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
220 decide_unrolling_and_peeling (struct loops *loops, int flags)
222 struct loop *loop = loops->tree_root, *next;
227 /* Scan the loops, inner ones first. */
228 while (loop != loops->tree_root)
239 loop->lpt_decision.decision = LPT_NONE;
242 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
244 /* Do not peel cold areas. */
245 if (!maybe_hot_bb_p (loop->header))
248 fprintf (dump_file, ";; Not considering loop, cold area\n");
253 /* Can the loop be manipulated? */
254 if (!can_duplicate_loop_p (loop))
258 ";; Not considering loop, cannot duplicate\n");
263 /* Skip non-innermost loops. */
267 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
272 loop->ninsns = num_loop_insns (loop);
273 loop->av_ninsns = average_num_loop_insns (loop);
275 /* Try transformations one by one in decreasing order of
278 decide_unroll_constant_iterations (loop, flags);
279 if (loop->lpt_decision.decision == LPT_NONE)
280 decide_unroll_runtime_iterations (loop, flags);
281 if (loop->lpt_decision.decision == LPT_NONE)
282 decide_unroll_stupid (loop, flags);
283 if (loop->lpt_decision.decision == LPT_NONE)
284 decide_peel_simple (loop, flags);
290 /* Decide whether the LOOP is once rolling and suitable for complete
293 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
295 struct niter_desc *desc;
298 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
300 /* Is the loop small enough? */
301 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
304 fprintf (dump_file, ";; Not considering loop, is too big\n");
308 /* Check for simple loops. */
309 desc = get_simple_loop_desc (loop);
311 /* Check number of iterations. */
319 ";; Unable to prove that the loop rolls exactly once\n");
325 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
326 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
329 /* Decide whether the LOOP is suitable for complete peeling. */
331 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
334 struct niter_desc *desc;
337 fprintf (dump_file, "\n;; Considering peeling completely\n");
339 /* Skip non-innermost loops. */
343 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
347 /* Do not peel cold areas. */
348 if (!maybe_hot_bb_p (loop->header))
351 fprintf (dump_file, ";; Not considering loop, cold area\n");
355 /* Can the loop be manipulated? */
356 if (!can_duplicate_loop_p (loop))
360 ";; Not considering loop, cannot duplicate\n");
364 /* npeel = number of iterations to peel. */
365 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
366 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
367 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
369 /* Is the loop small enough? */
373 fprintf (dump_file, ";; Not considering loop, is too big\n");
377 /* Check for simple loops. */
378 desc = get_simple_loop_desc (loop);
380 /* Check number of iterations. */
383 || !desc->const_iter)
387 ";; Unable to prove that the loop iterates constant times\n");
391 if (desc->niter > npeel - 1)
396 ";; Not peeling loop completely, rolls too much (");
397 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
398 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
405 fprintf (dump_file, ";; Decided to peel loop completely\n");
406 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
409 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
410 completely. The transformation done:
412 for (i = 0; i < 4; i++)
424 peel_loop_completely (struct loops *loops, struct loop *loop)
427 unsigned HOST_WIDE_INT npeel;
428 unsigned n_remove_edges, i;
429 edge *remove_edges, ei;
430 struct niter_desc *desc = get_simple_loop_desc (loop);
438 wont_exit = sbitmap_alloc (npeel + 1);
439 sbitmap_ones (wont_exit);
440 RESET_BIT (wont_exit, 0);
441 if (desc->noloop_assumptions)
442 RESET_BIT (wont_exit, 1);
444 remove_edges = xcalloc (npeel, sizeof (edge));
447 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
449 wont_exit, desc->out_edge,
450 remove_edges, &n_remove_edges,
451 DLTHE_FLAG_UPDATE_FREQ);
456 /* Remove the exit edges. */
457 for (i = 0; i < n_remove_edges; i++)
458 remove_path (loops, remove_edges[i]);
463 free_simple_loop_desc (loop);
465 /* Now remove the unreachable part of the last iteration and cancel
467 remove_path (loops, ei);
470 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
473 /* Decide whether to unroll LOOP iterating constant number of times
477 decide_unroll_constant_iterations (struct loop *loop, int flags)
479 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
480 struct niter_desc *desc;
482 if (!(flags & UAP_UNROLL))
484 /* We were not asked to, just return back silently. */
490 "\n;; Considering unrolling loop with constant "
491 "number of iterations\n");
493 /* nunroll = total number of copies of the original loop body in
494 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
495 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
497 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
498 if (nunroll > nunroll_by_av)
499 nunroll = nunroll_by_av;
500 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
501 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
503 /* Skip big loops. */
507 fprintf (dump_file, ";; Not considering loop, is too big\n");
511 /* Check for simple loops. */
512 desc = get_simple_loop_desc (loop);
514 /* Check number of iterations. */
515 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
519 ";; Unable to prove that the loop iterates constant times\n");
523 /* Check whether the loop rolls enough to consider. */
524 if (desc->niter < 2 * nunroll)
527 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
531 /* Success; now compute number of iterations to unroll. We alter
532 nunroll so that as few as possible copies of loop body are
533 necessary, while still not decreasing the number of unrollings
534 too much (at most by 1). */
535 best_copies = 2 * nunroll + 10;
538 if (i - 1 >= desc->niter)
541 for (; i >= nunroll - 1; i--)
543 unsigned exit_mod = desc->niter % (i + 1);
545 if (!loop_exit_at_end_p (loop))
546 n_copies = exit_mod + i + 1;
547 else if (exit_mod != (unsigned) i
548 || desc->noloop_assumptions != NULL_RTX)
549 n_copies = exit_mod + i + 2;
553 if (n_copies < best_copies)
555 best_copies = n_copies;
561 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
562 best_unroll + 1, best_copies, nunroll);
564 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
565 loop->lpt_decision.times = best_unroll;
569 ";; Decided to unroll the constant times rolling loop, %d times.\n",
570 loop->lpt_decision.times);
573 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
574 times. The transformation does this:
576 for (i = 0; i < 102; i++)
593 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
595 unsigned HOST_WIDE_INT niter;
598 unsigned n_remove_edges, i;
600 unsigned max_unroll = loop->lpt_decision.times;
601 struct niter_desc *desc = get_simple_loop_desc (loop);
602 bool exit_at_end = loop_exit_at_end_p (loop);
607 /* Should not assert out here (such loop should be peeled instead). */
608 gcc_assert (niter > max_unroll + 1);
610 exit_mod = niter % (max_unroll + 1);
612 wont_exit = sbitmap_alloc (max_unroll + 1);
613 sbitmap_ones (wont_exit);
615 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
620 /* The exit is not at the end of the loop; leave exit test
621 in the first copy, so that the loops that start with test
622 of exit condition have continuous body after unrolling. */
625 fprintf (dump_file, ";; Condition on beginning of loop.\n");
627 /* Peel exit_mod iterations. */
628 RESET_BIT (wont_exit, 0);
629 if (desc->noloop_assumptions)
630 RESET_BIT (wont_exit, 1);
636 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
638 wont_exit, desc->out_edge,
639 remove_edges, &n_remove_edges,
640 DLTHE_FLAG_UPDATE_FREQ);
643 desc->noloop_assumptions = NULL_RTX;
644 desc->niter -= exit_mod;
645 desc->niter_max -= exit_mod;
648 SET_BIT (wont_exit, 1);
652 /* Leave exit test in last copy, for the same reason as above if
653 the loop tests the condition at the end of loop body. */
656 fprintf (dump_file, ";; Condition on end of loop.\n");
658 /* We know that niter >= max_unroll + 2; so we do not need to care of
659 case when we would exit before reaching the loop. So just peel
660 exit_mod + 1 iterations. */
661 if (exit_mod != max_unroll
662 || desc->noloop_assumptions)
666 RESET_BIT (wont_exit, 0);
667 if (desc->noloop_assumptions)
668 RESET_BIT (wont_exit, 1);
670 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
672 wont_exit, desc->out_edge,
673 remove_edges, &n_remove_edges,
674 DLTHE_FLAG_UPDATE_FREQ);
677 desc->niter -= exit_mod + 1;
678 desc->niter_max -= exit_mod + 1;
679 desc->noloop_assumptions = NULL_RTX;
681 SET_BIT (wont_exit, 0);
682 SET_BIT (wont_exit, 1);
685 RESET_BIT (wont_exit, max_unroll);
688 /* Now unroll the loop. */
689 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
691 wont_exit, desc->out_edge,
692 remove_edges, &n_remove_edges,
693 DLTHE_FLAG_UPDATE_FREQ);
700 basic_block exit_block = desc->in_edge->src->rbi->copy;
701 /* Find a new in and out edge; they are in the last copy we have made. */
703 if (exit_block->succ->dest == desc->out_edge->dest)
705 desc->out_edge = exit_block->succ;
706 desc->in_edge = exit_block->succ->succ_next;
710 desc->out_edge = exit_block->succ->succ_next;
711 desc->in_edge = exit_block->succ;
715 desc->niter /= max_unroll + 1;
716 desc->niter_max /= max_unroll + 1;
717 desc->niter_expr = GEN_INT (desc->niter);
719 /* Remove the edges. */
720 for (i = 0; i < n_remove_edges; i++)
721 remove_path (loops, remove_edges[i]);
726 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
727 max_unroll, num_loop_insns (loop));
730 /* Decide whether to unroll LOOP iterating runtime computable number of times
733 decide_unroll_runtime_iterations (struct loop *loop, int flags)
735 unsigned nunroll, nunroll_by_av, i;
736 struct niter_desc *desc;
738 if (!(flags & UAP_UNROLL))
740 /* We were not asked to, just return back silently. */
746 "\n;; Considering unrolling loop with runtime "
747 "computable number of iterations\n");
749 /* nunroll = total number of copies of the original loop body in
750 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
751 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
752 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
753 if (nunroll > nunroll_by_av)
754 nunroll = nunroll_by_av;
755 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
756 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
758 /* Skip big loops. */
762 fprintf (dump_file, ";; Not considering loop, is too big\n");
766 /* Check for simple loops. */
767 desc = get_simple_loop_desc (loop);
769 /* Check simpleness. */
770 if (!desc->simple_p || desc->assumptions)
774 ";; Unable to prove that the number of iterations "
775 "can be counted in runtime\n");
779 if (desc->const_iter)
782 fprintf (dump_file, ";; Loop iterates constant times\n");
786 /* If we have profile feedback, check whether the loop rolls. */
787 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
790 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
794 /* Success; now force nunroll to be power of 2, as we are unable to
795 cope with overflows in computation of number of iterations. */
796 for (i = 1; 2 * i <= nunroll; i *= 2)
799 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
800 loop->lpt_decision.times = i - 1;
804 ";; Decided to unroll the runtime computable "
805 "times rolling loop, %d times.\n",
806 loop->lpt_decision.times);
809 /* Unroll LOOP for that we are able to count number of iterations in runtime
810 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
811 extra care for case n < 0):
813 for (i = 0; i < n; i++)
841 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
843 rtx old_niter, niter, init_code, branch_code, tmp;
845 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
849 unsigned n_peel, n_remove_edges;
850 edge *remove_edges, e;
851 bool extra_zero_check, last_may_exit;
852 unsigned max_unroll = loop->lpt_decision.times;
853 struct niter_desc *desc = get_simple_loop_desc (loop);
854 bool exit_at_end = loop_exit_at_end_p (loop);
857 /* Remember blocks whose dominators will have to be updated. */
858 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
861 body = get_loop_body (loop);
862 for (i = 0; i < loop->num_nodes; i++)
867 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
868 for (j = 0; j < nldom; j++)
869 if (!flow_bb_inside_loop_p (loop, ldom[j]))
870 dom_bbs[n_dom_bbs++] = ldom[j];
878 /* Leave exit in first copy (for explanation why see comment in
879 unroll_loop_constant_iterations). */
881 n_peel = max_unroll - 1;
882 extra_zero_check = true;
883 last_may_exit = false;
887 /* Leave exit in last copy (for explanation why see comment in
888 unroll_loop_constant_iterations). */
889 may_exit_copy = max_unroll;
891 extra_zero_check = false;
892 last_may_exit = true;
895 /* Get expression for number of iterations. */
897 old_niter = niter = gen_reg_rtx (desc->mode);
898 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
900 emit_move_insn (niter, tmp);
902 /* Count modulo by ANDing it with max_unroll; we use the fact that
903 the number of unrollings is a power of two, and thus this is correct
904 even if there is overflow in the computation. */
905 niter = expand_simple_binop (desc->mode, AND,
907 GEN_INT (max_unroll),
908 NULL_RTX, 0, OPTAB_LIB_WIDEN);
910 init_code = get_insns ();
913 /* Precondition the loop. */
914 loop_split_edge_with (loop_preheader_edge (loop), init_code);
916 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
919 wont_exit = sbitmap_alloc (max_unroll + 2);
921 /* Peel the first copy of loop body (almost always we must leave exit test
922 here; the only exception is when we have extra zero check and the number
923 of iterations is reliable. Also record the place of (possible) extra
925 sbitmap_zero (wont_exit);
927 && !desc->noloop_assumptions)
928 SET_BIT (wont_exit, 1);
929 ezc_swtch = loop_preheader_edge (loop)->src;
930 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
932 wont_exit, desc->out_edge,
933 remove_edges, &n_remove_edges,
934 DLTHE_FLAG_UPDATE_FREQ);
937 /* Record the place where switch will be built for preconditioning. */
938 swtch = loop_split_edge_with (loop_preheader_edge (loop),
941 for (i = 0; i < n_peel; i++)
944 sbitmap_zero (wont_exit);
945 if (i != n_peel - 1 || !last_may_exit)
946 SET_BIT (wont_exit, 1);
947 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
949 wont_exit, desc->out_edge,
950 remove_edges, &n_remove_edges,
951 DLTHE_FLAG_UPDATE_FREQ);
954 /* Create item for switch. */
955 j = n_peel - i - (extra_zero_check ? 0 : 1);
956 p = REG_BR_PROB_BASE / (i + 2);
958 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
959 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
960 block_label (preheader), p, NULL_RTX);
962 swtch = loop_split_edge_with (swtch->pred, branch_code);
963 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
964 swtch->succ->probability = REG_BR_PROB_BASE - p;
965 e = make_edge (swtch, preheader,
966 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
970 if (extra_zero_check)
972 /* Add branch for zero iterations. */
973 p = REG_BR_PROB_BASE / (max_unroll + 1);
975 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
976 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
977 block_label (preheader), p, NULL_RTX);
979 swtch = loop_split_edge_with (swtch->succ, branch_code);
980 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
981 swtch->succ->probability = REG_BR_PROB_BASE - p;
982 e = make_edge (swtch, preheader,
983 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
987 /* Recount dominators for outer blocks. */
988 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
990 /* And unroll loop. */
992 sbitmap_ones (wont_exit);
993 RESET_BIT (wont_exit, may_exit_copy);
995 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
997 wont_exit, desc->out_edge,
998 remove_edges, &n_remove_edges,
999 DLTHE_FLAG_UPDATE_FREQ);
1006 basic_block exit_block = desc->in_edge->src->rbi->copy;
1007 /* Find a new in and out edge; they are in the last copy we have made. */
1009 if (exit_block->succ->dest == desc->out_edge->dest)
1011 desc->out_edge = exit_block->succ;
1012 desc->in_edge = exit_block->succ->succ_next;
1016 desc->out_edge = exit_block->succ->succ_next;
1017 desc->in_edge = exit_block->succ;
1021 /* Remove the edges. */
1022 for (i = 0; i < n_remove_edges; i++)
1023 remove_path (loops, remove_edges[i]);
1024 free (remove_edges);
1026 /* We must be careful when updating the number of iterations due to
1027 preconditioning and the fact that the value must be valid at entry
1028 of the loop. After passing through the above code, we see that
1029 the correct new number of iterations is this: */
1030 gcc_assert (!desc->const_iter);
1032 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1033 desc->niter_max /= max_unroll + 1;
1037 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1038 desc->noloop_assumptions = NULL_RTX;
1044 ";; Unrolled loop %d times, counting # of iterations "
1045 "in runtime, %i insns\n",
1046 max_unroll, num_loop_insns (loop));
1049 /* Decide whether to simply peel LOOP and how much. */
1051 decide_peel_simple (struct loop *loop, int flags)
1054 struct niter_desc *desc;
1056 if (!(flags & UAP_PEEL))
1058 /* We were not asked to, just return back silently. */
1063 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1065 /* npeel = number of iterations to peel. */
1066 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1067 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1068 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1070 /* Skip big loops. */
1074 fprintf (dump_file, ";; Not considering loop, is too big\n");
1078 /* Check for simple loops. */
1079 desc = get_simple_loop_desc (loop);
1081 /* Check number of iterations. */
1082 if (desc->simple_p && !desc->assumptions && desc->const_iter)
1085 fprintf (dump_file, ";; Loop iterates constant times\n");
1089 /* Do not simply peel loops with branches inside -- it increases number
1091 if (num_loop_branches (loop) > 1)
1094 fprintf (dump_file, ";; Not peeling, contains branches\n");
1098 if (loop->header->count)
1100 unsigned niter = expected_loop_iterations (loop);
1101 if (niter + 1 > npeel)
1105 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1106 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1107 (HOST_WIDEST_INT) (niter + 1));
1108 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1117 /* For now we have no good heuristics to decide whether loop peeling
1118 will be effective, so disable it. */
1121 ";; Not peeling loop, no evidence it will be profitable\n");
1126 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1127 loop->lpt_decision.times = npeel;
1130 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1131 loop->lpt_decision.times);
1134 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1140 if (!cond) goto end;
1142 if (!cond) goto end;
1149 peel_loop_simple (struct loops *loops, struct loop *loop)
1152 unsigned npeel = loop->lpt_decision.times;
1153 struct niter_desc *desc = get_simple_loop_desc (loop);
1156 wont_exit = sbitmap_alloc (npeel + 1);
1157 sbitmap_zero (wont_exit);
1159 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1160 loops, npeel, wont_exit,
1162 DLTHE_FLAG_UPDATE_FREQ);
1169 if (desc->const_iter)
1171 desc->niter -= npeel;
1172 desc->niter_expr = GEN_INT (desc->niter);
1173 desc->noloop_assumptions = NULL_RTX;
1177 /* We cannot just update niter_expr, as its value might be clobbered
1178 inside loop. We could handle this by counting the number into
1179 temporary just like we do in runtime unrolling, but it does not
1181 free_simple_loop_desc (loop);
1185 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1188 /* Decide whether to unroll LOOP stupidly and how much. */
1190 decide_unroll_stupid (struct loop *loop, int flags)
1192 unsigned nunroll, nunroll_by_av, i;
1193 struct niter_desc *desc;
1195 if (!(flags & UAP_UNROLL_ALL))
1197 /* We were not asked to, just return back silently. */
1202 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1204 /* nunroll = total number of copies of the original loop body in
1205 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1206 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1208 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1209 if (nunroll > nunroll_by_av)
1210 nunroll = nunroll_by_av;
1211 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1212 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1214 /* Skip big loops. */
1218 fprintf (dump_file, ";; Not considering loop, is too big\n");
1222 /* Check for simple loops. */
1223 desc = get_simple_loop_desc (loop);
1225 /* Check simpleness. */
1226 if (desc->simple_p && !desc->assumptions)
1229 fprintf (dump_file, ";; The loop is simple\n");
1233 /* Do not unroll loops with branches inside -- it increases number
1235 if (num_loop_branches (loop) > 1)
1238 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1242 /* If we have profile feedback, check whether the loop rolls. */
1243 if (loop->header->count
1244 && expected_loop_iterations (loop) < 2 * nunroll)
1247 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1251 /* Success. Now force nunroll to be power of 2, as it seems that this
1252 improves results (partially because of better alignments, partially
1253 because of some dark magic). */
1254 for (i = 1; 2 * i <= nunroll; i *= 2)
1257 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1258 loop->lpt_decision.times = i - 1;
1262 ";; Decided to unroll the loop stupidly, %d times.\n",
1263 loop->lpt_decision.times);
1266 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1284 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1287 unsigned nunroll = loop->lpt_decision.times;
1288 struct niter_desc *desc = get_simple_loop_desc (loop);
1291 wont_exit = sbitmap_alloc (nunroll + 1);
1292 sbitmap_zero (wont_exit);
1294 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1295 loops, nunroll, wont_exit,
1297 DLTHE_FLAG_UPDATE_FREQ);
1304 /* We indeed may get here provided that there are nontrivial assumptions
1305 for a loop to be really simple. We could update the counts, but the
1306 problem is that we are unable to decide which exit will be taken
1307 (not really true in case the number of iterations is constant,
1308 but noone will do anything with this information, so we do not
1310 desc->simple_p = false;
1314 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1315 nunroll, num_loop_insns (loop));