1 /* Loop unrolling and peeling.
2 Copyright (C) 2002 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
29 #include "cfglayout.h"
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimalizations on innermost loops (with single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
38 of code, better code to optimize for futher passes and in some cases
39 by fewer testings of exit conditions. The main problem is code growth,
40 that impacts performance negatively due to effect of caches.
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
61 There is a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
69 static void decide_unrolling_and_peeling PARAMS ((struct loops *, int));
70 static void peel_loops_completely PARAMS ((struct loops *, int));
71 static void decide_peel_simple PARAMS ((struct loops *, struct loop *, int));
72 static void decide_peel_once_rolling PARAMS ((struct loops *, struct loop *, int));
73 static void decide_peel_completely PARAMS ((struct loops *, struct loop *, int));
74 static void decide_unroll_stupid PARAMS ((struct loops *, struct loop *, int));
75 static void decide_unroll_constant_iterations PARAMS ((struct loops *, struct loop *, int));
76 static void decide_unroll_runtime_iterations PARAMS ((struct loops *, struct loop *, int));
77 static void peel_loop_simple PARAMS ((struct loops *, struct loop *));
78 static void peel_loop_completely PARAMS ((struct loops *, struct loop *));
79 static void unroll_loop_stupid PARAMS ((struct loops *, struct loop *));
80 static void unroll_loop_constant_iterations PARAMS ((struct loops *,
82 static void unroll_loop_runtime_iterations PARAMS ((struct loops *,
85 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
87 unroll_and_peel_loops (loops, flags)
91 struct loop *loop, *next;
94 /* First perform complete loop peeling (it is almost surely a win,
95 and affects parameters for further decision a lot). */
96 peel_loops_completely (loops, flags);
98 /* Now decide rest of unrolling and peeling. */
99 decide_unrolling_and_peeling (loops, flags);
101 loop = loops->tree_root;
105 /* Scan the loops, inner ones first. */
106 while (loop != loops->tree_root)
118 /* And perform the appropriate transformations. */
119 switch (loop->lpt_decision.decision)
121 case LPT_PEEL_COMPLETELY:
124 case LPT_PEEL_SIMPLE:
125 peel_loop_simple (loops, loop);
127 case LPT_UNROLL_CONSTANT:
128 unroll_loop_constant_iterations (loops, loop);
130 case LPT_UNROLL_RUNTIME:
131 unroll_loop_runtime_iterations (loops, loop);
133 case LPT_UNROLL_STUPID:
134 unroll_loop_stupid (loops, loop);
144 #ifdef ENABLE_CHECKING
145 verify_dominators (loops->cfg.dom);
146 verify_loop_structure (loops);
153 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
155 peel_loops_completely (loops, flags)
159 struct loop *loop, *next;
161 loop = loops->tree_root;
165 while (loop != loops->tree_root)
176 loop->lpt_decision.decision = LPT_NONE;
180 fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
183 /* Do not peel cold areas. */
184 if (!maybe_hot_bb_p (loop->header))
187 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
192 /* Can the loop be manipulated? */
193 if (!can_duplicate_loop_p (loop))
196 fprintf (rtl_dump_file,
197 ";; Not considering loop, cannot duplicate\n");
202 loop->ninsns = num_loop_insns (loop);
204 decide_peel_once_rolling (loops, loop, flags);
205 if (loop->lpt_decision.decision == LPT_NONE)
206 decide_peel_completely (loops, loop, flags);
208 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
210 peel_loop_completely (loops, loop);
211 #ifdef ENABLE_CHECKING
212 verify_dominators (loops->cfg.dom);
213 verify_loop_structure (loops);
220 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
222 decide_unrolling_and_peeling (loops, flags)
226 struct loop *loop = loops->tree_root, *next;
231 /* Scan the loops, inner ones first. */
232 while (loop != loops->tree_root)
243 loop->lpt_decision.decision = LPT_NONE;
246 fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
248 /* Do not peel cold areas. */
249 if (!maybe_hot_bb_p (loop->header))
252 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
257 /* Can the loop be manipulated? */
258 if (!can_duplicate_loop_p (loop))
261 fprintf (rtl_dump_file,
262 ";; Not considering loop, cannot duplicate\n");
267 /* Skip non-innermost loops. */
271 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
276 loop->ninsns = num_loop_insns (loop);
277 loop->av_ninsns = average_num_loop_insns (loop);
279 /* Try transformations one by one in decreasing order of
282 decide_unroll_constant_iterations (loops, loop, flags);
283 if (loop->lpt_decision.decision == LPT_NONE)
284 decide_unroll_runtime_iterations (loops, loop, flags);
285 if (loop->lpt_decision.decision == LPT_NONE)
286 decide_unroll_stupid (loops, loop, flags);
287 if (loop->lpt_decision.decision == LPT_NONE)
288 decide_peel_simple (loops, loop, flags);
294 /* Decide whether the LOOP is once rolling and suitable for complete
297 decide_peel_once_rolling (loops, loop, flags)
300 int flags ATTRIBUTE_UNUSED;
303 fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
305 /* Is the loop small enough? */
306 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
309 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
313 /* Check for simple loops. */
314 loop->simple = simple_loop_p (loops, loop, &loop->desc);
317 /* Check number of iterations. */
318 if (!loop->simple || !loop->desc.const_iter || loop->desc.niter !=0)
321 fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
327 fprintf (rtl_dump_file, ";; Decided to peel exactly once rolling loop\n");
328 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
331 /* Decide whether the LOOP is suitable for complete peeling. */
333 decide_peel_completely (loops, loop, flags)
336 int flags ATTRIBUTE_UNUSED;
341 fprintf (rtl_dump_file, ";; Considering peeling completely\n");
343 /* Skip non-innermost loops. */
347 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
351 /* npeel = number of iterations to peel. */
352 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
353 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
354 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
356 /* Is the loop small enough? */
360 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
364 /* Check for simple loops. */
366 loop->simple = simple_loop_p (loops, loop, &loop->desc);
368 /* Check number of iterations. */
369 if (!loop->simple || !loop->desc.const_iter)
372 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
376 if (loop->desc.niter > npeel - 1)
380 fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
381 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
382 fprintf (rtl_dump_file, "iterations > %d [maximum peelings])\n", npeel);
389 fprintf (rtl_dump_file, ";; Decided to peel loop completely\n");
390 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
393 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
394 completely. The transformation done:
396 for (i = 0; i < 4; i++)
408 peel_loop_completely (loops, loop)
413 unsigned HOST_WIDE_INT npeel;
415 unsigned n_remove_edges, i;
417 struct loop_desc *desc = &loop->desc;
421 wont_exit = sbitmap_alloc (npeel + 2);
422 sbitmap_ones (wont_exit);
423 RESET_BIT (wont_exit, 0);
424 RESET_BIT (wont_exit, npeel + 1);
425 if (desc->may_be_zero)
426 RESET_BIT (wont_exit, 1);
428 remove_edges = xcalloc (npeel, sizeof (edge));
431 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
433 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
434 DLTHE_FLAG_UPDATE_FREQ))
439 /* Remove the exit edges. */
440 for (i = 0; i < n_remove_edges; i++)
441 remove_path (loops, remove_edges[i]);
444 /* Now remove the loop. */
445 for (e = RBI (desc->in_edge->src)->copy->succ;
446 e && e->dest != RBI (desc->in_edge->dest)->copy;
452 remove_path (loops, e);
455 fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
458 /* Decide whether to unroll LOOP iterating constant number of times and how much. */
460 decide_unroll_constant_iterations (loops, loop, flags)
465 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
467 if (!(flags & UAP_UNROLL))
469 /* We were not asked to, just return back silently. */
474 fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
476 /* nunroll = total number of copies of the original loop body in
477 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
478 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
479 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
480 if (nunroll > nunroll_by_av)
481 nunroll = nunroll_by_av;
482 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
483 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
485 /* Skip big loops. */
489 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
493 /* Check for simple loops. */
495 loop->simple = simple_loop_p (loops, loop, &loop->desc);
497 /* Check number of iterations. */
498 if (!loop->simple || !loop->desc.const_iter)
501 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
505 /* Check whether the loop rolls enough to consider. */
506 if (loop->desc.niter < 2 * nunroll)
509 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
513 /* Success; now compute number of iterations to unroll. We alter
514 nunroll so that as few as possible copies of loop body are
515 neccesary, while still not decreasing the number of unrollings
516 too much (at most by 1). */
517 best_copies = 2 * nunroll + 10;
520 if ((unsigned) i - 1 >= loop->desc.niter)
521 i = loop->desc.niter - 2;
523 for (; i >= nunroll - 1; i--)
525 unsigned exit_mod = loop->desc.niter % (i + 1);
527 if (loop->desc.postincr)
528 n_copies = exit_mod + i + 1;
529 else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
530 n_copies = exit_mod + i + 2;
534 if (n_copies < best_copies)
536 best_copies = n_copies;
542 fprintf (rtl_dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
543 best_unroll + 1, best_copies, nunroll);
545 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
546 loop->lpt_decision.times = best_unroll;
549 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
550 times. The transformation does this:
552 for (i = 0; i < 102; i++)
569 unroll_loop_constant_iterations (loops, loop)
573 unsigned HOST_WIDE_INT niter;
576 unsigned n_remove_edges, i;
578 unsigned max_unroll = loop->lpt_decision.times;
579 struct loop_desc *desc = &loop->desc;
583 if (niter <= (unsigned) max_unroll + 1)
584 abort (); /* Should not get here (such loop should be peeled instead). */
586 exit_mod = niter % (max_unroll + 1);
588 wont_exit = sbitmap_alloc (max_unroll + 1);
589 sbitmap_ones (wont_exit);
591 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
596 /* Counter is incremented after the exit test; leave exit test
597 in the first copy, so that the loops that start with test
598 of exit condition have continuous body after unrolling. */
601 fprintf (rtl_dump_file, ";; Condition on beginning of loop.\n");
603 /* Peel exit_mod iterations. */
604 RESET_BIT (wont_exit, 0);
605 if (desc->may_be_zero)
606 RESET_BIT (wont_exit, 1);
609 && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
611 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
612 DLTHE_FLAG_UPDATE_FREQ))
615 SET_BIT (wont_exit, 1);
619 /* Leave exit test in last copy, for the same reason as above if
620 the loop tests the condition at the end of loop body. */
623 fprintf (rtl_dump_file, ";; Condition on end of loop.\n");
625 /* We know that niter >= max_unroll + 2; so we do not need to care of
626 case when we would exit before reaching the loop. So just peel
627 exit_mod + 1 iterations.
629 if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
631 RESET_BIT (wont_exit, 0);
632 if (desc->may_be_zero)
633 RESET_BIT (wont_exit, 1);
635 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
637 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
638 DLTHE_FLAG_UPDATE_FREQ))
641 SET_BIT (wont_exit, 0);
642 SET_BIT (wont_exit, 1);
645 RESET_BIT (wont_exit, max_unroll);
648 /* Now unroll the loop. */
649 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
651 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
652 DLTHE_FLAG_UPDATE_FREQ))
657 /* Remove the edges. */
658 for (i = 0; i < n_remove_edges; i++)
659 remove_path (loops, remove_edges[i]);
663 fprintf (rtl_dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll, num_loop_insns (loop));
666 /* Decide whether to unroll LOOP iterating runtime computable number of times
669 decide_unroll_runtime_iterations (loops, loop, flags)
674 unsigned nunroll, nunroll_by_av, i;
676 if (!(flags & UAP_UNROLL))
678 /* We were not asked to, just return back silently. */
683 fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
685 /* nunroll = total number of copies of the original loop body in
686 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
687 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
688 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
689 if (nunroll > nunroll_by_av)
690 nunroll = nunroll_by_av;
691 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
692 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
694 /* Skip big loops. */
698 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
702 /* Check for simple loops. */
704 loop->simple = simple_loop_p (loops, loop, &loop->desc);
706 /* Check simpleness. */
710 fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
714 if (loop->desc.const_iter)
717 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
721 /* If we have profile feedback, check whether the loop rolls. */
722 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
725 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
729 /* Success; now force nunroll to be power of 2, as we are unable to
730 cope with overflows in computation of number of iterations. */
731 for (i = 1; 2 * i <= nunroll; i *= 2);
733 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
734 loop->lpt_decision.times = i - 1;
737 /* Unroll LOOP for that we are able to count number of iterations in runtime
738 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
739 extra care for case n < 0):
741 for (i = 0; i < n; i++)
769 unroll_loop_runtime_iterations (loops, loop)
773 rtx niter, init_code, branch_code, jump, label;
775 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
779 unsigned n_peel, n_remove_edges;
780 edge *remove_edges, e;
781 bool extra_zero_check, last_may_exit;
782 unsigned max_unroll = loop->lpt_decision.times;
783 struct loop_desc *desc = &loop->desc;
785 /* Remember blocks whose dominators will have to be updated. */
786 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
789 body = get_loop_body (loop);
790 for (i = 0; i < loop->num_nodes; i++)
795 nldom = get_dominated_by (loops->cfg.dom, body[i], &ldom);
796 for (j = 0; j < nldom; j++)
797 if (!flow_bb_inside_loop_p (loop, ldom[j]))
798 dom_bbs[n_dom_bbs++] = ldom[j];
806 /* Leave exit in first copy (for explanation why see comment in
807 unroll_loop_constant_iterations). */
809 n_peel = max_unroll - 1;
810 extra_zero_check = true;
811 last_may_exit = false;
815 /* Leave exit in last copy (for explanation why see comment in
816 unroll_loop_constant_iterations). */
817 may_exit_copy = max_unroll;
819 extra_zero_check = false;
820 last_may_exit = true;
823 /* Get expression for number of iterations. */
825 niter = count_loop_iterations (desc, NULL, NULL);
828 niter = force_operand (niter, NULL);
830 /* Count modulo by ANDing it with max_unroll; we use the fact that
831 the number of unrollings is a power of two, and thus this is correct
832 even if there is overflow in the computation. */
833 niter = expand_simple_binop (GET_MODE (desc->var), AND,
835 GEN_INT (max_unroll),
836 NULL_RTX, 0, OPTAB_LIB_WIDEN);
838 init_code = get_insns ();
841 /* Precondition the loop. */
842 loop_split_edge_with (loop_preheader_edge (loop), init_code, loops);
844 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
847 wont_exit = sbitmap_alloc (max_unroll + 2);
849 /* Peel the first copy of loop body (almost always we must leave exit test
850 here; the only exception is when we have extra zero check and the number
851 of iterations is reliable (i.e. comes out of NE condition). Also record
852 the place of (possible) extra zero check. */
853 sbitmap_zero (wont_exit);
854 if (extra_zero_check && desc->cond == NE)
855 SET_BIT (wont_exit, 1);
856 ezc_swtch = loop_preheader_edge (loop)->src;
857 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
859 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
860 DLTHE_FLAG_UPDATE_FREQ))
863 /* Record the place where switch will be built for preconditioning. */
864 swtch = loop_split_edge_with (loop_preheader_edge (loop),
867 for (i = 0; i < n_peel; i++)
870 sbitmap_zero (wont_exit);
871 if (i != n_peel - 1 || !last_may_exit)
872 SET_BIT (wont_exit, 1);
873 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
875 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
876 DLTHE_FLAG_UPDATE_FREQ))
881 /* Create item for switch. */
882 j = n_peel - i - (extra_zero_check ? 0 : 1);
883 p = REG_BR_PROB_BASE / (i + 2);
885 preheader = loop_split_edge_with (loop_preheader_edge (loop),
887 label = block_label (preheader);
889 do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
890 GET_MODE (desc->var), NULL_RTX, NULL_RTX,
892 jump = get_last_insn ();
893 JUMP_LABEL (jump) = label;
895 = gen_rtx_EXPR_LIST (REG_BR_PROB,
896 GEN_INT (p), REG_NOTES (jump));
898 LABEL_NUSES (label)++;
899 branch_code = get_insns ();
902 swtch = loop_split_edge_with (swtch->pred, branch_code, loops);
903 set_immediate_dominator (loops->cfg.dom, preheader, swtch);
904 swtch->succ->probability = REG_BR_PROB_BASE - p;
905 e = make_edge (swtch, preheader, 0);
910 if (extra_zero_check)
912 /* Add branch for zero iterations. */
913 p = REG_BR_PROB_BASE / (max_unroll + 1);
915 preheader = loop_split_edge_with (loop_preheader_edge (loop),
917 label = block_label (preheader);
919 do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
920 GET_MODE (desc->var), NULL_RTX, NULL_RTX,
922 jump = get_last_insn ();
923 JUMP_LABEL (jump) = label;
925 = gen_rtx_EXPR_LIST (REG_BR_PROB,
926 GEN_INT (p), REG_NOTES (jump));
928 LABEL_NUSES (label)++;
929 branch_code = get_insns ();
932 swtch = loop_split_edge_with (swtch->succ, branch_code, loops);
933 set_immediate_dominator (loops->cfg.dom, preheader, swtch);
934 swtch->succ->probability = REG_BR_PROB_BASE - p;
935 e = make_edge (swtch, preheader, 0);
939 /* Recount dominators for outer blocks. */
940 iterate_fix_dominators (loops->cfg.dom, dom_bbs, n_dom_bbs);
942 /* And unroll loop. */
944 sbitmap_ones (wont_exit);
945 RESET_BIT (wont_exit, may_exit_copy);
947 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
949 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
950 DLTHE_FLAG_UPDATE_FREQ))
955 /* Remove the edges. */
956 for (i = 0; i < n_remove_edges; i++)
957 remove_path (loops, remove_edges[i]);
961 fprintf (rtl_dump_file,
962 ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
963 max_unroll, num_loop_insns (loop));
966 /* Decide whether to simply peel LOOP and how much. */
968 decide_peel_simple (loops, loop, flags)
975 if (!(flags & UAP_PEEL))
977 /* We were not asked to, just return back silently. */
982 fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
984 /* npeel = number of iterations to peel. */
985 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
986 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
987 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
989 /* Skip big loops. */
993 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
997 /* Check for simple loops. */
999 loop->simple = simple_loop_p (loops, loop, &loop->desc);
1001 /* Check number of iterations. */
1002 if (loop->simple && loop->desc.const_iter)
1005 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
1009 /* Do not simply peel loops with branches inside -- it increases number
1011 if (loop->desc.n_branches > 1)
1014 fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
1018 if (loop->header->count)
1020 unsigned niter = expected_loop_iterations (loop);
1021 if (niter + 1 > npeel)
1025 fprintf (rtl_dump_file, ";; Not peeling loop, rolls too much (");
1026 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) (niter + 1));
1027 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
1035 /* For now we have no good heuristics to decide whether loop peeling
1036 will be effective, so disable it. */
1038 fprintf (rtl_dump_file,
1039 ";; Not peeling loop, no evidence it will be profitable\n");
1044 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1045 loop->lpt_decision.times = npeel;
1048 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1054 if (!cond) goto end;
1056 if (!cond) goto end;
1063 peel_loop_simple (loops, loop)
1064 struct loops *loops;
1068 unsigned npeel = loop->lpt_decision.times;
1070 wont_exit = sbitmap_alloc (npeel + 1);
1071 sbitmap_zero (wont_exit);
1073 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1074 loops, npeel, wont_exit, NULL, NULL, NULL,
1075 DLTHE_FLAG_UPDATE_FREQ))
1081 fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
1084 /* Decide whether to unroll LOOP stupidly and how much. */
1086 decide_unroll_stupid (loops, loop, flags)
1087 struct loops *loops;
1091 unsigned nunroll, nunroll_by_av, i;
1093 if (!(flags & UAP_UNROLL_ALL))
1095 /* We were not asked to, just return back silently. */
1100 fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
1102 /* nunroll = total number of copies of the original loop body in
1103 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1104 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1105 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1106 if (nunroll > nunroll_by_av)
1107 nunroll = nunroll_by_av;
1108 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1109 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1111 /* Skip big loops. */
1115 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1119 /* Check for simple loops. */
1120 if (!loop->has_desc)
1121 loop->simple = simple_loop_p (loops, loop, &loop->desc);
1123 /* Check simpleness. */
1127 fprintf (rtl_dump_file, ";; The loop is simple\n");
1131 /* Do not unroll loops with branches inside -- it increases number
1133 if (loop->desc.n_branches > 1)
1136 fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
1140 /* If we have profile feedback, check whether the loop rolls. */
1141 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
1144 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
1148 /* Success. Now force nunroll to be power of 2, as it seems that this
1149 improves results (partially because of better aligments, partially
1150 because of some dark magic). */
1151 for (i = 1; 2 * i <= nunroll; i *= 2);
1153 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1154 loop->lpt_decision.times = i - 1;
1157 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1175 unroll_loop_stupid (loops, loop)
1176 struct loops *loops;
1180 unsigned nunroll = loop->lpt_decision.times;
1182 wont_exit = sbitmap_alloc (nunroll + 1);
1183 sbitmap_zero (wont_exit);
1185 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1186 loops, nunroll, wont_exit, NULL, NULL, NULL,
1187 DLTHE_FLAG_UPDATE_FREQ))
1193 fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
1194 nunroll, num_loop_insns (loop));