1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
any later version.
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
51 /* Named probability levels, all expressed on the REG_BR_PROB_BASE scale.
   These are rough guesses rather than measured values.  PROB_LIKELY and
   PROB_VERY_LIKELY are defined as the complements of PROB_UNLIKELY and
   PROB_VERY_UNLIKELY, so each pair sums to REG_BR_PROB_BASE.  */
52 #define PROB_NEVER (0)
53 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 10 - 1)
54 #define PROB_UNLIKELY (REG_BR_PROB_BASE * 4 / 10 - 1)
55 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
56 #define PROB_LIKELY (REG_BR_PROB_BASE - PROB_UNLIKELY)
57 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
58 #define PROB_ALWAYS (REG_BR_PROB_BASE)
/* Forward declarations of the file-local (static) helpers.  Each parameter
   list is wrapped in PARAMS.
   NOTE(review): the dump_prediction declaration below stops after its
   second parameter in this copy; the rest of it is not visible here.  */
60 static void combine_predictions_for_insn PARAMS ((rtx, basic_block));
61 static void dump_prediction PARAMS ((enum br_predictor, int,
63 static void estimate_loops_at_level PARAMS ((struct loop *loop));
64 static void propagate_freq PARAMS ((basic_block));
65 static void estimate_bb_frequencies PARAMS ((struct loops *));
66 static void counts_to_freqs PARAMS ((void));
68 /* Information we hold about each branch predictor.
   Filled using information from predict.def: DEF_PREDICTOR below expands
   each entry into a {NAME, HITRATE} initializer (its ENUM argument is not
   used by this expansion), and predict.def is included inside the
   initializer of predictor_info[].  Elsewhere in this file the table is
   indexed by an enum br_predictor value.  */
72 const char *name; /* Name used in the debugging dumps. */
73 int hitrate; /* Default hit rate; read by predict_insn_def and
   predict_edge_def as the starting probability. */
77 #define DEF_PREDICTOR(ENUM, NAME, HITRATE) {NAME, HITRATE},
78 struct predictor_info predictor_info[] = {
79 #include "predict.def"
81 /* Upper bound on non-language-specific builtins. */
/* Record a prediction for INSN.  PREDICTOR and PROBABILITY are packed as two
   integer constants in a CONCAT, which becomes the payload of a REG_BR_PRED
   expression-list entry.  INSN is first tested with any_condjump_p.
   NOTE(review): neither the statement guarded by that test nor the target
   of the assignment is visible in this copy -- presumably the new entry is
   pushed onto REG_NOTES (insn); confirm against the upstream file.  */
87 predict_insn (insn, predictor, probability)
90 enum br_predictor predictor;
92 if (!any_condjump_p (insn))
95 = gen_rtx_EXPR_LIST (REG_BR_PRED,
96 gen_rtx_CONCAT (VOIDmode,
97 GEN_INT ((int) predictor),
98 GEN_INT ((int) probability)),
102 /* Predict INSN by PREDICTOR, starting from that predictor's default hit
   rate in predictor_info[].  The probability can be complemented
   (REG_BR_PROB_BASE - probability) before it is handed to predict_insn.
   NOTE(review): the condition guarding the complement is not visible in
   this copy -- presumably it tests TAKEN; confirm.  */
104 predict_insn_def (insn, predictor, taken)
106 enum br_predictor predictor;
107 enum prediction taken;
109 int probability = predictor_info[(int) predictor].hitrate;
111 probability = REG_BR_PROB_BASE - probability;
112 predict_insn (insn, predictor, probability);
115 /* Predict edge E with given PROBABILITY if possible.  The prediction is
   recorded through predict_insn on the last insn of E's source block
   (e->src->end), tagged with PREDICTOR.  */
117 predict_edge (e, predictor, probability)
120 enum br_predictor predictor;
123 last_insn = e->src->end;
125 /* We can store the branch prediction information only about
   conditional jumps. */
127 if (!any_condjump_p (last_insn))
130 /* We always store the probability of the branch being taken, so a
   probability given for the fallthru edge is complemented first. */
131 if (e->flags & EDGE_FALLTHRU)
132 probability = REG_BR_PROB_BASE - probability;
134 predict_insn (last_insn, predictor, probability);
137 /* Predict edge E by PREDICTOR if possible, starting from that predictor's
   default hit rate in predictor_info[].  The probability can be complemented
   (REG_BR_PROB_BASE - probability) before it is handed to predict_edge.
   NOTE(review): the condition guarding the complement is not visible in
   this copy -- presumably it tests TAKEN; confirm.  */
139 predict_edge_def (e, predictor, taken)
141 enum br_predictor predictor;
142 enum prediction taken;
144 int probability = predictor_info[(int) predictor].hitrate;
147 probability = REG_BR_PROB_BASE - probability;
148 predict_edge (e, predictor, probability);
151 /* Invert all branch predictions or probability notes in the INSN.  This
   needs to be done each time we invert the condition used by the jump.
   The notes are visited starting from REG_NOTES (insn) and following
   XEXP (note, 1).  A REG_BR_PROB note stores its probability directly in
   operand 0; a REG_BR_PRED note stores it as operand 1 of the pair held in
   operand 0.  In both cases the value P is replaced by
   REG_BR_PROB_BASE - P.  */
154 invert_br_probabilities (insn)
157 rtx note = REG_NOTES (insn);
161 if (REG_NOTE_KIND (note) == REG_BR_PROB)
162 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
163 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
164 XEXP (XEXP (note, 0), 1)
165 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
166 note = XEXP (note, 1);
170 /* Dump information about the branch prediction to the output file
   (rtl_dump_file).  The line shows the name of PREDICTOR taken from
   predictor_info[] and PROBABILITY as a percentage of REG_BR_PROB_BASE.
   It can be followed by the execution count of BB, the count of edge E and
   their ratio as a percentage.
   NOTE(review): the guards around the output and the initialization of E
   are not visible in this copy.  */
172 dump_prediction (predictor, probability, bb)
173 enum br_predictor predictor;
/* Loop on fallthru edges; presumably this steps E forward to the first
   non-fallthru edge (the loop body is not visible here).  */
182 while (e->flags & EDGE_FALLTHRU)
185 fprintf (rtl_dump_file, " %s heuristics: %.1f%%",
186 predictor_info[predictor].name,
187 probability * 100.0 / REG_BR_PROB_BASE);
191 fprintf (rtl_dump_file, " exec ");
192 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
193 (HOST_WIDEST_INT) bb->count);
194 fprintf (rtl_dump_file, " hit ");
195 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
196 (HOST_WIDEST_INT) e->count);
197 fprintf (rtl_dump_file, " (%.1f%%)",
198 e->count * 100.0 / bb->count);
200 fprintf (rtl_dump_file, "\n");
203 /* Combine all REG_BR_PRED notes on INSN into a single probability and attach
   a REG_BR_PROB note if not already present.  The REG_BR_PRED notes are
   unlinked from the note list as they are consumed.  BB is only passed
   through to dump_prediction for the debugging dump.  */
206 combine_predictions_for_insn (insn, bb)
210 rtx prob_note = find_reg_note (insn, REG_BR_PROB, 0);
/* PNOTE addresses the link through which the current note is reached, so a
   note can be unlinked by storing its successor through PNOTE.  */
211 rtx *pnote = &REG_NOTES (insn);
212 int best_probability = PROB_EVEN;
213 int best_predictor = END_PREDICTORS;
216 fprintf (rtl_dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
219 /* We implement "first match" heuristics and use probability guessed
   by predictor with smallest index. In future we will use better
   probability combination techniques. */
224 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
226 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
227 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
229 dump_prediction (predictor, probability, bb);
/* BEST_PREDICTOR starts at END_PREDICTORS, so the prediction with the
   smallest predictor index seen so far is the one kept.  */
230 if (best_predictor > predictor)
231 best_probability = probability, best_predictor = predictor;
/* Unlink the consumed REG_BR_PRED note.  */
232 *pnote = XEXP (*pnote, 1);
235 pnote = &XEXP (*pnote, 1);
237 dump_prediction (PRED_FIRST_MATCH, best_probability, bb);
241 = gen_rtx_EXPR_LIST (REG_BR_PROB,
242 GEN_INT (best_probability), REG_NOTES (insn));
/* Overview of estimate_probability (LOOPS_INFO describes the loops of the
   current function):
   1. Allocate dominator and post-dominator bit vectors and fill them with
      calculate_dominance_info.
   2. For every block of every loop, predict edges to the loop header with
      PRED_LOOP_BRANCH (TAKEN) and edges leaving the loop with
      PRED_LOOP_EXIT (NOT_TAKEN).
   3. For every basic block, apply the PRED_NORETURN, PRED_ERROR_RETURN,
      PRED_CALL, PRED_POINTER, PRED_UNCONDITIONAL and PRED_OPCODE
      predictors where their conditions hold.
   4. Call combine_predictions_for_insn on each block that ends in a
      conditional jump, free both bit vectors and call
      estimate_bb_frequencies.
   NOTE(review): in this copy several comments have no visible terminator
   (the one directly below and those beginning "Try to predict out blocks",
   "Loop branch heuristics" and "The call heuristic"), and the case labels
   of both switch statements are not visible; restore them from the
   upstream file.  */
246 /* Statically estimate the probability that a branch will be taken.
   ??? In the next revision there will be a number of other predictors added
   from the above references. Further, each heuristic will be factored out
   into its own function for clarity (and to facilitate the combination of
253 estimate_probability (loops_info)
254 struct loops *loops_info;
256 sbitmap *dominators, *post_dominators;
259 dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
260 post_dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
261 calculate_dominance_info (NULL, dominators, 0);
262 calculate_dominance_info (NULL, post_dominators, 1);
264 /* Try to predict out blocks in a loop that are not part of a
266 for (i = 0; i < loops_info->num; i++)
270 for (j = loops_info->array[i].first->index;
271 j <= loops_info->array[i].last->index;
274 if (TEST_BIT (loops_info->array[i].nodes, j))
276 int header_found = 0;
279 /* Loop branch heuristics - predict as taken an edge back to
281 for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next)
282 if (e->dest == loops_info->array[i].header)
285 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
287 /* Loop exit heuristics - predict as not taken an edge exiting
   the loop if the conditional has no loop header successors */
290 for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next)
291 if (e->dest->index <= 0
292 || !TEST_BIT (loops_info->array[i].nodes, e->dest->index))
293 predict_edge_def (e, PRED_LOOP_EXIT, NOT_TAKEN);
298 /* Attempt to predict conditional jumps using a number of heuristics.
   For each conditional jump, we try each heuristic in a fixed order.
   If more than one heuristic applies to a particular branch, the first
   is used as the prediction for the branch. */
302 for (i = 0; i < n_basic_blocks; i++)
304 basic_block bb = BASIC_BLOCK (i);
305 rtx last_insn = bb->end;
309 /* If block has no successor, predict all possible paths to
   it as improbable, as the block contains a call to a noreturn
   function and thus can be executed only once. */
312 if (bb->succ == NULL)
315 for (y = 0; y < n_basic_blocks; y++)
316 if (!TEST_BIT (post_dominators[y], i))
318 for (e = BASIC_BLOCK (y)->succ; e; e = e->succ_next)
319 if (e->dest->index >= 0
320 && TEST_BIT (post_dominators[e->dest->index], i))
321 predict_edge_def (e, PRED_NORETURN, NOT_TAKEN);
325 if (GET_CODE (last_insn) != JUMP_INSN
326 || ! any_condjump_p (last_insn))
329 for (e = bb->succ; e; e = e->succ_next)
331 /* Predict edges to blocks that return immediately to be
   improbable. These are usually used to signal error states. */
333 if (e->dest == EXIT_BLOCK_PTR
334 || (e->dest->succ && !e->dest->succ->succ_next
335 && e->dest->succ->dest == EXIT_BLOCK_PTR))
336 predict_edge_def (e, PRED_ERROR_RETURN, NOT_TAKEN);
338 /* Look for block we are guarding (i.e. we dominate it,
   but it doesn't postdominate us). */
340 if (e->dest != EXIT_BLOCK_PTR
342 && TEST_BIT (dominators[e->dest->index], e->src->index)
343 && !TEST_BIT (post_dominators[e->src->index], e->dest->index))
346 /* The call heuristic claims that a guarded function call
   is improbable. This is because such calls are often used
   to signal exceptional situations such as printing error
350 for (insn = e->dest->head; insn != NEXT_INSN (e->dest->end);
351 insn = NEXT_INSN (insn))
352 if (GET_CODE (insn) == CALL_INSN
353 /* Constant and pure calls are hardly used to signal
   something exceptional. */
355 && ! CONST_CALL_P (insn))
357 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
363 cond = get_condition (last_insn, &earliest);
367 /* Try "pointer heuristic."
   A comparison ptr == 0 is predicted as false.
   Similarly, a comparison ptr1 == ptr2 is predicted as false. */
370 switch (GET_CODE (cond))
373 if (GET_CODE (XEXP (cond, 0)) == REG
374 && REG_POINTER (XEXP (cond, 0))
375 && (XEXP (cond, 1) == const0_rtx
376 || (GET_CODE (XEXP (cond, 1)) == REG
377 && REG_POINTER (XEXP (cond, 1)))))
379 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
382 if (GET_CODE (XEXP (cond, 0)) == REG
383 && REG_POINTER (XEXP (cond, 0))
384 && (XEXP (cond, 1) == const0_rtx
385 || (GET_CODE (XEXP (cond, 1)) == REG
386 && REG_POINTER (XEXP (cond, 1)))))
387 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
394 /* Try "opcode heuristic."
   EQ tests are usually false and NE tests are usually true. Also,
   most quantities are positive, so we can make the appropriate guesses
   about signed comparisons against zero. */
398 switch (GET_CODE (cond))
401 /* Unconditional branch. */
402 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
403 cond == const0_rtx ? NOT_TAKEN : TAKEN);
408 predict_insn_def (last_insn, PRED_OPCODE, NOT_TAKEN);
412 predict_insn_def (last_insn, PRED_OPCODE, TAKEN);
415 predict_insn_def (last_insn, PRED_OPCODE, TAKEN);
418 predict_insn_def (last_insn, PRED_OPCODE, NOT_TAKEN);
422 if (XEXP (cond, 1) == const0_rtx
423 || (GET_CODE (XEXP (cond, 1)) == CONST_INT
424 && INTVAL (XEXP (cond, 1)) == -1))
425 predict_insn_def (last_insn, PRED_OPCODE, NOT_TAKEN);
429 if (XEXP (cond, 1) == const0_rtx
430 || (GET_CODE (XEXP (cond, 1)) == CONST_INT
431 && INTVAL (XEXP (cond, 1)) == -1))
432 predict_insn_def (last_insn, PRED_OPCODE, TAKEN);
440 /* Attach the combined probability to each conditional jump. */
441 for (i = 0; i < n_basic_blocks; i++)
443 rtx last_insn = BLOCK_END (i);
445 if (GET_CODE (last_insn) != JUMP_INSN
446 || ! any_condjump_p (last_insn))
448 combine_predictions_for_insn (last_insn, BASIC_BLOCK (i));
450 sbitmap_vector_free (post_dominators);
451 sbitmap_vector_free (dominators);
453 estimate_bb_frequencies (loops_info);
/* Overview of expected_value_to_br_prob: walk the insn chain from
   get_insns ().  EV holds the most recent NOTE_INSN_EXPECTED_VALUE
   expression and EV_REG its operand 0.  For a conditional jump whose
   canonicalized condition compares EV_REG against a CONST_INT, operand 1 of
   EV is substituted for the register, the comparison is simplified with
   simplify_rtx, and the resulting constant selects TAKEN or NOT_TAKEN for
   the PRED_BUILTIN_EXPECT predictor.
   NOTE(review): the case labels, the statements that reset EV, and the
   terminator of the "Substitute and simplify" comment are not visible in
   this copy.  */
456 /* __builtin_expect dropped tokens into the insn stream describing
   expected values of registers. Generate branch probabilities
   based off these values. */
461 expected_value_to_br_prob ()
463 rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
465 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
467 switch (GET_CODE (insn))
470 /* Look for expected value notes. */
471 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
473 ev = NOTE_EXPECTED_VALUE (insn);
474 ev_reg = XEXP (ev, 0);
479 /* Never propagate across labels. */
484 /* Look for insns that clobber the EV register. */
485 if (ev && reg_set_p (ev_reg, insn))
490 /* Look for simple conditional branches. If we haven't got an
   expected value yet, no point going further. */
492 if (GET_CODE (insn) != JUMP_INSN || ev == NULL_RTX)
494 if (! any_condjump_p (insn))
499 /* Collect the branch condition, hopefully relative to EV_REG. */
500 /* ??? At present we'll miss things like
   (expected_value (eq r70 0))
   (set r80 (lt r70 r71))
   (set pc (if_then_else (ne r80 0) ...))
   as canonicalize_condition will render this to us as
   Could use cselib to try and reduce this further. */
508 cond = XEXP (SET_SRC (PATTERN (insn)), 0);
509 cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg);
511 || XEXP (cond, 0) != ev_reg
512 || GET_CODE (XEXP (cond, 1)) != CONST_INT)
515 /* Substitute and simplify. Given that the expression we're
   building involves two constants, we should wind up with either
518 cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
519 XEXP (ev, 1), XEXP (cond, 1));
520 cond = simplify_rtx (cond);
522 /* Turn the condition into a scaled branch probability. */
523 if (cond != const_true_rtx && cond != const0_rtx)
525 predict_insn_def (insn, PRED_BUILTIN_EXPECT,
526 cond == const_true_rtx ? TAKEN : NOT_TAKEN);
530 /* This is used to carry information about basic blocks. It is
   attached to the AUX field of the standard CFG block and reached through
   the BLOCK_INFO macro.  The code in this file uses the members frequency,
   next, visited and nvisited.
   NOTE(review): the member declarations themselves are not visible in
   this copy; only their comments remain.  */
533 typedef struct block_info_def
535 /* Estimated frequency of execution of basic_block. */
538 /* To keep queue of basic blocks to process. */
541 /* True if block already converted. */
544 /* Number of blocks processed before adding basic block to the queue. Used
   to recognize irregular regions. */
549 /* Similar information for edges, reached through the EDGE_INFO macro. */
550 typedef struct edge_info_def
552 /* In case the edge is a loopback edge, the probability that the edge
   will be reached in case the header is. The estimated number of iterations
   of the loop can then be computed as 1 / (1 - back_edge_prob). */
555 double back_edge_prob;
556 /* True if the edge is a loopback edge in the natural loop.  Used in this
   file as the member back_edge (its declaration is not visible here). */
/* Accessors for the per-block and per-edge scratch data: each casts the
   aux member of its argument to block_info or edge_info respectively.  */
560 #define BLOCK_INFO(B) ((block_info) (B)->aux)
561 #define EDGE_INFO(E) ((edge_info) (E)->aux)
563 /* Helper function for estimate_bb_frequencies.
   Propagate the frequencies for loops headed by HEAD.

   Blocks are processed from a queue threaded through BLOCK_INFO ()->next,
   starting at HEAD, whose frequency is set to 1.  For each block, FREQUENCY
   accumulates e->probability times the source block's frequency over the
   incoming edges whose source is already visited, and CYCLIC_PROBABILITY
   accumulates back_edge_prob over the incoming back edges.  The block's
   frequency is FREQUENCY / (1 - CYCLIC_PROBABILITY), with
   CYCLIC_PROBABILITY capped at 1 - 1 / REG_BR_PROB_BASE so the divisor
   cannot reach zero.  The block is then marked visited and its successors
   that are not behind a back edge, not visited and not already queued are
   appended after LAST.
   NOTE(review): several statements and the terminator of the long comment
   below are not visible in this copy, including the divisors applied on
   the lines that end in an operator.  */
566 propagate_freq (head)
569 basic_block bb = head;
570 basic_block last = bb;
575 BLOCK_INFO (head)->frequency = 1;
576 for (; bb; bb = nextbb)
578 double cyclic_probability = 0, frequency = 0;
580 nextbb = BLOCK_INFO (bb)->next;
581 BLOCK_INFO (bb)->next = NULL;
583 /* Compute frequency of basic block. */
586 for (e = bb->pred; e; e = e->pred_next)
587 if (!BLOCK_INFO (e->src)->visited && !EDGE_INFO (e)->back_edge)
590 /* We have not processed all predecessors of edge e yet. These may
   be waiting in the queue or we may hit an irreducible region.
   To avoid infinite looping on irreducible regions, count the number
   of blocks processed at the time the basic block has been queued. In the
   case the number didn't change, we've hit an irreducible region and we
   forget the backward edge. This can increase time complexity
   by the number of irreducible blocks, but in the same way the standard
   loop does, so it should not result in noticeable slowdown.
   Alternatively we may distinguish backward and cross edges in the
   DFS tree by a preprocessing pass and ignore existence of non-loop
603 if (e && BLOCK_INFO (bb)->nvisited != nvisited)
608 BLOCK_INFO (last)->next = e->dest;
609 BLOCK_INFO (last)->nvisited = nvisited;
613 else if (e && rtl_dump_file)
614 fprintf (rtl_dump_file, "Irreducible region hit, ignoring edge to bb %i\n",
617 for (e = bb->pred; e; e = e->pred_next)
618 if (EDGE_INFO (e)->back_edge)
619 cyclic_probability += EDGE_INFO (e)->back_edge_prob;
620 else if (BLOCK_INFO (e->src)->visited)
621 frequency += (e->probability
622 * BLOCK_INFO (e->src)->frequency /
625 if (cyclic_probability > 1.0 - 1.0 / REG_BR_PROB_BASE)
626 cyclic_probability = 1.0 - 1.0 / REG_BR_PROB_BASE;
628 BLOCK_INFO (bb)->frequency = frequency / (1 - cyclic_probability);
631 BLOCK_INFO (bb)->visited = 1;
633 /* Compute back edge frequencies. */
634 for (e = bb->succ; e; e = e->succ_next)
636 EDGE_INFO (e)->back_edge_prob = (e->probability
637 * BLOCK_INFO (bb)->frequency
640 /* Propagate to successor blocks. */
641 for (e = bb->succ; e; e = e->succ_next)
642 if (!EDGE_INFO (e)->back_edge
643 && !BLOCK_INFO (e->dest)->visited
644 && !BLOCK_INFO (e->dest)->next && e->dest != last)
649 BLOCK_INFO (last)->next = e->dest;
650 BLOCK_INFO (last)->nvisited = nvisited;
657 /* Estimate probabilities of the loopback edges in loops at the same nest
   level.  FIRST_LOOP is the first loop of the level; the rest are reached
   through loop->next.  Each loop's inner loops are handled first by a
   recursive call on loop->inner, then the loop's back edge is marked, and
   propagate_freq is run from the loop header.
   NOTE(review): the statements that set loop->shared and the value stored
   into the visited flag are not visible in this copy.  */
659 estimate_loops_at_level (first_loop)
660 struct loop *first_loop;
662 struct loop *l, *loop = first_loop;
664 for (loop = first_loop; loop; loop = loop->next)
669 estimate_loops_at_level (loop->inner);
671 /* Find the current loop's back edge (the successor edge of the latch
   whose destination is the loop header) and mark it. */
672 for (e = loop->latch->succ; e->dest != loop->header; e = e->succ_next);
674 EDGE_INFO (e)->back_edge = 1;
676 /* In case loop header is shared, ensure that it is the last one sharing
   same header, so we avoid redundant work. */
680 for (l = loop->next; l; l = l->next)
681 if (l->header == loop->header)
687 /* Now merge all nodes of all loops with given header as not visited. */
688 for (l = loop->shared ? first_loop : loop; l != loop->next; l = l->next)
689 if (loop->header == l->header)
690 EXECUTE_IF_SET_IN_SBITMAP (l->nodes, 0, n,
691 BLOCK_INFO (BASIC_BLOCK (n))->visited =
693 propagate_freq (loop->header);
697 /* Convert counts measured by profile driven feedback to frequencies.
   COUNT_MAX is the largest block count among the n_basic_blocks blocks,
   with a floor of 1.  Each block's frequency is then derived from
   bb->count * BB_FREQ_MAX + count_max / 2.  The second loop starts at
   index -2 and also covers ENTRY_BLOCK_PTR.
   NOTE(review): the function header and the end of the frequency
   expression are not visible in this copy -- presumably the sum is divided
   by COUNT_MAX, making the added count_max / 2 a rounding term; confirm.  */
701 HOST_WIDEST_INT count_max = 1;
704 for (i = 0; i < n_basic_blocks; i++)
705 if (BASIC_BLOCK (i)->count > count_max)
706 count_max = BASIC_BLOCK (i)->count;
708 for (i = -2; i < n_basic_blocks; i++)
712 bb = ENTRY_BLOCK_PTR;
716 bb = BASIC_BLOCK (i);
717 bb->frequency = ((bb->count * BB_FREQ_MAX + count_max / 2)
722 /* Estimate basic blocks frequency by given branch probabilities. */
724 estimate_bb_frequencies (loops)
733 if (flag_branch_probabilities)
739 /* Fill in the probability values in flowgraph based on the REG_BR_PROB
741 for (i = 0; i < n_basic_blocks; i++)
743 rtx last_insn = BLOCK_END (i);
745 edge fallthru, branch;
747 if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
748 /* Avoid handling of conditionals jump jumping to fallthru edge. */
749 || BASIC_BLOCK (i)->succ->succ_next == NULL)
751 /* We can predict only conditional jumps at the moment.
752 Expect each edge to be equall probable.
753 ?? In future we want to make abnormal edges improbable. */
757 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
760 if (e->probability != 0)
764 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
765 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
769 probability = INTVAL (XEXP (find_reg_note (last_insn,
770 REG_BR_PROB, 0), 0));
771 fallthru = BASIC_BLOCK (i)->succ;
772 if (!fallthru->flags & EDGE_FALLTHRU)
773 fallthru = fallthru->succ_next;
774 branch = BASIC_BLOCK (i)->succ;
775 if (branch->flags & EDGE_FALLTHRU)
776 branch = branch->succ_next;
778 branch->probability = probability;
779 fallthru->probability = REG_BR_PROB_BASE - probability;
782 ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
784 /* Set up block info for each basic block. */
785 bi = (block_info) xcalloc ((n_basic_blocks + 2), sizeof (*bi));
786 ei = (edge_info) xcalloc ((n_edges), sizeof (*ei));
787 for (i = -2; i < n_basic_blocks; i++)
793 bb = ENTRY_BLOCK_PTR;
797 bb = BASIC_BLOCK (i);
798 bb->aux = bi + i + 2;
799 BLOCK_INFO (bb)->visited = 1;
800 for (e = bb->succ; e; e = e->succ_next)
802 e->aux = ei + edgenum, edgenum++;
803 EDGE_INFO (e)->back_edge_prob = ((double) e->probability
807 /* First compute probabilities locally for each loop from innermost
808 to outermost to examine probabilities for back edges. */
809 estimate_loops_at_level (loops->tree_root);
811 /* Now fake loop around whole function to finalize probabilities. */
812 for (i = 0; i < n_basic_blocks; i++)
813 BLOCK_INFO (BASIC_BLOCK (i))->visited = 0;
814 BLOCK_INFO (ENTRY_BLOCK_PTR)->visited = 0;
815 BLOCK_INFO (EXIT_BLOCK_PTR)->visited = 0;
816 propagate_freq (ENTRY_BLOCK_PTR);
818 for (i = 0; i < n_basic_blocks; i++)
819 if (BLOCK_INFO (BASIC_BLOCK (i))->frequency > freq_max)
820 freq_max = BLOCK_INFO (BASIC_BLOCK (i))->frequency;
821 for (i = -2; i < n_basic_blocks; i++)
825 bb = ENTRY_BLOCK_PTR;
829 bb = BASIC_BLOCK (i);
830 bb->frequency = (BLOCK_INFO (bb)->frequency * BB_FREQ_MAX / freq_max