1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not, see
19 <http://www.gnu.org/licenses/>. */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
55 #include "tree-flow.h"
57 #include "tree-dump.h"
58 #include "tree-pass.h"
60 #include "tree-scalar-evolution.h"
62 #include "pointer-set.h"
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
/* NOTE(review): these sreals are presumably initialized elsewhere in the
   file (the initialization code is not visible in this chunk) -- confirm
   before relying on them.  */
69 /* Random guesstimation given names. */
/* All PROB_* values are expressed on the REG_BR_PROB_BASE scale, where
   PROB_ALWAYS == REG_BR_PROB_BASE denotes certainty.  */
70 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 100 - 1)
71 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
72 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73 #define PROB_ALWAYS (REG_BR_PROB_BASE)
/* Forward declarations for static helpers defined later in the file.  */
75 static void combine_predictions_for_insn (rtx, basic_block);
76 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
78 static void compute_function_frequency (void);
79 static void choose_function_section (void);
80 static bool can_predict_insn_p (const_rtx);
82 /* Information we hold about each branch predictor.
83 Filled using information from predict.def. */
/* NOTE(review): the struct header line is missing from this copy; the two
   fields below belong to `struct predictor_info'.  */
87 const char *const name; /* Name used in the debugging dumps. */
88 const int hitrate; /* Expected hitrate used by
89 predict_insn_def call. */
93 /* Use given predictor without Dempster-Shaffer theory if it matches
94 using first_match heuristics. */
95 #define PRED_FLAG_FIRST_MATCH 1
97 /* Recompute hitrate in percent to our representation. */
/* Converts a percentage VAL to the REG_BR_PROB_BASE scale, rounding to
   nearest (the + 50 term).  */
99 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
/* The table is generated by expanding each DEF_PREDICTOR entry in
   predict.def into one initializer row.  */
101 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
102 static const struct predictor_info predictor_info[]= {
103 #include "predict.def"
105 /* Upper bound on predictors. */
110 /* Return TRUE if frequency FREQ is considered to be hot. */
/* NOTE(review): throughout this region, conditional bodies, braces and
   return statements appear to be missing from this copy of the file; the
   comments below describe only what the visible tests establish.  */
112 maybe_hot_frequency_p (int freq)
114 if (!profile_info || !flag_branch_probabilities)
116 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
118 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
121 if (profile_status == PROFILE_ABSENT)
/* FREQ is compared against a fraction of BB_FREQ_MAX controlled by the
   HOT_BB_FREQUENCY_FRACTION param.  */
123 if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
128 /* Return true in case BB can be CPU intensive and should be optimized
129 for maximal performance. */
132 maybe_hot_bb_p (const_basic_block bb)
134 if (profile_info && flag_branch_probabilities
136 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
/* Without usable profile counts, fall back to the guessed frequency.  */
138 return maybe_hot_frequency_p (bb->frequency);
141 /* Return true if the call can be hot. */
144 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
146 if (profile_info && flag_branch_probabilities
148 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
/* User-supplied hot/cold attributes on the caller or callee override the
   profile-based heuristics.  */
150 if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
151 || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
153 if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
155 if (flag_guess_branch_prob
156 && edge->frequency < (CGRAPH_FREQ_MAX
157 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
162 /* Return true in case edge E can be CPU intensive and should be optimized
163 for maximal performance. */
166 maybe_hot_edge_p (edge e)
168 if (profile_info && flag_branch_probabilities
170 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
172 return maybe_hot_frequency_p (EDGE_FREQUENCY (e));
175 /* Return true in case BB is cold and should be optimized for size. */
178 probably_cold_bb_p (const_basic_block bb)
180 if (profile_info && flag_branch_probabilities
182 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
184 if ((!profile_info || !flag_branch_probabilities)
185 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
187 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
192 /* Return true in case BB is probably never executed. */
194 probably_never_executed_bb_p (const_basic_block bb)
196 if (profile_info && flag_branch_probabilities)
/* Round the per-run count to nearest; a result of zero means the block
   was not executed in an average profiled run.  */
197 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
198 if ((!profile_info || !flag_branch_probabilities)
199 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
204 /* Return true when current function should always be optimized for size. */
/* NOTE(review): return-type lines and braces are missing from this copy;
   each *_for_speed_p predicate below is visibly defined as the negation
   of the corresponding *_for_size_p predicate.  */
207 optimize_function_for_size_p (struct function *fun)
209 return (optimize_size
210 || fun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED);
213 /* Return true when current function should always be optimized for speed. */
216 optimize_function_for_speed_p (struct function *fun)
218 return !optimize_function_for_size_p (fun);
221 /* Return TRUE when BB should be optimized for size. */
224 optimize_bb_for_size_p (const_basic_block bb)
226 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
229 /* Return TRUE when BB should be optimized for speed. */
232 optimize_bb_for_speed_p (const_basic_block bb)
234 return !optimize_bb_for_size_p (bb);
237 /* Return TRUE when edge E should be optimized for size. */
240 optimize_edge_for_size_p (edge e)
242 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
245 /* Return TRUE when edge E should be optimized for speed. */
248 optimize_edge_for_speed_p (edge e)
250 return !optimize_edge_for_size_p (e);
253 /* Return TRUE when the current instruction should be optimized for size. */
256 optimize_insn_for_size_p (void)
258 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
261 /* Return TRUE when the current instruction should be optimized for speed. */
264 optimize_insn_for_speed_p (void)
266 return !optimize_insn_for_size_p ();
269 /* Return TRUE when LOOP should be optimized for size. */
/* Loop hotness is judged by its header block.  */
272 optimize_loop_for_size_p (struct loop *loop)
274 return optimize_bb_for_size_p (loop->header);
277 /* Return TRUE when LOOP should be optimized for speed. */
280 optimize_loop_for_speed_p (struct loop *loop)
282 return optimize_bb_for_speed_p (loop->header);
285 /* Return TRUE when LOOP nest should be optimized for speed. */
288 optimize_loop_nest_for_speed_p (struct loop *loop)
290 struct loop *l = loop;
291 if (optimize_loop_for_speed_p (loop))
/* Walk the loop tree below LOOP looking for any hot subloop; the walk
   logic between these lines is truncated in this copy.  */
294 while (l && l != loop)
296 if (optimize_loop_for_speed_p (l))
304 while (l != loop && !l->next)
313 /* Return TRUE when LOOP nest should be optimized for size. */
316 optimize_loop_nest_for_size_p (struct loop *loop)
318 return !optimize_loop_nest_for_speed_p (loop);
321 /* Return true when edge E is likely to be well predictable by branch
/* NOTE(review): the tail of the comment above and parts of the body below
   are missing from this copy.  An edge counts as predictable when its
   probability is within PARAM_PREDICTABLE_BRANCH_OUTCOME percent of
   either 0 or REG_BR_PROB_BASE.  */
325 predictable_edge_p (edge e)
327 if (profile_status == PROFILE_ABSENT)
330 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
331 || (REG_BR_PROB_BASE - e->probability
332 <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
338 /* Set RTL expansion for BB profile. */
341 rtl_profile_for_bb (basic_block bb)
343 crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
346 /* Set RTL expansion for edge profile. */
349 rtl_profile_for_edge (edge e)
351 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
354 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
/* Default to "hot" so code is not pessimized when nothing is known.  */
356 default_rtl_profile (void)
358 crtl->maybe_hot_insn_p = true;
361 /* Return true if the one of outgoing edges is already predicted by
/* NOTE(review): the tail of the comment above (naming PREDICTOR) is
   missing from this copy.  Scans the REG_BR_PRED notes on the block's
   final insn for a matching predictor.  */
365 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
368 if (!INSN_P (BB_END (bb)))
370 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
371 if (REG_NOTE_KIND (note) == REG_BR_PRED
/* First operand of the CONCAT note is the predictor enum value.  */
372 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
377 /* This map contains for a basic block the list of predictions for the
380 static struct pointer_map_t *bb_predictions;
382 /* Return true if the one of outgoing edges is already predicted by
/* GIMPLE counterpart of rtl_predicted_by_p: walks the edge_prediction
   list stored for BB in bb_predictions.  */
386 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
388 struct edge_prediction *i;
389 void **preds = pointer_map_contains (bb_predictions, bb);
394 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
395 if (i->ep_predictor == predictor)
400 /* Return true when the probability of edge is reliable.
402 The profile guessing code is good at predicting branch outcome (ie.
403 taken/not taken), that is predicted right slightly over 75% of time.
404 It is however notoriously poor on predicting the probability itself.
405 In general the profile appears a lot flatter (with probabilities closer
406 to 50%) than the reality so it is bad idea to use it to drive optimization
407 such as those disabling dynamic branch prediction for well predictable
410 There are two exceptions - edges leading to noreturn edges and edges
411 predicted by number of iterations heuristics are predicted well. This macro
412 should be able to distinguish those, but at the moment it simply checks for
413 noreturn heuristic that is only one giving probability over 99% or below
414 1%. In future we might want to propagate reliability information across the
415 CFG if we find this information useful on multiple places. */
417 probability_reliable_p (int prob)
419 return (profile_status == PROFILE_READ
420 || (profile_status == PROFILE_GUESSED
421 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
424 /* Same predicate as above, working on edges. */
426 edge_probability_reliable_p (const_edge e)
428 return probability_reliable_p (e->probability);
431 /* Same predicate as edge_probability_reliable_p, working on notes. */
433 br_prob_note_reliable_p (const_rtx note)
435 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
436 return probability_reliable_p (INTVAL (XEXP (note, 0)));
/* Attach a REG_BR_PRED note recording PREDICTOR and PROBABILITY to the
   conditional jump INSN.  No-op if guessing is disabled.  */
440 predict_insn (rtx insn, enum br_predictor predictor, int probability)
442 gcc_assert (any_condjump_p (insn));
443 if (!flag_guess_branch_prob)
446 add_reg_note (insn, REG_BR_PRED,
447 gen_rtx_CONCAT (VOIDmode,
448 GEN_INT ((int) predictor),
449 GEN_INT ((int) probability)));
452 /* Predict insn by given predictor. */
455 predict_insn_def (rtx insn, enum br_predictor predictor,
456 enum prediction taken)
/* Use the predictor's canonical hitrate; the (truncated) test before
   line 461 presumably checks TAKEN and inverts for NOT_TAKEN.  */
458 int probability = predictor_info[(int) predictor].hitrate;
461 probability = REG_BR_PROB_BASE - probability;
463 predict_insn (insn, predictor, probability);
466 /* Predict edge E with given probability if possible. */
469 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
472 last_insn = BB_END (e->src);
474 /* We can store the branch prediction information only about
475 conditional jumps. */
476 if (!any_condjump_p (last_insn))
479 /* We always store probability of branching. */
480 if (e->flags & EDGE_FALLTHRU)
481 probability = REG_BR_PROB_BASE - probability;
483 predict_insn (last_insn, predictor, probability);
486 /* Predict edge E with the given PROBABILITY. */
488 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
/* Predictions must be recorded before probabilities are combined.  */
490 gcc_assert (profile_status != PROFILE_GUESSED);
491 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
492 && flag_guess_branch_prob && optimize)
/* Prepend a new edge_prediction onto the per-block list kept in the
   bb_predictions pointer map.  */
494 struct edge_prediction *i = XNEW (struct edge_prediction);
495 void **preds = pointer_map_insert (bb_predictions, e->src);
497 i->ep_next = (struct edge_prediction *) *preds;
499 i->ep_probability = probability;
500 i->ep_predictor = predictor;
505 /* Remove all predictions on given basic block that are attached
/* NOTE(review): comment tail and several body lines (list-unlink code
   between 526 and 531) are missing from this copy.  Walks E->src's
   edge_prediction list and unlinks entries whose ep_edge is E.  */
508 remove_predictions_associated_with_edge (edge e)
515 preds = pointer_map_contains (bb_predictions, e->src);
519 struct edge_prediction **prediction = (struct edge_prediction **) preds;
520 struct edge_prediction *next;
524 if ((*prediction)->ep_edge == e)
526 next = (*prediction)->ep_next;
531 prediction = &((*prediction)->ep_next);
536 /* Clears the list of predictions stored for BB. */
539 clear_bb_predictions (basic_block bb)
541 void **preds = pointer_map_contains (bb_predictions, bb);
542 struct edge_prediction *pred, *next;
/* Free each list node; NEXT is saved before the (truncated) free.  */
547 for (pred = (struct edge_prediction *) *preds; pred; pred = next)
549 next = pred->ep_next;
555 /* Return true when we can store prediction on insn INSN.
556 At the moment we represent predictions only on conditional
557 jumps, not at computed jump or other complicated cases. */
559 can_predict_insn_p (const_rtx insn)
561 return (JUMP_P (insn)
562 && any_condjump_p (insn)
563 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
566 /* Predict edge E by given predictor if possible. */
569 predict_edge_def (edge e, enum br_predictor predictor,
570 enum prediction taken)
/* Mirrors predict_insn_def but records onto an edge; the (truncated)
   test before line 575 presumably inverts for NOT_TAKEN.  */
572 int probability = predictor_info[(int) predictor].hitrate;
575 probability = REG_BR_PROB_BASE - probability;
577 predict_edge (e, predictor, probability);
580 /* Invert all branch predictions or probability notes in the INSN. This needs
581 to be done each time we invert the condition used by the jump. */
584 invert_br_probabilities (rtx insn)
588 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
/* REG_BR_PROB stores the probability directly; REG_BR_PRED stores a
   CONCAT whose second operand is the probability.  */
589 if (REG_NOTE_KIND (note) == REG_BR_PROB)
590 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
591 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
592 XEXP (XEXP (note, 0), 1)
593 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
596 /* Dump information about the branch prediction to the output file. */
/* NOTE(review): early-return guards and brace lines are missing from this
   copy.  USED selects whether the " (ignored)" marker is printed.  */
599 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
600 basic_block bb, int used)
/* Find the non-fallthru successor so its count can be reported.  */
608 FOR_EACH_EDGE (e, ei, bb->succs)
609 if (! (e->flags & EDGE_FALLTHRU))
612 fprintf (file, " %s heuristics%s: %.1f%%",
613 predictor_info[predictor].name,
614 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
618 fprintf (file, " exec ");
619 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
622 fprintf (file, " hit ");
623 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
/* NOTE(review): division by bb->count -- a zero-count guard is presumably
   among the elided lines; verify against the full source.  */
624 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
628 fprintf (file, "\n");
631 /* We can not predict the probabilities of outgoing edges of bb. Set them
632 evenly and hope for the best. */
634 set_even_probabilities (basic_block bb)
/* First pass (truncated) counts the non-EH, non-fake edges into NEDGES;
   second pass splits REG_BR_PROB_BASE evenly among them, rounded.  */
640 FOR_EACH_EDGE (e, ei, bb->succs)
641 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
643 FOR_EACH_EDGE (e, ei, bb->succs)
644 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
645 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
650 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
651 note if not already present. Remove now useless REG_BR_PRED notes. */
/* NOTE(review): many brace/guard lines are elided in this copy; the
   combination logic below parallels combine_predictions_for_bb.  */
654 combine_predictions_for_insn (rtx insn, basic_block bb)
659 int best_probability = PROB_EVEN;
660 int best_predictor = END_PREDICTORS;
661 int combined_probability = REG_BR_PROB_BASE / 2;
663 bool first_match = false;
666 if (!can_predict_insn_p (insn))
668 set_even_probabilities (bb);
672 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
673 pnote = &REG_NOTES (insn);
675 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
678 /* We implement "first match" heuristics and use probability guessed
679 by predictor with smallest index. */
680 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
681 if (REG_NOTE_KIND (note) == REG_BR_PRED)
683 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
684 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
687 if (best_predictor > predictor)
688 best_probability = probability, best_predictor = predictor;
/* Dempster-Shaffer style combination: D is the normalization factor
   for merging the current estimate with this predictor's vote.  */
690 d = (combined_probability * probability
691 + (REG_BR_PROB_BASE - combined_probability)
692 * (REG_BR_PROB_BASE - probability));
694 /* Use FP math to avoid overflows of 32bit integers. */
696 /* If one probability is 0% and one 100%, avoid division by zero. */
697 combined_probability = REG_BR_PROB_BASE / 2;
699 combined_probability = (((double) combined_probability) * probability
700 * REG_BR_PROB_BASE / d + 0.5);
703 /* Decide which heuristic to use. In case we didn't match anything,
704 use no_prediction heuristic, in case we did match, use either
705 first match or Dempster-Shaffer theory depending on the flags. */
707 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
711 dump_prediction (dump_file, PRED_NO_PREDICTION,
712 combined_probability, bb, true);
715 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
717 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
/* Under first_match the winning predictor's probability replaces the
   combined estimate outright.  */
722 combined_probability = best_probability;
723 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
/* Second walk: dump and strip the now-redundant REG_BR_PRED notes.  */
727 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
729 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
730 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
732 dump_prediction (dump_file, predictor, probability, bb,
733 !first_match || best_predictor == predictor);
734 *pnote = XEXP (*pnote, 1);
737 pnote = &XEXP (*pnote, 1);
742 add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));
744 /* Save the prediction into CFG in case we are seeing non-degenerated
746 if (!single_succ_p (bb))
748 BRANCH_EDGE (bb)->probability = combined_probability;
749 FALLTHRU_EDGE (bb)->probability
750 = REG_BR_PROB_BASE - combined_probability;
753 else if (!single_succ_p (bb))
755 int prob = INTVAL (XEXP (prob_note, 0));
757 BRANCH_EDGE (bb)->probability = prob;
758 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
761 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
764 /* Combine predictions into single probability and store them into CFG.
765 Remove now useless prediction entries. */
/* NOTE(review): GIMPLE counterpart of combine_predictions_for_insn;
   several guard/brace lines are elided in this copy.  */
768 combine_predictions_for_bb (basic_block bb)
770 int best_probability = PROB_EVEN;
771 int best_predictor = END_PREDICTORS;
772 int combined_probability = REG_BR_PROB_BASE / 2;
774 bool first_match = false;
776 struct edge_prediction *pred;
778 edge e, first = NULL, second = NULL;
/* Identify the (up to) two non-EH, non-fake successor edges; only the
   two-successor case is handled by the combination logic below.  */
782 FOR_EACH_EDGE (e, ei, bb->succs)
783 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
786 if (first && !second)
792 /* When there is no successor or only one choice, prediction is easy.
794 We are lazy for now and predict only basic blocks with two outgoing
795 edges. It is possible to predict generic case too, but we have to
796 ignore first match heuristics and do more involved combining. Implement
801 set_even_probabilities (bb);
802 clear_bb_predictions (bb);
804 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
810 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
812 preds = pointer_map_contains (bb_predictions, bb);
815 /* We implement "first match" heuristics and use probability guessed
816 by predictor with smallest index. */
817 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
819 int predictor = pred->ep_predictor;
820 int probability = pred->ep_probability;
/* Normalize each prediction so it is expressed as the probability of
   taking FIRST.  */
822 if (pred->ep_edge != first)
823 probability = REG_BR_PROB_BASE - probability;
826 if (best_predictor > predictor)
827 best_probability = probability, best_predictor = predictor;
/* Dempster-Shaffer style merge; D is the normalization factor.  */
829 d = (combined_probability * probability
830 + (REG_BR_PROB_BASE - combined_probability)
831 * (REG_BR_PROB_BASE - probability));
833 /* Use FP math to avoid overflows of 32bit integers. */
835 /* If one probability is 0% and one 100%, avoid division by zero. */
836 combined_probability = REG_BR_PROB_BASE / 2;
838 combined_probability = (((double) combined_probability)
840 * REG_BR_PROB_BASE / d + 0.5);
844 /* Decide which heuristic to use. In case we didn't match anything,
845 use no_prediction heuristic, in case we did match, use either
846 first match or Dempster-Shaffer theory depending on the flags. */
848 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
852 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
855 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
857 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
862 combined_probability = best_probability;
863 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
867 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
869 int predictor = pred->ep_predictor;
870 int probability = pred->ep_probability;
872 if (pred->ep_edge != EDGE_SUCC (bb, 0))
873 probability = REG_BR_PROB_BASE - probability;
874 dump_prediction (dump_file, predictor, probability, bb,
875 !first_match || best_predictor == predictor);
878 clear_bb_predictions (bb);
/* Commit the combined estimate to the CFG edges.  */
882 first->probability = combined_probability;
883 second->probability = REG_BR_PROB_BASE - combined_probability;
887 /* Predict edge probabilities by exploiting loop structure. */
/* NOTE(review): the function header line is not visible in this copy
   (presumably this is predict_loops); many guard/brace lines are elided
   throughout this region -- verify against the full source.  */
897 /* Try to predict out blocks in a loop that are not part of a
899 FOR_EACH_LOOP (li, loop, 0)
901 basic_block bb, *bbs;
903 VEC (edge, heap) *exits;
904 struct tree_niter_desc niter_desc;
907 exits = get_loop_exit_edges (loop);
908 n_exits = VEC_length (edge, exits);
/* Per-exit pass: derive an iteration-count estimate and turn it into an
   exit probability.  */
910 for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
913 HOST_WIDE_INT nitercst;
914 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
916 enum br_predictor predictor;
918 if (number_of_iterations_exit (loop, ex, &niter_desc, false))
919 niter = niter_desc.niter;
/* Fall back to brute-force evaluation when scalar evolution did not
   produce a constant.  */
920 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
921 niter = loop_niter_by_eval (loop, ex);
923 if (TREE_CODE (niter) == INTEGER_CST)
925 if (host_integerp (niter, 1)
926 && compare_tree_int (niter, max-1) == -1)
927 nitercst = tree_low_cst (niter, 1) + 1;
930 predictor = PRED_LOOP_ITERATIONS;
932 /* If we have just one exit and we can derive some information about
933 the number of iterations of the loop from the statements inside
934 the loop, use it to predict this exit. */
935 else if (n_exits == 1)
937 nitercst = estimated_loop_iterations_int (loop, false);
943 predictor = PRED_LOOP_ITERATIONS_GUESSED;
/* An exit from a loop iterating NITERCST times is taken roughly once
   per NITERCST executions (rounded).  */
948 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
949 predict_edge (ex, predictor, probability);
951 VEC_free (edge, heap, exits);
953 bbs = get_loop_body (loop);
955 for (j = 0; j < loop->num_nodes; j++)
957 int header_found = 0;
963 /* Bypass loop heuristics on continue statement. These
964 statements construct loops via "non-loop" constructs
965 in the source language and are better to be handled
967 if (predicted_by_p (bb, PRED_CONTINUE))
970 /* Loop branch heuristics - predict an edge back to a
971 loop's head as taken. */
972 if (bb == loop->latch)
974 e = find_edge (loop->latch, loop->header);
978 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
982 /* Loop exit heuristics - predict an edge exiting the loop if the
983 conditional has no loop header successors as not taken. */
985 /* If we already used more reliable loop exit predictors, do not
986 bother with PRED_LOOP_EXIT. */
987 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
988 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
990 /* For loop with many exits we don't want to predict all exits
991 with the pretty large probability, because if all exits are
992 considered in row, the loop would be predicted to iterate
993 almost never. The code to divide probability by number of
994 exits is very rough. It should compute the number of exits
995 taken in each patch through function (not the overall number
996 of exits that might be a lot higher for loops with wide switch
997 statements in them) and compute n-th square root.
999 We limit the minimal probability by 2% to avoid
1000 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1001 as this was causing regression in perl benchmark containing such
1004 int probability = ((REG_BR_PROB_BASE
1005 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1007 if (probability < HITRATE (2))
1008 probability = HITRATE (2);
1009 FOR_EACH_EDGE (e, ei, bb->succs)
1010 if (e->dest->index < NUM_FIXED_BLOCKS
1011 || !flow_bb_inside_loop_p (loop, e->dest))
1012 predict_edge (e, PRED_LOOP_EXIT, probability);
1016 /* Free basic blocks from get_loop_body. */
1023 /* Attempt to predict probabilities of BB outgoing edges using local
/* NOTE(review): comment tail and case labels in the switch are partially
   elided in this copy; the pointer and opcode heuristics below record
   predictions via predict_insn_def on BB's final conditional jump.  */
1026 bb_estimate_probability_locally (basic_block bb)
1028 rtx last_insn = BB_END (bb);
1031 if (! can_predict_insn_p (last_insn))
1033 cond = get_condition (last_insn, NULL, false, false);
1037 /* Try "pointer heuristic."
1038 A comparison ptr == 0 is predicted as false.
1039 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1040 if (COMPARISON_P (cond)
1041 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1042 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1044 if (GET_CODE (cond) == EQ)
1045 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1046 else if (GET_CODE (cond) == NE)
1047 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1051 /* Try "opcode heuristic."
1052 EQ tests are usually false and NE tests are usually true. Also,
1053 most quantities are positive, so we can make the appropriate guesses
1054 about signed comparisons against zero. */
1055 switch (GET_CODE (cond))
1058 /* Unconditional branch. */
1059 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1060 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1065 /* Floating point comparisons appears to behave in a very
1066 unpredictable way because of special role of = tests in
1068 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1070 /* Comparisons with 0 are often used for booleans and there is
1071 nothing useful to predict about them. */
1072 else if (XEXP (cond, 1) == const0_rtx
1073 || XEXP (cond, 0) == const0_rtx)
1076 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1081 /* Floating point comparisons appears to behave in a very
1082 unpredictable way because of special role of = tests in
1084 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1086 /* Comparisons with 0 are often used for booleans and there is
1087 nothing useful to predict about them. */
1088 else if (XEXP (cond, 1) == const0_rtx
1089 || XEXP (cond, 0) == const0_rtx)
1092 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1096 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1100 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
/* Signed-comparison-against-small-constant heuristics.  */
1105 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1106 || XEXP (cond, 1) == constm1_rtx)
1107 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1112 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1113 || XEXP (cond, 1) == constm1_rtx)
1114 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1122 /* Set edge->probability for each successor edge of BB. */
1124 guess_outgoing_edge_probabilities (basic_block bb)
1126 bb_estimate_probability_locally (bb);
1127 combine_predictions_for_insn (BB_END (bb), bb);
1130 static tree expr_expected_value (tree, bitmap);
1132 /* Helper function for expr_expected_value. */
/* NOTE(review): guard/return lines are elided in this copy.  VISITED
   tracks SSA_NAME versions already examined to break cycles.  */
1135 expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1, bitmap visited)
1139 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1141 if (TREE_CONSTANT (op0))
1144 if (code != SSA_NAME)
1147 def = SSA_NAME_DEF_STMT (op0);
1149 /* If we were already here, break the infinite cycle. */
1150 if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0)))
1152 bitmap_set_bit (visited, SSA_NAME_VERSION (op0));
1154 if (gimple_code (def) == GIMPLE_PHI)
1156 /* All the arguments of the PHI node must have the same constant
1158 int i, n = gimple_phi_num_args (def);
1159 tree val = NULL, new_val;
1161 for (i = 0; i < n; i++)
1163 tree arg = PHI_ARG_DEF (def, i);
1165 /* If this PHI has itself as an argument, we cannot
1166 determine the string length of this argument. However,
1167 if we can find an expected constant value for the other
1168 PHI args then we can still be sure that this is
1169 likely a constant. So be optimistic and just
1170 continue with the next argument. */
1171 if (arg == PHI_RESULT (def))
1174 new_val = expr_expected_value (arg, visited);
1179 else if (!operand_equal_p (val, new_val, false))
/* Recurse through a defining assignment.  */
1184 if (is_gimple_assign (def))
1186 if (gimple_assign_lhs (def) != op0)
1189 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1190 gimple_assign_rhs1 (def),
1191 gimple_assign_rhs_code (def),
1192 gimple_assign_rhs2 (def),
/* __builtin_expect (VAL, EXPECTED): the second argument is the value
   the programmer asserts VAL will usually have.  */
1196 if (is_gimple_call (def))
1198 tree decl = gimple_call_fndecl (def);
1201 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
1202 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
1206 if (gimple_call_num_args (def) != 2)
1208 val = gimple_call_arg (def, 0);
1209 if (TREE_CONSTANT (val))
1211 return gimple_call_arg (def, 1);
/* Binary/unary cases: fold the expected operand values and keep the
   result only if it folds to a constant.  */
1218 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1221 op0 = expr_expected_value (op0, visited);
1224 op1 = expr_expected_value (op1, visited);
1227 res = fold_build2 (code, type, op0, op1);
1228 if (TREE_CONSTANT (res))
1232 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1235 op0 = expr_expected_value (op0, visited);
1238 res = fold_build1 (code, type, op0);
1239 if (TREE_CONSTANT (res))
1246 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1247 The function is used by builtin_expect branch predictor so the evidence
1248 must come from this construct and additional possible constant folding.
1250 We may want to implement more involved value guess (such as value range
1251 propagation based prediction), but such tricks shall go to new
1255 expr_expected_value (tree expr, bitmap visited)
1257 enum tree_code code;
1260 if (TREE_CONSTANT (expr))
1263 extract_ops_from_tree (expr, &code, &op0, &op1);
1264 return expr_expected_value_1 (TREE_TYPE (expr),
1265 op0, code, op1, visited);
1269 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1270 we no longer need. */
/* NOTE(review): function header and basic-block iteration lines are
   elided in this copy.  */
1272 strip_predict_hints (void)
1280 gimple_stmt_iterator bi;
1281 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1283 gimple stmt = gsi_stmt (bi);
1285 if (gimple_code (stmt) == GIMPLE_PREDICT)
1287 gsi_remove (&bi, true);
1290 else if (gimple_code (stmt) == GIMPLE_CALL)
1292 tree fndecl = gimple_call_fndecl (stmt);
1295 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1296 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1297 && gimple_call_num_args (stmt) == 2)
/* Replace lhs = __builtin_expect (arg0, arg1) with lhs = arg0, since
   the hint has already been consumed by the predictors.  */
1299 var = gimple_call_lhs (stmt);
1300 ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1302 gsi_replace (&bi, ass_stmt, true);
1311 /* Predict using opcode of the last statement in basic block. */
/* NOTE(review): GIMPLE counterpart of bb_estimate_probability_locally;
   case labels and brace lines are partially elided in this copy.  */
1313 tree_predict_by_opcode (basic_block bb)
1315 gimple stmt = last_stmt (bb);
1324 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
/* Locate the TRUE successor; predictions below are recorded on it.  */
1326 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1327 if (then_edge->flags & EDGE_TRUE_VALUE)
1329 op0 = gimple_cond_lhs (stmt);
1330 op1 = gimple_cond_rhs (stmt);
1331 cmp = gimple_cond_code (stmt);
1332 type = TREE_TYPE (op0);
1333 visited = BITMAP_ALLOC (NULL);
/* First consult __builtin_expect-derived expected values.  */
1334 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
1335 BITMAP_FREE (visited);
1338 if (integer_zerop (val))
1339 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1341 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1344 /* Try "pointer heuristic."
1345 A comparison ptr == 0 is predicted as false.
1346 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1347 if (POINTER_TYPE_P (type))
1350 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1351 else if (cmp == NE_EXPR)
1352 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1356 /* Try "opcode heuristic."
1357 EQ tests are usually false and NE tests are usually true. Also,
1358 most quantities are positive, so we can make the appropriate guesses
1359 about signed comparisons against zero. */
1364 /* Floating point comparisons appears to behave in a very
1365 unpredictable way because of special role of = tests in
1367 if (FLOAT_TYPE_P (type))
1369 /* Comparisons with 0 are often used for booleans and there is
1370 nothing useful to predict about them. */
1371 else if (integer_zerop (op0) || integer_zerop (op1))
1374 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1379 /* Floating point comparisons appears to behave in a very
1380 unpredictable way because of special role of = tests in
1382 if (FLOAT_TYPE_P (type))
1384 /* Comparisons with 0 are often used for booleans and there is
1385 nothing useful to predict about them. */
1386 else if (integer_zerop (op0)
1387 || integer_zerop (op1))
1390 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1394 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1397 case UNORDERED_EXPR:
1398 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
/* Comparisons against 0, 1 and -1: most quantities are positive.  */
1403 if (integer_zerop (op1)
1404 || integer_onep (op1)
1405 || integer_all_onesp (op1)
1408 || real_minus_onep (op1))
1409 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1414 if (integer_zerop (op1)
1415 || integer_onep (op1)
1416 || integer_all_onesp (op1)
1419 || real_minus_onep (op1))
1420 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1428 /* Try to guess whether the value of return means error code. */
/* Classify a candidate return value VAL.  On a match, *PREDICTION is
   set to the expected branch direction and the matching predictor id
   is returned; otherwise PRED_NO_PREDICTION is returned and
   *PREDICTION is left untouched.  */
1430 static enum br_predictor
1431 return_prediction (tree val, enum prediction *prediction)
/* Early bail-out (guard condition elided in this listing).  */
1435 return PRED_NO_PREDICTION;
1436 /* Different heuristics for pointers and scalars. */
1437 if (POINTER_TYPE_P (TREE_TYPE (val)))
1439 /* NULL is usually not returned. */
1440 if (integer_zerop (val))
1442 *prediction = NOT_TAKEN;
1443 return PRED_NULL_RETURN;
1446 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1448 /* Negative return values are often used to indicate
/* ... errors (comment continuation elided).  */
1450 if (TREE_CODE (val) == INTEGER_CST
1451 && tree_int_cst_sgn (val) < 0)
1453 *prediction = NOT_TAKEN;
1454 return PRED_NEGATIVE_RETURN;
1456 /* Constant return values seems to be commonly taken.
1457 Zero/one often represent booleans so exclude them from the
/* ... heuristics (comment continuation elided).  */
1459 if (TREE_CONSTANT (val)
1460 && (!integer_zerop (val) && !integer_onep (val)))
1462 *prediction = TAKEN;
1463 return PRED_CONST_RETURN;
1466 return PRED_NO_PREDICTION;
1469 /* Find the basic block with return expression and look up for possible
1470 return value trying to apply RETURN_PREDICTION heuristics. */
1472 apply_return_prediction (void)
1474 gimple return_stmt = NULL;
1478 int phi_num_args, i;
1479 enum br_predictor pred;
1480 enum prediction direction;
/* Locate a GIMPLE_RETURN among the predecessors of the exit block.  */
1483 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1485 return_stmt = last_stmt (e->src);
1487 && gimple_code (return_stmt) == GIMPLE_RETURN)
1492 return_val = gimple_return_retval (return_stmt);
/* Heuristics only apply when the returned SSA name is merged by a
   PHI — i.e. different paths return different values.  */
1495 if (TREE_CODE (return_val) != SSA_NAME
1496 || !SSA_NAME_DEF_STMT (return_val)
1497 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
1499 phi = SSA_NAME_DEF_STMT (return_val);
1500 phi_num_args = gimple_phi_num_args (phi);
1501 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1503 /* Avoid the degenerate case where all return values form the function
1504 belongs to same category (ie they are all positive constants)
1505 so we can hardly say something about them. */
1506 for (i = 1; i < phi_num_args; i++)
1507 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
/* i != phi_num_args means at least two args disagreed, so per-arg
   predictions carry information; apply them along each incoming path.  */
1509 if (i != phi_num_args)
1510 for (i = 0; i < phi_num_args; i++)
1512 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1513 if (pred != PRED_NO_PREDICTION)
1514 predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
1519 /* Look for basic block that contains unlikely to happen events
1520 (such as noreturn calls) and mark all paths leading to execution
1521 of this basic blocks as unlikely. */
1524 tree_bb_level_predictions (void)
/* Return-value heuristics run first; they mark paths that other
   block-level predictors below should not override.  */
1528 apply_return_prediction ();
1532 gimple_stmt_iterator gsi;
1534 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1536 gimple stmt = gsi_stmt (gsi);
1539 if (is_gimple_call (stmt))
/* Noreturn calls (abort, exit, ...) mark all paths reaching them
   as unlikely.  */
1541 if (gimple_call_flags (stmt) & ECF_NORETURN)
1542 predict_paths_leading_to (bb, PRED_NORETURN,
/* Calls to functions carrying the "cold" attribute likewise.  */
1544 decl = gimple_call_fndecl (stmt);
1546 && lookup_attribute ("cold",
1547 DECL_ATTRIBUTES (decl))
1548 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
1551 else if (gimple_code (stmt) == GIMPLE_PREDICT)
1553 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
1554 gimple_predict_outcome (stmt));
1555 /* Keep GIMPLE_PREDICT around so early inlining will propagate
1556 hints to callers. */
1562 #ifdef ENABLE_CHECKING
1564 /* Callback for pointer_map_traverse, asserts that the pointer map is
/* ... empty — i.e. every prediction recorded in bb_predictions was
   consumed by combine_predictions_for_bb.  Checking-only sanity hook.  */
1568 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
1569 void *data ATTRIBUTE_UNUSED)
1571 gcc_assert (!*value);
1576 /* Predict branch probabilities and estimate profile of the tree CFG. */
/* Main driver of the pass: sets up loop/dominator info, runs the
   block-level, early-return, call, and opcode heuristics, combines the
   predictions per block, and finally derives BB frequencies.  */
1578 tree_estimate_probability (void)
1582 loop_optimizer_init (0);
1583 if (dump_file && (dump_flags & TDF_DETAILS))
1584 flow_loops_dump (dump_file, NULL, 0);
/* Fake edges make every block reach the exit so post-dominance is
   well defined even with noreturn calls and infinite loops.  */
1586 add_noreturn_fake_exit_edges ();
1587 connect_infinite_loops_to_exit ();
1588 /* We use loop_niter_by_eval, which requires that the loops have
/* ... preheaders (comment continuation elided).  */
1590 create_preheaders (CP_SIMPLE_PREHEADERS);
1591 calculate_dominance_info (CDI_POST_DOMINATORS);
1593 bb_predictions = pointer_map_create ();
1594 tree_bb_level_predictions ();
1596 mark_irreducible_loops ();
1597 record_loop_exits ();
/* number_of_loops () counts the dummy outermost loop, hence "> 1".  */
1598 if (number_of_loops () > 1)
1606 FOR_EACH_EDGE (e, ei, bb->succs)
1608 /* Predict early returns to be probable, as we've already taken
1609 care for error returns and other cases are often used for
1610 fast paths through function.
1612 Since we've already removed the return statements, we are
1613 looking for CFG like:
/* (CFG diagram elided in this listing.)  */
1623 if (e->dest != bb->next_bb
1624 && e->dest != EXIT_BLOCK_PTR
1625 && single_succ_p (e->dest)
1626 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
1627 && gimple_code (last_stmt (e->dest)) == GIMPLE_RETURN)
/* Do not override the more specific return-value predictors.  */
1632 if (single_succ_p (bb))
1634 FOR_EACH_EDGE (e1, ei1, bb->preds)
1635 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1636 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1637 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
1638 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1641 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
1642 && !predicted_by_p (e->src, PRED_CONST_RETURN)
1643 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
1644 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1647 /* Look for block we are guarding (ie we dominate it,
1648 but it doesn't postdominate us). */
1649 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1650 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1651 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1653 gimple_stmt_iterator bi;
1655 /* The call heuristic claims that a guarded function call
1656 is improbable. This is because such calls are often used
1657 to signal exceptional situations such as printing error
/* ... messages (comment continuation elided).  */
1659 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
1662 gimple stmt = gsi_stmt (bi);
1663 if (is_gimple_call (stmt)
1664 /* Constant and pure calls are hardly used to signalize
1665 something exceptional. */
1666 && gimple_has_side_effects (stmt))
1668 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1674 tree_predict_by_opcode (bb);
/* Fold all predictions attached to each block into one probability.  */
1677 combine_predictions_for_bb (bb);
1679 #ifdef ENABLE_CHECKING
/* All recorded predictions must have been consumed by now.  */
1680 pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
1682 pointer_map_destroy (bb_predictions);
1683 bb_predictions = NULL;
1685 estimate_bb_frequencies ();
1686 free_dominance_info (CDI_POST_DOMINATORS);
1687 remove_fake_exit_edges ();
1688 loop_optimizer_finalize ();
1689 if (dump_file && (dump_flags & TDF_DETAILS))
1690 gimple_dump_cfg (dump_file, dump_flags);
1691 if (profile_status == PROFILE_ABSENT)
1692 profile_status = PROFILE_GUESSED;
1696 /* Predict edges to successors of CUR whose sources are not postdominated by
1697 BB by PRED and recurse to all postdominators. */
1700 predict_paths_for_bb (basic_block cur, basic_block bb,
1701 enum br_predictor pred,
1702 enum prediction taken)
1708 /* We are looking for all edges forming edge cut induced by
1709 set of all blocks postdominated by BB. */
/* Edges whose source lies outside the postdominated region are
   exactly those crossing into paths that must reach BB.  Blocks with
   index < NUM_FIXED_BLOCKS are the entry/exit pseudo-blocks.  */
1710 FOR_EACH_EDGE (e, ei, cur->preds)
1711 if (e->src->index >= NUM_FIXED_BLOCKS
1712 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
1714 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
1715 predict_edge_def (e, pred, taken);
/* Recurse over the post-dominator tree rooted at CUR so the whole
   postdominated region is covered.  */
1717 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
1719 son = next_dom_son (CDI_POST_DOMINATORS, son))
1720 predict_paths_for_bb (son, bb, pred, taken);
1723 /* Sets branch probabilities according to PREDiction and
/* ... FLAGS (comment continuation elided).  Thin public wrapper: start
   the post-dominator walk with CUR == BB.  */
1727 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
1728 enum prediction taken)
1730 predict_paths_for_bb (bb, bb, pred, taken);
1733 /* This is used to carry information about basic blocks. It is
1734 attached to the AUX field of the standard CFG block. */
1736 typedef struct block_info_def
1738 /* Estimated frequency of execution of basic_block. */
1741 /* To keep queue of basic blocks to process. */
1744 /* Number of predecessors we need to visit first. */
1748 /* Similar information for edges. */
1749 typedef struct edge_info_def
1751 /* In case edge is a loopback edge, the probability edge will be reached
1752 in case header is. Estimated number of iterations of the loop can be
1753 then computed as 1 / (1 - back_edge_prob). */
1754 sreal back_edge_prob;
1755 /* True if the edge is a loopback edge in the natural loop. */
1756 unsigned int back_edge:1;
/* Accessor macros for the per-block / per-edge aux payload allocated
   by alloc_aux_for_blocks / alloc_aux_for_edges in
   estimate_bb_frequencies.  */
1759 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1760 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1762 /* Helper function for estimate_bb_frequencies.
1763 Propagate the frequencies in blocks marked in
1764 TOVISIT, starting in HEAD. */
/* Topological-order dataflow over the region in TOVISIT: each block's
   frequency is the probability-weighted sum of its non-back-edge
   predecessors, with back-edge (cyclic) probability folded in as the
   geometric-series factor 1 / (1 - cyclic_probability).  All real
   arithmetic uses the sreal software-real type.  */
1767 propagate_freq (basic_block head, bitmap tovisit)
1776 /* For each basic block we need to visit count number of his predecessors
1777 we need to visit first. */
1778 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1783 /* The outermost "loop" includes the exit block, which we can not
1784 look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR
1785 directly. Do the same for the entry block. */
1786 bb = BASIC_BLOCK (i);
1788 FOR_EACH_EDGE (e, ei, bb->preds)
1790 bool visit = bitmap_bit_p (tovisit, e->src->index);
/* Back edges (EDGE_DFS_BACK) are excluded from the predecessor
   count; they are handled via back_edge_prob instead.  */
1792 if (visit && !(e->flags & EDGE_DFS_BACK))
1794 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1796 "Irreducible region hit, ignoring edge to %i->%i\n",
1797 e->src->index, bb->index);
1799 BLOCK_INFO (bb)->npredecessors = count;
/* The region head enters with frequency 1.  */
1802 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1804 for (bb = head; bb; bb = nextbb)
1807 sreal cyclic_probability, frequency;
1809 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1810 memcpy (&frequency, &real_zero, sizeof (real_zero));
1812 nextbb = BLOCK_INFO (bb)->next;
1813 BLOCK_INFO (bb)->next = NULL;
1815 /* Compute frequency of basic block. */
1818 #ifdef ENABLE_CHECKING
/* All forward predecessors must already be resolved.  */
1819 FOR_EACH_EDGE (e, ei, bb->preds)
1820 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1821 || (e->flags & EDGE_DFS_BACK));
1824 FOR_EACH_EDGE (e, ei, bb->preds)
1825 if (EDGE_INFO (e)->back_edge)
1827 sreal_add (&cyclic_probability, &cyclic_probability,
1828 &EDGE_INFO (e)->back_edge_prob);
1830 else if (!(e->flags & EDGE_DFS_BACK))
1834 /* frequency += (e->probability
1835 * BLOCK_INFO (e->src)->frequency /
1836 REG_BR_PROB_BASE); */
1838 sreal_init (&tmp, e->probability, 0);
1839 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1840 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1841 sreal_add (&frequency, &frequency, &tmp);
1844 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1846 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1847 sizeof (frequency));
/* Clamp so the division below cannot blow up (estimated iteration
   count is capped near REG_BR_PROB_BASE).  */
1851 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1853 memcpy (&cyclic_probability, &real_almost_one,
1854 sizeof (real_almost_one));
1857 /* BLOCK_INFO (bb)->frequency = frequency
1858 / (1 - cyclic_probability) */
1860 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1861 sreal_div (&BLOCK_INFO (bb)->frequency,
1862 &frequency, &cyclic_probability);
1866 bitmap_clear_bit (tovisit, bb->index);
/* If there is a latch edge back to HEAD, refresh its back-edge
   probability from the just-computed frequency of BB.  */
1868 e = find_edge (bb, head);
1873 /* EDGE_INFO (e)->back_edge_prob
1874 = ((e->probability * BLOCK_INFO (bb)->frequency)
1875 / REG_BR_PROB_BASE); */
1877 sreal_init (&tmp, e->probability, 0);
1878 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1879 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1880 &tmp, &real_inv_br_prob_base);
1883 /* Propagate to successor blocks. */
1884 FOR_EACH_EDGE (e, ei, bb->succs)
1885 if (!(e->flags & EDGE_DFS_BACK)
1886 && BLOCK_INFO (e->dest)->npredecessors)
1888 BLOCK_INFO (e->dest)->npredecessors--;
/* Once the last pending predecessor is resolved, append the
   successor to the intrusive work queue.  */
1889 if (!BLOCK_INFO (e->dest)->npredecessors)
1894 BLOCK_INFO (last)->next = e->dest;
1902 /* Estimate probabilities of loopback edges in loops at same nest level. */
/* Recurses inner-loops-first so back_edge_prob of nested loops is
   available when the enclosing loop is propagated.  */
1905 estimate_loops_at_level (struct loop *first_loop)
1909 for (loop = first_loop; loop; loop = loop->next)
1914 bitmap tovisit = BITMAP_ALLOC (NULL);
1916 estimate_loops_at_level (loop->inner);
1918 /* Find current loop back edge and mark it. */
1919 e = loop_latch_edge (loop);
1920 EDGE_INFO (e)->back_edge = 1;
/* Propagate frequencies over exactly the blocks of this loop body.  */
1922 bbs = get_loop_body (loop);
1923 for (i = 0; i < loop->num_nodes; i++)
1924 bitmap_set_bit (tovisit, bbs[i]->index);
1926 propagate_freq (loop->header, tovisit);
1927 BITMAP_FREE (tovisit);
1931 /* Propagates frequencies through structure of loops. */
1934 estimate_loops (void)
1936 bitmap tovisit = BITMAP_ALLOC (NULL);
1939 /* Start by estimating the frequencies in the loops. */
/* "> 1" because number_of_loops () counts the dummy root loop.  */
1940 if (number_of_loops () > 1)
1941 estimate_loops_at_level (current_loops->tree_root->inner);
1943 /* Now propagate the frequencies through all the blocks. */
/* Final pass over the whole function, rooted at the entry block.  */
1946 bitmap_set_bit (tovisit, bb->index);
1948 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
1949 BITMAP_FREE (tovisit);
1952 /* Convert counts measured by profile driven feedback to frequencies.
1953 Return nonzero iff there was any nonzero execution count. */
1956 counts_to_freqs (void)
1958 gcov_type count_max, true_count_max = 0;
1962 true_count_max = MAX (bb->count, true_count_max);
/* Avoid division by zero when the function was never executed.  */
1964 count_max = MAX (true_count_max, 1);
/* Scale every block count to the [0, BB_FREQ_MAX] range, rounding to
   nearest via the "+ count_max / 2" term.  */
1965 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1966 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1968 return true_count_max;
1971 /* Return true if function is likely to be expensive, so there is no point to
1972 optimize performance of prologue, epilogue or do inlining at the expense
1973 of code size growth. THRESHOLD is the limit of number of instructions
1974 function can execute at average to be still considered not expensive. */
1977 expensive_function_p (int threshold)
1979 unsigned int sum = 0;
1983 /* We can not compute accurately for large thresholds due to scaled
/* ... frequencies (comment continuation elided).  */
1985 gcc_assert (threshold <= BB_FREQ_MAX);
1987 /* Frequencies are out of range. This either means that function contains
1988 internal loop executing more than BB_FREQ_MAX times or profile feedback
1989 is available and function has not been executed at all. */
1990 if (ENTRY_BLOCK_PTR->frequency == 0)
1993 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
1994 limit = ENTRY_BLOCK_PTR->frequency * threshold;
/* Weight each active insn by its block's frequency; exceeding LIMIT
   means the average dynamic insn count beats THRESHOLD.  */
1999 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2000 insn = NEXT_INSN (insn))
2001 if (active_insn_p (insn))
2003 sum += bb->frequency;
2012 /* Estimate basic blocks frequency by given branch probabilities. */
/* Skipped entirely when real profile counts are available (they were
   already converted by counts_to_freqs).  Otherwise runs the sreal
   propagation machinery and scales the result to [0, BB_FREQ_MAX].  */
2015 estimate_bb_frequencies (void)
2020 if (!flag_branch_probabilities || !counts_to_freqs ())
2022 static int real_values_initialized = 0;
/* One-time lazy initialization of the sreal constants declared at
   file scope (real_zero, real_one, ...).  */
2024 if (!real_values_initialized)
2026 real_values_initialized = 1;
2027 sreal_init (&real_zero, 0, 0);
2028 sreal_init (&real_one, 1, 0);
2029 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2030 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
/* 1 * 2^-1 == 0.5, used for round-to-nearest below.  */
2031 sreal_init (&real_one_half, 1, -1);
2032 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2033 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2036 mark_dfs_back_edges ();
/* The entry edge is always taken.  */
2038 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2040 /* Set up block info for each basic block. */
2041 alloc_aux_for_blocks (sizeof (struct block_info_def));
2042 alloc_aux_for_edges (sizeof (struct edge_info_def));
2043 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2048 FOR_EACH_EDGE (e, ei, bb->succs)
/* Seed back_edge_prob with e->probability / REG_BR_PROB_BASE.  */
2050 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2051 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2052 &EDGE_INFO (e)->back_edge_prob,
2053 &real_inv_br_prob_base);
2057 /* First compute probabilities locally for each loop from innermost
2058 to outermost to examine probabilities for back edges. */
/* Normalize: find the maximum sreal frequency, then rescale every
   block so the hottest block maps to BB_FREQ_MAX.  */
2061 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2063 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2064 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2066 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2067 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2071 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
/* + 0.5 before truncation == round to nearest.  */
2072 sreal_add (&tmp, &tmp, &real_one_half);
2073 bb->frequency = sreal_to_int (&tmp);
2076 free_aux_for_blocks ();
2077 free_aux_for_edges ();
2079 compute_function_frequency ();
2080 if (flag_reorder_functions)
2081 choose_function_section ();
2084 /* Decide whether function is hot, cold or unlikely executed. */
2086 compute_function_frequency (void)
/* Without profile feedback, fall back to user attributes only.  */
2090 if (!profile_info || !flag_branch_probabilities)
2092 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2094 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
2095 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2097 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
/* With feedback: start at UNLIKELY and upgrade as soon as any block
   qualifies as maybe-hot / not-never-executed.  */
2100 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
2103 if (maybe_hot_bb_p (bb))
2105 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
2108 if (!probably_never_executed_bb_p (bb))
2109 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2113 /* Choose appropriate section for the function. */
/* Places hot functions into HOT_TEXT_SECTION_NAME and unlikely ones
   into UNLIKELY_EXECUTED_TEXT_SECTION_NAME, unless the target or the
   decl already dictates a section.  */
2115 choose_function_section (void)
2117 if (DECL_SECTION_NAME (current_function_decl)
2118 || !targetm.have_named_sections
2119 /* Theoretically we can split the gnu.linkonce text section too,
2120 but this requires more work as the frequency needs to match
2121 for all generated objects so we need to merge the frequency
2122 of all instances. For now just never set frequency for these. */
2123 || DECL_ONE_ONLY (current_function_decl))
2126 /* If we are doing the partitioning optimization, let the optimization
2127 choose the correct section into which to put things. */
2129 if (flag_reorder_blocks_and_partition)
2132 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
2133 DECL_SECTION_NAME (current_function_decl) =
2134 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
2135 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
2136 DECL_SECTION_NAME (current_function_decl) =
2137 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
2138 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
/* Pass gate: the profile pass runs only under -fguess-branch-probability.  */
2142 gate_estimate_probability (void)
2144 return flag_guess_branch_prob;
2147 /* Build PREDICT_EXPR. */
/* Wrap PREDICTOR as an integer constant inside a void PREDICT_EXPR
   tree node and stash TAKEN in its outcome field.  */
2149 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2151 tree t = build1 (PREDICT_EXPR, void_type_node,
2152 build_int_cst (NULL, predictor));
2153 PREDICT_EXPR_OUTCOME (t) = taken;
/* Return the human-readable name of PREDICTOR from the static
   predictor_info table (used for dumps).  */
2158 predictor_name (enum br_predictor predictor)
2160 return predictor_info[predictor].name;
/* Pass descriptor for the branch-probability estimation pass; gated on
   -fguess-branch-probability, requires a CFG, timed under
   TV_BRANCH_PROB.  */
2163 struct gimple_opt_pass pass_profile =
2167 "profile", /* name */
2168 gate_estimate_probability, /* gate */
2169 tree_estimate_probability, /* execute */
2172 0, /* static_pass_number */
2173 TV_BRANCH_PROB, /* tv_id */
2174 PROP_cfg, /* properties_required */
2175 0, /* properties_provided */
2176 0, /* properties_destroyed */
2177 0, /* todo_flags_start */
2178 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
2182 struct gimple_opt_pass pass_strip_predict_hints =
2188 strip_predict_hints, /* execute */
2191 0, /* static_pass_number */
2192 TV_BRANCH_PROB, /* tv_id */
2193 PROP_cfg, /* properties_required */
2194 0, /* properties_provided */
2195 0, /* properties_destroyed */
2196 0, /* todo_flags_start */
2197 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */