1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
55 #include "tree-flow.h"
57 #include "tree-dump.h"
58 #include "tree-pass.h"
60 #include "tree-scalar-evolution.h"
62 #include "pointer-set.h"
/* NOTE(review): this file is a downsampled dump -- each line begins with its
   original line number and many interior lines are elided from this view.  */
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
/* Precomputed sreal (software-real) constants, presumably initialized once
   at pass startup -- initializer not visible here.  */
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
69 /* Random guesstimation given names.  All probabilities in this file are
   expressed on the fixed-point scale 0..REG_BR_PROB_BASE. */
70 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 100 - 1)
71 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
72 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73 #define PROB_ALWAYS (REG_BR_PROB_BASE)
/* Forward declarations for file-local helpers defined later.  */
75 static void combine_predictions_for_insn (rtx, basic_block);
76 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
78 static void compute_function_frequency (void);
79 static void choose_function_section (void);
80 static bool can_predict_insn_p (const_rtx);
82 /* Information we hold about each branch predictor.
83 Filled using information from predict.def. */
/* NOTE(review): the struct header line is elided in this dump; only the
   members are visible.  */
87 const char *const name; /* Name used in the debugging dumps. */
88 const int hitrate; /* Expected hitrate used by
89 predict_insn_def call. */
93 /* Use given predictor without Dempster-Shafer theory if it matches
94 using first_match heuristics. */
95 #define PRED_FLAG_FIRST_MATCH 1
97 /* Recompute hitrate in percent to our representation.  The "+ 50" rounds
   to nearest when converting a percentage to the REG_BR_PROB_BASE scale. */
99 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
/* Expand each DEF_PREDICTOR in predict.def into one table entry.  */
101 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
102 static const struct predictor_info predictor_info[]= {
103 #include "predict.def"
105 /* Upper bound on predictors. */
110 /* Return TRUE if frequency FREQ is considered to be hot. */
/* NOTE(review): return type, braces and the per-branch return values are
   elided in this dump -- each `if' below presumably returns early.  */
112 maybe_hot_frequency_p (int freq)
114 if (!profile_info || !flag_branch_probabilities)
116 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
118 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
121 if (profile_status == PROFILE_ABSENT)
/* FREQ is compared against a fraction of BB_FREQ_MAX controlled by the
   HOT_BB_FREQUENCY_FRACTION param.  */
123 if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
128 /* Return true in case BB can be CPU intensive and should be optimized
129 for maximal performance. */
132 maybe_hot_bb_p (const_basic_block bb)
/* With real profile data, compare the block's execution count against a
   fraction of the profile's sum_max (condition partly elided in dump).  */
134 if (profile_info && flag_branch_probabilities
136 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
/* Otherwise fall back to the static frequency heuristic.  */
138 return maybe_hot_frequency_p (bb->frequency);
141 /* Return true if the call can be hot. */
144 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
/* Profile-based test first (condition partly elided in dump).  */
146 if (profile_info && flag_branch_probabilities
148 <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
/* A call to or from a function marked "cold" is never hot.  */
150 if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
151 || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
/* A call from a function marked "hot" is presumed hot.  */
153 if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
/* Guessed-profile frequency test against CGRAPH_FREQ_MAX fraction.  */
155 if (flag_guess_branch_prob
156 && edge->frequency < (CGRAPH_FREQ_MAX
157 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
162 /* Return true in case edge E can be CPU intensive and should be optimized
163 for maximal performance. */
166 maybe_hot_edge_p (edge e)
/* Profile-based test (condition partly elided in dump).  */
168 if (profile_info && flag_branch_probabilities
170 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
/* Fall back to the edge's static frequency estimate.  */
172 return maybe_hot_frequency_p (EDGE_FREQUENCY (e));
175 /* Return true in case BB is cold and should be optimized for size. */
178 probably_cold_bb_p (const_basic_block bb)
/* Profile-count test (condition partly elided in dump).  */
180 if (profile_info && flag_branch_probabilities
182 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
/* Without profile data, a function marked unlikely-executed is cold.  */
184 if ((!profile_info || !flag_branch_probabilities)
185 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
187 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
192 /* Return true in case BB is probably never executed. */
194 probably_never_executed_bb_p (const_basic_block bb)
196 if (profile_info && flag_branch_probabilities)
/* Rounded average executions per run; zero means "never executed".  */
197 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
198 if ((!profile_info || !flag_branch_probabilities)
199 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* NOTE(review): the following run of trivial size/speed predicate wrappers is
   fragmentary in this dump (return types and braces elided).  Each *_speed_p
   is visibly the negation of the corresponding *_size_p.  */
204 /* Return true when current function should always be optimized for size. */
207 optimize_function_for_size_p (struct function *fun)
209 return (optimize_size
210 || fun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED);
213 /* Return true when current function should always be optimized for speed. */
216 optimize_function_for_speed_p (struct function *fun)
218 return !optimize_function_for_size_p (fun);
221 /* Return TRUE when BB should be optimized for size. */
224 optimize_bb_for_size_p (const_basic_block bb)
226 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
229 /* Return TRUE when BB should be optimized for speed. */
232 optimize_bb_for_speed_p (const_basic_block bb)
234 return !optimize_bb_for_size_p (bb);
237 /* Return TRUE when edge E should be optimized for size. */
240 optimize_edge_for_size_p (edge e)
242 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
245 /* Return TRUE when edge E should be optimized for speed. */
248 optimize_edge_for_speed_p (edge e)
250 return !optimize_edge_for_size_p (e);
253 /* Return TRUE when the current insn (per crtl state) should be optimized
   for size. */
256 optimize_insn_for_size_p (void)
258 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
261 /* Return TRUE when the current insn should be optimized for speed. */
264 optimize_insn_for_speed_p (void)
266 return !optimize_insn_for_size_p ();
269 /* Return TRUE when LOOP should be optimized for size. */
272 optimize_loop_for_size_p (struct loop *loop)
/* A loop is judged by its header block.  */
274 return optimize_bb_for_size_p (loop->header);
277 /* Return TRUE when LOOP should be optimized for speed. */
280 optimize_loop_for_speed_p (struct loop *loop)
282 return optimize_bb_for_speed_p (loop->header);
285 /* Return TRUE when LOOP nest should be optimized for speed. */
288 optimize_loop_nest_for_speed_p (struct loop *loop)
290 struct loop *l = loop;
291 if (optimize_loop_for_speed_p (loop))
/* Walk the rest of the nest (traversal partly elided in dump); any hot
   member loop makes the whole nest hot.  */
294 while (l && l != loop)
296 if (optimize_loop_for_speed_p (l))
308 /* Return TRUE when LOOP nest should be optimized for size. */
311 optimize_loop_nest_for_size_p (struct loop *loop)
313 return !optimize_loop_nest_for_speed_p (loop);
316 /* Set RTL expansion for BB profile: record whether insns currently being
   expanded belong to a hot block.  */
319 rtl_profile_for_bb (basic_block bb)
321 crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
324 /* Set RTL expansion for edge profile. */
327 rtl_profile_for_edge (edge e)
329 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
332 /* Set RTL expansion to default mode (i.e. when profile info is not known).
   Default is optimistic: assume hot.  */
334 default_rtl_profile (void)
336 crtl->maybe_hot_insn_p = true;
339 /* Return true if the one of outgoing edges is already predicted by
   PREDICTOR (RTL level: scan REG_BR_PRED notes on the block-ending insn;
   comment tail elided in this dump).  */
343 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
346 if (!INSN_P (BB_END (bb)))
/* REG_NOTES is a linked list chained through XEXP (note, 1).  */
348 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
349 if (REG_NOTE_KIND (note) == REG_BR_PRED
350 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
355 /* This map contains for a basic block the list of predictions for the
   outgoing edges (used by the GIMPLE-level predictors below).  */
358 static struct pointer_map_t *bb_predictions;
360 /* Return true if the one of outgoing edges is already predicted by
   PREDICTOR (GIMPLE level: consult the bb_predictions map).  */
364 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
366 struct edge_prediction *i;
367 void **preds = pointer_map_contains (bb_predictions, bb);
/* Walk the singly-linked prediction list stored in the map slot.  */
372 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
373 if (i->ep_predictor == predictor)
378 /* Return true when the probability of edge is reliable.
380 The profile guessing code is good at predicting branch outcome (ie.
381 taken/not taken), that is predicted right slightly over 75% of time.
382 It is however notoriously poor on predicting the probability itself.
383 In general the profile appear a lot flatter (with probabilities closer
384 to 50%) than the reality so it is bad idea to use it to drive optimization
385 such as those disabling dynamic branch prediction for well predictable
388 There are two exceptions - edges leading to noreturn edges and edges
389 predicted by number of iterations heuristics are predicted well. This macro
390 should be able to distinguish those, but at the moment it simply checks for
391 noreturn heuristic that is only one giving probability over 99% or below
392 1%. In future we might want to propagate reliability information across the
393 CFG if we find this information useful on multiple places. */
395 probability_reliable_p (int prob)
/* Read profiles are always trusted; guessed ones only at the extremes.  */
397 return (profile_status == PROFILE_READ
398 || (profile_status == PROFILE_GUESSED
399 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
402 /* Same predicate as above, working on edges. */
404 edge_probability_reliable_p (const_edge e)
406 return probability_reliable_p (e->probability);
409 /* Same predicate as edge_probability_reliable_p, working on notes. */
411 br_prob_note_reliable_p (const_rtx note)
413 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
414 return probability_reliable_p (INTVAL (XEXP (note, 0)));
/* Attach a REG_BR_PRED note recording (PREDICTOR, PROBABILITY) to the
   conditional jump INSN.  No-op when branch guessing is disabled.  */
418 predict_insn (rtx insn, enum br_predictor predictor, int probability)
420 gcc_assert (any_condjump_p (insn));
421 if (!flag_guess_branch_prob)
/* The note payload is a CONCAT of two integer constants.  */
424 add_reg_note (insn, REG_BR_PRED,
425 gen_rtx_CONCAT (VOIDmode,
426 GEN_INT ((int) predictor),
427 GEN_INT ((int) probability)));
430 /* Predict insn by given predictor, using the predictor's table hitrate;
   TAKEN selects whether the hitrate or its complement is used.  */
433 predict_insn_def (rtx insn, enum br_predictor predictor,
434 enum prediction taken)
436 int probability = predictor_info[(int) predictor].hitrate;
/* NOT_TAKEN presumably flips the probability (condition line elided).  */
439 probability = REG_BR_PROB_BASE - probability;
441 predict_insn (insn, predictor, probability);
444 /* Predict edge E with given probability if possible (RTL level). */
447 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
450 last_insn = BB_END (e->src);
452 /* We can store the branch prediction information only about
453 conditional jumps. */
454 if (!any_condjump_p (last_insn))
457 /* We always store probability of branching.  A fallthru edge's
   probability must therefore be complemented.  */
458 if (e->flags & EDGE_FALLTHRU)
459 probability = REG_BR_PROB_BASE - probability;
461 predict_insn (last_insn, predictor, probability);
464 /* Predict edge E with the given PROBABILITY (GIMPLE level): push an
   edge_prediction record onto E->src's list in bb_predictions. */
466 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
/* Predictions must be recorded before probabilities are combined.  */
468 gcc_assert (profile_status != PROFILE_GUESSED);
469 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
470 && flag_guess_branch_prob && optimize)
472 struct edge_prediction *i = XNEW (struct edge_prediction);
473 void **preds = pointer_map_insert (bb_predictions, e->src);
/* Prepend to the per-block singly-linked list.  */
475 i->ep_next = (struct edge_prediction *) *preds;
477 i->ep_probability = probability;
478 i->ep_predictor = predictor;
483 /* Remove all predictions on given basic block that are attached
   to edge E (comment tail elided in this dump).  */
486 remove_predictions_associated_with_edge (edge e)
493 preds = pointer_map_contains (bb_predictions, e->src);
/* Classic unlink-from-singly-linked-list via pointer-to-pointer.  */
497 struct edge_prediction **prediction = (struct edge_prediction **) preds;
498 struct edge_prediction *next;
502 if ((*prediction)->ep_edge == e)
504 next = (*prediction)->ep_next;
509 prediction = &((*prediction)->ep_next);
514 /* Clears the list of predictions stored for BB. */
517 clear_bb_predictions (basic_block bb)
519 void **preds = pointer_map_contains (bb_predictions, bb);
520 struct edge_prediction *pred, *next;
/* Free every record; NEXT is saved before the node is released.  */
525 for (pred = (struct edge_prediction *) *preds; pred; pred = next)
527 next = pred->ep_next;
533 /* Return true when we can store prediction on insn INSN.
534 At the moment we represent predictions only on conditional
535 jumps, not at computed jump or other complicated cases. */
537 can_predict_insn_p (const_rtx insn)
539 return (JUMP_P (insn)
540 && any_condjump_p (insn)
541 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
544 /* Predict edge E by given predictor if possible, using the predictor's
   table hitrate; TAKEN selects the hitrate or its complement.  */
547 predict_edge_def (edge e, enum br_predictor predictor,
548 enum prediction taken)
550 int probability = predictor_info[(int) predictor].hitrate;
/* NOT_TAKEN presumably flips the probability (condition line elided).  */
553 probability = REG_BR_PROB_BASE - probability;
555 predict_edge (e, predictor, probability);
558 /* Invert all branch predictions or probability notes in the INSN. This needs
559 to be done each time we invert the condition used by the jump. */
562 invert_br_probabilities (rtx insn)
566 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
567 if (REG_NOTE_KIND (note) == REG_BR_PROB)
/* REG_BR_PROB stores a bare probability: complement it.  */
568 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
569 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
/* REG_BR_PRED stores (predictor, probability); complement slot 1.  */
570 XEXP (XEXP (note, 0), 1)
571 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
574 /* Dump information about the branch prediction to the output file.
   USED distinguishes predictions that contributed to the combined
   probability from ones printed as "(ignored)".  */
577 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
578 basic_block bb, int used)
/* Find the non-fallthru (branch) successor edge for hit statistics.  */
586 FOR_EACH_EDGE (e, ei, bb->succs)
587 if (! (e->flags & EDGE_FALLTHRU))
590 fprintf (file, " %s heuristics%s: %.1f%%",
591 predictor_info[predictor].name,
592 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
/* With real profile counts, also print exec/hit counts and hit rate.  */
596 fprintf (file, " exec ");
597 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
600 fprintf (file, " hit ");
601 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
602 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
606 fprintf (file, "\n");
609 /* We can not predict the probabilities of outgoing edges of bb. Set them
610 evenly and hope for the best. */
612 set_even_probabilities (basic_block bb)
/* First pass: count the non-EH, non-fake successors (counter line elided
   in this dump).  */
618 FOR_EACH_EDGE (e, ei, bb->succs)
619 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
/* Second pass: give each such edge a rounded 1/nedges share.  */
621 FOR_EACH_EDGE (e, ei, bb->succs)
622 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
623 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
628 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
629 note if not already present. Remove now useless REG_BR_PRED notes. */
632 combine_predictions_for_insn (rtx insn, basic_block bb)
637 int best_probability = PROB_EVEN;
638 int best_predictor = END_PREDICTORS;
639 int combined_probability = REG_BR_PROB_BASE / 2;
641 bool first_match = false;
/* Predictions can only live on conditional jumps; otherwise just spread
   the probability evenly over the successors.  */
644 if (!can_predict_insn_p (insn))
646 set_even_probabilities (bb);
650 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
/* FIX(review): the source dump had the mojibake `®_NOTES' here -- the
   `&R' of `&REG_NOTES' was collapsed into an HTML (R) entity, destroying
   the address-of operator.  Restored to take the address of the note list
   head so notes can be unlinked in place below.  */
651 pnote = &REG_NOTES (insn);
653 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
656 /* We implement "first match" heuristics and use probability guessed
657 by predictor with smallest index. */
658 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
659 if (REG_NOTE_KIND (note) == REG_BR_PRED)
661 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
662 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
665 if (best_predictor > predictor)
666 best_probability = probability, best_predictor = predictor;
/* Dempster-Shafer combination of the running probability with this
   predictor's probability.  */
668 d = (combined_probability * probability
669 + (REG_BR_PROB_BASE - combined_probability)
670 * (REG_BR_PROB_BASE - probability));
672 /* Use FP math to avoid overflows of 32bit integers. */
674 /* If one probability is 0% and one 100%, avoid division by zero. */
675 combined_probability = REG_BR_PROB_BASE / 2;
677 combined_probability = (((double) combined_probability) * probability
678 * REG_BR_PROB_BASE / d + 0.5);
681 /* Decide which heuristic to use. In case we didn't match anything,
682 use no_prediction heuristic, in case we did match, use either
683 first match or Dempster-Shafer theory depending on the flags. */
685 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
689 dump_prediction (dump_file, PRED_NO_PREDICTION,
690 combined_probability, bb, true);
693 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
695 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
700 combined_probability = best_probability;
701 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
/* Walk the note list, dumping and unlinking the now-useless REG_BR_PRED
   notes via the PNOTE pointer-to-pointer.  */
705 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
707 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
708 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
710 dump_prediction (dump_file, predictor, probability, bb,
711 !first_match || best_predictor == predictor);
712 *pnote = XEXP (*pnote, 1);
715 pnote = &XEXP (*pnote, 1);
720 add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));
722 /* Save the prediction into CFG in case we are seeing non-degenerated
724 if (!single_succ_p (bb))
726 BRANCH_EDGE (bb)->probability = combined_probability;
727 FALLTHRU_EDGE (bb)->probability
728 = REG_BR_PROB_BASE - combined_probability;
/* An existing REG_BR_PROB note wins over the combination above.  */
731 else if (!single_succ_p (bb))
733 int prob = INTVAL (XEXP (prob_note, 0));
735 BRANCH_EDGE (bb)->probability = prob;
736 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
739 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
742 /* Combine predictions into single probability and store them into CFG.
743 Remove now useless prediction entries.  GIMPLE-level counterpart of
   combine_predictions_for_insn, driven by the bb_predictions map. */
746 combine_predictions_for_bb (basic_block bb)
748 int best_probability = PROB_EVEN;
749 int best_predictor = END_PREDICTORS;
750 int combined_probability = REG_BR_PROB_BASE / 2;
752 bool first_match = false;
754 struct edge_prediction *pred;
756 edge e, first = NULL, second = NULL;
/* Identify the (at most) two real successor edges, skipping EH/fake.  */
760 FOR_EACH_EDGE (e, ei, bb->succs)
761 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
764 if (first && !second)
770 /* When there is no successor or only one choice, prediction is easy.
772 We are lazy for now and predict only basic blocks with two outgoing
773 edges. It is possible to predict generic case too, but we have to
774 ignore first match heuristics and do more involved combining. Implement
779 set_even_probabilities (bb);
780 clear_bb_predictions (bb);
782 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
788 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
790 preds = pointer_map_contains (bb_predictions, bb);
793 /* We implement "first match" heuristics and use probability guessed
794 by predictor with smallest index. */
795 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
797 int predictor = pred->ep_predictor;
798 int probability = pred->ep_probability;
/* Normalize all probabilities to be relative to FIRST edge.  */
800 if (pred->ep_edge != first)
801 probability = REG_BR_PROB_BASE - probability;
804 if (best_predictor > predictor)
805 best_probability = probability, best_predictor = predictor;
/* Dempster-Shafer combination, same scheme as the RTL variant.  */
807 d = (combined_probability * probability
808 + (REG_BR_PROB_BASE - combined_probability)
809 * (REG_BR_PROB_BASE - probability));
811 /* Use FP math to avoid overflows of 32bit integers. */
813 /* If one probability is 0% and one 100%, avoid division by zero. */
814 combined_probability = REG_BR_PROB_BASE / 2;
816 combined_probability = (((double) combined_probability)
818 * REG_BR_PROB_BASE / d + 0.5);
822 /* Decide which heuristic to use. In case we didn't match anything,
823 use no_prediction heuristic, in case we did match, use either
824 first match or Dempster-Shafer theory depending on the flags. */
826 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
830 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
833 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
835 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
840 combined_probability = best_probability;
841 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
/* Dump every individual prediction that fed the combination.  */
845 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
847 int predictor = pred->ep_predictor;
848 int probability = pred->ep_probability;
850 if (pred->ep_edge != EDGE_SUCC (bb, 0))
851 probability = REG_BR_PROB_BASE - probability;
852 dump_prediction (dump_file, predictor, probability, bb,
853 !first_match || best_predictor == predictor);
856 clear_bb_predictions (bb);
/* Commit the result to the CFG: FIRST gets the combined probability,
   SECOND its complement.  */
860 first->probability = combined_probability;
861 second->probability = REG_BR_PROB_BASE - combined_probability;
865 /* Predict edge probabilities by exploiting loop structure. */
/* NOTE(review): the enclosing function header (predict_loops) and many body
   lines are elided in this dump; the fragment below is its loop-walking
   interior.  */
875 /* Try to predict out blocks in a loop that are not part of a
877 FOR_EACH_LOOP (li, loop, 0)
879 basic_block bb, *bbs;
881 VEC (edge, heap) *exits;
882 struct tree_niter_desc niter_desc;
885 exits = get_loop_exit_edges (loop);
886 n_exits = VEC_length (edge, exits);
/* For each exit edge, try to derive an iteration-count-based prediction.  */
888 for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
891 HOST_WIDE_INT nitercst;
892 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
894 enum br_predictor predictor;
896 if (number_of_iterations_exit (loop, ex, &niter_desc, false))
897 niter = niter_desc.niter;
/* Fall back to brute-force evaluation when SCEV gave no constant.  */
898 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
899 niter = loop_niter_by_eval (loop, ex);
901 if (TREE_CODE (niter) == INTEGER_CST)
/* Clamp the iteration count to MAX (clamp branch partly elided).  */
903 if (host_integerp (niter, 1)
904 && compare_tree_int (niter, max-1) == -1)
905 nitercst = tree_low_cst (niter, 1) + 1;
908 predictor = PRED_LOOP_ITERATIONS;
910 /* If we have just one exit and we can derive some information about
911 the number of iterations of the loop from the statements inside
912 the loop, use it to predict this exit. */
913 else if (n_exits == 1)
915 nitercst = estimated_loop_iterations_int (loop, false);
921 predictor = PRED_LOOP_ITERATIONS_GUESSED;
/* Exit taken once per NITERCST iterations, rounded to nearest.  */
926 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
927 predict_edge (ex, predictor, probability);
929 VEC_free (edge, heap, exits);
931 bbs = get_loop_body (loop);
933 for (j = 0; j < loop->num_nodes; j++)
935 int header_found = 0;
941 /* Bypass loop heuristics on continue statement. These
942 statements construct loops via "non-loop" constructs
943 in the source language and are better to be handled
945 if (predicted_by_p (bb, PRED_CONTINUE))
948 /* Loop branch heuristics - predict an edge back to a
949 loop's head as taken. */
950 if (bb == loop->latch)
952 e = find_edge (loop->latch, loop->header);
956 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
960 /* Loop exit heuristics - predict an edge exiting the loop if the
961 conditional has no loop header successors as not taken. */
963 /* If we already used more reliable loop exit predictors, do not
964 bother with PRED_LOOP_EXIT. */
965 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
966 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
968 /* For loop with many exits we don't want to predict all exits
969 with the pretty large probability, because if all exits are
970 considered in row, the loop would be predicted to iterate
971 almost never. The code to divide probability by number of
972 exits is very rough. It should compute the number of exits
973 taken in each path through function (not the overall number
974 of exits that might be a lot higher for loops with wide switch
975 statements in them) and compute n-th square root.
977 We limit the minimal probability by 2% to avoid
978 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
979 as this was causing regression in perl benchmark containing such
982 int probability = ((REG_BR_PROB_BASE
983 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
985 if (probability < HITRATE (2))
986 probability = HITRATE (2);
/* Predict each edge leaving the loop (or reaching a fixed block).  */
987 FOR_EACH_EDGE (e, ei, bb->succs)
988 if (e->dest->index < NUM_FIXED_BLOCKS
989 || !flow_bb_inside_loop_p (loop, e->dest))
990 predict_edge (e, PRED_LOOP_EXIT, probability);
994 /* Free basic blocks from get_loop_body. */
1001 /* Attempt to predict probabilities of BB outgoing edges using local
   heuristics on the block-ending conditional jump (comment tail elided).  */
1004 bb_estimate_probability_locally (basic_block bb)
1006 rtx last_insn = BB_END (bb);
1009 if (! can_predict_insn_p (last_insn))
1011 cond = get_condition (last_insn, NULL, false, false);
1015 /* Try "pointer heuristic."
1016 A comparison ptr == 0 is predicted as false.
1017 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1018 if (COMPARISON_P (cond)
1019 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1020 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1022 if (GET_CODE (cond) == EQ)
1023 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1024 else if (GET_CODE (cond) == NE)
1025 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1029 /* Try "opcode heuristic."
1030 EQ tests are usually false and NE tests are usually true. Also,
1031 most quantities are positive, so we can make the appropriate guesses
1032 about signed comparisons against zero. */
/* NOTE(review): the case labels of this switch are elided in this dump;
   the bodies below presumably correspond to CONST_INT/EQ/NE/ordered/
   unordered/LT/GE-style cases as in the comments.  */
1033 switch (GET_CODE (cond))
1036 /* Unconditional branch. */
1037 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1038 cond == const0_rtx ? NOT_TAKEN : TAKEN);
1043 /* Floating point comparisons appears to behave in a very
1044 unpredictable way because of special role of = tests in
1046 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1048 /* Comparisons with 0 are often used for booleans and there is
1049 nothing useful to predict about them. */
1050 else if (XEXP (cond, 1) == const0_rtx
1051 || XEXP (cond, 0) == const0_rtx)
1054 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1059 /* Floating point comparisons appears to behave in a very
1060 unpredictable way because of special role of = tests in
1062 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1064 /* Comparisons with 0 are often used for booleans and there is
1065 nothing useful to predict about them. */
1066 else if (XEXP (cond, 1) == const0_rtx
1067 || XEXP (cond, 0) == const0_rtx)
1070 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1074 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1078 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
/* Comparisons against 0, 1 or -1 drive the "positive quantity" guess.  */
1083 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1084 || XEXP (cond, 1) == constm1_rtx)
1085 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1090 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1091 || XEXP (cond, 1) == constm1_rtx)
1092 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1100 /* Set edge->probability for each successor edge of BB: run the local
   heuristics, then fold the resulting REG_BR_PRED notes into one value.  */
1102 guess_outgoing_edge_probabilities (basic_block bb)
1104 bb_estimate_probability_locally (bb);
1105 combine_predictions_for_insn (BB_END (bb), bb);
1108 static tree expr_expected_value (tree, bitmap);
1110 /* Helper function for expr_expected_value.  Computes the expected value
   of the expression (TYPE, OP0 CODE OP1); VISITED guards against SSA
   cycles.  Returns the expected constant or NULL (returns elided here). */
1113 expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1, bitmap visited)
1117 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1119 if (TREE_CONSTANT (op0))
1122 if (code != SSA_NAME)
/* Chase the SSA definition of OP0.  */
1125 def = SSA_NAME_DEF_STMT (op0);
1127 /* If we were already here, break the infinite cycle. */
1128 if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0)))
1130 bitmap_set_bit (visited, SSA_NAME_VERSION (op0));
1132 if (gimple_code (def) == GIMPLE_PHI)
1134 /* All the arguments of the PHI node must have the same constant
1136 int i, n = gimple_phi_num_args (def);
1137 tree val = NULL, new_val;
1139 for (i = 0; i < n; i++)
1141 tree arg = PHI_ARG_DEF (def, i);
1143 /* If this PHI has itself as an argument, we cannot
1144 determine the string length of this argument. However,
1145 if we can find an expected constant value for the other
1146 PHI args then we can still be sure that this is
1147 likely a constant. So be optimistic and just
1148 continue with the next argument. */
1149 if (arg == PHI_RESULT (def))
1152 new_val = expr_expected_value (arg, visited);
/* All PHI arguments must agree on the same expected constant.  */
1157 else if (!operand_equal_p (val, new_val, false))
1162 if (is_gimple_assign (def))
1164 if (gimple_assign_lhs (def) != op0)
/* Recurse into the defining assignment's RHS.  */
1167 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1168 gimple_assign_rhs1 (def),
1169 gimple_assign_rhs_code (def),
1170 gimple_assign_rhs2 (def),
1174 if (is_gimple_call (def))
1176 tree decl = gimple_call_fndecl (def);
/* __builtin_expect (VAL, EXPECTED): the second argument is the hint.  */
1179 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
1180 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
1184 if (gimple_call_num_args (def) != 2)
1186 val = gimple_call_arg (def, 0);
1187 if (TREE_CONSTANT (val))
1189 return gimple_call_arg (def, 1);
/* Binary RHS: fold the expected values of both operands.  */
1196 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1199 op0 = expr_expected_value (op0, visited);
1202 op1 = expr_expected_value (op1, visited);
1205 res = fold_build2 (code, type, op0, op1);
1206 if (TREE_CONSTANT (res))
/* Unary RHS: fold the expected value of the single operand.  */
1210 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1213 op0 = expr_expected_value (op0, visited);
1216 res = fold_build1 (code, type, op0);
1217 if (TREE_CONSTANT (res))
1224 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1225 The function is used by builtin_expect branch predictor so the evidence
1226 must come from this construct and additional possible constant folding.
1228 We may want to implement more involved value guess (such as value range
1229 propagation based prediction), but such tricks shall go to new
1233 expr_expected_value (tree expr, bitmap visited)
1235 enum tree_code code;
1238 if (TREE_CONSTANT (expr))
/* Decompose EXPR into (code, op0, op1) and delegate to the worker.  */
1241 extract_ops_from_tree (expr, &code, &op0, &op1);
1242 return expr_expected_value_1 (TREE_TYPE (expr),
1243 op0, code, op1, visited);
1247 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1248 we no longer need. */
1250 strip_predict_hints (void)
1258 gimple_stmt_iterator bi;
/* No gsi_next in the loop header: removal/replacement advances BI.  */
1259 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1261 gimple stmt = gsi_stmt (bi);
1263 if (gimple_code (stmt) == GIMPLE_PREDICT)
1265 gsi_remove (&bi, true);
1268 else if (gimple_code (stmt) == GIMPLE_CALL)
1270 tree fndecl = gimple_call_fndecl (stmt);
1273 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1274 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1275 && gimple_call_num_args (stmt) == 2)
/* Replace lhs = __builtin_expect (val, hint) with lhs = val.  */
1277 var = gimple_call_lhs (stmt);
1278 ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1280 gsi_replace (&bi, ass_stmt, true);
1289 /* Predict using opcode of the last statement in basic block.
   GIMPLE counterpart of bb_estimate_probability_locally.  */
1291 tree_predict_by_opcode (basic_block bb)
1293 gimple stmt = last_stmt (bb);
1302 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
/* Find the edge taken when the condition is true.  */
1304 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1305 if (then_edge->flags & EDGE_TRUE_VALUE)
1307 op0 = gimple_cond_lhs (stmt);
1308 op1 = gimple_cond_rhs (stmt);
1309 cmp = gimple_cond_code (stmt);
1310 type = TREE_TYPE (op0);
/* First, honor any __builtin_expect evidence on the condition.  */
1311 visited = BITMAP_ALLOC (NULL);
1312 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
1313 BITMAP_FREE (visited);
1316 if (integer_zerop (val))
1317 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1319 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1322 /* Try "pointer heuristic."
1323 A comparison ptr == 0 is predicted as false.
1324 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1325 if (POINTER_TYPE_P (type))
1328 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1329 else if (cmp == NE_EXPR)
1330 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1334 /* Try "opcode heuristic."
1335 EQ tests are usually false and NE tests are usually true. Also,
1336 most quantities are positive, so we can make the appropriate guesses
1337 about signed comparisons against zero. */
/* NOTE(review): the switch header and most case labels are elided in this
   dump; the bodies below presumably handle EQ/NE, ordered/unordered and
   LT/LE vs GT/GE comparison codes.  */
1342 /* Floating point comparisons appears to behave in a very
1343 unpredictable way because of special role of = tests in
1345 if (FLOAT_TYPE_P (type))
1347 /* Comparisons with 0 are often used for booleans and there is
1348 nothing useful to predict about them. */
1349 else if (integer_zerop (op0) || integer_zerop (op1))
1352 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1357 /* Floating point comparisons appears to behave in a very
1358 unpredictable way because of special role of = tests in
1360 if (FLOAT_TYPE_P (type))
1362 /* Comparisons with 0 are often used for booleans and there is
1363 nothing useful to predict about them. */
1364 else if (integer_zerop (op0)
1365 || integer_zerop (op1))
1368 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1372 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1375 case UNORDERED_EXPR:
1376 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
/* Comparisons against 0, 1 or -1 drive the "positive quantity" guess.  */
1381 if (integer_zerop (op1)
1382 || integer_onep (op1)
1383 || integer_all_onesp (op1)
1386 || real_minus_onep (op1))
1387 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1392 if (integer_zerop (op1)
1393 || integer_onep (op1)
1394 || integer_all_onesp (op1)
1397 || real_minus_onep (op1))
1398 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1406 /* Try to guess whether the value of return means error code. */
/* VAL is the value being returned; on a successful prediction the
   direction is stored into *PREDICTION and a predictor enum other than
   PRED_NO_PREDICTION is returned.  */
1408 static enum br_predictor
1409 return_prediction (tree val, enum prediction *prediction)
/* NOTE(review): a guard (presumably for null/invalid VAL) preceding
   this early return is missing from the extract.  */
1413 return PRED_NO_PREDICTION;
1414 /* Different heuristics for pointers and scalars. */
1415 if (POINTER_TYPE_P (TREE_TYPE (val)))
1417 /* NULL is usually not returned. */
1418 if (integer_zerop (val))
1420 *prediction = NOT_TAKEN;
1421 return PRED_NULL_RETURN;
1424 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1426 /* Negative return values are often used to indicate
1428 if (TREE_CODE (val) == INTEGER_CST
1429 && tree_int_cst_sgn (val) < 0)
1431 *prediction = NOT_TAKEN;
1432 return PRED_NEGATIVE_RETURN;
1434 /* Constant return values seems to be commonly taken.
1435 Zero/one often represent booleans so exclude them from the
1437 if (TREE_CONSTANT (val)
1438 && (!integer_zerop (val) && !integer_onep (val)))
1440 *prediction = TAKEN;
1441 return PRED_CONST_RETURN;
/* Nothing matched: no prediction.  */
1444 return PRED_NO_PREDICTION;
1447 /* Find the basic block with return expression and look up for possible
1448 return value trying to apply RETURN_PREDICTION heuristics. */
1450 apply_return_prediction (void)
1452 gimple return_stmt = NULL;
1456 int phi_num_args, i;
1457 enum br_predictor pred;
1458 enum prediction direction;
/* Find a predecessor of the exit block that ends with GIMPLE_RETURN.  */
1461 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1463 return_stmt = last_stmt (e->src);
1465 && gimple_code (return_stmt) == GIMPLE_RETURN)
1470 return_val = gimple_return_retval (return_stmt);
/* Only a returned SSA name defined by a PHI is interesting: each PHI
   argument corresponds to one return path we can predict separately.  */
1473 if (TREE_CODE (return_val) != SSA_NAME
1474 || !SSA_NAME_DEF_STMT (return_val)
1475 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
1477 phi = SSA_NAME_DEF_STMT (return_val);
1478 phi_num_args = gimple_phi_num_args (phi);
1479 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1481 /* Avoid the degenerate case where all return values form the function
1482 belongs to same category (ie they are all positive constants)
1483 so we can hardly say something about them. */
1484 for (i = 1; i < phi_num_args; i++)
1485 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
/* Predictions differ between arguments: predict each return path.  */
1487 if (i != phi_num_args)
1488 for (i = 0; i < phi_num_args; i++)
1490 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1491 if (pred != PRED_NO_PREDICTION)
1492 predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
1497 /* Look for basic block that contains unlikely to happen events
1498 (such as noreturn calls) and mark all paths leading to execution
1499 of this basic blocks as unlikely. */
1502 tree_bb_level_predictions (void)
/* First apply the return-value heuristics.  */
1506 apply_return_prediction ();
1510 gimple_stmt_iterator gsi;
/* Scan every statement of the block (enclosing FOR_EACH_BB loop is
   missing from this extract).  */
1512 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1514 gimple stmt = gsi_stmt (gsi);
1517 if (is_gimple_call (stmt))
/* Paths reaching a noreturn call are cold.  */
1519 if (gimple_call_flags (stmt) & ECF_NORETURN)
1520 predict_paths_leading_to (bb, PRED_NORETURN,
/* Likewise for calls to functions declared with attribute "cold".  */
1522 decl = gimple_call_fndecl (stmt);
1524 && lookup_attribute ("cold",
1525 DECL_ATTRIBUTES (decl)))
1526 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
1529 else if (gimple_code (stmt) == GIMPLE_PREDICT)
/* Honor explicit GIMPLE_PREDICT hints.  */
1531 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
1532 gimple_predict_outcome (stmt));
1533 /* Keep GIMPLE_PREDICT around so early inlining will propagate
1534 hints to callers. */
1540 #ifdef ENABLE_CHECKING
1542 /* Callback for pointer_map_traverse, asserts that the pointer map is
/* Used after combine_predictions_for_bb has consumed all entries:
   every remaining value must be NULL.  */
1546 assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
1547 void *data ATTRIBUTE_UNUSED)
1549 gcc_assert (!*value);
1554 /* Predict branch probabilities and estimate profile of the tree CFG. */
/* NOTE(review): interior lines (loop headers, braces) are missing from
   this extract; comments annotate only the surviving statements.  */
1556 tree_estimate_probability (void)
/* Set up loop info and fake edges so every block reaches the exit.  */
1560 loop_optimizer_init (0);
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1562 flow_loops_dump (dump_file, NULL, 0);
1564 add_noreturn_fake_exit_edges ();
1565 connect_infinite_loops_to_exit ();
1566 /* We use loop_niter_by_eval, which requires that the loops have
1568 create_preheaders (CP_SIMPLE_PREHEADERS);
1569 calculate_dominance_info (CDI_POST_DOMINATORS);
/* Per-BB prediction records live in this pointer map until they are
   combined; it must be empty again by the end of the pass.  */
1571 bb_predictions = pointer_map_create ();
1572 tree_bb_level_predictions ();
1574 mark_irreducible_loops ();
1575 record_loop_exits ();
1576 if (number_of_loops () > 1)
1584 FOR_EACH_EDGE (e, ei, bb->succs)
1586 /* Predict early returns to be probable, as we've already taken
1587 care for error returns and other cases are often used for
1588 fast paths through function.
1590 Since we've already removed the return statements, we are
1591 looking for CFG like:
/* Case: E leads (via a single-successor block) straight to a return.  */
1601 if (e->dest != bb->next_bb
1602 && e->dest != EXIT_BLOCK_PTR
1603 && single_succ_p (e->dest)
1604 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
1605 && gimple_code (last_stmt (e->dest)) == GIMPLE_RETURN)
/* Don't override the more specific return-value predictors.  */
1610 if (single_succ_p (bb))
1612 FOR_EACH_EDGE (e1, ei1, bb->preds)
1613 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1614 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1615 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
1616 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1619 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
1620 && !predicted_by_p (e->src, PRED_CONST_RETURN)
1621 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
1622 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1625 /* Look for block we are guarding (ie we dominate it,
1626 but it doesn't postdominate us). */
1627 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1628 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1629 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1631 gimple_stmt_iterator bi;
1633 /* The call heuristic claims that a guarded function call
1634 is improbable. This is because such calls are often used
1635 to signal exceptional situations such as printing error
1637 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
1640 gimple stmt = gsi_stmt (bi);
1641 if (is_gimple_call (stmt)
1642 /* Constant and pure calls are hardly used to signalize
1643 something exceptional. */
1644 && gimple_has_side_effects (stmt))
1646 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
/* Finally apply opcode heuristics and merge all predictions.  */
1652 tree_predict_by_opcode (bb);
1655 combine_predictions_for_bb (bb);
1657 #ifdef ENABLE_CHECKING
/* All per-BB records must have been consumed by now.  */
1658 pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
1660 pointer_map_destroy (bb_predictions);
1661 bb_predictions = NULL;
/* Turn edge probabilities into BB frequencies, then tear down.  */
1663 estimate_bb_frequencies ();
1664 free_dominance_info (CDI_POST_DOMINATORS);
1665 remove_fake_exit_edges ();
1666 loop_optimizer_finalize ();
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1668 gimple_dump_cfg (dump_file, dump_flags);
1669 if (profile_status == PROFILE_ABSENT)
1670 profile_status = PROFILE_GUESSED;
1674 /* Predict edges to successors of CUR whose sources are not postdominated by
1675 BB by PRED and recurse to all postdominators. */
1678 predict_paths_for_bb (basic_block cur, basic_block bb,
1679 enum br_predictor pred,
1680 enum prediction taken)
1686 /* We are looking for all edges forming edge cut induced by
1687 set of all blocks postdominated by BB. */
1688 FOR_EACH_EDGE (e, ei, cur->preds)
/* Skip entry/exit blocks and edges whose source already lies inside
   the postdominated region.  */
1689 if (e->src->index >= NUM_FIXED_BLOCKS
1690 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
1692 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
1693 predict_edge_def (e, pred, taken);
/* Recurse into every block CUR immediately postdominates.  */
1695 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
1697 son = next_dom_son (CDI_POST_DOMINATORS, son))
1698 predict_paths_for_bb (son, bb, pred, taken);
1701 /* Sets branch probabilities according to PREDiction and
/* Thin wrapper: start the postdominator walk with CUR == BB.  */
1705 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
1706 enum prediction taken)
1708 predict_paths_for_bb (bb, bb, pred, taken);
1711 /* This is used to carry information about basic blocks. It is
1712 attached to the AUX field of the standard CFG block. */
1714 typedef struct block_info_def
1716 /* Estimated frequency of execution of basic_block. */
1719 /* To keep queue of basic blocks to process. */
1722 /* Number of predecessors we need to visit first. */
1726 /* Similar information for edges. */
1727 typedef struct edge_info_def
1729 /* In case edge is a loopback edge, the probability edge will be reached
1730 in case header is. Estimated number of iterations of the loop can be
1731 then computed as 1 / (1 - back_edge_prob). */
1732 sreal back_edge_prob;
1733 /* True if the edge is a loopback edge in the natural loop. */
1734 unsigned int back_edge:1;
/* Accessors for the per-block/per-edge info stashed in the AUX fields
   (allocated by alloc_aux_for_blocks/alloc_aux_for_edges).  */
1737 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1738 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1740 /* Helper function for estimate_bb_frequencies.
1741 Propagate the frequencies in blocks marked in
1742 TOVISIT, starting in HEAD. */
/* NOTE(review): interior lines (braces, some declarations) are missing
   from this extract; the comments annotate the surviving statements.  */
1745 propagate_freq (basic_block head, bitmap tovisit)
1754 /* For each basic block we need to visit count number of his predecessors
1755 we need to visit first. */
1756 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1761 /* The outermost "loop" includes the exit block, which we can not
1762 look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR
1763 directly. Do the same for the entry block. */
1764 bb = BASIC_BLOCK (i);
1766 FOR_EACH_EDGE (e, ei, bb->preds)
1768 bool visit = bitmap_bit_p (tovisit, e->src->index)
/* Forward (non-DFS-back) edges from to-visit sources count as
   prerequisites; back edges from irreducible regions are ignored.  */
1770 if (visit && !(e->flags & EDGE_DFS_BACK))
1772 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1774 "Irreducible region hit, ignoring edge to %i->%i\n",
1775 e->src->index, bb->index);
1777 BLOCK_INFO (bb)->npredecessors = count;
/* Seed the walk: the head block executes with relative frequency 1.  */
1780 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1782 for (bb = head; bb; bb = nextbb)
1785 sreal cyclic_probability, frequency;
1787 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1788 memcpy (&frequency, &real_zero, sizeof (real_zero));
/* Pop BB from the work queue threaded through BLOCK_INFO->next.  */
1790 nextbb = BLOCK_INFO (bb)->next;
1791 BLOCK_INFO (bb)->next = NULL;
1793 /* Compute frequency of basic block. */
1796 #ifdef ENABLE_CHECKING
/* All forward predecessors must already have been processed.  */
1797 FOR_EACH_EDGE (e, ei, bb->preds)
1798 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1799 || (e->flags & EDGE_DFS_BACK));
1802 FOR_EACH_EDGE (e, ei, bb->preds)
1803 if (EDGE_INFO (e)->back_edge)
/* Accumulate loopback probability separately ...  */
1805 sreal_add (&cyclic_probability, &cyclic_probability,
1806 &EDGE_INFO (e)->back_edge_prob)
1808 else if (!(e->flags & EDGE_DFS_BACK))
/* ... and sum incoming frequency over forward edges.  */
1812 /* frequency += (e->probability
1813 * BLOCK_INFO (e->src)->frequency /
1814 REG_BR_PROB_BASE); */
1816 sreal_init (&tmp, e->probability, 0);
1817 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1818 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1819 sreal_add (&frequency, &frequency, &tmp);
1822 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
/* No loopback: frequency is just the forward-edge sum.  */
1824 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1825 sizeof (frequency));
/* Clamp so the division below can't blow up on probability ~1.  */
1829 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1831 memcpy (&cyclic_probability, &real_almost_one,
1832 sizeof (real_almost_one));
1835 /* BLOCK_INFO (bb)->frequency = frequency
1836 / (1 - cyclic_probability) */
1838 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1839 sreal_div (&BLOCK_INFO (bb)->frequency,
1840 &frequency, &cyclic_probability);
/* BB is done; record its contribution to the loopback edge, if any.  */
1844 bitmap_clear_bit (tovisit, bb->index);
1846 e = find_edge (bb, head);
1851 /* EDGE_INFO (e)->back_edge_prob
1852 = ((e->probability * BLOCK_INFO (bb)->frequency)
1853 / REG_BR_PROB_BASE); */
1855 sreal_init (&tmp, e->probability, 0);
1856 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1857 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1858 &tmp, &real_inv_br_prob_base);
1861 /* Propagate to successor blocks. */
1862 FOR_EACH_EDGE (e, ei, bb->succs)
1863 if (!(e->flags & EDGE_DFS_BACK)
1864 && BLOCK_INFO (e->dest)->npredecessors)
1866 BLOCK_INFO (e->dest)->npredecessors--;
/* Enqueue DEST once its last pending predecessor is processed.  */
1867 if (!BLOCK_INFO (e->dest)->npredecessors)
1872 BLOCK_INFO (last)->next = e->dest;
1880 /* Estimate probabilities of loopback edges in loops at same nest level. */
1883 estimate_loops_at_level (struct loop *first_loop)
1887 for (loop = first_loop; loop; loop = loop->next)
1892 bitmap tovisit = BITMAP_ALLOC (NULL);
/* Process inner loops first so their back-edge probabilities are
   known when the outer loop is propagated.  */
1894 estimate_loops_at_level (loop->inner);
1896 /* Find current loop back edge and mark it. */
1897 e = loop_latch_edge (loop);
1898 EDGE_INFO (e)->back_edge = 1;
/* Propagate frequencies over exactly the blocks of this loop body.  */
1900 bbs = get_loop_body (loop);
1901 for (i = 0; i < loop->num_nodes; i++)
1902 bitmap_set_bit (tovisit, bbs[i]->index);
1904 propagate_freq (loop->header, tovisit);
1905 BITMAP_FREE (tovisit);
1909 /* Propagates frequencies through structure of loops. */
1912 estimate_loops (void)
1914 bitmap tovisit = BITMAP_ALLOC (NULL);
1917 /* Start by estimating the frequencies in the loops. */
1918 if (number_of_loops () > 1)
1919 estimate_loops_at_level (current_loops->tree_root->inner);
1921 /* Now propagate the frequencies through all the blocks. */
/* All blocks are visited for the final whole-function propagation,
   starting from the entry block.  */
1924 bitmap_set_bit (tovisit, bb->index);
1926 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
1927 BITMAP_FREE (tovisit);
1930 /* Convert counts measured by profile driven feedback to frequencies.
1931 Return nonzero iff there was any nonzero execution count. */
1934 counts_to_freqs (void)
1936 gcov_type count_max, true_count_max = 0;
1940 true_count_max = MAX (bb->count, true_count_max);
/* Avoid division by zero when the whole profile is zero.  */
1942 count_max = MAX (true_count_max, 1);
/* Scale each block's count into [0, BB_FREQ_MAX], rounding to
   nearest via the +count_max/2 term.  */
1943 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1944 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1946 return true_count_max;
1949 /* Return true if function is likely to be expensive, so there is no point to
1950 optimize performance of prologue, epilogue or do inlining at the expense
1951 of code size growth. THRESHOLD is the limit of number of instructions
1952 function can execute at average to be still considered not expensive. */
1955 expensive_function_p (int threshold)
1957 unsigned int sum = 0;
1961 /* We can not compute accurately for large thresholds due to scaled
1963 gcc_assert (threshold <= BB_FREQ_MAX);
1965 /* Frequencies are out of range. This either means that function contains
1966 internal loop executing more than BB_FREQ_MAX times or profile feedback
1967 is available and function has not been executed at all. */
1968 if (ENTRY_BLOCK_PTR->frequency == 0)
1971 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
1972 limit = ENTRY_BLOCK_PTR->frequency * threshold;
/* Weight each active insn by its block's frequency; the function is
   expensive once the weighted sum exceeds LIMIT (comparison line is
   missing from this extract).  */
1977 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1978 insn = NEXT_INSN (insn))
1979 if (active_insn_p (insn))
1981 sum += bb->frequency;
1990 /* Estimate basic blocks frequency by given branch probabilities. */
1993 estimate_bb_frequencies (void)
/* Only guess when real profile counts are unavailable or empty.  */
1998 if (!flag_branch_probabilities || !counts_to_freqs ())
2000 static int real_values_initialized = 0;
/* Lazily set up the sreal constants declared at the top of file.  */
2002 if (!real_values_initialized)
2004 real_values_initialized = 1;
2005 sreal_init (&real_zero, 0, 0);
2006 sreal_init (&real_one, 1, 0);
2007 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2008 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2009 sreal_init (&real_one_half, 1, -1);
2010 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2011 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2014 mark_dfs_back_edges ();
/* The entry edge is always taken.  */
2016 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
2018 /* Set up block info for each basic block. */
2019 alloc_aux_for_blocks (sizeof (struct block_info_def));
2020 alloc_aux_for_edges (sizeof (struct edge_info_def));
2021 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2026 FOR_EACH_EDGE (e, ei, bb->succs)
/* back_edge_prob starts as e->probability / REG_BR_PROB_BASE.  */
2028 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2029 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2030 &EDGE_INFO (e)->back_edge_prob,
2031 &real_inv_br_prob_base);
2035 /* First compute probabilities locally for each loop from innermost
2036 to outermost to examine probabilities for back edges. */
/* Find the largest computed frequency ...  */
2039 memcpy (&freq_max, &real_zero, sizeof (real_zero));
2041 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2042 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
/* ... and rescale all frequencies so the maximum maps to BB_FREQ_MAX,
   rounding to nearest via +0.5 before truncation.  */
2044 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2045 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
2049 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2050 sreal_add (&tmp, &tmp, &real_one_half);
2051 bb->frequency = sreal_to_int (&tmp);
2054 free_aux_for_blocks ();
2055 free_aux_for_edges ();
2057 compute_function_frequency ();
2058 if (flag_reorder_functions)
2059 choose_function_section ();
2062 /* Decide whether function is hot, cold or unlikely executed. */
2064 compute_function_frequency (void)
/* Without profile feedback, fall back to attributes.  */
2068 if (!profile_info || !flag_branch_probabilities)
2070 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2072 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
2073 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2075 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
/* With feedback: start pessimistic, then upgrade based on blocks.  */
2078 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
/* Any maybe-hot block makes the whole function hot ...  */
2081 if (maybe_hot_bb_p (bb))
2083 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
/* ... any not-never-executed block promotes it to at least normal.  */
2086 if (!probably_never_executed_bb_p (bb))
2087 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2091 /* Choose appropriate section for the function. */
2093 choose_function_section (void)
/* Bail out when a section is already chosen, the target has no named
   sections, or the function is COMDAT/linkonce.  */
2095 if (DECL_SECTION_NAME (current_function_decl)
2096 || !targetm.have_named_sections
2097 /* Theoretically we can split the gnu.linkonce text section too,
2098 but this requires more work as the frequency needs to match
2099 for all generated objects so we need to merge the frequency
2100 of all instances. For now just never set frequency for these. */
2101 || DECL_ONE_ONLY (current_function_decl))
2104 /* If we are doing the partitioning optimization, let the optimization
2105 choose the correct section into which to put things. */
2107 if (flag_reorder_blocks_and_partition)
/* Route hot functions into the hot text section ...  */
2110 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
2111 DECL_SECTION_NAME (current_function_decl) =
2112 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
/* ... and unlikely-executed functions into the cold one.  */
2113 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
2114 DECL_SECTION_NAME (current_function_decl) =
2115 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
2116 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
/* Pass gate: run branch prediction only under -fguess-branch-probability.  */
2120 gate_estimate_probability (void)
2122 return flag_guess_branch_prob;
2125 /* Build PREDICT_EXPR. */
/* Wrap PREDICTOR and TAKEN into a void-typed PREDICT_EXPR tree node.  */
2127 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2129 tree t = build1 (PREDICT_EXPR, void_type_node,
2130 build_int_cst (NULL, predictor));
2131 PREDICT_EXPR_OUTCOME (t) = taken;
/* Return the human-readable name of PREDICTOR (for dumps).  */
2136 predictor_name (enum br_predictor predictor)
2138 return predictor_info[predictor].name;
/* Pass descriptor for the guessed-profile estimation pass ("profile").  */
2141 struct gimple_opt_pass pass_profile =
2145 "profile", /* name */
2146 gate_estimate_probability, /* gate */
2147 tree_estimate_probability, /* execute */
2150 0, /* static_pass_number */
2151 TV_BRANCH_PROB, /* tv_id */
2152 PROP_cfg, /* properties_required */
2153 0, /* properties_provided */
2154 0, /* properties_destroyed */
2155 0, /* todo_flags_start */
2156 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
2160 struct gimple_opt_pass pass_strip_predict_hints =
2166 strip_predict_hints, /* execute */
2169 0, /* static_pass_number */
2170 TV_BRANCH_PROB, /* tv_id */
2171 PROP_cfg, /* properties_required */
2172 0, /* properties_provided */
2173 0, /* properties_destroyed */
2174 0, /* todo_flags_start */
2175 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */