1 /* Branch prediction routines for the GNU compiler.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 [1] "Branch Prediction for Free"
24 Ball and Larus; PLDI '93.
25 [2] "Static Branch Frequency and Program Profile Analysis"
26 Wu and Larus; MICRO-27.
27 [3] "Corpus-based Static Branch Prediction"
28 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
33 #include "coretypes.h"
38 #include "hard-reg-set.h"
39 #include "basic-block.h"
40 #include "insn-config.h"
55 #include "tree-flow.h"
57 #include "tree-dump.h"
58 #include "tree-pass.h"
60 #include "tree-scalar-evolution.h"
62 #include "pointer-set.h"
64 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
66 static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
69 /* Random guesstimation given names. */
70 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 100 - 1)
71 #define PROB_EVEN (REG_BR_PROB_BASE / 2)
72 #define PROB_VERY_LIKELY (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73 #define PROB_ALWAYS (REG_BR_PROB_BASE)
75 static void combine_predictions_for_insn (rtx, basic_block);
76 static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77 static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
78 static void compute_function_frequency (void);
79 static void choose_function_section (void);
80 static bool can_predict_insn_p (const_rtx);
82 /* Information we hold about each branch predictor.
83 Filled using information from predict.def. */
87 const char *const name; /* Name used in the debugging dumps. */
88 const int hitrate; /* Expected hitrate used by
89 predict_insn_def call. */
93 /* Use given predictor without Dempster-Shaffer theory if it matches
94 using first_match heuristics. */
95 #define PRED_FLAG_FIRST_MATCH 1
97 /* Recompute hitrate in percent to our representation. */
99 #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
101 #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
102 static const struct predictor_info predictor_info[]= {
103 #include "predict.def"
105 /* Upper bound on predictors. */
110 /* Return TRUE if frequency FREQ is considered to be hot. */
112 maybe_hot_frequency_p (int freq)
114 if (!profile_info || !flag_branch_probabilities)
116 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
118 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
121 if (profile_status == PROFILE_ABSENT)
123 if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
128 /* Return true in case BB can be CPU intensive and should be optimized
129 for maximal performance. */
132 maybe_hot_bb_p (const_basic_block bb)
134 if (profile_info && flag_branch_probabilities
136 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
138 return maybe_hot_frequency_p (bb->frequency);
141 /* Return true in case BB can be CPU intensive and should be optimized
142 for maximal performance. */
145 maybe_hot_edge_p (edge e)
147 if (profile_info && flag_branch_probabilities
149 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
151 return maybe_hot_frequency_p (EDGE_FREQUENCY (e));
154 /* Return true in case BB is cold and should be optimized for size. */
157 probably_cold_bb_p (const_basic_block bb)
159 if (profile_info && flag_branch_probabilities
161 < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
163 if ((!profile_info || !flag_branch_probabilities)
164 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
166 if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
171 /* Return true in case BB is probably never executed. */
173 probably_never_executed_bb_p (const_basic_block bb)
175 if (profile_info && flag_branch_probabilities)
176 return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
177 if ((!profile_info || !flag_branch_probabilities)
178 && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
183 /* Return true when current function should always be optimized for size. */
186 optimize_function_for_size_p (struct function *fun)
188 return (optimize_size
189 || fun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED);
192 /* Return true when current function should always be optimized for speed. */
195 optimize_function_for_speed_p (struct function *fun)
197 return !optimize_function_for_size_p (fun);
200 /* Return TRUE when BB should be optimized for size. */
203 optimize_bb_for_size_p (const_basic_block bb)
205 return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
208 /* Return TRUE when BB should be optimized for speed. */
211 optimize_bb_for_speed_p (const_basic_block bb)
213 return !optimize_bb_for_size_p (bb);
216 /* Return TRUE when BB should be optimized for size. */
219 optimize_edge_for_size_p (edge e)
221 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
224 /* Return TRUE when BB should be optimized for speed. */
227 optimize_edge_for_speed_p (edge e)
229 return !optimize_edge_for_size_p (e);
232 /* Return TRUE when BB should be optimized for size. */
235 optimize_insn_for_size_p (void)
237 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
240 /* Return TRUE when BB should be optimized for speed. */
243 optimize_insn_for_speed_p (void)
245 return !optimize_insn_for_size_p ();
248 /* Return TRUE when LOOP should be optimized for size. */
251 optimize_loop_for_size_p (struct loop *loop)
253 return optimize_bb_for_size_p (loop->header);
256 /* Return TRUE when LOOP should be optimized for speed. */
259 optimize_loop_for_speed_p (struct loop *loop)
261 return optimize_bb_for_speed_p (loop->header);
264 /* Set RTL expansion for BB profile. */
267 rtl_profile_for_bb (basic_block bb)
269 crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
272 /* Set RTL expansion for edge profile. */
275 rtl_profile_for_edge (edge e)
277 crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
280 /* Set RTL expansion to default mode (i.e. when profile info is not known). */
282 default_rtl_profile (void)
284 crtl->maybe_hot_insn_p = true;
287 /* Return true if the one of outgoing edges is already predicted by
291 rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
294 if (!INSN_P (BB_END (bb)))
296 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
297 if (REG_NOTE_KIND (note) == REG_BR_PRED
298 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
303 /* This map contains for a basic block the list of predictions for the
306 static struct pointer_map_t *bb_predictions;
308 /* Return true if the one of outgoing edges is already predicted by
312 gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
314 struct edge_prediction *i;
315 void **preds = pointer_map_contains (bb_predictions, bb);
320 for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
321 if (i->ep_predictor == predictor)
326 /* Return true when the probability of edge is reliable.
328 The profile guessing code is good at predicting branch outcome (ie.
329 taken/not taken), that is predicted right slightly over 75% of time.
330 It is however notoriously poor on predicting the probability itself.
331 In general the profile appear a lot flatter (with probabilities closer
332 to 50%) than the reality so it is bad idea to use it to drive optimization
333 such as those disabling dynamic branch prediction for well predictable
336 There are two exceptions - edges leading to noreturn edges and edges
337 predicted by number of iterations heuristics are predicted well. This macro
338 should be able to distinguish those, but at the moment it simply check for
339 noreturn heuristic that is only one giving probability over 99% or bellow
340 1%. In future we might want to propagate reliability information across the
341 CFG if we find this information useful on multiple places. */
343 probability_reliable_p (int prob)
345 return (profile_status == PROFILE_READ
346 || (profile_status == PROFILE_GUESSED
347 && (prob <= HITRATE (1) || prob >= HITRATE (99))));
350 /* Same predicate as above, working on edges. */
352 edge_probability_reliable_p (const_edge e)
354 return probability_reliable_p (e->probability);
357 /* Same predicate as edge_probability_reliable_p, working on notes. */
359 br_prob_note_reliable_p (const_rtx note)
361 gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
362 return probability_reliable_p (INTVAL (XEXP (note, 0)));
366 predict_insn (rtx insn, enum br_predictor predictor, int probability)
368 gcc_assert (any_condjump_p (insn));
369 if (!flag_guess_branch_prob)
372 add_reg_note (insn, REG_BR_PRED,
373 gen_rtx_CONCAT (VOIDmode,
374 GEN_INT ((int) predictor),
375 GEN_INT ((int) probability)));
378 /* Predict insn by given predictor. */
381 predict_insn_def (rtx insn, enum br_predictor predictor,
382 enum prediction taken)
384 int probability = predictor_info[(int) predictor].hitrate;
387 probability = REG_BR_PROB_BASE - probability;
389 predict_insn (insn, predictor, probability);
392 /* Predict edge E with given probability if possible. */
395 rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
398 last_insn = BB_END (e->src);
400 /* We can store the branch prediction information only about
401 conditional jumps. */
402 if (!any_condjump_p (last_insn))
405 /* We always store probability of branching. */
406 if (e->flags & EDGE_FALLTHRU)
407 probability = REG_BR_PROB_BASE - probability;
409 predict_insn (last_insn, predictor, probability);
412 /* Predict edge E with the given PROBABILITY. */
414 gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
416 gcc_assert (profile_status != PROFILE_GUESSED);
417 if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
418 && flag_guess_branch_prob && optimize)
420 struct edge_prediction *i = XNEW (struct edge_prediction);
421 void **preds = pointer_map_insert (bb_predictions, e->src);
423 i->ep_next = (struct edge_prediction *) *preds;
425 i->ep_probability = probability;
426 i->ep_predictor = predictor;
431 /* Remove all predictions on given basic block that are attached
/* NOTE(review): interior lines of this function are missing from this
   fragment (original numbering jumps 432-433, 435-440, 442-444, ...);
   only the visible skeleton is documented below.  */
434 remove_predictions_associated_with_edge (edge e)
/* Look up the prediction list recorded for the edge's source block.  */
441 preds = pointer_map_contains (bb_predictions, e->src);
445 struct edge_prediction **prediction = (struct edge_prediction **) preds;
446 struct edge_prediction *next;
/* Entries whose ep_edge matches E are unlinked from the list ...  */
450 if ((*prediction)->ep_edge == e)
452 next = (*prediction)->ep_next;
/* ... otherwise the walk advances to the next link.  */
457 prediction = &((*prediction)->ep_next);
462 /* Clears the list of predictions stored for BB. */
/* NOTE(review): interior lines are missing here (original numbering jumps
   463-464, 466, 469-472, ...); presumably the loop frees each entry and
   the map slot is reset — verify against upstream predict.c.  */
465 clear_bb_predictions (basic_block bb)
467 void **preds = pointer_map_contains (bb_predictions, bb);
468 struct edge_prediction *pred, *next;
/* Walk the list, saving the next pointer before each entry is released.  */
473 for (pred = (struct edge_prediction *) *preds; pred; pred = next)
475 next = pred->ep_next;
481 /* Return true when we can store prediction on insn INSN.
482 At the moment we represent predictions only on conditional
483 jumps, not at computed jump or other complicated cases. */
485 can_predict_insn_p (const_rtx insn)
487 return (JUMP_P (insn)
488 && any_condjump_p (insn)
489 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
492 /* Predict edge E by given predictor if possible. */
495 predict_edge_def (edge e, enum br_predictor predictor,
496 enum prediction taken)
498 int probability = predictor_info[(int) predictor].hitrate;
501 probability = REG_BR_PROB_BASE - probability;
503 predict_edge (e, predictor, probability);
506 /* Invert all branch predictions or probability notes in the INSN. This needs
507 to be done each time we invert the condition used by the jump. */
510 invert_br_probabilities (rtx insn)
514 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
515 if (REG_NOTE_KIND (note) == REG_BR_PROB)
516 XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
517 else if (REG_NOTE_KIND (note) == REG_BR_PRED)
518 XEXP (XEXP (note, 0), 1)
519 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
522 /* Dump information about the branch prediction to the output file. */
/* NOTE(review): interior lines are missing from this fragment (original
   numbering jumps 523-524, 527-533, 536-537, ...), including the early
   return for a null FILE and the conditionals around the count output.  */
525 dump_prediction (FILE *file, enum br_predictor predictor, int probability,
526 basic_block bb, int used)
/* Find the non-fallthru successor edge of BB.  */
534 FOR_EACH_EDGE (e, ei, bb->succs)
535 if (! (e->flags & EDGE_FALLTHRU))
/* Print the heuristic name, whether it was used, and its probability.  */
538 fprintf (file, " %s heuristics%s: %.1f%%",
539 predictor_info[predictor].name,
540 used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
544 fprintf (file, " exec ");
545 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
548 fprintf (file, " hit ");
549 fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
550 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
554 fprintf (file, "\n");
557 /* We can not predict the probabilities of outgoing edges of bb. Set them
558 evenly and hope for the best. */
560 set_even_probabilities (basic_block bb)
566 FOR_EACH_EDGE (e, ei, bb->succs)
567 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
569 FOR_EACH_EDGE (e, ei, bb->succs)
570 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
571 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
576 /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
577 note if not already present. Remove now useless REG_BR_PRED notes. */
/* NOTE(review): many interior lines of this function are missing from this
   fragment (original numbering jumps throughout); the skeleton below shows
   the Dempster-Shafer combination of REG_BR_PRED notes on one insn.  */
580 combine_predictions_for_insn (rtx insn, basic_block bb)
585 int best_probability = PROB_EVEN;
586 int best_predictor = END_PREDICTORS;
587 int combined_probability = REG_BR_PROB_BASE / 2;
589 bool first_match = false;
/* Insns we cannot predict get even probabilities on all successors.  */
592 if (!can_predict_insn_p (insn))
594 set_even_probabilities (bb);
598 prob_note = find_reg_note (insn, REG_BR_PROB, 0);
/* NOTE(review): "®_NOTES" below is a mis-encoded "&REG_NOTES" ("&REG"
   garbled into the registered-sign character) — should be restored.  */
599 pnote = ®_NOTES (insn);
601 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
604 /* We implement "first match" heuristics and use probability guessed
605 by predictor with smallest index. */
606 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
607 if (REG_NOTE_KIND (note) == REG_BR_PRED)
609 int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
610 int probability = INTVAL (XEXP (XEXP (note, 0), 1));
/* Smaller predictor index = higher priority for first-match.  */
613 if (best_predictor > predictor)
614 best_probability = probability, best_predictor = predictor;
/* Dempster-Shafer denominator: p*q + (1-p)*(1-q), scaled.  */
616 d = (combined_probability * probability
617 + (REG_BR_PROB_BASE - combined_probability)
618 * (REG_BR_PROB_BASE - probability));
620 /* Use FP math to avoid overflows of 32bit integers. */
622 /* If one probability is 0% and one 100%, avoid division by zero. */
623 combined_probability = REG_BR_PROB_BASE / 2;
625 combined_probability = (((double) combined_probability) * probability
626 * REG_BR_PROB_BASE / d + 0.5);
629 /* Decide which heuristic to use. In case we didn't match anything,
630 use no_prediction heuristic, in case we did match, use either
631 first match or Dempster-Shaffer theory depending on the flags. */
633 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
637 dump_prediction (dump_file, PRED_NO_PREDICTION,
638 combined_probability, bb, true);
641 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
643 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
/* Under first-match, the winning heuristic's probability is used.  */
648 combined_probability = best_probability;
649 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
/* Strip the now-useless REG_BR_PRED notes, dumping each.  */
653 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
655 int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
656 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
658 dump_prediction (dump_file, predictor, probability, bb,
659 !first_match || best_predictor == predictor);
660 *pnote = XEXP (*pnote, 1);
663 pnote = &XEXP (*pnote, 1);
/* Attach the combined result as a REG_BR_PROB note ...  */
668 add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));
670 /* Save the prediction into CFG in case we are seeing non-degenerated
672 if (!single_succ_p (bb))
674 BRANCH_EDGE (bb)->probability = combined_probability;
675 FALLTHRU_EDGE (bb)->probability
676 = REG_BR_PROB_BASE - combined_probability;
/* ... or, with a pre-existing note, propagate its value to the edges.  */
679 else if (!single_succ_p (bb))
681 int prob = INTVAL (XEXP (prob_note, 0));
683 BRANCH_EDGE (bb)->probability = prob;
684 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
687 single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
690 /* Combine predictions into single probability and store them into CFG.
691 Remove now useless prediction entries. */
/* NOTE(review): many interior lines are missing from this fragment
   (original numbering jumps throughout); GIMPLE-level counterpart of
   combine_predictions_for_insn, working from the bb_predictions map.  */
694 combine_predictions_for_bb (basic_block bb)
696 int best_probability = PROB_EVEN;
697 int best_predictor = END_PREDICTORS;
698 int combined_probability = REG_BR_PROB_BASE / 2;
700 bool first_match = false;
702 struct edge_prediction *pred;
704 edge e, first = NULL, second = NULL;
/* Identify the (at most) two predictable successor edges.  */
708 FOR_EACH_EDGE (e, ei, bb->succs)
709 if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
712 if (first && !second)
718 /* When there is no successor or only one choice, prediction is easy.
720 We are lazy for now and predict only basic blocks with two outgoing
721 edges. It is possible to predict generic case too, but we have to
722 ignore first match heuristics and do more involved combining. Implement
727 set_even_probabilities (bb);
728 clear_bb_predictions (bb);
730 fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
736 fprintf (dump_file, "Predictions for bb %i\n", bb->index);
738 preds = pointer_map_contains (bb_predictions, bb);
741 /* We implement "first match" heuristics and use probability guessed
742 by predictor with smallest index. */
743 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
745 int predictor = pred->ep_predictor;
746 int probability = pred->ep_probability;
/* Normalize all probabilities to refer to the FIRST edge.  */
748 if (pred->ep_edge != first)
749 probability = REG_BR_PROB_BASE - probability;
752 if (best_predictor > predictor)
753 best_probability = probability, best_predictor = predictor;
/* Dempster-Shafer denominator: p*q + (1-p)*(1-q), scaled.  */
755 d = (combined_probability * probability
756 + (REG_BR_PROB_BASE - combined_probability)
757 * (REG_BR_PROB_BASE - probability));
759 /* Use FP math to avoid overflows of 32bit integers. */
761 /* If one probability is 0% and one 100%, avoid division by zero. */
762 combined_probability = REG_BR_PROB_BASE / 2;
764 combined_probability = (((double) combined_probability)
766 * REG_BR_PROB_BASE / d + 0.5);
770 /* Decide which heuristic to use. In case we didn't match anything,
771 use no_prediction heuristic, in case we did match, use either
772 first match or Dempster-Shaffer theory depending on the flags. */
774 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
778 dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
781 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
783 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
788 combined_probability = best_probability;
789 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
/* Dump each individual prediction that fed the combination.  */
793 for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
795 int predictor = pred->ep_predictor;
796 int probability = pred->ep_probability;
798 if (pred->ep_edge != EDGE_SUCC (bb, 0))
799 probability = REG_BR_PROB_BASE - probability;
800 dump_prediction (dump_file, predictor, probability, bb,
801 !first_match || best_predictor == predictor);
804 clear_bb_predictions (bb);
/* Store the combined result on the two edges.  */
808 first->probability = combined_probability;
809 second->probability = REG_BR_PROB_BASE - combined_probability;
813 /* Predict edge probabilities by exploiting loop structure. */
/* NOTE(review): many interior lines of this function are missing from this
   fragment (original numbering jumps throughout), including the function
   header, several declarations, and closing braces.  */
823 /* Try to predict out blocks in a loop that are not part of a
825 FOR_EACH_LOOP (li, loop, 0)
827 basic_block bb, *bbs;
829 VEC (edge, heap) *exits;
830 struct tree_niter_desc niter_desc;
/* Phase 1: predict each exit edge from iteration-count analysis.  */
833 exits = get_loop_exit_edges (loop);
834 n_exits = VEC_length (edge, exits);
836 for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
839 HOST_WIDE_INT nitercst;
840 int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
842 enum br_predictor predictor;
/* Prefer the symbolic niter analysis; fall back on brute-force
   evaluation when it yields no constant.  */
844 if (number_of_iterations_exit (loop, ex, &niter_desc, false))
845 niter = niter_desc.niter;
846 if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
847 niter = loop_niter_by_eval (loop, ex);
849 if (TREE_CODE (niter) == INTEGER_CST)
851 if (host_integerp (niter, 1)
852 && compare_tree_int (niter, max-1) == -1)
853 nitercst = tree_low_cst (niter, 1) + 1;
856 predictor = PRED_LOOP_ITERATIONS;
858 /* If we have just one exit and we can derive some information about
859 the number of iterations of the loop from the statements inside
860 the loop, use it to predict this exit. */
861 else if (n_exits == 1)
863 nitercst = estimated_loop_iterations_int (loop, false);
869 predictor = PRED_LOOP_ITERATIONS_GUESSED;
/* Exit probability ~ 1/nitercst of REG_BR_PROB_BASE, rounded.  */
874 probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
875 predict_edge (ex, predictor, probability);
877 VEC_free (edge, heap, exits);
/* Phase 2: walk the loop body applying structural heuristics.  */
879 bbs = get_loop_body (loop);
881 for (j = 0; j < loop->num_nodes; j++)
883 int header_found = 0;
889 /* Bypass loop heuristics on continue statement. These
890 statements construct loops via "non-loop" constructs
891 in the source language and are better to be handled
893 if (predicted_by_p (bb, PRED_CONTINUE))
896 /* Loop branch heuristics - predict an edge back to a
897 loop's head as taken. */
898 if (bb == loop->latch)
900 e = find_edge (loop->latch, loop->header)
904 predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
908 /* Loop exit heuristics - predict an edge exiting the loop if the
909 conditional has no loop header successors as not taken. */
911 /* If we already used more reliable loop exit predictors, do not
912 bother with PRED_LOOP_EXIT. */
913 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
914 && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
916 /* For loop with many exits we don't want to predict all exits
917 with the pretty large probability, because if all exits are
918 considered in row, the loop would be predicted to iterate
919 almost never. The code to divide probability by number of
920 exits is very rough. It should compute the number of exits
921 taken in each patch through function (not the overall number
922 of exits that might be a lot higher for loops with wide switch
923 statements in them) and compute n-th square root.
925 We limit the minimal probability by 2% to avoid
926 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
927 as this was causing regression in perl benchmark containing such
930 int probability = ((REG_BR_PROB_BASE
931 - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
/* Clamp at a 2% floor so the guess is never treated as reliable.  */
933 if (probability < HITRATE (2))
934 probability = HITRATE (2);
935 FOR_EACH_EDGE (e, ei, bb->succs)
936 if (e->dest->index < NUM_FIXED_BLOCKS
937 || !flow_bb_inside_loop_p (loop, e->dest))
938 predict_edge (e, PRED_LOOP_EXIT, probability);
942 /* Free basic blocks from get_loop_body. */
949 /* Attempt to predict probabilities of BB outgoing edges using local
/* NOTE(review): interior lines are missing from this fragment (original
   numbering jumps throughout) — in particular the switch's case labels
   (EQ/NE/ORDERED/UNORDERED/LE/LT/GE/GT etc.) are absent, so each
   predict_insn_def below belongs to an unseen case arm.  */
952 bb_estimate_probability_locally (basic_block bb)
954 rtx last_insn = BB_END (bb);
957 if (! can_predict_insn_p (last_insn))
959 cond = get_condition (last_insn, NULL, false, false);
963 /* Try "pointer heuristic."
964 A comparison ptr == 0 is predicted as false.
965 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
966 if (COMPARISON_P (cond)
967 && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
968 || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
970 if (GET_CODE (cond) == EQ)
971 predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
972 else if (GET_CODE (cond) == NE)
973 predict_insn_def (last_insn, PRED_POINTER, TAKEN);
977 /* Try "opcode heuristic."
978 EQ tests are usually false and NE tests are usually true. Also,
979 most quantities are positive, so we can make the appropriate guesses
980 about signed comparisons against zero. */
981 switch (GET_CODE (cond))
984 /* Unconditional branch. */
985 predict_insn_def (last_insn, PRED_UNCONDITIONAL,
986 cond == const0_rtx ? NOT_TAKEN : TAKEN);
991 /* Floating point comparisons appears to behave in a very
992 unpredictable way because of special role of = tests in
994 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
996 /* Comparisons with 0 are often used for booleans and there is
997 nothing useful to predict about them. */
998 else if (XEXP (cond, 1) == const0_rtx
999 || XEXP (cond, 0) == const0_rtx)
1002 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1007 /* Floating point comparisons appears to behave in a very
1008 unpredictable way because of special role of = tests in
1010 if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1012 /* Comparisons with 0 are often used for booleans and there is
1013 nothing useful to predict about them. */
1014 else if (XEXP (cond, 1) == const0_rtx
1015 || XEXP (cond, 0) == const0_rtx)
1018 predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1022 predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1026 predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
/* Comparisons against 0, 1, -1 feed the "positive value" heuristic.  */
1031 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1032 || XEXP (cond, 1) == constm1_rtx)
1033 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1038 if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1039 || XEXP (cond, 1) == constm1_rtx)
1040 predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1048 /* Set edge->probability for each successor edge of BB. */
1050 guess_outgoing_edge_probabilities (basic_block bb)
1052 bb_estimate_probability_locally (bb);
1053 combine_predictions_for_insn (BB_END (bb), bb);
1056 static tree expr_expected_value (tree, bitmap);
1058 /* Helper function for expr_expected_value. */
/* NOTE(review): interior lines are missing from this fragment (original
   numbering jumps throughout): return type line, braces, several early
   "return NULL" paths, and local declarations.  */
1061 expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1, bitmap visited)
/* Single-operand RHS: a constant is its own expected value; otherwise
   only SSA names can be traced further.  */
1065 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1067 if (TREE_CONSTANT (op0))
1070 if (code != SSA_NAME)
1073 def = SSA_NAME_DEF_STMT (op0);
1075 /* If we were already here, break the infinite cycle. */
1076 if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0)))
1078 bitmap_set_bit (visited, SSA_NAME_VERSION (op0));
1080 if (gimple_code (def) == GIMPLE_PHI)
1082 /* All the arguments of the PHI node must have the same constant
1084 int i, n = gimple_phi_num_args (def);
1085 tree val = NULL, new_val;
1087 for (i = 0; i < n; i++)
1089 tree arg = PHI_ARG_DEF (def, i);
1091 /* If this PHI has itself as an argument, we cannot
1092 determine the string length of this argument. However,
1093 if we can find an expected constant value for the other
1094 PHI args then we can still be sure that this is
1095 likely a constant. So be optimistic and just
1096 continue with the next argument. */
1097 if (arg == PHI_RESULT (def))
1100 new_val = expr_expected_value (arg, visited);
/* Differing expected values across PHI args means unknown.  */
1105 else if (!operand_equal_p (val, new_val, false))
1110 if (is_gimple_assign (def))
1112 if (gimple_assign_lhs (def) != op0)
/* Recurse into the defining assignment's RHS.  */
1115 return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1116 gimple_assign_rhs1 (def),
1117 gimple_assign_rhs_code (def),
1118 gimple_assign_rhs2 (def),
/* A call to __builtin_expect (val, expected) yields its 2nd argument.  */
1122 if (is_gimple_call (def))
1124 tree decl = gimple_call_fndecl (def);
1127 if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
1128 && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT
1132 if (gimple_call_num_args (def) != 2)
1134 val = gimple_call_arg (def, 0);
1135 if (TREE_CONSTANT (val))
1137 return gimple_call_arg (def, 1);
/* Binary RHS: expected value is the fold of the operands' expected
   values, when that folds to a constant.  */
1144 if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1147 op0 = expr_expected_value (op0, visited);
1150 op1 = expr_expected_value (op1, visited);
1153 res = fold_build2 (code, type, op0, op1);
1154 if (TREE_CONSTANT (res))
/* Unary RHS: likewise via fold_build1.  */
1158 if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1161 op0 = expr_expected_value (op0, visited);
1164 res = fold_build1 (code, type, op0);
1165 if (TREE_CONSTANT (res))
1172 /* Return constant EXPR will likely have at execution time, NULL if unknown.
1173 The function is used by builtin_expect branch predictor so the evidence
1174 must come from this construct and additional possible constant folding.
1176 We may want to implement more involved value guess (such as value range
1177 propagation based prediction), but such tricks shall go to new
1181 expr_expected_value (tree expr, bitmap visited)
1183 enum tree_code code;
1186 if (TREE_CONSTANT (expr))
1189 extract_ops_from_tree (expr, &code, &op0, &op1);
1190 return expr_expected_value_1 (TREE_TYPE (expr),
1191 op0, code, op1, visited);
1195 /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1196 we no longer need. */
/* NOTE(review): interior lines are missing from this fragment (original
   numbering jumps 1197, 1199-1205, 1208, ...): the return type, the
   FOR_EACH_BB loop header, local declarations, iterator advancement,
   and the trailing return.  */
1198 strip_predict_hints (void)
1206 gimple_stmt_iterator bi;
1207 for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
1209 gimple stmt = gsi_stmt (bi);
/* Drop GIMPLE_PREDICT statements outright ...  */
1211 if (gimple_code (stmt) == GIMPLE_PREDICT)
1213 gsi_remove (&bi, true);
/* ... and replace two-argument __builtin_expect calls with a plain
   assignment of their first argument.  */
1216 else if (gimple_code (stmt) == GIMPLE_CALL)
1218 tree fndecl = gimple_call_fndecl (stmt);
1221 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1222 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1223 && gimple_call_num_args (stmt) == 2)
1225 var = gimple_call_lhs (stmt);
1226 ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));
1228 gsi_replace (&bi, ass_stmt, true);
1237 /* Predict using opcode of the last statement in basic block. */
/* NOTE(review): interior lines are missing from this fragment (original
   numbering jumps throughout), including the function header, local
   declarations, and — as in bb_estimate_probability_locally — the
   switch's case labels for the opcode heuristics.  */
1239 tree_predict_by_opcode (basic_block bb)
1241 gimple stmt = last_stmt (bb);
1250 if (!stmt || gimple_code (stmt) != GIMPLE_COND)
/* Locate the successor edge taken when the condition is true.  */
1252 FOR_EACH_EDGE (then_edge, ei, bb->succs)
1253 if (then_edge->flags & EDGE_TRUE_VALUE)
1255 op0 = gimple_cond_lhs (stmt);
1256 op1 = gimple_cond_rhs (stmt);
1257 cmp = gimple_cond_code (stmt);
1258 type = TREE_TYPE (op0);
/* First consult __builtin_expect evidence via expr_expected_value_1.  */
1259 visited = BITMAP_ALLOC (NULL);
1260 val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
1261 BITMAP_FREE (visited);
1264 if (integer_zerop (val))
1265 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1267 predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1270 /* Try "pointer heuristic."
1271 A comparison ptr == 0 is predicted as false.
1272 Similarly, a comparison ptr1 == ptr2 is predicted as false. */
1273 if (POINTER_TYPE_P (type))
1276 predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1277 else if (cmp == NE_EXPR)
1278 predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1282 /* Try "opcode heuristic."
1283 EQ tests are usually false and NE tests are usually true. Also,
1284 most quantities are positive, so we can make the appropriate guesses
1285 about signed comparisons against zero. */
1290 /* Floating point comparisons appears to behave in a very
1291 unpredictable way because of special role of = tests in
1293 if (FLOAT_TYPE_P (type))
1295 /* Comparisons with 0 are often used for booleans and there is
1296 nothing useful to predict about them. */
1297 else if (integer_zerop (op0) || integer_zerop (op1))
1300 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1305 /* Floating point comparisons appears to behave in a very
1306 unpredictable way because of special role of = tests in
1308 if (FLOAT_TYPE_P (type))
1310 /* Comparisons with 0 are often used for booleans and there is
1311 nothing useful to predict about them. */
1312 else if (integer_zerop (op0)
1313 || integer_zerop (op1))
1316 predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1320 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1323 case UNORDERED_EXPR:
1324 predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
/* Comparisons against 0, 1, -1 feed the "positive value" heuristic.  */
1329 if (integer_zerop (op1)
1330 || integer_onep (op1)
1331 || integer_all_onesp (op1)
1334 || real_minus_onep (op1))
1335 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1340 if (integer_zerop (op1)
1341 || integer_onep (op1)
1342 || integer_all_onesp (op1)
1345 || real_minus_onep (op1))
1346 predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1354 /* Try to guess whether the value of return means error code. */
1356 static enum br_predictor
1357 return_prediction (tree val, enum prediction *prediction)
1361 return PRED_NO_PREDICTION;
1362 /* Different heuristics for pointers and scalars. */
1363 if (POINTER_TYPE_P (TREE_TYPE (val)))
1365 /* NULL is usually not returned. */
1366 if (integer_zerop (val))
1368 *prediction = NOT_TAKEN;
1369 return PRED_NULL_RETURN;
1372 else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1374 /* Negative return values are often used to indicate
1376 if (TREE_CODE (val) == INTEGER_CST
1377 && tree_int_cst_sgn (val) < 0)
1379 *prediction = NOT_TAKEN;
1380 return PRED_NEGATIVE_RETURN;
1382 /* Constant return values seems to be commonly taken.
1383 Zero/one often represent booleans so exclude them from the
1385 if (TREE_CONSTANT (val)
1386 && (!integer_zerop (val) && !integer_onep (val)))
1388 *prediction = TAKEN;
1389 return PRED_CONST_RETURN;
1392 return PRED_NO_PREDICTION;
1395 /* Find the basic block with return expression and look up for possible
1396 return value trying to apply RETURN_PREDICTION heuristics. */
1398 apply_return_prediction (void)
1400 gimple return_stmt = NULL;
1404 int phi_num_args, i;
1405 enum br_predictor pred;
1406 enum prediction direction;
1409 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1411 return_stmt = last_stmt (e->src);
1413 && gimple_code (return_stmt) == GIMPLE_RETURN)
1418 return_val = gimple_return_retval (return_stmt);
1421 if (TREE_CODE (return_val) != SSA_NAME
1422 || !SSA_NAME_DEF_STMT (return_val)
1423 || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
1425 phi = SSA_NAME_DEF_STMT (return_val);
1426 phi_num_args = gimple_phi_num_args (phi);
1427 pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1429 /* Avoid the degenerate case where all return values form the function
1430 belongs to same category (ie they are all positive constants)
1431 so we can hardly say something about them. */
1432 for (i = 1; i < phi_num_args; i++)
1433 if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
1435 if (i != phi_num_args)
1436 for (i = 0; i < phi_num_args; i++)
1438 pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1439 if (pred != PRED_NO_PREDICTION)
1440 predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
1445 /* Look for basic block that contains unlikely to happen events
1446 (such as noreturn calls) and mark all paths leading to execution
1447 of this basic blocks as unlikely. */
1450 tree_bb_level_predictions (void)
1454 apply_return_prediction ();
1458 gimple_stmt_iterator gsi;
1460 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1462 gimple stmt = gsi_stmt (gsi);
1465 if (is_gimple_call (stmt))
1467 if (gimple_call_flags (stmt) & ECF_NORETURN)
1468 predict_paths_leading_to (bb, PRED_NORETURN,
1470 decl = gimple_call_fndecl (stmt);
1472 && lookup_attribute ("cold",
1473 DECL_ATTRIBUTES (decl)))
1474 predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
1477 else if (gimple_code (stmt) == GIMPLE_PREDICT)
1479 predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
1480 gimple_predict_outcome (stmt));
1481 /* Keep GIMPLE_PREDICT around so early inlining will propagate
1482 hints to callers. */
#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  Returning false keeps the traversal going over all entries.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif
1502 /* Predict branch probabilities and estimate profile of the tree CFG. */
1504 tree_estimate_probability (void)
1508 loop_optimizer_init (0);
1509 if (dump_file && (dump_flags & TDF_DETAILS))
1510 flow_loops_dump (dump_file, NULL, 0);
1512 add_noreturn_fake_exit_edges ();
1513 connect_infinite_loops_to_exit ();
1514 /* We use loop_niter_by_eval, which requires that the loops have
1516 create_preheaders (CP_SIMPLE_PREHEADERS);
1517 calculate_dominance_info (CDI_POST_DOMINATORS);
1519 bb_predictions = pointer_map_create ();
1520 tree_bb_level_predictions ();
1522 mark_irreducible_loops ();
1523 record_loop_exits ();
1524 if (number_of_loops () > 1)
1532 FOR_EACH_EDGE (e, ei, bb->succs)
1534 /* Predict early returns to be probable, as we've already taken
1535 care for error returns and other cases are often used for
1536 fast paths through function.
1538 Since we've already removed the return statements, we are
1539 looking for CFG like:
1549 if (e->dest != bb->next_bb
1550 && e->dest != EXIT_BLOCK_PTR
1551 && single_succ_p (e->dest)
1552 && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
1553 && gimple_code (last_stmt (e->dest)) == GIMPLE_RETURN)
1558 if (single_succ_p (bb))
1560 FOR_EACH_EDGE (e1, ei1, bb->preds)
1561 if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1562 && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1563 && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
1564 predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1567 if (!predicted_by_p (e->src, PRED_NULL_RETURN)
1568 && !predicted_by_p (e->src, PRED_CONST_RETURN)
1569 && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
1570 predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1573 /* Look for block we are guarding (ie we dominate it,
1574 but it doesn't postdominate us). */
1575 if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1576 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1577 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1579 gimple_stmt_iterator bi;
1581 /* The call heuristic claims that a guarded function call
1582 is improbable. This is because such calls are often used
1583 to signal exceptional situations such as printing error
1585 for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
1588 gimple stmt = gsi_stmt (bi);
1589 if (is_gimple_call (stmt)
1590 /* Constant and pure calls are hardly used to signalize
1591 something exceptional. */
1592 && gimple_has_side_effects (stmt))
1594 predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1600 tree_predict_by_opcode (bb);
1603 combine_predictions_for_bb (bb);
1605 #ifdef ENABLE_CHECKING
1606 pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
1608 pointer_map_destroy (bb_predictions);
1609 bb_predictions = NULL;
1611 estimate_bb_frequencies ();
1612 free_dominance_info (CDI_POST_DOMINATORS);
1613 remove_fake_exit_edges ();
1614 loop_optimizer_finalize ();
1615 if (dump_file && (dump_flags & TDF_DETAILS))
1616 gimple_dump_cfg (dump_file, dump_flags);
1617 if (profile_status == PROFILE_ABSENT)
1618 profile_status = PROFILE_GUESSED;
1622 /* Predict edges to successors of CUR whose sources are not postdominated by
1623 BB by PRED and recurse to all postdominators. */
1626 predict_paths_for_bb (basic_block cur, basic_block bb,
1627 enum br_predictor pred,
1628 enum prediction taken)
1634 /* We are looking for all edges forming edge cut induced by
1635 set of all blocks postdominated by BB. */
1636 FOR_EACH_EDGE (e, ei, cur->preds)
1637 if (e->src->index >= NUM_FIXED_BLOCKS
1638 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
1640 gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
1641 predict_edge_def (e, pred, taken);
1643 for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
1645 son = next_dom_son (CDI_POST_DOMINATORS, son))
1646 predict_paths_for_bb (son, bb, pred, taken);
1649 /* Sets branch probabilities according to PREDiction and
1653 predict_paths_leading_to (basic_block bb, enum br_predictor pred,
1654 enum prediction taken)
1656 predict_paths_for_bb (bb, bb, pred, taken);
1659 /* This is used to carry information about basic blocks. It is
1660 attached to the AUX field of the standard CFG block. */
1662 typedef struct block_info_def
1664 /* Estimated frequency of execution of basic_block. */
1667 /* To keep queue of basic blocks to process. */
1670 /* Number of predecessors we need to visit first. */
1674 /* Similar information for edges. */
1675 typedef struct edge_info_def
1677 /* In case edge is a loopback edge, the probability edge will be reached
1678 in case header is. Estimated number of iterations of the loop can be
1679 then computed as 1 / (1 - back_edge_prob). */
1680 sreal back_edge_prob;
1681 /* True if the edge is a loopback edge in the natural loop. */
1682 unsigned int back_edge:1;
1685 #define BLOCK_INFO(B) ((block_info) (B)->aux)
1686 #define EDGE_INFO(E) ((edge_info) (E)->aux)
1688 /* Helper function for estimate_bb_frequencies.
1689 Propagate the frequencies in blocks marked in
1690 TOVISIT, starting in HEAD. */
1693 propagate_freq (basic_block head, bitmap tovisit)
1702 /* For each basic block we need to visit count number of his predecessors
1703 we need to visit first. */
1704 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1709 /* The outermost "loop" includes the exit block, which we can not
1710 look up via BASIC_BLOCK. Detect this and use EXIT_BLOCK_PTR
1711 directly. Do the same for the entry block. */
1712 bb = BASIC_BLOCK (i);
1714 FOR_EACH_EDGE (e, ei, bb->preds)
1716 bool visit = bitmap_bit_p (tovisit, e->src->index);
1718 if (visit && !(e->flags & EDGE_DFS_BACK))
1720 else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1722 "Irreducible region hit, ignoring edge to %i->%i\n",
1723 e->src->index, bb->index);
1725 BLOCK_INFO (bb)->npredecessors = count;
1728 memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1730 for (bb = head; bb; bb = nextbb)
1733 sreal cyclic_probability, frequency;
1735 memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1736 memcpy (&frequency, &real_zero, sizeof (real_zero));
1738 nextbb = BLOCK_INFO (bb)->next;
1739 BLOCK_INFO (bb)->next = NULL;
1741 /* Compute frequency of basic block. */
1744 #ifdef ENABLE_CHECKING
1745 FOR_EACH_EDGE (e, ei, bb->preds)
1746 gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1747 || (e->flags & EDGE_DFS_BACK));
1750 FOR_EACH_EDGE (e, ei, bb->preds)
1751 if (EDGE_INFO (e)->back_edge)
1753 sreal_add (&cyclic_probability, &cyclic_probability,
1754 &EDGE_INFO (e)->back_edge_prob);
1756 else if (!(e->flags & EDGE_DFS_BACK))
1760 /* frequency += (e->probability
1761 * BLOCK_INFO (e->src)->frequency /
1762 REG_BR_PROB_BASE); */
1764 sreal_init (&tmp, e->probability, 0);
1765 sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1766 sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1767 sreal_add (&frequency, &frequency, &tmp);
1770 if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1772 memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1773 sizeof (frequency));
1777 if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1779 memcpy (&cyclic_probability, &real_almost_one,
1780 sizeof (real_almost_one));
1783 /* BLOCK_INFO (bb)->frequency = frequency
1784 / (1 - cyclic_probability) */
1786 sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1787 sreal_div (&BLOCK_INFO (bb)->frequency,
1788 &frequency, &cyclic_probability);
1792 bitmap_clear_bit (tovisit, bb->index);
1794 e = find_edge (bb, head);
1799 /* EDGE_INFO (e)->back_edge_prob
1800 = ((e->probability * BLOCK_INFO (bb)->frequency)
1801 / REG_BR_PROB_BASE); */
1803 sreal_init (&tmp, e->probability, 0);
1804 sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1805 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1806 &tmp, &real_inv_br_prob_base);
1809 /* Propagate to successor blocks. */
1810 FOR_EACH_EDGE (e, ei, bb->succs)
1811 if (!(e->flags & EDGE_DFS_BACK)
1812 && BLOCK_INFO (e->dest)->npredecessors)
1814 BLOCK_INFO (e->dest)->npredecessors--;
1815 if (!BLOCK_INFO (e->dest)->npredecessors)
1820 BLOCK_INFO (last)->next = e->dest;
1828 /* Estimate probabilities of loopback edges in loops at same nest level. */
1831 estimate_loops_at_level (struct loop *first_loop)
1835 for (loop = first_loop; loop; loop = loop->next)
1840 bitmap tovisit = BITMAP_ALLOC (NULL);
1842 estimate_loops_at_level (loop->inner);
1844 /* Find current loop back edge and mark it. */
1845 e = loop_latch_edge (loop);
1846 EDGE_INFO (e)->back_edge = 1;
1848 bbs = get_loop_body (loop);
1849 for (i = 0; i < loop->num_nodes; i++)
1850 bitmap_set_bit (tovisit, bbs[i]->index);
1852 propagate_freq (loop->header, tovisit);
1853 BITMAP_FREE (tovisit);
1857 /* Propagates frequencies through structure of loops. */
1860 estimate_loops (void)
1862 bitmap tovisit = BITMAP_ALLOC (NULL);
1865 /* Start by estimating the frequencies in the loops. */
1866 if (number_of_loops () > 1)
1867 estimate_loops_at_level (current_loops->tree_root->inner);
1869 /* Now propagate the frequencies through all the blocks. */
1872 bitmap_set_bit (tovisit, bb->index);
1874 propagate_freq (ENTRY_BLOCK_PTR, tovisit);
1875 BITMAP_FREE (tovisit);
1878 /* Convert counts measured by profile driven feedback to frequencies.
1879 Return nonzero iff there was any nonzero execution count. */
1882 counts_to_freqs (void)
1884 gcov_type count_max, true_count_max = 0;
1888 true_count_max = MAX (bb->count, true_count_max);
1890 count_max = MAX (true_count_max, 1);
1891 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1892 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1894 return true_count_max;
1897 /* Return true if function is likely to be expensive, so there is no point to
1898 optimize performance of prologue, epilogue or do inlining at the expense
1899 of code size growth. THRESHOLD is the limit of number of instructions
1900 function can execute at average to be still considered not expensive. */
1903 expensive_function_p (int threshold)
1905 unsigned int sum = 0;
1909 /* We can not compute accurately for large thresholds due to scaled
1911 gcc_assert (threshold <= BB_FREQ_MAX);
1913 /* Frequencies are out of range. This either means that function contains
1914 internal loop executing more than BB_FREQ_MAX times or profile feedback
1915 is available and function has not been executed at all. */
1916 if (ENTRY_BLOCK_PTR->frequency == 0)
1919 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
1920 limit = ENTRY_BLOCK_PTR->frequency * threshold;
1925 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1926 insn = NEXT_INSN (insn))
1927 if (active_insn_p (insn))
1929 sum += bb->frequency;
1938 /* Estimate basic blocks frequency by given branch probabilities. */
1941 estimate_bb_frequencies (void)
1946 if (!flag_branch_probabilities || !counts_to_freqs ())
1948 static int real_values_initialized = 0;
1950 if (!real_values_initialized)
1952 real_values_initialized = 1;
1953 sreal_init (&real_zero, 0, 0);
1954 sreal_init (&real_one, 1, 0);
1955 sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1956 sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1957 sreal_init (&real_one_half, 1, -1);
1958 sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1959 sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1962 mark_dfs_back_edges ();
1964 single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
1966 /* Set up block info for each basic block. */
1967 alloc_aux_for_blocks (sizeof (struct block_info_def));
1968 alloc_aux_for_edges (sizeof (struct edge_info_def));
1969 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1974 FOR_EACH_EDGE (e, ei, bb->succs)
1976 sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1977 sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1978 &EDGE_INFO (e)->back_edge_prob,
1979 &real_inv_br_prob_base);
1983 /* First compute probabilities locally for each loop from innermost
1984 to outermost to examine probabilities for back edges. */
1987 memcpy (&freq_max, &real_zero, sizeof (real_zero));
1989 if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1990 memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1992 sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1993 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1997 sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1998 sreal_add (&tmp, &tmp, &real_one_half);
1999 bb->frequency = sreal_to_int (&tmp);
2002 free_aux_for_blocks ();
2003 free_aux_for_edges ();
2005 compute_function_frequency ();
2006 if (flag_reorder_functions)
2007 choose_function_section ();
2010 /* Decide whether function is hot, cold or unlikely executed. */
2012 compute_function_frequency (void)
2016 if (!profile_info || !flag_branch_probabilities)
2018 if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2020 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
2021 else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2023 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
2026 cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
2029 if (maybe_hot_bb_p (bb))
2031 cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
2034 if (!probably_never_executed_bb_p (bb))
2035 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2039 /* Choose appropriate section for the function. */
2041 choose_function_section (void)
2043 if (DECL_SECTION_NAME (current_function_decl)
2044 || !targetm.have_named_sections
2045 /* Theoretically we can split the gnu.linkonce text section too,
2046 but this requires more work as the frequency needs to match
2047 for all generated objects so we need to merge the frequency
2048 of all instances. For now just never set frequency for these. */
2049 || DECL_ONE_ONLY (current_function_decl))
2052 /* If we are doing the partitioning optimization, let the optimization
2053 choose the correct section into which to put things. */
2055 if (flag_reorder_blocks_and_partition)
2058 if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
2059 DECL_SECTION_NAME (current_function_decl) =
2060 build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
2061 if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
2062 DECL_SECTION_NAME (current_function_decl) =
2063 build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
2064 UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
2068 gate_estimate_probability (void)
2070 return flag_guess_branch_prob;
2073 /* Build PREDICT_EXPR. */
2075 build_predict_expr (enum br_predictor predictor, enum prediction taken)
2077 tree t = build1 (PREDICT_EXPR, void_type_node,
2078 build_int_cst (NULL, predictor));
2079 PREDICT_EXPR_OUTCOME (t) = taken;
2084 predictor_name (enum br_predictor predictor)
2086 return predictor_info[predictor].name;
2089 struct gimple_opt_pass pass_profile =
2093 "profile", /* name */
2094 gate_estimate_probability, /* gate */
2095 tree_estimate_probability, /* execute */
2098 0, /* static_pass_number */
2099 TV_BRANCH_PROB, /* tv_id */
2100 PROP_cfg, /* properties_required */
2101 0, /* properties_provided */
2102 0, /* properties_destroyed */
2103 0, /* todo_flags_start */
2104 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
2108 struct gimple_opt_pass pass_strip_predict_hints =
2114 strip_predict_hints, /* execute */
2117 0, /* static_pass_number */
2118 TV_BRANCH_PROB, /* tv_id */
2119 PROP_cfg, /* properties_required */
2120 0, /* properties_provided */
2121 0, /* properties_destroyed */
2122 0, /* todo_flags_start */
2123 TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */