1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal spanning tree; only edges that are not on the spanning tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
42 The auxiliary file generated is <dumpbase>.bbg. The format is
43 described in full in gcov-io.h. */
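/* As a hypothetical illustration of the scheme above: for a function whose
   graph is ENTRY->A, A->B, A->C, B->D, C->D, D->EXIT, a spanning tree can
   contain every real edge except ENTRY->A and C->D (the exit and entry
   blocks are merged up front), so only those two edges receive counters.
   From measured counts ENTRY->A = 100 and C->D = 30, flow conservation
   yields A->C = 30, A->B = 70, B->D = 70 and D->EXIT = 100.  The sketch
   below spells the same arithmetic out; it is excluded from the build,
   uses made-up names, and is illustrative only.  */
#if 0
#include <stdio.h>

static void
toy_conservation_example (void)
{
  long entry_a = 100;		/* measured by an edge counter */
  long c_d = 30;		/* measured by an edge counter */
  long a_c, a_b, b_d, d_exit;

  a_c = c_d;			/* block C has one pred and one succ */
  a_b = entry_a - a_c;		/* block A: out-flow equals in-flow */
  b_d = a_b;			/* block B has one pred and one succ */
  d_exit = b_d + c_d;		/* block D: out-flow equals in-flow */

  printf ("A->B=%ld B->D=%ld A->C=%ld D->EXIT=%ld\n",
	  a_b, b_d, a_c, d_exit);
}
#endif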
45 /* ??? Register allocation should use basic block execution counts to
46 give preference to the most commonly executed blocks. */
48 /* ??? Should calculate branch probabilities before instrumenting code, since
49 then we can use arc counts to help decide which arcs to instrument. */
53 #include "coretypes.h"
63 #include "value-prof.h"
66 /* Additional information about the edges we need. */
68 unsigned int count_valid : 1;
70 /* Is on the spanning tree. */
71 unsigned int on_tree : 1;
73 /* Pretend this edge does not exist (it is abnormal and we've
74 inserted a fake to compensate). */
75 unsigned int ignore : 1;
79 unsigned int count_valid : 1;
81 /* Number of successor and predecessor edges. */
86 #define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
87 #define BB_INFO(b) ((struct bb_info *) (b)->aux)
89 /* Counter summary from the last set of coverage counts read. */
91 const struct gcov_ctr_summary *profile_info;
93 /* Collect statistics on the performance of this pass for the entire source file. */
96 static int total_num_blocks;
97 static int total_num_edges;
98 static int total_num_edges_ignored;
99 static int total_num_edges_instrumented;
100 static int total_num_blocks_created;
101 static int total_num_passes;
102 static int total_num_times_called;
103 static int total_hist_br_prob[20];
104 static int total_num_never_executed;
105 static int total_num_branches;
107 /* Forward declarations. */
108 static void find_spanning_tree PARAMS ((struct edge_list *));
109 static rtx gen_edge_profiler PARAMS ((int));
110 static rtx gen_interval_profiler (struct histogram_value *, unsigned, unsigned);
111 static rtx gen_pow2_profiler (struct histogram_value *, unsigned, unsigned);
112 static rtx gen_one_value_profiler (struct histogram_value *, unsigned, unsigned);
113 static rtx gen_const_delta_profiler (struct histogram_value *, unsigned, unsigned);
114 static unsigned instrument_edges PARAMS ((struct edge_list *));
115 static void instrument_values (unsigned, struct histogram_value *);
116 static void compute_branch_probabilities PARAMS ((void));
117 static gcov_type * get_exec_counts PARAMS ((void));
118 static basic_block find_group PARAMS ((basic_block));
119 static void union_groups PARAMS ((basic_block, basic_block));
122 /* Add edge instrumentation code to the entire insn chain.
124 EL is the edge list of the current function; every edge that is neither
125 ignored nor on the spanning tree gets a counter. Returns that count. */
128 instrument_edges (el)
129 struct edge_list *el;
131 unsigned num_instr_edges = 0;
132 int num_edges = NUM_EDGES (el);
135 remove_fake_edges ();
137 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
141 for (e = bb->succ; e; e = e->succ_next)
143 struct edge_info *inf = EDGE_INFO (e);
145 if (!inf->ignore && !inf->on_tree)
149 if (e->flags & EDGE_ABNORMAL)
152 fprintf (rtl_dump_file, "Edge %d to %d instrumented%s\n",
153 e->src->index, e->dest->index,
154 EDGE_CRITICAL_P (e) ? " (and split)" : "");
155 edge_profile = gen_edge_profiler (num_instr_edges++);
156 insert_insn_on_edge (edge_profile, e);
157 rebuild_jump_labels (e->insns);
162 total_num_blocks_created += num_edges;
164 fprintf (rtl_dump_file, "%d edges instrumented\n", num_instr_edges);
165 return num_instr_edges;
168 /* Add code to measure the histograms for the N_VALUES values described in the array VALUES. */
170 instrument_values (unsigned n_values, struct histogram_value *values)
176 /* Emit code to generate the histograms before the insns. */
178 for (i = 0; i < n_values; i++)
180 e = split_block (BLOCK_FOR_INSN (values[i].insn),
181 PREV_INSN (values[i].insn));
182 switch (values[i].type)
184 case HIST_TYPE_INTERVAL:
185 t = GCOV_COUNTER_V_INTERVAL;
189 t = GCOV_COUNTER_V_POW2;
192 case HIST_TYPE_SINGLE_VALUE:
193 t = GCOV_COUNTER_V_SINGLE;
196 case HIST_TYPE_CONST_DELTA:
197 t = GCOV_COUNTER_V_DELTA;
203 if (!coverage_counter_alloc (t, values[i].n_counters))
206 switch (values[i].type)
208 case HIST_TYPE_INTERVAL:
209 sequence = gen_interval_profiler (values + i, t, 0);
213 sequence = gen_pow2_profiler (values + i, t, 0);
216 case HIST_TYPE_SINGLE_VALUE:
217 sequence = gen_one_value_profiler (values + i, t, 0);
220 case HIST_TYPE_CONST_DELTA:
221 sequence = gen_const_delta_profiler (values + i, t, 0);
228 safe_insert_insn_on_edge (sequence, e);
233 /* Read the execution counts of the instrumented edges for the current function from the coverage (.da) file, if available. */
238 unsigned num_edges = 0;
242 /* Count the edges to be (possibly) instrumented. */
243 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
246 for (e = bb->succ; e; e = e->succ_next)
247 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
251 counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, &profile_info);
255 if (rtl_dump_file && profile_info)
256 fprintf (rtl_dump_file, "Merged %u profiles with maximal count %u.\n",
257 profile_info->runs, (unsigned) profile_info->sum_max);
263 /* Compute the branch probabilities for the various branches.
264 Annotate them accordingly. */
267 compute_branch_probabilities ()
274 int hist_br_prob[20];
275 int num_never_executed;
277 gcov_type *exec_counts = get_exec_counts ();
278 int exec_counts_pos = 0;
280 /* Attach extra info block to each bb. */
282 alloc_aux_for_blocks (sizeof (struct bb_info));
283 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
287 for (e = bb->succ; e; e = e->succ_next)
288 if (!EDGE_INFO (e)->ignore)
289 BB_INFO (bb)->succ_count++;
290 for (e = bb->pred; e; e = e->pred_next)
291 if (!EDGE_INFO (e)->ignore)
292 BB_INFO (bb)->pred_count++;
295 /* Avoid predicting entry on exit nodes. */
296 BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
297 BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
299 /* For each edge not on the spanning tree, set its execution count from the .da file. */
302 /* The first count in the .da file is the number of times that the function
303 was entered. This is the exec_count for block zero. */
305 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
308 for (e = bb->succ; e; e = e->succ_next)
309 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
314 e->count = exec_counts[exec_counts_pos++];
319 EDGE_INFO (e)->count_valid = 1;
320 BB_INFO (bb)->succ_count--;
321 BB_INFO (e->dest)->pred_count--;
324 fprintf (rtl_dump_file, "\nRead edge from %i to %i, count:",
325 bb->index, e->dest->index);
326 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
327 (HOST_WIDEST_INT) e->count);
333 fprintf (rtl_dump_file, "\n%d edge counts read\n", num_edges);
335 /* For every block in the file,
336 - if every exit/entrance edge has a known count, then set the block count
337 - if the block count is known, and every exit/entrance edge but one has
338 a known execution count, then set the count of the remaining edge
340 As edge counts are set, decrement the succ/pred count, but don't delete
341 the edge; that way we can easily tell when all edges are known, or only
342 one edge is unknown. */
344 /* The order that the basic blocks are iterated through is important.
345 Since the code that finds spanning trees starts with block 0, low numbered
346 edges are put on the spanning tree in preference to high numbered edges.
347 Hence, most instrumented edges are at the end. Graph solving works much
348 faster if we propagate numbers from the end to the start.
350 This takes an average of slightly more than 3 passes. */
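  /* As a made-up illustration of the two rules above: suppose block B has a
     known count of 100 and two successor edges, B->C with a known count of
     60 and B->D still unknown.  B's succ_count is then 1, so the count of
     B->D is set to 100 - 60 = 40 and the pred_count of D is decremented;
     once D has no unknown incoming edge left, a later pass sets D's count
     to the sum of its incoming counts.  */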
358 FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
360 struct bb_info *bi = BB_INFO (bb);
361 if (! bi->count_valid)
363 if (bi->succ_count == 0)
368 for (e = bb->succ; e; e = e->succ_next)
374 else if (bi->pred_count == 0)
379 for (e = bb->pred; e; e = e->pred_next)
388 if (bi->succ_count == 1)
393 /* One of the counts will be invalid, but it is zero,
394 so adding it in also doesn't hurt. */
395 for (e = bb->succ; e; e = e->succ_next)
398 /* Search for the invalid edge, and set its count. */
399 for (e = bb->succ; e; e = e->succ_next)
400 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
403 /* Calculate count for remaining edge by conservation. */
404 total = bb->count - total;
408 EDGE_INFO (e)->count_valid = 1;
412 BB_INFO (e->dest)->pred_count--;
415 if (bi->pred_count == 1)
420 /* One of the counts will be invalid, but it is zero,
421 so adding it in also doesn't hurt. */
422 for (e = bb->pred; e; e = e->pred_next)
425 /* Search for the invalid edge, and set its count. */
426 for (e = bb->pred; e; e = e->pred_next)
427 if (!EDGE_INFO (e)->count_valid && !EDGE_INFO (e)->ignore)
430 /* Calculate count for remaining edge by conservation. */
431 total = bb->count - total + e->count;
435 EDGE_INFO (e)->count_valid = 1;
439 BB_INFO (e->src)->succ_count--;
446 dump_flow_info (rtl_dump_file);
448 total_num_passes += passes;
450 fprintf (rtl_dump_file, "Graph solving took %d passes.\n\n", passes);
452 /* If the graph has been correctly solved, every block will have a
453 succ and pred count of zero. */
456 if (BB_INFO (bb)->succ_count || BB_INFO (bb)->pred_count)
460 /* For every edge, calculate its branch probability and add a reg_note
461 to the branch insn to indicate this. */
463 for (i = 0; i < 20; i++)
465 num_never_executed = 0;
468 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
475 error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
476 bb->index, (int)bb->count);
479 for (e = bb->succ; e; e = e->succ_next)
481 /* Function may return twice in case the called function is
482 setjmp or calls fork, but we can't represent this by extra
483 edge from the entry, since extra edge from the exit is
484 already present. We get negative frequency from the entry point. */
486 if ((e->count < 0
487 && e->dest == EXIT_BLOCK_PTR)
488 || (e->count > bb->count
489 && e->dest != EXIT_BLOCK_PTR))
493 while (GET_CODE (insn) != CALL_INSN
495 && keep_with_call_p (insn))
496 insn = PREV_INSN (insn);
497 if (GET_CODE (insn) == CALL_INSN)
498 e->count = e->count < 0 ? 0 : bb->count;
500 if (e->count < 0 || e->count > bb->count)
502 error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
503 e->src->index, e->dest->index,
505 e->count = bb->count / 2;
510 for (e = bb->succ; e; e = e->succ_next)
511 e->probability = (e->count * REG_BR_PROB_BASE + bb->count / 2) / bb->count;
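	  /* For example, with REG_BR_PROB_BASE equal to 10000, an edge taken
	     2 times out of a block count of 3 gets probability
	     (2 * 10000 + 1) / 3 = 6667, i.e. the division rounds to the
	     nearest value instead of truncating.  */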
512 if (bb->index >= 0
513 && any_condjump_p (bb->end)
514 && bb->succ->succ_next)
520 /* Find the branch edge. It is possible that we do have fake
521 edges here. */
522 for (e = bb->succ; e->flags & (EDGE_FAKE | EDGE_FALLTHRU);
523 e = e->succ_next)
524 continue; /* The loop body is intentionally empty. */
526 prob = e->probability;
527 index = prob * 20 / REG_BR_PROB_BASE;
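	      /* E.g. a probability of 6667 (about 67%) yields index
		 6667 * 20 / 10000 = 13 in the 20-bucket histogram.  */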
531 hist_br_prob[index]++;
533 note = find_reg_note (bb->end, REG_BR_PROB, 0);
534 /* There may already be a note put there by some other pass, such
535 as the builtin_expect expander. */
537 XEXP (note, 0) = GEN_INT (prob);
540 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
541 REG_NOTES (bb->end));
545 /* Otherwise distribute the probabilities evenly so we get a sane
546 sum. Use the simple heuristic that if there are normal edges,
547 give all abnormal edges a frequency of 0; otherwise distribute the
548 frequency over the abnormal edges (this is the case of noreturn
549 calls). */
554 for (e = bb->succ; e; e = e->succ_next)
555 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
559 for (e = bb->succ; e; e = e->succ_next)
560 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
561 e->probability = REG_BR_PROB_BASE / total;
567 for (e = bb->succ; e; e = e->succ_next)
569 for (e = bb->succ; e; e = e->succ_next)
570 e->probability = REG_BR_PROB_BASE / total;
572 if (bb->index >= 0
573 && any_condjump_p (bb->end)
574 && bb->succ->succ_next)
575 num_branches++, num_never_executed++;
581 fprintf (rtl_dump_file, "%d branches\n", num_branches);
582 fprintf (rtl_dump_file, "%d branches never executed\n",
585 for (i = 0; i < 10; i++)
586 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
587 (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
590 total_num_branches += num_branches;
591 total_num_never_executed += num_never_executed;
592 for (i = 0; i < 20; i++)
593 total_hist_br_prob[i] += hist_br_prob[i];
595 fputc ('\n', rtl_dump_file);
596 fputc ('\n', rtl_dump_file);
599 free_aux_for_blocks ();
602 /* Instrument and/or analyze program behavior based on program flow graph.
603 In either case, this function builds a flow graph for the function being
604 compiled. The flow graph is stored in BB_GRAPH.
606 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
607 the flow graph that are needed to reconstruct the dynamic behavior of the
610 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
611 information from a data file containing edge count information from previous
612 executions of the function being compiled. In this case, the flow graph is
613 annotated with actual execution counts, which are later propagated into the
614 rtl for optimization purposes.
616 Main entry point of this file. */
623 unsigned num_edges, ignored_edges;
624 unsigned num_instrumented;
625 struct edge_list *el;
626 unsigned n_values = 0;
627 struct histogram_value *values = NULL;
629 total_num_times_called++;
631 flow_call_edges_add (NULL);
632 add_noreturn_fake_exit_edges ();
634 /* We can't handle cyclic regions constructed using abnormal edges.
635 To avoid these we replace every source of an abnormal edge by a fake
636 edge from the entry node, and every destination by a fake edge to the exit.
637 This keeps the graph acyclic and our calculation exact for all normal
638 edges except for the exit and entrance ones.
640 We also add fake exit edges for each call and asm statement in the
641 basic block, since it may not return. */
645 int need_exit_edge = 0, need_entry_edge = 0;
646 int have_exit_edge = 0, have_entry_edge = 0;
649 /* Functions returning multiple times are not handled by extra edges.
650 Instead we simply allow negative counts on edges from exit to the
651 block past call and corresponding probabilities. We can't go
652 with the extra edges because that would result in a flowgraph that
653 needs to have fake edges outside the spanning tree. */
655 for (e = bb->succ; e; e = e->succ_next)
657 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
658 && e->dest != EXIT_BLOCK_PTR)
660 if (e->dest == EXIT_BLOCK_PTR)
663 for (e = bb->pred; e; e = e->pred_next)
665 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
666 && e->src != ENTRY_BLOCK_PTR)
668 if (e->src == ENTRY_BLOCK_PTR)
672 if (need_exit_edge && !have_exit_edge)
675 fprintf (rtl_dump_file, "Adding fake exit edge to bb %i\n",
677 make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
679 if (need_entry_edge && !have_entry_edge)
682 fprintf (rtl_dump_file, "Adding fake entry edge to bb %i\n",
684 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
688 el = create_edge_list ();
689 num_edges = NUM_EDGES (el);
690 alloc_aux_for_edges (sizeof (struct edge_info));
692 /* The basic blocks are expected to be numbered sequentially. */
696 for (i = 0 ; i < num_edges ; i++)
698 edge e = INDEX_EDGE (el, i);
701 /* Mark edges we've replaced by fake edges above as ignored. */
702 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
703 && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
705 EDGE_INFO (e)->ignore = 1;
710 #ifdef ENABLE_CHECKING
714 /* Create spanning tree from basic block graph, mark each edge that is
715 on the spanning tree. We insert as many abnormal and critical edges
716 as possible to minimize the number of edge splits necessary. */
718 find_spanning_tree (el);
720 /* Fake edges that are not on the tree will not be instrumented, so
721 mark them ignored. */
722 for (num_instrumented = i = 0; i < num_edges; i++)
724 edge e = INDEX_EDGE (el, i);
725 struct edge_info *inf = EDGE_INFO (e);
727 if (inf->ignore || inf->on_tree)
729 else if (e->flags & EDGE_FAKE)
738 total_num_blocks += n_basic_blocks + 2;
740 fprintf (rtl_dump_file, "%d basic blocks\n", n_basic_blocks);
742 total_num_edges += num_edges;
744 fprintf (rtl_dump_file, "%d edges\n", num_edges);
746 total_num_edges_ignored += ignored_edges;
748 fprintf (rtl_dump_file, "%d ignored edges\n", ignored_edges);
750 /* Write the data from which gcov can reconstruct the basic block graph. */
753 /* Basic block flags */
754 if (coverage_begin_output ())
756 gcov_position_t offset;
758 offset = gcov_write_tag (GCOV_TAG_BLOCKS);
759 for (i = 0; i != (unsigned) (n_basic_blocks + 2); i++)
760 gcov_write_unsigned (0);
761 gcov_write_length (offset);
764 /* Keep all basic block indexes nonnegative in the gcov output.
765 Index 0 is used for the entry block, the last index for the exit block. */
767 ENTRY_BLOCK_PTR->index = -1;
768 EXIT_BLOCK_PTR->index = last_basic_block;
769 #define BB_TO_GCOV_INDEX(bb) ((bb)->index + 1)
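/* With the indices set above, BB_TO_GCOV_INDEX maps the entry block (index
   -1) to gcov index 0, ordinary blocks to their index plus one, and the
   exit block (index last_basic_block) to last_basic_block + 1.  */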
772 if (coverage_begin_output ())
774 gcov_position_t offset;
776 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
780 offset = gcov_write_tag (GCOV_TAG_ARCS);
781 gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
783 for (e = bb->succ; e; e = e->succ_next)
785 struct edge_info *i = EDGE_INFO (e);
788 unsigned flag_bits = 0;
791 flag_bits |= GCOV_ARC_ON_TREE;
792 if (e->flags & EDGE_FAKE)
793 flag_bits |= GCOV_ARC_FAKE;
794 if (e->flags & EDGE_FALLTHRU)
795 flag_bits |= GCOV_ARC_FALLTHROUGH;
797 gcov_write_unsigned (BB_TO_GCOV_INDEX (e->dest));
798 gcov_write_unsigned (flag_bits);
802 gcov_write_length (offset);
807 if (coverage_begin_output ())
809 char const *prev_file_name = NULL;
810 gcov_position_t offset;
815 int ignore_next_note = 0;
819 /* We are looking for line number notes. Search backward
820 before the basic block to find the correct ones. */
821 insn = prev_nonnote_insn (insn);
825 insn = NEXT_INSN (insn);
827 while (insn != bb->end)
829 if (GET_CODE (insn) == NOTE)
831 /* Must ignore the line number notes that
832 immediately follow the end of an inline function
833 to avoid counting it twice. There is a note
834 before the call, and one after the call. */
835 if (NOTE_LINE_NUMBER (insn)
836 == NOTE_INSN_REPEATED_LINE_NUMBER)
837 ignore_next_note = 1;
838 else if (NOTE_LINE_NUMBER (insn) <= 0)
840 else if (ignore_next_note)
841 ignore_next_note = 0;
846 offset = gcov_write_tag (GCOV_TAG_LINES);
847 gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
850 /* If this is a new source file, then output the
851 file's name to the .bb file. */
853 || strcmp (NOTE_SOURCE_FILE (insn),
856 prev_file_name = NOTE_SOURCE_FILE (insn);
857 gcov_write_unsigned (0);
858 gcov_write_string (prev_file_name);
860 gcov_write_unsigned (NOTE_LINE_NUMBER (insn));
863 insn = NEXT_INSN (insn);
868 /* A NULL file name indicates the end of the run. */
869 gcov_write_unsigned (0);
870 gcov_write_string (NULL);
871 gcov_write_length (offset);
875 ENTRY_BLOCK_PTR->index = ENTRY_BLOCK;
876 EXIT_BLOCK_PTR->index = EXIT_BLOCK;
877 #undef BB_TO_GCOV_INDEX
879 if (flag_profile_values)
881 life_analysis (get_insns (), NULL, PROP_DEATH_NOTES);
882 find_values_to_profile (&n_values, &values);
883 allocate_reg_info (max_reg_num (), FALSE, FALSE);
886 if (flag_branch_probabilities)
887 compute_branch_probabilities ();
889 /* For each edge not on the spanning tree, add counting code as rtl. */
891 && coverage_counter_alloc (GCOV_COUNTER_ARCS, num_instrumented))
893 unsigned n_instrumented = instrument_edges (el);
895 if (n_instrumented != num_instrumented)
898 if (flag_profile_values)
899 instrument_values (n_values, values);
901 /* Commit changes done by instrumentation. */
902 commit_edge_insertions_watch_calls ();
903 allocate_reg_info (max_reg_num (), FALSE, FALSE);
906 if (flag_profile_values)
907 count_or_remove_death_notes (NULL, 1);
908 remove_fake_edges ();
909 free_aux_for_edges ();
910 /* Re-merge split basic blocks and the mess introduced by
911 insert_insn_on_edge. */
912 cleanup_cfg (profile_arc_flag ? CLEANUP_EXPENSIVE : 0);
914 dump_flow_info (rtl_dump_file);
919 /* Union find algorithm implementation for the basic blocks, using the aux field. */
926 basic_block group = bb, bb1;
928 while ((basic_block) group->aux != group)
929 group = (basic_block) group->aux;
932 while ((basic_block) bb->aux != group)
934 bb1 = (basic_block) bb->aux;
935 bb->aux = (void *) group;
942 union_groups (bb1, bb2)
943 basic_block bb1, bb2;
945 basic_block bb1g = find_group (bb1);
946 basic_block bb2g = find_group (bb2);
948 /* ??? I don't have a place for the rank field. OK. Let's go without it;
949 this code is unlikely to be a performance problem anyway. */
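/* For instance, after union_groups (a, b) and union_groups (b, c), the
   blocks a, b and c share one representative, so find_group returns the
   same block for all three and the spanning tree code below refuses to
   add an edge between any two of them, because it would close a cycle.  */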
956 /* This function searches all of the edges in the program flow graph, and puts
957 as many bad edges as possible onto the spanning tree. Bad edges include
958 abnormal edges, which can't be instrumented at the moment. Since it is
959 possible for fake edges to form a cycle, we will have to develop some
960 better way in the future. Also put critical edges on the tree, since they
961 are more expensive to instrument. */
964 find_spanning_tree (el)
965 struct edge_list *el;
968 int num_edges = NUM_EDGES (el);
971 /* We use the aux field for the standard union-find algorithm. */
972 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
975 /* Add the fake exit-to-entry edge, which we can't instrument. */
976 union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
978 /* First add all abnormal edges to the tree unless they form a cycle. Also
979 add all edges to EXIT_BLOCK_PTR, to avoid inserting profiling code after
980 the return value of the function has been set. */
981 for (i = 0; i < num_edges; i++)
983 edge e = INDEX_EDGE (el, i);
984 if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
985 || e->dest == EXIT_BLOCK_PTR)
986 && !EDGE_INFO (e)->ignore
987 && (find_group (e->src) != find_group (e->dest)))
990 fprintf (rtl_dump_file, "Abnormal edge %d to %d put to tree\n",
991 e->src->index, e->dest->index);
992 EDGE_INFO (e)->on_tree = 1;
993 union_groups (e->src, e->dest);
997 /* Now insert all critical edges to the tree unless they form a cycle. */
998 for (i = 0; i < num_edges; i++)
1000 edge e = INDEX_EDGE (el, i);
1001 if (EDGE_CRITICAL_P (e) && !EDGE_INFO (e)->ignore
1002 && find_group (e->src) != find_group (e->dest))
1005 fprintf (rtl_dump_file, "Critical edge %d to %d put to tree\n",
1006 e->src->index, e->dest->index);
1007 EDGE_INFO (e)->on_tree = 1;
1008 union_groups (e->src, e->dest);
1012 /* And now the rest. */
1013 for (i = 0; i < num_edges; i++)
1015 edge e = INDEX_EDGE (el, i);
1016 if (!EDGE_INFO (e)->ignore
1017 && find_group (e->src) != find_group (e->dest))
1020 fprintf (rtl_dump_file, "Normal edge %d to %d put to tree\n",
1021 e->src->index, e->dest->index);
1022 EDGE_INFO (e)->on_tree = 1;
1023 union_groups (e->src, e->dest);
1027 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1031 /* Perform file-level initialization for branch-prob processing. */
1038 total_num_blocks = 0;
1039 total_num_edges = 0;
1040 total_num_edges_ignored = 0;
1041 total_num_edges_instrumented = 0;
1042 total_num_blocks_created = 0;
1043 total_num_passes = 0;
1044 total_num_times_called = 0;
1045 total_num_branches = 0;
1046 total_num_never_executed = 0;
1047 for (i = 0; i < 20; i++)
1048 total_hist_br_prob[i] = 0;
1051 /* Performs file-level cleanup after branch-prob processing is completed. */
1059 fprintf (rtl_dump_file, "\n");
1060 fprintf (rtl_dump_file, "Total number of blocks: %d\n",
1062 fprintf (rtl_dump_file, "Total number of edges: %d\n", total_num_edges);
1063 fprintf (rtl_dump_file, "Total number of ignored edges: %d\n",
1064 total_num_edges_ignored);
1065 fprintf (rtl_dump_file, "Total number of instrumented edges: %d\n",
1066 total_num_edges_instrumented);
1067 fprintf (rtl_dump_file, "Total number of blocks created: %d\n",
1068 total_num_blocks_created);
1069 fprintf (rtl_dump_file, "Total number of graph solution passes: %d\n",
1071 if (total_num_times_called != 0)
1072 fprintf (rtl_dump_file, "Average number of graph solution passes: %d\n",
1073 (total_num_passes + (total_num_times_called >> 1))
1074 / total_num_times_called);
1075 fprintf (rtl_dump_file, "Total number of branches: %d\n",
1076 total_num_branches);
1077 fprintf (rtl_dump_file, "Total number of branches never executed: %d\n",
1078 total_num_never_executed);
1079 if (total_num_branches)
1083 for (i = 0; i < 10; i++)
1084 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
1085 (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1086 / total_num_branches, 5*i, 5*i+5);
1092 /* Output instructions as RTL to increment the edge execution count. */
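/* In C terms the emitted sequence behaves roughly like

     counters[edgeno]++;

   where "counters" stands for the per-function arc counter array that
   coverage_counter_ref refers to; the name is illustrative only.  */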
1095 gen_edge_profiler (edgeno)
1098 rtx ref = coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1100 enum machine_mode mode = GET_MODE (ref);
1104 ref = validize_mem (ref);
1106 tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
1107 ref, 0, OPTAB_WIDEN);
1110 emit_move_insn (copy_rtx (ref), tmp);
1112 sequence = get_insns ();
1117 /* Output instructions as RTL to increment the interval histogram counter.
1118 VALUE is the expression whose value is profiled. TAG is the tag of the
1119 section for counters; BASE is the offset of the counter position. */
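/* In C terms the emitted sequence behaves roughly like the following,
   where "counter" stands for the block of counters at BASE and the names
   are illustrative only:

     val = value - int_start;
     if (may_be_more && val >= steps)
       counter[steps]++;
     else if (may_be_less && val < 0)
       counter[steps + (may_be_more ? 1 : 0)]++;
     else
       counter[val]++;
*/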
1122 gen_interval_profiler (struct histogram_value *value,
1123 unsigned tag, unsigned base)
1125 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
1126 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
1127 rtx mem_ref, tmp, tmp1, mr, val;
1129 rtx more_label = gen_label_rtx ();
1130 rtx less_label = gen_label_rtx ();
1131 rtx end_of_code_label = gen_label_rtx ();
1132 int per_counter = gcov_size / BITS_PER_UNIT;
1137 emit_insn (value->seq);
1139 mr = gen_reg_rtx (Pmode);
1141 tmp = coverage_counter_ref (tag, base);
1142 tmp = force_reg (Pmode, XEXP (tmp, 0));
1144 val = expand_simple_binop (value->mode, MINUS,
1145 copy_rtx (value->value),
1146 GEN_INT (value->hdata.intvl.int_start),
1147 NULL_RTX, 0, OPTAB_WIDEN);
1149 if (value->hdata.intvl.may_be_more)
1150 do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
1151 GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
1152 if (value->hdata.intvl.may_be_less)
1153 do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
1154 NULL_RTX, NULL_RTX, less_label);
1156 /* We are in range. */
1157 tmp1 = expand_simple_binop (value->mode, MULT,
1158 copy_rtx (val), GEN_INT (per_counter),
1159 NULL_RTX, 0, OPTAB_WIDEN);
1160 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
1163 emit_move_insn (copy_rtx (mr), tmp1);
1165 if (value->hdata.intvl.may_be_more
1166 || value->hdata.intvl.may_be_less)
1168 emit_jump_insn (gen_jump (end_of_code_label));
1172 /* Above the interval. */
1173 if (value->hdata.intvl.may_be_more)
1175 emit_label (more_label);
1176 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
1177 GEN_INT (per_counter * value->hdata.intvl.steps),
1178 mr, 0, OPTAB_WIDEN);
1180 emit_move_insn (copy_rtx (mr), tmp1);
1181 if (value->hdata.intvl.may_be_less)
1183 emit_jump_insn (gen_jump (end_of_code_label));
1188 /* Below the interval. */
1189 if (value->hdata.intvl.may_be_less)
1191 emit_label (less_label);
1192 tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
1193 GEN_INT (per_counter * (value->hdata.intvl.steps
1194 + (value->hdata.intvl.may_be_more ? 1 : 0))),
1195 mr, 0, OPTAB_WIDEN);
1197 emit_move_insn (copy_rtx (mr), tmp1);
1200 if (value->hdata.intvl.may_be_more
1201 || value->hdata.intvl.may_be_less)
1202 emit_label (end_of_code_label);
1204 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
1206 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
1207 mem_ref, 0, OPTAB_WIDEN);
1210 emit_move_insn (copy_rtx (mem_ref), tmp);
1212 sequence = get_insns ();
1214 rebuild_jump_labels (sequence);
1218 /* Output instructions as RTL to increment the power of two histogram counter.
1219 VALUE is the expression whose value is profiled. TAG is the tag of the
1220 section for counters; BASE is the offset of the counter position. */
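/* In C terms the emitted sequence behaves roughly like the following
   (illustrative names; the real code advances a pointer by per_counter
   bytes while shifting the value right):

     if (may_be_other && (value <= 0 || (value & (value - 1)) != 0))
       counter[0]++;
     else
       counter[floor_log2 (value) + 1]++;

   so a value of 2^k bumps counter[k + 1] and everything else falls into
   counter[0].  */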
1223 gen_pow2_profiler (struct histogram_value *value,
1224 unsigned tag, unsigned base)
1226 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
1227 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
1228 rtx mem_ref, tmp, mr, uval;
1230 rtx end_of_code_label = gen_label_rtx ();
1231 rtx loop_label = gen_label_rtx ();
1232 int per_counter = gcov_size / BITS_PER_UNIT;
1237 emit_insn (value->seq);
1239 mr = gen_reg_rtx (Pmode);
1240 tmp = coverage_counter_ref (tag, base);
1241 tmp = force_reg (Pmode, XEXP (tmp, 0));
1242 emit_move_insn (mr, tmp);
1244 uval = gen_reg_rtx (value->mode);
1245 emit_move_insn (uval, copy_rtx (value->value));
1247 /* Check for non-power of 2. */
1248 if (value->hdata.pow2.may_be_other)
1250 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
1251 NULL_RTX, NULL_RTX, end_of_code_label);
1252 tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
1253 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
1254 tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
1255 NULL_RTX, 0, OPTAB_WIDEN);
1256 do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
1257 NULL_RTX, end_of_code_label);
1260 /* Count log_2(value). */
1261 emit_label (loop_label);
1263 tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
1265 emit_move_insn (copy_rtx (mr), tmp);
1267 tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
1268 uval, 0, OPTAB_WIDEN);
1270 emit_move_insn (copy_rtx (uval), tmp);
1272 do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
1273 NULL_RTX, NULL_RTX, loop_label);
1275 /* Increase the counter. */
1276 emit_label (end_of_code_label);
1278 mem_ref = validize_mem (gen_rtx_MEM (mode, mr));
1280 tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
1281 mem_ref, 0, OPTAB_WIDEN);
1284 emit_move_insn (copy_rtx (mem_ref), tmp);
1286 sequence = get_insns ();
1288 rebuild_jump_labels (sequence);
1292 /* Output instructions as RTL for code to find the most common value.
1293 VALUE is the expression whose value is profiled. TAG is the tag of the
1294 section for counters; BASE is the offset of the counter position. */
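/* In C terms the three counters at BASE behave roughly like

     if (value == stored_value)
       counter++;
     else if (counter == 0)
       {
	 stored_value = value;
	 counter = 1;
       }
     else
       counter--;
     all++;

   i.e. the classic single-candidate majority-vote scheme; stored_value,
   counter and all live at BASE, BASE + 1 and BASE + 2 respectively, and
   the names are illustrative only.  */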
1297 gen_one_value_profiler (struct histogram_value *value,
1298 unsigned tag, unsigned base)
1300 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
1301 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
1302 rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
1305 rtx same_label = gen_label_rtx ();
1306 rtx zero_label = gen_label_rtx ();
1307 rtx end_of_code_label = gen_label_rtx ();
1312 emit_insn (value->seq);
1314 stored_value_ref = coverage_counter_ref (tag, base);
1315 counter_ref = coverage_counter_ref (tag, base + 1);
1316 all_ref = coverage_counter_ref (tag, base + 2);
1317 stored_value = validize_mem (stored_value_ref);
1318 counter = validize_mem (counter_ref);
1319 all = validize_mem (all_ref);
1321 uval = gen_reg_rtx (mode);
1322 convert_move (uval, copy_rtx (value->value), 0);
1324 /* Check if the stored value matches. */
1325 do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
1326 0, mode, NULL_RTX, NULL_RTX, same_label);
1328 /* Does not match; check whether the counter is zero. */
1329 do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
1330 NULL_RTX, NULL_RTX, zero_label);
1332 /* The counter is not zero yet. */
1333 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
1334 counter, 0, OPTAB_WIDEN);
1337 emit_move_insn (copy_rtx (counter), tmp);
1339 emit_jump_insn (gen_jump (end_of_code_label));
1342 emit_label (zero_label);
1343 /* Set new value. */
1344 emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));
1346 emit_label (same_label);
1347 /* Increase the counter. */
1348 tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
1349 counter, 0, OPTAB_WIDEN);
1352 emit_move_insn (copy_rtx (counter), tmp);
1354 emit_label (end_of_code_label);
1356 /* Increase the counter of all executions; this seems redundant given
1357 that we have counts for edges in the CFG, but it may happen that some
1358 optimization will change the counts for the block (either because
1359 it is unable to update them correctly, or because it will duplicate
1360 the block or part of it). */
1361 tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
1362 all, 0, OPTAB_WIDEN);
1365 emit_move_insn (copy_rtx (all), tmp);
1366 sequence = get_insns ();
1368 rebuild_jump_labels (sequence);
1372 /* Output instructions as RTL for code to find the most common value of
1373 a difference between two evaluations of an expression.
1374 VALUE is the expression whose value is profiled. TAG is the tag of the
1375 section for counters; BASE is the offset of the counter position. */
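/* In C terms the emitted sequence behaves roughly like

     delta = value - stored_value;
     ...update the single-value counters at BASE + 1 with delta...
     stored_value = value;

   i.e. the single-value profiler above is applied to the successive
   differences of the measured expression, with the previous value kept
   in the counter at BASE.  */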
1378 gen_const_delta_profiler (struct histogram_value *value,
1379 unsigned tag, unsigned base)
1381 struct histogram_value one_value_delta;
1382 unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
1383 enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
1384 rtx stored_value_ref, stored_value, tmp, uval;
1390 emit_insn (value->seq);
1392 stored_value_ref = coverage_counter_ref (tag, base);
1393 stored_value = validize_mem (stored_value_ref);
1395 uval = gen_reg_rtx (mode);
1396 convert_move (uval, copy_rtx (value->value), 0);
1397 tmp = expand_simple_binop (mode, MINUS,
1398 copy_rtx (uval), copy_rtx (stored_value),
1399 NULL_RTX, 0, OPTAB_WIDEN);
1401 one_value_delta.value = tmp;
1402 one_value_delta.mode = mode;
1403 one_value_delta.seq = NULL_RTX;
1404 one_value_delta.insn = value->insn;
1405 one_value_delta.type = HIST_TYPE_SINGLE_VALUE;
1406 emit_insn (gen_one_value_profiler (&one_value_delta, tag, base + 1));
1408 emit_move_insn (copy_rtx (stored_value), uval);
1409 sequence = get_insns ();
1411 rebuild_jump_labels (sequence);