1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This (greedy) algorithm constructs traces in several rounds.
22 The construction starts from "seeds". The seed for the first round
23 is the entry point of function. When there are more than one seed
24 that one is selected first that has the lowest key in the heap
25 (see function bb_to_key). Then the algorithm repeatedly adds the most
26 probable successor to the end of a trace. Finally it connects the traces.
28 There are two parameters: Branch Threshold and Exec Threshold.
29 If the edge to a successor of the actual basic block is lower than
30 Branch Threshold or the frequency of the successor is lower than
31 Exec Threshold the successor will be the seed in one of the next rounds.
32 Each round has these parameters lower than the previous one.
33 The last round has to have these parameters set to zero
34 so that the remaining blocks are picked up.
36 The algorithm selects the most probable successor from all unvisited
37 successors and successors that have been added to this trace.
38 The other successors (that has not been "sent" to the next round) will be
39 other seeds for this round and the secondary traces will start in them.
40 If the successor has not been visited in this trace it is added to the trace
41 (however, there is some heuristic for simple branches).
42 If the successor has been visited in this trace the loop has been found.
43 If the loop has many iterations the loop is rotated so that the
44 source block of the most probable edge going out from the loop
45 is the last block of the trace.
46 If the loop has few iterations and there is no edge from the last block of
47 the loop going out from loop the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
50 When connecting traces it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
56 The rest of traces are simply connected so there will be a jump to the
57 beginning of the rest of trace.
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
70 #include "coretypes.h"
77 #include "cfglayout.h"
87 /* The number of rounds. In most cases there will only be 4 rounds, but
88 when partitioning hot and cold basic blocks into separate sections of
89 the .o file there will be an extra round.*/
92 /* Stubs in case we don't have a return insn.
93 We have to check at runtime too, not only compiletime. */
97 #define gen_return() NULL_RTX
101 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
102 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
104 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
105 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
107 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
108 block the edge destination is not duplicated while connecting traces. */
109 #define DUPLICATION_THRESHOLD 100
111 /* Length of unconditional jump instruction. */
112 static int uncond_jump_length;
114 /* Structure to hold needed information for each basic block. */
115 typedef struct bbro_basic_block_data_def
117 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
120 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
123 /* Which trace is the bb in? */
126 /* Which heap is BB in (if any)? */
129 /* Which heap node is BB in (if any)? */
131 } bbro_basic_block_data;
133 /* The current size of the following dynamic array. */
134 static int array_size;
136 /* The array which holds needed information for basic blocks. */
137 static bbro_basic_block_data *bbd;
139 /* To avoid frequent reallocation the size of arrays is greater than needed,
140 the number of elements is (not less than) 1.25 * size_wanted. */
141 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
143 /* Free the memory and set the pointer to NULL. */
144 #define FREE(P) (gcc_assert (P), free (P), P = 0)
146 /* Structure for holding information about a trace. */
149 /* First and last basic block of the trace. */
150 basic_block first, last;
152 /* The round of the STC creation which this trace was found in. */
155 /* The length (i.e. the number of basic blocks) of the trace. */
159 /* Maximum frequency and count of one of the entry blocks. */
160 static int max_entry_frequency;
161 static gcov_type max_entry_count;
163 /* Local function prototypes. */
164 static void find_traces (int *, struct trace *);
165 static basic_block rotate_loop (edge, struct trace *, int);
166 static void mark_bb_visited (basic_block, int);
167 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
168 int, fibheap_t *, int);
169 static basic_block copy_bb (basic_block, edge, basic_block, int);
170 static fibheapkey_t bb_to_key (basic_block);
171 static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
172 static void connect_traces (int, struct trace *);
173 static bool copy_bb_p (basic_block, int);
174 static int get_uncond_jump_length (void);
175 static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
176 static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *,
179 static void add_labels_and_missing_jumps (edge *, int);
180 static void add_reg_crossing_jump_notes (void);
181 static void fix_up_fall_thru_edges (void);
182 static void fix_edges_for_rarely_executed_code (edge *, int);
183 static void fix_crossing_conditional_branches (void);
184 static void fix_crossing_unconditional_branches (void);
186 /* Check to see if bb should be pushed into the next round of trace
187 collections or not. Reasons for pushing the block forward are 1).
188 If the block is cold, we are doing partitioning, and there will be
189 another round (cold partition blocks are not supposed to be
190 collected into traces until the very last round); or 2). There will
191 be another round, and the basic block is not "hot enough" for the
192 current round of trace collection. */
195 push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
196 int exec_th, gcov_type count_th)
198 bool there_exists_another_round;
199 bool block_not_hot_enough;
201 there_exists_another_round = round < number_of_rounds - 1;
203 block_not_hot_enough = (bb->frequency < exec_th
204 || bb->count < count_th
205 || probably_never_executed_bb_p (bb));
207 if (there_exists_another_round
208 && block_not_hot_enough)
214 /* Find the traces for Software Trace Cache. Chain each trace through
215 RBI()->next. Store the number of traces to N_TRACES and description of
219 find_traces (int *n_traces, struct trace *traces)
222 int number_of_rounds;
227 /* Add one extra round of trace collection when partitioning hot/cold
228 basic blocks into separate sections. The last round is for all the
229 cold blocks (and ONLY the cold blocks). */
231 number_of_rounds = N_ROUNDS - 1;
233 /* Insert entry points of function into heap. */
234 heap = fibheap_new ();
235 max_entry_frequency = 0;
237 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
239 bbd[e->dest->index].heap = heap;
240 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
242 if (e->dest->frequency > max_entry_frequency)
243 max_entry_frequency = e->dest->frequency;
244 if (e->dest->count > max_entry_count)
245 max_entry_count = e->dest->count;
248 /* Find the traces. */
249 for (i = 0; i < number_of_rounds; i++)
251 gcov_type count_threshold;
254 fprintf (dump_file, "STC - round %d\n", i + 1);
256 if (max_entry_count < INT_MAX / 1000)
257 count_threshold = max_entry_count * exec_threshold[i] / 1000;
259 count_threshold = max_entry_count / 1000 * exec_threshold[i];
261 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
262 max_entry_frequency * exec_threshold[i] / 1000,
263 count_threshold, traces, n_traces, i, &heap,
266 fibheap_delete (heap);
270 for (i = 0; i < *n_traces; i++)
273 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
274 traces[i].round + 1);
275 for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
276 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
277 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
283 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
284 (with sequential number TRACE_N). */
287 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
291 /* Information about the best end (end after rotation) of the loop. */
292 basic_block best_bb = NULL;
293 edge best_edge = NULL;
295 gcov_type best_count = -1;
296 /* The best edge is preferred when its destination is not visited yet
297 or is a start block of some trace. */
298 bool is_preferred = false;
300 /* Find the most frequent edge that goes out from current trace. */
301 bb = back_edge->dest;
307 FOR_EACH_EDGE (e, ei, bb->succs)
308 if (e->dest != EXIT_BLOCK_PTR
309 && e->dest->il.rtl->visited != trace_n
310 && (e->flags & EDGE_CAN_FALLTHRU)
311 && !(e->flags & EDGE_COMPLEX))
315 /* The best edge is preferred. */
316 if (!e->dest->il.rtl->visited
317 || bbd[e->dest->index].start_of_trace >= 0)
319 /* The current edge E is also preferred. */
320 int freq = EDGE_FREQUENCY (e);
321 if (freq > best_freq || e->count > best_count)
324 best_count = e->count;
332 if (!e->dest->il.rtl->visited
333 || bbd[e->dest->index].start_of_trace >= 0)
335 /* The current edge E is preferred. */
337 best_freq = EDGE_FREQUENCY (e);
338 best_count = e->count;
344 int freq = EDGE_FREQUENCY (e);
345 if (!best_edge || freq > best_freq || e->count > best_count)
348 best_count = e->count;
357 while (bb != back_edge->dest);
361 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
363 if (back_edge->dest == trace->first)
365 trace->first = best_bb->aux;
371 for (prev_bb = trace->first;
372 prev_bb->aux != back_edge->dest;
373 prev_bb = prev_bb->aux)
375 prev_bb->aux = best_bb->aux;
377 /* Try to get rid of uncond jump to cond jump. */
378 if (single_succ_p (prev_bb))
380 basic_block header = single_succ (prev_bb);
382 /* Duplicate HEADER if it is a small block containing cond jump
384 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
385 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
387 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
393 /* We have not found suitable loop tail so do no rotation. */
394 best_bb = back_edge->src;
400 /* This function marks BB that it was visited in trace number TRACE. */
403 mark_bb_visited (basic_block bb, int trace)
405 bb->il.rtl->visited = trace;
406 if (bbd[bb->index].heap)
408 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
409 bbd[bb->index].heap = NULL;
410 bbd[bb->index].node = NULL;
414 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
415 not include basic blocks their probability is lower than BRANCH_TH or their
416 frequency is lower than EXEC_TH into traces (or count is lower than
417 COUNT_TH). It stores the new traces into TRACES and modifies the number of
418 traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It
419 expects that starting basic blocks are in *HEAP and at the end it deletes
420 *HEAP and stores starting points for the next round into new *HEAP. */
423 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
424 struct trace *traces, int *n_traces, int round,
425 fibheap_t *heap, int number_of_rounds)
427 /* Heap for discarded basic blocks which are possible starting points for
429 fibheap_t new_heap = fibheap_new ();
431 while (!fibheap_empty (*heap))
439 bb = fibheap_extract_min (*heap);
440 bbd[bb->index].heap = NULL;
441 bbd[bb->index].node = NULL;
444 fprintf (dump_file, "Getting bb %d\n", bb->index);
446 /* If the BB's frequency is too low send BB to the next round. When
447 partitioning hot/cold blocks into separate sections, make sure all
448 the cold blocks (and ONLY the cold blocks) go into the (extra) final
451 if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
454 int key = bb_to_key (bb);
455 bbd[bb->index].heap = new_heap;
456 bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
460 " Possible start point of next round: %d (key: %d)\n",
465 trace = traces + *n_traces;
467 trace->round = round;
469 bbd[bb->index].in_trace = *n_traces;
477 /* The probability and frequency of the best edge. */
478 int best_prob = INT_MIN / 2;
479 int best_freq = INT_MIN / 2;
482 mark_bb_visited (bb, *n_traces);
486 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
487 bb->index, *n_traces - 1);
489 ends_in_call = block_ends_with_call_p (bb);
491 /* Select the successor that will be placed after BB. */
492 FOR_EACH_EDGE (e, ei, bb->succs)
494 gcc_assert (!(e->flags & EDGE_FAKE));
496 if (e->dest == EXIT_BLOCK_PTR)
499 if (e->dest->il.rtl->visited
500 && e->dest->il.rtl->visited != *n_traces)
503 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
506 prob = e->probability;
507 freq = e->dest->frequency;
509 /* The only sensible preference for a call instruction is the
510 fallthru edge. Don't bother selecting anything else. */
513 if (e->flags & EDGE_CAN_FALLTHRU)
522 /* Edge that cannot be fallthru or improbable or infrequent
523 successor (i.e. it is unsuitable successor). */
524 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
525 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
526 || e->count < count_th)
529 /* If partitioning hot/cold basic blocks, don't consider edges
530 that cross section boundaries. */
532 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
541 /* If the best destination has multiple predecessors, and can be
542 duplicated cheaper than a jump, don't allow it to be added
543 to a trace. We'll duplicate it when connecting traces. */
544 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
545 && copy_bb_p (best_edge->dest, 0))
548 /* Add all non-selected successors to the heaps. */
549 FOR_EACH_EDGE (e, ei, bb->succs)
552 || e->dest == EXIT_BLOCK_PTR
553 || e->dest->il.rtl->visited)
556 key = bb_to_key (e->dest);
558 if (bbd[e->dest->index].heap)
560 /* E->DEST is already in some heap. */
561 if (key != bbd[e->dest->index].node->key)
566 "Changing key for bb %d from %ld to %ld.\n",
568 (long) bbd[e->dest->index].node->key,
571 fibheap_replace_key (bbd[e->dest->index].heap,
572 bbd[e->dest->index].node, key);
577 fibheap_t which_heap = *heap;
579 prob = e->probability;
580 freq = EDGE_FREQUENCY (e);
582 if (!(e->flags & EDGE_CAN_FALLTHRU)
583 || (e->flags & EDGE_COMPLEX)
584 || prob < branch_th || freq < exec_th
585 || e->count < count_th)
587 /* When partitioning hot/cold basic blocks, make sure
588 the cold blocks (and only the cold blocks) all get
589 pushed to the last round of trace collection. */
591 if (push_to_next_round_p (e->dest, round,
594 which_heap = new_heap;
597 bbd[e->dest->index].heap = which_heap;
598 bbd[e->dest->index].node = fibheap_insert (which_heap,
604 " Possible start of %s round: %d (key: %ld)\n",
605 (which_heap == new_heap) ? "next" : "this",
606 e->dest->index, (long) key);
612 if (best_edge) /* Suitable successor was found. */
614 if (best_edge->dest->il.rtl->visited == *n_traces)
616 /* We do nothing with one basic block loops. */
617 if (best_edge->dest != bb)
619 if (EDGE_FREQUENCY (best_edge)
620 > 4 * best_edge->dest->frequency / 5)
622 /* The loop has at least 4 iterations. If the loop
623 header is not the first block of the function
624 we can rotate the loop. */
626 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
631 "Rotating loop %d - %d\n",
632 best_edge->dest->index, bb->index);
634 bb->aux = best_edge->dest;
635 bbd[best_edge->dest->index].in_trace =
637 bb = rotate_loop (best_edge, trace, *n_traces);
642 /* The loop has less than 4 iterations. */
644 if (single_succ_p (bb)
645 && copy_bb_p (best_edge->dest, !optimize_size))
647 bb = copy_bb (best_edge->dest, best_edge, bb,
654 /* Terminate the trace. */
659 /* Check for a situation
668 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
669 >= EDGE_FREQUENCY (AC).
670 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
671 Best ordering is then A B C.
673 This situation is created for example by:
680 FOR_EACH_EDGE (e, ei, bb->succs)
682 && (e->flags & EDGE_CAN_FALLTHRU)
683 && !(e->flags & EDGE_COMPLEX)
684 && !e->dest->il.rtl->visited
685 && single_pred_p (e->dest)
686 && !(e->flags & EDGE_CROSSING)
687 && single_succ_p (e->dest)
688 && (single_succ_edge (e->dest)->flags
690 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
691 && single_succ (e->dest) == best_edge->dest
692 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
696 fprintf (dump_file, "Selecting BB %d\n",
697 best_edge->dest->index);
701 bb->aux = best_edge->dest;
702 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
703 bb = best_edge->dest;
709 bbd[trace->first->index].start_of_trace = *n_traces - 1;
710 bbd[trace->last->index].end_of_trace = *n_traces - 1;
712 /* The trace is terminated so we have to recount the keys in heap
713 (some block can have a lower key because now one of its predecessors
714 is an end of the trace). */
715 FOR_EACH_EDGE (e, ei, bb->succs)
717 if (e->dest == EXIT_BLOCK_PTR
718 || e->dest->il.rtl->visited)
721 if (bbd[e->dest->index].heap)
723 key = bb_to_key (e->dest);
724 if (key != bbd[e->dest->index].node->key)
729 "Changing key for bb %d from %ld to %ld.\n",
731 (long) bbd[e->dest->index].node->key, key);
733 fibheap_replace_key (bbd[e->dest->index].heap,
734 bbd[e->dest->index].node,
741 fibheap_delete (*heap);
743 /* "Return" the new heap. */
747 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
748 it to trace after BB, mark OLD_BB visited and update pass' data structures
749 (TRACE is a number of trace which OLD_BB is duplicated to). */
752 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
756 new_bb = duplicate_block (old_bb, e);
757 BB_COPY_PARTITION (new_bb, old_bb);
759 gcc_assert (e->dest == new_bb);
760 gcc_assert (!e->dest->il.rtl->visited);
764 "Duplicated bb %d (created bb %d)\n",
765 old_bb->index, new_bb->index);
766 new_bb->il.rtl->visited = trace;
767 new_bb->aux = bb->aux;
770 if (new_bb->index >= array_size || last_basic_block > array_size)
775 new_size = MAX (last_basic_block, new_bb->index + 1);
776 new_size = GET_ARRAY_SIZE (new_size);
777 bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
778 for (i = array_size; i < new_size; i++)
780 bbd[i].start_of_trace = -1;
781 bbd[i].in_trace = -1;
782 bbd[i].end_of_trace = -1;
786 array_size = new_size;
791 "Growing the dynamic array to %d elements.\n",
796 bbd[new_bb->index].in_trace = trace;
801 /* Compute and return the key (for the heap) of the basic block BB. */
804 bb_to_key (basic_block bb)
810 /* Do not start in probably never executed blocks. */
812 if (BB_PARTITION (bb) == BB_COLD_PARTITION
813 || probably_never_executed_bb_p (bb))
816 /* Prefer blocks whose predecessor is an end of some trace
817 or whose predecessor edge is EDGE_DFS_BACK. */
818 FOR_EACH_EDGE (e, ei, bb->preds)
820 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
821 || (e->flags & EDGE_DFS_BACK))
823 int edge_freq = EDGE_FREQUENCY (e);
825 if (edge_freq > priority)
826 priority = edge_freq;
831 /* The block with priority should have significantly lower key. */
832 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
833 return -bb->frequency;
836 /* Return true when the edge E from basic block BB is better than the temporary
837 best edge (details are in function). The probability of edge E is PROB. The
838 frequency of the successor is FREQ. The current best probability is
839 BEST_PROB, the best frequency is BEST_FREQ.
840 The edge is considered to be equivalent when PROB does not differ much from
841 BEST_PROB; similarly for frequency. */
844 better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
845 int best_freq, edge cur_best_edge)
849 /* The BEST_* values do not have to be best, but can be a bit smaller than
851 int diff_prob = best_prob / 10;
852 int diff_freq = best_freq / 10;
854 if (prob > best_prob + diff_prob)
855 /* The edge has higher probability than the temporary best edge. */
856 is_better_edge = true;
857 else if (prob < best_prob - diff_prob)
858 /* The edge has lower probability than the temporary best edge. */
859 is_better_edge = false;
860 else if (freq < best_freq - diff_freq)
861 /* The edge and the temporary best edge have almost equivalent
862 probabilities. The higher frequency of a successor now means
863 that there is another edge going into that successor.
864 This successor has lower frequency so it is better. */
865 is_better_edge = true;
866 else if (freq > best_freq + diff_freq)
867 /* This successor has higher frequency so it is worse. */
868 is_better_edge = false;
869 else if (e->dest->prev_bb == bb)
870 /* The edges have equivalent probabilities and the successors
871 have equivalent frequencies. Select the previous successor. */
872 is_better_edge = true;
874 is_better_edge = false;
876 /* If we are doing hot/cold partitioning, make sure that we always favor
877 non-crossing edges over crossing edges. */
880 && flag_reorder_blocks_and_partition
882 && (cur_best_edge->flags & EDGE_CROSSING)
883 && !(e->flags & EDGE_CROSSING))
884 is_better_edge = true;
886 return is_better_edge;
889 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
892 connect_traces (int n_traces, struct trace *traces)
899 int current_partition;
901 gcov_type count_threshold;
903 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
904 if (max_entry_count < INT_MAX / 1000)
905 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
907 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
909 connected = xcalloc (n_traces, sizeof (bool));
912 current_partition = BB_PARTITION (traces[0].first);
915 if (flag_reorder_blocks_and_partition)
916 for (i = 0; i < n_traces && !two_passes; i++)
917 if (BB_PARTITION (traces[0].first)
918 != BB_PARTITION (traces[i].first))
921 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
930 gcc_assert (two_passes && current_pass == 1);
934 if (current_partition == BB_HOT_PARTITION)
935 current_partition = BB_COLD_PARTITION;
937 current_partition = BB_HOT_PARTITION;
944 && BB_PARTITION (traces[t].first) != current_partition)
949 /* Find the predecessor traces. */
950 for (t2 = t; t2 > 0;)
955 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
957 int si = e->src->index;
959 if (e->src != ENTRY_BLOCK_PTR
960 && (e->flags & EDGE_CAN_FALLTHRU)
961 && !(e->flags & EDGE_COMPLEX)
962 && bbd[si].end_of_trace >= 0
963 && !connected[bbd[si].end_of_trace]
964 && (BB_PARTITION (e->src) == current_partition)
966 || e->probability > best->probability
967 || (e->probability == best->probability
968 && traces[bbd[si].end_of_trace].length > best_len)))
971 best_len = traces[bbd[si].end_of_trace].length;
976 best->src->aux = best->dest;
977 t2 = bbd[best->src->index].end_of_trace;
978 connected[t2] = true;
982 fprintf (dump_file, "Connection: %d %d\n",
983 best->src->index, best->dest->index);
991 traces[last_trace].last->aux = traces[t2].first;
994 /* Find the successor traces. */
997 /* Find the continuation of the chain. */
1001 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1003 int di = e->dest->index;
1005 if (e->dest != EXIT_BLOCK_PTR
1006 && (e->flags & EDGE_CAN_FALLTHRU)
1007 && !(e->flags & EDGE_COMPLEX)
1008 && bbd[di].start_of_trace >= 0
1009 && !connected[bbd[di].start_of_trace]
1010 && (BB_PARTITION (e->dest) == current_partition)
1012 || e->probability > best->probability
1013 || (e->probability == best->probability
1014 && traces[bbd[di].start_of_trace].length > best_len)))
1017 best_len = traces[bbd[di].start_of_trace].length;
1025 fprintf (dump_file, "Connection: %d %d\n",
1026 best->src->index, best->dest->index);
1028 t = bbd[best->dest->index].start_of_trace;
1029 traces[last_trace].last->aux = traces[t].first;
1030 connected[t] = true;
1035 /* Try to connect the traces by duplication of 1 block. */
1037 basic_block next_bb = NULL;
1038 bool try_copy = false;
1040 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1041 if (e->dest != EXIT_BLOCK_PTR
1042 && (e->flags & EDGE_CAN_FALLTHRU)
1043 && !(e->flags & EDGE_COMPLEX)
1044 && (!best || e->probability > best->probability))
1050 /* If the destination is a start of a trace which is only
1051 one block long, then no need to search the successor
1052 blocks of the trace. Accept it. */
1053 if (bbd[e->dest->index].start_of_trace >= 0
1054 && traces[bbd[e->dest->index].start_of_trace].length
1062 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1064 int di = e2->dest->index;
1066 if (e2->dest == EXIT_BLOCK_PTR
1067 || ((e2->flags & EDGE_CAN_FALLTHRU)
1068 && !(e2->flags & EDGE_COMPLEX)
1069 && bbd[di].start_of_trace >= 0
1070 && !connected[bbd[di].start_of_trace]
1071 && (BB_PARTITION (e2->dest) == current_partition)
1072 && (EDGE_FREQUENCY (e2) >= freq_threshold)
1073 && (e2->count >= count_threshold)
1075 || e2->probability > best2->probability
1076 || (e2->probability == best2->probability
1077 && traces[bbd[di].start_of_trace].length
1082 if (e2->dest != EXIT_BLOCK_PTR)
1083 best2_len = traces[bbd[di].start_of_trace].length;
1085 best2_len = INT_MAX;
1092 if (flag_reorder_blocks_and_partition)
1095 /* Copy tiny blocks always; copy larger blocks only when the
1096 edge is traversed frequently enough. */
1098 && copy_bb_p (best->dest,
1100 && EDGE_FREQUENCY (best) >= freq_threshold
1101 && best->count >= count_threshold))
1107 fprintf (dump_file, "Connection: %d %d ",
1108 traces[t].last->index, best->dest->index);
1110 fputc ('\n', dump_file);
1111 else if (next_bb == EXIT_BLOCK_PTR)
1112 fprintf (dump_file, "exit\n");
1114 fprintf (dump_file, "%d\n", next_bb->index);
1117 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1118 traces[t].last = new_bb;
1119 if (next_bb && next_bb != EXIT_BLOCK_PTR)
1121 t = bbd[next_bb->index].start_of_trace;
1122 traces[last_trace].last->aux = traces[t].first;
1123 connected[t] = true;
1127 break; /* Stop finding the successor traces. */
1130 break; /* Stop finding the successor traces. */
1139 fprintf (dump_file, "Final order:\n");
1140 for (bb = traces[0].first; bb; bb = bb->aux)
1141 fprintf (dump_file, "%d ", bb->index);
1142 fprintf (dump_file, "\n");
1149 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1150 when code size is allowed to grow by duplication. */
1153 copy_bb_p (basic_block bb, int code_may_grow)
1156 int max_size = uncond_jump_length;
1161 if (EDGE_COUNT (bb->preds) < 2)
1163 if (!can_duplicate_block_p (bb))
1166 /* Avoid duplicating blocks which have many successors (PR/13430). */
1167 if (EDGE_COUNT (bb->succs) > 8)
1170 if (code_may_grow && maybe_hot_bb_p (bb))
1173 FOR_BB_INSNS (bb, insn)
1176 size += get_attr_length (insn);
1179 if (size <= max_size)
1185 "Block %d can't be copied because its size = %d.\n",
1192 /* Return the length of unconditional jump instruction. */
1195 get_uncond_jump_length (void)
1200 label = emit_label_before (gen_label_rtx (), get_insns ());
1201 jump = emit_jump_insn (gen_jump (label));
1203 length = get_attr_length (jump);
1206 delete_insn (label);
1210 /* Find the basic blocks that are rarely executed and need to be moved to
1211 a separate section of the .o file (to cut down on paging and improve
1215 find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges,
1216 int *n_crossing_edges,
1220 bool has_hot_blocks = false;
1225 /* Mark which partition (hot/cold) each basic block belongs in. */
1229 if (probably_never_executed_bb_p (bb))
1230 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1233 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1234 has_hot_blocks = true;
1238 /* Mark every edge that crosses between sections. */
1242 FOR_EACH_EDGE (e, ei, bb->succs)
1244 if (e->src != ENTRY_BLOCK_PTR
1245 && e->dest != EXIT_BLOCK_PTR
1246 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1248 e->flags |= EDGE_CROSSING;
1252 crossing_edges = xrealloc (crossing_edges,
1253 (*max_idx) * sizeof (edge));
1255 crossing_edges[i++] = e;
1258 e->flags &= ~EDGE_CROSSING;
1260 *n_crossing_edges = i;
1263 /* If any destination of a crossing edge does not have a label, add label;
1264 Convert any fall-through crossing edges (for blocks that do not contain
1265 a jump) to unconditional jumps. */
1268 add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
1277 for (i=0; i < n_crossing_edges; i++)
1279 if (crossing_edges[i])
1281 src = crossing_edges[i]->src;
1282 dest = crossing_edges[i]->dest;
1284 /* Make sure dest has a label. */
1286 if (dest && (dest != EXIT_BLOCK_PTR))
1288 label = block_label (dest);
1290 /* Make sure source block ends with a jump. */
1292 if (src && (src != ENTRY_BLOCK_PTR))
1294 if (!JUMP_P (BB_END (src)))
1295 /* bb just falls through. */
1297 /* make sure there's only one successor */
1298 gcc_assert (single_succ_p (src));
1300 /* Find label in dest block. */
1301 label = block_label (dest);
1303 new_jump = emit_jump_insn_after (gen_jump (label),
1305 barrier = emit_barrier_after (new_jump);
1306 JUMP_LABEL (new_jump) = label;
1307 LABEL_NUSES (label) += 1;
1308 src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
1309 /* Mark edge as non-fallthru. */
1310 crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
1311 } /* end: 'if (GET_CODE ... ' */
1312 } /* end: 'if (src && src->index...' */
1313 } /* end: 'if (dest && dest->index...' */
1314 } /* end: 'if (crossing_edges[i]...' */
1315 } /* end for loop */
1318 /* Find any bb's where the fall-through edge is a crossing edge (note that
1319 these bb's must also contain a conditional jump; we've already
1320 dealt with fall-through edges for blocks that didn't have a
1321 conditional jump in the call to add_labels_and_missing_jumps).
1322 Convert the fall-through edge to non-crossing edge by inserting a
1323 new bb to fall-through into. The new bb will contain an
1324 unconditional jump (crossing edge) to the original fall through
1328 fix_up_fall_thru_edges (void)
1335 edge cond_jump = NULL;
1337 bool cond_jump_crosses;
1340 rtx fall_thru_label;
1343 FOR_EACH_BB (cur_bb)
1346 if (EDGE_COUNT (cur_bb->succs) > 0)
1347 succ1 = EDGE_SUCC (cur_bb, 0);
1351 if (EDGE_COUNT (cur_bb->succs) > 1)
1352 succ2 = EDGE_SUCC (cur_bb, 1);
1356 /* Find the fall-through edge. */
1359 && (succ1->flags & EDGE_FALLTHRU))
1365 && (succ2->flags & EDGE_FALLTHRU))
1371 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1373 /* Check to see if the fall-thru edge is a crossing edge. */
1375 if (fall_thru->flags & EDGE_CROSSING)
1377 /* The fall_thru edge crosses; now check the cond jump edge, if
1380 cond_jump_crosses = true;
1382 old_jump = BB_END (cur_bb);
1384 /* Find the jump instruction, if there is one. */
1388 if (!(cond_jump->flags & EDGE_CROSSING))
1389 cond_jump_crosses = false;
1391 /* We know the fall-thru edge crosses; if the cond
1392 jump edge does NOT cross, and its destination is the
1393 next block in the bb order, invert the jump
1394 (i.e. fix it so the fall thru does not cross and
1395 the cond jump does). */
1397 if (!cond_jump_crosses
1398 && cur_bb->aux == cond_jump->dest)
1400 /* Find label in fall_thru block. We've already added
1401 any missing labels, so there must be one. */
1403 fall_thru_label = block_label (fall_thru->dest);
1405 if (old_jump && fall_thru_label)
1406 invert_worked = invert_jump (old_jump,
1410 fall_thru->flags &= ~EDGE_FALLTHRU;
1411 cond_jump->flags |= EDGE_FALLTHRU;
1412 update_br_prob_note (cur_bb);
1414 fall_thru = cond_jump;
1416 cond_jump->flags |= EDGE_CROSSING;
1417 fall_thru->flags &= ~EDGE_CROSSING;
1422 if (cond_jump_crosses || !invert_worked)
1424 /* This is the case where both edges out of the basic
1425 block are crossing edges. Here we will fix up the
1426 fall through edge. The jump edge will be taken care
1429 new_bb = force_nonfallthru (fall_thru);
1433 new_bb->aux = cur_bb->aux;
1434 cur_bb->aux = new_bb;
1436 /* Make sure new fall-through bb is in same
1437 partition as bb it's falling through from. */
1439 BB_COPY_PARTITION (new_bb, cur_bb);
1440 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1443 /* Add barrier after new jump */
1447 barrier = emit_barrier_after (BB_END (new_bb));
1448 new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1453 barrier = emit_barrier_after (BB_END (cur_bb));
1454 cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
1463 /* This function checks the destination blockof a "crossing jump" to
1464 see if it has any crossing predecessors that begin with a code label
1465 and end with an unconditional jump. If so, it returns that predecessor
1466 block. (This is to avoid creating lots of new basic blocks that all
1467 contain unconditional jumps to the same destination). */
1470 find_jump_block (basic_block jump_dest)
1472 basic_block source_bb = NULL;
1477 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1478 if (e->flags & EDGE_CROSSING)
1480 basic_block src = e->src;
1482 /* Check each predecessor to see if it has a label, and contains
1483 only one executable instruction, which is an unconditional jump.
1484 If so, we can use it. */
1486 if (LABEL_P (BB_HEAD (src)))
1487 for (insn = BB_HEAD (src);
1488 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1489 insn = NEXT_INSN (insn))
1492 && insn == BB_END (src)
1494 && !any_condjump_p (insn))
1508 /* Find all BB's with conditional jumps that are crossing edges;
1509 insert a new bb and make the conditional jump branch to the new
1510 bb instead (make the new bb same color so conditional branch won't
1511 be a 'crossing' edge). Insert an unconditional jump from the
1512 new bb to the original destination of the conditional jump. */
1515 fix_crossing_conditional_branches (void)
1519 basic_block last_bb;
1521 basic_block prev_bb;
1528 rtx old_label = NULL_RTX;
1533 last_bb = EXIT_BLOCK_PTR->prev_bb;
1535 FOR_EACH_BB (cur_bb)
1537 crossing_edge = NULL;
1538 if (EDGE_COUNT (cur_bb->succs) > 0)
1539 succ1 = EDGE_SUCC (cur_bb, 0);
1543 if (EDGE_COUNT (cur_bb->succs) > 1)
1544 succ2 = EDGE_SUCC (cur_bb, 1);
1548 /* We already took care of fall-through edges, so only one successor
1549 can be a crossing edge. */
1551 if (succ1 && (succ1->flags & EDGE_CROSSING))
1552 crossing_edge = succ1;
1553 else if (succ2 && (succ2->flags & EDGE_CROSSING))
1554 crossing_edge = succ2;
1558 old_jump = BB_END (cur_bb);
1560 /* Check to make sure the jump instruction is a
1561 conditional jump. */
1565 if (any_condjump_p (old_jump))
1567 if (GET_CODE (PATTERN (old_jump)) == SET)
1568 set_src = SET_SRC (PATTERN (old_jump));
1569 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1571 set_src = XVECEXP (PATTERN (old_jump), 0,0);
1572 if (GET_CODE (set_src) == SET)
1573 set_src = SET_SRC (set_src);
1579 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1581 if (GET_CODE (XEXP (set_src, 1)) == PC)
1582 old_label = XEXP (set_src, 2);
1583 else if (GET_CODE (XEXP (set_src, 2)) == PC)
1584 old_label = XEXP (set_src, 1);
1586 /* Check to see if new bb for jumping to that dest has
1587 already been created; if so, use it; if not, create
1590 new_bb = find_jump_block (crossing_edge->dest);
1593 new_label = block_label (new_bb);
1596 /* Create new basic block to be dest for
1597 conditional jump. */
1599 new_bb = create_basic_block (NULL, NULL, last_bb);
1600 new_bb->aux = last_bb->aux;
1601 last_bb->aux = new_bb;
1605 /* Update register liveness information. */
1607 new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (®_obstack);
1608 new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (®_obstack);
1609 COPY_REG_SET (new_bb->il.rtl->global_live_at_end,
1610 prev_bb->il.rtl->global_live_at_end);
1611 COPY_REG_SET (new_bb->il.rtl->global_live_at_start,
1612 prev_bb->il.rtl->global_live_at_end);
1614 /* Put appropriate instructions in new bb. */
1616 new_label = gen_label_rtx ();
1617 emit_label_before (new_label, BB_HEAD (new_bb));
1618 BB_HEAD (new_bb) = new_label;
1620 if (GET_CODE (old_label) == LABEL_REF)
1622 old_label = JUMP_LABEL (old_jump);
1623 new_jump = emit_jump_insn_after (gen_jump
1629 gcc_assert (HAVE_return
1630 && GET_CODE (old_label) == RETURN);
1631 new_jump = emit_jump_insn_after (gen_return (),
1635 barrier = emit_barrier_after (new_jump);
1636 JUMP_LABEL (new_jump) = old_label;
1637 new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1640 /* Make sure new bb is in same partition as source
1641 of conditional branch. */
1642 BB_COPY_PARTITION (new_bb, cur_bb);
1645 /* Make old jump branch to new bb. */
1647 redirect_jump (old_jump, new_label, 0);
1649 /* Remove crossing_edge as predecessor of 'dest'. */
1651 dest = crossing_edge->dest;
1653 redirect_edge_succ (crossing_edge, new_bb);
1655 /* Make a new edge from new_bb to old dest; new edge
1656 will be a successor for new_bb and a predecessor
1659 if (EDGE_COUNT (new_bb->succs) == 0)
1660 new_edge = make_edge (new_bb, dest, 0);
1662 new_edge = EDGE_SUCC (new_bb, 0);
1664 crossing_edge->flags &= ~EDGE_CROSSING;
1665 new_edge->flags |= EDGE_CROSSING;
1671 /* Find any unconditional branches that cross between hot and cold
1672 sections. Convert them into indirect jumps instead. */
1675 fix_crossing_unconditional_branches (void)
1681 rtx indirect_jump_sequence;
1682 rtx jump_insn = NULL_RTX;
1687 FOR_EACH_BB (cur_bb)
1689 last_insn = BB_END (cur_bb);
1691 if (EDGE_COUNT (cur_bb->succs) < 1)
1694 succ = EDGE_SUCC (cur_bb, 0);
1696 /* Check to see if bb ends in a crossing (unconditional) jump. At
1697 this point, no crossing jumps should be conditional. */
1699 if (JUMP_P (last_insn)
1700 && (succ->flags & EDGE_CROSSING))
1704 gcc_assert (!any_condjump_p (last_insn));
1706 /* Make sure the jump is not already an indirect or table jump. */
1708 if (!computed_jump_p (last_insn)
1709 && !tablejump_p (last_insn, &label2, &table))
1711 /* We have found a "crossing" unconditional branch. Now
1712 we must convert it to an indirect jump. First create
1713 reference of label, as target for jump. */
1715 label = JUMP_LABEL (last_insn);
1716 label_addr = gen_rtx_LABEL_REF (Pmode, label);
1717 LABEL_NUSES (label) += 1;
1719 /* Get a register to use for the indirect jump. */
1721 new_reg = gen_reg_rtx (Pmode);
1723 /* Generate indirect the jump sequence. */
1726 emit_move_insn (new_reg, label_addr);
1727 emit_indirect_jump (new_reg);
1728 indirect_jump_sequence = get_insns ();
1731 /* Make sure every instruction in the new jump sequence has
1732 its basic block set to be cur_bb. */
1734 for (cur_insn = indirect_jump_sequence; cur_insn;
1735 cur_insn = NEXT_INSN (cur_insn))
1737 BLOCK_FOR_INSN (cur_insn) = cur_bb;
1738 if (JUMP_P (cur_insn))
1739 jump_insn = cur_insn;
1742 /* Insert the new (indirect) jump sequence immediately before
1743 the unconditional jump, then delete the unconditional jump. */
1745 emit_insn_before (indirect_jump_sequence, last_insn);
1746 delete_insn (last_insn);
1748 /* Make BB_END for cur_bb be the jump instruction (NOT the
1749 barrier instruction at the end of the sequence...). */
1751 BB_END (cur_bb) = jump_insn;
1757 /* Add REG_CROSSING_JUMP note to all crossing jump insns. */
1760 add_reg_crossing_jump_notes (void)
1767 FOR_EACH_EDGE (e, ei, bb->succs)
1768 if ((e->flags & EDGE_CROSSING)
1769 && JUMP_P (BB_END (e->src)))
1770 REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP,
1776 /* Hot and cold basic blocks are partitioned and put in separate
1777 sections of the .o file, to reduce paging and improve cache
1778 performance (hopefully). This can result in bits of code from the
1779 same function being widely separated in the .o file. However this
1780 is not obvious to the current bb structure. Therefore we must take
1781 care to ensure that: 1). There are no fall_thru edges that cross
1782 between sections; 2). For those architectures which have "short"
1783 conditional branches, all conditional branches that attempt to
1784 cross between sections are converted to unconditional branches;
1785 and, 3). For those architectures which have "short" unconditional
1786 branches, all unconditional branches that attempt to cross between
1787 sections are converted to indirect jumps.
1789 The code for fixing up fall_thru edges that cross between hot and
1790 cold basic blocks does so by creating new basic blocks containing
1791 unconditional branches to the appropriate label in the "other"
1792 section. The new basic block is then put in the same (hot or cold)
1793 section as the original conditional branch, and the fall_thru edge
1794 is modified to fall into the new basic block instead. By adding
1795 this level of indirection we end up with only unconditional branches
1796 crossing between hot and cold sections.
1798 Conditional branches are dealt with by adding a level of indirection.
1799 A new basic block is added in the same (hot/cold) section as the
1800 conditional branch, and the conditional branch is retargeted to the
1801 new basic block. The new basic block contains an unconditional branch
1802 to the original target of the conditional branch (in the other section).
1804 Unconditional branches are dealt with by converting them into
1808 fix_edges_for_rarely_executed_code (edge *crossing_edges,
1809 int n_crossing_edges)
1811 /* Make sure the source of any crossing edge ends in a jump and the
1812 destination of any crossing edge has a label. */
1814 add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);
1816 /* Convert all crossing fall_thru edges to non-crossing fall
1817 thrus to unconditional jumps (that jump to the original fall
1820 fix_up_fall_thru_edges ();
1822 /* If the architecture does not have conditional branches that can
1823 span all of memory, convert crossing conditional branches into
1824 crossing unconditional branches. */
1826 if (!HAS_LONG_COND_BRANCH)
1827 fix_crossing_conditional_branches ();
1829 /* If the architecture does not have unconditional branches that
1830 can span all of memory, convert crossing unconditional branches
1831 into indirect jumps. Since adding an indirect jump also adds
1832 a new register usage, update the register usage information as
1835 if (!HAS_LONG_UNCOND_BRANCH)
1837 fix_crossing_unconditional_branches ();
1838 reg_scan (get_insns(), max_reg_num ());
1841 add_reg_crossing_jump_notes ();
1844 /* Verify, in the basic block chain, that there is at most one switch
1845 between hot/cold partitions. This is modelled on
1846 rtl_verify_flow_info_1, but it cannot go inside that function
1847 because this condition will not be true until after
1848 reorder_basic_blocks is called. */
1851 verify_hot_cold_block_grouping (void)
1855 bool switched_sections = false;
1856 int current_partition = 0;
1860 if (!current_partition)
1861 current_partition = BB_PARTITION (bb);
1862 if (BB_PARTITION (bb) != current_partition)
1864 if (switched_sections)
1866 error ("multiple hot/cold transitions found (bb %i)",
1872 switched_sections = true;
1873 current_partition = BB_PARTITION (bb);
1881 /* Reorder basic blocks. The main entry point to this file. FLAGS is
1882 the set of flags to pass to cfg_layout_initialize(). */
1885 reorder_basic_blocks (unsigned int flags)
1889 struct trace *traces;
1891 if (n_basic_blocks <= 1)
1894 if (targetm.cannot_modify_jumps_p ())
1897 timevar_push (TV_REORDER_BLOCKS);
1899 cfg_layout_initialize (flags);
1901 set_edge_can_fallthru_flag ();
1902 mark_dfs_back_edges ();
1904 /* We are estimating the length of uncond jump insn only once since the code
1905 for getting the insn length always returns the minimal length now. */
1906 if (uncond_jump_length == 0)
1907 uncond_jump_length = get_uncond_jump_length ();
1909 /* We need to know some information for each basic block. */
1910 array_size = GET_ARRAY_SIZE (last_basic_block);
1911 bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
1912 for (i = 0; i < array_size; i++)
1914 bbd[i].start_of_trace = -1;
1915 bbd[i].in_trace = -1;
1916 bbd[i].end_of_trace = -1;
1921 traces = xmalloc (n_basic_blocks * sizeof (struct trace));
1923 find_traces (&n_traces, traces);
1924 connect_traces (n_traces, traces);
1929 dump_flow_info (dump_file);
1931 cfg_layout_finalize ();
1932 if (flag_reorder_blocks_and_partition)
1933 verify_hot_cold_block_grouping ();
1935 timevar_pop (TV_REORDER_BLOCKS);
1938 /* Determine which partition the first basic block in the function
1939 belongs to, then find the first basic block in the current function
1940 that belongs to a different section, and insert a
1941 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
1942 instruction stream. When writing out the assembly code,
1943 encountering this note will make the compiler switch between the
1944 hot and cold text sections. */
1947 insert_section_boundary_note (void)
1951 int first_partition = 0;
1953 if (flag_reorder_blocks_and_partition)
1956 if (!first_partition)
1957 first_partition = BB_PARTITION (bb);
1958 if (BB_PARTITION (bb) != first_partition)
1960 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
1967 /* Duplicate the blocks containing computed gotos. This basically unfactors
1968 computed gotos that were factored early on in the compilation process to
1969 speed up edge based data flow. We used to not unfactoring them again,
1970 which can seriously pessimize code with many computed jumps in the source
1971 code, such as interpreters. See e.g. PR15242. */
1974 duplicate_computed_gotos (void)
1976 basic_block bb, new_bb;
1980 if (n_basic_blocks <= 1)
1983 if (targetm.cannot_modify_jumps_p ())
1986 timevar_push (TV_REORDER_BLOCKS);
1988 cfg_layout_initialize (0);
1990 /* We are estimating the length of uncond jump insn only once
1991 since the code for getting the insn length always returns
1992 the minimal length now. */
1993 if (uncond_jump_length == 0)
1994 uncond_jump_length = get_uncond_jump_length ();
1996 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
1997 candidates = BITMAP_ALLOC (NULL);
1999 /* Look for blocks that end in a computed jump, and see if such blocks
2000 are suitable for unfactoring. If a block is a candidate for unfactoring,
2001 mark it in the candidates. */
2007 int size, all_flags;
2009 /* Build the reorder chain for the original order of blocks. */
2010 if (bb->next_bb != EXIT_BLOCK_PTR)
2011 bb->aux = bb->next_bb;
2013 /* Obviously the block has to end in a computed jump. */
2014 if (!computed_jump_p (BB_END (bb)))
2017 /* Only consider blocks that can be duplicated. */
2018 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2019 || !can_duplicate_block_p (bb))
2022 /* Make sure that the block is small enough. */
2024 FOR_BB_INSNS (bb, insn)
2027 size += get_attr_length (insn);
2028 if (size > max_size)
2031 if (size > max_size)
2034 /* Final check: there must not be any incoming abnormal edges. */
2036 FOR_EACH_EDGE (e, ei, bb->preds)
2037 all_flags |= e->flags;
2038 if (all_flags & EDGE_COMPLEX)
2041 bitmap_set_bit (candidates, bb->index);
2044 /* Nothing to do if there is no computed jump here. */
2045 if (bitmap_empty_p (candidates))
2048 /* Duplicate computed gotos. */
2051 if (bb->il.rtl->visited)
2054 bb->il.rtl->visited = 1;
2056 /* BB must have one outgoing edge. That edge must not lead to
2057 the exit block or the next block.
2058 The destination must have more than one predecessor. */
2059 if (!single_succ_p (bb)
2060 || single_succ (bb) == EXIT_BLOCK_PTR
2061 || single_succ (bb) == bb->next_bb
2062 || single_pred_p (single_succ (bb)))
2065 /* The successor block has to be a duplication candidate. */
2066 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2069 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb));
2070 new_bb->aux = bb->aux;
2072 new_bb->il.rtl->visited = 1;
2076 cfg_layout_finalize ();
2078 BITMAP_FREE (candidates);
2080 timevar_pop (TV_REORDER_BLOCKS);
2083 /* This function is the main 'entrance' for the optimization that
2084 partitions hot and cold basic blocks into separate sections of the
2085 .o file (to improve performance and cache locality). Ideally it
2086 would be called after all optimizations that rearrange the CFG have
2087 been called. However part of this optimization may introduce new
2088 register usage, so it must be called before register allocation has
2089 occurred. This means that this optimization is actually called
2090 well before the optimization that reorders basic blocks (see
2093 This optimization checks the feedback information to determine
2094 which basic blocks are hot/cold, updates flags on the basic blocks
2095 to indicate which section they belong in. This information is
2096 later used for writing out sections in the .o file. Because hot
2097 and cold sections can be arbitrarily large (within the bounds of
2098 memory), far beyond the size of a single function, it is necessary
2099 to fix up all edges that cross section boundaries, to make sure the
2100 instructions used can actually span the required distance. The
2101 fixes are described below.
2103 Fall-through edges must be changed into jumps; it is not safe or
2104 legal to fall through across a section boundary. Whenever a
2105 fall-through edge crossing a section boundary is encountered, a new
2106 basic block is inserted (in the same section as the fall-through
2107 source), and the fall through edge is redirected to the new basic
2108 block. The new basic block contains an unconditional jump to the
2109 original fall-through target. (If the unconditional jump is
2110 insufficient to cross section boundaries, that is dealt with a
2111 little later, see below).
2113 In order to deal with architectures that have short conditional
2114 branches (which cannot span all of memory) we take any conditional
2115 jump that attempts to cross a section boundary and add a level of
2116 indirection: it becomes a conditional jump to a new basic block, in
2117 the same section. The new basic block contains an unconditional
2118 jump to the original target, in the other section.
2120 For those architectures whose unconditional branch is also
2121 incapable of reaching all of memory, those unconditional jumps are
2122 converted into indirect jumps, through a register.
2124 IMPORTANT NOTE: This optimization causes some messy interactions
2125 with the cfg cleanup optimizations; those optimizations want to
2126 merge blocks wherever possible, and to collapse indirect jump
2127 sequences (change "A jumps to B jumps to C" directly into "A jumps
2128 to C"). Those optimizations can undo the jump fixes that
2129 partitioning is required to make (see above), in order to ensure
2130 that jumps attempting to cross section boundaries are really able
2131 to cover whatever distance the jump requires (on many architectures
2132 conditional or unconditional jumps are not able to reach all of
2133 memory). Therefore tests have to be inserted into each such
2134 optimization to make sure that it does not undo stuff necessary to
2135 cross partition boundaries. This would be much less of a problem
2136 if we could perform this optimization later in the compilation, but
2137 unfortunately the fact that we may need to create indirect jumps
2138 (through registers) requires that this optimization be performed
2139 before register allocation. */
2142 partition_hot_cold_basic_blocks (void)
2145 edge *crossing_edges;
2146 int n_crossing_edges;
2147 int max_edges = 2 * last_basic_block;
2149 if (n_basic_blocks <= 1)
2152 crossing_edges = xcalloc (max_edges, sizeof (edge));
2154 cfg_layout_initialize (0);
2156 FOR_EACH_BB (cur_bb)
2157 if (cur_bb->index >= 0
2158 && cur_bb->next_bb->index >= 0)
2159 cur_bb->aux = cur_bb->next_bb;
2161 find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges,
2165 if (n_crossing_edges > 0)
2166 fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);
2168 free (crossing_edges);
2170 cfg_layout_finalize();