/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
25 /* This pass implements list scheduling within basic blocks. It is
26 run twice: (1) after flow analysis, but before register allocation,
27 and (2) after register allocation.
29 The first run performs interblock scheduling, moving insns between
30 different blocks in the same "region", and the second runs only
31 basic block scheduling.
33 Interblock motions performed are useful motions and speculative
34 motions, including speculative loads. Motions requiring code
35 duplication are not supported. The identification of motion type
36 and the check for validity of speculative motions requires
37 construction and analysis of the function's control flow graph.
39 The main entry point for this pass is schedule_insns(), called for
40 each function. The work of the scheduler is organized in three
41 levels: (1) function level: insns are subject to splitting,
42 control-flow-graph is constructed, regions are computed (after
43 reload, each region is of one block), (2) region level: control
44 flow graph attributes required for interblock scheduling are
45 computed (dominators, reachability, etc.), data dependences and
46 priorities are computed, and (3) block level: insns in the block
47 are actually scheduled. */
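/* Editorial sketch (hedged, not part of the pass): how the three levels
   above nest.  schedule_insns_outline is a hypothetical name; the real
   entry points are schedule_insns (), schedule_region () and the
   block-level scheduler in haifa-sched.c.  */
#if 0
static void
schedule_insns_outline (void)
{
  int rgn;

  init_regions ();		/* function level: CFG and regions.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    schedule_region (rgn);	/* region level: deps and priorities;
				   schedules each block in turn.  */
}
#endif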
51 #include "coretypes.h"
56 #include "hard-reg-set.h"
60 #include "insn-config.h"
61 #include "insn-attr.h"
65 #include "cfglayout.h"
67 #include "sched-int.h"
70 #include "tree-pass.h"
73 #ifdef INSN_SCHEDULING
74 /* Some accessor macros for h_i_d members only used within this file. */
75 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
76 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
77 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
79 /* nr_inter/spec counts interblock/speculative motion for the function. */
80 static int nr_inter, nr_spec;
82 static int is_cfg_nonregular (void);
83 static bool sched_is_disabled_for_current_region_p (void);
85 /* A region is the main entity for interblock scheduling: insns
86 are allowed to move between blocks in the same region, along
87 control flow graph edges, in the 'up' direction. */
typedef struct
{
  /* Number of extended basic blocks in region.  */
  int rgn_nr_blocks;
  /* cblocks in the region (actually index in rgn_bb_table).  */
  int rgn_blocks;
  /* Dependencies for this region are already computed.  Basically, indicates
     that this is a recovery block.  */
  unsigned int dont_calc_deps : 1;
  /* This region has at least one non-trivial ebb.  */
  unsigned int has_real_ebb : 1;
}
region;
102 /* Number of regions in the procedure. */
103 static int nr_regions;
105 /* Table of region descriptions. */
106 static region *rgn_table;
108 /* Array of lists of regions' blocks. */
109 static int *rgn_bb_table;
111 /* Topological order of blocks in the region (if b2 is reachable from
112 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
113 always referred to by either block or b, while its topological
114 order name (in the region) is referred to by bb. */
115 static int *block_to_bb;
117 /* The number of the region containing a block. */
118 static int *containing_rgn;
120 /* The minimum probability of reaching a source block so that it will be
121 considered for speculative scheduling. */
122 static int min_spec_prob;
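/* Editorial note: schedule_insns () later derives this threshold from the
   min-spec-prob param, roughly

     min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
		      / 100);

   so a parameter value of 40 means "consider a source block only if it is
   reached with at least 40% of the region entry's probability".  */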
124 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
125 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
126 #define RGN_DONT_CALC_DEPS(rgn) (rgn_table[rgn].dont_calc_deps)
127 #define RGN_HAS_REAL_EBB(rgn) (rgn_table[rgn].has_real_ebb)
128 #define BLOCK_TO_BB(block) (block_to_bb[block])
129 #define CONTAINING_RGN(block) (containing_rgn[block])
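/* A minimal usage sketch (editorial; walk_region_blocks is a hypothetical
   helper, the tables and macros are the real ones above): visit every
   block of region RGN in topological order.  */
#if 0
static void
walk_region_blocks (int rgn)
{
  int i;

  for (i = 0; i < RGN_NR_BLOCKS (rgn); i++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + i];

      gcc_assert (CONTAINING_RGN (block) == rgn);
      fprintf (sched_dump, ";; bb %d of rgn %d is block %d\n", i, rgn, block);
    }
}
#endif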
131 void debug_regions (void);
132 static void find_single_block_region (void);
133 static void find_rgns (void);
134 static void extend_rgns (int *, int *, sbitmap, int *);
135 static bool too_large (int, int *, int *);
137 extern void debug_live (int, int);
139 /* Blocks of the current region being scheduled. */
140 static int current_nr_blocks;
141 static int current_blocks;
143 static int rgn_n_insns;
145 /* The mapping from ebb to block. */
/* ebb_head[i] is an index in rgn_bb_table, while
   EBB_HEAD (i) is a basic block index.
   BASIC_BLOCK (EBB_HEAD (i)) is the head of the ebb.  */
149 #define BB_TO_BLOCK(ebb) (rgn_bb_table[ebb_head[ebb]])
150 #define EBB_FIRST_BB(ebb) BASIC_BLOCK (BB_TO_BLOCK (ebb))
151 #define EBB_LAST_BB(ebb) BASIC_BLOCK (rgn_bb_table[ebb_head[ebb + 1] - 1])
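/* Editorial sketch (walk_ebb_insns is hypothetical; get_ebb_head_tail is
   the real helper used by init_ready_list below): the macros above
   bracket the insn stream of extended basic block EBB.  */
#if 0
static void
walk_ebb_insns (int ebb)
{
  rtx head, tail, insn;

  get_ebb_head_tail (EBB_FIRST_BB (ebb), EBB_LAST_BB (ebb), &head, &tail);
  for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      fprintf (sched_dump, ";; ebb %d contains insn %d\n",
	       ebb, INSN_UID (insn));
}
#endif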
153 /* Target info declarations.
155 The block currently being scheduled is referred to as the "target" block,
156 while other blocks in the region from which insns can be moved to the
157 target are called "source" blocks. The candidate structure holds info
158 about such sources: are they valid? Speculative? Etc. */
161 basic_block *first_member;
176 static candidate *candidate_table;
178 /* A speculative motion requires checking live information on the path
179 from 'source' to 'target'. The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.
183 Lists of split and update blocks for each candidate of the current
184 target are in array bblst_table. */
185 static basic_block *bblst_table;
186 static int bblst_size, bblst_last;
188 #define IS_VALID(src) ( candidate_table[src].is_valid )
189 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
190 #define SRC_PROB(src) ( candidate_table[src].src_prob )
192 /* The bb being currently scheduled. */
193 static int target_bb;
203 static edge *edgelst_table;
204 static int edgelst_last;
206 static void extract_edgelst (sbitmap, edgelst *);
209 /* Target info functions. */
210 static void split_edges (int, int, edgelst *);
211 static void compute_trg_info (int);
212 void debug_candidate (int);
213 void debug_candidates (int);
215 /* Dominators array: dom[i] contains the sbitmap of dominators of
   bb i in the region.  */
static sbitmap *dom;
219 /* bb 0 is the only region entry. */
220 #define IS_RGN_ENTRY(bb) (!bb)
/* Is bb_src dominated by bb_trg?  */
223 #define IS_DOMINATED(bb_src, bb_trg) \
224 ( TEST_BIT (dom[bb_src], bb_trg) )
226 /* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
   the probability of bb i relative to the region entry.  */
static int *prob;
230 /* Bit-set of edges, where bit i stands for edge i. */
231 typedef sbitmap edgeset;
233 /* Number of edges in the region. */
234 static int rgn_nr_edges;
236 /* Array of size rgn_nr_edges. */
237 static edge *rgn_edges;
239 /* Mapping from each edge in the graph to its number in the rgn. */
240 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
241 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
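/* Editorial sketch of how this mapping is typically established once per
   region (the real initialization lives in schedule_region ();
   number_region_edges is a hypothetical name): give each edge leaving a
   block of the region a bit number and record the inverse map in
   rgn_edges.  */
#if 0
static void
number_region_edges (void)
{
  int bb, nr = 0;
  edge e;
  edge_iterator ei;

  for (bb = 0; bb < current_nr_blocks; bb++)
    FOR_EACH_EDGE (e, ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->succs)
      {
	rgn_edges[nr] = e;
	SET_EDGE_TO_BIT (e, nr++);
      }

  gcc_assert (nr == rgn_nr_edges);
}
#endif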
/* The split edges of a source bb are different for each target
244 bb. In order to compute this efficiently, the 'potential-split edges'
245 are computed for each bb prior to scheduling a region. This is actually
246 the split edges of each bb relative to the region entry.
248 pot_split[bb] is the set of potential split edges of bb. */
249 static edgeset *pot_split;
251 /* For every bb, a set of its ancestor edges. */
252 static edgeset *ancestor_edges;
/* Array of EBB sizes.  Currently we can get an ebb only through
   splitting of a currently scheduled block, therefore, we don't need an
   ebb_head array for every region; it's sufficient to hold it only
   for the current one.  */
static int *ebb_head;
260 static void compute_dom_prob_ps (int);
262 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
263 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
264 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
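/* Editorial example: if insn I lives in basic block 7, and block 7 is the
   third block of its region, then BLOCK_NUM (I) == 7 while
   INSN_BB (I) == 2.  The candidate and probability lookups above are
   keyed by that region-local index, not by the global block number.  */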
266 /* Speculative scheduling functions. */
267 static int check_live_1 (int, rtx);
268 static void update_live_1 (int, rtx);
269 static int check_live (rtx, int);
270 static void update_live (rtx, int);
271 static void set_spec_fed (rtx);
272 static int is_pfree (rtx, int, int);
273 static int find_conditional_protection (rtx, int);
274 static int is_conditionally_protected (rtx, int, int);
275 static int is_prisky (rtx, int, int);
276 static int is_exception_free (rtx, int, int);
278 static bool sets_likely_spilled (rtx);
279 static void sets_likely_spilled_1 (rtx, rtx, void *);
280 static void add_branch_dependences (rtx, rtx);
281 static void compute_block_backward_dependences (int);
283 static void init_regions (void);
284 static void schedule_region (int);
285 static rtx concat_INSN_LIST (rtx, rtx);
286 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
287 static void propagate_deps (int, struct deps *);
288 static void free_pending_lists (void);
290 /* Functions for construction of the control flow graph. */
292 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
294 We decide not to build the control flow graph if there is possibly more
295 than one entry to the function, if computed branches exist, if we
296 have nonlocal gotos, or if we have an unreachable loop. */
299 is_cfg_nonregular (void)
304 /* If we have a label that could be the target of a nonlocal goto, then
305 the cfg is not well structured. */
  if (nonlocal_goto_handler_labels)
    return 1;
  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;
313 /* If we have exception handlers, then we consider the cfg not well
314 structured. ?!? We should be able to handle this now that we
315 compute an accurate cfg for EH. */
  if (current_function_has_exception_handlers ())
    return 1;
319 /* If we have non-jumping insns which refer to labels, then we consider
320 the cfg not well structured. */
322 FOR_BB_INSNS (b, insn)
324 /* Check for labels referred by non-jump insns. */
325 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	  rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);

	  if (note
	      && ! (JUMP_P (NEXT_INSN (insn))
		    && find_reg_note (NEXT_INSN (insn), REG_LABEL,
				      XEXP (note, 0))))
	    return 1;
334 /* If this function has a computed jump, then we consider the cfg
335 not well structured. */
      else if (JUMP_P (insn) && computed_jump_p (insn))
	return 1;
340 /* Unreachable loops with more than one basic block are detected
341 during the DFS traversal in find_rgns.
343 Unreachable loops with a single block are detected here. This
344 test is redundant with the one in find_rgns, but it's much
345 cheaper to go ahead and catch the trivial case here. */
348 if (EDGE_COUNT (b->preds) == 0
349 || (single_pred_p (b)
	    && single_pred (b) == b))
      return 1;
  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
358 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
361 extract_edgelst (sbitmap set, edgelst *el)
364 sbitmap_iterator sbi;
366 /* edgelst table space is reused in each call to extract_edgelst. */
  edgelst_last = 0;
  el->first_member = &edgelst_table[edgelst_last];
  el->nr_members = 0;
372 /* Iterate over each word in the bitset. */
373 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
      edgelst_table[edgelst_last++] = rgn_edges[i];
      el->nr_members++;
380 /* Functions for the construction of regions. */
382 /* Print the regions, for debugging purposes. Callable from debugger. */
389 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
390 for (rgn = 0; rgn < nr_regions; rgn++)
392 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
393 rgn_table[rgn].rgn_nr_blocks);
394 fprintf (sched_dump, ";;\tbb/block: ");
      /* We don't have ebb_head initialized yet, so we can't use
	 BB_TO_BLOCK ().  */
398 current_blocks = RGN_BLOCKS (rgn);
400 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
401 fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
403 fprintf (sched_dump, "\n\n");
407 /* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */
412 find_single_block_region (void)
420 rgn_bb_table[nr_regions] = bb->index;
421 RGN_NR_BLOCKS (nr_regions) = 1;
422 RGN_BLOCKS (nr_regions) = nr_regions;
423 RGN_DONT_CALC_DEPS (nr_regions) = 0;
424 RGN_HAS_REAL_EBB (nr_regions) = 0;
425 CONTAINING_RGN (bb->index) = nr_regions;
426 BLOCK_TO_BB (bb->index) = 0;
431 /* Update number of blocks and the estimate for number of insns
432 in the region. Return true if the region is "too large" for interblock
433 scheduling (compile time considerations). */
436 too_large (int block, int *num_bbs, int *num_insns)
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
		   - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
442 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
443 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
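/* Editorial example: with the (approximate) default params
   max-sched-region-blocks == 10 and max-sched-region-insns == 100,
   region growth stops as soon as either bound is crossed, e.g.

     if (too_large (jbb->index, &num_bbs, &num_insns))
       too_large_failure = 1;

   as done while growing a region in find_rgns below.  */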
/* Update_loop_relations (blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)		\
{						\
  if (max_hdr[blk] == -1)			\
    max_hdr[blk] = hdr;				\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])	\
    RESET_BIT (inner, hdr);			\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])	\
    {						\
      RESET_BIT (inner, max_hdr[blk]);		\
      max_hdr[blk] = hdr;			\
    }						\
}
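/* Worked example (editorial): let dfs_nr[H1] = 2 and dfs_nr[H2] = 5, and
   let block B have back edges to both H1 and H2.  After
   UPDATE_LOOP_RELATIONS (B, H2), max_hdr[B] == H2.  A later
   UPDATE_LOOP_RELATIONS (B, H1) sees dfs_nr[H2] > dfs_nr[H1] and clears
   H1's bit in INNER: the loop headed by H1 encloses the H2 loop and is
   therefore not innermost, while max_hdr[B] stays H2.  */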
462 /* Find regions for interblock scheduling.
464 A region for scheduling can be:
466 * A loop-free procedure, or
468 * A reducible inner loop, or
470 * A basic block not contained in any other region.
472 ?!? In theory we could build other regions based on extended basic
473 blocks or reverse extended basic blocks. Is it worth the trouble?
475 Loop blocks that form a region are put into the region's block list
476 in topological order.
   This procedure stores its results into the following global (ick)
   variables:

     nr_regions
     rgn_table
     rgn_bb_table
     block_to_bb
     containing_rgn

   We use dominator relationships to avoid making regions out of
   non-reducible loops.
489 This procedure needs to be converted to work on pred/succ lists instead
490 of edge tables. That would simplify it somewhat. */
495 int *max_hdr, *dfs_nr, *degree;
497 int node, child, loop_head, i, head, tail;
498 int count = 0, sp, idx = 0;
499 edge_iterator current_edge;
500 edge_iterator *stack;
501 int num_bbs, num_insns, unreachable;
502 int too_large_failure;
505 /* Note if a block is a natural loop header. */
508 /* Note if a block is a natural inner loop header. */
511 /* Note if a block is in the block queue. */
  /* Note if a block is on the DFS stack.  */
517 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively; these will
     be used as inputs to the second traversal.
524 STACK, SP and DFS_NR are only used during the first traversal. */
526 /* Allocate and initialize variables for the first traversal. */
527 max_hdr = XNEWVEC (int, last_basic_block);
528 dfs_nr = XCNEWVEC (int, last_basic_block);
529 stack = XNEWVEC (edge_iterator, n_edges);
531 inner = sbitmap_alloc (last_basic_block);
532 sbitmap_ones (inner);
534 header = sbitmap_alloc (last_basic_block);
535 sbitmap_zero (header);
537 in_queue = sbitmap_alloc (last_basic_block);
538 sbitmap_zero (in_queue);
540 in_stack = sbitmap_alloc (last_basic_block);
541 sbitmap_zero (in_stack);
  for (i = 0; i < last_basic_block; i++)
    max_hdr[i] = -1;
546 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
547 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
549 /* DFS traversal to find inner loops in the cfg. */
551 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
556 if (EDGE_PASSED (current_edge))
558 /* We have reached a leaf node or a node that was already
559 processed. Pop edges off the stack until we find
560 an edge that has not yet been processed. */
561 while (sp >= 0 && EDGE_PASSED (current_edge))
563 /* Pop entry off the stack. */
564 current_edge = stack[sp--];
565 node = ei_edge (current_edge)->src->index;
566 gcc_assert (node != ENTRY_BLOCK);
567 child = ei_edge (current_edge)->dest->index;
568 gcc_assert (child != EXIT_BLOCK);
569 RESET_BIT (in_stack, child);
570 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
571 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	  ei_next (&current_edge);
      /* See if we have finished the DFS tree traversal.  */
      if (sp < 0 && EDGE_PASSED (current_edge))
	break;

      /* Nope, continue the traversal with the popped node.  */
      continue;
583 /* Process a node. */
584 node = ei_edge (current_edge)->src->index;
585 gcc_assert (node != ENTRY_BLOCK);
586 SET_BIT (in_stack, node);
587 dfs_nr[node] = ++count;
589 /* We don't traverse to the exit block. */
590 child = ei_edge (current_edge)->dest->index;
591 if (child == EXIT_BLOCK)
593 SET_EDGE_PASSED (current_edge);
	  ei_next (&current_edge);
	  continue;
      /* If the successor is in the stack, then we've found a loop.
	 Mark the loop; if it is not a natural loop, it will be
	 rejected during the second traversal.  */
601 if (TEST_BIT (in_stack, child))
604 SET_BIT (header, child);
605 UPDATE_LOOP_RELATIONS (node, child);
606 SET_EDGE_PASSED (current_edge);
	  ei_next (&current_edge);
	  continue;
      /* If the child was already visited, then there is no need to visit
	 it again.  Just update the loop relationships and restart
	 with a new edge.  */
616 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
617 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
618 SET_EDGE_PASSED (current_edge);
	  ei_next (&current_edge);
	  continue;
623 /* Push an entry on the stack and continue DFS traversal. */
624 stack[++sp] = current_edge;
625 SET_EDGE_PASSED (current_edge);
626 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
629 /* Reset ->aux field used by EDGE_PASSED. */
    FOR_EACH_EDGE (e, ei, bb->succs)
      e->aux = NULL;
  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.
643 The DFS traversal will mark every block that is reachable from
644 the entry node by placing a nonzero value in dfs_nr. Thus if
645 dfs_nr is zero for any block, then it must be unreachable. */
    if (dfs_nr[bb->index] == 0)
      {
	unreachable = 1;
	break;
      }
654 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
655 to hold degree counts. */
659 degree[bb->index] = EDGE_COUNT (bb->preds);
  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
665 int *queue, *degree1 = NULL;
666 /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
667 there basic blocks, which are forced to be region heads.
	 This is done to try to assemble a few smaller regions
669 from a too_large region. */
670 sbitmap extended_rgn_header = NULL;
671 bool extend_regions_p;
      /* Second traversal: find reducible inner loops and topologically sort
	 blocks of each region.  */
679 queue = XNEWVEC (int, n_basic_blocks);
681 extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
682 if (extend_regions_p)
684 degree1 = xmalloc (last_basic_block * sizeof (int));
685 extended_rgn_header = sbitmap_alloc (last_basic_block);
686 sbitmap_zero (extended_rgn_header);
689 /* Find blocks which are inner loop headers. We still have non-reducible
690 loops to consider at this point. */
693 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
	    /* Now check that the loop is reducible.  We do this separately
700 from finding inner loops so that we do not find a reducible
701 loop which contains an inner non-reducible loop.
703 A simple way to find reducible/natural loops is to verify
	       that each block in the loop is dominated by the loop
	       header.
707 If there exists a block that is not dominated by the loop
708 header, then the block is reachable from outside the loop
709 and thus the loop is not a natural loop. */
	      /* First identify blocks in the loop, except for the loop
		 entry block.  */
714 if (bb->index == max_hdr[jbb->index] && bb != jbb)
		  /* Now verify that the block is dominated by the loop
		     header.  */
		  if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
		    break;
723 /* If we exited the loop early, then I is the header of
		 a non-reducible loop and we should quit processing it
		 now.  */
	      if (jbb != EXIT_BLOCK_PTR)
		continue;
729 /* I is a header of an inner loop, or block 0 in a subroutine
730 with no loops at all. */
732 too_large_failure = 0;
733 loop_head = max_hdr[bb->index];
735 if (extend_regions_p)
		/* We save degree in case we meet a too_large region
		   and cancel it.  We need a correct degree later when
		   calling extend_rgns.  */
739 memcpy (degree1, degree, last_basic_block * sizeof (int));
	      /* Decrease degree of all I's successors for topological
		 ordering.  */
743 FOR_EACH_EDGE (e, ei, bb->succs)
744 if (e->dest != EXIT_BLOCK_PTR)
745 --degree[e->dest->index];
747 /* Estimate # insns, and count # blocks in the region. */
	      num_bbs = 1;
	      num_insns = (INSN_LUID (BB_END (bb))
750 - INSN_LUID (BB_HEAD (bb)));
752 /* Find all loop latches (blocks with back edges to the loop
		 header) or all the leaf blocks if the cfg has no loops.
755 Place those blocks into the queue. */
		  /* Leaf nodes have only a single successor which must
		     be EXIT_BLOCK.  */
761 if (single_succ_p (jbb)
762 && single_succ (jbb) == EXIT_BLOCK_PTR)
764 queue[++tail] = jbb->index;
765 SET_BIT (in_queue, jbb->index);
767 if (too_large (jbb->index, &num_bbs, &num_insns))
769 too_large_failure = 1;
778 FOR_EACH_EDGE (e, ei, bb->preds)
		  if (e->src == ENTRY_BLOCK_PTR)
		    continue;
783 node = e->src->index;
785 if (max_hdr[node] == loop_head && node != bb->index)
787 /* This is a loop latch. */
788 queue[++tail] = node;
789 SET_BIT (in_queue, node);
791 if (too_large (node, &num_bbs, &num_insns))
793 too_large_failure = 1;
800 /* Now add all the blocks in the loop to the queue.
802 We know the loop is a natural loop; however the algorithm
803 above will not always mark certain blocks as being in the
811 The algorithm in the DFS traversal may not mark B & D as part
812 of the loop (i.e. they will not have max_hdr set to A).
	       We know they cannot be loop latches (else they would have
815 had max_hdr set since they'd have a backedge to a dominator
816 block). So we don't need them on the initial queue.
818 We know they are part of the loop because they are dominated
819 by the loop header and can be reached by a backwards walk of
820 the edges starting with nodes on the initial queue.
822 It is safe and desirable to include those nodes in the
823 loop/scheduling region. To do so we would need to decrease
824 the degree of a node if it is the target of a backedge
825 within the loop itself as the node is placed in the queue.
827 We do not do this because I'm not sure that the actual
828 scheduling code will properly handle this case. ?!? */
830 while (head < tail && !too_large_failure)
833 child = queue[++head];
835 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
837 node = e->src->index;
839 /* See discussion above about nodes not marked as in
840 this loop during the initial DFS traversal. */
841 if (e->src == ENTRY_BLOCK_PTR
842 || max_hdr[node] != loop_head)
847 else if (!TEST_BIT (in_queue, node) && node != bb->index)
849 queue[++tail] = node;
850 SET_BIT (in_queue, node);
852 if (too_large (node, &num_bbs, &num_insns))
854 too_large_failure = 1;
861 if (tail >= 0 && !too_large_failure)
863 /* Place the loop header into list of region blocks. */
864 degree[bb->index] = -1;
865 rgn_bb_table[idx] = bb->index;
866 RGN_NR_BLOCKS (nr_regions) = num_bbs;
867 RGN_BLOCKS (nr_regions) = idx++;
868 RGN_DONT_CALC_DEPS (nr_regions) = 0;
869 RGN_HAS_REAL_EBB (nr_regions) = 0;
870 CONTAINING_RGN (bb->index) = nr_regions;
871 BLOCK_TO_BB (bb->index) = count = 0;
873 /* Remove blocks from queue[] when their in degree
874 becomes zero. Repeat until no blocks are left on the
		 list.  This produces a topological list of blocks in
		 the region.  */
882 if (degree[child] == 0)
887 rgn_bb_table[idx++] = child;
888 BLOCK_TO_BB (child) = ++count;
889 CONTAINING_RGN (child) = nr_regions;
890 queue[head] = queue[tail--];
892 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
893 if (e->dest != EXIT_BLOCK_PTR)
894 --degree[e->dest->index];
901 else if (extend_regions_p)
903 /* Restore DEGREE. */
909 /* And force successors of BB to be region heads.
910 This may provide several smaller regions instead
911 of one too_large region. */
912 FOR_EACH_EDGE (e, ei, bb->succs)
913 if (e->dest != EXIT_BLOCK_PTR)
914 SET_BIT (extended_rgn_header, e->dest->index);
920 if (extend_regions_p)
924 sbitmap_a_or_b (header, header, extended_rgn_header);
925 sbitmap_free (extended_rgn_header);
927 extend_rgns (degree, &idx, header, max_hdr);
  /* Any block that did not end up in a region is placed into a region
     by itself.  */
934 if (degree[bb->index] >= 0)
936 rgn_bb_table[idx] = bb->index;
937 RGN_NR_BLOCKS (nr_regions) = 1;
938 RGN_BLOCKS (nr_regions) = idx++;
939 RGN_DONT_CALC_DEPS (nr_regions) = 0;
940 RGN_HAS_REAL_EBB (nr_regions) = 0;
941 CONTAINING_RGN (bb->index) = nr_regions++;
942 BLOCK_TO_BB (bb->index) = 0;
948 sbitmap_free (header);
949 sbitmap_free (inner);
950 sbitmap_free (in_queue);
951 sbitmap_free (in_stack);
954 static int gather_region_statistics (int **);
955 static void print_region_statistics (int *, int, int *, int);
957 /* Calculate the histogram that shows the number of regions having the
958 given number of basic blocks, and store it in the RSP array. Return
959 the size of this array. */
961 gather_region_statistics (int **rsp)
963 int i, *a = 0, a_sz = 0;
965 /* a[i] is the number of regions that have (i + 1) basic blocks. */
966 for (i = 0; i < nr_regions; i++)
968 int nr_blocks = RGN_NR_BLOCKS (i);
970 gcc_assert (nr_blocks >= 1);
      if (nr_blocks > a_sz)
	{
	  a = xrealloc (a, nr_blocks * sizeof (*a));
	  do
	    a[a_sz++] = 0;
	  while (a_sz != nr_blocks);
	}
987 /* Print regions statistics. S1 and S2 denote the data before and after
988 calling extend_rgns, respectively. */
990 print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
994 /* We iterate until s2_sz because extend_rgns does not decrease
995 the maximal region size. */
996 for (i = 1; i < s2_sz; i++)
      fprintf (sched_dump, ";; Region extension statistics: size %d: "
	       "was %d + %d more\n", i + 1, n1, n2 - n1);
/* Extend regions.
   DEGREE - Array of incoming edge counts, considering only
   the edges that don't have their sources in formed regions yet.
1018 IDXP - pointer to the next available index in rgn_bb_table.
1019 HEADER - set of all region heads.
1020 LOOP_HDR - mapping from block to the containing loop
1021 (two blocks can reside within one region if they have
1022 the same loop header). */
1024 extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
1026 int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
1027 int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS;
1029 max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);
1031 max_hdr = xmalloc (last_basic_block * sizeof (*max_hdr));
1033 order = xmalloc (last_basic_block * sizeof (*order));
1034 post_order_compute (order, false, false);
1036 for (i = nblocks - 1; i >= 0; i--)
1039 if (degree[bbn] >= 0)
	/* This block was already processed in find_rgns.  */
1049 /* The idea is to topologically walk through CFG in top-down order.
1050 During the traversal, if all the predecessors of a node are
1051 marked to be in the same region (they all have the same max_hdr),
   then the current node is also marked to be a part of that region.
1053 Otherwise the node starts its own region.
1054 CFG should be traversed until no further changes are made. On each
1055 iteration the set of the region heads is extended (the set of those
1056 blocks that have max_hdr[bbi] == bbi). This set is upper bounded by the
1057 set of all basic blocks, thus the algorithm is guaranteed to terminate. */
1059 while (rescan && iter < max_iter)
1063 for (i = nblocks - 1; i >= 0; i--)
1069 if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn))
1073 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds)
1075 int predn = e->src->index;
1077 if (predn != ENTRY_BLOCK
1078 /* If pred wasn't processed in find_rgns. */
1079 && max_hdr[predn] != -1
1080 /* And pred and bb reside in the same loop.
1081 (Or out of any loop). */
1082 && loop_hdr[bbn] == loop_hdr[predn])
1085 /* Then bb extends the containing region of pred. */
1086 hdr = max_hdr[predn];
1087 else if (hdr != max_hdr[predn])
1088 /* Too bad, there are at least two predecessors
1089 that reside in different regions. Thus, BB should
1090 begin its own region. */
1097 /* BB starts its own region. */
	      /* If BB starts its own region,
		 update the set of headers with BB.  */
1108 SET_BIT (header, bbn);
1112 gcc_assert (hdr != -1);
1121 /* Statistics were gathered on the SPEC2000 package of tests with
1122 mainline weekly snapshot gcc-4.1-20051015 on ia64.
1124 Statistics for SPECint:
1125 1 iteration : 1751 cases (38.7%)
1126 2 iterations: 2770 cases (61.3%)
1127 Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1128 Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1129 (We don't count single block regions here).
1131 Statistics for SPECfp:
1132 1 iteration : 621 cases (35.9%)
1133 2 iterations: 1110 cases (64.1%)
1134 Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1135 Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1136 (We don't count single block regions here).
1138 By default we do at most 2 iterations.
1139 This can be overridden with max-sched-extend-regions-iters parameter:
1140 0 - disable region extension,
1141 N > 0 - do at most N iterations. */
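/* Editorial usage example: the iteration bound can be set on the command
   line, e.g.

     gcc -O2 -fschedule-insns --param max-sched-extend-regions-iters=2 foo.c

   (the option name follows the PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS
   parameter used above).  */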
1143 if (sched_verbose && iter != 0)
1144 fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1145 rescan ? "... failed" : "");
1147 if (!rescan && iter != 0)
1149 int *s1 = NULL, s1_sz = 0;
1151 /* Save the old statistics for later printout. */
1152 if (sched_verbose >= 6)
1153 s1_sz = gather_region_statistics (&s1);
1155 /* We have succeeded. Now assemble the regions. */
1156 for (i = nblocks - 1; i >= 0; i--)
1160 if (max_hdr[bbn] == bbn)
1161 /* BBN is a region head. */
1165 int num_bbs = 0, j, num_insns = 0, large;
1167 large = too_large (bbn, &num_bbs, &num_insns);
1170 rgn_bb_table[idx] = bbn;
1171 RGN_BLOCKS (nr_regions) = idx++;
1172 RGN_DONT_CALC_DEPS (nr_regions) = 0;
1173 RGN_HAS_REAL_EBB (nr_regions) = 0;
1174 CONTAINING_RGN (bbn) = nr_regions;
1175 BLOCK_TO_BB (bbn) = 0;
1177 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs)
1178 if (e->dest != EXIT_BLOCK_PTR)
1179 degree[e->dest->index]--;
1182 /* Here we check whether the region is too_large. */
1183 for (j = i - 1; j >= 0; j--)
1185 int succn = order[j];
1186 if (max_hdr[succn] == bbn)
1188 if ((large = too_large (succn, &num_bbs, &num_insns)))
	      /* If the region is too_large, then wrap every block of
		 the region into a single block region.
		 Here we wrap only the region head; the other blocks
		 are processed in the loop below.  */
1199 RGN_NR_BLOCKS (nr_regions) = 1;
1205 for (j = i - 1; j >= 0; j--)
1207 int succn = order[j];
1209 if (max_hdr[succn] == bbn)
		/* This loop iterates over all basic blocks that
		   are supposed to be in the region with head BBN,
		   and wraps them into that region (or into single
		   block regions).  */
1215 gcc_assert (degree[succn] == 0);
1218 rgn_bb_table[idx] = succn;
1219 BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1220 CONTAINING_RGN (succn) = nr_regions;
		    /* Wrap SUCCN into a single block region.  */
1225 RGN_BLOCKS (nr_regions) = idx;
1226 RGN_NR_BLOCKS (nr_regions) = 1;
1227 RGN_DONT_CALC_DEPS (nr_regions) = 0;
1228 RGN_HAS_REAL_EBB (nr_regions) = 0;
1234 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs)
1235 if (e->dest != EXIT_BLOCK_PTR)
1236 degree[e->dest->index]--;
1242 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1248 if (sched_verbose >= 6)
1252 /* Get the new statistics and print the comparison with the
1253 one before calling this function. */
1254 s2_sz = gather_region_statistics (&s2);
1255 print_region_statistics (s1, s1_sz, s2, s2_sz);
/* Functions for region scheduling information.  */
1269 /* Compute dominators, probability, and potential-split-edges of bb.
1270 Assume that these values were already computed for bb's predecessors. */
1273 compute_dom_prob_ps (int bb)
1275 edge_iterator in_ei;
1278 /* We shouldn't have any real ebbs yet. */
1279 gcc_assert (ebb_head [bb] == bb + current_blocks);
1281 if (IS_RGN_ENTRY (bb))
1283 SET_BIT (dom[bb], 0);
      prob[bb] = REG_BR_PROB_BASE;
      return;
1290 /* Initialize dom[bb] to '111..1'. */
1291 sbitmap_ones (dom[bb]);
1293 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
1297 edge_iterator out_ei;
      if (in_edge->src == ENTRY_BLOCK_PTR)
	continue;
1302 pred_bb = BLOCK_TO_BB (in_edge->src->index);
1303 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
1304 sbitmap_a_or_b (ancestor_edges[bb],
1305 ancestor_edges[bb], ancestor_edges[pred_bb]);
1307 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1309 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1311 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1312 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
1314 prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
1317 SET_BIT (dom[bb], bb);
1318 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1320 if (sched_verbose >= 2)
1321 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1322 (100 * prob[bb]) / REG_BR_PROB_BASE);
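/* Worked example (editorial): with REG_BR_PROB_BASE == 10000, suppose bb
   joins two predecessors p1 and p2 with prob[p1] == 7000 and
   prob[p2] == 3000, each reaching bb over an unconditional edge
   (probability 10000).  The loop above accumulates

     prob[bb] = (7000 * 10000) / 10000 + (3000 * 10000) / 10000 = 10000,

   i.e. bb is reached as surely as the region entry itself.  */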
1325 /* Functions for target info. */
/* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1328 Note that bb_trg dominates bb_src. */
1331 split_edges (int bb_src, int bb_trg, edgelst *bl)
1333 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1334 sbitmap_copy (src, pot_split[bb_src]);
1336 sbitmap_difference (src, src, pot_split[bb_trg]);
  extract_edgelst (src, bl);
  sbitmap_free (src);
1341 /* Find the valid candidate-source-blocks for the target block TRG, compute
1342 their probability, and check if they are speculative or not.
1343 For speculative sources, compute their update-blocks and split-blocks. */
1346 compute_trg_info (int trg)
1350 int i, j, k, update_idx;
1356 /* Define some of the fields for the target bb as well. */
1357 sp = candidate_table + trg;
1359 sp->is_speculative = 0;
1360 sp->src_prob = REG_BR_PROB_BASE;
1362 visited = sbitmap_alloc (last_basic_block);
1364 for (i = trg + 1; i < current_nr_blocks; i++)
1366 sp = candidate_table + i;
1368 sp->is_valid = IS_DOMINATED (i, trg);
1371 int tf = prob[trg], cf = prob[i];
1373 /* In CFGs with low probability edges TF can possibly be zero. */
1374 sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
1375 sp->is_valid = (sp->src_prob >= min_spec_prob);
1380 split_edges (i, trg, &el);
1381 sp->is_speculative = (el.nr_members) ? 1 : 0;
      if (sp->is_speculative && !flag_schedule_speculative)
	sp->is_valid = 0;
1388 /* Compute split blocks and store them in bblst_table.
1389 The TO block of every split edge is a split block. */
1390 sp->split_bbs.first_member = &bblst_table[bblst_last];
1391 sp->split_bbs.nr_members = el.nr_members;
1392 for (j = 0; j < el.nr_members; bblst_last++, j++)
1393 bblst_table[bblst_last] = el.first_member[j]->dest;
1394 sp->update_bbs.first_member = &bblst_table[bblst_last];
1396 /* Compute update blocks and store them in bblst_table.
1397 For every split edge, look at the FROM block, and check
1398 all out edges. For each out edge that is not a split edge,
1399 add the TO block to the update block list. This list can end
1400 up with a lot of duplicates. We need to weed them out to avoid
1401 overrunning the end of the bblst_table. */
1404 sbitmap_zero (visited);
1405 for (j = 0; j < el.nr_members; j++)
1407 block = el.first_member[j]->src;
1408 FOR_EACH_EDGE (e, ei, block->succs)
1410 if (!TEST_BIT (visited, e->dest->index))
1412 for (k = 0; k < el.nr_members; k++)
		  if (e == el.first_member[k])
		    break;
1416 if (k >= el.nr_members)
1418 bblst_table[bblst_last++] = e->dest;
1419 SET_BIT (visited, e->dest->index);
1425 sp->update_bbs.nr_members = update_idx;
1427 /* Make sure we didn't overrun the end of bblst_table. */
1428 gcc_assert (bblst_last <= bblst_size);
1432 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1434 sp->is_speculative = 0;
1439 sbitmap_free (visited);
1442 /* Print candidates info, for debugging purposes. Callable from debugger. */
1445 debug_candidate (int i)
  if (!candidate_table[i].is_valid)
    return;
1450 if (candidate_table[i].is_speculative)
1453 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1455 fprintf (sched_dump, "split path: ");
1456 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1458 int b = candidate_table[i].split_bbs.first_member[j]->index;
1460 fprintf (sched_dump, " %d ", b);
1462 fprintf (sched_dump, "\n");
1464 fprintf (sched_dump, "update path: ");
1465 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1467 int b = candidate_table[i].update_bbs.first_member[j]->index;
1469 fprintf (sched_dump, " %d ", b);
1471 fprintf (sched_dump, "\n");
1475 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1479 /* Print candidates info, for debugging purposes. Callable from debugger. */
1482 debug_candidates (int trg)
1486 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1487 BB_TO_BLOCK (trg), trg);
1488 for (i = trg + 1; i < current_nr_blocks; i++)
1489 debug_candidate (i);
1492 /* Functions for speculative scheduling. */
1494 static bitmap_head not_in_df;
/* Return 0 if X sets a register that is live at the beginning of one
   of the split-blocks of SRC; otherwise return 1.  */
1500 check_live_1 (int src, rtx x)
1504 rtx reg = SET_DEST (x);
1509 while (GET_CODE (reg) == SUBREG
1510 || GET_CODE (reg) == ZERO_EXTRACT
1511 || GET_CODE (reg) == STRICT_LOW_PART)
1512 reg = XEXP (reg, 0);
1514 if (GET_CODE (reg) == PARALLEL)
1518 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1519 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
	  if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
	    return 1;
1529 regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    /* Global registers are assumed live.  */
    return 0;
1538 if (regno < FIRST_PSEUDO_REGISTER)
1540 /* Check for hard registers. */
1541 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1544 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1546 basic_block b = candidate_table[src].split_bbs.first_member[i];
	      int t = bitmap_bit_p (&not_in_df, b->index);
	      /* We can have split blocks that were recently generated;
		 such blocks are always outside the current region.  */
1551 gcc_assert (!t || (CONTAINING_RGN (b->index)
1552 != CONTAINING_RGN (BB_TO_BLOCK (src))));
	      if (t || REGNO_REG_SET_P (DF_LIVE_IN (b), regno + j))
		return 0;
1561 /* Check for pseudo registers. */
1562 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1564 basic_block b = candidate_table[src].split_bbs.first_member[i];
	  int t = bitmap_bit_p (&not_in_df, b->index);
1567 gcc_assert (!t || (CONTAINING_RGN (b->index)
1568 != CONTAINING_RGN (BB_TO_BLOCK (src))));
	  if (t || REGNO_REG_SET_P (DF_LIVE_IN (b), regno))
	    return 0;
/* If X is a set of a register R, mark R as live at the beginning
   of every update-block of SRC.  */
1583 update_live_1 (int src, rtx x)
1587 rtx reg = SET_DEST (x);
1592 while (GET_CODE (reg) == SUBREG
1593 || GET_CODE (reg) == ZERO_EXTRACT
1594 || GET_CODE (reg) == STRICT_LOW_PART)
1595 reg = XEXP (reg, 0);
1597 if (GET_CODE (reg) == PARALLEL)
1601 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1602 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1603 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
  /* Global registers are always live, so the code below does not apply
     to them.  */
1614 regno = REGNO (reg);
1616 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1618 if (regno < FIRST_PSEUDO_REGISTER)
1620 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1623 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1625 basic_block b = candidate_table[src].update_bbs.first_member[i];
1627 SET_REGNO_REG_SET (DF_LIVE_IN (b), regno + j);
1633 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1635 basic_block b = candidate_table[src].update_bbs.first_member[i];
1637 SET_REGNO_REG_SET (DF_LIVE_IN (b), regno);
1643 /* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before the first insertion of insn into
   the ready list or before scheduling.  */
1648 check_live (rtx insn, int src)
1650 /* Find the registers set by instruction. */
1651 if (GET_CODE (PATTERN (insn)) == SET
1652 || GET_CODE (PATTERN (insn)) == CLOBBER)
1653 return check_live_1 (src, PATTERN (insn));
1654 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1657 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1658 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1659 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
	  && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
	return 0;
1669 /* Update the live registers info after insn was moved speculatively from
1670 block src to trg. */
1673 update_live (rtx insn, int src)
1675 /* Find the registers set by instruction. */
1676 if (GET_CODE (PATTERN (insn)) == SET
1677 || GET_CODE (PATTERN (insn)) == CLOBBER)
1678 update_live_1 (src, PATTERN (insn));
1679 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1682 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1683 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1684 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1685 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1689 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
#define IS_REACHABLE(bb_from, bb_to)					\
  (bb_from == bb_to							\
   || IS_RGN_ENTRY (bb_from)						\
1693 || (TEST_BIT (ancestor_edges[bb_to], \
1694 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
1696 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1699 set_spec_fed (rtx load_insn)
1703 FOR_EACH_DEP_LINK (link, INSN_FORW_DEPS (load_insn))
1704 if (DEP_LINK_KIND (link) == REG_DEP_TRUE)
1705 FED_BY_SPEC_LOAD (DEP_LINK_CON (link)) = 1;
/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn that guards the speculative load.  */
1712 find_conditional_protection (rtx insn, int load_insn_bb)
1716 /* Iterate through DEF-USE forward dependences. */
1717 FOR_EACH_DEP_LINK (link, INSN_FORW_DEPS (insn))
1719 rtx next = DEP_LINK_CON (link);
1721 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1722 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1723 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1724 && load_insn_bb != INSN_BB (next)
1725 && DEP_LINK_KIND (link) == REG_DEP_TRUE
	  && (JUMP_P (next)
	      || find_conditional_protection (next, load_insn_bb)))
	return 1;
  return 0;
}				/* find_conditional_protection */
1733 /* Returns 1 if the same insn1 that participates in the computation
1734 of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
1737 insn1 -> ... -> conditional-branch
1738 insn1 -> ... -> load_insn,
   and if a flow path exists:
1740 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1741 and if insn1 is on the path
1742 region-entry -> ... -> bb_trg -> ... load_insn.
1744 Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1745 Locate the branch by following INSN_FORW_DEPS from insn1. */
1748 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1752 FOR_EACH_DEP_LINK (link, INSN_BACK_DEPS (load_insn))
1754 rtx insn1 = DEP_LINK_PRO (link);
      /* Must be a DEF-USE dependence upon a non-branch.  */
      if (DEP_LINK_KIND (link) != REG_DEP_TRUE
	  || JUMP_P (insn1))
	continue;
1761 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1762 if (INSN_BB (insn1) == bb_src
1763 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1764 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1765 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
	continue;
1769 /* Now search for the conditional-branch. */
      if (find_conditional_protection (insn1, bb_src))
	return 1;
1773 /* Recursive step: search another insn1, "above" current insn1. */
1774 return is_conditionally_protected (insn1, bb_src, bb_trg);
  /* The chain does not exist.  */
  return 0;
}				/* is_conditionally_protected */
1781 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1782 load_insn can move speculatively from bb_src to bb_trg. All the
1783 following must hold:
1785 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1786 (2) load_insn and load1 have a def-use dependence upon
1787 the same insn 'insn1'.
1788 (3) either load2 is in bb_trg, or:
1789 - there's only one split-block, and
1790 - load1 is on the escape path, and
1792 From all these we can conclude that the two loads access memory
1793 addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */
1798 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1800 dep_link_t back_link;
1801 candidate *candp = candidate_table + bb_src;
  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;
1807 FOR_EACH_DEP_LINK (back_link, INSN_BACK_DEPS (load_insn))
1809 rtx insn1 = DEP_LINK_PRO (back_link);
1811 if (DEP_LINK_KIND (back_link) == REG_DEP_TRUE)
1813 /* Found a DEF-USE dependence (insn1, load_insn). */
1814 dep_link_t fore_link;
1816 FOR_EACH_DEP_LINK (fore_link, INSN_FORW_DEPS (insn1))
1818 rtx insn2 = DEP_LINK_CON (fore_link);
1820 if (DEP_LINK_KIND (fore_link) == REG_DEP_TRUE)
1822 /* Found a DEF-USE dependence (insn1, insn2). */
1823 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1824 /* insn2 not guaranteed to be a 1 base reg load. */
1827 if (INSN_BB (insn2) == bb_trg)
		    /* insn2 is the similar load, in the target block.  */
		    return 1;
1831 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
		    /* insn2 is a similar load, in a split-block.  */
		    return 1;
  /* Couldn't find a similar load.  */
  return 0;
1843 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1844 a load moved speculatively, or if load_insn is protected by
1845 a compare on load_insn's address). */
1848 is_prisky (rtx load_insn, int bb_src, int bb_trg)
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;
1853 if (deps_list_empty_p (INSN_BACK_DEPS (load_insn)))
    /* Dependence may 'hide' out of the region.  */
    return 1;
  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
1863 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */
1868 is_exception_free (rtx insn, int bb_src, int bb_trg)
1870 int insn_class = haifa_classify_insn (insn);
1872 /* Handle non-load insns. */
  if (!flag_schedule_speculative_load)
    return 0;
1885 IS_LOAD_INSN (insn) = 1;
1892 case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
	return 1;
1895 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1896 case PRISKY_CANDIDATE:
1897 if (!flag_schedule_speculative_load_dangerous
	  || is_prisky (insn, bb_src, bb_trg))
	return 0;
1904 return flag_schedule_speculative_load_dangerous;
1907 /* The number of insns from the current block scheduled so far. */
1908 static int sched_target_n_insns;
1909 /* The number of insns from the current block to be scheduled in total. */
1910 static int target_n_insns;
1911 /* The number of insns from the entire region scheduled so far. */
1912 static int sched_n_insns;
1914 /* Implementations of the sched_info functions for region scheduling. */
1915 static void init_ready_list (void);
1916 static int can_schedule_ready_p (rtx);
1917 static void begin_schedule_ready (rtx, rtx);
1918 static ds_t new_ready (rtx, ds_t);
1919 static int schedule_more_p (void);
1920 static const char *rgn_print_insn (rtx, int);
1921 static int rgn_rank (rtx, rtx);
1922 static int contributes_to_priority (rtx, rtx);
1923 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1925 /* Functions for speculative scheduling. */
1926 static void add_remove_insn (rtx, int);
1927 static void extend_regions (void);
1928 static void add_block1 (basic_block, basic_block);
1929 static void fix_recovery_cfg (int, int, int);
1930 static basic_block advance_target_bb (basic_block, rtx);
1932 static void debug_rgn_dependencies (int);
1934 /* Return nonzero if there are more insns that should be scheduled. */
1937 schedule_more_p (void)
1939 return sched_target_n_insns < target_n_insns;
1942 /* Add all insns that are initially ready to the ready list READY. Called
1943 once before scheduling a set of insns. */
1946 init_ready_list (void)
1948 rtx prev_head = current_sched_info->prev_head;
1949 rtx next_tail = current_sched_info->next_tail;
  target_n_insns = 0;
  sched_target_n_insns = 0;
  sched_n_insns = 0;
1957 /* Print debugging information. */
1958 if (sched_verbose >= 5)
1959 debug_rgn_dependencies (target_bb);
1961 /* Prepare current target block info. */
1962 if (current_nr_blocks > 1)
1964 candidate_table = XNEWVEC (candidate, current_nr_blocks);
1967 /* bblst_table holds split blocks and update blocks for each block after
1968 the current one in the region. split blocks and update blocks are
1969 the TO blocks of region edges, so there can be at most rgn_nr_edges
1971 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1972 bblst_table = XNEWVEC (basic_block, bblst_size);
1975 edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1977 compute_trg_info (target_bb);
1980 /* Initialize ready list with all 'ready' insns in target block.
1981 Count number of insns in the target block being scheduled. */
1982 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1987 gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
1990 /* Add to ready list all 'ready' insns in valid source blocks.
1991 For speculative insns, check-live, exception-free, and
1993 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1994 if (IS_VALID (bb_src))
2000 get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2002 src_next_tail = NEXT_INSN (tail);
2005 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
/* Called after taking INSN from the ready list.  Returns nonzero if this
   insn can be scheduled, zero if we should silently discard it.  */
2015 can_schedule_ready_p (rtx insn)
2017 /* An interblock motion? */
2018 if (INSN_BB (insn) != target_bb
2019 && IS_SPECULATIVE_INSN (insn)
      && !check_live (insn, INSN_BB (insn)))
    return 0;
  else
    return 1;
/* Updates counters and other information.  Split from can_schedule_ready_p ()
   because when we schedule an insn speculatively, the insn passed to
   can_schedule_ready_p () differs from the one passed to
   begin_schedule_ready ().  */
2031 begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED)
2033 /* An interblock motion? */
2034 if (INSN_BB (insn) != target_bb)
2036 if (IS_SPECULATIVE_INSN (insn))
2038 gcc_assert (check_live (insn, INSN_BB (insn)));
2040 update_live (insn, INSN_BB (insn));
2042 /* For speculative load, mark insns fed by it. */
2043 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2044 set_spec_fed (insn);
2052 /* In block motion. */
2053 sched_target_n_insns++;
2058 /* Called after INSN has all its hard dependencies resolved and the speculation
2059 of type TS is enough to overcome them all.
2060 Return nonzero if it should be moved to the ready list or the queue, or zero
2061 if we should silently discard it. */
2063 new_ready (rtx next, ds_t ts)
2065 if (INSN_BB (next) != target_bb)
2067 int not_ex_free = 0;
2069 /* For speculative insns, before inserting to ready/queue,
2070 check live, exception-free, and issue-delay. */
2071 if (!IS_VALID (INSN_BB (next))
2073 || (IS_SPECULATIVE_INSN (next)
2074 && ((recog_memoized (next) >= 0
2075 && min_insn_conflict_delay (curr_state, next, next)
2076 > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2077 || IS_SPECULATION_CHECK_P (next)
2078 || !check_live (next, INSN_BB (next))
2079 || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2083 /* We are here because is_exception_free () == false.
2084 But we possibly can handle that with control speculation. */
2085 && current_sched_info->flags & DO_SPECULATION)
2086 /* Here we got new control-speculative instruction. */
2087 ts = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2089 ts = (ts & ~SPECULATIVE) | HARD_DEP;
2096 /* Return a string that contains the insn uid and optionally anything else
2097 necessary to identify this insn in an output. It's valid to use a
2098 static buffer for this. The ALIGNED parameter should cause the string
2099 to be formatted so that multiple output lines will line up nicely. */
2102 rgn_print_insn (rtx insn, int aligned)
2104 static char tmp[80];
  if (aligned)
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
  else if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
    sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
  else
    sprintf (tmp, "%d", INSN_UID (insn));

  return tmp;
2118 /* Compare priority of two insns. Return a positive number if the second
2119 insn is to be preferred for scheduling, and a negative one if the first
2120 is to be preferred. Zero if they are equally good. */
2123 rgn_rank (rtx insn1, rtx insn2)
  /* Some comparisons make sense in interblock scheduling only.  */
2126 if (INSN_BB (insn1) != INSN_BB (insn2))
2128 int spec_val, prob_val;
      /* Prefer an inblock motion over an interblock motion.  */
      if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
	return 1;
      if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
	return -1;
      /* Prefer a useful motion over a speculative one.  */
      spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
      if (spec_val)
	return spec_val;
2141 /* Prefer a more probable (speculative) insn. */
      prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
      if (prob_val)
	return prob_val;
2149 /* NEXT is an instruction that depends on INSN (a backward dependence);
   return nonzero if we should include this dependence in priority
   calculations.  */
2154 contributes_to_priority (rtx next, rtx insn)
2156 /* NEXT and INSN reside in one ebb. */
2157 return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2160 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
2161 conditionally set before INSN. Store the set of registers that
2162 must be considered as used by this jump in USED and that of
2163 registers that must be considered as set in SET. */
2166 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2167 regset cond_exec ATTRIBUTE_UNUSED,
2168 regset used ATTRIBUTE_UNUSED,
2169 regset set ATTRIBUTE_UNUSED)
2171 /* Nothing to do here, since we postprocess jumps in
2172 add_branch_dependences. */
2175 /* Used in schedule_insns to initialize current_sched_info for scheduling
2176 regions (or single basic blocks). */
2178 static struct sched_info region_sched_info =
2181 can_schedule_ready_p,
2186 contributes_to_priority,
2187 compute_jump_reg_dependencies,
2194 begin_schedule_ready,
2201 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
2204 sets_likely_spilled (rtx pat)
  bool ret = false;
  note_stores (pat, sets_likely_spilled_1, &ret);
  return ret;
2212 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
2214 bool *ret = (bool *) data;
  if (GET_CODE (pat) == SET
      && REG_P (x)
      && REGNO (x) < FIRST_PSEUDO_REGISTER
      && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
    *ret = true;
/* Add dependences so that branches are scheduled to run last in their
   block.  */
2227 add_branch_dependences (rtx head, rtx tail)
2231 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2232 that can throw exceptions, force them to remain in order at the end of
2233 the block by adding dependencies and giving the last a high priority.
2234 There may be notes present, and prev_head may also be a note.
2236 Branches must obviously remain at the end. Calls should remain at the
2237 end since moving them results in worse register allocation. Uses remain
2238 at the end to ensure proper register allocation.
     cc0 setters remain at the end because they can't be moved away from
     their cc0 user.
2243 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2245 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2246 are not moved before reload because we can wind up with register
2247 allocation failures. */
  while (CALL_P (insn)
	 || JUMP_P (insn)
	 || (NONJUMP_INSN_P (insn)
2254 && (GET_CODE (PATTERN (insn)) == USE
2255 || GET_CODE (PATTERN (insn)) == CLOBBER
2256 || can_throw_internal (insn)
2258 || sets_cc0_p (PATTERN (insn))
2260 || (!reload_completed
2261 && sets_likely_spilled (PATTERN (insn)))))
2267 && (find_link_by_pro_in_deps_list (INSN_BACK_DEPS (last), insn)
2270 if (! sched_insns_conditions_mutex_p (last, insn))
2271 add_dependence (last, insn, REG_DEP_ANTI);
2272 INSN_REF_COUNT (insn)++;
2275 CANT_MOVE (insn) = 1;
2280 /* Don't overrun the bounds of the basic block. */
2284 insn = PREV_INSN (insn);
2287 /* Make sure these insns are scheduled last in their block. */
2290 while (insn != head)
2292 insn = prev_nonnote_insn (insn);
2294 if (INSN_REF_COUNT (insn) != 0)
2297 if (! sched_insns_conditions_mutex_p (last, insn))
2298 add_dependence (last, insn, REG_DEP_ANTI);
2299 INSN_REF_COUNT (insn) = 1;
#ifdef HAVE_conditional_execution
  /* Finally, if the block ends in a jump, and we are doing intra-block
     scheduling, make sure that the branch depends on any COND_EXEC insns
     inside the block to avoid moving the COND_EXECs past the branch insn.

     We only have to do this after reload, because (1) before reload there
     are no COND_EXEC insns, and (2) the region scheduler is an intra-block
     scheduler after reload.

     FIXME: We could in some cases move COND_EXEC insns past the branch if
     this scheduler would be a little smarter.  Consider this code:

		T = [ADDR]
	C  ?	addr += 4
	!C ?	X += 12
	C  ?	T += 1
	C  ?	jump foo

     On a target with a one cycle stall on a memory access the optimal
     sequence would be:

		T = [ADDR]
	C  ?	addr += 4
	C  ?	T += 1
	C  ?	jump foo
	!C ?	X += 12

     We don't want to put the 'X += 12' before the branch because it just
     wastes a cycle of execution time when the branch is taken.

     Note that in the example "!C" will always be true.  That is another
     possible improvement for handling COND_EXECs in this scheduler: it
     could remove always-true predicates.  */
  if (!reload_completed || ! JUMP_P (tail))
    return;

  insn = tail;
  while (insn != head)
    {
      insn = PREV_INSN (insn);

      /* Note that we want to add this dependency even when
	 sched_insns_conditions_mutex_p returns true.  The whole point
	 is that we _want_ this dependency, even if these insns really
	 are independent.  */
      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
	add_dependence (tail, insn, REG_DEP_ANTI);
    }
#endif
}
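
/* Comment-only illustration (hypothetical insns): given a block tail

	 add   r1 = r2 + r3
	 use   r0
	 call  foo
	 jump  L1

   the first walk starts at the jump and matches the jump, call and
   use, chaining them with anti dependences (jump after call, call
   after use) and marking them CANT_MOVE; the second walk then makes
   the head of that group depend on every remaining insn, so the add
   can still be scheduled freely, but only before the pinned tail.  */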
/* Data structures for the computation of data dependences in a region.  We
   keep one `deps' structure for every basic block.  Before analyzing the
   data dependences for a bb, its variables are initialized as a function of
   the variables of its predecessors.  When the analysis for a bb completes,
   we save the contents to the corresponding bb_deps[bb] variable.  */

static struct deps *bb_deps;

/* Duplicate the INSN_LIST elements of COPY and prepend them to OLD.  */

static rtx
concat_INSN_LIST (rtx copy, rtx old)
{
  rtx new = old;
  for (; copy ; copy = XEXP (copy, 1))
    new = alloc_INSN_LIST (XEXP (copy, 0), new);
  return new;
}
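
/* Worked example (comment only): if COPY is (A B C) and OLD is (X),
   each element of COPY is consed onto the front of the accumulator in
   turn, yielding (C B A X) -- COPY's elements come out reversed ahead
   of OLD.  The dependence lists built this way are treated as
   unordered sets, so the reversal is harmless.  */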
static void
concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
		      rtx *old_mems_p)
{
  rtx new_insns = *old_insns_p;
  rtx new_mems = *old_mems_p;

  while (copy_insns)
    {
      new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
      new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
      copy_insns = XEXP (copy_insns, 1);
      copy_mems = XEXP (copy_mems, 1);
    }

  *old_insns_p = new_insns;
  *old_mems_p = new_mems;
}
/* After computing the dependencies for block BB, propagate the dependencies
   found in PRED_DEPS to the successors of the block.  */

static void
propagate_deps (int bb, struct deps *pred_deps)
{
  basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
  edge_iterator ei;
  edge e;

  /* bb's structures are inherited by its successors.  */
  FOR_EACH_EDGE (e, ei, block->succs)
    {
      struct deps *succ_deps;
      unsigned reg;
      reg_set_iterator rsi;

      /* Only bbs "below" bb, in the same region, are interesting.  */
      if (e->dest == EXIT_BLOCK_PTR
	  || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
	  || BLOCK_TO_BB (e->dest->index) <= bb)
	continue;

      succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);

      /* The reg_last lists are inherited by the successor.  */
      EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
	{
	  struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
	  struct deps_reg *succ_rl = &succ_deps->reg_last[reg];

	  succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
	  succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
	  succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
						succ_rl->clobbers);
	  succ_rl->uses_length += pred_rl->uses_length;
	  succ_rl->clobbers_length += pred_rl->clobbers_length;
	}
      IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);

      /* Mem read/write lists are inherited by the successor.  */
      concat_insn_mem_list (pred_deps->pending_read_insns,
			    pred_deps->pending_read_mems,
			    &succ_deps->pending_read_insns,
			    &succ_deps->pending_read_mems);
      concat_insn_mem_list (pred_deps->pending_write_insns,
			    pred_deps->pending_write_mems,
			    &succ_deps->pending_write_insns,
			    &succ_deps->pending_write_mems);

      succ_deps->last_pending_memory_flush
	= concat_INSN_LIST (pred_deps->last_pending_memory_flush,
			    succ_deps->last_pending_memory_flush);

      succ_deps->pending_read_list_length
	+= pred_deps->pending_read_list_length;
      succ_deps->pending_write_list_length
	+= pred_deps->pending_write_list_length;
      succ_deps->pending_flush_length += pred_deps->pending_flush_length;

      /* last_function_call is inherited by the successor.  */
      succ_deps->last_function_call
	= concat_INSN_LIST (pred_deps->last_function_call,
			    succ_deps->last_function_call);

      /* sched_before_next_call is inherited by the successor.  */
      succ_deps->sched_before_next_call
	= concat_INSN_LIST (pred_deps->sched_before_next_call,
			    succ_deps->sched_before_next_call);
    }

  /* These lists should point to the right place, for correct
     freeing later.  */
  bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
  bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
  bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
  bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;

  /* Can't allow these to be freed twice.  */
  pred_deps->pending_read_insns = 0;
  pred_deps->pending_read_mems = 0;
  pred_deps->pending_write_insns = 0;
  pred_deps->pending_write_mems = 0;
}
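
/* Comment-only sketch of the propagation above (hypothetical region
   layout): with ebbs ordered bb0 -> bb1 -> bb2, bb0 is analyzed first;
   propagate_deps then concatenates bb0's reg_last and pending-memory
   lists onto bb1's copies, so when bb1 is analyzed it already sees the
   defs, uses and pending memory accesses flowing in from bb0, and the
   combined state is propagated to bb2 in turn.  */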
/* Compute backward dependences inside bb.  In a multiple blocks region:
   (1) a bb is analyzed after its predecessors, and (2) the lists in
   effect at the end of bb (after analyzing for bb) are inherited by
   bb's successors.

   Specifically for reg-reg data dependences, the block insns are
   scanned by sched_analyze () top-to-bottom.  Two lists are
   maintained by sched_analyze (): reg_last[].sets for register DEFs,
   and reg_last[].uses for register USEs.

   When analysis is completed for bb, we update for its successors:
     - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
     - USES[succ] = Union (USES [succ], USES [bb])

   The mechanism for computing mem-mem data dependence is very
   similar, and the result is interblock dependences in the region.  */

static void
compute_block_backward_dependences (int bb)
{
  rtx head, tail;
  struct deps tmp_deps;

  tmp_deps = bb_deps[bb];

  /* Do the analysis for this block.  */
  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
  sched_analyze (&tmp_deps, head, tail);
  add_branch_dependences (head, tail);

  if (current_nr_blocks > 1)
    propagate_deps (bb, &tmp_deps);

  /* Free up the INSN_LISTs.  */
  free_deps (&tmp_deps);
}
/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
   them to the unused_*_list variables, so that they can be reused.  */

static void
free_pending_lists (void)
{
  int bb;

  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
      free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
      free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
      free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
    }
}
/* Print dependences for debugging starting from FROM_BB.
   Callable from debugger.  */

void debug_rgn_dependencies (int from_bb)
{
  int bb;

  fprintf (sched_dump,
	   ";;   --------------- forward dependences: ------------ \n");

  for (bb = from_bb; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;

      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
      fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
	       BB_TO_BLOCK (bb), bb);

      debug_dependencies (head, tail);
    }
}
/* Print dependencies information for instructions between HEAD and TAIL.
   ??? This function would probably fit best in haifa-sched.c.  */
void debug_dependencies (rtx head, rtx tail)
{
  rtx insn;
  rtx next_tail = NEXT_INSN (tail);

  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
	   "insn", "code", "bb", "dep", "prio", "cost",
	   "reservation");
  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
	   "----", "----", "--", "---", "----", "----",
	   "-----------");

  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    {
      dep_link_t link;

      if (! INSN_P (insn))
	{
	  int n;
	  fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
	  if (NOTE_P (insn))
	    {
	      n = NOTE_KIND (insn);
	      fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
	    }
	  else
	    fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
	  continue;
	}

      fprintf (sched_dump,
	       ";;   %s%5d%6d%6d%6d%6d%6d   ",
	       (SCHED_GROUP_P (insn) ? "+" : " "),
	       INSN_UID (insn),
	       INSN_CODE (insn),
	       BLOCK_NUM (insn),
	       INSN_DEP_COUNT (insn),
	       INSN_PRIORITY (insn),
	       insn_cost (insn));

      if (recog_memoized (insn) < 0)
	fprintf (sched_dump, "nothing");
      else
	print_reservation (sched_dump, insn);

      fprintf (sched_dump, "\t: ");
      FOR_EACH_DEP_LINK (link, INSN_FORW_DEPS (insn))
	fprintf (sched_dump, "%d ", INSN_UID (DEP_LINK_CON (link)));
      fprintf (sched_dump, "\n");
    }

  fprintf (sched_dump, "\n");
}
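
/* Hypothetical excerpt of the dump produced above (every number is
   made up; shown only to document the layout):

   ;;      insn  code    bb   dep  prio  cost   reservation
   ;;      ----  ----    --   ---  ----  ----   -----------
   ;;        23    42     3     0     5     1   alu	: 25 26
   ;;   +    25    17     3     1     3     1   nothing	: 26

   The '+' flags an insn that is part of a SCHED_GROUP, and the list
   after the colon gives the UIDs of the insn's forward dependents.  */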
/* Returns true if all the basic blocks of the current region have
   NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region.  */

static bool
sched_is_disabled_for_current_region_p (void)
{
  int bb;

  for (bb = 0; bb < current_nr_blocks; bb++)
    if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
      return false;

  return true;
}
/* Schedule a region.  A region is either an inner loop, a loop-free
   subroutine, or a single basic block.  Each bb in the region is
   scheduled after its flow predecessors.  */

static void
schedule_region (int rgn)
{
  basic_block block;
  edge_iterator ei;
  edge e;
  int bb;
  int rgn_n_insns = 0;
  int sched_rgn_n_insns = 0;

  /* Set variables for the current region.  */
  current_nr_blocks = RGN_NR_BLOCKS (rgn);
  current_blocks = RGN_BLOCKS (rgn);

  /* See comments in add_block1, for what reasons we allocate +1 element.  */
  ebb_head = xrealloc (ebb_head, (current_nr_blocks + 1) * sizeof (*ebb_head));
  for (bb = 0; bb <= current_nr_blocks; bb++)
    ebb_head[bb] = current_blocks + bb;

  /* Don't schedule a region that is marked by
     NOTE_DISABLE_SCHED_OF_BLOCK.  */
  if (sched_is_disabled_for_current_region_p ())
    return;
  if (!RGN_DONT_CALC_DEPS (rgn))
    {
      init_deps_global ();

      /* Initializations for region data dependence analysis.  */
      bb_deps = XNEWVEC (struct deps, current_nr_blocks);
      for (bb = 0; bb < current_nr_blocks; bb++)
	init_deps (bb_deps + bb);

      /* Compute backward dependencies.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
	compute_block_backward_dependences (bb);

      /* Compute forward dependencies.  */
      for (bb = current_nr_blocks - 1; bb >= 0; bb--)
	{
	  rtx head, tail;

	  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
	  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);

	  compute_forward_dependences (head, tail);

	  if (targetm.sched.dependencies_evaluation_hook)
	    targetm.sched.dependencies_evaluation_hook (head, tail);
	}

      free_pending_lists ();

      finish_deps_global ();

      free (bb_deps);
    }
  else
    /* This is a recovery block.  It is always a single block region.  */
    gcc_assert (current_nr_blocks == 1);
  /* Set priorities.  */
  current_sched_info->sched_max_insns_priority = 0;
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;

      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);

      rgn_n_insns += set_priorities (head, tail);
    }
  current_sched_info->sched_max_insns_priority++;
  /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
  if (current_nr_blocks > 1)
    {
      prob = XNEWVEC (int, current_nr_blocks);

      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
      sbitmap_vector_zero (dom, current_nr_blocks);

      /* Use ->aux to implement EDGE_TO_BIT mapping.  */
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    SET_EDGE_TO_BIT (e, rgn_nr_edges++);
	}

      rgn_edges = XNEWVEC (edge, rgn_nr_edges);
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    rgn_edges[rgn_nr_edges++] = e;
	}

      /* Split edges.  */
      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (pot_split, current_nr_blocks);
      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (ancestor_edges, current_nr_blocks);

      /* Compute probabilities, dominators, split_edges.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
	compute_dom_prob_ps (bb);

      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  We don't need it
	 anymore, but we want to avoid duplication of aux fields in the
	 newly created edges.  */
      FOR_EACH_BB (block)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    e->aux = NULL;
	}
    }
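
  /* Shape note (illustration): after the two passes above, every edge E
     leaving a block of this region satisfies
     rgn_edges[EDGE_TO_BIT (E)] == E, because both passes visit blocks
     and successor edges in the same order.  pot_split[] and
     ancestor_edges[] can therefore use the bit position as a dense
     per-region edge index.  */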
  /* Now we can schedule all blocks.  */
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      basic_block first_bb, last_bb, curr_bb;
      rtx head, tail;

      first_bb = EBB_FIRST_BB (bb);
      last_bb = EBB_LAST_BB (bb);

      get_ebb_head_tail (first_bb, last_bb, &head, &tail);

      if (no_real_insns_p (head, tail))
	{
	  gcc_assert (first_bb == last_bb);
	  continue;
	}

      current_sched_info->prev_head = PREV_INSN (head);
      current_sched_info->next_tail = NEXT_INSN (tail);

      /* rm_other_notes only removes notes which are _inside_ the
	 block---that is, it won't remove notes before the first real insn
	 or after the last real insn of the block.  So if the first insn
	 has a REG_SAVE_NOTE which would otherwise be emitted before the
	 insn, it is redundant with the note before the start of the
	 block, and so we have to take it out.  */
      if (INSN_P (head))
	{
	  rtx note;

	  for (note = REG_NOTES (head); note; note = XEXP (note, 1))
	    if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
	      remove_note (head, note);
	}
      else
	/* This means that the first block in the ebb is empty.
	   That should be impossible: at the very least, a recovery
	   check should be present, since it caused the splitting.  */
	gcc_unreachable ();

      /* Remove remaining note insns from the block, save them in
	 note_list.  These notes are restored at the end of
	 schedule_block ().  */
      rm_other_notes (head, tail);

      unlink_bb_notes (first_bb, last_bb);
      target_bb = bb;

      gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
      current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;

      curr_bb = first_bb;
      if (dbg_cnt (sched_block))
	{
	  schedule_block (&curr_bb, rgn_n_insns);
	  gcc_assert (EBB_FIRST_BB (bb) == first_bb);
	  sched_rgn_n_insns += sched_n_insns;
	}
      else
	sched_rgn_n_insns += rgn_n_insns;

      /* Clean up.  */
      if (current_nr_blocks > 1)
	{
	  free (candidate_table);
	  free (bblst_table);
	  free (edgelst_table);
	}
    }

  /* Sanity check: verify that all region insns were scheduled.  */
  gcc_assert (sched_rgn_n_insns == rgn_n_insns);
  /* Done with this region.  */

  if (current_nr_blocks > 1)
    {
      free (prob);
      sbitmap_vector_free (dom);
      sbitmap_vector_free (pot_split);
      sbitmap_vector_free (ancestor_edges);
      free (rgn_edges);
    }
}
/* Initialize data structures for region scheduling.  */

static void
init_regions (void)
{
  nr_regions = 0;
  rgn_table = 0;
  rgn_bb_table = 0;
  block_to_bb = 0;
  containing_rgn = 0;
  extend_regions ();

  /* Compute regions for scheduling.  */
  if (reload_completed
      || n_basic_blocks == NUM_FIXED_BLOCKS + 1
      || !flag_schedule_interblock
      || is_cfg_nonregular ())
    {
      find_single_block_region ();
    }
  else
    {
      /* Compute the dominators and post dominators.  */
      calculate_dominance_info (CDI_DOMINATORS);

      /* Find regions.  */
      find_rgns ();

      if (sched_verbose >= 3)
	debug_regions ();

      /* For now.  This will move as more and more of haifa is converted
	 to using the cfg code.  */
      free_dominance_info (CDI_DOMINATORS);
    }
  RGN_BLOCKS (nr_regions) = RGN_BLOCKS (nr_regions - 1)
			    + RGN_NR_BLOCKS (nr_regions - 1);
}
/* The one entry point in this file.  */

void
schedule_insns (void)
{
  int rgn;

  /* Taking care of this degenerate case makes the rest of
     this code simpler.  */
  if (n_basic_blocks == NUM_FIXED_BLOCKS)
    return;

  nr_inter = 0;
  nr_spec = 0;

  /* We need current_sched_info in init_dependency_caches, which is
     invoked via sched_init.  */
  current_sched_info = &region_sched_info;

  df_set_flags (DF_LR_RUN_DCE);
  df_note_add_problem ();
  df_analyze ();
  regstat_compute_calls_crossed ();

  sched_init ();

  bitmap_initialize (&not_in_df, 0);
  bitmap_clear (&not_in_df);

  min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
		   / 100);
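
  /* Worked example (assuming the documented defaults: the min-spec-prob
     parameter is 40 and REG_BR_PROB_BASE is 10000): min_spec_prob
     becomes 40 * 10000 / 100 = 4000, i.e. an insn is a candidate for
     speculative motion only if it executes with probability of at
     least 40%.  */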
  init_regions ();

  /* EBB_HEAD is a region-scope structure.  But we realloc it for
     each region to save time/memory/something else.  */
  ebb_head = 0;

  /* Schedule every region in the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    if (dbg_cnt (sched_region))
      schedule_region (rgn);

  free (ebb_head);

  /* Reposition the prologue and epilogue notes in case we moved the
     prologue/epilogue insns.  */
  if (reload_completed)
    reposition_prologue_and_epilogue_notes ();

  if (sched_verbose)
    {
      if (reload_completed == 0 && flag_schedule_interblock)
	{
	  fprintf (sched_dump,
		   "\n;; Procedure interblock/speculative motions == %d/%d \n",
		   nr_inter, nr_spec);
	}
      else
	gcc_assert (nr_inter <= 0);
      fprintf (sched_dump, "\n\n");
    }

  /* Clean up.  */
  free (rgn_table);
  free (rgn_bb_table);
  free (block_to_bb);
  free (containing_rgn);

  regstat_free_calls_crossed ();

  bitmap_clear (&not_in_df);

  sched_finish ();
}
/* INSN has been added to/removed from current region.  */

static void
add_remove_insn (rtx insn, int remove_p)
{
  if (!remove_p)
    rgn_n_insns++;
  else
    rgn_n_insns--;

  if (INSN_BB (insn) == target_bb)
    {
      if (!remove_p)
	target_n_insns++;
      else
	target_n_insns--;
    }
}
/* Extend internal data structures.  */

static void
extend_regions (void)
{
  rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks);
  rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks);
  block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block);
  containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block);
}
/* BB was added to ebb after AFTER.  */

static void
add_block1 (basic_block bb, basic_block after)
{
  extend_regions ();

  bitmap_set_bit (&not_in_df, bb->index);

  if (after == 0 || after == EXIT_BLOCK_PTR)
    {
      int i;

      i = RGN_BLOCKS (nr_regions);
      /* I - first free position in rgn_bb_table.  */

      /* Yet another one-block region.  */
      rgn_bb_table[i] = bb->index;
      RGN_NR_BLOCKS (nr_regions) = 1;
      RGN_DONT_CALC_DEPS (nr_regions) = after == EXIT_BLOCK_PTR;
      RGN_HAS_REAL_EBB (nr_regions) = 0;
      CONTAINING_RGN (bb->index) = nr_regions;
      BLOCK_TO_BB (bb->index) = 0;

      nr_regions++;

      RGN_BLOCKS (nr_regions) = i + 1;
    }
  else
    {
      int i, pos;

      /* We need to fix rgn_table, block_to_bb, containing_rgn
	 and ebb_head.  */

      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);

      /* We extend ebb_head to one more position to
	 easily find the last position of the last ebb in
	 the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
	 is _always_ valid for access.  */

      i = BLOCK_TO_BB (after->index) + 1;
      pos = ebb_head[i] - 1;
      /* Now POS is the index of the last block in the region.  */

      /* Find index of basic block AFTER.  */
      for (; rgn_bb_table[pos] != after->index; pos--);

      pos++;
      gcc_assert (pos > ebb_head[i - 1]);

      /* i - ebb right after "AFTER".  */
      /* ebb_head[i] - VALID.  */

      /* Source position: ebb_head[i]
	 Destination position: ebb_head[i] + 1
	 Last position:
	   RGN_BLOCKS (nr_regions) - 1
	 Number of elements to copy: (last_position) - (source_position) + 1
       */

      memmove (rgn_bb_table + pos + 1,
	       rgn_bb_table + pos,
	       ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
	       * sizeof (*rgn_bb_table));

      rgn_bb_table[pos] = bb->index;

      for (; i <= current_nr_blocks; i++)
	ebb_head[i]++;

      i = CONTAINING_RGN (after->index);
      CONTAINING_RGN (bb->index) = i;

      RGN_HAS_REAL_EBB (i) = 1;

      for (++i; i <= nr_regions; i++)
	RGN_BLOCKS (i)++;
    }
}
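
/* Worked example for the insertion above (hypothetical table): let
   rgn_bb_table hold {4, 7, 9} with AFTER being block 7 at index 1.
   The search sets POS to 1, the increment makes it 2, the memmove
   shifts the tail {9} one slot right, and BB's index is stored at
   index 2, right after AFTER; every following ebb_head[] entry and
   the RGN_BLOCKS bound of each later region is then bumped by one.  */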
/* Fix internal data after interblock movement of jump instruction.
   For parameter meaning please refer to
   sched-int.h: struct sched_info: fix_recovery_cfg.  */

static void
fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
{
  int old_pos, new_pos, i;

  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);

  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
       rgn_bb_table[old_pos] != check_bb_nexti;
       old_pos--);
  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);

  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
       rgn_bb_table[new_pos] != bbi;
       new_pos--);
  new_pos++;
  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);

  gcc_assert (new_pos < old_pos);

  memmove (rgn_bb_table + new_pos + 1,
	   rgn_bb_table + new_pos,
	   (old_pos - new_pos) * sizeof (*rgn_bb_table));

  rgn_bb_table[new_pos] = check_bb_nexti;

  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
    ebb_head[i]++;
}
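
/* Sketch with hypothetical indices: if bbi sits at index 2 of
   rgn_bb_table and check_bb_nexti at index 5, NEW_POS becomes 3 and
   OLD_POS is 5; the memmove shifts entries 3..4 up to 4..5 and
   check_bb_nexti is re-inserted at index 3, immediately after bbi,
   restoring the region-relative block order after the jump moved.  */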
/* Return next block in ebb chain.  For parameter meaning please refer to
   sched-int.h: struct sched_info: advance_target_bb.  */

static basic_block
advance_target_bb (basic_block bb, rtx insn)
{
  if (insn)
    return 0;

  gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
	      && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
  return bb->next_bb;
}

#endif
static bool
gate_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  return flag_schedule_insns && dbg_cnt (sched_func);
#else
  return 0;
#endif
}

/* Run instruction scheduler.  */
static unsigned int
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  schedule_insns ();
#endif
  return 0;
}

static bool
gate_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  return optimize > 0 && flag_schedule_insns_after_reload
	 && dbg_cnt (sched2_func);
#else
  return 0;
#endif
}

/* Run second scheduling pass after reload.  */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  /* Do control and data sched analysis again,
     and write some more of the results to dump file.  */
  if (flag_sched2_use_superblocks || flag_sched2_use_traces)
    schedule_ebbs ();
  else
    schedule_insns ();
#endif
  return 0;
}
struct tree_opt_pass pass_sched =
{
  "sched1",                             /* name */
  gate_handle_sched,                    /* gate */
  rest_of_handle_sched,                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED,                             /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'S'                                   /* letter */
};

struct tree_opt_pass pass_sched2 =
{
  "sched2",                             /* name */
  gate_handle_sched2,                   /* gate */
  rest_of_handle_sched2,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED2,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'R'                                   /* letter */
};