/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GNU CC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting,
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
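/* A minimal sketch of how the three levels above nest.  This is NOT the
   real body of schedule_insns (); it is a hypothetical illustration only,
   using the helpers declared later in this file, and is guarded out so it
   is never compiled.  */
#if 0
static void
schedule_insns_sketch ()
{
  int rgn;

  /* Function level: build the cfg and carve it into regions.  */
  init_regions ();

  /* Region level (and, nested inside it, block level): schedule each
     region independently.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    schedule_region (rgn);
}
#endif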
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "sched-int.h"
/* Some accessor macros for h_i_d members only used within this file.  */
#define INSN_REF_COUNT(INSN)	(h_i_d[INSN_UID (INSN)].ref_count)
#define FED_BY_SPEC_LOAD(insn)	(h_i_d[INSN_UID (insn)].fed_by_spec_load)
#define IS_LOAD_INSN(insn)	(h_i_d[INSN_UID (insn)].is_load_insn)

#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100

/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;
/* Control flow graph edges are kept in circular lists.  */
typedef struct
{
  int from_block;
  int to_block;
  int next_in;
  int next_out;
}
haifa_edge;
static haifa_edge *edge_table;

#define NEXT_IN(edge) (edge_table[edge].next_in)
#define NEXT_OUT(edge) (edge_table[edge].next_out)
#define FROM_BLOCK(edge) (edge_table[edge].from_block)
#define TO_BLOCK(edge) (edge_table[edge].to_block)

/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;

/* Circular list of incoming/outgoing edges of a block.  */
static int *in_edges;
static int *out_edges;

#define IN_EDGES(block) (in_edges[block])
#define OUT_EDGES(block) (out_edges[block])
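/* Usage sketch: walking a circular out-edge list.  This helper is
   hypothetical (it is not part of the scheduler); it only shows the
   iteration idiom used throughout this file: start at OUT_EDGES and
   follow NEXT_OUT until the first edge comes around again.  Edge 0 is
   unused, so a zero OUT_EDGES means the block has no out edges.  */
#if 0
static int
count_out_edges (block)
     int block;
{
  int fst_edge, nxt_edge, n = 0;

  fst_edge = nxt_edge = OUT_EDGES (block);
  if (fst_edge == 0)
    return 0;
  do
    {
      n++;
      nxt_edge = NEXT_OUT (nxt_edge);
    }
  while (fst_edge != nxt_edge);

  return n;
}
#endif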
static int is_cfg_nonregular PARAMS ((void));
static int build_control_flow PARAMS ((struct edge_list *));
static void new_edge PARAMS ((int, int));
/* A region is the main entity for interblock scheduling: insns
   are allowed to move between blocks in the same region, along
   control flow graph edges, in the 'up' direction.  */
typedef struct
{
  int rgn_nr_blocks;		/* Number of blocks in region.  */
  int rgn_blocks;		/* Index in rgn_bb_table of the region's first block.  */
}
region;
/* Number of regions in the procedure.  */
static int nr_regions;

/* Table of region descriptions.  */
static region *rgn_table;

/* Array of lists of regions' blocks.  */
static int *rgn_bb_table;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
static int *block_to_bb;

/* The number of the region containing a block.  */
static int *containing_rgn;

#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define CONTAINING_RGN(block) (containing_rgn[block])
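/* Usage sketch (hypothetical helper, shown only to illustrate the region
   accessors above): visit every block of region RGN in topological order.
   The same idiom appears for real in debug_regions below.  */
#if 0
static void
visit_rgn_blocks (rgn)
     int rgn;
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      /* Here BLOCK_TO_BB (block) == bb and CONTAINING_RGN (block) == rgn.  */
    }
}
#endif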
void debug_regions PARAMS ((void));
static void find_single_block_region PARAMS ((void));
static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
static int too_large PARAMS ((int, int *, int *));

extern void debug_live PARAMS ((int, int));
/* Blocks of the current region being scheduled.  */
static int current_nr_blocks;
static int current_blocks;

/* The mapping from bb to block.  */
#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
/* Bit vectors and bitset operations are needed for computations on
   the control flow graph.  */

typedef unsigned HOST_WIDE_INT *bitset;
typedef struct
{
  int *first_member;		/* Pointer to the list start in bitlst_table.  */
  int nr_members;		/* The number of members of the bit list.  */
}
bitlst;

static int bitlst_table_last;
static int bitlst_table_size;
static int *bitlst_table;

static char bitset_member PARAMS ((bitset, int, int));
static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef bitlst bblst;
typedef struct
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
}
candidate;

static candidate *candidate_table;
/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static int *bblst_table, bblst_size, bblst_last;

#define IS_VALID(src) ( candidate_table[src].is_valid )
#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
#define SRC_PROB(src) ( candidate_table[src].src_prob )
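/* Usage sketch (hypothetical, condensed from init_ready_list below): an
   insn from a source block BB_SRC may be considered for motion into the
   target only if the source is a valid candidate; a speculative source
   must additionally pass the live and exception-freeness checks.  */
#if 0
  if (IS_VALID (bb_src)
      && (!IS_SPECULATIVE (bb_src)
	  || (check_live (insn, bb_src)
	      && is_exception_free (insn, bb_src, target_bb))))
    /* INSN is a legal candidate for interblock motion.  */;
#endif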
/* The bb being currently scheduled.  */
static int target_bb;

typedef bitlst edgelst;

/* Target info functions.  */
static void split_edges PARAMS ((int, int, edgelst *));
static void compute_trg_info PARAMS ((int));
void debug_candidate PARAMS ((int));
void debug_candidates PARAMS ((int));
/* Bit-set of bbs, where bit 'i' stands for bb 'i'.  */
typedef bitset bbset;

/* Number of words of the bbset.  */
static int bbset_size;

/* Dominators array: dom[i] contains the bbset of dominators of
   bb i in the region.  */
static bbset *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)
/* Is bb_src dominated by bb_trg?  */
#define IS_DOMINATED(bb_src, bb_trg)				\
( bitset_member (dom[bb_src], bb_trg, bbset_size) )

/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;

/* The probability of bb_src, relative to bb_trg.  Note that while the
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
					      prob[bb_trg])))
/* Bit-set of edges, where bit i stands for edge i.  */
typedef bitset edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static int *rgn_edges;

/* Number of words in an edgeset.  */
static int edgeset_size;

/* Number of bits in an edgeset.  */
static int edgeset_bitsize;

/* Mapping from each edge in the graph to its number in the rgn.  */
static int *edge_to_bit;
#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

static void compute_dom_prob_ps PARAMS ((int));

#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))

/* Parameters affecting the decision of rank_for_schedule().
   ??? Nope.  But MIN_PROBABILITY is used in compute_trg_info.  */
#define MIN_DIFF_PRIORITY 2
#define MIN_PROBABILITY 40
#define MIN_PROB_DIFF 10
/* Speculative scheduling functions.  */
static int check_live_1 PARAMS ((int, rtx));
static void update_live_1 PARAMS ((int, rtx));
static int check_live PARAMS ((rtx, int));
static void update_live PARAMS ((rtx, int));
static void set_spec_fed PARAMS ((rtx));
static int is_pfree PARAMS ((rtx, int, int));
static int find_conditional_protection PARAMS ((rtx, int));
static int is_conditionally_protected PARAMS ((rtx, int, int));
static int may_trap_exp PARAMS ((rtx, int));
static int haifa_classify_insn PARAMS ((rtx));
static int is_prisky PARAMS ((rtx, int, int));
static int is_exception_free PARAMS ((rtx, int, int));

static void add_branch_dependences PARAMS ((rtx, rtx));
static void compute_block_backward_dependences PARAMS ((int));
void debug_dependencies PARAMS ((void));

static void init_regions PARAMS ((void));
static void schedule_region PARAMS ((int));
static void propagate_deps PARAMS ((int, struct deps *, int));
static void free_pending_lists PARAMS ((void));
/* Functions for construction of the control flow graph.  */

/* Return 1 if control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
   have nonlocal gotos.  */

static int
is_cfg_nonregular ()
{
  int b;
  rtx insn;
  RTX_CODE code;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If this function has a computed jump, then we consider the cfg
     not well structured.  */
  if (current_function_has_computed_jump)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that flow.c
     computes an accurate cfg for EH.  */
  if (exception_handler_labels)
    return 1;

  /* If we have non-jumping insns which refer to labels, then we consider
     the cfg not well structured.  */
  /* Check for labels referred to other than by jumps.  */
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
	code = GET_CODE (insn);
	if (GET_RTX_CLASS (code) == 'i')
	  {
	    rtx note;

	    for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
	      if (REG_NOTE_KIND (note) == REG_LABEL)
		return 1;
	  }

	if (insn == BLOCK_END (b))
	  break;
      }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
/* Build the control flow graph and set nr_edges.

   Instead of trying to build a cfg ourselves, we rely on flow to
   do it for us.  Stamp out useless code (and bug) duplication.

   Return nonzero if an irregularity in the cfg is found which would
   prevent cross block scheduling.  */

static int
build_control_flow (edge_list)
     struct edge_list *edge_list;
{
  int i, unreachable, num_edges;

  /* This already accounts for entry/exit edges.  */
  num_edges = NUM_EDGES (edge_list);

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block b = BASIC_BLOCK (i);

      if (b->pred == NULL
	  || (b->pred->src == b
	      && b->pred->pred_next == NULL))
	unreachable = 1;
    }

  /* ??? We can kill these soon.  */
  in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));

  nr_edges = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR
	  && e->src != ENTRY_BLOCK_PTR)
	new_edge (e->src->index, e->dest->index);
    }

  /* Increment by 1, since edge 0 is unused.  */
  nr_edges++;

  return unreachable;
}
/* Record an edge in the control flow graph from SOURCE to TARGET.

   In theory, this is redundant with the s_succs computed above, but
   we have not converted all of haifa to use information from the
   integrated cfg.  */

static void
new_edge (source, target)
     int source, target;
{
  int e, next_edge;
  int curr_edge, fst_edge;

  /* Check for duplicates.  */
  fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
	  && TO_BLOCK (curr_edge) == target)
	return;
      curr_edge = NEXT_OUT (curr_edge);
      if (fst_edge == curr_edge)
	break;
    }

  e = ++nr_edges;

  FROM_BLOCK (e) = source;
  TO_BLOCK (e) = target;

  if (OUT_EDGES (source))
    {
      next_edge = NEXT_OUT (OUT_EDGES (source));
      NEXT_OUT (OUT_EDGES (source)) = e;
      NEXT_OUT (e) = next_edge;
    }
  else
    {
      OUT_EDGES (source) = e;
      NEXT_OUT (e) = e;
    }

  if (IN_EDGES (target))
    {
      next_edge = NEXT_IN (IN_EDGES (target));
      NEXT_IN (IN_EDGES (target)) = e;
      NEXT_IN (e) = next_edge;
    }
  else
    {
      IN_EDGES (target) = e;
      NEXT_IN (e) = e;
    }
}
/* BITSET macros for operations on the control flow graph.  */

/* Compute bitwise union of two bitsets.  */
#define BITSET_UNION(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) |= *(sp++); } while (0)

/* Compute bitwise intersection of two bitsets.  */
#define BITSET_INTER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= *(sp++); } while (0)

/* Compute bitwise difference of two bitsets.  */
#define BITSET_DIFFER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= ~*(sp++); } while (0)

/* Inverts every bit of bitset 'set'.  */
#define BITSET_INVERT(set, len)					\
do { register bitset tmpset = set;				\
     register int i;						\
     for (i = 0; i < len; i++, tmpset++)			\
       *tmpset = ~*tmpset; } while (0)
/* Turn on the index'th bit in bitset set.  */
#define BITSET_ADD(set, index, len)			\
{							\
  if (index >= HOST_BITS_PER_WIDE_INT * len)		\
    abort ();						\
  else							\
    set[index/HOST_BITS_PER_WIDE_INT] |=		\
      1 << (index % HOST_BITS_PER_WIDE_INT);		\
}

/* Turn off the index'th bit in set.  */
#define BITSET_REMOVE(set, index, len)			\
{							\
  if (index >= HOST_BITS_PER_WIDE_INT * len)		\
    abort ();						\
  else							\
    set[index/HOST_BITS_PER_WIDE_INT] &=		\
      ~(1 << (index%HOST_BITS_PER_WIDE_INT));		\
}

/* Check if the index'th bit in bitset set is on.  */

static char
bitset_member (set, index, len)
     bitset set;
     int index, len;
{
  if (index >= HOST_BITS_PER_WIDE_INT * len)
    abort ();
  return (set[index / HOST_BITS_PER_WIDE_INT] &
	  1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
}
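/* Usage sketch (hypothetical): build a temporary bbset, set a bit, test it,
   and clear it again.  The allocation idiom matches the one used for real
   in split_edges below.  */
#if 0
  {
    bbset tmp = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));

    BITSET_ADD (tmp, 3, bbset_size);		/* Turn on the bit for bb 3.  */
    if (bitset_member (tmp, 3, bbset_size))	/* Now nonzero.  */
      ;
    BITSET_REMOVE (tmp, 3, bbset_size);		/* And off again.  */
    free (tmp);
  }
#endif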
/* Translate a bit-set SET to a list BL of the bit-set members.  */

static void
extract_bitlst (set, len, bitlen, bl)
     bitset set;
     int len, bitlen;
     bitlst *bl;
{
  int i, j, offset;
  unsigned HOST_WIDE_INT word;

  /* bblst table space is reused in each call to extract_bitlst.  */
  bitlst_table_last = 0;
  bl->first_member = &bitlst_table[bitlst_table_last];
  bl->nr_members = 0;

  /* Iterate over each word in the bitset.  */
  for (i = 0; i < len; i++)
    {
      word = set[i];
      offset = i * HOST_BITS_PER_WIDE_INT;

      /* Iterate over each bit in the word, but do not
	 go beyond the end of the defined bits.  */
      for (j = 0; offset < bitlen && word; j++)
	{
	  if (word & 1)
	    {
	      bitlst_table[bitlst_table_last++] = offset;
	      (bl->nr_members)++;
	    }
	  word >>= 1;
	  offset++;
	}
    }
}
/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions ()
{
  int rgn, bb;

  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
	       rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
	{
	  current_blocks = RGN_BLOCKS (rgn);

	  if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
	    abort ();

	  fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
	}

      fprintf (sched_dump, "\n\n");
    }
}
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region ()
{
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    {
      rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
      BLOCK_TO_BB (i) = 0;
    }
  nr_regions = n_basic_blocks;
}
/* Update number of blocks and the estimate for number of insns
   in the region.  Return 1 if the region is "too large" for interblock
   scheduling (compile time considerations), otherwise return 0.  */

static int
too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
		   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)			\
{							\
  if (max_hdr[blk] == -1)				\
    max_hdr[blk] = hdr;					\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])		\
    RESET_BIT (inner, hdr);				\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])		\
    {							\
      RESET_BIT (inner, max_hdr[blk]);			\
      max_hdr[blk] = hdr;				\
    }							\
}
/* Find regions for interblock scheduling.

   A region for scheduling can be:

   * A loop-free procedure, or

   * A reducible inner loop, or

   * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick)
   variables:

     nr_regions
     rgn_table
     rgn_bb_table
     block_to_bb
     containing_rgn

   We use dominator relationships to avoid making regions out of
   non-reducible loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */
static void
find_rgns (edge_list, dom)
     struct edge_list *edge_list;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0, current_edge = out_edges[0];
  int num_bbs, num_insns, unreachable;
  int too_large_failure;

  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is on the current DFS stack.  */
  sbitmap in_stack;

  int num_edges = NUM_EDGES (edge_list);

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, -1 otherwise).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
  dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
  stack = (int *) xmalloc (nr_edges * sizeof (int));

  inner = sbitmap_alloc (n_basic_blocks);
  sbitmap_ones (inner);

  header = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (header);

  passed = sbitmap_alloc (nr_edges);
  sbitmap_zero (passed);

  in_queue = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_stack);

  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;
  /* DFS traversal to find inner loops in the cfg.  */

  sp = -1;
  while (1)
    {
      if (current_edge == 0 || TEST_BIT (passed, current_edge))
	{
	  /* We have reached a leaf node or a node that was already
	     processed.  Pop edges off the stack until we find
	     an edge that has not yet been processed.  */
	  while (sp >= 0
		 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
	    {
	      /* Pop entry off the stack.  */
	      current_edge = stack[sp--];
	      node = FROM_BLOCK (current_edge);
	      child = TO_BLOCK (current_edge);
	      RESET_BIT (in_stack, child);
	      if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	      current_edge = NEXT_OUT (current_edge);
	    }

	  /* See if we have finished the DFS tree traversal.  */
	  if (sp < 0 && TEST_BIT (passed, current_edge))
	    break;

	  /* Nope, continue the traversal with the popped node.  */
	  continue;
	}

      /* Process a node.  */
      node = FROM_BLOCK (current_edge);
      child = TO_BLOCK (current_edge);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* If the successor is in the stack, then we've found a loop.
	 Mark the loop, if it is not a natural loop, then it will
	 be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
	{
	  no_loops = 0;
	  SET_BIT (header, child);
	  UPDATE_LOOP_RELATIONS (node, child);
	  SET_BIT (passed, current_edge);
	  current_edge = NEXT_OUT (current_edge);
	  continue;
	}

      /* If the child was already visited, then there is no need to visit
	 it again.  Just update the loop relationships and restart
	 with a new edge.  */
      if (dfs_nr[child])
	{
	  if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	  SET_BIT (passed, current_edge);
	  current_edge = NEXT_OUT (current_edge);
	  continue;
	}

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_BIT (passed, current_edge);
      current_edge = OUT_EDGES (child);

      /* This is temporary until haifa is converted to use rth's new
	 cfg routines which have true entry/exit blocks and the
	 appropriate edges from/to those blocks.

	 Generally we update dfs_nr for a node when we process its
	 out edge.  However, if the node has no out edge then we will
	 not set dfs_nr for that node.  This can confuse the scheduler
	 into thinking that we have unreachable blocks, which in turn
	 disables cross block scheduling.

	 So, if we have a node with no out edges, go ahead and mark it
	 as reachable now.  */
      if (current_edge == 0)
	dfs_nr[child] = ++count;
    }
  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
	unreachable = 1;
	break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR)
	degree[e->dest->index]++;
    }
  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue;

      /* Second traversal: find reducible inner loops and topologically sort
	 blocks of each region.  */

      queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
      /* Find blocks which are inner loop headers.  We still have non-reducible
	 loops to consider at this point.  */
      for (i = 0; i < n_basic_blocks; i++)
	{
	  if (TEST_BIT (header, i) && TEST_BIT (inner, i))
	    {
	      edge e;
	      int j;

	      /* Now check that the loop is reducible.  We do this separate
		 from finding inner loops so that we do not find a reducible
		 loop which contains an inner non-reducible loop.

		 A simple way to find reducible/natural loops is to verify
		 that each block in the loop is dominated by the loop
		 header.

		 If there exists a block that is not dominated by the loop
		 header, then the block is reachable from outside the loop
		 and thus the loop is not a natural loop.  */
	      for (j = 0; j < n_basic_blocks; j++)
		{
		  /* First identify blocks in the loop, except for the loop
		     entry block.  */
		  if (i == max_hdr[j] && i != j)
		    {
		      /* Now verify that the block is dominated by the loop
			 header.  */
		      if (!TEST_BIT (dom[j], i))
			break;
		    }
		}

	      /* If we exited the loop early, then I is the header of
		 a non-reducible loop and we should quit processing it
		 now.  */
	      if (j != n_basic_blocks)
		continue;

	      /* I is a header of an inner loop, or block 0 in a subroutine
		 with no loops at all.  */
	      head = tail = -1;
	      too_large_failure = 0;
	      loop_head = max_hdr[i];

	      /* Decrease degree of all I's successors for topological
		 ordering.  */
	      for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
		if (e->dest != EXIT_BLOCK_PTR)
		  --degree[e->dest->index];
	      /* Estimate # insns, and count # blocks in the region.  */
	      num_bbs = 1;
	      num_insns = (INSN_LUID (BLOCK_END (i))
			   - INSN_LUID (BLOCK_HEAD (i)));

	      /* Find all loop latches (blocks with back edges to the loop
		 header), or all the leaf blocks if the cfg has no loops.

		 Place those blocks into the queue.  */
	      if (no_loops)
		{
		  for (j = 0; j < n_basic_blocks; j++)
		    /* Leaf nodes have only a single successor which must
		       be EXIT_BLOCK.  */
		    if (BASIC_BLOCK (j)->succ
			&& BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
			&& BASIC_BLOCK (j)->succ->succ_next == NULL)
		      {
			queue[++tail] = j;
			SET_BIT (in_queue, j);

			if (too_large (j, &num_bbs, &num_insns))
			  {
			    too_large_failure = 1;
			    break;
			  }
		      }
		}
	      else
		{
		  for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
		    {
		      if (e->src == ENTRY_BLOCK_PTR)
			continue;

		      node = e->src->index;

		      if (max_hdr[node] == loop_head && node != i)
			{
			  /* This is a loop latch.  */
			  queue[++tail] = node;
			  SET_BIT (in_queue, node);

			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
			}
		    }
		}
	      /* Now add all the blocks in the loop to the queue.

		 We know the loop is a natural loop; however the algorithm
		 above will not always mark certain blocks as being in the
		 loop.  Consider:
		    node    children
		     a        b,c
		     b        c
		     c        a,d
		     d        b

		 The algorithm in the DFS traversal may not mark B & D as part
		 of the loop (i.e., they will not have max_hdr set to A).

		 We know they cannot be loop latches (else they would have
		 had max_hdr set since they'd have a backedge to a dominator
		 block).  So we don't need them on the initial queue.

		 We know they are part of the loop because they are dominated
		 by the loop header and can be reached by a backwards walk of
		 the edges starting with nodes on the initial queue.

		 It is safe and desirable to include those nodes in the
		 loop/scheduling region.  To do so we would need to decrease
		 the degree of a node if it is the target of a backedge
		 within the loop itself as the node is placed in the queue.

		 We do not do this because I'm not sure that the actual
		 scheduling code will properly handle this case. ?!?  */
	      while (head < tail && !too_large_failure)
		{
		  edge e;
		  child = queue[++head];

		  for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
		    {
		      node = e->src->index;

		      /* See discussion above about nodes not marked as in
			 this loop during the initial DFS traversal.  */
		      if (e->src == ENTRY_BLOCK_PTR
			  || max_hdr[node] != loop_head)
			{
			  tail = -1;
			  break;
			}
		      else if (!TEST_BIT (in_queue, node) && node != i)
			{
			  queue[++tail] = node;
			  SET_BIT (in_queue, node);

			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
			}
		    }
		}
	      if (tail >= 0 && !too_large_failure)
		{
		  /* Place the loop header into list of region blocks.  */
		  degree[i] = -1;
		  rgn_bb_table[idx] = i;
		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
		  RGN_BLOCKS (nr_regions) = idx++;
		  CONTAINING_RGN (i) = nr_regions;
		  BLOCK_TO_BB (i) = count = 0;

		  /* Remove blocks from queue[] when their in degree
		     becomes zero.  Repeat until no blocks are left on the
		     list.  This produces a topological list of blocks in
		     the region.  */
		  while (tail >= 0)
		    {
		      edge e;

		      if (head < 0)
			head = tail;
		      child = queue[head];
		      if (degree[child] == 0)
			{
			  degree[child] = -1;
			  rgn_bb_table[idx++] = child;
			  BLOCK_TO_BB (child) = ++count;
			  CONTAINING_RGN (child) = nr_regions;
			  queue[head] = queue[tail--];

			  for (e = BASIC_BLOCK (child)->succ;
			       e;
			       e = e->succ_next)
			    if (e->dest != EXIT_BLOCK_PTR)
			      --degree[e->dest->index];
			}
		      else
			--head;
		    }
		  ++nr_regions;
		}
	    }
	}
      /* Any block that did not end up in a region is placed into a region
	 by itself.  */
      for (i = 0; i < n_basic_blocks; i++)
	if (degree[i] >= 0)
	  {
	    rgn_bb_table[idx] = i;
	    RGN_NR_BLOCKS (nr_regions) = 1;
	    RGN_BLOCKS (nr_regions) = idx++;
	    CONTAINING_RGN (i) = nr_regions++;
	    BLOCK_TO_BB (i) = 0;
	  }

      free (queue);
    }

  free (max_hdr);
  free (dfs_nr);
  free (stack);
  sbitmap_free (passed);
  sbitmap_free (header);
  sbitmap_free (inner);
  sbitmap_free (in_queue);
  sbitmap_free (in_stack);
}
/* Functions for region scheduling information.  */

/* Compute dominators, probability, and potential-split-edges of bb.
   Assume that these values were already computed for bb's predecessors.  */
static void
compute_dom_prob_ps (bb)
     int bb;
{
  int nxt_in_edge, fst_in_edge, pred;
  int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;

  prob[bb] = 0.0;
  if (IS_RGN_ENTRY (bb))
    {
      BITSET_ADD (dom[bb], 0, bbset_size);
      prob[bb] = 1.0;
      return;
    }

  fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));

  /* Initialize dom[bb] to '111..1'.  */
  BITSET_INVERT (dom[bb], bbset_size);

  do
    {
      pred = FROM_BLOCK (nxt_in_edge);
      BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);

      BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
		    edgeset_size);

      BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);

      nr_out_edges = 1;
      nr_rgn_out_edges = 0;
      fst_out_edge = OUT_EDGES (pred);
      nxt_out_edge = NEXT_OUT (fst_out_edge);
      BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
		    edgeset_size);

      BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);

      /* The successor doesn't belong in the region?  */
      if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
	  CONTAINING_RGN (BB_TO_BLOCK (bb)))
	++nr_rgn_out_edges;

      while (fst_out_edge != nxt_out_edge)
	{
	  ++nr_out_edges;
	  /* The successor doesn't belong in the region?  */
	  if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
	      CONTAINING_RGN (BB_TO_BLOCK (bb)))
	    ++nr_rgn_out_edges;
	  BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
	  nxt_out_edge = NEXT_OUT (nxt_out_edge);
	}

      /* Now nr_rgn_out_edges is the number of region-exit edges from
	 pred, and nr_out_edges will be the number of pred out edges
	 not leaving the region.  */
      nr_out_edges -= nr_rgn_out_edges;
      if (nr_rgn_out_edges > 0)
	prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      else
	prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;

      nxt_in_edge = NEXT_IN (nxt_in_edge);
    }
  while (fst_in_edge != nxt_in_edge);

  BITSET_ADD (dom[bb], bb, bbset_size);
  BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);

  if (sched_verbose >= 2)
    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
	     (int) (100.0 * prob[bb]));
}
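/* Worked example for the probability update above (illustrative numbers
   only): if a predecessor PRED has prob[PRED] == 0.8 and two out edges,
   one of which leaves the region, then nr_rgn_out_edges == 1 and
   nr_out_edges == 2 - 1 == 1, so the contribution to prob[bb] is
   0.9 * 0.8 / 1 == 0.72.  If prob[bb_trg] were 0.9, GET_SRC_PROB
   (bb, bb_trg) would then be (int) (100.0 * 0.72 / 0.9), i.e. 80.  */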
/* Functions for target info.  */

/* Compute in BL the list of split-edges of bb_src relative to bb_trg.
   Note that bb_trg dominates bb_src.  */

static void
split_edges (bb_src, bb_trg, bl)
     int bb_src;
     int bb_trg;
     edgelst *bl;
{
  int es = edgeset_size;
  edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));

  while (es--)
    src[es] = (pot_split[bb_src])[es];
  BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
  extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
  free (src);
}
/* Find the valid candidate-source-blocks for the target block TRG, compute
   their probability, and check if they are speculative or not.
   For speculative sources, compute their update-blocks and split-blocks.  */

static void
compute_trg_info (trg)
     int trg;
{
  register candidate *sp;
  edgelst el;
  int check_block, update_idx;
  int i, j, k, fst_edge, nxt_edge;

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = 100;

  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
	{
	  sp->src_prob = GET_SRC_PROB (i, trg);
	  sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
	}

      if (sp->is_valid)
	{
	  split_edges (i, trg, &el);
	  sp->is_speculative = (el.nr_members) ? 1 : 0;
	  if (sp->is_speculative && !flag_schedule_speculative)
	    sp->is_valid = 0;
	}

      if (sp->is_valid)
	{
	  char *update_blocks;

	  /* Compute split blocks and store them in bblst_table.
	     The TO block of every split edge is a split block.  */
	  sp->split_bbs.first_member = &bblst_table[bblst_last];
	  sp->split_bbs.nr_members = el.nr_members;
	  for (j = 0; j < el.nr_members; bblst_last++, j++)
	    bblst_table[bblst_last] =
	      TO_BLOCK (rgn_edges[el.first_member[j]]);
	  sp->update_bbs.first_member = &bblst_table[bblst_last];

	  /* Compute update blocks and store them in bblst_table.
	     For every split edge, look at the FROM block, and check
	     all out edges.  For each out edge that is not a split edge,
	     add the TO block to the update block list.  This list can end
	     up with a lot of duplicates.  We need to weed them out to avoid
	     overrunning the end of the bblst_table.  */
	  update_blocks = (char *) alloca (n_basic_blocks);
	  memset (update_blocks, 0, n_basic_blocks);

	  update_idx = 0;
	  for (j = 0; j < el.nr_members; j++)
	    {
	      check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
	      fst_edge = nxt_edge = OUT_EDGES (check_block);
	      do
		{
		  if (! update_blocks[TO_BLOCK (nxt_edge)])
		    {
		      for (k = 0; k < el.nr_members; k++)
			if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
			  break;

		      if (k >= el.nr_members)
			{
			  bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
			  update_blocks[TO_BLOCK (nxt_edge)] = 1;
			  update_idx++;
			}
		    }

		  nxt_edge = NEXT_OUT (nxt_edge);
		}
	      while (fst_edge != nxt_edge);
	    }
	  sp->update_bbs.nr_members = update_idx;

	  /* Make sure we didn't overrun the end of bblst_table.  */
	  if (bblst_last > bblst_size)
	    abort ();
	}
      else
	{
	  sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

	  sp->is_speculative = 0;
	  sp->src_prob = 0;
	}
    }
}
/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidate (i)
     int i;
{
  if (!candidate_table[i].is_valid)
    return;

  if (candidate_table[i].is_speculative)
    {
      int j;
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);

      fprintf (sched_dump, "split path: ");
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
	{
	  int b = candidate_table[i].split_bbs.first_member[j];

	  fprintf (sched_dump, " %d ", b);
	}
      fprintf (sched_dump, "\n");

      fprintf (sched_dump, "update path: ");
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
	{
	  int b = candidate_table[i].update_bbs.first_member[j];

	  fprintf (sched_dump, " %d ", b);
	}
      fprintf (sched_dump, "\n");
    }
  else
    {
      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
    }
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidates (trg)
     int trg;
{
  int i;

  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
	   BB_TO_BLOCK (trg), trg);
  for (i = trg + 1; i < current_nr_blocks; i++)
    debug_candidate (i);
}
/* Functions for speculative scheduling.  */

/* Return 0 if x is a set of a register alive in the beginning of one
   of the split-blocks of src, otherwise return 1.  */

static int
check_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return 1;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
	 || GET_CODE (reg) == SIGN_EXTRACT
	 || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL
      && GET_MODE (reg) == BLKmode)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
	if (check_live_1 (src, XVECEXP (reg, 0, i)))
	  return 1;
      return 0;
    }

  if (GET_CODE (reg) != REG)
    return 1;

  regno = REGNO (reg);

  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    {
      /* Global registers are assumed live.  */
      return 0;
    }

  if (regno < FIRST_PSEUDO_REGISTER)
    {
      /* Check for hard registers.  */
      int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      while (--j >= 0)
	for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
	  {
	    int b = candidate_table[src].split_bbs.first_member[i];

	    if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
				 regno + j))
	      return 0;
	  }
    }
  else
    {
      /* Check for pseudo registers.  */
      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
	{
	  int b = candidate_table[src].split_bbs.first_member[i];

	  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
	    return 0;
	}
    }

  return 1;
}
/* If x is a set of a register R, mark that R is alive in the beginning
   of every update-block of src.  */

static void
update_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
	 || GET_CODE (reg) == SIGN_EXTRACT
	 || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL
      && GET_MODE (reg) == BLKmode)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
	update_live_1 (src, XVECEXP (reg, 0, i));
      return;
    }

  if (GET_CODE (reg) != REG)
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
    {
      if (regno < FIRST_PSEUDO_REGISTER)
	{
	  int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
	  while (--j >= 0)
	    for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
	      {
		int b = candidate_table[src].update_bbs.first_member[i];

		SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
				   regno + j);
	      }
	}
      else
	{
	  for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
	    {
	      int b = candidate_table[src].update_bbs.first_member[i];

	      SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
	    }
	}
    }
}
/* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before first insertion of insn to
   ready-list or before the scheduling.  */

static int
check_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return check_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;

      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
	if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
	     || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
	    && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
	  return 0;

      return 1;
    }

  return 1;
}

/* Update the live registers info after insn was moved speculatively from
   block src to trg.  */

static void
update_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    update_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;

      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
	if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
	    || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
	  update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
    }
}
/* Exception Free Loads:

   We define five classes of speculative loads: IFREE, IRISKY,
   PFREE, PRISKY, and MFREE.

   IFREE loads are loads that are proved to be exception-free, just
   by examining the load insn.  Examples for such loads are loads
   from TOC and loads of global data.

   IRISKY loads are loads that are proved to be exception-risky,
   just by examining the load insn.  Examples for such loads are
   volatile loads and loads from shared memory.

   PFREE loads are loads for which we can prove, by examining other
   insns, that they are exception-free.  Currently, this class consists
   of loads for which we are able to find a "similar load", either in
   the target block, or, if only one split-block exists, in that split
   block.  Load2 is similar to load1 if both have same single base
   register.  We identify only part of the similar loads, by finding
   an insn upon which both load1 and load2 have a DEF-USE dependence.

   PRISKY loads are loads for which we can prove, by examining other
   insns, that they are exception-risky.  Currently we have two proofs for
   such loads.  The first proof detects loads that are probably guarded by a
   test on the memory address.  This proof is based on the
   backward and forward data dependence information for the region.
   Let load-insn be the examined load.
   Load-insn is PRISKY iff ALL the following hold:

   - insn1 is not in the same block as load-insn
   - there is a DEF-USE dependence chain (insn1, ..., load-insn)
   - test-insn is either a compare or a branch, not in the same block
     as load-insn
   - load-insn is reachable from test-insn
   - there is a DEF-USE dependence chain (insn1, ..., test-insn)

   This proof might fail when the compare and the load are fed
   by an insn not in the region.  To solve this, we will add to this
   group all loads that have no input DEF-USE dependence.

   The second proof detects loads that are directly or indirectly
   fed by a speculative load.  This proof is affected by the
   scheduling process.  We will use the flag fed_by_spec_load.
   Initially, all insns have this flag reset.  After a speculative
   motion of an insn, if insn is either a load, or marked as
   fed_by_spec_load, we will also mark as fed_by_spec_load every
   insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
   load which is fed_by_spec_load is also PRISKY.

   MFREE (maybe-free) loads are all the remaining loads.  They may be
   exception-free, but we cannot prove it.

   Now, all loads in IFREE and PFREE classes are considered
   exception-free, while all loads in IRISKY and PRISKY classes are
   considered exception-risky.  As for loads in the MFREE class,
   these are considered either exception-free or exception-risky,
   depending on whether we are pessimistic or optimistic.  We have
   to take the pessimistic approach to assure the safety of
   speculative scheduling, but we can take the optimistic approach
   by invoking the -fsched_spec_load_dangerous option.  */
enum INSN_TRAP_CLASS
{
  TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
  PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
};

#define WORST_CLASS(class1, class2) \
((class1 > class2) ? class1 : class2)
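/* Illustration of the classification on hypothetical patterns (example
   RTL only, not taken from any target):

     (set (reg) (mem/v ...))                  -- volatile load: IRISKY.
     (set (reg) (mem (reg)))                  -- single base register:
					         PFREE_CANDIDATE.
     (set (reg) (mem (plus (reg) (reg))))     -- unrecognized address form:
					         PRISKY_CANDIDATE.
     (set (mem ...) (reg))                    -- a store: TRAP_RISKY.

   may_trap_exp and haifa_classify_insn below compute this for real.  */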
/* Non-zero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)					\
(bb_from == bb_to							\
 || IS_RGN_ENTRY (bb_from)						\
 || (bitset_member (ancestor_edges[bb_to],				\
		    EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))),	\
		    edgeset_size)))

/* Non-zero iff the address is comprised of at most one register.  */
#define CONST_BASED_ADDRESS_P(x)			\
  (GET_CODE (x) == REG					\
   || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS	\
	|| (GET_CODE (x) == LO_SUM))			\
       && (GET_CODE (XEXP (x, 0)) == CONST_INT		\
	   || GET_CODE (XEXP (x, 1)) == CONST_INT)))
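/* For example (hypothetical addresses): (reg 65) and
   (plus (reg 65) (const_int 4)) both satisfy CONST_BASED_ADDRESS_P, while
   (plus (reg 65) (reg 66)) does not, since neither operand of the PLUS
   is a CONST_INT.  */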
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */

static void
set_spec_fed (load_insn)
     rtx load_insn;
{
  rtx link;

  for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
    if (GET_MODE (link) == VOIDmode)
      FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
}				/* set_spec_fed */
/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn, that guards the speculative load.  */

static int
find_conditional_protection (insn, load_insn_bb)
     rtx insn;
     int load_insn_bb;
{
  rtx link;

  /* Iterate through DEF-USE forward dependences.  */
  for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
    {
      rtx next = XEXP (link, 0);
      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
	   CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
	  && IS_REACHABLE (INSN_BB (next), load_insn_bb)
	  && load_insn_bb != INSN_BB (next)
	  && GET_MODE (link) == VOIDmode
	  && (GET_CODE (next) == JUMP_INSN
	      || find_conditional_protection (next, load_insn_bb)))
	return 1;
    }
  return 0;
}				/* find_conditional_protection */
/* Returns 1 if the same insn1 that participates in the computation
   of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
   chains:
   insn1 -> ... -> conditional-branch
   insn1 -> ... -> load_insn,
   and if a flow path exists:
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
   and if insn1 is on the path
   region-entry -> ... -> bb_trg -> ... load_insn.

   Locate insn1 by climbing on LOG_LINKS from load_insn.
   Locate the branch by following INSN_DEPEND from insn1.  */

static int
is_conditionally_protected (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx link;

  for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
    {
      rtx insn1 = XEXP (link, 0);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (GET_MODE (link) != VOIDmode
	  || GET_CODE (insn1) == JUMP_INSN)
	continue;

      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
	  || (CONTAINING_RGN (BLOCK_NUM (insn1))
	      != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
	  || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
	continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
	return 1;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return 0;
}				/* is_conditionally_protected */
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
   load_insn can move speculatively from bb_src to bb_trg.  All the
   following must hold:

   (1) both loads have 1 base register (PFREE_CANDIDATEs).
   (2) load_insn and load1 have a def-use dependence upon
       the same insn 'insn1'.
   (3) either load2 is in bb_trg, or:
       - there's only one split-block, and
       - load1 is on the escape path, and

   From all these we can conclude that the two loads access memory
   addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */

static int
is_pfree (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx back_link;
  register candidate *candp = candidate_table + bb_src;

  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;

  for (back_link = LOG_LINKS (load_insn);
       back_link; back_link = XEXP (back_link, 1))
    {
      rtx insn1 = XEXP (back_link, 0);

      if (GET_MODE (back_link) == VOIDmode)
	{
	  /* Found a DEF-USE dependence (insn1, load_insn).  */
	  rtx fore_link;

	  for (fore_link = INSN_DEPEND (insn1);
	       fore_link; fore_link = XEXP (fore_link, 1))
	    {
	      rtx insn2 = XEXP (fore_link, 0);
	      if (GET_MODE (fore_link) == VOIDmode)
		{
		  /* Found a DEF-USE dependence (insn1, insn2).  */
		  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
		    /* insn2 not guaranteed to be a 1 base reg load.  */
		    continue;

		  if (INSN_BB (insn2) == bb_trg)
		    /* insn2 is the similar load, in the target block.  */
		    return 1;

		  if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
		    /* insn2 is a similar load, in a split-block.  */
		    return 1;
		}
	    }
	}
    }

  /* Couldn't find a similar load.  */
  return 0;
}				/* is_pfree */
/* Returns a class that insn with GET_DEST(insn)=x may belong to,
   as found by analyzing insn's expression.  */

static int
may_trap_exp (x, is_store)
     rtx x;
     int is_store;
{
  enum rtx_code code;

  if (x == 0)
    return TRAP_FREE;
  code = GET_CODE (x);
  if (is_store)
    {
      if (code == MEM)
	return TRAP_RISKY;
      else
	return TRAP_FREE;
    }
  if (code == MEM)
    {
      /* The insn uses memory:  a volatile load.  */
      if (MEM_VOLATILE_P (x))
	return IRISKY;
      /* An exception-free load.  */
      if (!may_trap_p (x))
	return TRAP_FREE;
      /* A load with 1 base register, to be further checked.  */
      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
	return PFREE_CANDIDATE;
      /* No info on the load, to be further checked.  */
      return PRISKY_CANDIDATE;
    }
  else
    {
      const char *fmt;
      int i, insn_class = TRAP_FREE;

      /* Neither store nor load, check if it may cause a trap.  */
      if (may_trap_p (x))
	return TRAP_RISKY;
      /* Recursive step: walk the insn...  */
      fmt = GET_RTX_FORMAT (code);
      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
	{
	  if (fmt[i] == 'e')
	    {
	      int tmp_class = may_trap_exp (XEXP (x, i), is_store);
	      insn_class = WORST_CLASS (insn_class, tmp_class);
	    }
	  else if (fmt[i] == 'E')
	    {
	      int j;

	      for (j = 0; j < XVECLEN (x, i); j++)
		{
		  int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
		  insn_class = WORST_CLASS (insn_class, tmp_class);
		  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
		    break;
		}
	    }
	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
	    break;
	}
      return insn_class;
    }
}
/* Classifies insn for the purpose of verifying that it can be
   moved speculatively, by examining its pattern, returning:
   TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
   TRAP_FREE: non-load insn.
   IFREE: load from a globally safe location.
   IRISKY: volatile load.
   PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
   being either PFREE or PRISKY.  */

static int
haifa_classify_insn (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int tmp_class = TRAP_FREE;
  int insn_class = TRAP_FREE;
  enum rtx_code code;

  if (GET_CODE (pat) == PARALLEL)
    {
      int i, len = XVECLEN (pat, 0);

      for (i = len - 1; i >= 0; i--)
	{
	  code = GET_CODE (XVECEXP (pat, 0, i));
	  switch (code)
	    {
	    case CLOBBER:
	      /* Test if it is a 'store'.  */
	      tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
	      break;
	    case SET:
	      /* Test if it is a store.  */
	      tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
	      if (tmp_class == TRAP_RISKY)
		break;
	      /* Test if it is a load.  */
	      tmp_class =
		WORST_CLASS (tmp_class,
			     may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
	      break;
	    case TRAP_IF:
	      tmp_class = TRAP_RISKY;
	      break;
	    default:;
	    }
	  insn_class = WORST_CLASS (insn_class, tmp_class);
	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
	    break;
	}
    }
  else
    {
      code = GET_CODE (pat);
      switch (code)
	{
	case CLOBBER:
	  /* Test if it is a 'store'.  */
	  tmp_class = may_trap_exp (XEXP (pat, 0), 1);
	  break;
	case SET:
	  /* Test if it is a store.  */
	  tmp_class = may_trap_exp (SET_DEST (pat), 1);
	  if (tmp_class == TRAP_RISKY)
	    break;
	  /* Test if it is a load.  */
	  tmp_class =
	    WORST_CLASS (tmp_class,
			 may_trap_exp (SET_SRC (pat), 0));
	  break;
	case TRAP_IF:
	  tmp_class = TRAP_RISKY;
	  break;
	default:;
	}
      insn_class = tmp_class;
    }

  return insn_class;
}
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
   a load moved speculatively, or if load_insn is protected by
   a compare on load_insn's address).  */

static int
is_prisky (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;

  if (LOG_LINKS (load_insn) == NULL)
    /* Dependence may 'hide' out of the region.  */
    return 1;

  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
}

/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */

static int
is_exception_free (insn, bb_src, bb_trg)
     rtx insn;
     int bb_src, bb_trg;
{
  int insn_class = haifa_classify_insn (insn);

  /* Handle non-load insns.  */
  switch (insn_class)
    {
    case TRAP_FREE:
      return 1;
    case TRAP_RISKY:
      return 0;
    default:;
    }

  /* Handle loads.  */
  if (!flag_schedule_speculative_load)
    return 0;
  IS_LOAD_INSN (insn) = 1;
  switch (insn_class)
    {
    case IFREE:
      return 1;
    case IRISKY:
      return 0;
    case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
	return 1;
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
    case PRISKY_CANDIDATE:
      if (!flag_schedule_speculative_load_dangerous
	  || is_prisky (insn, bb_src, bb_trg))
	return 0;
      break;
    default:;
    }

  return flag_schedule_speculative_load_dangerous;
}
/* The number of insns from the current block scheduled so far.  */
static int sched_target_n_insns;
/* The number of insns from the current block to be scheduled in total.  */
static int target_n_insns;
/* The number of insns from the entire region scheduled so far.  */
static int sched_n_insns;

/* Implementations of the sched_info functions for region scheduling.  */
static void init_ready_list PARAMS ((struct ready_list *));
static int can_schedule_ready_p PARAMS ((rtx));
static int new_ready PARAMS ((rtx));
static int schedule_more_p PARAMS ((void));
static const char *rgn_print_insn PARAMS ((rtx, int));
static int rgn_rank PARAMS ((rtx, rtx));
/* Return nonzero if there are more insns that should be scheduled.  */

static int
schedule_more_p ()
{
  return sched_target_n_insns < target_n_insns;
}
/* Add all insns that are initially ready to the ready list READY.  Called
   once before scheduling a set of insns.  */

static void
init_ready_list (ready)
     struct ready_list *ready;
{
  rtx prev_head = current_sched_info->prev_head;
  rtx next_tail = current_sched_info->next_tail;
  int bb_src;
  rtx insn;

  target_n_insns = 0;
  sched_target_n_insns = 0;
  sched_n_insns = 0;

  /* Print debugging information.  */
  if (sched_verbose >= 5)
    debug_dependencies ();

  /* Prepare current target block info.  */
  if (current_nr_blocks > 1)
    {
      candidate_table = (candidate *) xmalloc (current_nr_blocks
					       * sizeof (candidate));

      /* bblst_table holds split blocks and update blocks for each block after
	 the current one in the region.  split blocks and update blocks are
	 the TO blocks of region edges, so there can be at most rgn_nr_edges
	 of them.  */
      bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
      bblst_table = (int *) xmalloc (bblst_size * sizeof (int));

      bitlst_table_last = 0;
      bitlst_table_size = rgn_nr_edges;
      bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));

      compute_trg_info (target_bb);
    }

  /* Initialize ready list with all 'ready' insns in target block.
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
      rtx next;

      if (! INSN_P (insn))
	continue;
      next = NEXT_INSN (insn);

      if (INSN_DEP_COUNT (insn) == 0
	  && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
	ready_add (ready, insn);
      if (!(SCHED_GROUP_P (insn)))
	target_n_insns++;
    }

  /* Add to ready list all 'ready' insns in valid source blocks.
     For speculative insns, check-live, exception-free, and
     issue-delay.  */
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
    if (IS_VALID (bb_src))
      {
	rtx src_head;
	rtx src_next_tail;
	rtx tail, head;

	get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
	src_next_tail = NEXT_INSN (tail);
	src_head = head;

	for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
	  {
	    if (! INSN_P (insn))
	      continue;

	    if (!CANT_MOVE (insn)
		&& (!IS_SPECULATIVE_INSN (insn)
		    || (insn_issue_delay (insn) <= 3
			&& check_live (insn, bb_src)
			&& is_exception_free (insn, bb_src, target_bb))))
	      {
		rtx next;

		/* Note that we haven't squirreled away the notes for
		   blocks other than the current.  So if this is a
		   speculative insn, NEXT might otherwise be a note.  */
		next = next_nonnote_insn (insn);
		if (INSN_DEP_COUNT (insn) == 0
		    && (! next
			|| SCHED_GROUP_P (next) == 0
			|| ! INSN_P (next)))
		  ready_add (ready, insn);
	      }
	  }
      }
}
/* Called after taking INSN from the ready list.  Returns nonzero if this
   insn can be scheduled, zero if we should silently discard it.  */

static int
can_schedule_ready_p (insn)
     rtx insn;
{
  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb)
    {
      rtx temp;
      basic_block b1;

      if (IS_SPECULATIVE_INSN (insn))
	{
	  if (!check_live (insn, INSN_BB (insn)))
	    return 0;
	  update_live (insn, INSN_BB (insn));

	  /* For speculative load, mark insns fed by it.  */
	  if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
	    set_spec_fed (insn);

	  nr_spec++;
	}
      nr_inter++;

      /* Find the beginning of the scheduling group.  */
      /* ??? Ought to update basic block here, but later bits of
	 schedule_block assume the original insn block is
	 still intact.  */

      temp = insn;
      while (SCHED_GROUP_P (temp))
	temp = PREV_INSN (temp);

      /* Update source block boundaries.  */
      b1 = BLOCK_FOR_INSN (temp);
      if (temp == b1->head && insn == b1->end)
	{
	  /* We moved all the insns in the basic block.
	     Emit a note after the last insn and update the
	     begin/end boundaries to point to the note.  */
	  rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
	  b1->head = note;
	  b1->end = note;
	}
      else if (insn == b1->end)
	{
	  /* We took insns from the end of the basic block,
	     so update the end of block boundary so that it
	     points to the first insn we did not move.  */
	  b1->end = PREV_INSN (temp);
	}
      else if (temp == b1->head)
	{
	  /* We took insns from the start of the basic block,
	     so update the start of block boundary so that
	     it points to the first insn we did not move.  */
	  b1->head = NEXT_INSN (insn);
	}
    }
  else
    {
      /* In block motion.  */
      sched_target_n_insns++;
    }
  sched_n_insns++;

  return 1;
}
2223 if it should be moved to the ready list or the queue, or zero if we
2224 should silently discard it. */
2229 /* For speculative insns, before inserting to ready/queue,
2230 check live, exception-free, and issue-delay. */
2231 if (INSN_BB (next) != target_bb
2232 && (!IS_VALID (INSN_BB (next))
2234 || (IS_SPECULATIVE_INSN (next)
2235 && (insn_issue_delay (next) > 3
2236 || !check_live (next, INSN_BB (next))
2237 || !is_exception_free (next, INSN_BB (next), target_bb)))))
/* Return a string that contains the insn uid and optionally anything else
   necessary to identify this insn in an output.  It's valid to use a
   static buffer for this.  The ALIGNED parameter should cause the string
   to be formatted so that multiple output lines will line up nicely.  */

static const char *
rgn_print_insn (insn, aligned)
     rtx insn;
     int aligned;
{
  static char tmp[80];

  if (aligned)
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
  else
    {
      sprintf (tmp, "%d", INSN_UID (insn));
      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
	sprintf (tmp, "/b%d ", INSN_BB (insn));
    }

  return tmp;
}
2265 /* Compare priority of two insns. Return a positive number if the second
2266 insn is to be preferred for scheduling, and a negative one if the first
2267 is to be preferred. Zero if they are equally good. */
2270 rgn_rank (insn1, insn2)
2273 /* Some comparisons make sense only in interblock scheduling. */
2274 if (INSN_BB (insn1) != INSN_BB (insn2))
2276 int spec_val, prob_val;
2278 /* Prefer an in-block motion over an interblock motion. */
2279 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2281 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2284 /* Prefer a useful motion over a speculative one. */
2285 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2289 /* Prefer a more probable (speculative) insn. */
2290 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
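/* Illustrative sketch (not part of the pass): the sign convention
   above matches a qsort-style comparator, so a caller can pick the
   preferred of two ready insns as below.  `rgn_prefer' is a
   hypothetical helper, not a function of this file.  */

static rtx
rgn_prefer (insn1, insn2)
     rtx insn1, insn2;
{
  /* A positive rank means the second insn is to be preferred;
     on a tie we keep the first.  */
  return rgn_rank (insn1, insn2) > 0 ? insn2 : insn1;
}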
2297 /* Used in schedule_insns to initialize current_sched_info for scheduling
2298 regions (or single basic blocks). */
2300 static struct sched_info region_sched_info =
2303 can_schedule_ready_p,
2314 /* Add dependences so that branches are scheduled to run last in their
2318 add_branch_dependences (head, tail)
2323 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2324 to remain in order at the end of the block by adding dependencies and
2325 giving the last a high priority. There may be notes present, and
2326 prev_head may also be a note.
2328 Branches must obviously remain at the end. Calls should remain at the
2329 end since moving them results in worse register allocation. Uses remain
2330 at the end to ensure proper register allocation. cc0 setters remain
2331 at the end because they can't be moved away from their cc0 user. */
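/* For illustration (not from the original source): given a block
   ending
	... ; use r1 ; call ; jump
   the loop below walks backward from the jump, making the jump
   depend on the call and the call depend on the use, so all three
   stay in that order at the end of the block.  */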
2334 while (GET_CODE (insn) == CALL_INSN
2335 || GET_CODE (insn) == JUMP_INSN
2336 || (GET_CODE (insn) == INSN
2337 && (GET_CODE (PATTERN (insn)) == USE
2338 || GET_CODE (PATTERN (insn)) == CLOBBER
2340 || sets_cc0_p (PATTERN (insn))
2343 || GET_CODE (insn) == NOTE)
2345 if (GET_CODE (insn) != NOTE)
2348 && !find_insn_list (insn, LOG_LINKS (last)))
2350 add_dependence (last, insn, REG_DEP_ANTI);
2351 INSN_REF_COUNT (insn)++;
2354 CANT_MOVE (insn) = 1;
2357 /* Skip over insns that are part of a group.
2358 Make each insn explicitly depend on the previous insn.
2359 This ensures that only the group header will ever enter
2360 the ready queue (and, when scheduled, will automatically
2361 schedule the SCHED_GROUP_P block). */
2362 while (SCHED_GROUP_P (insn))
2364 rtx temp = prev_nonnote_insn (insn);
2365 add_dependence (insn, temp, REG_DEP_ANTI);
2370 /* Don't overrun the bounds of the basic block. */
2374 insn = PREV_INSN (insn);
2377 /* Make sure these insns are scheduled last in their block. */
2380 while (insn != head)
2382 insn = prev_nonnote_insn (insn);
2384 if (INSN_REF_COUNT (insn) != 0)
2387 add_dependence (last, insn, REG_DEP_ANTI);
2388 INSN_REF_COUNT (insn) = 1;
2390 /* Skip over insns that are part of a group. */
2391 while (SCHED_GROUP_P (insn))
2392 insn = prev_nonnote_insn (insn);
2396 /* Data structures for the computation of data dependences in a region. We
2397 keep one `deps' structure for every basic block. Before analyzing the
2398 data dependences for a bb, its variables are initialized as a function of
2399 the variables of its predecessors. When the analysis for a bb completes,
2400 we save the contents to the corresponding bb_deps[bb] variable. */
2402 static struct deps *bb_deps;
2404 /* After computing the dependencies for block BB, propagate the dependencies
2405 found in TMP_DEPS to the successors of the block. MAX_REG is the number
2408 propagate_deps (bb, tmp_deps, max_reg)
2410 struct deps *tmp_deps;
2413 int b = BB_TO_BLOCK (bb);
2416 rtx link_insn, link_mem;
2419 /* These lists should point to the right place, for correct
2421 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
2422 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
2423 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
2424 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
2426 /* bb's structures are inherited by its successors. */
2427 first_edge = e = OUT_EDGES (b);
2434 int b_succ = TO_BLOCK (e);
2435 int bb_succ = BLOCK_TO_BB (b_succ);
2436 struct deps *succ_deps = bb_deps + bb_succ;
2438 /* Only bbs "below" bb, in the same region, are interesting. */
2439 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2446 for (reg = 0; reg < max_reg; reg++)
2448 /* reg-last-uses lists are inherited by bb_succ. */
2449 for (u = tmp_deps->reg_last_uses[reg]; u; u = XEXP (u, 1))
2451 if (find_insn_list (XEXP (u, 0),
2452 succ_deps->reg_last_uses[reg]))
2455 succ_deps->reg_last_uses[reg]
2456 = alloc_INSN_LIST (XEXP (u, 0),
2457 succ_deps->reg_last_uses[reg]);
2460 /* reg-last-defs lists are inherited by bb_succ. */
2461 for (u = tmp_deps->reg_last_sets[reg]; u; u = XEXP (u, 1))
2463 if (find_insn_list (XEXP (u, 0),
2464 succ_deps->reg_last_sets[reg]))
2467 succ_deps->reg_last_sets[reg]
2468 = alloc_INSN_LIST (XEXP (u, 0),
2469 succ_deps->reg_last_sets[reg]);
2472 for (u = tmp_deps->reg_last_clobbers[reg]; u; u = XEXP (u, 1))
2474 if (find_insn_list (XEXP (u, 0),
2475 succ_deps->reg_last_clobbers[reg]))
2478 succ_deps->reg_last_clobbers[reg]
2479 = alloc_INSN_LIST (XEXP (u, 0),
2480 succ_deps->reg_last_clobbers[reg]);
2484 /* Mem read/write lists are inherited by bb_succ. */
2485 link_insn = tmp_deps->pending_read_insns;
2486 link_mem = tmp_deps->pending_read_mems;
2489 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2491 succ_deps->pending_read_insns,
2492 succ_deps->pending_read_mems)))
2493 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
2494 &succ_deps->pending_read_mems,
2495 XEXP (link_insn, 0), XEXP (link_mem, 0));
2496 link_insn = XEXP (link_insn, 1);
2497 link_mem = XEXP (link_mem, 1);
2500 link_insn = tmp_deps->pending_write_insns;
2501 link_mem = tmp_deps->pending_write_mems;
2504 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2506 succ_deps->pending_write_insns,
2507 succ_deps->pending_write_mems)))
2508 add_insn_mem_dependence (succ_deps,
2509 &succ_deps->pending_write_insns,
2510 &succ_deps->pending_write_mems,
2511 XEXP (link_insn, 0), XEXP (link_mem, 0));
2513 link_insn = XEXP (link_insn, 1);
2514 link_mem = XEXP (link_mem, 1);
2517 /* last_function_call is inherited by bb_succ. */
2518 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
2520 if (find_insn_list (XEXP (u, 0),
2521 succ_deps->last_function_call))
2524 succ_deps->last_function_call
2525 = alloc_INSN_LIST (XEXP (u, 0),
2526 succ_deps->last_function_call);
2529 /* last_pending_memory_flush is inherited by bb_succ. */
2530 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
2532 if (find_insn_list (XEXP (u, 0),
2533 succ_deps->last_pending_memory_flush))
2536 succ_deps->last_pending_memory_flush
2537 = alloc_INSN_LIST (XEXP (u, 0),
2538 succ_deps->last_pending_memory_flush);
2541 /* sched_before_next_call is inherited by bb_succ. */
2542 x = LOG_LINKS (tmp_deps->sched_before_next_call);
2543 for (; x; x = XEXP (x, 1))
2544 add_dependence (succ_deps->sched_before_next_call,
2545 XEXP (x, 0), REG_DEP_ANTI);
2549 while (e != first_edge);
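/* Illustrative sketch (not part of the pass): every list inherited
   above follows the same "union, keeping elements unique" pattern,
   written out here over a minimal cons list.  `node', `find_node'
   and `list_union' are hypothetical stand-ins for INSN_LIST,
   find_insn_list and the alloc_INSN_LIST loops of propagate_deps.  */

struct node
{
  rtx elem;
  struct node *next;
};

static int
find_node (elem, list)
     rtx elem;
     struct node *list;
{
  for (; list; list = list->next)
    if (list->elem == elem)
      return 1;
  return 0;
}

/* DEST := Union (DEST, SRC); an element of SRC is consed onto DEST
   only if DEST does not already contain it.  */
static struct node *
list_union (dest, src)
     struct node *dest, *src;
{
  for (; src; src = src->next)
    if (! find_node (src->elem, dest))
      {
        struct node *n = (struct node *) xmalloc (sizeof *n);
        n->elem = src->elem;
        n->next = dest;
        dest = n;
      }
  return dest;
}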
2552 /* Compute backward dependences inside bb. In a multi-block region:
2553 (1) a bb is analyzed after its predecessors, and (2) the lists in
2554 effect at the end of bb (after analyzing for bb) are inherited by
2557 Specifically for reg-reg data dependences, the block insns are
2558 scanned by sched_analyze () top-to-bottom. Two lists are
2559 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
2560 and reg_last_uses[] for register USEs.
2562 When analysis is completed for bb, we update for its successors:
2563 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2564 ; - USES[succ] = Union (USES [succ], USES [bb])
2566 The mechanism for computing mem-mem data dependence is very
2567 similar, and the result is interblock dependences in the region. */
2570 compute_block_backward_dependences (bb)
2574 int max_reg = max_reg_num ();
2575 struct deps tmp_deps;
2577 tmp_deps = bb_deps[bb];
2579 /* Do the analysis for this block. */
2580 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2581 sched_analyze (&tmp_deps, head, tail);
2582 add_branch_dependences (head, tail);
2584 if (current_nr_blocks > 1)
2585 propagate_deps (bb, &tmp_deps, max_reg);
2587 /* Free up the INSN_LISTs. */
2588 free_deps (&tmp_deps);
2590 /* Assert that we won't need bb_reg_last_* for this block anymore. */
2591 free (bb_deps[bb].reg_last_uses);
2592 free (bb_deps[bb].reg_last_sets);
2593 free (bb_deps[bb].reg_last_clobbers);
2594 bb_deps[bb].reg_last_uses = 0;
2595 bb_deps[bb].reg_last_sets = 0;
2596 bb_deps[bb].reg_last_clobbers = 0;
2598 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2599 them to the unused_*_list variables, so that they can be reused. */
2602 free_pending_lists ()
2606 for (bb = 0; bb < current_nr_blocks; bb++)
2608 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2609 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2610 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2611 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2615 /* Print dependences for debugging, callable from debugger. */
2618 debug_dependencies ()
2622 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2623 for (bb = 0; bb < current_nr_blocks; bb++)
2631 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2632 next_tail = NEXT_INSN (tail);
2633 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2634 BB_TO_BLOCK (bb), bb);
2636 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2637 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2638 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2639 "----", "----", "--", "---", "----", "----", "--------", "-----");
2640 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2645 if (! INSN_P (insn))
2648 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2649 if (GET_CODE (insn) == NOTE)
2651 n = NOTE_LINE_NUMBER (insn);
2653 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2655 fprintf (sched_dump, "line %d, file %s\n", n,
2656 NOTE_SOURCE_FILE (insn));
2659 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2663 unit = insn_unit (insn);
2665 || function_units[unit].blockage_range_function == 0) ? 0 :
2666 function_units[unit].blockage_range_function (insn);
2667 fprintf (sched_dump,
2668 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2669 (SCHED_GROUP_P (insn) ? "+" : " "),
2673 INSN_DEP_COUNT (insn),
2674 INSN_PRIORITY (insn),
2675 insn_cost (insn, 0, 0),
2676 (int) MIN_BLOCKAGE_COST (range),
2677 (int) MAX_BLOCKAGE_COST (range));
2678 insn_print_units (insn);
2679 fprintf (sched_dump, "\t: ");
2680 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2681 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2682 fprintf (sched_dump, "\n");
2686 fprintf (sched_dump, "\n");
2689 /* Schedule a region. A region is either an inner loop, a loop-free
2690 subroutine, or a single basic block. Each bb in the region is
2691 scheduled after its flow predecessors. */
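/* Block numbering inside a region follows the flow order: a bb comes
   after its predecessors.  The forward loops over bb below therefore
   respect that order; only the INSN_DEPEND pass runs bottom-up.  */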
2694 schedule_region (rgn)
2698 int rgn_n_insns = 0;
2699 int sched_rgn_n_insns = 0;
2701 /* Set variables for the current region. */
2702 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2703 current_blocks = RGN_BLOCKS (rgn);
2705 init_deps_global ();
2707 /* Initializations for region data dependence analysis. */
2708 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2709 for (bb = 0; bb < current_nr_blocks; bb++)
2710 init_deps (bb_deps + bb);
2712 /* Compute LOG_LINKS. */
2713 for (bb = 0; bb < current_nr_blocks; bb++)
2714 compute_block_backward_dependences (bb);
2716 /* Compute INSN_DEPEND. */
2717 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2720 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2722 compute_forward_dependences (head, tail);
2725 /* Set priorities. */
2726 for (bb = 0; bb < current_nr_blocks; bb++)
2727 rgn_n_insns += set_priorities (BB_TO_BLOCK (bb));
2729 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2730 if (current_nr_blocks > 1)
2734 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2736 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
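/* I.e. enough HOST_WIDE_INT words for one bit per block; n / bits + 1
   always rounds up, over-allocating one word when the block count is
   an exact multiple of the word size.  */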
2737 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
2738 for (i = 0; i < current_nr_blocks; i++)
2739 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
2743 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2744 for (i = 1; i < nr_edges; i++)
2745 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2746 EDGE_TO_BIT (i) = rgn_nr_edges++;
2747 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2750 for (i = 1; i < nr_edges; i++)
2751 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2752 rgn_edges[rgn_nr_edges++] = i;
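/* Two passes over the edge table: the first assigns each of this
   region's edges a bit position and counts them; after the
   allocation the count restarts and this second pass records the
   edges themselves.  */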
2755 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
2756 edgeset_bitsize = rgn_nr_edges;
2757 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2759 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2760 for (i = 0; i < current_nr_blocks; i++)
2763 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2765 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2768 /* Compute probabilities, dominators, split_edges. */
2769 for (bb = 0; bb < current_nr_blocks; bb++)
2770 compute_dom_prob_ps (bb);
2773 /* Now we can schedule all blocks. */
2774 for (bb = 0; bb < current_nr_blocks; bb++)
2777 int b = BB_TO_BLOCK (bb);
2779 get_block_head_tail (b, &head, &tail);
2781 if (no_real_insns_p (head, tail))
2784 current_sched_info->prev_head = PREV_INSN (head);
2785 current_sched_info->next_tail = NEXT_INSN (tail);
2787 if (write_symbols != NO_DEBUG)
2789 save_line_notes (b);
2793 /* rm_other_notes only removes notes which are _inside_ the
2794 block---that is, it won't remove notes before the first real insn
2795 or after the last real insn of the block. So if the first insn
2796 has a REG_SAVE_NOTE which would otherwise be emitted before the
2797 insn, it is redundant with the note before the start of the
2798 block, and so we have to take it out.
2800 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
2801 referencing NOTE_INSN_SETJMP at the end of the block. */
2806 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2807 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2809 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
2811 remove_note (head, note);
2812 note = XEXP (note, 1);
2813 remove_note (head, note);
2816 note = XEXP (note, 1);
2820 /* Remove remaining note insns from the block, save them in
2821 note_list. These notes are restored at the end of
2822 schedule_block (). */
2823 rm_other_notes (head, tail);
2827 current_sched_info->queue_must_finish_empty
2828 = current_nr_blocks > 1 && !flag_schedule_interblock;
2830 schedule_block (b, rgn_n_insns);
2831 sched_rgn_n_insns += sched_n_insns;
2833 /* Update target block boundaries. */
2834 if (head == BLOCK_HEAD (b))
2835 BLOCK_HEAD (b) = current_sched_info->head;
2836 if (tail == BLOCK_END (b))
2837 BLOCK_END (b) = current_sched_info->tail;
2840 if (current_nr_blocks > 1)
2842 free (candidate_table);
2844 free (bitlst_table);
2848 /* Sanity check: verify that all region insns were scheduled. */
2849 if (sched_rgn_n_insns != rgn_n_insns)
2852 /* Restore line notes. */
2853 if (write_symbols != NO_DEBUG)
2855 for (bb = 0; bb < current_nr_blocks; bb++)
2856 restore_line_notes (BB_TO_BLOCK (bb));
2859 /* Done with this region. */
2860 free_pending_lists ();
2862 finish_deps_global ();
2866 if (current_nr_blocks > 1)
2871 for (i = 0; i < current_nr_blocks; ++i)
2874 free (pot_split[i]);
2875 free (ancestor_edges[i]);
2881 free (ancestor_edges);
2885 /* Indexed by region, holds the number of death notes found in that region.
2886 Used for consistency checks. */
2887 static int *deaths_in_region;
2889 /* Initialize data structures for region scheduling. */
2898 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2899 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2900 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2901 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2903 blocks = sbitmap_alloc (n_basic_blocks);
2905 /* Compute regions for scheduling. */
2906 if (reload_completed
2907 || n_basic_blocks == 1
2908 || !flag_schedule_interblock)
2910 find_single_block_region ();
2914 /* Verify that a 'good' control flow graph can be built. */
2915 if (is_cfg_nonregular ())
2917 find_single_block_region ();
2922 struct edge_list *edge_list;
2924 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2926 /* The scheduler runs after flow; therefore, we can't blindly call
2927 back into find_basic_blocks since doing so could invalidate the
2928 info in global_live_at_start.
2930 Consider a block consisting entirely of dead stores; after life
2931 analysis it would be a block of NOTE_INSN_DELETED notes. If
2932 we call find_basic_blocks again, then the block would be removed
2933 entirely, invalidating our register live information.
2935 We could (should?) recompute register live information. Doing
2936 so may even be beneficial. */
2937 edge_list = create_edge_list ();
2939 /* Compute the dominators and post dominators. */
2940 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
2942 /* build_control_flow will return nonzero if it detects unreachable
2943 blocks or any other irregularity with the cfg which prevents
2944 cross block scheduling. */
2945 if (build_control_flow (edge_list) != 0)
2946 find_single_block_region ();
2948 find_rgns (edge_list, dom);
2950 if (sched_verbose >= 3)
2953 /* We are done with flow's edge list. */
2954 free_edge_list (edge_list);
2956 /* For now. This will move as more and more of haifa is converted
2957 to using the cfg code in flow.c. */
2962 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2964 /* Remove all death notes from the subroutine. */
2965 for (rgn = 0; rgn < nr_regions; rgn++)
2969 sbitmap_zero (blocks);
2970 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2971 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2973 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2976 sbitmap_free (blocks);
2979 /* The one entry point in this file. DUMP_FILE is the dump file for
2983 schedule_insns (dump_file)
2986 sbitmap large_region_blocks, blocks;
2988 int any_large_regions;
2990 /* Taking care of this degenerate case makes the rest of
2991 this code simpler. */
2992 if (n_basic_blocks == 0)
2998 sched_init (dump_file);
3002 current_sched_info = &region_sched_info;
3004 /* Schedule every region in the subroutine. */
3005 for (rgn = 0; rgn < nr_regions; rgn++)
3006 schedule_region (rgn);
3008 /* Update life analysis for the subroutine. Do single block regions
3009 first so that we can verify that live_at_start didn't change. Then
3010 do all other blocks. */
3011 /* ??? There is an outside possibility that update_life_info, or more
3012 to the point propagate_block, could get called with non-zero flags
3013 more than once for one basic block. This would be kinda bad if it
3014 were to happen, since REG_INFO would be accumulated twice for the
3015 block, and we'd have twice the REG_DEAD notes.
3017 I'm fairly certain that this _shouldn't_ happen, since I don't think
3018 that live_at_start should change at region heads. Not sure what the
3019 best way to test for this kind of thing would be. */
3021 allocate_reg_life_data ();
3022 compute_bb_for_insn (get_max_uid ());
3024 any_large_regions = 0;
3025 large_region_blocks = sbitmap_alloc (n_basic_blocks);
3026 sbitmap_ones (large_region_blocks);
3028 blocks = sbitmap_alloc (n_basic_blocks);
3030 for (rgn = 0; rgn < nr_regions; rgn++)
3031 if (RGN_NR_BLOCKS (rgn) > 1)
3032 any_large_regions = 1;
3035 sbitmap_zero (blocks);
3036 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3037 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3039 /* Don't update reg info after reload, since that affects
3040 regs_ever_live, which should not change after reload. */
3041 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3042 (reload_completed ? PROP_DEATH_NOTES
3043 : PROP_DEATH_NOTES | PROP_REG_INFO));
3045 #ifndef HAVE_conditional_execution
3046 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3047 a count of the conditional plus unconditional deaths for this to
3049 /* In the single block case, the count of registers that died should
3050 not have changed during the schedule. */
3051 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
3056 if (any_large_regions)
3058 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3059 PROP_DEATH_NOTES | PROP_REG_INFO);
3062 /* Reposition the prologue and epilogue notes in case we moved the
3063 prologue/epilogue insns. */
3064 if (reload_completed)
3065 reposition_prologue_and_epilogue_notes (get_insns ());
3067 /* Delete redundant line notes. */
3068 if (write_symbols != NO_DEBUG)
3069 rm_redundant_line_notes ();
3073 if (reload_completed == 0 && flag_schedule_interblock)
3075 fprintf (sched_dump,
3076 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3084 fprintf (sched_dump, "\n\n");
3089 free (rgn_bb_table);
3091 free (containing_rgn);
3112 sbitmap_free (blocks);
3113 sbitmap_free (large_region_blocks);
3115 free (deaths_in_region);