/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting,
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "recog.h"
#include "sched-int.h"
#include "target.h"
#ifdef INSN_SCHEDULING
/* Some accessor macros for h_i_d members only used within this file.  */
#define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
#define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
#define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)

#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100
/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

/* Control flow graph edges are kept in circular lists.  */
typedef struct
{
  int from_block;
  int to_block;
  int next_in;
  int next_out;
}
haifa_edge;
static haifa_edge *edge_table;

#define NEXT_IN(edge) (edge_table[edge].next_in)
#define NEXT_OUT(edge) (edge_table[edge].next_out)
#define FROM_BLOCK(edge) (edge_table[edge].from_block)
#define TO_BLOCK(edge) (edge_table[edge].to_block)

/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;

/* Circular list of incoming/outgoing edges of a block.  */
static int *in_edges;
static int *out_edges;

#define IN_EDGES(block) (in_edges[block])
#define OUT_EDGES(block) (out_edges[block])
static int is_cfg_nonregular PARAMS ((void));
static int build_control_flow PARAMS ((struct edge_list *));
static void new_edge PARAMS ((int, int));
/* A region is the main entity for interblock scheduling: insns
   are allowed to move between blocks in the same region, along
   control flow graph edges, in the 'up' direction.  */
typedef struct
{
  int rgn_nr_blocks;	/* Number of blocks in region.  */
  int rgn_blocks;	/* cblocks in the region (actually index in rgn_bb_table).  */
}
region;

/* Number of regions in the procedure.  */
static int nr_regions;

/* Table of region descriptions.  */
static region *rgn_table;

/* Array of lists of regions' blocks.  */
static int *rgn_bb_table;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
static int *block_to_bb;

/* The number of the region containing a block.  */
static int *containing_rgn;

#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define CONTAINING_RGN(block) (containing_rgn[block])
void debug_regions PARAMS ((void));
static void find_single_block_region PARAMS ((void));
static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
static int too_large PARAMS ((int, int *, int *));

extern void debug_live PARAMS ((int, int));
/* Blocks of the current region being scheduled.  */
static int current_nr_blocks;
static int current_blocks;

/* The mapping from bb to block.  */
#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
/* Bit vectors and bitset operations are needed for computations on
   the control flow graph.  */

typedef unsigned HOST_WIDE_INT *bitset;
typedef struct
{
  int *first_member;	/* Pointer to the list start in bitlst_table.  */
  int nr_members;	/* The number of members of the bit list.  */
}
bitlst;

static int bitlst_table_last;
static int bitlst_table_size;
static int *bitlst_table;

static char bitset_member PARAMS ((bitset, int, int));
static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef bitlst bblst;
typedef struct
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
}
candidate;

static candidate *candidate_table;
/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static int *bblst_table, bblst_size, bblst_last;

#define IS_VALID(src) ( candidate_table[src].is_valid )
#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
static int target_bb;
typedef bitlst edgelst;

/* Target info functions.  */
static void split_edges PARAMS ((int, int, edgelst *));
static void compute_trg_info PARAMS ((int));
void debug_candidate PARAMS ((int));
void debug_candidates PARAMS ((int));
/* Bit-set of bbs, where bit 'i' stands for bb 'i'.  */
typedef bitset bbset;

/* Number of words of the bbset.  */
static int bbset_size;

/* Dominators array: dom[i] contains the bbset of dominators of
   bb i in the region.  */
static bbset *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)
/* Is bb_src dominated by bb_trg.  */
#define IS_DOMINATED(bb_src, bb_trg)				\
( bitset_member (dom[bb_src], bb_trg, bbset_size) )

/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;

/* The probability of bb_src, relative to bb_trg.  Note that while
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
						      prob[bb_trg])))
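/* A worked instance of the macro above: with prob[bb_src] == 0.3 and
   prob[bb_trg] == 0.6, GET_SRC_PROB (bb_src, bb_trg) evaluates to 50,
   i.e. control that reaches bb_trg goes on to reach bb_src half the
   time.  */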
/* Bit-set of edges, where bit i stands for edge i.  */
typedef bitset edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static int *rgn_edges;

/* Number of words in an edgeset.  */
static int edgeset_size;

/* Number of bits in an edgeset.  */
static int edgeset_bitsize;

/* Mapping from each edge in the graph to its number in the rgn.  */
static int *edge_to_bit;
#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;
static void compute_dom_prob_ps PARAMS ((int));

#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
/* Parameters affecting the decision of rank_for_schedule().
   ??? Nope.  But MIN_PROBABILITY is used in compute_trg_info.  */
#define MIN_DIFF_PRIORITY 2
#define MIN_PROBABILITY 40
#define MIN_PROB_DIFF 10
/* Speculative scheduling functions.  */
static int check_live_1 PARAMS ((int, rtx));
static void update_live_1 PARAMS ((int, rtx));
static int check_live PARAMS ((rtx, int));
static void update_live PARAMS ((rtx, int));
static void set_spec_fed PARAMS ((rtx));
static int is_pfree PARAMS ((rtx, int, int));
static int find_conditional_protection PARAMS ((rtx, int));
static int is_conditionally_protected PARAMS ((rtx, int, int));
static int may_trap_exp PARAMS ((rtx, int));
static int haifa_classify_insn PARAMS ((rtx));
static int is_prisky PARAMS ((rtx, int, int));
static int is_exception_free PARAMS ((rtx, int, int));

static void add_branch_dependences PARAMS ((rtx, rtx));
static void compute_block_backward_dependences PARAMS ((int));
void debug_dependencies PARAMS ((void));

static void init_regions PARAMS ((void));
static void schedule_region PARAMS ((int));
static void propagate_deps PARAMS ((int, struct deps *));
static void free_pending_lists PARAMS ((void));
/* Functions for construction of the control flow graph.  */

/* Return 1 if control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
   have nonlocal gotos.  */

static int
is_cfg_nonregular ()
{
  int b;
  rtx insn;
  RTX_CODE code;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If this function has a computed jump, then we consider the cfg
     not well structured.  */
  if (current_function_has_computed_jump)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that flow.c
     computes an accurate cfg for EH.  */
  if (exception_handler_labels)
    return 1;

  /* If we have non-jumping insns which refer to labels, then we consider
     the cfg not well structured.  */
  /* Check for labels referred to other than by jumps.  */
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
	code = GET_CODE (insn);
	if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
	  {
	    rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);

	    if (note
		&& ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
		      && find_reg_note (NEXT_INSN (insn), REG_LABEL,
					XEXP (note, 0))))
	      return 1;
	  }

	if (insn == BLOCK_END (b))
	  break;
      }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
/* Build the control flow graph and set nr_edges.

   Instead of trying to build a cfg ourselves, we rely on flow to
   do it for us.  Stamp out useless code (and bug) duplication.

   Return nonzero if an irregularity in the cfg is found which would
   prevent cross block scheduling.  */

static int
build_control_flow (edge_list)
     struct edge_list *edge_list;
{
  int i, unreachable, num_edges;

  /* This already accounts for entry/exit edges.  */
  num_edges = NUM_EDGES (edge_list);

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block b = BASIC_BLOCK (i);

      if (b->pred == NULL
	  || (b->pred->src == b
	      && b->pred->pred_next == NULL))
	unreachable = 1;
    }

  /* ??? We can kill these soon.  */
  in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));

  nr_edges = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR
	  && e->src != ENTRY_BLOCK_PTR)
	new_edge (e->src->index, e->dest->index);
    }

  /* Increment by 1, since edge 0 is unused.  */
  nr_edges++;

  return unreachable;
}
/* Record an edge in the control flow graph from SOURCE to TARGET.

   In theory, this is redundant with the s_succs computed above, but
   we have not converted all of haifa to use information from the
   integer lists, so this is the easiest way to add a new edge.  */

static void
new_edge (source, target)
     int source, target;
{
  int e, next_edge;
  int curr_edge, fst_edge;

  /* Check for duplicates.  */
  fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
	  && TO_BLOCK (curr_edge) == target)
	return;

      curr_edge = NEXT_OUT (curr_edge);

      if (fst_edge == curr_edge)
	break;
    }

  e = ++nr_edges;

  FROM_BLOCK (e) = source;
  TO_BLOCK (e) = target;

  if (OUT_EDGES (source))
    {
      next_edge = NEXT_OUT (OUT_EDGES (source));
      NEXT_OUT (OUT_EDGES (source)) = e;
      NEXT_OUT (e) = next_edge;
    }
  else
    {
      OUT_EDGES (source) = e;
      NEXT_OUT (e) = e;
    }

  if (IN_EDGES (target))
    {
      next_edge = NEXT_IN (IN_EDGES (target));
      NEXT_IN (IN_EDGES (target)) = e;
      NEXT_IN (e) = next_edge;
    }
  else
    {
      IN_EDGES (target) = e;
      NEXT_IN (e) = e;
    }
}
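/* A sketch of the invariant new_edge maintains, with illustrative edge
   indices e1, e2, e3 (not taken from a real cfg): after new_edge has
   recorded three out edges of block s in the order e1, e2, e3, we have
   OUT_EDGES (s) == e1 and the NEXT_OUT chain e1 -> e3 -> e2 -> e1,
   since each new edge is spliced in right after the list head.  A
   do/while over NEXT_OUT starting at OUT_EDGES (s) therefore visits
   every out edge exactly once before returning to its start.  */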
/* BITSET macros for operations on the control flow graph.  */

/* Compute bitwise union of two bitsets.  */
#define BITSET_UNION(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) |= *(sp++); } while (0)

/* Compute bitwise intersection of two bitsets.  */
#define BITSET_INTER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= *(sp++); } while (0)

/* Compute bitwise difference of two bitsets.  */
#define BITSET_DIFFER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= ~*(sp++); } while (0)

/* Inverts every bit of bitset 'set'.  */
#define BITSET_INVERT(set, len)					\
do { register bitset tmpset = set;				\
     register int i;						\
     for (i = 0; i < len; i++, tmpset++)			\
       *tmpset = ~*tmpset; } while (0)

/* Turn on the index'th bit in bitset set.  */
#define BITSET_ADD(set, index, len)				\
{								\
  if (index >= HOST_BITS_PER_WIDE_INT * len)			\
    abort ();							\
  else								\
    set[index/HOST_BITS_PER_WIDE_INT] |=			\
      ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT); \
}

/* Turn off the index'th bit in set.  */
#define BITSET_REMOVE(set, index, len)				\
{								\
  if (index >= HOST_BITS_PER_WIDE_INT * len)			\
    abort ();							\
  else								\
    set[index/HOST_BITS_PER_WIDE_INT] &=			\
      ~(((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT)); \
}

/* Check if the index'th bit in bitset set is on.  */

static char
bitset_member (set, index, len)
     bitset set;
     int index, len;
{
  if (index >= HOST_BITS_PER_WIDE_INT * len)
    abort ();
  return ((set[index / HOST_BITS_PER_WIDE_INT] &
	   ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT))
	  ? 1 : 0);
}
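/* A minimal sketch (kept out of the build on purpose) of how the
   bitset primitives above compose.  A two-word set holds bits
   0 .. 2 * HOST_BITS_PER_WIDE_INT - 1.  */
#if 0
static void
bitset_example ()
{
  unsigned HOST_WIDE_INT a[2] = { 0, 0 };
  unsigned HOST_WIDE_INT b[2] = { 0, 0 };

  BITSET_ADD (a, 3, 2);		/* a = {3}  */
  BITSET_ADD (b, 5, 2);		/* b = {5}  */
  BITSET_UNION (a, b, 2);	/* a = {3, 5}  */

  if (! bitset_member (a, 5, 2) || bitset_member (a, 4, 2))
    abort ();
}
#endif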
/* Translate a bit-set SET to a list BL of the bit-set members.  */

static void
extract_bitlst (set, len, bitlen, bl)
     bitset set;
     int len;
     int bitlen;
     bitlst *bl;
{
  int i, j, offset;
  unsigned HOST_WIDE_INT word;

  /* bblst table space is reused in each call to extract_bitlst.  */
  bitlst_table_last = 0;

  bl->first_member = &bitlst_table[bitlst_table_last];
  bl->nr_members = 0;

  /* Iterate over each word in the bitset.  */
  for (i = 0; i < len; i++)
    {
      word = set[i];
      offset = i * HOST_BITS_PER_WIDE_INT;

      /* Iterate over each bit in the word, but do not
	 go beyond the end of the defined bits.  */
      for (j = 0; offset < bitlen && word; j++)
	{
	  if (word & 1)
	    {
	      bitlst_table[bitlst_table_last++] = offset;
	      (bl->nr_members)++;
	    }
	  word >>= 1;
	  ++offset;
	}
    }
}
/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions ()
{
  int rgn, bb;

  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
	       rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
	{
	  current_blocks = RGN_BLOCKS (rgn);

	  if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
	    abort ();

	  fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
	}

      fprintf (sched_dump, "\n\n");
    }
}
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region ()
{
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    {
      rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
      BLOCK_TO_BB (i) = 0;
    }
  nr_regions = n_basic_blocks;
}
/* Update number of blocks and the estimate for number of insns
   in the region.  Return 1 if the region is "too large" for interblock
   scheduling (compile time considerations), otherwise return 0.  */

static int
too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
		   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the most inner
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)			\
{							\
  if (max_hdr[blk] == -1)				\
    max_hdr[blk] = hdr;					\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])		\
    RESET_BIT (inner, hdr);				\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])		\
    {							\
      RESET_BIT (inner, max_hdr[blk]);			\
      max_hdr[blk] = hdr;				\
    }							\
}
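/* For instance, if max_hdr[blk] already records a header h1 and the
   macro is invoked with a header h2 such that dfs_nr[h2] > dfs_nr[h1],
   then h2 was entered deeper in the DFS, its loop nests inside h1's,
   so h2 becomes blk's header and h1 loses its 'inner' bit.  */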
/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick)
   variables: nr_regions, rgn_table, rgn_bb_table, block_to_bb, and
   containing_rgn.

   We use dominator relationships to avoid making regions out of
   non-reducible loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */
static void
find_rgns (edge_list, dom)
     struct edge_list *edge_list;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0, current_edge = out_edges[0];
  int num_bbs, num_insns, unreachable;
  int too_large_failure;

  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is on the current DFS stack.  */
  sbitmap in_stack;

  int num_edges = NUM_EDGES (edge_list);

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
  dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
  stack = (int *) xmalloc (nr_edges * sizeof (int));

  inner = sbitmap_alloc (n_basic_blocks);
  sbitmap_ones (inner);

  header = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (header);

  passed = sbitmap_alloc (nr_edges);
  sbitmap_zero (passed);

  in_queue = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_stack);

  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;
  /* DFS traversal to find inner loops in the cfg.  */

  sp = -1;
  while (1)
    {
      if (current_edge == 0 || TEST_BIT (passed, current_edge))
	{
	  /* We have reached a leaf node or a node that was already
	     processed.  Pop edges off the stack until we find
	     an edge that has not yet been processed.  */
	  while (sp >= 0
		 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
	    {
	      /* Pop entry off the stack.  */
	      current_edge = stack[sp--];
	      node = FROM_BLOCK (current_edge);
	      child = TO_BLOCK (current_edge);
	      RESET_BIT (in_stack, child);
	      if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	      current_edge = NEXT_OUT (current_edge);
	    }

	  /* See if have finished the DFS tree traversal.  */
	  if (sp < 0 && TEST_BIT (passed, current_edge))
	    break;

	  /* Nope, continue the traversal with the popped node.  */
	  continue;
	}

      /* Process a node.  */
      node = FROM_BLOCK (current_edge);
      child = TO_BLOCK (current_edge);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* If the successor is in the stack, then we've found a loop.
	 Mark the loop, if it is not a natural loop, then it will
	 be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
	{
	  no_loops = 0;
	  SET_BIT (header, child);
	  UPDATE_LOOP_RELATIONS (node, child);
	  SET_BIT (passed, current_edge);
	  current_edge = NEXT_OUT (current_edge);
	  continue;
	}

      /* If the child was already visited, then there is no need to visit
	 it again.  Just update the loop relationships and restart
	 with a new edge.  */
      if (dfs_nr[child])
	{
	  if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	  SET_BIT (passed, current_edge);
	  current_edge = NEXT_OUT (current_edge);
	  continue;
	}

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_BIT (passed, current_edge);
      current_edge = OUT_EDGES (child);

      /* This is temporary until haifa is converted to use rth's new
	 cfg routines which have true entry/exit blocks and the
	 appropriate edges from/to those blocks.

	 Generally we update dfs_nr for a node when we process its
	 out edge.  However, if the node has no out edge then we will
	 not set dfs_nr for that node.  This can confuse the scheduler
	 into thinking that we have unreachable blocks, which in turn
	 disables cross block scheduling.

	 So, if we have a node with no out edges, go ahead and mark it
	 as reachable now.  */
      if (current_edge == 0)
	dfs_nr[child] = ++count;
    }
  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
	unreachable = 1;
	break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR)
	degree[e->dest->index]++;
    }

  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue;

      if (no_loops)
	SET_BIT (header, 0);

      /* Second traversal: find reducible inner loops and topologically sort
	 blocks of each region.  */

      queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
      /* Find blocks which are inner loop headers.  We still have non-reducible
	 loops to consider at this point.  */
      for (i = 0; i < n_basic_blocks; i++)
	{
	  if (TEST_BIT (header, i) && TEST_BIT (inner, i))
	    {
	      edge e;
	      int j;

	      /* Now check that the loop is reducible.  We do this separate
		 from finding inner loops so that we do not find a reducible
		 loop which contains an inner non-reducible loop.

		 A simple way to find reducible/natural loops is to verify
		 that each block in the loop is dominated by the loop
		 header.

		 If there exists a block that is not dominated by the loop
		 header, then the block is reachable from outside the loop
		 and thus the loop is not a natural loop.  */
	      for (j = 0; j < n_basic_blocks; j++)
		{
		  /* First identify blocks in the loop, except for the loop
		     entry block.  */
		  if (i == max_hdr[j] && i != j)
		    {
		      /* Now verify that the block is dominated by the loop
			 header.  */
		      if (!TEST_BIT (dom[j], i))
			break;
		    }
		}

	      /* If we exited the loop early, then I is the header of
		 a non-reducible loop and we should quit processing it
		 now.  */
	      if (j != n_basic_blocks)
		continue;

	      /* I is a header of an inner loop, or block 0 in a subroutine
		 with no loops at all.  */
	      head = tail = -1;
	      too_large_failure = 0;
	      loop_head = max_hdr[i];

	      /* Decrease degree of all I's successors for topological
		 ordering.  */
	      for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
		if (e->dest != EXIT_BLOCK_PTR)
		  --degree[e->dest->index];

	      /* Estimate # insns, and count # blocks in the region.  */
	      num_bbs = 1;
	      num_insns = (INSN_LUID (BLOCK_END (i))
			   - INSN_LUID (BLOCK_HEAD (i)));

	      /* Find all loop latches (blocks with back edges to the loop
		 header), or all the leaf blocks if the cfg has no loops.

		 Place those blocks into the queue.  */
	      if (no_loops)
		{
		  for (j = 0; j < n_basic_blocks; j++)
		    /* Leaf nodes have only a single successor which must
		       be EXIT_BLOCK.  */
		    if (BASIC_BLOCK (j)->succ
			&& BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
			&& BASIC_BLOCK (j)->succ->succ_next == NULL)
		      {
			queue[++tail] = j;
			SET_BIT (in_queue, j);

			if (too_large (j, &num_bbs, &num_insns))
			  {
			    too_large_failure = 1;
			    break;
			  }
		      }
		}
	      else
		{
		  for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
		    {
		      if (e->src == ENTRY_BLOCK_PTR)
			continue;

		      node = e->src->index;

		      if (max_hdr[node] == loop_head && node != i)
			{
			  /* This is a loop latch.  */
			  queue[++tail] = node;
			  SET_BIT (in_queue, node);

			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
			}
		    }
		}

	      /* Now add all the blocks in the loop to the queue.

		 We know the loop is a natural loop; however the algorithm
		 above will not always mark certain blocks as being in the
		 loop.  Consider:
		    node   children
		     a	      b,c
		     b	      c
		     c	      a,d
		     d	      b

		 The algorithm in the DFS traversal may not mark B & D as part
		 of the loop (ie they will not have max_hdr set to A).

		 We know they cannot be loop latches (else they would have
		 had max_hdr set since they'd have a backedge to a dominator
		 block).  So we don't need them on the initial queue.

		 We know they are part of the loop because they are dominated
		 by the loop header and can be reached by a backwards walk of
		 the edges starting with nodes on the initial queue.

		 It is safe and desirable to include those nodes in the
		 loop/scheduling region.  To do so we would need to decrease
		 the degree of a node if it is the target of a backedge
		 within the loop itself as the node is placed in the queue.

		 We do not do this because I'm not sure that the actual
		 scheduling code will properly handle this case. ?!? */
	      while (head < tail && !too_large_failure)
		{
		  child = queue[++head];

		  for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
		    {
		      node = e->src->index;

		      /* See discussion above about nodes not marked as in
			 this loop during the initial DFS traversal.  */
		      if (e->src == ENTRY_BLOCK_PTR
			  || max_hdr[node] != loop_head)
			{
			  tail = -1;
			  break;
			}
		      else if (!TEST_BIT (in_queue, node) && node != i)
			{
			  queue[++tail] = node;
			  SET_BIT (in_queue, node);

			  if (too_large (node, &num_bbs, &num_insns))
			    {
			      too_large_failure = 1;
			      break;
			    }
			}
		    }
		}

	      if (tail >= 0 && !too_large_failure)
		{
		  /* Place the loop header into list of region blocks.  */
		  degree[i] = -1;
		  rgn_bb_table[idx] = i;
		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
		  RGN_BLOCKS (nr_regions) = idx++;
		  CONTAINING_RGN (i) = nr_regions;
		  BLOCK_TO_BB (i) = count = 0;

		  /* Remove blocks from queue[] when their in degree
		     becomes zero.  Repeat until no blocks are left on the
		     list.  This produces a topological list of blocks in
		     the region.  */
		  while (tail >= 0)
		    {
		      if (head < 0)
			head = tail;
		      child = queue[head];
		      if (degree[child] == 0)
			{
			  degree[child] = -1;
			  rgn_bb_table[idx++] = child;
			  BLOCK_TO_BB (child) = ++count;
			  CONTAINING_RGN (child) = nr_regions;
			  queue[head] = queue[tail--];

			  for (e = BASIC_BLOCK (child)->succ;
			       e;
			       e = e->succ_next)
			    if (e->dest != EXIT_BLOCK_PTR)
			      --degree[e->dest->index];
			}
		      else
			--head;
		    }
		  ++nr_regions;
		}
	    }
	}
      free (queue);
    }

  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  for (i = 0; i < n_basic_blocks; i++)
    if (degree[i] >= 0)
      {
	rgn_bb_table[idx] = i;
	RGN_NR_BLOCKS (nr_regions) = 1;
	RGN_BLOCKS (nr_regions) = idx++;
	CONTAINING_RGN (i) = nr_regions++;
	BLOCK_TO_BB (i) = 0;
      }

  free (max_hdr);
  free (dfs_nr);
  free (stack);
  free (passed);
  free (header);
  free (inner);
  free (in_queue);
  free (in_stack);
}
/* Functions for regions scheduling information.  */

/* Compute dominators, probability, and potential-split-edges of bb.
   Assume that these values were already computed for bb's predecessors.  */

static void
compute_dom_prob_ps (bb)
     int bb;
{
  int nxt_in_edge, fst_in_edge, pred;
  int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;

  prob[bb] = 0.0;
  if (IS_RGN_ENTRY (bb))
    {
      BITSET_ADD (dom[bb], 0, bbset_size);
      prob[bb] = 1.0;
      return;
    }

  fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));

  /* Initialize dom[bb] to '111..1'.  */
  BITSET_INVERT (dom[bb], bbset_size);

  do
    {
      pred = FROM_BLOCK (nxt_in_edge);
      BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);

      BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
		    edgeset_size);

      BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);

      nr_out_edges = 1;
      nr_rgn_out_edges = 0;
      fst_out_edge = OUT_EDGES (pred);
      nxt_out_edge = NEXT_OUT (fst_out_edge);
      BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
		    edgeset_size);

      BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);

      /* The successor doesn't belong in the region?  */
      if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
	  CONTAINING_RGN (BB_TO_BLOCK (bb)))
	++nr_rgn_out_edges;

      while (fst_out_edge != nxt_out_edge)
	{
	  ++nr_out_edges;
	  /* The successor doesn't belong in the region?  */
	  if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
	      CONTAINING_RGN (BB_TO_BLOCK (bb)))
	    ++nr_rgn_out_edges;
	  BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
	  nxt_out_edge = NEXT_OUT (nxt_out_edge);
	}

      /* Now nr_rgn_out_edges is the number of region-exit edges from
	 pred, and nr_out_edges will be the number of pred out edges
	 not leaving the region.  */
      nr_out_edges -= nr_rgn_out_edges;
      if (nr_rgn_out_edges > 0)
	prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      else
	prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;

      nxt_in_edge = NEXT_IN (nxt_in_edge);
    }
  while (fst_in_edge != nxt_in_edge);

  BITSET_ADD (dom[bb], bb, bbset_size);
  BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);

  if (sched_verbose >= 2)
    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
	     (int) (100.0 * prob[bb]));
}
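/* A worked instance of the probability update above: if pred has
   probability 0.5 and three out edges of which one leaves the region,
   then nr_out_edges ends up 2, nr_rgn_out_edges is 1, and pred
   contributes 0.9 * 0.5 / 2 = 0.225 to prob[bb].  */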
/* Functions for target info.  */

/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
   Note that bb_trg dominates bb_src.  */

static void
split_edges (bb_src, bb_trg, bl)
     int bb_src;
     int bb_trg;
     edgelst *bl;
{
  int es = edgeset_size;
  edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));

  while (es--)
    src[es] = (pot_split[bb_src])[es];
  BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
  extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
  free (src);
}
/* Find the valid candidate-source-blocks for the target block TRG, compute
   their probability, and check if they are speculative or not.
   For speculative sources, compute their update-blocks and split-blocks.  */

static void
compute_trg_info (trg)
     int trg;
{
  register candidate *sp;
  edgelst el;
  int check_block, update_idx;
  int i, j, k, fst_edge, nxt_edge;

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = 100;

  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
	{
	  sp->src_prob = GET_SRC_PROB (i, trg);
	  sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
	}

      if (sp->is_valid)
	{
	  split_edges (i, trg, &el);
	  sp->is_speculative = (el.nr_members) ? 1 : 0;
	  if (sp->is_speculative && !flag_schedule_speculative)
	    sp->is_valid = 0;
	}

      if (sp->is_valid)
	{
	  char *update_blocks;

	  /* Compute split blocks and store them in bblst_table.
	     The TO block of every split edge is a split block.  */
	  sp->split_bbs.first_member = &bblst_table[bblst_last];
	  sp->split_bbs.nr_members = el.nr_members;
	  for (j = 0; j < el.nr_members; bblst_last++, j++)
	    bblst_table[bblst_last] =
	      TO_BLOCK (rgn_edges[el.first_member[j]]);
	  sp->update_bbs.first_member = &bblst_table[bblst_last];

	  /* Compute update blocks and store them in bblst_table.
	     For every split edge, look at the FROM block, and check
	     all out edges.  For each out edge that is not a split edge,
	     add the TO block to the update block list.  This list can end
	     up with a lot of duplicates.  We need to weed them out to avoid
	     overrunning the end of the bblst_table.  */
	  update_blocks = (char *) alloca (n_basic_blocks);
	  memset (update_blocks, 0, n_basic_blocks);

	  update_idx = 0;
	  for (j = 0; j < el.nr_members; j++)
	    {
	      check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
	      fst_edge = nxt_edge = OUT_EDGES (check_block);
	      do
		{
		  if (! update_blocks[TO_BLOCK (nxt_edge)])
		    {
		      for (k = 0; k < el.nr_members; k++)
			if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
			  break;

		      if (k >= el.nr_members)
			{
			  bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
			  update_blocks[TO_BLOCK (nxt_edge)] = 1;
			  update_idx++;
			}
		    }

		  nxt_edge = NEXT_OUT (nxt_edge);
		}
	      while (fst_edge != nxt_edge);
	    }
	  sp->update_bbs.nr_members = update_idx;

	  /* Make sure we didn't overrun the end of bblst_table.  */
	  if (bblst_last > bblst_size)
	    abort ();
	}
      else
	{
	  sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

	  sp->is_speculative = 0;
	  sp->src_prob = 0;
	}
    }
}
/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidate (i)
     int i;
{
  if (!candidate_table[i].is_valid)
    return;

  if (candidate_table[i].is_speculative)
    {
      int j;
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);

      fprintf (sched_dump, "split path: ");
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
	{
	  int b = candidate_table[i].split_bbs.first_member[j];

	  fprintf (sched_dump, " %d ", b);
	}
      fprintf (sched_dump, "\n");

      fprintf (sched_dump, "update path: ");
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
	{
	  int b = candidate_table[i].update_bbs.first_member[j];

	  fprintf (sched_dump, " %d ", b);
	}
      fprintf (sched_dump, "\n");
    }
  else
    {
      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
    }
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidates (trg)
     int trg;
{
  int i;

  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
	   BB_TO_BLOCK (trg), trg);
  for (i = trg + 1; i < current_nr_blocks; i++)
    debug_candidate (i);
}
/* Functions for speculative scheduling.  */

/* Return 0 if x is a set of a register alive in the beginning of one
   of the split-blocks of src, otherwise return 1.  */

static int
check_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return 1;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
	 || GET_CODE (reg) == SIGN_EXTRACT
	 || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
	  if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
	    return 1;

      return 0;
    }

  if (GET_CODE (reg) != REG)
    return 1;

  regno = REGNO (reg);

  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    {
      /* Global registers are assumed live.  */
      return 0;
    }
  else
    {
      if (regno < FIRST_PSEUDO_REGISTER)
	{
	  /* Check for hard registers.  */
	  int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
	  while (--j >= 0)
	    {
	      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
		{
		  int b = candidate_table[src].split_bbs.first_member[i];

		  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
				       regno + j))
		    return 0;
		}
	    }
	}
      else
	{
	  /* Check for pseudo registers.  */
	  for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
	    {
	      int b = candidate_table[src].split_bbs.first_member[i];

	      if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
		return 0;
	    }
	}
    }

  return 1;
}
/* If x is a set of a register R, mark that R is alive in the beginning
   of every update-block of src.  */

static void
update_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
	 || GET_CODE (reg) == SIGN_EXTRACT
	 || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
	  update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));

      return;
    }

  if (GET_CODE (reg) != REG)
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
    {
      if (regno < FIRST_PSEUDO_REGISTER)
	{
	  int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
	  while (--j >= 0)
	    {
	      for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
		{
		  int b = candidate_table[src].update_bbs.first_member[i];

		  SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
				     regno + j);
		}
	    }
	}
      else
	{
	  for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
	    {
	      int b = candidate_table[src].update_bbs.first_member[i];

	      SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
	    }
	}
    }
}
/* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before first insertion of insn to
   ready-list or before the scheduling.  */

static int
check_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return check_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
	if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
	     || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
	    && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
	  return 0;

      return 1;
    }

  return 1;
}

/* Update the live registers info after insn was moved speculatively from
   block src to trg.  */

static void
update_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    update_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
	if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
	    || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
	  update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
    }
}
/* Exception Free Loads:

   We define five classes of speculative loads: IFREE, IRISKY,
   PFREE, PRISKY, and MFREE.

   IFREE loads are loads that are proved to be exception-free, just
   by examining the load insn.  Examples for such loads are loads
   from TOC and loads of global data.

   IRISKY loads are loads that are proved to be exception-risky,
   just by examining the load insn.  Examples for such loads are
   volatile loads and loads from shared memory.

   PFREE loads are loads for which we can prove, by examining other
   insns, that they are exception-free.  Currently, this class consists
   of loads for which we are able to find a "similar load", either in
   the target block, or, if only one split-block exists, in that split
   block.  Load2 is similar to load1 if both have same single base
   register.  We identify only part of the similar loads, by finding
   an insn upon which both load1 and load2 have a DEF-USE dependence.

   PRISKY loads are loads for which we can prove, by examining other
   insns, that they are exception-risky.  Currently we have two proofs for
   such loads.  The first proof detects loads that are probably guarded by a
   test on the memory address.  This proof is based on the
   backward and forward data dependence information for the region.
   Let load-insn be the examined load.
   Load-insn is PRISKY iff ALL the following hold:

   - insn1 is not in the same block as load-insn
   - there is a DEF-USE dependence chain (insn1, ..., load-insn)
   - test-insn is either a compare or a branch, not in the same block
     as load-insn
   - load-insn is reachable from test-insn
   - there is a DEF-USE dependence chain (insn1, ..., test-insn)

   This proof might fail when the compare and the load are fed
   by an insn not in the region.  To solve this, we will add to this
   group all loads that have no input DEF-USE dependence.

   The second proof detects loads that are directly or indirectly
   fed by a speculative load.  This proof is affected by the
   scheduling process.  We will use the flag fed_by_spec_load.
   Initially, all insns have this flag reset.  After a speculative
   motion of an insn, if insn is either a load, or marked as
   fed_by_spec_load, we will also mark as fed_by_spec_load every
   insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
   load which is fed_by_spec_load is also PRISKY.

   MFREE (maybe-free) loads are all the remaining loads.  They may be
   exception-free, but we cannot prove it.

   Now, all loads in IFREE and PFREE classes are considered
   exception-free, while all loads in IRISKY and PRISKY classes are
   considered exception-risky.  As for loads in the MFREE class,
   these are considered either exception-free or exception-risky,
   depending on whether we are pessimistic or optimistic.  We have
   to take the pessimistic approach to assure the safety of
   speculative scheduling, but we can take the optimistic approach
   by invoking the -fsched-spec-load-dangerous option.  */
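/* Concretely, may_trap_exp below maps a volatile MEM to IRISKY, a MEM
   that may_trap_p proves safe to IFREE, a MEM whose address is based
   on at most one register (CONST_BASED_ADDRESS_P) to PFREE_CANDIDATE,
   and any other MEM to PRISKY_CANDIDATE.  */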
enum INSN_TRAP_CLASS
{
  TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
  PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
};

#define WORST_CLASS(class1, class2) \
((class1 > class2) ? class1 : class2)
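/* The enum values are ordered from safest (TRAP_FREE == 0) to riskiest
   (TRAP_RISKY == 5), so e.g.
   WORST_CLASS (PFREE_CANDIDATE, IRISKY) == IRISKY, because 4 > 2.  */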
/* Non-zero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)					\
(bb_from == bb_to							\
 || IS_RGN_ENTRY (bb_from)						\
 || (bitset_member (ancestor_edges[bb_to],				\
		    EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))),	\
		    edgeset_size)))

/* Non-zero iff the address is comprised from at most 1 register.  */
#define CONST_BASED_ADDRESS_P(x)			\
  (GET_CODE (x) == REG					\
   || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS	\
	|| (GET_CODE (x) == LO_SUM))			\
       && (GET_CODE (XEXP (x, 0)) == CONST_INT		\
	   || GET_CODE (XEXP (x, 1)) == CONST_INT)))
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */

static void
set_spec_fed (load_insn)
     rtx load_insn;
{
  rtx link;

  for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
    if (GET_MODE (link) == VOIDmode)
      FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
}				/* set_spec_fed */
/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn, that guards the speculative load.  */

static int
find_conditional_protection (insn, load_insn_bb)
     rtx insn;
     int load_insn_bb;
{
  rtx link;

  /* Iterate through DEF-USE forward dependences.  */
  for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
    {
      rtx next = XEXP (link, 0);
      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
	   CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
	  && IS_REACHABLE (INSN_BB (next), load_insn_bb)
	  && load_insn_bb != INSN_BB (next)
	  && GET_MODE (link) == VOIDmode
	  && (GET_CODE (next) == JUMP_INSN
	      || find_conditional_protection (next, load_insn_bb)))
	return 1;
    }
  return 0;
}				/* find_conditional_protection */
/* Returns 1 if the same insn1 that participates in the computation
   of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
   chains:
   insn1 -> ... -> conditional-branch
   insn1 -> ... -> load_insn,
   and if a flow path exists:
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
   and if insn1 is on the path
   region-entry -> ... -> bb_trg -> ... load_insn.

   Locate insn1 by climbing on LOG_LINKS from load_insn.
   Locate the branch by following INSN_DEPEND from insn1.  */

static int
is_conditionally_protected (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx link;

  for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
    {
      rtx insn1 = XEXP (link, 0);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (GET_MODE (link) != VOIDmode
	  || GET_CODE (insn1) == JUMP_INSN)
	continue;

      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
	  || (CONTAINING_RGN (BLOCK_NUM (insn1))
	      != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
	  || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
	continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
	return 1;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return 0;
}				/* is_conditionally_protected */
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
   load_insn can move speculatively from bb_src to bb_trg.  All the
   following must hold:

   (1) both loads have 1 base register (PFREE_CANDIDATEs).
   (2) load_insn and load1 have a def-use dependence upon
   the same insn 'insn1'.
   (3) either load2 is in bb_trg, or:
   - there's only one split-block, and
   - load1 is on the escape path, and

   From all these we can conclude that the two loads access memory
   addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */

static int
is_pfree (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx back_link;
  register candidate *candp = candidate_table + bb_src;

  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;

  for (back_link = LOG_LINKS (load_insn);
       back_link; back_link = XEXP (back_link, 1))
    {
      rtx insn1 = XEXP (back_link, 0);

      if (GET_MODE (back_link) == VOIDmode)
	{
	  /* Found a DEF-USE dependence (insn1, load_insn).  */
	  rtx fore_link;

	  for (fore_link = INSN_DEPEND (insn1);
	       fore_link; fore_link = XEXP (fore_link, 1))
	    {
	      rtx insn2 = XEXP (fore_link, 0);
	      if (GET_MODE (fore_link) == VOIDmode)
		{
		  /* Found a DEF-USE dependence (insn1, insn2).  */
		  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
		    /* insn2 not guaranteed to be a 1 base reg load.  */
		    continue;

		  if (INSN_BB (insn2) == bb_trg)
		    /* insn2 is the similar load, in the target block.  */
		    return 1;

		  if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
		    /* insn2 is a similar load, in a split-block.  */
		    return 1;
		}
	    }
	}
    }

  /* Couldn't find a similar load.  */
  return 0;
}				/* is_pfree */
/* Returns a class that insn with GET_DEST(insn)=x may belong to,
   as found by analyzing insn's expression.  */

static int
may_trap_exp (x, is_store)
     rtx x;
     int is_store;
{
  enum rtx_code code;

  if (x == 0)
    return TRAP_FREE;
  code = GET_CODE (x);
  if (is_store)
    {
      if (code == MEM)
	return TRAP_RISKY;
      else
	return TRAP_FREE;
    }
  if (code == MEM)
    {
      /* The insn uses memory:  a volatile load.  */
      if (MEM_VOLATILE_P (x))
	return IRISKY;
      /* An exception-free load.  */
      if (!may_trap_p (x))
	return IFREE;
      /* A load with 1 base register, to be further checked.  */
      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
	return PFREE_CANDIDATE;
      /* No info on the load, to be further checked.  */
      return PRISKY_CANDIDATE;
    }
  else
    {
      const char *fmt;
      int i, insn_class = TRAP_FREE;

      /* Neither store nor load, check if it may cause a trap.  */
      if (may_trap_p (x))
	return TRAP_RISKY;
      /* Recursive step: walk the insn...  */
      fmt = GET_RTX_FORMAT (code);
      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
	{
	  if (fmt[i] == 'e')
	    {
	      int tmp_class = may_trap_exp (XEXP (x, i), is_store);
	      insn_class = WORST_CLASS (insn_class, tmp_class);
	    }
	  else if (fmt[i] == 'E')
	    {
	      int j;
	      for (j = 0; j < XVECLEN (x, i); j++)
		{
		  int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
		  insn_class = WORST_CLASS (insn_class, tmp_class);
		  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
		    break;
		}
	    }
	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
	    break;
	}
      return insn_class;
    }
}
/* Classifies insn for the purpose of verifying that it can be
   moved speculatively, by examining its pattern, returning:
   TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
   TRAP_FREE: non-load insn.
   IFREE: load from a globally safe location.
   IRISKY: volatile load.
   PFREE_CANDIDATE, PRISKY_CANDIDATE: load that needs to be checked for
   being either PFREE or PRISKY.  */

static int
haifa_classify_insn (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int tmp_class = TRAP_FREE;
  int insn_class = TRAP_FREE;
  enum rtx_code code;

  if (GET_CODE (pat) == PARALLEL)
    {
      int i, len = XVECLEN (pat, 0);

      for (i = len - 1; i >= 0; i--)
	{
	  code = GET_CODE (XVECEXP (pat, 0, i));
	  switch (code)
	    {
	    case CLOBBER:
	      /* Test if it is a 'store'.  */
	      tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
	      break;
	    case SET:
	      /* Test if it is a store.  */
	      tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
	      if (tmp_class == TRAP_RISKY)
		break;
	      /* Test if it is a load.  */
	      tmp_class
		= WORST_CLASS (tmp_class,
			       may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)),
					     0));
	      break;
	    case COND_EXEC:
	    case TRAP_IF:
	      tmp_class = TRAP_RISKY;
	      break;
	    default:
	      ;
	    }
	  insn_class = WORST_CLASS (insn_class, tmp_class);
	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
	    break;
	}
    }
  else
    {
      code = GET_CODE (pat);
      switch (code)
	{
	case CLOBBER:
	  /* Test if it is a 'store'.  */
	  tmp_class = may_trap_exp (XEXP (pat, 0), 1);
	  break;
	case SET:
	  /* Test if it is a store.  */
	  tmp_class = may_trap_exp (SET_DEST (pat), 1);
	  if (tmp_class == TRAP_RISKY)
	    break;
	  /* Test if it is a load.  */
	  tmp_class =
	    WORST_CLASS (tmp_class,
			 may_trap_exp (SET_SRC (pat), 0));
	  break;
	case COND_EXEC:
	case TRAP_IF:
	  tmp_class = TRAP_RISKY;
	  break;
	default:
	  ;
	}
      insn_class = tmp_class;
    }

  return insn_class;
}
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
   a load moved speculatively, or if load_insn is protected by
   a compare on load_insn's address).  */

static int
is_prisky (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;

  if (LOG_LINKS (load_insn) == NULL)
    /* Dependence may 'hide' out of the region.  */
    return 1;

  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
}

/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */

static int
is_exception_free (insn, bb_src, bb_trg)
     rtx insn;
     int bb_src, bb_trg;
{
  int insn_class = haifa_classify_insn (insn);

  /* Handle non-load insns.  */
  switch (insn_class)
    {
    case TRAP_FREE:
      return 1;
    case TRAP_RISKY:
      return 0;
    default:
      ;
    }

  /* Handle loads.  */
  if (!flag_schedule_speculative_load)
    return 0;
  IS_LOAD_INSN (insn) = 1;
  switch (insn_class)
    {
    case IFREE:
      return 1;
    case IRISKY:
      return 0;
    case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
	return 1;
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
    case PRISKY_CANDIDATE:
      if (!flag_schedule_speculative_load_dangerous
	  || is_prisky (insn, bb_src, bb_trg))
	return 0;
      break;
    default:
      ;
    }

  return flag_schedule_speculative_load_dangerous;
}
/* The number of insns from the current block scheduled so far.  */
static int sched_target_n_insns;
/* The number of insns from the current block to be scheduled in total.  */
static int target_n_insns;
/* The number of insns from the entire region scheduled so far.  */
static int sched_n_insns;
/* Nonzero if the last scheduled insn was a jump.  */
static int last_was_jump;

/* Implementations of the sched_info functions for region scheduling.  */
static void init_ready_list PARAMS ((struct ready_list *));
static int can_schedule_ready_p PARAMS ((rtx));
static int new_ready PARAMS ((rtx));
static int schedule_more_p PARAMS ((void));
static const char *rgn_print_insn PARAMS ((rtx, int));
static int rgn_rank PARAMS ((rtx, rtx));
static int contributes_to_priority PARAMS ((rtx, rtx));
static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
/* Return nonzero if there are more insns that should be scheduled.  */

static int
schedule_more_p ()
{
  return ! last_was_jump && sched_target_n_insns < target_n_insns;
}
/* Add all insns that are initially ready to the ready list READY.  Called
   once before scheduling a set of insns.  */

static void
init_ready_list (ready)
     struct ready_list *ready;
{
  rtx prev_head = current_sched_info->prev_head;
  rtx next_tail = current_sched_info->next_tail;
  int bb_src;
  rtx insn;

  target_n_insns = 0;
  sched_target_n_insns = 0;
  sched_n_insns = 0;
  last_was_jump = 0;

  /* Print debugging information.  */
  if (sched_verbose >= 5)
    debug_dependencies ();

  /* Prepare current target block info.  */
  if (current_nr_blocks > 1)
    {
      candidate_table = (candidate *) xmalloc (current_nr_blocks
					       * sizeof (candidate));

      bblst_last = 0;
      /* bblst_table holds split blocks and update blocks for each block after
	 the current one in the region.  split blocks and update blocks are
	 the TO blocks of region edges, so there can be at most rgn_nr_edges
	 of them.  */
      bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
      bblst_table = (int *) xmalloc (bblst_size * sizeof (int));

      bitlst_table_last = 0;
      bitlst_table_size = rgn_nr_edges;
      bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));

      compute_trg_info (target_bb);
    }

  /* Initialize ready list with all 'ready' insns in target block.
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
      rtx next;

      if (! INSN_P (insn))
	continue;
      next = NEXT_INSN (insn);

      if (INSN_DEP_COUNT (insn) == 0
	  && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
	ready_add (ready, insn);
      if (!(SCHED_GROUP_P (insn)))
	target_n_insns++;
    }

  /* Add to ready list all 'ready' insns in valid source blocks.
     For speculative insns, check-live, exception-free, and
     issue-delay.  */
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
    if (IS_VALID (bb_src))
      {
	rtx src_head;
	rtx src_next_tail;
	rtx tail, head;

	get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
	src_next_tail = NEXT_INSN (tail);
	src_head = head;

	for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
	  {
	    if (! INSN_P (insn))
	      continue;

	    if (!CANT_MOVE (insn)
		&& (!IS_SPECULATIVE_INSN (insn)
		    || (((targetm.sched.use_dfa_pipeline_interface
			  && recog_memoized (insn) >= 0
			  && min_insn_conflict_delay (curr_state, insn,
						      insn) <= 3)
			 || (!targetm.sched.use_dfa_pipeline_interface
			     && insn_issue_delay (insn) <= 3))
			&& check_live (insn, bb_src)
			&& is_exception_free (insn, bb_src, target_bb))))
	      {
		rtx next;

		/* Note that we haven't squirreled away the notes for
		   blocks other than the current.  So if this is a
		   speculative insn, NEXT might otherwise be a note.  */
		next = next_nonnote_insn (insn);
		if (INSN_DEP_COUNT (insn) == 0
		    && (! next
			|| SCHED_GROUP_P (next) == 0
			|| ! INSN_P (next)))
		  ready_add (ready, insn);
	      }
	  }
      }
}
/* Called after taking INSN from the ready list.  Returns nonzero if this
   insn can be scheduled, zero if we should silently discard it.  */

static int
can_schedule_ready_p (insn)
     rtx insn;
{
  if (GET_CODE (insn) == JUMP_INSN)
    last_was_jump = 1;

  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb)
    {
      basic_block b1;
      rtx temp;

      if (IS_SPECULATIVE_INSN (insn))
	{
	  if (!check_live (insn, INSN_BB (insn)))
	    return 0;
	  update_live (insn, INSN_BB (insn));

	  /* For speculative load, mark insns fed by it.  */
	  if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
	    set_spec_fed (insn);

	  nr_spec++;
	}
      nr_inter++;

      /* Find the beginning of the scheduling group.  */
      /* ??? Ought to update basic block here, but later bits of
	 schedule_block assumes the original insn block is
	 still intact.  */

      temp = insn;
      while (SCHED_GROUP_P (temp))
	temp = PREV_INSN (temp);

      /* Update source block boundaries.  */
      b1 = BLOCK_FOR_INSN (temp);
      if (temp == b1->head && insn == b1->end)
	{
	  /* We moved all the insns in the basic block.
	     Emit a note after the last insn and update the
	     begin/end boundaries to point to the note.  */
	  rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
	  b1->head = note;
	  b1->end = note;
	}
      else if (insn == b1->end)
	{
	  /* We took insns from the end of the basic block,
	     so update the end of block boundary so that it
	     points to the first insn we did not move.  */
	  b1->end = PREV_INSN (temp);
	}
      else if (temp == b1->head)
	{
	  /* We took insns from the start of the basic block,
	     so update the start of block boundary so that
	     it points to the first insn we did not move.  */
	  b1->head = NEXT_INSN (insn);
	}
    }
  else
    {
      /* In block motion.  */
      sched_target_n_insns++;
    }
  sched_n_insns++;

  return 1;
}
/* Called after INSN has all its dependencies resolved.  Return nonzero
   if it should be moved to the ready list or the queue, or zero if we
   should silently discard it.  */
static int
new_ready (next)
     rtx next;
{
  /* For speculative insns, before inserting to ready/queue,
     check live, exception-free, and issue-delay.  */
  if (INSN_BB (next) != target_bb
      && (!IS_VALID (INSN_BB (next))
	  || CANT_MOVE (next)
	  || (IS_SPECULATIVE_INSN (next)
	      && ((targetm.sched.use_dfa_pipeline_interface
		   && (recog_memoized (next) < 0
		       || min_insn_conflict_delay (curr_state, next,
						   next) > 3))
		  || (!targetm.sched.use_dfa_pipeline_interface
		      && insn_issue_delay (next) > 3)
		  || !check_live (next, INSN_BB (next))
		  || !is_exception_free (next, INSN_BB (next), target_bb)))))
    return 0;

  return 1;
}
2273 /* Return a string that contains the insn uid and optionally anything else
2274 necessary to identify this insn in an output. It's valid to use a
2275 static buffer for this. The ALIGNED parameter should cause the string
2276 to be formatted so that multiple output lines will line up nicely. */
2279 rgn_print_insn (insn, aligned)
2283 static char tmp[80];
2286 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2289 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2290 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2292 sprintf (tmp, "%d", INSN_UID (insn));
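/* Illustrative note (not from the source): with the aligned format
   "b%3d: i%4d", block 3 / uid 42 prints as "b  3: i  42", so the columns
   line up across dump lines; the unaligned forms print "42/b3" for an
   interblock candidate and a bare "42" otherwise.  */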
2297 /* Compare priority of two insns. Return a positive number if the second
2298 insn is to be preferred for scheduling, and a negative one if the first
2299 is to be preferred. Zero if they are equally good. */
2302 rgn_rank (insn1, insn2)
2305 /* Some comparisons make sense only in interblock scheduling. */
2306 if (INSN_BB (insn1) != INSN_BB (insn2))
2308 int spec_val, prob_val;
2310 /* Prefer an inblock motion over an interblock motion. */
2311 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2313 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2316 /* Prefer a useful motion over a speculative one. */
2317 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2321 /* Prefer a more probable (speculative) insn. */
2322 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
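/* Illustrative sketch (not part of the pass): the ordering above written
   as a self-contained comparator over a hypothetical record.  A positive
   result prefers B, a negative one prefers A, matching the convention
   documented for rgn_rank.  */
#if 0
struct toy_rank_info
{
  int in_target_bb;  /* Nonzero for an inblock candidate.  */
  int speculative;   /* Nonzero for a speculative candidate.  */
  int probability;   /* Probability of reaching the insn's block.  */
};

static int
toy_rank (const struct toy_rank_info *a, const struct toy_rank_info *b)
{
  /* Inblock motion beats interblock motion.  */
  if (b->in_target_bb && !a->in_target_bb)
    return 1;
  if (a->in_target_bb && !b->in_target_bb)
    return -1;

  /* A useful (non-speculative) motion beats a speculative one.  */
  if (a->speculative != b->speculative)
    return a->speculative - b->speculative;

  /* Otherwise prefer the more probable candidate.  */
  return b->probability - a->probability;
}
#endif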
2329 /* NEXT is an instruction that depends on INSN (a backward dependence);
2330 return nonzero if we should include this dependence in priority
2331 calculations. */
2334 contributes_to_priority (next, insn)
2337 return BLOCK_NUM (next) == BLOCK_NUM (insn);
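/* Illustrative note (not from the source): because of the BLOCK_NUM test
   above, only dependences between insns of the same block feed the
   priority (critical path) computation; dependence edges that cross a
   block boundary are left out of the sums.  */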
2340 /* INSN is a JUMP_INSN. Store the set of registers that must be considered
2341 to be set by this jump in SET. */
2344 compute_jump_reg_dependencies (insn, set)
2345 rtx insn ATTRIBUTE_UNUSED;
2346 regset set ATTRIBUTE_UNUSED;
2348 /* Nothing to do here, since we postprocess jumps in
2349 add_branch_dependences. */
2352 /* Used in schedule_insns to initialize current_sched_info for scheduling
2353 regions (or single basic blocks). */
2355 static struct sched_info region_sched_info =
2358 can_schedule_ready_p,
2363 contributes_to_priority,
2364 compute_jump_reg_dependencies,
2371 /* Add dependences so that branches are scheduled to run last in their
2372 block. */
2375 add_branch_dependences (head, tail)
2380 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2381 to remain in order at the end of the block by adding dependencies and
2382 giving the last a high priority. There may be notes present, and
2383 prev_head may also be a note.
2385 Branches must obviously remain at the end. Calls should remain at the
2386 end since moving them results in worse register allocation. Uses remain
2387 at the end to ensure proper register allocation. cc0 setters remain
2388 at the end because they can't be moved away from their cc0 user. */
2391 while (GET_CODE (insn) == CALL_INSN
2392 || GET_CODE (insn) == JUMP_INSN
2393 || (GET_CODE (insn) == INSN
2394 && (GET_CODE (PATTERN (insn)) == USE
2395 || GET_CODE (PATTERN (insn)) == CLOBBER
2397 || sets_cc0_p (PATTERN (insn))
2400 || GET_CODE (insn) == NOTE)
2402 if (GET_CODE (insn) != NOTE)
2403 {
2404 if (last != 0
2405 && !find_insn_list (insn, LOG_LINKS (last)))
2407 add_dependence (last, insn, REG_DEP_ANTI);
2408 INSN_REF_COUNT (insn)++;
2411 CANT_MOVE (insn) = 1;
2414 /* Skip over insns that are part of a group.
2415 Make each insn explicitly depend on the previous insn.
2416 This ensures that only the group header will ever enter
2417 the ready queue (and, when scheduled, will automatically
2418 schedule the SCHED_GROUP_P block). */
2419 while (SCHED_GROUP_P (insn))
2421 rtx temp = prev_nonnote_insn (insn);
2422 add_dependence (insn, temp, REG_DEP_ANTI);
2423 insn = temp;
2427 /* Don't overrun the bounds of the basic block. */
2428 if (insn == head)
2429 break;
2431 insn = PREV_INSN (insn);
2434 /* Make sure these insns are scheduled last in their block. */
2437 while (insn != head)
2439 insn = prev_nonnote_insn (insn);
2441 if (INSN_REF_COUNT (insn) != 0)
2442 continue;
2444 add_dependence (last, insn, REG_DEP_ANTI);
2445 INSN_REF_COUNT (insn) = 1;
2447 /* Skip over insns that are part of a group. */
2448 while (SCHED_GROUP_P (insn))
2449 insn = prev_nonnote_insn (insn);
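/* Illustrative sketch (not part of the pass): the pinning idea of the
   loop above.  Anchoring every unreferenced trailing insn to LAST with
   an anti dependence keeps the whole suffix at the block's end.  The
   list type and callback here are hypothetical.  */
#if 0
struct toy_tail_insn
{
  struct toy_tail_insn *prev;
  int ref_count;  /* Mirrors INSN_REF_COUNT.  */
};

static void
toy_pin_tail (struct toy_tail_insn *head, struct toy_tail_insn *last,
              void (*add_anti_dep) (struct toy_tail_insn *,
                                    struct toy_tail_insn *))
{
  struct toy_tail_insn *insn = last;
  while (insn != head)
    {
      insn = insn->prev;
      if (insn->ref_count != 0)
        continue;
      add_anti_dep (last, insn);
      insn->ref_count = 1;
    }
}
#endif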
2453 /* Data structures for the computation of data dependences in a region. We
2454 keep one `deps' structure for every basic block. Before analyzing the
2455 data dependences for a bb, its variables are initialized as a function of
2456 the variables of its predecessors. When the analysis for a bb completes,
2457 we save the contents to the corresponding bb_deps[bb] variable. */
2459 static struct deps *bb_deps;
2461 /* After computing the dependencies for block BB, propagate the dependencies
2462 found in TMP_DEPS to the successors of the block. */
2464 propagate_deps (bb, tmp_deps)
2466 struct deps *tmp_deps;
2468 int b = BB_TO_BLOCK (bb);
2471 rtx link_insn, link_mem;
2474 /* These lists should point to the right place, for correct
2475 freeing later. */
2476 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
2477 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
2478 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
2479 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
2481 /* bb's structures are inherited by its successors. */
2482 first_edge = e = OUT_EDGES (b);
2489 int b_succ = TO_BLOCK (e);
2490 int bb_succ = BLOCK_TO_BB (b_succ);
2491 struct deps *succ_deps = bb_deps + bb_succ;
2493 /* Only bbs "below" bb, in the same region, are interesting. */
2494 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2501 /* The reg_last lists are inherited by bb_succ. */
2502 EXECUTE_IF_SET_IN_REG_SET (&tmp_deps->reg_last_in_use, 0, reg,
2504 struct deps_reg *tmp_deps_reg = &tmp_deps->reg_last[reg];
2505 struct deps_reg *succ_deps_reg = &succ_deps->reg_last[reg];
2507 for (u = tmp_deps_reg->uses; u; u = XEXP (u, 1))
2508 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->uses))
2509 succ_deps_reg->uses
2510 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->uses);
2512 for (u = tmp_deps_reg->sets; u; u = XEXP (u, 1))
2513 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->sets))
2514 succ_deps_reg->sets
2515 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->sets);
2517 for (u = tmp_deps_reg->clobbers; u; u = XEXP (u, 1))
2518 if (! find_insn_list (XEXP (u, 0), succ_deps_reg->clobbers))
2519 succ_deps_reg->clobbers
2520 = alloc_INSN_LIST (XEXP (u, 0), succ_deps_reg->clobbers);
2522 IOR_REG_SET (&succ_deps->reg_last_in_use, &tmp_deps->reg_last_in_use);
2524 /* Mem read/write lists are inherited by bb_succ. */
2525 link_insn = tmp_deps->pending_read_insns;
2526 link_mem = tmp_deps->pending_read_mems;
2527 while (link_insn)
2528 {
2529 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2530 XEXP (link_mem, 0),
2531 succ_deps->pending_read_insns,
2532 succ_deps->pending_read_mems)))
2533 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
2534 &succ_deps->pending_read_mems,
2535 XEXP (link_insn, 0), XEXP (link_mem, 0));
2536 link_insn = XEXP (link_insn, 1);
2537 link_mem = XEXP (link_mem, 1);
2538 }
2540 link_insn = tmp_deps->pending_write_insns;
2541 link_mem = tmp_deps->pending_write_mems;
2542 while (link_insn)
2543 {
2544 if (!(find_insn_mem_list (XEXP (link_insn, 0),
2545 XEXP (link_mem, 0),
2546 succ_deps->pending_write_insns,
2547 succ_deps->pending_write_mems)))
2548 add_insn_mem_dependence (succ_deps,
2549 &succ_deps->pending_write_insns,
2550 &succ_deps->pending_write_mems,
2551 XEXP (link_insn, 0), XEXP (link_mem, 0));
2553 link_insn = XEXP (link_insn, 1);
2554 link_mem = XEXP (link_mem, 1);
2555 }
2557 /* last_function_call is inherited by bb_succ. */
2558 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
2559 if (! find_insn_list (XEXP (u, 0), succ_deps->last_function_call))
2560 succ_deps->last_function_call
2561 = alloc_INSN_LIST (XEXP (u, 0), succ_deps->last_function_call);
2563 /* last_pending_memory_flush is inherited by bb_succ. */
2564 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
2565 if (! find_insn_list (XEXP (u, 0),
2566 succ_deps->last_pending_memory_flush))
2567 succ_deps->last_pending_memory_flush
2568 = alloc_INSN_LIST (XEXP (u, 0),
2569 succ_deps->last_pending_memory_flush);
2571 /* sched_before_next_call is inherited by bb_succ. */
2572 x = LOG_LINKS (tmp_deps->sched_before_next_call);
2573 for (; x; x = XEXP (x, 1))
2574 add_dependence (succ_deps->sched_before_next_call,
2575 XEXP (x, 0), REG_DEP_ANTI);
2579 while (e != first_edge);
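/* Illustrative sketch (not part of the pass): every inheritance step in
   propagate_deps is the same duplicate-free union over cons lists, and
   the enclosing do/while walks a circular ring of outgoing edges.  A
   minimal union with a hypothetical cons cell looks like this.  */
#if 0
#include <stdlib.h>

struct toy_cons { void *car; struct toy_cons *cdr; };

static int
toy_member (void *x, struct toy_cons *list)
{
  for (; list; list = list->cdr)
    if (list->car == x)
      return 1;
  return 0;
}

/* DEST = union (DEST, SRC): push each SRC element not already present,
   mirroring the find_insn_list / alloc_INSN_LIST pairs above.  */
static struct toy_cons *
toy_union (struct toy_cons *dest, struct toy_cons *src)
{
  for (; src; src = src->cdr)
    if (!toy_member (src->car, dest))
      {
        struct toy_cons *cell = (struct toy_cons *) malloc (sizeof *cell);
        cell->car = src->car;
        cell->cdr = dest;
        dest = cell;
      }
  return dest;
}
#endif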
2582 /* Compute backward dependences inside bb. In a multiple-block region:
2583 (1) a bb is analyzed after its predecessors, and (2) the lists in
2584 effect at the end of bb (after analyzing for bb) are inherited by
2585 bb's successors.
2587 Specifically for reg-reg data dependences, the block insns are
2588 scanned by sched_analyze () top-to-bottom. Two lists are
2589 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2590 and reg_last[].uses for register USEs.
2592 When analysis is completed for bb, we update for its successors:
2593 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2594 ; - USES[succ] = Union (USES [succ], USES [bb])
2596 The mechanism for computing mem-mem data dependence is very
2597 similar, and the result is interblock dependences in the region. */
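/* A small, hypothetical example of the unions above: if bb1 sets r1 and
   uses r2, and bb2 is a successor in the same region, then after bb1 is
   analyzed DEFS[bb2] gains bb1's definition of r1 and USES[bb2] gains
   bb1's use of r2, so insns in bb2 touching r1 or r2 acquire interblock
   dependences on bb1's insns.  */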
2600 compute_block_backward_dependences (bb)
2604 struct deps tmp_deps;
2606 tmp_deps = bb_deps[bb];
2608 /* Do the analysis for this block. */
2609 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2610 sched_analyze (&tmp_deps, head, tail);
2611 add_branch_dependences (head, tail);
2613 if (current_nr_blocks > 1)
2614 propagate_deps (bb, &tmp_deps);
2616 /* Free up the INSN_LISTs. */
2617 free_deps (&tmp_deps);
2620 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2621 them to the unused_*_list variables, so that they can be reused. */
2624 free_pending_lists ()
2628 for (bb = 0; bb < current_nr_blocks; bb++)
2630 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2631 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2632 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2633 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2637 /* Print dependences for debugging, callable from debugger. */
2640 debug_dependencies ()
2644 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2645 for (bb = 0; bb < current_nr_blocks; bb++)
2653 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2654 next_tail = NEXT_INSN (tail);
2655 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2656 BB_TO_BLOCK (bb), bb);
2658 if (targetm.sched.use_dfa_pipeline_interface)
2659 {
2660 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2661 "insn", "code", "bb", "dep", "prio", "cost",
2662 "reservation");
2663 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2664 "----", "----", "--", "---", "----", "----",
2665 "-----------");
2666 }
2667 else
2668 {
2669 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2670 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2671 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2672 "----", "----", "--", "---", "----", "----", "--------", "-----");
2675 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2679 if (! INSN_P (insn))
2680 {
2681 int n;
2682 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2683 if (GET_CODE (insn) == NOTE)
2684 {
2685 n = NOTE_LINE_NUMBER (insn);
2686 if (n < 0)
2687 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2688 else
2689 fprintf (sched_dump, "line %d, file %s\n", n,
2690 NOTE_SOURCE_FILE (insn));
2691 }
2692 else
2693 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2694 continue;
2695 }
2697 if (targetm.sched.use_dfa_pipeline_interface)
2699 fprintf (sched_dump,
2700 ";; %s%5d%6d%6d%6d%6d%6d ",
2701 (SCHED_GROUP_P (insn) ? "+" : " "),
2705 INSN_DEP_COUNT (insn),
2706 INSN_PRIORITY (insn),
2707 insn_cost (insn, 0, 0));
2709 if (recog_memoized (insn) < 0)
2710 fprintf (sched_dump, "nothing");
2711 else
2712 print_reservation (sched_dump, insn);
2716 int unit = insn_unit (insn);
2719 || function_units[unit].blockage_range_function == 0
2721 : function_units[unit].blockage_range_function (insn));
2722 fprintf (sched_dump,
2723 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2724 (SCHED_GROUP_P (insn) ? "+" : " "),
2728 INSN_DEP_COUNT (insn),
2729 INSN_PRIORITY (insn),
2730 insn_cost (insn, 0, 0),
2731 (int) MIN_BLOCKAGE_COST (range),
2732 (int) MAX_BLOCKAGE_COST (range));
2733 insn_print_units (insn);
2736 fprintf (sched_dump, "\t: ");
2737 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2738 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2739 fprintf (sched_dump, "\n");
2743 fprintf (sched_dump, "\n");
2746 /* Schedule a region. A region is either an inner loop, a loop-free
2747 subroutine, or a single basic block. Each bb in the region is
2748 scheduled after its flow predecessors. */
2751 schedule_region (rgn)
2755 int rgn_n_insns = 0;
2756 int sched_rgn_n_insns = 0;
2758 /* Set variables for the current region. */
2759 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2760 current_blocks = RGN_BLOCKS (rgn);
2762 init_deps_global ();
2764 /* Initializations for region data dependence analysis. */
2765 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2766 for (bb = 0; bb < current_nr_blocks; bb++)
2767 init_deps (bb_deps + bb);
2769 /* Compute LOG_LINKS. */
2770 for (bb = 0; bb < current_nr_blocks; bb++)
2771 compute_block_backward_dependences (bb);
2773 /* Compute INSN_DEPEND. */
2774 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2777 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2779 compute_forward_dependences (head, tail);
2782 /* Set priorities. */
2783 for (bb = 0; bb < current_nr_blocks; bb++)
2786 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2788 rgn_n_insns += set_priorities (head, tail);
2791 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2792 if (current_nr_blocks > 1)
2796 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2798 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
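/* The "+ 1" above rounds up to whole host words: with 64-bit words,
   e.g., 70 blocks need 70/64 + 1 = 2 words.  (When the block count is
   an exact multiple of the word size this allocates one spare word,
   which is harmless.)  */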
2799 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
2800 for (i = 0; i < current_nr_blocks; i++)
2801 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
2805 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2806 for (i = 1; i < nr_edges; i++)
2807 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2808 EDGE_TO_BIT (i) = rgn_nr_edges++;
2809 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2811 rgn_nr_edges = 0;
2812 for (i = 1; i < nr_edges; i++)
2813 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2814 rgn_edges[rgn_nr_edges++] = i;
2817 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
2818 edgeset_bitsize = rgn_nr_edges;
2819 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2820 ancestor_edges
2821 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
2822 for (i = 0; i < current_nr_blocks; i++)
2823 {
2824 pot_split[i] =
2825 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2826 ancestor_edges[i] =
2827 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
2828 }
2830 /* Compute probabilities, dominators, split_edges. */
2831 for (bb = 0; bb < current_nr_blocks; bb++)
2832 compute_dom_prob_ps (bb);
2835 /* Now we can schedule all blocks. */
2836 for (bb = 0; bb < current_nr_blocks; bb++)
2839 int b = BB_TO_BLOCK (bb);
2841 get_block_head_tail (b, &head, &tail);
2843 if (no_real_insns_p (head, tail))
2844 continue;
2846 current_sched_info->prev_head = PREV_INSN (head);
2847 current_sched_info->next_tail = NEXT_INSN (tail);
2849 if (write_symbols != NO_DEBUG)
2851 save_line_notes (b, head, tail);
2852 rm_line_notes (head, tail);
2855 /* rm_other_notes only removes notes which are _inside_ the
2856 block---that is, it won't remove notes before the first real insn
2857 or after the last real insn of the block. So if the first insn
2858 has a REG_SAVE_NOTE which would otherwise be emitted before the
2859 insn, it is redundant with the note before the start of the
2860 block, and so we have to take it out. */
2865 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2866 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2867 {
2868 remove_note (head, note);
2869 note = XEXP (note, 1);
2870 remove_note (head, note);
2871 }
2874 /* Remove remaining note insns from the block, save them in
2875 note_list. These notes are restored at the end of
2876 schedule_block (). */
2877 rm_other_notes (head, tail);
2881 current_sched_info->queue_must_finish_empty
2882 = current_nr_blocks > 1 && !flag_schedule_interblock;
2884 schedule_block (b, rgn_n_insns);
2885 sched_rgn_n_insns += sched_n_insns;
2887 /* Update target block boundaries. */
2888 if (head == BLOCK_HEAD (b))
2889 BLOCK_HEAD (b) = current_sched_info->head;
2890 if (tail == BLOCK_END (b))
2891 BLOCK_END (b) = current_sched_info->tail;
2894 if (current_nr_blocks > 1)
2896 free (candidate_table);
2897 free (bblst_table);
2898 free (bitlst_table);
2902 /* Sanity check: verify that all region insns were scheduled. */
2903 if (sched_rgn_n_insns != rgn_n_insns)
2904 abort ();
2906 /* Restore line notes. */
2907 if (write_symbols != NO_DEBUG)
2909 for (bb = 0; bb < current_nr_blocks; bb++)
2912 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2913 restore_line_notes (head, tail);
2917 /* Done with this region. */
2918 free_pending_lists ();
2920 finish_deps_global ();
2924 if (current_nr_blocks > 1)
2929 for (i = 0; i < current_nr_blocks; ++i)
2932 free (pot_split[i]);
2933 free (ancestor_edges[i]);
2939 free (ancestor_edges);
2943 /* Indexed by region, holds the number of death notes found in that region.
2944 Used for consistency checks. */
2945 static int *deaths_in_region;
2947 /* Initialize data structures for region scheduling. */
2956 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2957 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2958 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2959 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2961 blocks = sbitmap_alloc (n_basic_blocks);
2963 /* Compute regions for scheduling. */
2964 if (reload_completed
2965 || n_basic_blocks == 1
2966 || !flag_schedule_interblock)
2968 find_single_block_region ();
2972 /* Verify that a 'good' control flow graph can be built. */
2973 if (is_cfg_nonregular ())
2975 find_single_block_region ();
2980 struct edge_list *edge_list;
2982 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
2984 /* The scheduler runs after flow; therefore, we can't blindly call
2985 back into find_basic_blocks since doing so could invalidate the
2986 info in global_live_at_start.
2988 Consider a block consisting entirely of dead stores; after life
2989 analysis it would be a block of NOTE_INSN_DELETED notes. If
2990 we call find_basic_blocks again, then the block would be removed
2991 entirely and invalidate our register live information.
2993 We could (should?) recompute register live information. Doing
2994 so may even be beneficial. */
2995 edge_list = create_edge_list ();
2997 /* Compute the dominators and post dominators. */
2998 calculate_dominance_info (NULL, dom, CDI_DOMINATORS);
3000 /* build_control_flow will return nonzero if it detects unreachable
3001 blocks or any other irregularity with the cfg which prevents
3002 cross block scheduling. */
3003 if (build_control_flow (edge_list) != 0)
3004 find_single_block_region ();
3005 else
3006 find_rgns (edge_list, dom);
3008 if (sched_verbose >= 3)
3009 debug_regions ();
3011 /* We are done with flow's edge list. */
3012 free_edge_list (edge_list);
3014 /* For now. This will move as more and more of haifa is converted
3015 to using the cfg code in flow.c. */
3020 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
3022 /* Remove all death notes from the subroutine. */
3023 for (rgn = 0; rgn < nr_regions; rgn++)
3027 sbitmap_zero (blocks);
3028 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
3029 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
3031 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
3034 sbitmap_free (blocks);
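/* Illustrative sketch (not part of the pass): the region tables built
   above form a flat layout -- rgn_bb_table holds the blocks of all
   regions back to back, and each region records an offset and a length
   into it.  A toy walk over such a layout, with hypothetical names:  */
#if 0
#include <stdio.h>

struct toy_region { int rgn_blocks; int rgn_nr_blocks; };

static void
toy_dump_regions (const struct toy_region *rgn_table, int nr_regions,
                  const int *rgn_bb_table)
{
  int rgn, b;

  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      printf ("region %d:", rgn);
      for (b = 0; b < rgn_table[rgn].rgn_nr_blocks; b++)
        printf (" b%d", rgn_bb_table[rgn_table[rgn].rgn_blocks + b]);
      printf ("\n");
    }
}
#endif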
3037 /* The one entry point in this file. DUMP_FILE is the dump file for
3038 this pass. */
3041 schedule_insns (dump_file)
3044 sbitmap large_region_blocks, blocks;
3046 int any_large_regions;
3048 /* Taking care of this degenerate case makes the rest of
3049 this code simpler. */
3050 if (n_basic_blocks == 0)
3056 sched_init (dump_file);
3060 current_sched_info = &region_sched_info;
3062 /* Schedule every region in the subroutine. */
3063 for (rgn = 0; rgn < nr_regions; rgn++)
3064 schedule_region (rgn);
3066 /* Update life analysis for the subroutine. Do single block regions
3067 first so that we can verify that live_at_start didn't change. Then
3068 do all other blocks. */
3069 /* ??? There is an outside possibility that update_life_info, or more
3070 to the point propagate_block, could get called with non-zero flags
3071 more than once for one basic block. This would be kinda bad if it
3072 were to happen, since REG_INFO would be accumulated twice for the
3073 block, and we'd have twice the REG_DEAD notes.
3075 I'm fairly certain that this _shouldn't_ happen, since I don't think
3076 that live_at_start should change at region heads. Not sure what the
3077 best way to test for this kind of thing... */
3079 allocate_reg_life_data ();
3080 compute_bb_for_insn (get_max_uid ());
3082 any_large_regions = 0;
3083 large_region_blocks = sbitmap_alloc (n_basic_blocks);
3084 sbitmap_ones (large_region_blocks);
3086 blocks = sbitmap_alloc (n_basic_blocks);
3088 for (rgn = 0; rgn < nr_regions; rgn++)
3089 if (RGN_NR_BLOCKS (rgn) > 1)
3090 any_large_regions = 1;
3091 else
3092 {
3093 sbitmap_zero (blocks);
3094 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3095 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
3097 /* Don't update reg info after reload, since that affects
3098 regs_ever_live, which should not change after reload. */
3099 update_life_info (blocks, UPDATE_LIFE_LOCAL,
3100 (reload_completed ? PROP_DEATH_NOTES
3101 : PROP_DEATH_NOTES | PROP_REG_INFO));
3103 #ifndef HAVE_conditional_execution
3104 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3105 a count of the conditional plus unconditional deaths for this to
3106 work out. */
3107 /* In the single block case, the count of registers that died should
3108 not have changed during the schedule. */
3109 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
3110 abort ();
3114 if (any_large_regions)
3116 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
3117 PROP_DEATH_NOTES | PROP_REG_INFO);
3120 /* Reposition the prologue and epilogue notes in case we moved the
3121 prologue/epilogue insns. */
3122 if (reload_completed)
3123 reposition_prologue_and_epilogue_notes (get_insns ());
3125 /* Delete redundant line notes. */
3126 if (write_symbols != NO_DEBUG)
3127 rm_redundant_line_notes ();
3131 if (reload_completed == 0 && flag_schedule_interblock)
3133 fprintf (sched_dump,
3134 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3142 fprintf (sched_dump, "\n\n");
3147 free (rgn_bb_table);
3148 free (block_to_bb);
3149 free (containing_rgn);
3170 sbitmap_free (blocks);
3171 sbitmap_free (large_region_blocks);
3173 free (deaths_in_region);