/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   Enhanced by, and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GNU CC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* Instruction scheduling pass.

   This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The scheduler works as follows:

   We compute insn priorities based on data dependencies.  Flow
   analysis only creates a fraction of the data dependencies we must
   observe: namely, only those dependencies which the combiner can be
   expected to use.  For this pass, we must therefore create the
   remaining dependencies we need to observe: register dependencies,
   memory dependencies, dependencies to keep function calls in order,
   and the dependence between a conditional branch and the setting of
   condition codes are all dealt with here.

   The scheduler first traverses the data flow graph, starting with
   the last instruction, and proceeding to the first, assigning values
   to insn_priority as it goes.  This sorts the instructions
   topologically by data dependence.

   Once priorities have been established, we order the insns using
   list scheduling.  This works as follows: starting with a list of
   all the ready insns, and sorted according to priority number, we
   schedule the insn from the end of the list by placing its
   predecessors in the list according to their priority order.  We
   consider this insn scheduled by setting the pointer to the "end" of
   the list to point to the previous insn.  When an insn has no
   predecessors, we either queue it until sufficient time has elapsed
   or add it to the ready list.  As the instructions are scheduled or
   when stalls are introduced, the queue advances and dumps insns into
   the ready list.  When all insns down to the lowest priority have
   been scheduled, the critical path of the basic block has been made
   as short as possible.  The remaining insns are then scheduled in
   remaining slots.

   Function unit conflicts are resolved during forward list scheduling
   by tracking the time when each insn is committed to the schedule
   and from that, the time the function units it uses must be free.
   As insns on the ready list are considered for scheduling, those
   that would result in a blockage of the already committed insns are
   queued until no blockage will result.

   The following list shows the order in which we want to break ties
   among insns in the ready list:

   1.  choose insn with the longest path to end of bb, ties
   broken by
   2.  choose insn with least contribution to register pressure,
   ties broken by
   3.  prefer in-block upon interblock motion, ties broken by
   4.  prefer useful upon speculative motion, ties broken by
   5.  choose insn with largest control flow probability, ties
   broken by
   6.  choose insn with the least dependences upon the previously
   scheduled insn, or finally
   7.  choose the insn which has the most insns dependent on it, or
   8.  choose insn with lowest UID.

   Memory references complicate matters.  Only if we can be certain
   that memory references are not part of the data dependency graph
   (via true, anti, or output dependence), can we move operations past
   memory references.  To a first approximation, reads can be done
   independently, while writes introduce dependencies.  Better
   approximations will yield fewer dependencies.

   Before reload, an extended analysis of interblock data dependences
   is required for interblock scheduling.  This is performed in
   compute_block_backward_dependences ().

   Dependencies set up by memory references are treated in exactly the
   same way as other dependencies, by using LOG_LINKS backward
   dependences.  LOG_LINKS are translated into INSN_DEPEND forward
   dependences for the purpose of forward list scheduling.

   Having optimized the critical path, we may have also unduly
   extended the lifetimes of some registers.  If an operation requires
   that constants be loaded into registers, it is certainly desirable
   to load those constants as early as necessary, but no earlier.
   I.e., it will not do to load up a bunch of registers at the
   beginning of a basic block only to use them at the end, if they
   could be loaded later, since this may result in excessive register
   utilization.

   Note that since branches are never in basic blocks, but only end
   basic blocks, this pass will not move branches.  But that is ok,
   since we can use GNU's delayed branch scheduling pass to take care
   of this case.

   Also note that no further optimizations based on algebraic
   identities are performed, so this pass would be a good one to
   perform instruction splitting, such as breaking up a multiply
   instruction into shifts and adds where that is profitable.

   Given the memory aliasing analysis that this pass should perform,
   it should be possible to remove redundant stores to memory, and to
   load values from registers instead of hitting memory.

   Before reload, speculative insns are moved only if a 'proof' exists
   that no exception will be caused by this, and if no live registers
   exist that inhibit the motion (live registers constraints are not
   represented by data dependence edges).

   This pass must update information that subsequent passes expect to
   be correct.  Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
   reg_n_calls_crossed, and reg_live_length.  Also, BLOCK_HEAD,
   BLOCK_END.

   The information in the line number notes is carefully retained by
   this pass.  Notes that refer to the starting and ending of
   exception regions are also carefully retained by this pass.  All
   other NOTE insns are grouped in their same relative order at the
   beginning of basic blocks and regions that have been scheduled.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting,
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */
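/* For orientation only, here is a rough sketch, in pseudo-C, of the
   forward list-scheduling loop described above.  It is illustrative,
   not the actual implementation; `ready', `n_ready', `queue_to_ready'
   and `schedule_insn' correspond to names used later in this file:

       for (clock = 0; insns remain unscheduled; clock++)
         {
           n_ready = queue_to_ready (ready, n_ready);      (Q->R)
           sort ready by rank_for_schedule ();
           while (n_ready && ready[n_ready - 1] has no hazard at clock)
             n_ready = schedule_insn (ready[n_ready - 1], ready,
                                      n_ready - 1, clock); (R->S)
         }  */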
#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "recog.h"
extern char *reg_known_equiv_p;
extern rtx *reg_known_value;

#ifdef INSN_SCHEDULING

/* target_units bitmask has 1 for each unit in the cpu.  It should be
   possible to compute this variable from the machine description.
   But currently it is computed by examining the insn list.  Since
   this is only needed for visualization, it seems an acceptable
   solution.  (For understanding the mapping of bits to units, see
   definition of function_units[] in "insn-attrtab.c".)  */

static int target_units = 0;

/* issue_rate is the number of insns that can be scheduled in the same
   machine cycle.  It can be defined in the config/mach/mach.h file,
   otherwise we set it to 1.  */

static int issue_rate;
/* sched-verbose controls the amount of debugging output the
   scheduler prints.  It is controlled by -fsched-verbose=N:
   N>0 and no -DSR : the output is directed to stderr.
   N>=10 will direct the printouts to stderr (regardless of -dSR).
   N=1: same as -dSR.
   N=2: bb's probabilities, detailed ready list info, unit/insn info.
   N=3: rtl at abort point, control-flow, regions info.
   N=5: dependences info.  */
#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100

static int sched_verbose_param = 0;
static int sched_verbose = 0;

/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

/* Debugging file.  All printouts are sent to dump, which is always set,
   either to stderr, or to the dump listing file (-dRS).  */
static FILE *dump = 0;
/* fix_sched_param() is called from toplev.c upon detection
   of the -fsched-verbose=N option.  */

void
fix_sched_param (param, val)
     const char *param, *val;
{
  if (!strcmp (param, "verbose"))
    sched_verbose_param = atoi (val);
  else
    warning ("fix_sched_param: unknown param: %s", param);
}
/* Describe state of dependencies used during sched_analyze phase.  */
struct deps
{
  /* The *_insns and *_mems are paired lists.  Each pending memory operation
     will have a pointer to the MEM rtx on one list and a pointer to the
     containing insn on the other list in the same place in the list.  */

  /* We can't use add_dependence like the old code did, because a single insn
     may have multiple memory accesses, and hence needs to be on the list
     once for each memory access.  Add_dependence won't let you add an insn
     to a list more than once.  */

  /* An INSN_LIST containing all insns with pending read operations.  */
  rtx pending_read_insns;

  /* An EXPR_LIST containing all MEM rtx's which are pending reads.  */
  rtx pending_read_mems;

  /* An INSN_LIST containing all insns with pending write operations.  */
  rtx pending_write_insns;

  /* An EXPR_LIST containing all MEM rtx's which are pending writes.  */
  rtx pending_write_mems;

  /* Indicates the combined length of the two pending lists.  We must prevent
     these lists from ever growing too large since the number of dependencies
     produced is at least O(N*N), and execution time is at least O(4*N*N), as
     a function of the length of these pending lists.  */
  int pending_lists_length;

  /* The last insn upon which all memory references must depend.
     This is an insn which flushed the pending lists, creating a dependency
     between it and all previously pending memory references.  This creates
     a barrier (or a checkpoint) which no memory reference is allowed to cross.

     This includes all non constant CALL_INSNs.  When we do interprocedural
     alias analysis, this restriction can be relaxed.
     This may also be an INSN that writes memory if the pending lists grow
     too large.  */
  rtx last_pending_memory_flush;

  /* The last function call we have seen.  All hard regs, and, of course,
     the last function call, must depend on this.  */
  rtx last_function_call;

  /* Used to keep post-call pseudo/hard reg movements together with
     the call.  */
  int in_post_call_group_p;

  /* The LOG_LINKS field of this is a list of insns which use a pseudo
     register that does not already cross a call.  We create
     dependencies between each of those insns and the next call insn,
     to ensure that they won't cross a call after scheduling is done.  */
  rtx sched_before_next_call;

  /* Element N is the next insn that sets (hard or pseudo) register
     N within the current basic block; or zero, if there is no
     such insn.  Needed for new registers which may be introduced
     by splitting insns.  */
  rtx *reg_last_uses;
  rtx *reg_last_sets;
  rtx *reg_last_clobbers;
};
static regset reg_pending_sets;
static regset reg_pending_clobbers;
static int reg_pending_sets_all;
/* To speed up the test for duplicate dependency links we keep a record
   of true dependencies created by add_dependence when the average number
   of instructions in a basic block is very large.

   Studies have shown that there is typically around 5 instructions between
   branches for typical C code.  So we can make a guess that the average
   basic block is approximately 5 instructions long; we will choose 100X
   the average size as a very large basic block.

   Each insn has an associated bitmap for its dependencies.  Each bitmap
   has enough entries to represent a dependency on any other insn in the
   insn chain.  */
static sbitmap *true_dependency_cache;
/* Indexed by INSN_UID, the collection of all data associated with
   a single instruction.  */

struct haifa_insn_data
{
  /* A list of insns which depend on the instruction.  Unlike LOG_LINKS,
     it represents forward dependencies.  */
  rtx depend;

  /* The line number note in effect for each insn.  For line number
     notes, this indicates whether the note may be reused.  */
  rtx line_note;

  /* Logical uid gives the original ordering of the insns.  */
  int luid;

  /* A priority for each insn.  */
  int priority;

  /* The number of incoming edges in the forward dependency graph.
     As scheduling proceeds, counts are decreased.  An insn moves to
     the ready queue when its counter reaches zero.  */
  int dep_count;

  /* An encoding of the blockage range function.  Both unit and range
     are in the same word, so it is easy to compare them.  */
  unsigned int blockage;

  /* Number of instructions referring to this insn.  */
  int ref_count;

  /* The minimum clock tick at which the insn becomes ready.  This is
     used to note timing constraints for the insns in the pending list.  */
  int tick;

  short cost;

  /* An encoding of the function units used.  */
  short units;

  /* This weight is an estimation of the insn's contribution to
     register pressure.  */
  short reg_weight;

  /* Some insns (e.g. call) are not allowed to move across blocks.  */
  unsigned int cant_move : 1;

  /* Set if there's DEF-USE dependence between some speculatively
     moved load insn and this one.  */
  unsigned int fed_by_spec_load : 1;
  unsigned int is_load_insn : 1;
};

static struct haifa_insn_data *h_i_d;
#define INSN_DEPEND(INSN)	(h_i_d[INSN_UID (INSN)].depend)
#define INSN_LUID(INSN)		(h_i_d[INSN_UID (INSN)].luid)
#define INSN_PRIORITY(INSN)	(h_i_d[INSN_UID (INSN)].priority)
#define INSN_DEP_COUNT(INSN)	(h_i_d[INSN_UID (INSN)].dep_count)
#define INSN_COST(INSN)		(h_i_d[INSN_UID (INSN)].cost)
#define INSN_UNIT(INSN)		(h_i_d[INSN_UID (INSN)].units)
#define INSN_REG_WEIGHT(INSN)	(h_i_d[INSN_UID (INSN)].reg_weight)

#define INSN_BLOCKAGE(INSN)	(h_i_d[INSN_UID (INSN)].blockage)
#define UNIT_BITS		5
#define BLOCKAGE_MASK		((1 << BLOCKAGE_BITS) - 1)
#define ENCODE_BLOCKAGE(U, R)			\
  (((U) << BLOCKAGE_BITS			\
    | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS	\
   | MAX_BLOCKAGE_COST (R))
#define UNIT_BLOCKED(B)		((B) >> (2 * BLOCKAGE_BITS))
#define BLOCKAGE_RANGE(B)                                                \
  (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
   | ((B) & BLOCKAGE_MASK))

/* Encodings of the `<name>_unit_blockage_range' function.  */
#define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
#define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
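/* A worked example of the encoding above, assuming (hypothetically)
   BLOCKAGE_BITS == 5 and HOST_BITS_PER_INT == 32: for a range R with
   minimum cost 1 in its high half and maximum cost 4 in its low half,
   ENCODE_BLOCKAGE (3, R) == ((3 << 5 | 1) << 5) | 4 == 3108.  Decoding,
   UNIT_BLOCKED (3108) == 3108 >> 10 == 3, while BLOCKAGE_RANGE (3108)
   repacks min == (3108 >> 5) & 31 == 1 and max == 3108 & 31 == 4.  */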
#define DONE_PRIORITY	-1
#define MAX_PRIORITY	0x7fffffff
#define TAIL_PRIORITY	0x7ffffffe
#define LAUNCH_PRIORITY	0x7f000001
#define DONE_PRIORITY_P(INSN)	(INSN_PRIORITY (INSN) < 0)
#define LOW_PRIORITY_P(INSN)	((INSN_PRIORITY (INSN) & 0x7f000000) == 0)

#define INSN_REF_COUNT(INSN)	(h_i_d[INSN_UID (INSN)].ref_count)
#define LINE_NOTE(INSN)		(h_i_d[INSN_UID (INSN)].line_note)
#define INSN_TICK(INSN)		(h_i_d[INSN_UID (INSN)].tick)
#define CANT_MOVE(insn)		(h_i_d[INSN_UID (insn)].cant_move)
#define FED_BY_SPEC_LOAD(insn)	(h_i_d[INSN_UID (insn)].fed_by_spec_load)
#define IS_LOAD_INSN(insn)	(h_i_d[INSN_UID (insn)].is_load_insn)
/* Vector indexed by basic block number giving the starting line-number
   for each basic block.  */
static rtx *line_note_head;

/* List of important notes we must keep around.  This is a pointer to the
   last element in the list.  */
static rtx note_list;
/* An instruction is ready to be scheduled when all insns preceding it
   have already been scheduled.  It is important to ensure that all
   insns which use its result will not be executed until its result
   has been computed.  An insn is maintained in one of four structures:

   (P) the "Pending" set of insns which cannot be scheduled until
   their dependencies have been satisfied.
   (Q) the "Queued" set of insns that can be scheduled when sufficient
   time has passed.
   (R) the "Ready" list of unscheduled, uncommitted insns.
   (S) the "Scheduled" list of insns.

   Initially, all insns are either "Pending" or "Ready" depending on
   whether their dependencies are satisfied.

   Insns move from the "Ready" list to the "Scheduled" list as they
   are committed to the schedule.  As this occurs, the insns in the
   "Pending" list have their dependencies satisfied and move to either
   the "Ready" list or the "Queued" set depending on whether
   sufficient time has passed to make them ready.  As time passes,
   insns move from the "Queued" set to the "Ready" list.  Insns may
   move from the "Ready" list to the "Queued" set if they are blocked
   due to a function unit conflict.

   The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
   insns, i.e., those that are ready, queued, and pending.
   The "Queued" set (Q) is implemented by the variable `insn_queue'.
   The "Ready" list (R) is implemented by the variables `ready' and
   `n_ready'.
   The "Scheduled" list (S) is the new insn chain built by this pass.

   The transition (R->S) is implemented in the scheduling loop in
   `schedule_block' when the best insn to schedule is chosen.
   The transition (R->Q) is implemented in `queue_insn' when an
   insn is found to have a function unit conflict with the already
   committed insns.
   The transitions (P->R and P->Q) are implemented in `schedule_insn' as
   insns move from the ready list to the scheduled list.
   The transition (Q->R) is implemented in `queue_to_ready' as time
   passes or stalls are introduced.  */
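/* A possible life cycle of a single insn under these rules: it starts
   in (P) waiting on one dependence; when that dependence is satisfied
   two cycles before the insn's operands become available, it moves to
   (Q); two ticks later queue_to_ready moves it to (R); and the loop in
   schedule_block finally commits it to (S).  */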
/* Implement a circular buffer to delay instructions until sufficient
   time has passed.  INSN_QUEUE_SIZE is a power of two larger than
   MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c.  This is the
   longest time an insn may be queued.  */
static rtx insn_queue[INSN_QUEUE_SIZE];
static int q_ptr = 0;
static int q_size = 0;
#define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
#define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
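/* For illustration, assuming (hypothetically) INSN_QUEUE_SIZE == 32:
   with q_ptr == 30, an insn that must wait 3 cycles is placed in slot
   NEXT_Q_AFTER (30, 3) == 33 & 31 == 1; the ring simply wraps around.
   Each clock tick, queue_to_ready advances q_ptr with NEXT_Q and moves
   that slot's insns to the ready list.  */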
/* Forward declarations.  */
static void add_dependence PARAMS ((rtx, rtx, enum reg_note));
static void remove_dependence PARAMS ((rtx, rtx));
static rtx find_insn_list PARAMS ((rtx, rtx));
static void set_sched_group_p PARAMS ((rtx));
static int insn_unit PARAMS ((rtx));
static unsigned int blockage_range PARAMS ((int, rtx));
static void clear_units PARAMS ((void));
static int actual_hazard_this_instance PARAMS ((int, int, rtx, int, int));
static void schedule_unit PARAMS ((int, rtx, int));
static int actual_hazard PARAMS ((int, rtx, int, int));
static int potential_hazard PARAMS ((int, rtx, int));
static int insn_cost PARAMS ((rtx, rtx, rtx));
static int priority PARAMS ((rtx));
static void free_pending_lists PARAMS ((void));
static void add_insn_mem_dependence PARAMS ((struct deps *, rtx *, rtx *, rtx,
					     rtx));
static void flush_pending_lists PARAMS ((struct deps *, rtx, int));
static void sched_analyze_1 PARAMS ((struct deps *, rtx, rtx));
static void sched_analyze_2 PARAMS ((struct deps *, rtx, rtx));
static void sched_analyze_insn PARAMS ((struct deps *, rtx, rtx, rtx));
static void sched_analyze PARAMS ((struct deps *, rtx, rtx));
static int rank_for_schedule PARAMS ((const PTR, const PTR));
static void swap_sort PARAMS ((rtx *, int));
static void queue_insn PARAMS ((rtx, int));
static int schedule_insn PARAMS ((rtx, rtx *, int, int));
static void find_insn_reg_weight PARAMS ((int));
static int schedule_block PARAMS ((int, int));
static char *safe_concat PARAMS ((char *, char *, const char *));
static int insn_issue_delay PARAMS ((rtx));
static void adjust_priority PARAMS ((rtx));
/* Control flow graph edges are kept in circular lists.  */
typedef struct
{
  int from_block;
  int to_block;
  int next_in;
  int next_out;
}
haifa_edge;
static haifa_edge *edge_table;

#define NEXT_IN(edge) (edge_table[edge].next_in)
#define NEXT_OUT(edge) (edge_table[edge].next_out)
#define FROM_BLOCK(edge) (edge_table[edge].from_block)
#define TO_BLOCK(edge) (edge_table[edge].to_block)

/* Number of edges in the control flow graph.  (In fact, larger than
   that by 1, since edge 0 is unused.)  */
static int nr_edges;
/* Circular list of incoming/outgoing edges of a block.  */
static int *in_edges;
static int *out_edges;

#define IN_EDGES(block) (in_edges[block])
#define OUT_EDGES(block) (out_edges[block])

static int is_cfg_nonregular PARAMS ((void));
static int build_control_flow PARAMS ((struct edge_list *));
static void new_edge PARAMS ((int, int));
/* A region is the main entity for interblock scheduling: insns
   are allowed to move between blocks in the same region, along
   control flow graph edges, in the 'up' direction.  */
typedef struct
{
  int rgn_nr_blocks;		/* Number of blocks in region.  */
  int rgn_blocks;		/* Blocks in the region (actually index in rgn_bb_table).  */
}
region;

/* Number of regions in the procedure.  */
static int nr_regions;

/* Table of region descriptions.  */
static region *rgn_table;

/* Array of lists of regions' blocks.  */
static int *rgn_bb_table;
/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
static int *block_to_bb;

/* The number of the region containing a block.  */
static int *containing_rgn;

#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define CONTAINING_RGN(block) (containing_rgn[block])
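/* A small example of how these tables relate, assuming a function that
   splits into two regions: region 0 = block {0} and region 1 = blocks
   {1, 2} with block 1 as the region entry.  Then rgn_bb_table is
   {0, 1, 2}, RGN_BLOCKS (1) == 1, BLOCK_TO_BB (1) == 0,
   BLOCK_TO_BB (2) == 1, and CONTAINING_RGN (2) == 1.  */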
void debug_regions PARAMS ((void));
static void find_single_block_region PARAMS ((void));
static void find_rgns PARAMS ((struct edge_list *, sbitmap *));
static int too_large PARAMS ((int, int *, int *));

extern void debug_live PARAMS ((int, int));

/* Blocks of the current region being scheduled.  */
static int current_nr_blocks;
static int current_blocks;

/* The mapping from bb to block.  */
#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
/* Bit vectors and bitset operations are needed for computations on
   the control flow graph.  */

typedef unsigned HOST_WIDE_INT *bitset;
typedef struct
{
  int *first_member;		/* Pointer to the list start in bitlst_table.  */
  int nr_members;		/* The number of members of the bit list.  */
}
bitlst;

static int bitlst_table_last;
static int bitlst_table_size;
static int *bitlst_table;

static char bitset_member PARAMS ((bitset, int, int));
static void extract_bitlst PARAMS ((bitset, int, int, bitlst *));
/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef bitlst bblst;
typedef struct
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
}
candidate;

static candidate *candidate_table;
/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static int *bblst_table, bblst_size, bblst_last;
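/* A tiny illustration, not taken from real compiler output: if the
   target block A has two successors B and C, and an insn is moved up
   from B into A, the motion is speculative; the edge A->C is a split
   edge and C a split block, since the moved insn will now also execute
   on the path through C, where its result must not clobber anything
   live.  Blocks whose live information must be recomputed after the
   motion form the update set.  */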
#define IS_VALID(src) ( candidate_table[src].is_valid )
#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
static int target_bb;

/* List of edges.  */
typedef bitlst edgelst;
/* Target info functions.  */
static void split_edges PARAMS ((int, int, edgelst *));
static void compute_trg_info PARAMS ((int));
void debug_candidate PARAMS ((int));
void debug_candidates PARAMS ((int));
/* Bit-set of bbs, where bit 'i' stands for bb 'i'.  */
typedef bitset bbset;

/* Number of words of the bbset.  */
static int bbset_size;

/* Dominators array: dom[i] contains the bbset of dominators of
   bb i in the region.  */
static bbset *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg.  */
#define IS_DOMINATED(bb_src, bb_trg)				\
( bitset_member (dom[bb_src], bb_trg, bbset_size) )
/* Probability: Prob[i] is a float in [0, 1] which is the probability
   of bb i relative to the region entry.  */
static float *prob;

/* The probability of bb_src, relative to bb_trg.  Note, that while the
   'prob[bb]' is a float in [0, 1], this macro returns an integer
   in [0, 100].  */
#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
						      prob[bb_trg])))
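/* For example, if prob[bb_src] == 0.4 and prob[bb_trg] == 0.8, then
   GET_SRC_PROB (bb_src, bb_trg) == 50, i.e. bb_src executes in half of
   the cases in which bb_trg is reached.  */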
/* Bit-set of edges, where bit i stands for edge i.  */
typedef bitset edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static int *rgn_edges;

/* Number of words in an edgeset.  */
static int edgeset_size;

/* Number of bits in an edgeset.  */
static int edgeset_bitsize;

/* Mapping from each edge in the graph to its number in the rgn.  */
static int *edge_to_bit;
#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

static void compute_dom_prob_ps PARAMS ((int));
#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))

/* Parameters affecting the decision of rank_for_schedule().  */
#define MIN_DIFF_PRIORITY 2
#define MIN_PROBABILITY 40
#define MIN_PROB_DIFF 10
/* Speculative scheduling functions.  */
static int check_live_1 PARAMS ((int, rtx));
static void update_live_1 PARAMS ((int, rtx));
static int check_live PARAMS ((rtx, int));
static void update_live PARAMS ((rtx, int));
static void set_spec_fed PARAMS ((rtx));
static int is_pfree PARAMS ((rtx, int, int));
static int find_conditional_protection PARAMS ((rtx, int));
static int is_conditionally_protected PARAMS ((rtx, int, int));
static int may_trap_exp PARAMS ((rtx, int));
static int haifa_classify_insn PARAMS ((rtx));
static int is_prisky PARAMS ((rtx, int, int));
static int is_exception_free PARAMS ((rtx, int, int));

static char find_insn_mem_list PARAMS ((rtx, rtx, rtx, rtx));
static void compute_block_forward_dependences PARAMS ((int));
static void add_branch_dependences PARAMS ((rtx, rtx));
static void compute_block_backward_dependences PARAMS ((int));
void debug_dependencies PARAMS ((void));
/* Notes handling mechanism:
   =========================
   Generally, NOTES are saved before scheduling and restored after scheduling.
   The scheduler distinguishes between three types of notes:

   (1) LINE_NUMBER notes, generated and used for debugging.  Here,
   before scheduling a region, a pointer to the LINE_NUMBER note is
   added to the insn following it (in save_line_notes()), and the note
   is removed (in rm_line_notes() and unlink_line_notes()).  After
   scheduling the region, this pointer is used for regeneration of
   the LINE_NUMBER note (in restore_line_notes()).

   (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
   Before scheduling a region, a pointer to the note is added to the insn
   that follows or precedes it.  (This happens as part of the data dependence
   computation).  After scheduling an insn, the pointer contained in it is
   used for regenerating the corresponding note (in reemit_notes).

   (3) All other notes (e.g. INSN_DELETED):  Before scheduling a block,
   these notes are put in a list (in rm_other_notes() and
   unlink_other_notes ()).  After scheduling the block, these notes are
   inserted at the beginning of the block (in schedule_block()).  */
static rtx unlink_other_notes PARAMS ((rtx, rtx));
static rtx unlink_line_notes PARAMS ((rtx, rtx));
static void rm_line_notes PARAMS ((int));
static void save_line_notes PARAMS ((int));
static void restore_line_notes PARAMS ((int));
static void rm_redundant_line_notes PARAMS ((void));
static void rm_other_notes PARAMS ((rtx, rtx));
static rtx reemit_notes PARAMS ((rtx, rtx));

static void get_block_head_tail PARAMS ((int, rtx *, rtx *));
static void get_bb_head_tail PARAMS ((int, rtx *, rtx *));

static int queue_to_ready PARAMS ((rtx [], int));

static void debug_ready_list PARAMS ((rtx [], int));
static void init_target_units PARAMS ((void));
static void insn_print_units PARAMS ((rtx));
static int get_visual_tbl_length PARAMS ((void));
static void init_block_visualization PARAMS ((void));
static void print_block_visualization PARAMS ((int, const char *));
static void visualize_scheduled_insns PARAMS ((int, int));
static void visualize_no_unit PARAMS ((rtx));
static void visualize_stall_cycles PARAMS ((int, int));
static void print_exp PARAMS ((char *, rtx, int));
static void print_value PARAMS ((char *, rtx, int));
static void print_pattern PARAMS ((char *, rtx, int));
static void print_insn PARAMS ((char *, rtx, int));
void debug_reg_vector PARAMS ((regset));

static rtx move_insn1 PARAMS ((rtx, rtx));
static rtx move_insn PARAMS ((rtx, rtx));
static rtx group_leader PARAMS ((rtx));
static int set_priorities PARAMS ((int));
static void init_deps PARAMS ((struct deps *));
static void schedule_region PARAMS ((int));
static void propagate_deps PARAMS ((int, struct deps *, int));
#endif /* INSN_SCHEDULING */

#define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
/* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
   LOG_LINKS of INSN, if not already there.  DEP_TYPE indicates the type
   of dependence that this link represents.  */

static void
add_dependence (insn, elem, dep_type)
     rtx insn;
     rtx elem;
     enum reg_note dep_type;
{
  rtx link, next;

  /* Don't depend an insn on itself.  */
  if (insn == elem)
    return;

  /* We can get a dependency on deleted insns due to optimizations in
     the register allocation and reloading or due to splitting.  Any
     such dependency is useless and can be ignored.  */
  if (GET_CODE (elem) == NOTE)
    return;

  /* If elem is part of a sequence that must be scheduled together, then
     make the dependence point to the last insn of the sequence.
     When HAVE_cc0, it is possible for NOTEs to exist between users and
     setters of the condition codes, so we must skip past notes here.
     Otherwise, NOTEs are impossible here.  */
  next = next_nonnote_insn (elem);
  if (next && SCHED_GROUP_P (next)
      && GET_CODE (next) != CODE_LABEL)
    {
      /* Notes will never intervene here though, so don't bother checking
	 for them.  */
      /* We must reject CODE_LABELs, so that we don't get confused by one
	 that has LABEL_PRESERVE_P set, which is represented by the same
	 bit in the rtl as SCHED_GROUP_P.  A CODE_LABEL can never be
	 SCHED_GROUP_P.  */
      rtx nnext;
      while ((nnext = next_nonnote_insn (next)) != NULL
	     && SCHED_GROUP_P (nnext)
	     && GET_CODE (nnext) != CODE_LABEL)
	next = nnext;

      /* Again, don't depend an insn on itself.  */
      if (insn == next)
	return;

      /* Make the dependence to NEXT, the last insn of the group, instead
	 of the original ELEM.  */
      elem = next;
    }

#ifdef INSN_SCHEDULING
  /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
     No need for interblock dependences with calls, since
     calls are not moved between blocks.  Note: the edge where
     elem is a CALL is still required.  */
  if (GET_CODE (insn) == CALL_INSN
      && (INSN_BB (elem) != INSN_BB (insn)))
    return;

  /* If we already have a true dependency for ELEM, then we do not
     need to do anything.  Avoiding the list walk below can cut
     compile times dramatically for some code.  */
  if (true_dependency_cache
      && TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
    return;
#endif

  /* Check that we don't already have this dependence.  */
  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
    if (XEXP (link, 0) == elem)
      {
	/* If this is a more restrictive type of dependence than the existing
	   one, then change the existing dependence to this type.  */
	if ((int) dep_type < (int) REG_NOTE_KIND (link))
	  PUT_REG_NOTE_KIND (link, dep_type);

#ifdef INSN_SCHEDULING
	/* If we are adding a true dependency to INSN's LOG_LINKs, then
	   note that in the bitmap cache of true dependency information.  */
	if ((int) dep_type == 0 && true_dependency_cache)
	  SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
#endif
	return;
      }
  /* Might want to check one level of transitivity to save conses.  */

  link = alloc_INSN_LIST (elem, LOG_LINKS (insn));
  LOG_LINKS (insn) = link;

  /* Insn dependency, not data dependency.  */
  PUT_REG_NOTE_KIND (link, dep_type);

#ifdef INSN_SCHEDULING
  /* If we are adding a true dependency to INSN's LOG_LINKs, then
     note that in the bitmap cache of true dependency information.  */
  if ((int) dep_type == 0 && true_dependency_cache)
    SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
#endif
}
/* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
   of INSN.  Abort if not found.  */

static void
remove_dependence (insn, elem)
     rtx insn;
     rtx elem;
{
  rtx prev, link, next;
  int found = 0;

  for (prev = 0, link = LOG_LINKS (insn); link; link = next)
    {
      next = XEXP (link, 1);
      if (XEXP (link, 0) == elem)
	{
	  if (prev)
	    XEXP (prev, 1) = next;
	  else
	    LOG_LINKS (insn) = next;

#ifdef INSN_SCHEDULING
	  /* If we are removing a true dependency from the LOG_LINKS list,
	     make sure to remove it from the cache too.  */
	  if (REG_NOTE_KIND (link) == 0 && true_dependency_cache)
	    RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
		       INSN_LUID (elem));
#endif

	  free_INSN_LIST_node (link);

	  found = 1;
	}
      else
	prev = link;
    }

  if (!found)
    abort ();
}
/* Return the INSN_LIST containing INSN in LIST, or NULL
   if LIST does not contain INSN.  */

static rtx
find_insn_list (insn, list)
     rtx insn;
     rtx list;
{
  while (list)
    {
      if (XEXP (list, 0) == insn)
	return list;
      list = XEXP (list, 1);
    }
  return 0;
}
/* Set SCHED_GROUP_P and care for the rest of the bookkeeping that
   goes along with that.  */

static void
set_sched_group_p (insn)
     rtx insn;
{
  rtx link, prev;

  SCHED_GROUP_P (insn) = 1;

  /* There may be a note before this insn now, but all notes will
     be removed before we actually try to schedule the insns, so
     it won't cause a problem later.  We must avoid it here though.  */
  prev = prev_nonnote_insn (insn);

  /* Make a copy of all dependencies on the immediately previous insn,
     and add to this insn.  This is so that all the dependencies will
     apply to the group.  Remove an explicit dependence on this insn
     as SCHED_GROUP_P now represents it.  */

  if (find_insn_list (prev, LOG_LINKS (insn)))
    remove_dependence (insn, prev);

  for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
    add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
}
#ifndef INSN_SCHEDULING
void
schedule_insns (dump_file)
     FILE *dump_file ATTRIBUTE_UNUSED;
{
}
#else

#ifndef __GNUC__
#define __inline
#endif

#ifndef HAIFA_INLINE
#define HAIFA_INLINE __inline
#endif
/* Computation of memory dependencies.  */

/* Data structures for the computation of data dependences in a region.  We
   keep one mem_deps structure for every basic block.  Before analyzing the
   data dependences for a bb, its variables are initialized as a function of
   the variables of its predecessors.  When the analysis for a bb completes,
   we save the contents to the corresponding bb_deps[bb] variable.  */

static struct deps *bb_deps;

/* Pointer to the last instruction scheduled.  Used by rank_for_schedule,
   so that insns independent of the last scheduled insn will be preferred
   over dependent instructions.  */

static rtx last_scheduled_insn;
/* Functions for construction of the control flow graph.  */

/* Return 1 if control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, or if we
   have nonlocal gotos.  */

static int
is_cfg_nonregular ()
{
  int b;
  rtx insn;
  RTX_CODE code;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If this function has a computed jump, then we consider the cfg
     not well structured.  */
  if (current_function_has_computed_jump)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that flow.c
     computes an accurate cfg for EH.  */
  if (exception_handler_labels)
    return 1;

  /* If we have non-jumping insns which refer to labels, then we consider
     the cfg not well structured.  */
  /* Check for labels referred to other than by jumps.  */
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
	code = GET_CODE (insn);
	if (GET_RTX_CLASS (code) == 'i')
	  {
	    rtx note;

	    for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
	      if (REG_NOTE_KIND (note) == REG_LABEL)
		return 1;
	  }

	if (insn == BLOCK_END (b))
	  break;
      }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}
/* Build the control flow graph and set nr_edges.

   Instead of trying to build a cfg ourselves, we rely on flow to
   do it for us.  Stamp out useless code (and bug) duplication.

   Return nonzero if an irregularity in the cfg is found which would
   prevent cross block scheduling.  */

static int
build_control_flow (edge_list)
     struct edge_list *edge_list;
{
  int i, unreachable, num_edges;

  /* This already accounts for entry/exit edges.  */
  num_edges = NUM_EDGES (edge_list);

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    {
      basic_block b = BASIC_BLOCK (i);

      if (b->pred == NULL
	  || (b->pred->src == b
	      && b->pred->pred_next == NULL))
	unreachable = 1;
    }

  /* ??? We can kill these soon.  */
  in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
  edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));

  nr_edges = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR
	  && e->src != ENTRY_BLOCK_PTR)
	new_edge (e->src->index, e->dest->index);
    }

  /* Increment by 1, since edge 0 is unused.  */
  nr_edges++;

  return unreachable;
}
/* Record an edge in the control flow graph from SOURCE to TARGET.

   In theory, this is redundant with the s_succs computed above, but
   we have not converted all of haifa to use information from the
   integer lists.  */

static void
new_edge (source, target)
     int source, target;
{
  int e, next_edge;
  int curr_edge, fst_edge;

  /* Check for duplicates.  */
  fst_edge = curr_edge = OUT_EDGES (source);
  while (curr_edge)
    {
      if (FROM_BLOCK (curr_edge) == source
	  && TO_BLOCK (curr_edge) == target)
	{
	  return;
	}

      curr_edge = NEXT_OUT (curr_edge);

      if (fst_edge == curr_edge)
	break;
    }

  e = ++nr_edges;

  FROM_BLOCK (e) = source;
  TO_BLOCK (e) = target;

  if (OUT_EDGES (source))
    {
      next_edge = NEXT_OUT (OUT_EDGES (source));
      NEXT_OUT (OUT_EDGES (source)) = e;
      NEXT_OUT (e) = next_edge;
    }
  else
    {
      OUT_EDGES (source) = e;
      NEXT_OUT (e) = e;
    }

  if (IN_EDGES (target))
    {
      next_edge = NEXT_IN (IN_EDGES (target));
      NEXT_IN (IN_EDGES (target)) = e;
      NEXT_IN (e) = next_edge;
    }
  else
    {
      IN_EDGES (target) = e;
      NEXT_IN (e) = e;
    }
}
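/* To illustrate the circular lists (an assumed example, not real data):
   after new_edge (3, 5) and new_edge (3, 7), OUT_EDGES (3) still points
   at the first edge recorded, say e1 with TO_BLOCK 5, while NEXT_OUT (e1)
   is the edge to 7 and NEXT_OUT of that edge points back to e1.  A walk
   over the list therefore stops when it returns to the starting edge,
   exactly as in the duplicate check above.  */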
/* BITSET macros for operations on the control flow graph.  */

/* Compute bitwise union of two bitsets.  */
#define BITSET_UNION(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) |= *(sp++); } while (0)

/* Compute bitwise intersection of two bitsets.  */
#define BITSET_INTER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= *(sp++); } while (0)

/* Compute bitwise difference of two bitsets.  */
#define BITSET_DIFFER(set1, set2, len)				\
do { register bitset tp = set1, sp = set2;			\
     register int i;						\
     for (i = 0; i < len; i++)					\
       *(tp++) &= ~*(sp++); } while (0)

/* Inverts every bit of bitset 'set'.  */
#define BITSET_INVERT(set, len)					\
do { register bitset tmpset = set;				\
     register int i;						\
     for (i = 0; i < len; i++, tmpset++)			\
       *tmpset = ~*tmpset; } while (0)
/* Turn on the index'th bit in bitset set.  */
#define BITSET_ADD(set, index, len)					\
{									\
  if (index >= HOST_BITS_PER_WIDE_INT * len)				\
    abort ();								\
  else									\
    set[index / HOST_BITS_PER_WIDE_INT] |=				\
      (unsigned HOST_WIDE_INT) 1 << (index % HOST_BITS_PER_WIDE_INT);	\
}

/* Turn off the index'th bit in set.  */
#define BITSET_REMOVE(set, index, len)					\
{									\
  if (index >= HOST_BITS_PER_WIDE_INT * len)				\
    abort ();								\
  else									\
    set[index / HOST_BITS_PER_WIDE_INT] &=				\
      ~((unsigned HOST_WIDE_INT) 1 << (index % HOST_BITS_PER_WIDE_INT)); \
}
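/* A worked example, assuming a 32-bit HOST_WIDE_INT: BITSET_ADD (set, 35, 2)
   sets bit 3 of word 1, since 35 / 32 == 1 and 35 % 32 == 3, while
   BITSET_ADD (set, 35, 1) aborts because index 35 is out of range for a
   one-word set.  The cast of 1 to unsigned HOST_WIDE_INT keeps the shift
   well defined when HOST_WIDE_INT is wider than int.  */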
/* Check if the index'th bit in bitset set is on.  */

static char
bitset_member (set, index, len)
     bitset set;
     int index, len;
{
  if (index >= HOST_BITS_PER_WIDE_INT * len)
    abort ();
  return ((set[index / HOST_BITS_PER_WIDE_INT]
	   & ((unsigned HOST_WIDE_INT) 1 << (index % HOST_BITS_PER_WIDE_INT)))
	  ? 1 : 0);
}
/* Translate a bit-set SET to a list BL of the bit-set members.  */

static void
extract_bitlst (set, len, bitlen, bl)
     bitset set;
     int len;
     int bitlen;
     bitlst *bl;
{
  int i, j, offset;
  unsigned HOST_WIDE_INT word;

  /* bblst table space is reused in each call to extract_bitlst.  */
  bitlst_table_last = 0;
  bl->first_member = &bitlst_table[bitlst_table_last];
  bl->nr_members = 0;

  /* Iterate over each word in the bitset.  */
  for (i = 0; i < len; i++)
    {
      word = set[i];
      offset = i * HOST_BITS_PER_WIDE_INT;

      /* Iterate over each bit in the word, but do not
	 go beyond the end of the defined bits.  */
      for (j = 0; offset < bitlen && word; j++)
	{
	  if (word & 1)
	    {
	      bitlst_table[bitlst_table_last++] = offset;
	      (bl->nr_members)++;
	    }
	  word >>= 1;
	  ++offset;
	}
    }
}
/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions ()
{
  int rgn, bb;

  fprintf (dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
	       rgn_table[rgn].rgn_nr_blocks);
      fprintf (dump, ";;\tbb/block: ");

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
	{
	  current_blocks = RGN_BLOCKS (rgn);

	  if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
	    abort ();

	  fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
	}

      fprintf (dump, "\n\n");
    }
}
/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region ()
{
  int i;

  for (i = 0; i < n_basic_blocks; i++)
    {
      rgn_bb_table[i] = i;
      RGN_NR_BLOCKS (i) = 1;
      RGN_BLOCKS (i) = i;
      CONTAINING_RGN (i) = i;
      BLOCK_TO_BB (i) = 0;
    }
  nr_regions = n_basic_blocks;
}
/* Update number of blocks and the estimate for number of insns
   in the region.  Return 1 if the region is "too large" for interblock
   scheduling (compile time considerations), otherwise return 0.  */

static int
too_large (block, num_bbs, num_insns)
     int block, *num_bbs, *num_insns;
{
  (*num_bbs)++;
  (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
		   INSN_LUID (BLOCK_HEAD (block)));
  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
    return 1;
  else
    return 0;
}
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the most inner
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)				\
{								\
  if (max_hdr[blk] == -1)					\
    max_hdr[blk] = hdr;						\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])			\
    RESET_BIT (inner, hdr);					\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])			\
    {								\
      RESET_BIT (inner, max_hdr[blk]);				\
      max_hdr[blk] = hdr;					\
    }								\
}
/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick) variables:

     * nr_regions
     * rgn_table
     * rgn_bb_table
     * block_to_bb
     * containing_rgn

   We use dominator relationships to avoid making regions out of non-reducible
   loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */
static void
find_rgns (edge_list, dom)
     struct edge_list *edge_list;
     sbitmap *dom;
{
  int *max_hdr, *dfs_nr, *stack, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0, current_edge = out_edges[0];
  int num_bbs, num_insns, unreachable;
  int too_large_failure;

  /* Note if an edge has been passed.  */
  sbitmap passed;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is in the block stack.  */
  sbitmap in_stack;

  int num_edges = NUM_EDGES (edge_list);
  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
  dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
  stack = (int *) xmalloc (nr_edges * sizeof (int));

  inner = sbitmap_alloc (n_basic_blocks);
  sbitmap_ones (inner);

  header = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (header);

  passed = sbitmap_alloc (nr_edges);
  sbitmap_zero (passed);

  in_queue = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (n_basic_blocks);
  sbitmap_zero (in_stack);

  for (i = 0; i < n_basic_blocks; i++)
    max_hdr[i] = -1;
  /* DFS traversal to find inner loops in the cfg.  */

  sp = -1;
  while (1)
    {
      if (current_edge == 0 || TEST_BIT (passed, current_edge))
	{
	  /* We have reached a leaf node or a node that was already
	     processed.  Pop edges off the stack until we find
	     an edge that has not yet been processed.  */
	  while (sp >= 0
		 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
	    {
	      /* Pop entry off the stack.  */
	      current_edge = stack[sp--];
	      node = FROM_BLOCK (current_edge);
	      child = TO_BLOCK (current_edge);
	      RESET_BIT (in_stack, child);
	      if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	      current_edge = NEXT_OUT (current_edge);
	    }

	  /* See if we have finished the DFS tree traversal.  */
	  if (sp < 0 && TEST_BIT (passed, current_edge))
	    break;

	  /* Nope, continue the traversal with the popped node.  */
	  continue;
	}

      /* Process a node.  */
      node = FROM_BLOCK (current_edge);
      child = TO_BLOCK (current_edge);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* If the successor is in the stack, then we've found a loop.
	 Mark the loop, if it is not a natural loop, then it will
	 be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
	{
	  no_loops = 0;
	  SET_BIT (header, child);
	  UPDATE_LOOP_RELATIONS (node, child);
	  SET_BIT (passed, current_edge);
	  current_edge = NEXT_OUT (current_edge);
	  continue;
	}

      /* If the child was already visited, then there is no need to visit
	 it again.  Just update the loop relationships and restart
	 with a new edge.  */
      if (dfs_nr[child])
	{
	  if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
	  SET_BIT (passed, current_edge);
	  current_edge = NEXT_OUT (current_edge);
	  continue;
	}

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_BIT (passed, current_edge);
      current_edge = OUT_EDGES (child);

      /* This is temporary until haifa is converted to use rth's new
	 cfg routines which have true entry/exit blocks and the
	 appropriate edges from/to those blocks.

	 Generally we update dfs_nr for a node when we process its
	 out edge.  However, if the node has no out edge then we will
	 not set dfs_nr for that node.  This can confuse the scheduler
	 into thinking that we have unreachable blocks, which in turn
	 disables cross block scheduling.

	 So, if we have a node with no out edges, go ahead and mark it
	 as reachable now.  */
      if (current_edge == 0)
	dfs_nr[child] = ++count;
    }
  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  for (i = 0; i < n_basic_blocks; i++)
    if (dfs_nr[i] == 0)
      {
	unreachable = 1;
	break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  for (i = 0; i < n_basic_blocks; i++)
    degree[i] = 0;
  for (i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (edge_list, i);

      if (e->dest != EXIT_BLOCK_PTR)
	degree[e->dest->index]++;
    }
  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue;

      if (no_loops)
	SET_BIT (header, 0);

      /* Second traversal: find reducible inner loops and topologically sort
	 blocks of each region.  */

      queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
      /* Find blocks which are inner loop headers.  We still have non-reducible
	 loops to consider at this point.  */
      for (i = 0; i < n_basic_blocks; i++)
	if (TEST_BIT (header, i) && TEST_BIT (inner, i))
	  {
	    edge e;
	    int j;

	    /* Now check that the loop is reducible.  We do this separately
	       from finding inner loops so that we do not find a reducible
	       loop which contains an inner non-reducible loop.

	       A simple way to find reducible/natural loops is to verify
	       that each block in the loop is dominated by the loop
	       header.

	       If there exists a block that is not dominated by the loop
	       header, then the block is reachable from outside the loop
	       and thus the loop is not a natural loop.  */
	    for (j = 0; j < n_basic_blocks; j++)
	      {
		/* First identify blocks in the loop, except for the loop
		   entry block.  */
		if (i == max_hdr[j] && i != j)
		  {
		    /* Now verify that the block is dominated by the loop
		       header.  */
		    if (!TEST_BIT (dom[j], i))
		      break;
		  }
	      }

	    /* If we exited the loop early, then I is the header of
	       a non-reducible loop and we should quit processing it
	       now.  */
	    if (j != n_basic_blocks)
	      continue;
	    /* I is a header of an inner loop, or block 0 in a subroutine
	       with no loops at all.  */
	    head = tail = -1;
	    too_large_failure = 0;
	    loop_head = max_hdr[i];

	    /* Decrease degree of all I's successors for topological
	       ordering.  */
	    for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
	      if (e->dest != EXIT_BLOCK_PTR)
		--degree[e->dest->index];

	    /* Estimate # insns, and count # blocks in the region.  */
	    num_bbs = 1;
	    num_insns = (INSN_LUID (BLOCK_END (i))
			 - INSN_LUID (BLOCK_HEAD (i)));
	    /* Find all loop latches (blocks with back edges to the loop
	       header), or all the leaf blocks if the cfg has no loops.

	       Place those blocks into the queue.  */
	    if (no_loops)
	      {
		for (j = 0; j < n_basic_blocks; j++)
		  /* Leaf nodes have only a single successor which must
		     be EXIT_BLOCK.  */
		  if (BASIC_BLOCK (j)->succ
		      && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
		      && BASIC_BLOCK (j)->succ->succ_next == NULL)
		    {
		      queue[++tail] = j;
		      SET_BIT (in_queue, j);

		      if (too_large (j, &num_bbs, &num_insns))
			{
			  too_large_failure = 1;
			  break;
			}
		    }
	      }
	    else
	      {
		for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
		  {
		    if (e->src == ENTRY_BLOCK_PTR)
		      abort ();

		    node = e->src->index;

		    if (max_hdr[node] == loop_head && node != i)
		      {
			/* This is a loop latch.  */
			queue[++tail] = node;
			SET_BIT (in_queue, node);

			if (too_large (node, &num_bbs, &num_insns))
			  {
			    too_large_failure = 1;
			    break;
			  }
		      }
		  }
	      }
	    /* Now add all the blocks in the loop to the queue.

	       We know the loop is a natural loop; however the algorithm
	       above will not always mark certain blocks as being in the
	       loop.  Consider:
		  node   children
		   a	    b,c
		   b	    c
		   c	    a,d
		   d	    b

	       The algorithm in the DFS traversal may not mark B & D as part
	       of the loop (ie they will not have max_hdr set to A).

	       We know they can not be loop latches (else they would have
	       had max_hdr set since they'd have a backedge to a dominator
	       block).  So we don't need them on the initial queue.

	       We know they are part of the loop because they are dominated
	       by the loop header and can be reached by a backwards walk of
	       the edges starting with nodes on the initial queue.

	       It is safe and desirable to include those nodes in the
	       loop/scheduling region.  To do so we would need to decrease
	       the degree of a node if it is the target of a backedge
	       within the loop itself as the node is placed in the queue.

	       We do not do this because I'm not sure that the actual
	       scheduling code will properly handle this case. ?!? */
	    while (head < tail && !too_large_failure)
	      {
		child = queue[++head];

		for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
		  {
		    node = e->src->index;

		    /* See discussion above about nodes not marked as in
		       this loop during the initial DFS traversal.  */
		    if (e->src == ENTRY_BLOCK_PTR
			|| max_hdr[node] != loop_head)
		      {
			tail = -1;
			break;
		      }
		    else if (!TEST_BIT (in_queue, node) && node != i)
		      {
			queue[++tail] = node;
			SET_BIT (in_queue, node);

			if (too_large (node, &num_bbs, &num_insns))
			  {
			    too_large_failure = 1;
			    break;
			  }
		      }
		  }
	      }
	    if (tail >= 0 && !too_large_failure)
	      {
		/* Place the loop header into list of region blocks.  */
		degree[i] = -1;
		rgn_bb_table[idx] = i;
		RGN_NR_BLOCKS (nr_regions) = num_bbs;
		RGN_BLOCKS (nr_regions) = idx++;
		CONTAINING_RGN (i) = nr_regions;
		BLOCK_TO_BB (i) = count = 0;

		/* Remove blocks from queue[] when their in degree
		   becomes zero.  Repeat until no blocks are left on the
		   list.  This produces a topological list of blocks in
		   the region.  */
		while (tail >= 0)
		  {
		    if (head < 0)
		      head = tail;
		    child = queue[head];
		    if (degree[child] == 0)
		      {
			degree[child] = -1;
			rgn_bb_table[idx++] = child;
			BLOCK_TO_BB (child) = ++count;
			CONTAINING_RGN (child) = nr_regions;
			queue[head] = queue[tail--];

			for (e = BASIC_BLOCK (child)->succ;
			     e;
			     e = e->succ_next)
			  if (e->dest != EXIT_BLOCK_PTR)
			    --degree[e->dest->index];
		      }
		    else
		      --head;
		  }
		++nr_regions;
	      }
	  }
      /* Any block that did not end up in a region is placed into a region
	 by itself.  */
      for (i = 0; i < n_basic_blocks; i++)
	if (degree[i] >= 0)
	  {
	    rgn_bb_table[idx] = i;
	    RGN_NR_BLOCKS (nr_regions) = 1;
	    RGN_BLOCKS (nr_regions) = idx++;
	    CONTAINING_RGN (i) = nr_regions++;
	    BLOCK_TO_BB (i) = 0;
	  }

      free (queue);
    }

  /* Free all allocated data.  */
  free (max_hdr);
  free (dfs_nr);
  free (stack);
  sbitmap_free (passed);
  sbitmap_free (header);
  sbitmap_free (inner);
  sbitmap_free (in_queue);
  sbitmap_free (in_stack);
}
/* Functions for regions scheduling information.  */

/* Compute dominators, probability, and potential-split-edges of bb.
   Assume that these values were already computed for bb's predecessors.  */

static void
compute_dom_prob_ps (bb)
     int bb;
{
  int nxt_in_edge, fst_in_edge, pred;
  int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;

  prob[bb] = 0.0;
  if (IS_RGN_ENTRY (bb))
    {
      BITSET_ADD (dom[bb], 0, bbset_size);
      prob[bb] = 1.0;
      return;
    }

  fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));

  /* Initialize dom[bb] to '111..1'.  */
  BITSET_INVERT (dom[bb], bbset_size);

  do
    {
      pred = FROM_BLOCK (nxt_in_edge);
      BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);

      BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
                    edgeset_size);

      BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);

      nr_out_edges = 1;
      nr_rgn_out_edges = 0;
      fst_out_edge = OUT_EDGES (pred);
      nxt_out_edge = NEXT_OUT (fst_out_edge);
      BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
                    edgeset_size);

      BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);

      /* The successor doesn't belong in the region?  */
      if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
          CONTAINING_RGN (BB_TO_BLOCK (bb)))
        ++nr_rgn_out_edges;

      while (fst_out_edge != nxt_out_edge)
        {
          ++nr_out_edges;
          /* The successor doesn't belong in the region?  */
          if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
              CONTAINING_RGN (BB_TO_BLOCK (bb)))
            ++nr_rgn_out_edges;
          BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
          nxt_out_edge = NEXT_OUT (nxt_out_edge);
        }

      /* Now nr_rgn_out_edges is the number of region-exit edges from
         pred, and nr_out_edges will be the number of pred out edges
         not leaving the region.  */
      nr_out_edges -= nr_rgn_out_edges;
      if (nr_rgn_out_edges > 0)
        prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
      else
        prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;

      nxt_in_edge = NEXT_IN (nxt_in_edge);
    }
  while (fst_in_edge != nxt_in_edge);

  BITSET_ADD (dom[bb], bb, bbset_size);
  BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);

  if (sched_verbose >= 2)
    fprintf (dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
             (int) (100.0 * prob[bb]));
}
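/* A worked sketch (not from the pass) of the probability update above:
   each predecessor contributes its own probability divided by its number
   of in-region successors, damped by 0.9 when some of its successors
   leave the region.  The divisor is at least 1, since the edge into bb
   itself stays inside the region.  */
#if 0
static double
pred_contribution (double pred_prob, int nr_out_edges, int nr_rgn_out_edges)
{
  /* Out edges that stay within the region.  */
  int nr_in_rgn = nr_out_edges - nr_rgn_out_edges;

  if (nr_rgn_out_edges > 0)
    /* E.g. prob 0.8, two out edges, one leaving: 0.9 * 0.8 / 1 = 0.72.  */
    return 0.9 * pred_prob / nr_in_rgn;
  return pred_prob / nr_in_rgn;
}
#endif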
/* Functions for target info.  */

/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
   Note that bb_trg dominates bb_src.  */

static void
split_edges (bb_src, bb_trg, bl)
     int bb_src;
     int bb_trg;
     edgelst *bl;
{
  int es = edgeset_size;
  edgeset src = (edgeset) xcalloc (es, sizeof (HOST_WIDE_INT));

  while (es--)
    src[es] = (pot_split[bb_src])[es];
  BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
  extract_bitlst (src, edgeset_size, edgeset_bitsize, bl);
  free (src);
}
/* Find the valid candidate-source-blocks for the target block TRG, compute
   their probability, and check if they are speculative or not.
   For speculative sources, compute their update-blocks and split-blocks.  */

static void
compute_trg_info (trg)
     int trg;
{
  register candidate *sp;
  edgelst el;
  int check_block, update_idx;
  int i, j, k, fst_edge, nxt_edge;

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = 100;

  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
        {
          sp->src_prob = GET_SRC_PROB (i, trg);
          sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
        }

      if (sp->is_valid)
        {
          split_edges (i, trg, &el);
          sp->is_speculative = (el.nr_members) ? 1 : 0;
          if (sp->is_speculative && !flag_schedule_speculative)
            sp->is_valid = 0;
        }

      if (sp->is_valid)
        {
          sp->split_bbs.first_member = &bblst_table[bblst_last];
          sp->split_bbs.nr_members = el.nr_members;
          for (j = 0; j < el.nr_members; bblst_last++, j++)
            bblst_table[bblst_last] =
              TO_BLOCK (rgn_edges[el.first_member[j]]);
          sp->update_bbs.first_member = &bblst_table[bblst_last];
          update_idx = 0;
          for (j = 0; j < el.nr_members; j++)
            {
              check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
              fst_edge = nxt_edge = OUT_EDGES (check_block);
              do
                {
                  for (k = 0; k < el.nr_members; k++)
                    if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
                      break;

                  if (k >= el.nr_members)
                    {
                      bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
                      update_idx++;
                    }

                  nxt_edge = NEXT_OUT (nxt_edge);
                }
              while (fst_edge != nxt_edge);
            }
          sp->update_bbs.nr_members = update_idx;
        }
      else
        {
          sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

          sp->is_speculative = 0;
          sp->src_prob = 0;
        }
    }
}
/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidate (i)
     int i;
{
  if (!candidate_table[i].is_valid)
    return;

  if (candidate_table[i].is_speculative)
    {
      int j;

      fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);

      fprintf (dump, "split path: ");
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
        {
          int b = candidate_table[i].split_bbs.first_member[j];

          fprintf (dump, " %d ", b);
        }
      fprintf (dump, "\n");

      fprintf (dump, "update path: ");
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
        {
          int b = candidate_table[i].update_bbs.first_member[j];

          fprintf (dump, " %d ", b);
        }
      fprintf (dump, "\n");
    }
  else
    fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidates (trg)
     int trg;
{
  int i;

  fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
           BB_TO_BLOCK (trg), trg);
  for (i = trg + 1; i < current_nr_blocks; i++)
    debug_candidate (i);
}
/* Functions for speculative scheduling.  */

/* Return 0 if x is a set of a register alive in the beginning of one
   of the split-blocks of src, otherwise return 1.  */

static int
check_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return 1;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == SIGN_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL
      && GET_MODE (reg) == BLKmode)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        if (check_live_1 (src, XVECEXP (reg, 0, i)))
          return 1;
      return 0;
    }

  if (GET_CODE (reg) != REG)
    return 1;

  regno = REGNO (reg);

  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
    {
      /* Global registers are assumed live.  */
      return 0;
    }
  else
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          /* Check for hard registers.  */
          int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
                {
                  int b = candidate_table[src].split_bbs.first_member[i];

                  if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
                                       regno + j))
                    return 0;
                }
            }
        }
      else
        {
          /* Check for pseudo registers.  */
          for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
            {
              int b = candidate_table[src].split_bbs.first_member[i];

              if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
                return 0;
            }
        }
    }

  return 1;
}
/* If x is a set of a register R, mark that R is alive in the beginning
   of every update-block of src.  */

static void
update_live_1 (src, x)
     int src;
     rtx x;
{
  register int i;
  register int regno;
  register rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == SIGN_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL
      && GET_MODE (reg) == BLKmode)
    {
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        update_live_1 (src, XVECEXP (reg, 0, i));
      return;
    }

  if (GET_CODE (reg) != REG)
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
                {
                  int b = candidate_table[src].update_bbs.first_member[i];

                  SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
                                     regno + j);
                }
            }
        }
      else
        {
          for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
            {
              int b = candidate_table[src].update_bbs.first_member[i];

              SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
            }
        }
    }
}
/* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before first insertion of insn to
   ready-list or before the scheduling.  */

static int
check_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return check_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;

      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
             || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
            && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
          return 0;

      return 1;
    }

  return 1;
}

/* Update the live registers info after insn was moved speculatively from
   block src to trg.  */

static void
update_live (insn, src)
     rtx insn;
     int src;
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    update_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;

      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
            || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
          update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
    }
}
/* Exception Free Loads:

   We define five classes of speculative loads: IFREE, IRISKY,
   PFREE, PRISKY, and MFREE.

   IFREE loads are loads that are proved to be exception-free, just
   by examining the load insn.  Examples for such loads are loads
   from TOC and loads of global data.

   IRISKY loads are loads that are proved to be exception-risky,
   just by examining the load insn.  Examples for such loads are
   volatile loads and loads from shared memory.

   PFREE loads are loads for which we can prove, by examining other
   insns, that they are exception-free.  Currently, this class consists
   of loads for which we are able to find a "similar load", either in
   the target block, or, if only one split-block exists, in that split
   block.  Load2 is similar to load1 if both have the same single base
   register.  We identify only part of the similar loads, by finding
   an insn upon which both load1 and load2 have a DEF-USE dependence.

   PRISKY loads are loads for which we can prove, by examining other
   insns, that they are exception-risky.  Currently we have two proofs for
   such loads.  The first proof detects loads that are probably guarded by a
   test on the memory address.  This proof is based on the
   backward and forward data dependence information for the region.
   Let load-insn be the examined load.
   Load-insn is PRISKY iff ALL the following hold:

   - insn1 is not in the same block as load-insn
   - there is a DEF-USE dependence chain (insn1, ..., load-insn)
   - test-insn is either a compare or a branch, not in the same block
     as load-insn
   - load-insn is reachable from test-insn
   - there is a DEF-USE dependence chain (insn1, ..., test-insn)

   This proof might fail when the compare and the load are fed
   by an insn not in the region.  To solve this, we will add to this
   group all loads that have no input DEF-USE dependence.

   The second proof detects loads that are directly or indirectly
   fed by a speculative load.  This proof is affected by the
   scheduling process.  We will use the flag fed_by_spec_load.
   Initially, all insns have this flag reset.  After a speculative
   motion of an insn, if insn is either a load, or marked as
   fed_by_spec_load, we will also mark as fed_by_spec_load every
   insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
   load which is fed_by_spec_load is also PRISKY.

   MFREE (maybe-free) loads are all the remaining loads.  They may be
   exception-free, but we cannot prove it.

   Now, all loads in IFREE and PFREE classes are considered
   exception-free, while all loads in IRISKY and PRISKY classes are
   considered exception-risky.  As for loads in the MFREE class,
   these are considered either exception-free or exception-risky,
   depending on whether we are pessimistic or optimistic.  We have
   to take the pessimistic approach to assure the safety of
   speculative scheduling, but we can take the optimistic approach
   by invoking the -fsched_spec_load_dangerous option.  */
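/* The classification above, in tabular form (illustrative summary only):

     class   provable how?            treated as
     IFREE   free, from insn alone    exception-free
     IRISKY  risky, from insn alone   exception-risky
     PFREE   free, from other insns   exception-free
     PRISKY  risky, from other insns  exception-risky
     MFREE   not provable             risky, unless
                                      -fsched_spec_load_dangerous  */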
enum INSN_TRAP_CLASS
{
  TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
  PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
};

#define WORST_CLASS(class1, class2) \
((class1 > class2) ? class1 : class2)

/* Non-zero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)                                 \
(bb_from == bb_to                                                    \
 || IS_RGN_ENTRY (bb_from)                                           \
 || (bitset_member (ancestor_edges[bb_to],                           \
                    EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))),  \
                    edgeset_size)))

/* Non-zero iff the address comprises at most one register.  */
#define CONST_BASED_ADDRESS_P(x)                        \
(GET_CODE (x) == REG                                    \
 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS     \
      || (GET_CODE (x) == LO_SUM))                      \
     && (GET_CODE (XEXP (x, 0)) == CONST_INT            \
         || GET_CODE (XEXP (x, 1)) == CONST_INT)))
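/* Illustrative sketch (not part of the pass): the classes form a severity
   order and WORST_CLASS keeps the maximum, so a compound pattern is
   classified as its riskiest member, and the classification loops below
   may stop early once TRAP_RISKY or IRISKY is reached.  */
#if 0
static int
combine_classes_example (void)
{
  int c = TRAP_FREE;

  c = WORST_CLASS (c, PFREE_CANDIDATE);  /* c == PFREE_CANDIDATE */
  c = WORST_CLASS (c, IRISKY);           /* c == IRISKY */
  return c;
}
#endif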
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */

static void
set_spec_fed (load_insn)
     rtx load_insn;
{
  rtx link;

  for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
    if (GET_MODE (link) == VOIDmode)
      FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
}                               /* set_spec_fed */
/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn, that guards the speculative load.  */

static int
find_conditional_protection (insn, load_insn_bb)
     rtx insn;
     int load_insn_bb;
{
  rtx link;

  /* Iterate through DEF-USE forward dependences.  */
  for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
    {
      rtx next = XEXP (link, 0);

      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
           CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
          && IS_REACHABLE (INSN_BB (next), load_insn_bb)
          && load_insn_bb != INSN_BB (next)
          && GET_MODE (link) == VOIDmode
          && (GET_CODE (next) == JUMP_INSN
              || find_conditional_protection (next, load_insn_bb)))
        return 1;
    }
  return 0;
}                               /* find_conditional_protection */
/* Returns 1 if the same insn1 that participates in the computation
   of load_insn's address is feeding a conditional branch that is
   guarding on load_insn.  This is true if we find the two DEF-USE
   dependences:
   insn1 -> ... -> conditional-branch
   insn1 -> ... -> load_insn,
   and if a flow path exists:
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
   and if insn1 is on the path
   region-entry -> ... -> bb_trg -> ... load_insn.

   Locate insn1 by climbing on LOG_LINKS from load_insn.
   Locate the branch by following INSN_DEPEND from insn1.  */

static int
is_conditionally_protected (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx link;

  for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
    {
      rtx insn1 = XEXP (link, 0);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (GET_MODE (link) != VOIDmode
          || GET_CODE (insn1) == JUMP_INSN)
        continue;

      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
          || (CONTAINING_RGN (BLOCK_NUM (insn1))
              != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
          || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
              && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
        continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
        return 1;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return 0;
}                               /* is_conditionally_protected */
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
   load_insn can move speculatively from bb_src to bb_trg.  All the
   following must hold:

   (1) both loads have 1 base register (PFREE_CANDIDATEs).
   (2) load_insn and load1 have a def-use dependence upon
   the same insn 'insn1'.
   (3) either load2 is in bb_trg, or:
   - there's only one split-block, and
   - load1 is on the escape path, and

   From all these we can conclude that the two loads access memory
   addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   load2 anyhow.  */

static int
is_pfree (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  rtx back_link;
  register candidate *candp = candidate_table + bb_src;

  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;

  for (back_link = LOG_LINKS (load_insn);
       back_link; back_link = XEXP (back_link, 1))
    {
      rtx insn1 = XEXP (back_link, 0);

      if (GET_MODE (back_link) == VOIDmode)
        {
          /* Found a DEF-USE dependence (insn1, load_insn).  */
          rtx fore_link;

          for (fore_link = INSN_DEPEND (insn1);
               fore_link; fore_link = XEXP (fore_link, 1))
            {
              rtx insn2 = XEXP (fore_link, 0);

              if (GET_MODE (fore_link) == VOIDmode)
                {
                  /* Found a DEF-USE dependence (insn1, insn2).  */
                  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
                    /* insn2 not guaranteed to be a 1 base reg load.  */
                    continue;

                  if (INSN_BB (insn2) == bb_trg)
                    /* insn2 is the similar load, in the target block.  */
                    return 1;

                  if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
                    /* insn2 is a similar load, in a split-block.  */
                    return 1;
                }
            }
        }
    }

  /* Couldn't find a similar load.  */
  return 0;
}                               /* is_pfree */
/* Returns a class that insn with GET_DEST(insn)=x may belong to,
   as found by analyzing insn's expression.  */

static int
may_trap_exp (x, is_store)
     rtx x;
     int is_store;
{
  enum rtx_code code;

  if (x == 0)
    return TRAP_FREE;
  code = GET_CODE (x);
  if (is_store)
    {
      if (code == MEM)
        return TRAP_RISKY;
      else
        return TRAP_FREE;
    }
  if (code == MEM)
    {
      /* The insn uses memory:  a volatile load.  */
      if (MEM_VOLATILE_P (x))
        return IRISKY;
      /* An exception-free load.  */
      if (!may_trap_p (x))
        return IFREE;
      /* A load with 1 base register, to be further checked.  */
      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
        return PFREE_CANDIDATE;
      /* No info on the load, to be further checked.  */
      return PRISKY_CANDIDATE;
    }
  else
    {
      const char *fmt;
      int i, insn_class = TRAP_FREE;

      /* Neither store nor load, check if it may cause a trap.  */
      if (may_trap_p (x))
        return TRAP_RISKY;
      /* Recursive step: walk the insn...  */
      fmt = GET_RTX_FORMAT (code);
      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
        {
          if (fmt[i] == 'e')
            {
              int tmp_class = may_trap_exp (XEXP (x, i), is_store);
              insn_class = WORST_CLASS (insn_class, tmp_class);
            }
          else if (fmt[i] == 'E')
            {
              int j;

              for (j = 0; j < XVECLEN (x, i); j++)
                {
                  int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
                  insn_class = WORST_CLASS (insn_class, tmp_class);
                  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
                    break;
                }
            }
          if (insn_class == TRAP_RISKY || insn_class == IRISKY)
            break;
        }
      return insn_class;
    }
}
/* Classifies insn for the purpose of verifying that it can be
   moved speculatively, by examining its pattern, returning:
   TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
   TRAP_FREE: non-load insn.
   IFREE: load from a globally safe location.
   IRISKY: volatile load.
   PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
   being either PFREE or PRISKY.  */

static int
haifa_classify_insn (insn)
     rtx insn;
{
  rtx pat = PATTERN (insn);
  int tmp_class = TRAP_FREE;
  int insn_class = TRAP_FREE;
  enum rtx_code code;

  if (GET_CODE (pat) == PARALLEL)
    {
      int i, len = XVECLEN (pat, 0);

      for (i = len - 1; i >= 0; i--)
        {
          code = GET_CODE (XVECEXP (pat, 0, i));
          switch (code)
            {
            case CLOBBER:
              /* Test if it is a 'store'.  */
              tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
              break;
            case SET:
              /* Test if it is a store.  */
              tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
              if (tmp_class == TRAP_RISKY)
                break;
              /* Test if it is a load.  */
              tmp_class =
                WORST_CLASS (tmp_class,
                             may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
              break;
            case COND_EXEC:
            case TRAP_IF:
              tmp_class = TRAP_RISKY;
              break;
            default:;
            }
          insn_class = WORST_CLASS (insn_class, tmp_class);
          if (insn_class == TRAP_RISKY || insn_class == IRISKY)
            break;
        }
    }
  else
    {
      code = GET_CODE (pat);
      switch (code)
        {
        case CLOBBER:
          /* Test if it is a 'store'.  */
          tmp_class = may_trap_exp (XEXP (pat, 0), 1);
          break;
        case SET:
          /* Test if it is a store.  */
          tmp_class = may_trap_exp (SET_DEST (pat), 1);
          if (tmp_class == TRAP_RISKY)
            break;
          /* Test if it is a load.  */
          tmp_class =
            WORST_CLASS (tmp_class,
                         may_trap_exp (SET_SRC (pat), 0));
          break;
        case COND_EXEC:
        case TRAP_IF:
          tmp_class = TRAP_RISKY;
          break;
        default:;
        }
      insn_class = tmp_class;
    }

  return insn_class;
}
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
   a load moved speculatively, or if load_insn is protected by
   a compare on load_insn's address).  */

static int
is_prisky (load_insn, bb_src, bb_trg)
     rtx load_insn;
     int bb_src, bb_trg;
{
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;

  if (LOG_LINKS (load_insn) == NULL)
    /* Dependence may 'hide' out of the region.  */
    return 1;

  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
}

/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */

static int
is_exception_free (insn, bb_src, bb_trg)
     rtx insn;
     int bb_src, bb_trg;
{
  int insn_class = haifa_classify_insn (insn);

  /* Handle non-load insns.  */
  switch (insn_class)
    {
    case TRAP_FREE:
      return 1;
    case TRAP_RISKY:
      return 0;
    default:;
    }

  /* Handle loads.  */
  if (!flag_schedule_speculative_load)
    return 0;
  IS_LOAD_INSN (insn) = 1;
  switch (insn_class)
    {
    case IFREE:
      return 1;
    case IRISKY:
      return 0;
    case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
        return 1;
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
    case PRISKY_CANDIDATE:
      if (!flag_schedule_speculative_load_dangerous
          || is_prisky (insn, bb_src, bb_trg))
        return 0;
      break;
    default:;
    }

  return flag_schedule_speculative_load_dangerous;
}
/* Process an insn's memory dependencies.  There are four kinds of
   dependencies:

   (0) read dependence: read follows read
   (1) true dependence: read follows write
   (2) anti dependence: write follows read
   (3) output dependence: write follows write

   We are careful to build only dependencies which actually exist, and
   use transitivity to avoid building too many links.  */
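/* A hedged example of the four kinds, on hypothetical memory insns:

     i1: r1 = MEM[a]    ; read
     i2: r2 = MEM[a]    ; (0) read dependence on i1 (read after read)
     i3: MEM[a] = r3    ; (2) anti dependence on i1/i2 (write after read)
     i4: r4 = MEM[a]    ; (1) true dependence on i3 (read after write)
     i5: MEM[a] = r5    ; (3) output dependence on i3 (write after write)

   Kinds (1), (2) and (3) constrain ordering for correctness; kind (0)
   only matters for references, such as volatile reads, that must not be
   reordered among themselves.  */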
/* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
   otherwise.  */

HAIFA_INLINE static char
find_insn_mem_list (insn, x, list, list1)
     rtx insn, x;
     rtx list, list1;
{
  while (list)
    {
      if (XEXP (list, 0) == insn
          && XEXP (list1, 0) == x)
        return 1;
      list = XEXP (list, 1);
      list1 = XEXP (list1, 1);
    }
  return 0;
}
/* Compute the function units used by INSN.  This caches the value
   returned by function_units_used.  A function unit is encoded as the
   unit number if the value is non-negative and the complement of a
   mask if the value is negative.  A function unit index is the
   non-negative encoding.  */

HAIFA_INLINE static int
insn_unit (insn)
     rtx insn;
{
  register int unit = INSN_UNIT (insn);

  if (unit == 0)
    {
      recog_memoized (insn);

      /* A USE insn, or something else we don't need to understand.
         We can't pass these directly to function_units_used because it will
         trigger a fatal error for unrecognizable insns.  */
      if (INSN_CODE (insn) < 0)
        unit = -1;
      else
        {
          unit = function_units_used (insn);
          /* Increment non-negative values so we can cache zero.  */
          if (unit >= 0)
            unit++;
        }
      /* We only cache 16 bits of the result, so if the value is out of
         range, don't cache it.  */
      if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
          || unit >= 0
          || (unit & ~((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
        INSN_UNIT (insn) = unit;
    }
  return (unit > 0 ? unit - 1 : unit);
}
/* Compute the blockage range for executing INSN on UNIT.  This caches
   the value returned by the blockage_range_function for the unit.
   These values are encoded in an int where the upper half gives the
   minimum value and the lower half gives the maximum value.  */

HAIFA_INLINE static unsigned int
blockage_range (unit, insn)
     int unit;
     rtx insn;
{
  unsigned int blockage = INSN_BLOCKAGE (insn);
  unsigned int range;

  if ((int) UNIT_BLOCKED (blockage) != unit + 1)
    {
      range = function_units[unit].blockage_range_function (insn);
      /* We only cache the blockage range for one unit and then only if
         the values are small.  */
      if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
        INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
    }
  else
    range = BLOCKAGE_RANGE (blockage);

  return range;
}
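/* Illustrative sketch with hypothetical EX_* constants (not the real
   UNIT_BITS / BLOCKAGE_BITS / ENCODE_BLOCKAGE): the cache packs the
   (min, max) pair into one int, minimum in the upper half and maximum
   in the lower half, alongside the unit number.  */
#if 0
#include <assert.h>

#define EX_BLOCKAGE_BITS 8
#define EX_ENCODE_RANGE(min, max) (((min) << EX_BLOCKAGE_BITS) | (max))
#define EX_MIN_COST(range) ((range) >> EX_BLOCKAGE_BITS)
#define EX_MAX_COST(range) ((range) & ((1 << EX_BLOCKAGE_BITS) - 1))

static void
blockage_encoding_example (void)
{
  unsigned int range = EX_ENCODE_RANGE (2, 5);

  assert (EX_MIN_COST (range) == 2);
  assert (EX_MAX_COST (range) == 5);
}
#endif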
/* A vector indexed by function unit instance giving the last insn to use
   the unit.  The value of the function unit instance index for unit U
   instance I is (U + I * FUNCTION_UNITS_SIZE).  */
static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];

/* A vector indexed by function unit instance giving the minimum time when
   the unit will unblock based on the maximum blockage cost.  */
static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];

/* A vector indexed by function unit number giving the number of insns
   that remain to use the unit.  */
static int unit_n_insns[FUNCTION_UNITS_SIZE];
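/* Illustrative sketch: instance I of unit U lives at index
   U + I * FUNCTION_UNITS_SIZE, so the instances of one unit are strided
   across the vectors rather than adjacent.  */
#if 0
static int
unit_instance_index_example (int unit, int instance)
{
  /* E.g. with FUNCTION_UNITS_SIZE == 4: unit 2 instance 0 -> index 2,
     instance 1 -> index 6, instance 2 -> index 10.  */
  return unit + instance * FUNCTION_UNITS_SIZE;
}
#endif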
/* Reset the function unit state to the null state.  */

static void
clear_units ()
{
  bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
  bzero ((char *) unit_tick, sizeof (unit_tick));
  bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
}
/* Return the issue-delay of an insn.  */

HAIFA_INLINE static int
insn_issue_delay (insn)
     rtx insn;
{
  int i, delay = 0;
  int unit = insn_unit (insn);

  /* Efficiency note: in fact, we are working 'hard' to compute a
     value that was available in md file, and is not available in
     function_units[] structure.  It would be nice to have this
     value there, too.  */
  if (unit >= 0)
    {
      if (function_units[unit].blockage_range_function &&
          function_units[unit].blockage_function)
        delay = function_units[unit].blockage_function (insn, insn);
    }
  else
    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
      if ((unit & 1) != 0 && function_units[i].blockage_range_function
          && function_units[i].blockage_function)
        delay = MAX (delay, function_units[i].blockage_function (insn, insn));

  return delay;
}
/* Return the actual hazard cost of executing INSN on the unit UNIT,
   instance INSTANCE at time CLOCK if the previous actual hazard cost
   was COST.  */

HAIFA_INLINE static int
actual_hazard_this_instance (unit, instance, insn, clock, cost)
     int unit, instance, clock, cost;
     rtx insn;
{
  int tick = unit_tick[instance]; /* Issue time of the last issued insn.  */

  if (tick - clock > cost)
    {
      /* The scheduler is operating forward, so unit's last insn is the
         executing insn and INSN is the candidate insn.  We want a
         more exact measure of the blockage if we execute INSN at CLOCK
         given when we committed the execution of the unit's last insn.

         The blockage value is given by either the unit's max blockage
         constant, blockage range function, or blockage function.  Use
         the most exact form for the given unit.  */

      if (function_units[unit].blockage_range_function)
        {
          if (function_units[unit].blockage_function)
            tick += (function_units[unit].blockage_function
                     (unit_last_insn[instance], insn)
                     - function_units[unit].max_blockage);
          else
            tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
                     - function_units[unit].max_blockage);
        }
      if (tick - clock > cost)
        cost = tick - clock;
    }
  return cost;
}
/* Record INSN as having begun execution on the units encoded by UNIT at
   time CLOCK.  */

HAIFA_INLINE static void
schedule_unit (unit, insn, clock)
     int unit, clock;
     rtx insn;
{
  int i;

  if (unit >= 0)
    {
      int instance = unit;
#if MAX_MULTIPLICITY > 1
      /* Find the first free instance of the function unit and use that
         one.  We assume that one is free.  */
      for (i = function_units[unit].multiplicity - 1; i > 0; i--)
        {
          if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
            break;
          instance += FUNCTION_UNITS_SIZE;
        }
#endif
      unit_last_insn[instance] = insn;
      unit_tick[instance] = (clock + function_units[unit].max_blockage);
    }
  else
    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
      if ((unit & 1) != 0)
        schedule_unit (i, insn, clock);
}
/* Return the actual hazard cost of executing INSN on the units encoded by
   UNIT at time CLOCK if the previous actual hazard cost was COST.  */

HAIFA_INLINE static int
actual_hazard (unit, insn, clock, cost)
     int unit, clock, cost;
     rtx insn;
{
  int i;

  if (unit >= 0)
    {
      /* Find the instance of the function unit with the minimum hazard.  */
      int instance = unit;
      int best_cost = actual_hazard_this_instance (unit, instance, insn,
                                                   clock, cost);
#if MAX_MULTIPLICITY > 1
      int this_cost;

      if (best_cost > cost)
        {
          for (i = function_units[unit].multiplicity - 1; i > 0; i--)
            {
              instance += FUNCTION_UNITS_SIZE;
              this_cost = actual_hazard_this_instance (unit, instance, insn,
                                                       clock, cost);
              if (this_cost < best_cost)
                {
                  best_cost = this_cost;
                  if (this_cost <= cost)
                    break;
                }
            }
        }
#endif
      cost = MAX (cost, best_cost);
    }
  else
    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
      if ((unit & 1) != 0)
        cost = actual_hazard (i, insn, clock, cost);

  return cost;
}
/* Return the potential hazard cost of executing an instruction on the
   units encoded by UNIT if the previous potential hazard cost was COST.
   An insn with a large blockage time is chosen in preference to one
   with a smaller time; an insn that uses a unit that is more likely
   to be used is chosen in preference to one with a unit that is less
   used.  We are trying to minimize a subsequent actual hazard.  */

HAIFA_INLINE static int
potential_hazard (unit, insn, cost)
     int unit, cost;
     rtx insn;
{
  int i, ncost;
  unsigned int minb, maxb;

  if (unit >= 0)
    {
      minb = maxb = function_units[unit].max_blockage;
      if (maxb > 1)
        {
          if (function_units[unit].blockage_range_function)
            {
              maxb = minb = blockage_range (unit, insn);
              maxb = MAX_BLOCKAGE_COST (maxb);
              minb = MIN_BLOCKAGE_COST (minb);
            }

          if (maxb > 1)
            {
              /* Make the number of instructions left dominate.  Make the
                 minimum delay dominate the maximum delay.  If all these
                 are the same, use the unit number to add an arbitrary
                 ordering.  Other terms can be added.  */
              ncost = minb * 0x40 + maxb;
              ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
              if (ncost > cost)
                cost = ncost;
            }
        }
    }
  else
    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
      if ((unit & 1) != 0)
        cost = potential_hazard (i, insn, cost);

  return cost;
}
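/* A worked sketch of the heuristic above with hypothetical numbers: the
   insns-left factor dominates, then the minimum delay (scaled by 0x40),
   then the maximum delay, with the unit number as an arbitrary final
   tie-break.  */
#if 0
static int
potential_cost_example (void)
{
  int minb = 2, maxb = 5;       /* Min/max blockage of the candidate.  */
  int n_insns = 3, unit = 1;    /* Insns left to use the unit; unit no.  */
  int ncost = minb * 0x40 + maxb;               /* 133 */

  ncost *= (n_insns - 1) * 0x1000 + unit;       /* 133 * 8193 == 1089669 */
  return ncost;
}
#endif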
/* Compute cost of executing INSN given the dependence LINK on the insn USED.
   This is the number of cycles between instruction issue and
   instruction results.  */

HAIFA_INLINE static int
insn_cost (insn, link, used)
     rtx insn, link, used;
{
  register int cost = INSN_COST (insn);

  if (cost == 0)
    {
      recog_memoized (insn);

      /* A USE insn, or something else we don't need to understand.
         We can't pass these directly to result_ready_cost because it will
         trigger a fatal error for unrecognizable insns.  */
      if (INSN_CODE (insn) < 0)
        {
          INSN_COST (insn) = 1;
          return 1;
        }
      else
        {
          cost = result_ready_cost (insn);

          if (cost < 1)
            cost = 1;

          INSN_COST (insn) = cost;
        }
    }

  /* In this case estimate cost without caring how insn is used.  */
  if (link == 0 && used == 0)
    return cost;

  /* A USE insn should never require the value used to be computed.  This
     allows the computation of a function's result and parameter values to
     overlap the return and call.  */
  recog_memoized (used);
  if (INSN_CODE (used) < 0)
    LINK_COST_FREE (link) = 1;

  /* If some dependencies vary the cost, compute the adjustment.  Most
     commonly, the adjustment is complete: either the cost is ignored
     (in the case of an output- or anti-dependence), or the cost is
     unchanged.  These values are cached in the link as LINK_COST_FREE
     and LINK_COST_ZERO.  */

  if (LINK_COST_FREE (link))
    cost = 0;
#ifdef ADJUST_COST
  else if (!LINK_COST_ZERO (link))
    {
      int ncost = cost;

      ADJUST_COST (used, link, insn, ncost);
      if (ncost < 1)
        {
          LINK_COST_FREE (link) = 1;
          ncost = 0;
        }
      if (cost == ncost)
        LINK_COST_ZERO (link) = 1;
      cost = ncost;
    }
#endif

  return cost;
}
/* Compute the priority number for INSN.  */

static int
priority (insn)
     rtx insn;
{
  int this_priority;
  rtx link;

  if (! INSN_P (insn))
    return 0;

  if ((this_priority = INSN_PRIORITY (insn)) == 0)
    {
      if (INSN_DEPEND (insn) == 0)
        this_priority = insn_cost (insn, 0, 0);
      else
        for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
          {
            rtx next;
            int next_priority;

            if (RTX_INTEGRATED_P (link))
              continue;

            next = XEXP (link, 0);

            /* Critical path is meaningful in block boundaries only.  */
            if (BLOCK_NUM (next) != BLOCK_NUM (insn))
              continue;

            next_priority = insn_cost (insn, link, next) + priority (next);
            if (next_priority > this_priority)
              this_priority = next_priority;
          }
      INSN_PRIORITY (insn) = this_priority;
    }
  return this_priority;
}
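/* Illustrative sketch (stand-alone): priority is the latency-weighted
   length of the longest path to a leaf of the forward-dependence graph
   within the block, computed by the recursion above and memoized in
   INSN_PRIORITY.  For a chain a -> b -> c with insn costs 2, 3, 1:  */
#if 0
static int
chain_priority_example (void)
{
  int cost_a = 2, cost_b = 3, cost_c = 1;
  int prio_c = cost_c;                  /* 1: no forward dependences.  */
  int prio_b = cost_b + prio_c;         /* 4 */
  int prio_a = cost_a + prio_b;         /* 6: the critical path length.  */

  return prio_a;
}
#endif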
/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
   them to the unused_*_list variables, so that they can be reused.  */

static void
free_pending_lists ()
{
  int bb;

  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
      free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
      free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
      free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
    }
}
/* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
   The MEM is a memory reference contained within INSN, which we are saving
   so that we can do memory aliasing on it.  */

static void
add_insn_mem_dependence (deps, insn_list, mem_list, insn, mem)
     struct deps *deps;
     rtx *insn_list, *mem_list, insn, mem;
{
  register rtx link;

  link = alloc_INSN_LIST (insn, *insn_list);
  *insn_list = link;

  link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
  *mem_list = link;

  deps->pending_lists_length++;
}
/* Make a dependency between every memory reference on the pending lists
   and INSN, thus flushing the pending lists.  If ONLY_WRITE, don't flush
   the read list.  */

static void
flush_pending_lists (deps, insn, only_write)
     struct deps *deps;
     rtx insn;
     int only_write;
{
  rtx u;
  rtx link;

  while (deps->pending_read_insns && ! only_write)
    {
      add_dependence (insn, XEXP (deps->pending_read_insns, 0),
                      REG_DEP_ANTI);

      link = deps->pending_read_insns;
      deps->pending_read_insns = XEXP (deps->pending_read_insns, 1);
      free_INSN_LIST_node (link);

      link = deps->pending_read_mems;
      deps->pending_read_mems = XEXP (deps->pending_read_mems, 1);
      free_EXPR_LIST_node (link);
    }
  while (deps->pending_write_insns)
    {
      add_dependence (insn, XEXP (deps->pending_write_insns, 0),
                      REG_DEP_ANTI);

      link = deps->pending_write_insns;
      deps->pending_write_insns = XEXP (deps->pending_write_insns, 1);
      free_INSN_LIST_node (link);

      link = deps->pending_write_mems;
      deps->pending_write_mems = XEXP (deps->pending_write_mems, 1);
      free_EXPR_LIST_node (link);
    }
  deps->pending_lists_length = 0;

  /* last_pending_memory_flush is now a list of insns.  */
  for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

  free_INSN_LIST_list (&deps->last_pending_memory_flush);
  deps->last_pending_memory_flush = alloc_INSN_LIST (insn, NULL_RTX);
}
/* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
   rtx, X, creating all dependencies generated by the write to the
   destination of X, and reads of everything mentioned.  */

static void
sched_analyze_1 (deps, x, insn)
     struct deps *deps;
     rtx x, insn;
{
  register int regno;
  register rtx dest = XEXP (x, 0);
  enum rtx_code code = GET_CODE (x);

  if (dest == 0)
    return;

  if (GET_CODE (dest) == PARALLEL
      && GET_MODE (dest) == BLKmode)
    {
      register int i;

      for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
        sched_analyze_1 (deps, XVECEXP (dest, 0, i), insn);
      if (GET_CODE (x) == SET)
        sched_analyze_2 (deps, SET_SRC (x), insn);
      return;
    }

  while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
         || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
    {
      if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
        {
          /* The second and third arguments are values read by this insn.  */
          sched_analyze_2 (deps, XEXP (dest, 1), insn);
          sched_analyze_2 (deps, XEXP (dest, 2), insn);
        }
      dest = XEXP (dest, 0);
    }

  if (GET_CODE (dest) == REG)
    {
      register int i;

      regno = REGNO (dest);

      /* A hard reg in a wide mode may really be multiple registers.
         If so, mark all of them just like the first.  */
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
          while (--i >= 0)
            {
              int r = regno + i;
              rtx u;

              for (u = deps->reg_last_uses[r]; u; u = XEXP (u, 1))
                add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

              for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
                add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);

              /* Clobbers need not be ordered with respect to one
                 another, but sets must be ordered with respect to a
                 pending clobber.  */
              if (code == SET)
                {
                  free_INSN_LIST_list (&deps->reg_last_uses[r]);
                  for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
                    add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
                  SET_REGNO_REG_SET (reg_pending_sets, r);
                }
              else
                SET_REGNO_REG_SET (reg_pending_clobbers, r);

              /* Function calls clobber all call_used regs.  */
              if (global_regs[r] || (code == SET && call_used_regs[r]))
                for (u = deps->last_function_call; u; u = XEXP (u, 1))
                  add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
            }
        }
      else
        {
          rtx u;

          for (u = deps->reg_last_uses[regno]; u; u = XEXP (u, 1))
            add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

          for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
            add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);

          if (code == SET)
            {
              free_INSN_LIST_list (&deps->reg_last_uses[regno]);
              for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
                add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
              SET_REGNO_REG_SET (reg_pending_sets, regno);
            }
          else
            SET_REGNO_REG_SET (reg_pending_clobbers, regno);

          /* Pseudos that are REG_EQUIV to something may be replaced
             by that during reloading.  We need only add dependencies for
             the address in the REG_EQUIV note.  */
          if (!reload_completed
              && reg_known_equiv_p[regno]
              && GET_CODE (reg_known_value[regno]) == MEM)
            sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);

          /* Don't let it cross a call after scheduling if it doesn't
             already cross one.  */

          if (REG_N_CALLS_CROSSED (regno) == 0)
            for (u = deps->last_function_call; u; u = XEXP (u, 1))
              add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
        }
    }
  else if (GET_CODE (dest) == MEM)
    {
      /* Writing memory.  */

      if (deps->pending_lists_length > 32)
        {
          /* Flush all pending reads and writes to prevent the pending lists
             from getting any larger.  Insn scheduling runs too slowly when
             these lists get long.  The number 32 was chosen because it
             seems like a reasonable number.  When compiling GCC with itself,
             this flush occurs 8 times for sparc, and 10 times for m88k.  */
          flush_pending_lists (deps, insn, 0);
        }
      else
        {
          rtx u;
          rtx pending, pending_mem;

          pending = deps->pending_read_insns;
          pending_mem = deps->pending_read_mems;
          while (pending)
            {
              if (anti_dependence (XEXP (pending_mem, 0), dest))
                add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);

              pending = XEXP (pending, 1);
              pending_mem = XEXP (pending_mem, 1);
            }

          pending = deps->pending_write_insns;
          pending_mem = deps->pending_write_mems;
          while (pending)
            {
              if (output_dependence (XEXP (pending_mem, 0), dest))
                add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);

              pending = XEXP (pending, 1);
              pending_mem = XEXP (pending_mem, 1);
            }

          for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
            add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

          add_insn_mem_dependence (deps, &deps->pending_write_insns,
                                   &deps->pending_write_mems, insn, dest);
        }
      sched_analyze_2 (deps, XEXP (dest, 0), insn);
    }

  /* Analyze reads.  */
  if (GET_CODE (x) == SET)
    sched_analyze_2 (deps, SET_SRC (x), insn);
}
/* Analyze the uses of memory and registers in rtx X in INSN.  */

static void
sched_analyze_2 (deps, x, insn)
     struct deps *deps;
     rtx x;
     rtx insn;
{
  register int i;
  register int j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == 0)
    return;

  code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      /* Ignore constants.  Note that we must handle CONST_DOUBLE here
         because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
         this does not mean that this insn is using cc0.  */
      return;

#ifdef HAVE_cc0
    case CC0:
      /* User of CC0 depends on immediately preceding insn.  */
      set_sched_group_p (insn);
      return;
#endif

    case REG:
      {
        rtx u;
        int regno = REGNO (x);

        if (regno < FIRST_PSEUDO_REGISTER)
          {
            int i;

            i = HARD_REGNO_NREGS (regno, GET_MODE (x));
            while (--i >= 0)
              {
                int r = regno + i;

                deps->reg_last_uses[r]
                  = alloc_INSN_LIST (insn, deps->reg_last_uses[r]);

                for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
                  add_dependence (insn, XEXP (u, 0), 0);

                /* ??? This should never happen.  */
                for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
                  add_dependence (insn, XEXP (u, 0), 0);

                if (call_used_regs[r] || global_regs[r])
                  /* Function calls clobber all call_used regs.  */
                  for (u = deps->last_function_call; u; u = XEXP (u, 1))
                    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
              }
          }
        else
          {
            deps->reg_last_uses[regno]
              = alloc_INSN_LIST (insn, deps->reg_last_uses[regno]);

            for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
              add_dependence (insn, XEXP (u, 0), 0);

            /* ??? This should never happen.  */
            for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
              add_dependence (insn, XEXP (u, 0), 0);

            /* Pseudos that are REG_EQUIV to something may be replaced
               by that during reloading.  We need only add dependencies for
               the address in the REG_EQUIV note.  */
            if (!reload_completed
                && reg_known_equiv_p[regno]
                && GET_CODE (reg_known_value[regno]) == MEM)
              sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);

            /* If the register does not already cross any calls, then add this
               insn to the sched_before_next_call list so that it will still
               not cross calls after scheduling.  */
            if (REG_N_CALLS_CROSSED (regno) == 0)
              add_dependence (deps->sched_before_next_call, insn,
                              REG_DEP_ANTI);
          }
        return;
      }

    case MEM:
      {
        /* Reading memory.  */
        rtx u;
        rtx pending, pending_mem;

        pending = deps->pending_read_insns;
        pending_mem = deps->pending_read_mems;
        while (pending)
          {
            if (read_dependence (XEXP (pending_mem, 0), x))
              add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);

            pending = XEXP (pending, 1);
            pending_mem = XEXP (pending_mem, 1);
          }

        pending = deps->pending_write_insns;
        pending_mem = deps->pending_write_mems;
        while (pending)
          {
            if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
                                 x, rtx_varies_p))
              add_dependence (insn, XEXP (pending, 0), 0);

            pending = XEXP (pending, 1);
            pending_mem = XEXP (pending_mem, 1);
          }

        for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
          add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

        /* Always add these dependencies to pending_reads, since
           this insn may be followed by a write.  */
        add_insn_mem_dependence (deps, &deps->pending_read_insns,
                                 &deps->pending_read_mems, insn, x);

        /* Take advantage of tail recursion here.  */
        sched_analyze_2 (deps, XEXP (x, 0), insn);
        return;
      }

    /* Force pending stores to memory in case a trap handler needs them.  */
    case TRAP_IF:
      flush_pending_lists (deps, insn, 1);
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
    case UNSPEC_VOLATILE:
      {
        rtx u;

        /* Traditional and volatile asm instructions must be considered to use
           and clobber all hard registers, all pseudo-registers and all of
           memory.  So must TRAP_IF and UNSPEC_VOLATILE operations.

           Consider for instance a volatile asm that changes the fpu rounding
           mode.  An insn should not be moved across this even if it only uses
           pseudo-regs because it might give an incorrectly rounded result.  */
        if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
          {
            int max_reg = max_reg_num ();

            for (i = 0; i < max_reg; i++)
              {
                for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
                  add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
                free_INSN_LIST_list (&deps->reg_last_uses[i]);

                for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
                  add_dependence (insn, XEXP (u, 0), 0);

                for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
                  add_dependence (insn, XEXP (u, 0), 0);
              }
            reg_pending_sets_all = 1;

            flush_pending_lists (deps, insn, 0);
          }

        /* For all ASM_OPERANDS, we must traverse the vector of input operands.
           We can not just fall through here since then we would be confused
           by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
           traditional asms unlike their normal usage.  */

        if (code == ASM_OPERANDS)
          {
            for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
              sched_analyze_2 (deps, ASM_OPERANDS_INPUT (x, j), insn);
          }
        break;
      }

    case PRE_DEC:
    case POST_DEC:
    case PRE_INC:
    case POST_INC:
      /* These both read and modify the result.  We must handle them as writes
         to get proper dependencies for following instructions.  We must handle
         them as reads to get proper dependencies from this to previous
         instructions.  Thus we need to pass them to both sched_analyze_1
         and sched_analyze_2.  We must call sched_analyze_2 first in order
         to get the proper antecedent for the read.  */
      sched_analyze_2 (deps, XEXP (x, 0), insn);
      sched_analyze_1 (deps, x, insn);
      return;

    case POST_MODIFY:
    case PRE_MODIFY:
      /* op0 = op0 + op1 */
      sched_analyze_2 (deps, XEXP (x, 0), insn);
      sched_analyze_2 (deps, XEXP (x, 1), insn);
      sched_analyze_1 (deps, x, insn);
      return;

    default:
      break;
    }

  /* Other cases: walk the insn.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        sched_analyze_2 (deps, XEXP (x, i), insn);
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          sched_analyze_2 (deps, XVECEXP (x, i, j), insn);
    }
}
/* Analyze an INSN with pattern X to find all dependencies.  */

static void
sched_analyze_insn (deps, x, insn, loop_notes)
     struct deps *deps;
     rtx x, insn;
     rtx loop_notes;
{
  register RTX_CODE code = GET_CODE (x);
  rtx link;
  int maxreg = max_reg_num ();
  int i;

  if (code == COND_EXEC)
    {
      sched_analyze_2 (deps, COND_EXEC_TEST (x), insn);

      /* ??? Should be recording conditions so we reduce the number of
         false dependencies.  */
      x = COND_EXEC_CODE (x);
      code = GET_CODE (x);
    }
  if (code == SET || code == CLOBBER)
    sched_analyze_1 (deps, x, insn);
  else if (code == PARALLEL)
    {
      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
        {
          rtx sub = XVECEXP (x, 0, i);

          code = GET_CODE (sub);
          if (code == COND_EXEC)
            {
              sched_analyze_2 (deps, COND_EXEC_TEST (sub), insn);
              sub = COND_EXEC_CODE (sub);
              code = GET_CODE (sub);
            }
          if (code == SET || code == CLOBBER)
            sched_analyze_1 (deps, sub, insn);
          else
            sched_analyze_2 (deps, sub, insn);
        }
    }
  else
    sched_analyze_2 (deps, x, insn);

  /* Mark registers CLOBBERED or used by called function.  */
  if (GET_CODE (insn) == CALL_INSN)
    for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
      {
        if (GET_CODE (XEXP (link, 0)) == CLOBBER)
          sched_analyze_1 (deps, XEXP (link, 0), insn);
        else
          sched_analyze_2 (deps, XEXP (link, 0), insn);
      }

  /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
     block, then we must be sure that no instructions are scheduled across it.
     Otherwise, the reg_n_refs info (which depends on loop_depth) would
     become incorrect.  */

  if (loop_notes)
    {
      int max_reg = max_reg_num ();
      int schedule_barrier_found = 0;
      rtx link;

      /* Update loop_notes with any notes from this insn.  Also determine
         if any of the notes on the list correspond to instruction scheduling
         barriers (loop, eh & setjmp notes, but not range notes).  */
      link = loop_notes;
      while (XEXP (link, 1))
        {
          if (INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_BEG
              || INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_END
              || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_BEG
              || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_END
              || INTVAL (XEXP (link, 0)) == NOTE_INSN_SETJMP)
            schedule_barrier_found = 1;

          link = XEXP (link, 1);
        }
      XEXP (link, 1) = REG_NOTES (insn);
      REG_NOTES (insn) = loop_notes;

      /* Add dependencies if a scheduling barrier was found.  */
      if (schedule_barrier_found)
        {
          for (i = 0; i < max_reg; i++)
            {
              rtx u;

              for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
                add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
              free_INSN_LIST_list (&deps->reg_last_uses[i]);

              for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
                add_dependence (insn, XEXP (u, 0), 0);

              for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
                add_dependence (insn, XEXP (u, 0), 0);
            }
          reg_pending_sets_all = 1;

          flush_pending_lists (deps, insn, 0);
        }
    }

  /* Accumulate clobbers until the next set so that it will be output dependent
     on all of them.  At the next set we can clear the clobber list, since
     subsequent sets will be output dependent on it.  */
  EXECUTE_IF_SET_IN_REG_SET
    (reg_pending_sets, 0, i,
     {
       free_INSN_LIST_list (&deps->reg_last_sets[i]);
       free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
       deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
     });
  EXECUTE_IF_SET_IN_REG_SET
    (reg_pending_clobbers, 0, i,
     {
       deps->reg_last_clobbers[i]
         = alloc_INSN_LIST (insn, deps->reg_last_clobbers[i]);
     });
  CLEAR_REG_SET (reg_pending_sets);
  CLEAR_REG_SET (reg_pending_clobbers);

  if (reg_pending_sets_all)
    {
      for (i = 0; i < maxreg; i++)
        {
          free_INSN_LIST_list (&deps->reg_last_sets[i]);
          free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
          deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
        }

      reg_pending_sets_all = 0;
    }

  /* If a post-call group is still open, see if it should remain so.
     This insn must be a simple move of a hard reg to a pseudo or
     vice-versa.

     We must avoid moving these insns for correctness on
     SMALL_REGISTER_CLASS machines, and for special registers like
     PIC_OFFSET_TABLE_REGNUM.  For simplicity, extend this to all
     hard regs for all targets.  */

  if (deps->in_post_call_group_p)
    {
      rtx tmp, set = single_set (insn);
      int src_regno, dest_regno;

      if (set == NULL)
        goto end_call_group;

      tmp = SET_DEST (set);
      if (GET_CODE (tmp) == SUBREG)
        tmp = SUBREG_REG (tmp);
      if (GET_CODE (tmp) == REG)
        dest_regno = REGNO (tmp);
      else
        goto end_call_group;

      tmp = SET_SRC (set);
      if (GET_CODE (tmp) == SUBREG)
        tmp = SUBREG_REG (tmp);
      if (GET_CODE (tmp) == REG)
        src_regno = REGNO (tmp);
      else
        goto end_call_group;

      if (src_regno < FIRST_PSEUDO_REGISTER
          || dest_regno < FIRST_PSEUDO_REGISTER)
        {
          set_sched_group_p (insn);
          CANT_MOVE (insn) = 1;
        }
      else
        {
        end_call_group:
          deps->in_post_call_group_p = 0;
        }
    }
}
/* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
   for every dependency.  */

static void
sched_analyze (deps, head, tail)
     struct deps *deps;
     rtx head, tail;
{
  register rtx insn;
  register rtx u;
  rtx loop_notes = 0;

  for (insn = head;; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
        {
          /* Clear out the stale LOG_LINKS from flow.  */
          free_INSN_LIST_list (&LOG_LINKS (insn));

          /* Clear out stale SCHED_GROUP_P.  */
          SCHED_GROUP_P (insn) = 0;

          /* Make each JUMP_INSN a scheduling barrier for memory
             references.  */
          if (GET_CODE (insn) == JUMP_INSN)
            deps->last_pending_memory_flush
              = alloc_INSN_LIST (insn, deps->last_pending_memory_flush);
          sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
          loop_notes = 0;
        }
      else if (GET_CODE (insn) == CALL_INSN)
        {
          rtx x;
          register int i;

          /* Clear out stale SCHED_GROUP_P.  */
          SCHED_GROUP_P (insn) = 0;

          CANT_MOVE (insn) = 1;

          /* Clear out the stale LOG_LINKS from flow.  */
          free_INSN_LIST_list (&LOG_LINKS (insn));

          /* Any instruction using a hard register which may get clobbered
             by a call needs to be marked as dependent on this call.
             This prevents a use of a hard return reg from being moved
             past a void call (i.e. it does not explicitly set the hard
             return reg).  */

          /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
             all registers, not just hard registers, may be clobbered by this
             call.  */

          /* Insn, being a CALL_INSN, magically depends on
             `last_function_call' already.  */

          if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
              && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
            {
              int max_reg = max_reg_num ();

              for (i = 0; i < max_reg; i++)
                {
                  for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
                    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
                  free_INSN_LIST_list (&deps->reg_last_uses[i]);

                  for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
                    add_dependence (insn, XEXP (u, 0), 0);

                  for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
                    add_dependence (insn, XEXP (u, 0), 0);
                }
              reg_pending_sets_all = 1;

              /* Add a pair of REG_SAVE_NOTEs which we will later
                 convert back into a NOTE_INSN_SETJMP note.  See
                 reemit_notes for why we use a pair of NOTEs.  */
              REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
                                                  GEN_INT (0),
                                                  REG_NOTES (insn));
              REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
                                                  GEN_INT (NOTE_INSN_SETJMP),
                                                  REG_NOTES (insn));
            }
          else
            {
              for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
                if (call_used_regs[i] || global_regs[i])
                  {
                    for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
                      add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

                    for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
                      add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);

                    SET_REGNO_REG_SET (reg_pending_clobbers, i);
                  }
            }

          /* For each insn which shouldn't cross a call, add a dependence
             between that insn and this call insn.  */
          x = LOG_LINKS (deps->sched_before_next_call);
          while (x)
            {
              add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
              x = XEXP (x, 1);
            }
          free_INSN_LIST_list (&LOG_LINKS (deps->sched_before_next_call));

          sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
          loop_notes = 0;

          /* In the absence of interprocedural alias analysis, we must flush
             all pending reads and writes, and start new dependencies starting
             from here.  But only flush writes for constant calls (which may
             be passed a pointer to something we haven't written yet).  */
          flush_pending_lists (deps, insn, CONST_CALL_P (insn));

          /* Depend this function call (actually, the user of this
             function call) on all hard register clobberage.  */

          /* last_function_call is now a list of insns.  */
          free_INSN_LIST_list (&deps->last_function_call);
          deps->last_function_call = alloc_INSN_LIST (insn, NULL_RTX);

          /* Before reload, begin a post-call group, so as to keep the
             lifetimes of hard registers correct.  */
          if (! reload_completed)
            deps->in_post_call_group_p = 1;
        }

      /* See comments on reemit_notes as to why we do this.
         ??? Actually, the reemit_notes just say what is done, not why.  */

      else if (GET_CODE (insn) == NOTE
               && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_BEG
                   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END))
        {
          loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE, NOTE_RANGE_INFO (insn),
                                        loop_notes);
          loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
                                        GEN_INT (NOTE_LINE_NUMBER (insn)),
                                        loop_notes);
        }
      else if (GET_CODE (insn) == NOTE
               && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
                   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
                   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
                   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
                   || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
                       && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
        {
          rtx rtx_region;

          if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
              || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
            rtx_region = GEN_INT (NOTE_EH_HANDLER (insn));
          else
            rtx_region = GEN_INT (0);

          loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
                                        rtx_region,
                                        loop_notes);
          loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
                                        GEN_INT (NOTE_LINE_NUMBER (insn)),
                                        loop_notes);
          CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
        }

      if (insn == tail)
        return;
    }
  abort ();
}
/* Macros and functions for keeping the priority queue sorted, and
   dealing with queueing and dequeueing of instructions.  */

#define SCHED_SORT(READY, N_READY)                                   \
do { if ((N_READY) == 2)                                             \
       swap_sort (READY, N_READY);                                   \
     else if ((N_READY) > 2)                                         \
         qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); }  \
while (0)

/* Returns a positive value if x is preferred; returns a negative value if
   y is preferred.  Should never return 0, since that will make the sort
   unstable.  */

static int
rank_for_schedule (x, y)
     const PTR x;
     const PTR y;
{
  rtx tmp = *(const rtx *) y;
  rtx tmp2 = *(const rtx *) x;
  rtx link;
  int tmp_class, tmp2_class, depend_count1, depend_count2;
  int val, priority_val, spec_val, prob_val, weight_val;

  /* Prefer insn with higher priority.  */
  priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
  if (priority_val)
    return priority_val;

  /* Prefer an insn with smaller contribution to register pressure.  */
  if (!reload_completed &&
      (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2)))
    return (weight_val);

  /* Some comparisons make sense in interblock scheduling only.  */
  if (INSN_BB (tmp) != INSN_BB (tmp2))
    {
      /* Prefer an inblock motion over an interblock motion.  */
      if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb))
        return 1;
      if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb))
        return -1;

      /* Prefer a useful motion over a speculative one.  */
      if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2)))
        return (spec_val);

      /* Prefer a more probable (speculative) insn.  */
      prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp);
      if (prob_val)
        return (prob_val);
    }

  /* Compare insns based on their relation to the last-scheduled-insn.  */
  if (last_scheduled_insn)
    {
      /* Classify the instructions into three classes:
         1) Data dependent on last scheduled insn.
         2) Anti/Output dependent on last scheduled insn.
         3) Independent of last scheduled insn, or has latency of one.
         Choose the insn from the highest numbered class if different.  */
      link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn));
      if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1)
        tmp_class = 3;
      else if (REG_NOTE_KIND (link) == 0)       /* Data dependence.  */
        tmp_class = 1;
      else
        tmp_class = 2;

      link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn));
      if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1)
        tmp2_class = 3;
      else if (REG_NOTE_KIND (link) == 0)       /* Data dependence.  */
        tmp2_class = 1;
      else
        tmp2_class = 2;

      if ((val = tmp2_class - tmp_class))
        return val;
    }

  /* Prefer the insn which has more later insns that depend on it.
     This gives the scheduler more freedom when scheduling later
     instructions at the expense of added register pressure.  */
  depend_count1 = 0;
  for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
    depend_count1++;

  depend_count2 = 0;
  for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
    depend_count2++;

  val = depend_count2 - depend_count1;
  if (val)
    return val;

  /* If insns are equally good, sort by INSN_LUID (original insn order),
     so that we make the sort stable.  This minimizes instruction movement,
     thus minimizing sched's effect on debugging and cross-jumping.  */
  return INSN_LUID (tmp) - INSN_LUID (tmp2);
}
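/* Illustrative sketch (hypothetical item type): the comparator pattern
   above -- a cascade of keyed differences ending in a unique id -- never
   returns 0, which keeps qsort deterministic even though qsort itself
   gives no ordering guarantee for "equal" elements.  */
#if 0
struct item { int priority; int luid; /* Unique; original order.  */ };

static int
rank_items (const void *x, const void *y)
{
  const struct item *tmp = *(const struct item *const *) y;
  const struct item *tmp2 = *(const struct item *const *) x;
  int diff = tmp2->priority - tmp->priority;

  if (diff)
    return diff;
  return tmp->luid - tmp2->luid;        /* Never 0: luids are unique.  */
}
#endif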
4084 /* Resort the array A in which only element at index N may be out of order. */
4086 HAIFA_INLINE static void
4091 rtx insn = a[n - 1];
4094 while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
4102 static int max_priority;
4104 /* Add INSN to the insn queue so that it can be executed at least
4105 N_CYCLES after the currently executing insn. Preserve insns
4106 chain for debugging purposes. */
4108 HAIFA_INLINE static void
4109 queue_insn (insn, n_cycles)
4113 int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
4114 rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
4115 insn_queue[next_q] = link;
4118 if (sched_verbose >= 2)
4120 fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
4122 if (INSN_BB (insn) != target_bb)
4123 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4125 fprintf (dump, "queued for %d cycles.\n", n_cycles);
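/* The insn queue is a circular buffer indexed by clock cycle: the
   bucket NEXT_Q_AFTER (q_ptr, n) holds insns that become ready n
   cycles from now. A minimal sketch of that indexing, assuming (as
   the wrap-around use here suggests) a power-of-two queue size; the
   EXAMPLE_* names are hypothetical: */
#if 0
#define EXAMPLE_QUEUE_SIZE 32	/* Must be a power of two. */
#define EXAMPLE_NEXT_Q(q) (((q) + 1) & (EXAMPLE_QUEUE_SIZE - 1))
#define EXAMPLE_NEXT_Q_AFTER(q, n) (((q) + (n)) & (EXAMPLE_QUEUE_SIZE - 1))
#endif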
4130 /* PREV is an insn that is ready to execute. Adjust its priority if that
4131 will help shorten or lengthen register lifetimes as appropriate. Also
4132 provide a hook for the target to tweak itself. */
4134 HAIFA_INLINE static void
4135 adjust_priority (prev)
4136 rtx prev ATTRIBUTE_UNUSED;
4138 /* ??? There used to be code here to try and estimate how an insn
4139 affected register lifetimes, but it did it by looking at REG_DEAD
4140 notes, which we removed in schedule_region. Nor did it try to
4141 take into account register pressure or anything useful like that.
4143 Revisit when we have a machine model to work with and not before. */
4145 #ifdef ADJUST_PRIORITY
4146 ADJUST_PRIORITY (prev);
4150 /* Clock at which the previous instruction was issued. */
4151 static int last_clock_var;
4153 /* INSN is the "currently executing insn". Launch each insn which was
4154 waiting on INSN. READY is a vector of insns which are ready to fire.
4155 N_READY is the number of elements in READY. CLOCK is the current
4156 cycle. */
4159 schedule_insn (insn, ready, n_ready, clock)
4168 unit = insn_unit (insn);
4170 if (sched_verbose >= 2)
4172 fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4174 insn_print_units (insn);
4175 fprintf (dump, "\n");
4178 if (sched_verbose && unit == -1)
4179 visualize_no_unit (insn);
4181 if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
4182 schedule_unit (unit, insn, clock);
4184 if (INSN_DEPEND (insn) == 0)
4187 /* This is used by the function adjust_priority above. */
4189 max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
4191 max_priority = INSN_PRIORITY (insn);
4193 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
4195 rtx next = XEXP (link, 0);
4196 int cost = insn_cost (insn, link, next);
4198 INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
4200 if ((INSN_DEP_COUNT (next) -= 1) == 0)
4202 int effective_cost = INSN_TICK (next) - clock;
4204 /* For speculative insns, before inserting to ready/queue,
4205 check live, exception-free, and issue-delay. */
4206 if (INSN_BB (next) != target_bb
4207 && (!IS_VALID (INSN_BB (next))
4209 || (IS_SPECULATIVE_INSN (next)
4210 && (insn_issue_delay (next) > 3
4211 || !check_live (next, INSN_BB (next))
4212 || !is_exception_free (next, INSN_BB (next), target_bb)))))
4215 if (sched_verbose >= 2)
4217 fprintf (dump, ";;\t\tdependences resolved: insn %d ",
4220 if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
4221 fprintf (dump, "/b%d ", BLOCK_NUM (next));
4223 if (effective_cost < 1)
4224 fprintf (dump, "into ready\n");
4226 fprintf (dump, "into queue with cost=%d\n", effective_cost);
4229 /* Adjust the priority of NEXT and either put it on the ready
4230 list or queue it. */
4231 adjust_priority (next);
4232 if (effective_cost < 1)
4233 ready[n_ready++] = next;
4235 queue_insn (next, effective_cost);
4239 /* Annotate the instruction with issue information -- TImode
4240 indicates that the instruction is expected not to be able
4241 to issue on the same cycle as the previous insn. A machine
4242 may use this information to decide how the instruction should
4243 be aligned. */
4244 if (reload_completed && issue_rate > 1)
4246 PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
4247 last_clock_var = clock;
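/* A worked example of the bookkeeping above: suppose INSN issues at
   clock 4 and a dependent insn NEXT has cost 2 on that dependence.
   INSN_TICK (next) is raised to at least 6; if this was NEXT's last
   unresolved dependence, effective_cost is 6 - 4 = 2, so NEXT is
   queued for 2 cycles rather than entering the ready list. */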
4253 /* Functions for handling of notes. */
4255 /* Delete notes beginning with INSN and put them in the chain
4256 of notes ended by NOTE_LIST.
4257 Returns the insn following the notes. */
4260 unlink_other_notes (insn, tail)
4263 rtx prev = PREV_INSN (insn);
4265 while (insn != tail && GET_CODE (insn) == NOTE)
4267 rtx next = NEXT_INSN (insn);
4268 /* Delete the note from its current position. */
4270 NEXT_INSN (prev) = next;
4272 PREV_INSN (next) = prev;
4274 /* See sched_analyze to see how these are handled. */
4275 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
4276 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
4277 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
4278 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_BEG
4279 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
4280 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
4281 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
4283 /* Insert the note at the end of the notes list. */
4284 PREV_INSN (insn) = note_list;
4286 NEXT_INSN (note_list) = insn;
4295 /* Delete line notes beginning with INSN. Record line-number notes so
4296 they can be reused. Returns the insn following the notes. */
4299 unlink_line_notes (insn, tail)
4302 rtx prev = PREV_INSN (insn);
4304 while (insn != tail && GET_CODE (insn) == NOTE)
4306 rtx next = NEXT_INSN (insn);
4308 if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
4310 /* Delete the note from its current position. */
4312 NEXT_INSN (prev) = next;
4314 PREV_INSN (next) = prev;
4316 /* Record line-number notes so they can be reused. */
4317 LINE_NOTE (insn) = insn;
4327 /* Return the head and tail pointers of BB. */
4329 HAIFA_INLINE static void
4330 get_block_head_tail (b, headp, tailp)
4339 /* HEAD and TAIL delimit the basic block being scheduled. */
4340 head = BLOCK_HEAD (b);
4341 tail = BLOCK_END (b);
4343 /* Don't include any notes or labels at the beginning of the
4344 basic block, or notes at the ends of basic blocks. */
4345 while (head != tail)
4347 if (GET_CODE (head) == NOTE)
4348 head = NEXT_INSN (head);
4349 else if (GET_CODE (tail) == NOTE)
4350 tail = PREV_INSN (tail);
4351 else if (GET_CODE (head) == CODE_LABEL)
4352 head = NEXT_INSN (head);
4361 HAIFA_INLINE static void
4362 get_bb_head_tail (bb, headp, tailp)
4367 get_block_head_tail (BB_TO_BLOCK (bb), headp, tailp);
4370 /* Delete line notes from bb. Save them so they can be restored later
4371 (in restore_line_notes ()). */
4382 get_bb_head_tail (bb, &head, &tail);
4384 if (head == tail && (! INSN_P (head)))
4387 next_tail = NEXT_INSN (tail);
4388 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4392 /* Farm out notes, and maybe save them in NOTE_LIST.
4393 This is needed to keep the debugger from
4394 getting completely deranged. */
4395 if (GET_CODE (insn) == NOTE)
4398 insn = unlink_line_notes (insn, next_tail);
4404 if (insn == next_tail)
4410 /* Save line number notes for each insn in bb. */
4413 save_line_notes (bb)
4419 /* We must use the true line number for the first insn in the block
4420 that was computed and saved at the start of this pass. We can't
4421 use the current line number, because scheduling of the previous
4422 block may have changed the current line number. */
4424 rtx line = line_note_head[BB_TO_BLOCK (bb)];
4427 get_bb_head_tail (bb, &head, &tail);
4428 next_tail = NEXT_INSN (tail);
4430 for (insn = BLOCK_HEAD (BB_TO_BLOCK (bb));
4432 insn = NEXT_INSN (insn))
4433 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4436 LINE_NOTE (insn) = line;
4439 /* After bb was scheduled, insert line notes into the insns list. */
4442 restore_line_notes (bb)
4445 rtx line, note, prev, new;
4446 int added_notes = 0;
4448 rtx head, next_tail, insn;
4450 b = BB_TO_BLOCK (bb);
4452 head = BLOCK_HEAD (b);
4453 next_tail = NEXT_INSN (BLOCK_END (b));
4455 /* Determine the current line-number. We want to know the current
4456 line number of the first insn of the block here, in case it is
4457 different from the true line number that was saved earlier. If
4458 different, then we need a line number note before the first insn
4459 of this block. If it happens to be the same, then we don't want to
4460 emit another line number note here. */
4461 for (line = head; line; line = PREV_INSN (line))
4462 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
4465 /* Walk the insns keeping track of the current line-number and inserting
4466 the line-number notes as needed. */
4467 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4468 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4470 /* This used to emit line number notes before every non-deleted note.
4471 However, this confuses a debugger, because line notes not separated
4472 by real instructions all end up at the same address. I can find no
4473 use for line number notes before other notes, so none are emitted. */
4474 else if (GET_CODE (insn) != NOTE
4475 && (note = LINE_NOTE (insn)) != 0
4478 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
4479 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
4482 prev = PREV_INSN (insn);
4483 if (LINE_NOTE (note))
4485 /* Re-use the original line-number note. */
4486 LINE_NOTE (note) = 0;
4487 PREV_INSN (note) = prev;
4488 NEXT_INSN (prev) = note;
4489 PREV_INSN (insn) = note;
4490 NEXT_INSN (note) = insn;
4495 new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
4496 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
4497 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
4500 if (sched_verbose && added_notes)
4501 fprintf (dump, ";; added %d line-number notes\n", added_notes);
4504 /* After scheduling the function, delete redundant line notes from the
4505 insns list. */
4508 rm_redundant_line_notes ()
4511 rtx insn = get_insns ();
4512 int active_insn = 0;
4515 /* Walk the insns deleting redundant line-number notes. Many of these
4516 are already present. The remainder tend to occur at basic
4517 block boundaries. */
4518 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4519 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4521 /* If there are no active insns following, INSN is redundant. */
4522 if (active_insn == 0)
4525 NOTE_SOURCE_FILE (insn) = 0;
4526 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4528 /* If the line number is unchanged, LINE is redundant. */
4530 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
4531 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
4534 NOTE_SOURCE_FILE (line) = 0;
4535 NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
4542 else if (!((GET_CODE (insn) == NOTE
4543 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
4544 || (GET_CODE (insn) == INSN
4545 && (GET_CODE (PATTERN (insn)) == USE
4546 || GET_CODE (PATTERN (insn)) == CLOBBER))))
4549 if (sched_verbose && notes)
4550 fprintf (dump, ";; deleted %d line-number notes\n", notes);
4553 /* Delete notes between head and tail and put them in the chain
4554 of notes ended by NOTE_LIST. */
4557 rm_other_notes (head, tail)
4564 if (head == tail && (! INSN_P (head)))
4567 next_tail = NEXT_INSN (tail);
4568 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4572 /* Farm out notes, and maybe save them in NOTE_LIST.
4573 This is needed to keep the debugger from
4574 getting completely deranged. */
4575 if (GET_CODE (insn) == NOTE)
4579 insn = unlink_other_notes (insn, next_tail);
4585 if (insn == next_tail)
4591 /* Functions for computation of registers live/usage info. */
4593 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4596 find_insn_reg_weight (b)
4599 rtx insn, next_tail, head, tail;
4601 get_block_head_tail (b, &head, &tail);
4602 next_tail = NEXT_INSN (tail);
4604 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4609 /* Handle register life information. */
4610 if (! INSN_P (insn))
4613 /* Increment weight for each register born here. */
4615 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4616 && register_operand (SET_DEST (x), VOIDmode))
4618 else if (GET_CODE (x) == PARALLEL)
4621 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
4623 x = XVECEXP (PATTERN (insn), 0, j);
4624 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4625 && register_operand (SET_DEST (x), VOIDmode))
4630 /* Decrement weight for each register that dies here. */
4631 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
4633 if (REG_NOTE_KIND (x) == REG_DEAD
4634 || REG_NOTE_KIND (x) == REG_UNUSED)
4638 INSN_REG_WEIGHT (insn) = reg_weight;
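/* For example, an insn whose PARALLEL sets two registers and that
   carries one REG_DEAD note ends up with INSN_REG_WEIGHT of
   2 - 1 = 1: on balance it makes one more register live, so before
   reload rank_for_schedule will tend to delay it relative to
   lower-weight candidates. */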
4642 /* Scheduling clock, modified in schedule_block() and queue_to_ready (). */
4643 static int clock_var;
4645 /* Move insns that became ready to fire from queue to ready list. */
4648 queue_to_ready (ready, n_ready)
4655 q_ptr = NEXT_Q (q_ptr);
4657 /* Add all pending insns that can be scheduled without stalls to the
4658 ready list. */
4659 for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
4662 insn = XEXP (link, 0);
4665 if (sched_verbose >= 2)
4666 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4668 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4669 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4671 ready[n_ready++] = insn;
4672 if (sched_verbose >= 2)
4673 fprintf (dump, "moving to ready without stalls\n");
4675 insn_queue[q_ptr] = 0;
4677 /* If there are no ready insns, stall until one is ready and add all
4678 of the pending insns at that point to the ready list. */
4681 register int stalls;
4683 for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
4685 if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
4687 for (; link; link = XEXP (link, 1))
4689 insn = XEXP (link, 0);
4692 if (sched_verbose >= 2)
4693 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ",
4696 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4697 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4699 ready[n_ready++] = insn;
4700 if (sched_verbose >= 2)
4701 fprintf (dump, "moving to ready with %d stalls\n", stalls);
4703 insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
4710 if (sched_verbose && stalls)
4711 visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
4712 q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
4713 clock_var += stalls;
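/* Example: if the ready list is empty and the nearest nonempty queue
   bucket is 3 cycles away, the loop above finds it at stalls == 3,
   drains that bucket into READY, and then q_ptr and clock_var both
   advance by 3, exactly as if the pipeline had stalled for 3 cycles. */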
4718 /* Print the ready list for debugging purposes. Callable from debugger. */
4721 debug_ready_list (ready, n_ready)
4727 for (i = 0; i < n_ready; i++)
4729 fprintf (dump, " %d", INSN_UID (ready[i]));
4730 if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
4731 fprintf (dump, "/b%d", BLOCK_NUM (ready[i]));
4733 fprintf (dump, "\n");
4736 /* Print names of units on which insn can/should execute, for debugging. */
4739 insn_print_units (insn)
4743 int unit = insn_unit (insn);
4746 fprintf (dump, "none");
4748 fprintf (dump, "%s", function_units[unit].name);
4751 fprintf (dump, "[");
4752 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
4755 fprintf (dump, "%s", function_units[i].name);
4757 fprintf (dump, " ");
4759 fprintf (dump, "]");
4763 /* MAX_VISUAL_LINES is the maximum number of lines in the visualization
4764 table of a basic block. If more lines are needed, the table is split in two.
4765 n_visual_lines is the number of lines printed so far for a block.
4766 visual_tbl contains the block visualization info.
4767 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4768 #define MAX_VISUAL_LINES 100
4773 rtx vis_no_unit[10];
4775 /* Finds units that are in use in this function. Required only
4776 for visualization. */
4779 init_target_units ()
4784 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4786 if (! INSN_P (insn))
4789 unit = insn_unit (insn);
4792 target_units |= ~unit;
4794 target_units |= (1 << unit);
4798 /* Return the length of the visualization table. */
4801 get_visual_tbl_length ()
4807 /* Compute length of one field in line. */
4808 s = (char *) alloca (INSN_LEN + 6);
4809 sprintf (s, " %33s", "uname");
4812 /* Compute length of one line. */
4815 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
4816 if (function_units[unit].bitmask & target_units)
4817 for (i = 0; i < function_units[unit].multiplicity; i++)
4820 n += strlen ("\n") + 2;
4822 /* Compute length of visualization string. */
4823 return (MAX_VISUAL_LINES * n);
4826 /* Init block visualization debugging info. */
4829 init_block_visualization ()
4831 strcpy (visual_tbl, "");
4836 #define BUF_LEN 2048
4839 safe_concat (buf, cur, str)
4844 char *end = buf + BUF_LEN - 2; /* Leave room for null. */
4853 while (cur < end && (c = *str++) != '\0')
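/* Illustrative usage of safe_concat: CUR tracks the write position
   and saturates at BUF_LEN - 2, so a chain of calls cannot overrun
   BUF. This sketch assumes, as the "leave room for null" comment
   above indicates, that the function keeps the buffer terminated: */
#if 0
  char buf[BUF_LEN];
  char *cur = buf;
  cur = safe_concat (buf, cur, "r");
  cur = safe_concat (buf, cur, "12");	/* buf now holds "r12". */
#endif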
4860 /* This recognizes rtxes that I classified as expressions. These always
4861 represent some action on values or on the results of other expressions,
4862 which may be stored in objects representing values. */
4865 print_exp (buf, x, verbose)
4873 const char *fun = (char *) 0;
4878 for (i = 0; i < 4; i++)
4884 switch (GET_CODE (x))
4887 op[0] = XEXP (x, 0);
4888 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4889 && INTVAL (XEXP (x, 1)) < 0)
4892 op[1] = GEN_INT (-INTVAL (XEXP (x, 1)));
4897 op[1] = XEXP (x, 1);
4901 op[0] = XEXP (x, 0);
4903 op[1] = XEXP (x, 1);
4907 op[0] = XEXP (x, 0);
4909 op[1] = XEXP (x, 1);
4913 op[0] = XEXP (x, 0);
4914 op[1] = XEXP (x, 1);
4918 op[0] = XEXP (x, 0);
4921 op[0] = XEXP (x, 0);
4923 op[1] = XEXP (x, 1);
4926 op[0] = XEXP (x, 0);
4928 op[1] = XEXP (x, 1);
4932 op[0] = XEXP (x, 0);
4933 op[1] = XEXP (x, 1);
4936 op[0] = XEXP (x, 0);
4938 op[1] = XEXP (x, 1);
4942 op[0] = XEXP (x, 0);
4943 op[1] = XEXP (x, 1);
4947 op[0] = XEXP (x, 0);
4948 op[1] = XEXP (x, 1);
4952 op[0] = XEXP (x, 0);
4953 op[1] = XEXP (x, 1);
4957 op[0] = XEXP (x, 0);
4958 op[1] = XEXP (x, 1);
4962 op[0] = XEXP (x, 0);
4963 op[1] = XEXP (x, 1);
4967 op[0] = XEXP (x, 0);
4970 op[0] = XEXP (x, 0);
4972 op[1] = XEXP (x, 1);
4975 op[0] = XEXP (x, 0);
4977 op[1] = XEXP (x, 1);
4980 op[0] = XEXP (x, 0);
4982 op[1] = XEXP (x, 1);
4985 op[0] = XEXP (x, 0);
4987 op[1] = XEXP (x, 1);
4990 op[0] = XEXP (x, 0);
4992 op[1] = XEXP (x, 1);
4995 op[0] = XEXP (x, 0);
4997 op[1] = XEXP (x, 1);
5000 op[0] = XEXP (x, 0);
5002 op[1] = XEXP (x, 1);
5005 op[0] = XEXP (x, 0);
5007 op[1] = XEXP (x, 1);
5011 op[0] = XEXP (x, 0);
5015 op[0] = XEXP (x, 0);
5019 op[0] = XEXP (x, 0);
5022 op[0] = XEXP (x, 0);
5024 op[1] = XEXP (x, 1);
5027 op[0] = XEXP (x, 0);
5029 op[1] = XEXP (x, 1);
5032 op[0] = XEXP (x, 0);
5034 op[1] = XEXP (x, 1);
5038 op[0] = XEXP (x, 0);
5039 op[1] = XEXP (x, 1);
5042 op[0] = XEXP (x, 0);
5044 op[1] = XEXP (x, 1);
5048 op[0] = XEXP (x, 0);
5049 op[1] = XEXP (x, 1);
5052 op[0] = XEXP (x, 0);
5054 op[1] = XEXP (x, 1);
5058 op[0] = XEXP (x, 0);
5059 op[1] = XEXP (x, 1);
5062 op[0] = XEXP (x, 0);
5064 op[1] = XEXP (x, 1);
5068 op[0] = XEXP (x, 0);
5069 op[1] = XEXP (x, 1);
5072 fun = (verbose) ? "sign_extract" : "sxt";
5073 op[0] = XEXP (x, 0);
5074 op[1] = XEXP (x, 1);
5075 op[2] = XEXP (x, 2);
5078 fun = (verbose) ? "zero_extract" : "zxt";
5079 op[0] = XEXP (x, 0);
5080 op[1] = XEXP (x, 1);
5081 op[2] = XEXP (x, 2);
5084 fun = (verbose) ? "sign_extend" : "sxn";
5085 op[0] = XEXP (x, 0);
5088 fun = (verbose) ? "zero_extend" : "zxn";
5089 op[0] = XEXP (x, 0);
5092 fun = (verbose) ? "float_extend" : "fxn";
5093 op[0] = XEXP (x, 0);
5096 fun = (verbose) ? "trunc" : "trn";
5097 op[0] = XEXP (x, 0);
5099 case FLOAT_TRUNCATE:
5100 fun = (verbose) ? "float_trunc" : "ftr";
5101 op[0] = XEXP (x, 0);
5104 fun = (verbose) ? "float" : "flt";
5105 op[0] = XEXP (x, 0);
5107 case UNSIGNED_FLOAT:
5108 fun = (verbose) ? "uns_float" : "ufl";
5109 op[0] = XEXP (x, 0);
5113 op[0] = XEXP (x, 0);
5116 fun = (verbose) ? "uns_fix" : "ufx";
5117 op[0] = XEXP (x, 0);
5121 op[0] = XEXP (x, 0);
5125 op[0] = XEXP (x, 0);
5128 op[0] = XEXP (x, 0);
5132 op[0] = XEXP (x, 0);
5137 op[0] = XEXP (x, 0);
5141 op[1] = XEXP (x, 1);
5146 op[0] = XEXP (x, 0);
5148 op[1] = XEXP (x, 1);
5150 op[2] = XEXP (x, 2);
5155 op[0] = TRAP_CONDITION (x);
5158 case UNSPEC_VOLATILE:
5160 cur = safe_concat (buf, cur, "unspec");
5161 if (GET_CODE (x) == UNSPEC_VOLATILE)
5162 cur = safe_concat (buf, cur, "/v");
5163 cur = safe_concat (buf, cur, "[");
5165 for (i = 0; i < XVECLEN (x, 0); i++)
5167 print_pattern (tmp, XVECEXP (x, 0, i), verbose);
5168 cur = safe_concat (buf, cur, sep);
5169 cur = safe_concat (buf, cur, tmp);
5172 cur = safe_concat (buf, cur, "] ");
5173 sprintf (tmp, "%d", XINT (x, 1));
5174 cur = safe_concat (buf, cur, tmp);
5178 /* If (verbose) debug_rtx (x); */
5179 st[0] = GET_RTX_NAME (GET_CODE (x));
5183 /* Print this as a function? */
5186 cur = safe_concat (buf, cur, fun);
5187 cur = safe_concat (buf, cur, "(");
5190 for (i = 0; i < 4; i++)
5193 cur = safe_concat (buf, cur, st[i]);
5198 cur = safe_concat (buf, cur, ",");
5200 print_value (tmp, op[i], verbose);
5201 cur = safe_concat (buf, cur, tmp);
5206 cur = safe_concat (buf, cur, ")");
5209 /* Prints rtxes that I classified as values: constants,
5210 registers, labels, symbols and memory accesses. */
5213 print_value (buf, x, verbose)
5221 switch (GET_CODE (x))
5224 sprintf (t, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
5225 cur = safe_concat (buf, cur, t);
5228 sprintf (t, "<0x%lx,0x%lx>", (long) XWINT (x, 2), (long) XWINT (x, 3));
5229 cur = safe_concat (buf, cur, t);
5232 cur = safe_concat (buf, cur, "\"");
5233 cur = safe_concat (buf, cur, XSTR (x, 0));
5234 cur = safe_concat (buf, cur, "\"");
5237 cur = safe_concat (buf, cur, "`");
5238 cur = safe_concat (buf, cur, XSTR (x, 0));
5239 cur = safe_concat (buf, cur, "'");
5242 sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
5243 cur = safe_concat (buf, cur, t);
5246 print_value (t, XEXP (x, 0), verbose);
5247 cur = safe_concat (buf, cur, "const(");
5248 cur = safe_concat (buf, cur, t);
5249 cur = safe_concat (buf, cur, ")");
5252 print_value (t, XEXP (x, 0), verbose);
5253 cur = safe_concat (buf, cur, "high(");
5254 cur = safe_concat (buf, cur, t);
5255 cur = safe_concat (buf, cur, ")");
5258 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
5260 int c = reg_names[REGNO (x)][0];
5261 if (c >= '0' && c <= '9')
5262 cur = safe_concat (buf, cur, "%");
5264 cur = safe_concat (buf, cur, reg_names[REGNO (x)]);
5268 sprintf (t, "r%d", REGNO (x));
5269 cur = safe_concat (buf, cur, t);
5273 print_value (t, SUBREG_REG (x), verbose);
5274 cur = safe_concat (buf, cur, t);
5275 sprintf (t, "#%d", SUBREG_WORD (x));
5276 cur = safe_concat (buf, cur, t);
5279 cur = safe_concat (buf, cur, "scratch");
5282 cur = safe_concat (buf, cur, "cc0");
5285 cur = safe_concat (buf, cur, "pc");
5288 print_value (t, XEXP (x, 0), verbose);
5289 cur = safe_concat (buf, cur, "[");
5290 cur = safe_concat (buf, cur, t);
5291 cur = safe_concat (buf, cur, "]");
5294 print_exp (t, x, verbose);
5295 cur = safe_concat (buf, cur, t);
5300 /* The next step in insn detailing: pattern recognition. */
5303 print_pattern (buf, x, verbose)
5308 char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
5310 switch (GET_CODE (x))
5313 print_value (t1, SET_DEST (x), verbose);
5314 print_value (t2, SET_SRC (x), verbose);
5315 sprintf (buf, "%s=%s", t1, t2);
5318 sprintf (buf, "return");
5321 print_exp (buf, x, verbose);
5324 print_value (t1, XEXP (x, 0), verbose);
5325 sprintf (buf, "clobber %s", t1);
5328 print_value (t1, XEXP (x, 0), verbose);
5329 sprintf (buf, "use %s", t1);
5332 print_value (t1, COND_EXEC_CODE (x), verbose);
5333 print_value (t2, COND_EXEC_TEST (x), verbose);
5334 sprintf (buf, "cond_exec %s %s", t1, t2);
5341 for (i = 0; i < XVECLEN (x, 0); i++)
5343 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5344 sprintf (t3, "%s%s;", t1, t2);
5347 sprintf (buf, "%s}", t1);
5354 sprintf (t1, "%%{");
5355 for (i = 0; i < XVECLEN (x, 0); i++)
5357 print_insn (t2, XVECEXP (x, 0, i), verbose);
5358 sprintf (t3, "%s%s;", t1, t2);
5361 sprintf (buf, "%s%%}", t1);
5365 sprintf (buf, "asm {%s}", XSTR (x, 0));
5370 print_value (buf, XEXP (x, 0), verbose);
5373 print_value (t1, TRAP_CONDITION (x), verbose);
5374 sprintf (buf, "trap_if %s", t1);
5380 sprintf (t1, "unspec{");
5381 for (i = 0; i < XVECLEN (x, 0); i++)
5383 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5384 sprintf (t3, "%s%s;", t1, t2);
5387 sprintf (buf, "%s}", t1);
5390 case UNSPEC_VOLATILE:
5394 sprintf (t1, "unspec/v{");
5395 for (i = 0; i < XVECLEN (x, 0); i++)
5397 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5398 sprintf (t3, "%s%s;", t1, t2);
5401 sprintf (buf, "%s}", t1);
5405 print_value (buf, x, verbose);
5407 } /* print_pattern */
5409 /* This is the main function in the rtl visualization mechanism. It
5410 accepts an rtx and tries to recognize it as an insn, then prints it
5411 properly in human-readable form, resembling assembler mnemonics.
5412 For every insn it prints its UID and the BB the insn belongs to.
5413 (Probably the last "option" should be extended somehow, since it
5414 now depends on sched.c internal variables ...) */
5417 print_insn (buf, x, verbose)
5425 switch (GET_CODE (x))
5428 print_pattern (t, PATTERN (x), verbose);
5430 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x),
5433 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5436 print_pattern (t, PATTERN (x), verbose);
5438 sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x),
5441 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5445 if (GET_CODE (x) == PARALLEL)
5447 x = XVECEXP (x, 0, 0);
5448 print_pattern (t, x, verbose);
5451 strcpy (t, "call <...>");
5453 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn),
5454 INSN_UID (insn), t);
5456 sprintf (buf, "%-4d %s", INSN_UID (insn), t);
5459 sprintf (buf, "L%d:", INSN_UID (x));
5462 sprintf (buf, "i% 4d: barrier", INSN_UID (x));
5465 if (NOTE_LINE_NUMBER (x) > 0)
5466 sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
5467 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
5469 sprintf (buf, "%4d %s", INSN_UID (x),
5470 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
5475 sprintf (buf, "Not an INSN at all\n");
5479 sprintf (buf, "i%-4d <What?>", INSN_UID (x));
5483 /* Print visualization debugging info. */
5486 print_block_visualization (b, s)
5493 fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s);
5495 /* Print names of units. */
5496 fprintf (dump, ";; %-8s", "clock");
5497 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5498 if (function_units[unit].bitmask & target_units)
5499 for (i = 0; i < function_units[unit].multiplicity; i++)
5500 fprintf (dump, " %-33s", function_units[unit].name);
5501 fprintf (dump, " %-8s\n", "no-unit");
5503 fprintf (dump, ";; %-8s", "=====");
5504 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5505 if (function_units[unit].bitmask & target_units)
5506 for (i = 0; i < function_units[unit].multiplicity; i++)
5507 fprintf (dump, " %-33s", "==============================");
5508 fprintf (dump, " %-8s\n", "=======");
5510 /* Print insns in each cycle. */
5511 fprintf (dump, "%s\n", visual_tbl);
5514 /* Print insns in the 'no_unit' column of visualization. */
5517 visualize_no_unit (insn)
5520 vis_no_unit[n_vis_no_unit] = insn;
5524 /* Print insns scheduled in the given clock cycle, for visualization. */
5527 visualize_scheduled_insns (b, clock)
5532 /* If no more room, split table into two. */
5533 if (n_visual_lines >= MAX_VISUAL_LINES)
5535 print_block_visualization (b, "(incomplete)");
5536 init_block_visualization ();
5541 sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock);
5542 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5543 if (function_units[unit].bitmask & target_units)
5544 for (i = 0; i < function_units[unit].multiplicity; i++)
5546 int instance = unit + i * FUNCTION_UNITS_SIZE;
5547 rtx insn = unit_last_insn[instance];
5549 /* Print insns that still keep the unit busy. */
5551 actual_hazard_this_instance (unit, instance, insn, clock, 0))
5554 print_insn (str, insn, 0);
5555 str[INSN_LEN] = '\0';
5556 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str);
5559 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------");
5562 /* Print insns that are not assigned to any unit. */
5563 for (i = 0; i < n_vis_no_unit; i++)
5564 sprintf (visual_tbl + strlen (visual_tbl), " %-8d",
5565 INSN_UID (vis_no_unit[i]));
5568 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5571 /* Print stalled cycles. */
5574 visualize_stall_cycles (b, stalls)
5579 /* If no more room, split table into two. */
5580 if (n_visual_lines >= MAX_VISUAL_LINES)
5582 print_block_visualization (b, "(incomplete)");
5583 init_block_visualization ();
5588 sprintf (visual_tbl + strlen (visual_tbl), ";; ");
5589 for (i = 0; i < stalls; i++)
5590 sprintf (visual_tbl + strlen (visual_tbl), ".");
5591 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5594 /* move_insn1: Remove INSN from insn chain, and link it after LAST insn. */
5597 move_insn1 (insn, last)
5600 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
5601 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
5603 NEXT_INSN (insn) = NEXT_INSN (last);
5604 PREV_INSN (NEXT_INSN (last)) = insn;
5606 NEXT_INSN (last) = insn;
5607 PREV_INSN (insn) = last;
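/* move_insn1 is the standard doubly-linked-list splice. The same
   operation over a hypothetical generic node type, for reference: */
#if 0
struct node { struct node *next, *prev; };

static void
splice_after (struct node *insn, struct node *last)
{
  /* Unlink INSN from its current neighbors... */
  insn->prev->next = insn->next;
  insn->next->prev = insn->prev;
  /* ...and relink it immediately after LAST. */
  insn->next = last->next;
  last->next->prev = insn;
  last->next = insn;
  insn->prev = last;
}
#endif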
5612 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5613 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5614 NOTEs. The REG_SAVE_NOTE following the first one contains the
5615 saved value for NOTE_BLOCK_NUMBER, which is useful for
5616 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5617 output by the instruction scheduler. Return the new value of LAST. */
5620 reemit_notes (insn, last)
5627 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
5629 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5631 enum insn_note note_type = INTVAL (XEXP (note, 0));
5633 if (note_type == NOTE_INSN_SETJMP)
5635 retval = emit_note_after (NOTE_INSN_SETJMP, insn);
5636 CONST_CALL_P (retval) = CONST_CALL_P (note);
5637 remove_note (insn, note);
5638 note = XEXP (note, 1);
5640 else if (note_type == NOTE_INSN_RANGE_BEG
5641 || note_type == NOTE_INSN_RANGE_END)
5643 last = emit_note_before (note_type, last);
5644 remove_note (insn, note);
5645 note = XEXP (note, 1);
5646 NOTE_RANGE_INFO (last) = XEXP (note, 0);
5650 last = emit_note_before (note_type, last);
5651 remove_note (insn, note);
5652 note = XEXP (note, 1);
5653 if (note_type == NOTE_INSN_EH_REGION_BEG
5654 || note_type == NOTE_INSN_EH_REGION_END)
5655 NOTE_EH_HANDLER (last) = INTVAL (XEXP (note, 0));
5657 remove_note (insn, note);
5663 /* Move INSN, and all insns which should be issued before it,
5664 due to SCHED_GROUP_P flag. Reemit notes if needed.
5666 Return the last insn emitted by the scheduler, which is the
5667 return value from the first call to reemit_notes. */
5670 move_insn (insn, last)
5675 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5676 insns with SCHED_GROUP_P set first. */
5677 while (SCHED_GROUP_P (insn))
5679 rtx prev = PREV_INSN (insn);
5681 /* Move a SCHED_GROUP_P insn. */
5682 move_insn1 (insn, last);
5683 /* If this is the first call to reemit_notes, then record
5684 its return value. */
5685 if (retval == NULL_RTX)
5686 retval = reemit_notes (insn, insn);
5688 reemit_notes (insn, insn);
5692 /* Now move the first non SCHED_GROUP_P insn. */
5693 move_insn1 (insn, last);
5695 /* If this is the first call to reemit_notes, then record
5696 its return value. */
5697 if (retval == NULL_RTX)
5698 retval = reemit_notes (insn, insn);
5700 reemit_notes (insn, insn);
5705 /* Return an insn which represents a SCHED_GROUP, which is
5706 the last insn in the group. */
5717 insn = next_nonnote_insn (insn);
5719 while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL));
5724 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5725 possibly bringing insns from subsequent blocks in the same region.
5726 Return number of insns scheduled. */
5729 schedule_block (bb, rgn_n_insns)
5733 /* Local variables. */
5739 /* Flow block of this bb. */
5740 int b = BB_TO_BLOCK (bb);
5742 /* target_n_insns == number of insns in b before scheduling starts.
5743 sched_target_n_insns == how many of b's insns were scheduled.
5744 sched_n_insns == how many insns were scheduled in b. */
5745 int target_n_insns = 0;
5746 int sched_target_n_insns = 0;
5747 int sched_n_insns = 0;
5749 #define NEED_NOTHING 0
5754 /* Head/tail info for this block. */
5761 /* We used to have code to avoid getting parameters moved from hard
5762 argument registers into pseudos.
5764 However, it was removed when it proved to be of marginal benefit
5765 and caused problems because schedule_block and compute_forward_dependences
5766 had different notions of what the "head" insn was. */
5767 get_bb_head_tail (bb, &head, &tail);
5769 /* rm_other_notes only removes notes which are _inside_ the
5770 block---that is, it won't remove notes before the first real insn
5771 or after the last real insn of the block. So if the first insn
5772 has a REG_SAVE_NOTE which would otherwise be emitted before the
5773 insn, it is redundant with the note before the start of the
5774 block, and so we have to take it out.
5776 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
5777 referencing NOTE_INSN_SETJMP at the end of the block. */
5782 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
5783 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5785 if (INTVAL (XEXP (note, 0)) != NOTE_INSN_SETJMP)
5787 remove_note (head, note);
5788 note = XEXP (note, 1);
5789 remove_note (head, note);
5792 note = XEXP (note, 1);
5796 next_tail = NEXT_INSN (tail);
5797 prev_head = PREV_INSN (head);
5799 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5800 to schedule this block. */
5801 if (head == tail && (! INSN_P (head)))
5802 return (sched_n_insns);
5807 fprintf (dump, ";; ======================================================\n");
5809 ";; -- basic block %d from %d to %d -- %s reload\n",
5810 b, INSN_UID (BLOCK_HEAD (b)), INSN_UID (BLOCK_END (b)),
5811 (reload_completed ? "after" : "before"));
5812 fprintf (dump, ";; ======================================================\n");
5813 fprintf (dump, "\n");
5815 visual_tbl = (char *) alloca (get_visual_tbl_length ());
5816 init_block_visualization ();
5819 /* Remove remaining note insns from the block, save them in
5820 note_list. These notes are restored at the end of
5821 schedule_block (). */
5823 rm_other_notes (head, tail);
5827 /* Prepare current target block info. */
5828 if (current_nr_blocks > 1)
5830 candidate_table = (candidate *) xmalloc (current_nr_blocks
5831 * sizeof (candidate));
5834 /* ??? It is not clear why bblst_size is computed this way. The original
5835 number was clearly too small as it resulted in compiler failures.
5836 Multiplying the original number by 2 (to account for update_bbs
5837 members) seems to be a reasonable solution. */
5838 /* ??? Or perhaps there is a bug somewhere else in this file? */
5839 bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
5840 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
5842 bitlst_table_last = 0;
5843 bitlst_table_size = rgn_nr_edges;
5844 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
5846 compute_trg_info (bb);
5851 /* Allocate the ready list. */
5852 ready = (rtx *) xmalloc ((rgn_n_insns + 1) * sizeof (rtx));
5854 /* Print debugging information. */
5855 if (sched_verbose >= 5)
5856 debug_dependencies ();
5858 /* Initialize ready list with all 'ready' insns in target block.
5859 Count number of insns in the target block being scheduled. */
5861 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5865 if (! INSN_P (insn))
5867 next = NEXT_INSN (insn);
5869 if (INSN_DEP_COUNT (insn) == 0
5870 && (SCHED_GROUP_P (next) == 0 || ! INSN_P (next)))
5871 ready[n_ready++] = insn;
5872 if (!(SCHED_GROUP_P (insn)))
5876 /* Add to ready list all 'ready' insns in valid source blocks.
5877 For speculative insns, check-live, exception-free, and
5878 issue-delay. */
5879 for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
5880 if (IS_VALID (bb_src))
5886 get_bb_head_tail (bb_src, &head, &tail);
5887 src_next_tail = NEXT_INSN (tail);
5890 if (head == tail && (! INSN_P (head)))
5893 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
5895 if (! INSN_P (insn))
5898 if (!CANT_MOVE (insn)
5899 && (!IS_SPECULATIVE_INSN (insn)
5900 || (insn_issue_delay (insn) <= 3
5901 && check_live (insn, bb_src)
5902 && is_exception_free (insn, bb_src, target_bb))))
5906 /* Note that we haven't squirreled away the notes for
5907 blocks other than the current. So if this is a
5908 speculative insn, NEXT might otherwise be a note. */
5909 next = next_nonnote_insn (insn);
5910 if (INSN_DEP_COUNT (insn) == 0
5912 || SCHED_GROUP_P (next) == 0
5913 || ! INSN_P (next)))
5914 ready[n_ready++] = insn;
5919 #ifdef MD_SCHED_INIT
5920 MD_SCHED_INIT (dump, sched_verbose);
5923 /* No insns scheduled in this block yet. */
5924 last_scheduled_insn = 0;
5926 /* Q_SIZE is the total number of insns in the queue. */
5930 bzero ((char *) insn_queue, sizeof (insn_queue));
5932 /* Start just before the beginning of time. */
5935 /* We start inserting insns after PREV_HEAD. */
5938 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
5939 new_needs = (NEXT_INSN (prev_head) == BLOCK_HEAD (b)
5940 ? NEED_HEAD : NEED_NOTHING);
5941 if (PREV_INSN (next_tail) == BLOCK_END (b))
5942 new_needs |= NEED_TAIL;
5944 /* Loop until all the insns in BB are scheduled. */
5945 while (sched_target_n_insns < target_n_insns)
5949 /* Add to the ready list all pending insns that can be issued now.
5950 If there are no ready insns, increment clock until one
5951 is ready and add all pending insns at that point to the ready
5952 list. */
5953 n_ready = queue_to_ready (ready, n_ready);
5958 if (sched_verbose >= 2)
5960 fprintf (dump, ";;\t\tReady list after queue_to_ready: ");
5961 debug_ready_list (ready, n_ready);
5964 /* Sort the ready list based on priority. */
5965 SCHED_SORT (ready, n_ready);
5967 /* Allow the target to reorder the list, typically for
5968 better instruction bundling. */
5969 #ifdef MD_SCHED_REORDER
5970 MD_SCHED_REORDER (dump, sched_verbose, ready, n_ready, clock_var,
5973 can_issue_more = issue_rate;
5978 fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
5979 debug_ready_list (ready, n_ready);
5982 /* Issue insns from ready list. */
5983 while (n_ready != 0 && can_issue_more)
5985 /* Select and remove the insn from the ready list. */
5986 rtx insn = ready[--n_ready];
5987 int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
5991 queue_insn (insn, cost);
5995 /* An interblock motion? */
5996 if (INSN_BB (insn) != target_bb)
6001 if (IS_SPECULATIVE_INSN (insn))
6003 if (!check_live (insn, INSN_BB (insn)))
6005 update_live (insn, INSN_BB (insn));
6007 /* For speculative load, mark insns fed by it. */
6008 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
6009 set_spec_fed (insn);
6015 /* Find the beginning of the scheduling group. */
6016 /* ??? Ought to update basic block here, but later bits of
6017 schedule_block assume the original insn block is
6018 valid throughout. */
6020 temp = insn;
6021 while (SCHED_GROUP_P (temp))
6022 temp = PREV_INSN (temp);
6024 /* Update source block boundaries. */
6025 b1 = BLOCK_FOR_INSN (temp);
6026 if (temp == b1->head && insn == b1->end)
6028 /* We moved all the insns in the basic block.
6029 Emit a note after the last insn and update the
6030 begin/end boundaries to point to the note. */
6031 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
6035 else if (insn == b1->end)
6037 /* We took insns from the end of the basic block,
6038 so update the end of block boundary so that it
6039 points to the first insn we did not move. */
6040 b1->end = PREV_INSN (temp);
6042 else if (temp == b1->head)
6044 /* We took insns from the start of the basic block,
6045 so update the start of block boundary so that
6046 it points to the first insn we did not move. */
6047 b1->head = NEXT_INSN (insn);
6052 /* In block motion. */
6053 sched_target_n_insns++;
6056 last_scheduled_insn = insn;
6057 last = move_insn (insn, last);
6060 #ifdef MD_SCHED_VARIABLE_ISSUE
6061 MD_SCHED_VARIABLE_ISSUE (dump, sched_verbose, insn,
6067 n_ready = schedule_insn (insn, ready, n_ready, clock_var);
6069 /* Close this block after scheduling its jump. */
6070 if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
6076 visualize_scheduled_insns (b, clock_var);
6082 fprintf (dump, ";;\tReady list (final): ");
6083 debug_ready_list (ready, n_ready);
6084 print_block_visualization (b, "");
6087 /* Sanity check -- queue must be empty now. Meaningless if region has
6088 multiple bbs. */
6089 if (current_nr_blocks > 1)
6090 if (!flag_schedule_interblock && q_size != 0)
6093 /* Update head/tail boundaries. */
6094 head = NEXT_INSN (prev_head);
6097 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6098 previously found among the insns. Insert them at the beginning
6099 of the insns. */
6102 rtx note_head = note_list;
6104 while (PREV_INSN (note_head))
6106 note_head = PREV_INSN (note_head);
6109 PREV_INSN (note_head) = PREV_INSN (head);
6110 NEXT_INSN (PREV_INSN (head)) = note_head;
6111 PREV_INSN (head) = note_list;
6112 NEXT_INSN (note_list) = head;
6116 /* Update target block boundaries. */
6117 if (new_needs & NEED_HEAD)
6118 BLOCK_HEAD (b) = head;
6120 if (new_needs & NEED_TAIL)
6121 BLOCK_END (b) = tail;
6126 fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n",
6127 clock_var, INSN_UID (BLOCK_HEAD (b)));
6128 fprintf (dump, ";; new basic block end = %d\n\n",
6129 INSN_UID (BLOCK_END (b)));
6133 if (current_nr_blocks > 1)
6135 free (candidate_table);
6137 free (bitlst_table);
6141 return (sched_n_insns);
6144 /* Print the bit-set of registers, S, callable from debugger. */
6147 debug_reg_vector (s)
6152 EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
6154 fprintf (dump, " %d", regno);
6157 fprintf (dump, "\n");
6160 /* Use the backward dependences from LOG_LINKS to build
6161 forward dependences in INSN_DEPEND. */
6164 compute_block_forward_dependences (bb)
6170 enum reg_note dep_type;
6172 get_bb_head_tail (bb, &head, &tail);
6173 next_tail = NEXT_INSN (tail);
6174 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6176 if (! INSN_P (insn))
6179 insn = group_leader (insn);
6181 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6183 rtx x = group_leader (XEXP (link, 0));
6186 if (x != XEXP (link, 0))
6189 #ifdef ENABLE_CHECKING
6190 /* If add_dependence is working properly there should never
6191 be notes, deleted insns or duplicates in the backward
6192 links. Thus we need not check for them here.
6194 However, if we have enabled checking we might as well go
6195 ahead and verify that add_dependence worked properly. */
6196 if (GET_CODE (x) == NOTE
6197 || INSN_DELETED_P (x)
6198 || find_insn_list (insn, INSN_DEPEND (x)))
6202 new_link = alloc_INSN_LIST (insn, INSN_DEPEND (x));
6204 dep_type = REG_NOTE_KIND (link);
6205 PUT_REG_NOTE_KIND (new_link, dep_type);
6207 INSN_DEPEND (x) = new_link;
6208 INSN_DEP_COUNT (insn) += 1;
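/* Conceptually this is edge reversal: LOG_LINKS lists, for each insn,
   the insns it depends on (backward edges); INSN_DEPEND lists, for
   each insn, the insns that depend on it (forward edges); and
   INSN_DEP_COUNT counts a node's unresolved backward edges. A
   self-contained sketch over a hypothetical adjacency-list graph: */
#if 0
#include <stdlib.h>

struct edge { int to; struct edge *next; };

/* BACK[v] lists the nodes V depends on. Build the forward lists FWD
   and the per-node count of unresolved incoming dependences. */
static void
build_forward (struct edge **back, struct edge **fwd,
               int *dep_count, int n)
{
  int v;
  for (v = 0; v < n; v++)
    {
      struct edge *e;
      for (e = back[v]; e; e = e->next)
        {
          struct edge *f = (struct edge *) malloc (sizeof *f);
          f->to = v;
          f->next = fwd[e->to];
          fwd[e->to] = f;
          dep_count[v]++;
        }
    }
}
#endif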
6213 /* Initialize variables for region data dependence analysis.
6214 n_bbs is the number of region blocks. */
6220 int maxreg = max_reg_num ();
6221 deps->reg_last_uses = (rtx *) xcalloc (maxreg, sizeof (rtx));
6222 deps->reg_last_sets = (rtx *) xcalloc (maxreg, sizeof (rtx));
6223 deps->reg_last_clobbers = (rtx *) xcalloc (maxreg, sizeof (rtx));
6225 deps->pending_read_insns = 0;
6226 deps->pending_read_mems = 0;
6227 deps->pending_write_insns = 0;
6228 deps->pending_write_mems = 0;
6229 deps->pending_lists_length = 0;
6230 deps->last_pending_memory_flush = 0;
6231 deps->last_function_call = 0;
6232 deps->in_post_call_group_p = 0;
6234 deps->sched_before_next_call
6235 = gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6236 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6237 LOG_LINKS (deps->sched_before_next_call) = 0;
6240 /* Add dependences so that branches are scheduled to run last in their
6244 add_branch_dependences (head, tail)
6249 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
6250 to remain in order at the end of the block by adding dependencies and
6251 giving the last a high priority. There may be notes present, and
6252 prev_head may also be a note.
6254 Branches must obviously remain at the end. Calls should remain at the
6255 end since moving them results in worse register allocation. Uses remain
6256 at the end to ensure proper register allocation. cc0 setters remain
6257 at the end because they can't be moved away from their cc0 user. */
6260 while (GET_CODE (insn) == CALL_INSN
6261 || GET_CODE (insn) == JUMP_INSN
6262 || (GET_CODE (insn) == INSN
6263 && (GET_CODE (PATTERN (insn)) == USE
6264 || GET_CODE (PATTERN (insn)) == CLOBBER
6266 || sets_cc0_p (PATTERN (insn))
6269 || GET_CODE (insn) == NOTE)
6271 if (GET_CODE (insn) != NOTE)
6274 && !find_insn_list (insn, LOG_LINKS (last)))
6276 add_dependence (last, insn, REG_DEP_ANTI);
6277 INSN_REF_COUNT (insn)++;
6280 CANT_MOVE (insn) = 1;
6283 /* Skip over insns that are part of a group.
6284 Make each insn explicitly depend on the previous insn.
6285 This ensures that only the group header will ever enter
6286 the ready queue (and, when scheduled, will automatically
6287 schedule the SCHED_GROUP_P block). */
6288 while (SCHED_GROUP_P (insn))
6290 rtx temp = prev_nonnote_insn (insn);
6291 add_dependence (insn, temp, REG_DEP_ANTI);
6296 /* Don't overrun the bounds of the basic block. */
6300 insn = PREV_INSN (insn);
6303 /* Make sure these insns are scheduled last in their block. */
6306 while (insn != head)
6308 insn = prev_nonnote_insn (insn);
6310 if (INSN_REF_COUNT (insn) != 0)
6313 add_dependence (last, insn, REG_DEP_ANTI);
6314 INSN_REF_COUNT (insn) = 1;
6316 /* Skip over insns that are part of a group. */
6317 while (SCHED_GROUP_P (insn))
6318 insn = prev_nonnote_insn (insn);
6322 /* After computing the dependencies for block BB, propagate the dependencies
6323 found in TMP_DEPS to the successors of the block. MAX_REG is the number
6324 of registers. */
6326 propagate_deps (bb, tmp_deps, max_reg)
6328 struct deps *tmp_deps;
6331 int b = BB_TO_BLOCK (bb);
6334 rtx link_insn, link_mem;
6337 /* These lists should point to the right place, for correct
6338 freeing later. */
6339 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
6340 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
6341 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
6342 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
6344 /* bb's structures are inherited by its successors. */
6345 first_edge = e = OUT_EDGES (b);
6352 int b_succ = TO_BLOCK (e);
6353 int bb_succ = BLOCK_TO_BB (b_succ);
6354 struct deps *succ_deps = bb_deps + bb_succ;
6356 /* Only bbs "below" bb, in the same region, are interesting. */
6357 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
6364 for (reg = 0; reg < max_reg; reg++)
6366 /* reg-last-uses lists are inherited by bb_succ. */
6367 for (u = tmp_deps->reg_last_uses[reg]; u; u = XEXP (u, 1))
6369 if (find_insn_list (XEXP (u, 0),
6370 succ_deps->reg_last_uses[reg]))
6373 succ_deps->reg_last_uses[reg]
6374 = alloc_INSN_LIST (XEXP (u, 0),
6375 succ_deps->reg_last_uses[reg]);
6378 /* reg-last-defs lists are inherited by bb_succ. */
6379 for (u = tmp_deps->reg_last_sets[reg]; u; u = XEXP (u, 1))
6381 if (find_insn_list (XEXP (u, 0),
6382 succ_deps->reg_last_sets[reg]))
6385 succ_deps->reg_last_sets[reg]
6386 = alloc_INSN_LIST (XEXP (u, 0),
6387 succ_deps->reg_last_sets[reg]);
6390 for (u = tmp_deps->reg_last_clobbers[reg]; u; u = XEXP (u, 1))
6392 if (find_insn_list (XEXP (u, 0),
6393 succ_deps->reg_last_clobbers[reg]))
6396 succ_deps->reg_last_clobbers[reg]
6397 = alloc_INSN_LIST (XEXP (u, 0),
6398 succ_deps->reg_last_clobbers[reg]);
6402 /* Mem read/write lists are inherited by bb_succ. */
6403 link_insn = tmp_deps->pending_read_insns;
6404 link_mem = tmp_deps->pending_read_mems;
6407 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6409 succ_deps->pending_read_insns,
6410 succ_deps->pending_read_mems)))
6411 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
6412 &succ_deps->pending_read_mems,
6413 XEXP (link_insn, 0), XEXP (link_mem, 0));
6414 link_insn = XEXP (link_insn, 1);
6415 link_mem = XEXP (link_mem, 1);
6418 link_insn = tmp_deps->pending_write_insns;
6419 link_mem = tmp_deps->pending_write_mems;
6422 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6424 succ_deps->pending_write_insns,
6425 succ_deps->pending_write_mems)))
6426 add_insn_mem_dependence (succ_deps,
6427 &succ_deps->pending_write_insns,
6428 &succ_deps->pending_write_mems,
6429 XEXP (link_insn, 0), XEXP (link_mem, 0));
6431 link_insn = XEXP (link_insn, 1);
6432 link_mem = XEXP (link_mem, 1);
6435 /* last_function_call is inherited by bb_succ. */
6436 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
6438 if (find_insn_list (XEXP (u, 0),
6439 succ_deps->last_function_call))
6442 succ_deps->last_function_call
6443 = alloc_INSN_LIST (XEXP (u, 0),
6444 succ_deps->last_function_call);
6447 /* last_pending_memory_flush is inherited by bb_succ. */
6448 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
6450 if (find_insn_list (XEXP (u, 0),
6451 succ_deps->last_pending_memory_flush))
6454 succ_deps->last_pending_memory_flush
6455 = alloc_INSN_LIST (XEXP (u, 0),
6456 succ_deps->last_pending_memory_flush);
6459 /* sched_before_next_call is inherited by bb_succ. */
6460 x = LOG_LINKS (tmp_deps->sched_before_next_call);
6461 for (; x; x = XEXP (x, 1))
6462 add_dependence (succ_deps->sched_before_next_call,
6463 XEXP (x, 0), REG_DEP_ANTI);
6467 while (e != first_edge);
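/* Each inheritance step above is a duplicate-free union: an element
   is first looked up with find_insn_list (or find_insn_mem_list) and
   consed onto the successor's list only when absent, so e.g.
   USES[succ] becomes USES[succ] U USES[bb] without repeated entries. */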
6470 /* Compute backward dependences inside bb. In a multiple blocks region:
6471 (1) a bb is analyzed after its predecessors, and (2) the lists in
6472 effect at the end of bb (after analyzing for bb) are inherited by
6473 bb's successors.
6475 Specifically for reg-reg data dependences, the block insns are
6476 scanned by sched_analyze () top-to-bottom. Two lists are
6477 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6478 and reg_last_uses[] for register USEs.
6480 When analysis is completed for bb, we update for its successors:
6481 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6482 ; - USES[succ] = Union (USES [succ], DEFS [bb])
6484 The mechanism for computing mem-mem data dependence is very
6485 similar, and the result is interblock dependences in the region. */
6488 compute_block_backward_dependences (bb)
6493 int max_reg = max_reg_num ();
6494 struct deps tmp_deps;
6496 tmp_deps = bb_deps[bb];
6498 /* Do the analysis for this block. */
6499 get_bb_head_tail (bb, &head, &tail);
6500 sched_analyze (&tmp_deps, head, tail);
6501 add_branch_dependences (head, tail);
6503 if (current_nr_blocks > 1)
6504 propagate_deps (bb, &tmp_deps, max_reg);
6506 /* Free up the INSN_LISTs.
6508 Note this loop is executed max_reg * nr_regions times. Its first
6509 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6510 The list was empty for the vast majority of those calls. On the PA, not
6511 calling free_INSN_LIST_list in those cases measurably improves -O2
6512 compile times. */
6513 for (i = 0; i < max_reg; ++i)
6515 if (tmp_deps.reg_last_clobbers[i])
6516 free_INSN_LIST_list (&tmp_deps.reg_last_clobbers[i]);
6517 if (tmp_deps.reg_last_sets[i])
6518 free_INSN_LIST_list (&tmp_deps.reg_last_sets[i]);
6519 if (tmp_deps.reg_last_uses[i])
6520 free_INSN_LIST_list (&tmp_deps.reg_last_uses[i]);
6523 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6524 free (bb_deps[bb].reg_last_uses);
6525 free (bb_deps[bb].reg_last_sets);
6526 free (bb_deps[bb].reg_last_clobbers);
6527 bb_deps[bb].reg_last_uses = 0;
6528 bb_deps[bb].reg_last_sets = 0;
6529 bb_deps[bb].reg_last_clobbers = 0;
6532 /* Print dependences for debugging, callable from debugger. */
6535 debug_dependencies ()
6539 fprintf (dump, ";; --------------- forward dependences: ------------ \n");
6540 for (bb = 0; bb < current_nr_blocks; bb++)
6548 get_bb_head_tail (bb, &head, &tail);
6549 next_tail = NEXT_INSN (tail);
6550 fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n",
6551 BB_TO_BLOCK (bb), bb);
6553 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6554 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6555 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6556 "----", "----", "--", "---", "----", "----", "--------", "-----");
6557 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6562 if (! INSN_P (insn))
6565 fprintf (dump, ";; %6d ", INSN_UID (insn));
6566 if (GET_CODE (insn) == NOTE)
6568 n = NOTE_LINE_NUMBER (insn);
6570 fprintf (dump, "%s\n", GET_NOTE_INSN_NAME (n));
6572 fprintf (dump, "line %d, file %s\n", n,
6573 NOTE_SOURCE_FILE (insn));
6576 fprintf (dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
6580 unit = insn_unit (insn);
6582 || function_units[unit].blockage_range_function == 0) ? 0 :
6583 function_units[unit].blockage_range_function (insn);
6585 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6586 (SCHED_GROUP_P (insn) ? "+" : " "),
6590 INSN_DEP_COUNT (insn),
6591 INSN_PRIORITY (insn),
6592 insn_cost (insn, 0, 0),
6593 (int) MIN_BLOCKAGE_COST (range),
6594 (int) MAX_BLOCKAGE_COST (range));
6595 insn_print_units (insn);
6596 fprintf (dump, "\t: ");
6597 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
6598 fprintf (dump, "%d ", INSN_UID (XEXP (link, 0)));
6599 fprintf (dump, "\n");
6603 fprintf (dump, "\n");
6606 /* Set_priorities: compute priority of each insn in the block. */
6619 get_bb_head_tail (bb, &head, &tail);
6620 prev_head = PREV_INSN (head);
6622 if (head == tail && (! INSN_P (head)))
6626 for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
6629 if (GET_CODE (insn) == NOTE)
6632 if (!(SCHED_GROUP_P (insn)))
6634 (void) priority (insn);
6640 /* Schedule a region. A region is either an inner loop, a loop-free
6641 subroutine, or a single basic block. Each bb in the region is
6642 scheduled after its flow predecessors. */
6645 schedule_region (rgn)
6649 int rgn_n_insns = 0;
6650 int sched_rgn_n_insns = 0;
6651 regset_head reg_pending_sets_head;
6652 regset_head reg_pending_clobbers_head;
6654 /* Set variables for the current region. */
6655 current_nr_blocks = RGN_NR_BLOCKS (rgn);
6656 current_blocks = RGN_BLOCKS (rgn);
6658 reg_pending_sets = INITIALIZE_REG_SET (reg_pending_sets_head);
6659 reg_pending_clobbers = INITIALIZE_REG_SET (reg_pending_clobbers_head);
6660 reg_pending_sets_all = 0;
6662 /* Initializations for region data dependence analysis. */
6663 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
6664 for (bb = 0; bb < current_nr_blocks; bb++)
6665 init_deps (bb_deps + bb);
6667 /* Compute LOG_LINKS. */
6668 for (bb = 0; bb < current_nr_blocks; bb++)
6669 compute_block_backward_dependences (bb);
6671 /* Compute INSN_DEPEND. */
6672 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6673 compute_block_forward_dependences (bb);
6675 /* Delete line notes and set priorities. */
6676 for (bb = 0; bb < current_nr_blocks; bb++)
6678 if (write_symbols != NO_DEBUG)
6680 save_line_notes (bb);
6684 rgn_n_insns += set_priorities (bb);
6687 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6688 if (current_nr_blocks > 1)
6692 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
6694 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
6695 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
6696 for (i = 0; i < current_nr_blocks; i++)
6697 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
6701 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
6702 for (i = 1; i < nr_edges; i++)
6703 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
6704 EDGE_TO_BIT (i) = rgn_nr_edges++;
6705 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
6708 for (i = 1; i < nr_edges; i++)
6709 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
6710 rgn_edges[rgn_nr_edges++] = i;
6713 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
6714 edgeset_bitsize = rgn_nr_edges;
6715 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6717 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6718 for (i = 0; i < current_nr_blocks; i++)
6721 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6723 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6726 /* Compute probabilities, dominators, split_edges. */
6727 for (bb = 0; bb < current_nr_blocks; bb++)
6728 compute_dom_prob_ps (bb);
6731 /* Now we can schedule all blocks. */
6732 for (bb = 0; bb < current_nr_blocks; bb++)
6733 sched_rgn_n_insns += schedule_block (bb, rgn_n_insns);
6735 /* Sanity check: verify that all region insns were scheduled. */
6736 if (sched_rgn_n_insns != rgn_n_insns)
6739 /* Restore line notes. */
6740 if (write_symbols != NO_DEBUG)
6742 for (bb = 0; bb < current_nr_blocks; bb++)
6743 restore_line_notes (bb);
6746 /* Done with this region. */
6747 free_pending_lists ();
6749 FREE_REG_SET (reg_pending_sets);
6750 FREE_REG_SET (reg_pending_clobbers);
6754 if (current_nr_blocks > 1)
6759 for (i = 0; i < current_nr_blocks; ++i)
6762 free (pot_split[i]);
6763 free (ancestor_edges[i]);
6769 free (ancestor_edges);
6773 /* The one entry point in this file. DUMP_FILE is the dump file for
6774 this pass. */
6777 schedule_insns (dump_file)
6780 int *deaths_in_region;
6781 sbitmap blocks, large_region_blocks;
6787 int any_large_regions;
6789 /* If cc0 is defined, disable speculative loads in its presence. */
6791 flag_schedule_speculative_load = 0;
6794 /* Taking care of this degenerate case makes the rest of
6795 this code simpler. */
6796 if (n_basic_blocks == 0)

  /* Set dump and sched_verbose for the desired debugging output.  If no
     dump-file was specified, but -fsched-verbose=N (any N), print to stderr.
     For -fsched-verbose=N, N>=10, print everything to stderr.  */
  sched_verbose = sched_verbose_param;
  if (sched_verbose_param == 0 && dump_file)
    sched_verbose = 1;
  dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);

  nr_inter = 0;
  nr_spec = 0;

  /* Initialize issue_rate.  */
  issue_rate = ISSUE_RATE;

  split_all_insns (1);

  /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
     pseudos which do not cross calls.  */
  max_uid = get_max_uid () + 1;

  h_i_d = (struct haifa_insn_data *) xcalloc (max_uid, sizeof (*h_i_d));

  luid = 1;
  for (b = 0; b < n_basic_blocks; b++)
    for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
      {
        INSN_LUID (insn) = luid;

        /* Increment the next luid, unless this is a note.  We don't
           really need separate IDs for notes and we don't want to
           schedule differently depending on whether or not there are
           line-number notes, i.e., depending on whether or not we're
           generating debugging information.  */
        if (GET_CODE (insn) != NOTE)
          ++luid;

        if (insn == BLOCK_END (b))
          break;
      }
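
  /* A consequence of the above: a NOTE in effect shares its LUID with
     the first non-note insn that follows it, which is why notes need
     no IDs of their own.  */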

  /* ?!? We could save some memory by computing a per-region luid mapping
     which could reduce both the number of vectors in the cache and the size
     of each vector.  Instead we just avoid the cache entirely unless the
     average number of instructions in a basic block is very high.  See
     the comment before the declaration of true_dependency_cache for
     what we consider "very high".  */
  if (luid / n_basic_blocks > 100 * 5)
    {
      true_dependency_cache = sbitmap_vector_alloc (luid, luid);
      sbitmap_vector_zero (true_dependency_cache, luid);
    }
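
  /* The threshold works out to an average of 500 insns per basic
     block: luid is roughly the number of non-note insns, so
     luid / n_basic_blocks is the mean block size.  */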

  rgn_table = (region *) xmalloc (n_basic_blocks * sizeof (region));
  rgn_bb_table = (int *) xmalloc (n_basic_blocks * sizeof (int));
  block_to_bb = (int *) xmalloc (n_basic_blocks * sizeof (int));
  containing_rgn = (int *) xmalloc (n_basic_blocks * sizeof (int));

  blocks = sbitmap_alloc (n_basic_blocks);
  large_region_blocks = sbitmap_alloc (n_basic_blocks);

  compute_bb_for_insn (max_uid);

  /* Compute regions for scheduling.  */
  if (reload_completed
      || n_basic_blocks == 1
      || !flag_schedule_interblock)
    {
      find_single_block_region ();
    }
  else
    {
      /* Verify that a 'good' control flow graph can be built.  */
      if (is_cfg_nonregular ())
        {
          find_single_block_region ();
        }
      else
        {
          sbitmap *dom;
          struct edge_list *edge_list;

          dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);

          /* The scheduler runs after flow; therefore, we can't blindly call
             back into find_basic_blocks since doing so could invalidate the
             info in global_live_at_start.

             Consider a block consisting entirely of dead stores; after life
             analysis it would be a block of NOTE_INSN_DELETED notes.  If
             we call find_basic_blocks again, then the block would be removed
             entirely and invalidate our register live information.

             We could (should?) recompute register live information.  Doing
             so may even be beneficial.  */
          edge_list = create_edge_list ();

          /* Compute the dominators and post dominators.  We don't
             currently use post dominators, but we should for
             speculative motion analysis.  */
          compute_flow_dominators (dom, NULL);

          /* build_control_flow will return nonzero if it detects unreachable
             blocks or any other irregularity with the cfg which prevents
             cross block scheduling.  */
          if (build_control_flow (edge_list) != 0)
            find_single_block_region ();
          else
            find_rgns (edge_list, dom);
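
          /* find_rgns, defined earlier in this file, builds regions
             around the function's natural loops; blocks outside any
             recognized loop become single-block regions.  */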

          if (sched_verbose >= 3)
            debug_regions ();

          /* We are done with flow's edge list.  */
          free_edge_list (edge_list);

          /* For now.  This will move as more and more of haifa is converted
             to using the cfg code in flow.c.  */
          free (dom);
        }
    }

  deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);

  init_alias_analysis ();

  if (write_symbols != NO_DEBUG)
    {
      rtx line;

      line_note_head = (rtx *) xcalloc (n_basic_blocks, sizeof (rtx));

      /* Save-line-note-head:
         Determine the line-number at the start of each basic block.
         This must be computed and saved now, because after a basic block's
         predecessor has been scheduled, it is impossible to accurately
         determine the correct line number for the first insn of the block.  */

      for (b = 0; b < n_basic_blocks; b++)
        for (line = BLOCK_HEAD (b); line; line = PREV_INSN (line))
          if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
            {
              line_note_head[b] = line;
              break;
            }
    }

  /* Find units used in this function, for visualization.  */
  if (sched_verbose)
    init_target_units ();

  /* ??? Add a NOTE after the last insn of the last basic block.  It is not
     known why this is done.  */

  insn = BLOCK_END (n_basic_blocks - 1);
  if (NEXT_INSN (insn) == 0
      || (GET_CODE (insn) != NOTE
          && GET_CODE (insn) != CODE_LABEL
          /* Don't emit a NOTE if it would end up between an unconditional
             jump and a BARRIER.  */
          && !(GET_CODE (insn) == JUMP_INSN
               && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
    emit_note_after (NOTE_INSN_DELETED, BLOCK_END (n_basic_blocks - 1));

  /* Compute INSN_REG_WEIGHT for all blocks.  We must do this before
     removing death notes.  */
  for (b = n_basic_blocks - 1; b >= 0; b--)
    find_insn_reg_weight (b);
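  /* (Presumably the ordering constraint exists because INSN_REG_WEIGHT
     is derived in part from the REG_DEAD notes that are about to be
     deleted below.)  */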

  /* Remove all death notes from the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      sbitmap_zero (blocks);
      for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
        SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);

      deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
    }
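
  /* The second argument of count_or_remove_death_notes selects whether
     the notes are deleted (nonzero) or merely counted (zero); here they
     are deleted, and the saved counts are compared against a count-only
     call after scheduling.  */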

  /* Schedule every region in the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    schedule_region (rgn);

  /* Update life analysis for the subroutine.  Do single block regions
     first so that we can verify that live_at_start didn't change.  Then
     do all other blocks.  */
  /* ??? There is an outside possibility that update_life_info, or more
     to the point propagate_block, could get called with non-zero flags
     more than once for one basic block.  This would be kinda bad if it
     were to happen, since REG_INFO would be accumulated twice for the
     block, and we'd have twice the REG_DEAD notes.

     I'm fairly certain that this _shouldn't_ happen, since I don't think
     that live_at_start should change at region heads.  Not sure what the
     best way to test for this kind of thing is...  */

  allocate_reg_life_data ();
  compute_bb_for_insn (max_uid);

  any_large_regions = 0;
  sbitmap_ones (large_region_blocks);

  for (rgn = 0; rgn < nr_regions; rgn++)
    if (RGN_NR_BLOCKS (rgn) > 1)
      any_large_regions = 1;
    else
      {
        sbitmap_zero (blocks);
        SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
        RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);

        /* Don't update reg info after reload, since that affects
           regs_ever_live, which should not change after reload.  */
        update_life_info (blocks, UPDATE_LIFE_LOCAL,
                          (reload_completed ? PROP_DEATH_NOTES
                           : PROP_DEATH_NOTES | PROP_REG_INFO));

#ifndef HAVE_conditional_execution
        /* ??? REG_DEAD notes only exist for unconditional deaths.  We need
           a count of the conditional plus unconditional deaths for this to
           work out.  */
        /* In the single block case, the count of registers that died should
           not have changed during the schedule.  */
        if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
          abort ();
#endif
      }

  if (any_large_regions)
    {
      update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
                        PROP_DEATH_NOTES | PROP_REG_INFO);
    }
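
  /* Multi-block regions get the full global update because interblock
     motion may legitimately have changed which registers are live at
     block boundaries; the single-block invariant checked above does
     not hold for them.  */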

  /* Reposition the prologue and epilogue notes in case we moved the
     prologue/epilogue insns.  */
  if (reload_completed)
    reposition_prologue_and_epilogue_notes (get_insns ());

  /* Delete redundant line notes.  */
  if (write_symbols != NO_DEBUG)
    rm_redundant_line_notes ();

  if (sched_verbose)
    {
      if (reload_completed == 0 && flag_schedule_interblock)
        fprintf (dump,
                 "\n;; Procedure interblock/speculative motions == %d/%d \n",
                 nr_inter, nr_spec);
      else if (nr_inter > 0)
        abort ();

      fprintf (dump, "\n\n");
    }

  /* Clean up.  */
  end_alias_analysis ();

  if (true_dependency_cache)
    {
      free (true_dependency_cache);
      true_dependency_cache = NULL;
    }
  free (rgn_table);
  free (rgn_bb_table);
  free (block_to_bb);
  free (containing_rgn);

  free (h_i_d);

  if (write_symbols != NO_DEBUG)
    free (line_note_head);

  if (edge_table)
    {
      free (edge_table);
      edge_table = NULL;
    }

  if (in_edges)
    {
      free (in_edges);
      in_edges = NULL;
    }
  if (out_edges)
    {
      free (out_edges);
      out_edges = NULL;
    }

  sbitmap_free (blocks);
  sbitmap_free (large_region_blocks);

  free (deaths_in_region);
}

#endif /* INSN_SCHEDULING */