1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
4 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU CC is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to the Free
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
24 /* Instruction scheduling pass.
26 This pass implements list scheduling within basic blocks. It is
27 run twice: (1) after flow analysis, but before register allocation,
28 and (2) after register allocation.
30 The first run performs interblock scheduling, moving insns between
31 different blocks in the same "region", and the second runs only
32 basic block scheduling.
34 Interblock motions performed are useful motions and speculative
35 motions, including speculative loads. Motions requiring code
36 duplication are not supported. The identification of motion type
37 and the check for validity of speculative motions requires
38 construction and analysis of the function's control flow graph.
39 The scheduler works as follows:
41 We compute insn priorities based on data dependencies. Flow
42 analysis only creates a fraction of the data-dependencies we must
43 observe: namely, only those dependencies which the combiner can be
44 expected to use. For this pass, we must therefore create the
45 remaining dependencies we need to observe: register dependencies,
46 memory dependencies, dependencies to keep function calls in order,
47 and the dependence between a conditional branch and the setting of
48 condition codes are all dealt with here.
50 The scheduler first traverses the data flow graph, starting with
51 the last instruction, and proceeding to the first, assigning values
52 to insn_priority as it goes. This sorts the instructions
53 topologically by data dependence.
55 Once priorities have been established, we order the insns using
56 list scheduling. This works as follows: starting with a list of
57 all the ready insns, and sorted according to priority number, we
58 schedule the insn from the end of the list by placing its
59 predecessors in the list according to their priority order. We
60 consider this insn scheduled by setting the pointer to the "end" of
61 the list to point to the previous insn. When an insn has no
62 predecessors, we either queue it until sufficient time has elapsed
63 or add it to the ready list. As the instructions are scheduled or
64 when stalls are introduced, the queue advances and dumps insns into
65 the ready list. When all insns down to the lowest priority have
66 been scheduled, the critical path of the basic block has been made
67 as short as possible. The remaining insns are then scheduled in
70 Function unit conflicts are resolved during forward list scheduling
71 by tracking the time when each insn is committed to the schedule
72 and from that, the time the function units it uses must be free.
73 As insns on the ready list are considered for scheduling, those
74 that would result in a blockage of the already committed insns are
75 queued until no blockage will result.
77 The following list shows the order in which we want to break ties
78 among insns in the ready list:
80 1. choose insn with the longest path to end of bb, ties
82 2. choose insn with least contribution to register pressure,
84 3. prefer in-block upon interblock motion, ties broken by
85 4. prefer useful upon speculative motion, ties broken by
86 5. choose insn with largest control flow probability, ties
88 6. choose insn with the least dependences upon the previously
89 scheduled insn, or finally
90 7 choose the insn which has the most insns dependent on it.
91 8. choose insn with lowest UID.
93 Memory references complicate matters. Only if we can be certain
94 that memory references are not part of the data dependency graph
95 (via true, anti, or output dependence), can we move operations past
96 memory references. To first approximation, reads can be done
97 independently, while writes introduce dependencies. Better
98 approximations will yield fewer dependencies.
100 Before reload, an extended analysis of interblock data dependences
101 is required for interblock scheduling. This is performed in
102 compute_block_backward_dependences ().
104 Dependencies set up by memory references are treated in exactly the
105 same way as other dependencies, by using LOG_LINKS backward
106 dependences. LOG_LINKS are translated into INSN_DEPEND forward
107 dependences for the purpose of forward list scheduling.
109 Having optimized the critical path, we may have also unduly
110 extended the lifetimes of some registers. If an operation requires
111 that constants be loaded into registers, it is certainly desirable
112 to load those constants as early as necessary, but no earlier.
113 I.e., it will not do to load up a bunch of registers at the
114 beginning of a basic block only to use them at the end, if they
115 could be loaded later, since this may result in excessive register
118 Note that since branches are never in basic blocks, but only end
119 basic blocks, this pass will not move branches. But that is ok,
120 since we can use GNU's delayed branch scheduling pass to take care
123 Also note that no further optimizations based on algebraic
124 identities are performed, so this pass would be a good one to
125 perform instruction splitting, such as breaking up a multiply
126 instruction into shifts and adds where that is profitable.
128 Given the memory aliasing analysis that this pass should perform,
129 it should be possible to remove redundant stores to memory, and to
130 load values from registers instead of hitting memory.
132 Before reload, speculative insns are moved only if a 'proof' exists
133 that no exception will be caused by this, and if no live registers
134 exist that inhibit the motion (live registers constraints are not
135 represented by data dependence edges).
137 This pass must update information that subsequent passes expect to
138 be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
139 reg_n_calls_crossed, and reg_live_length. Also, BLOCK_HEAD,
142 The information in the line number notes is carefully retained by
143 this pass. Notes that refer to the starting and ending of
144 exception regions are also carefully retained by this pass. All
145 other NOTE insns are grouped in their same relative order at the
146 beginning of basic blocks and regions that have been scheduled.
148 The main entry point for this pass is schedule_insns(), called for
149 each function. The work of the scheduler is organized in three
150 levels: (1) function level: insns are subject to splitting,
151 control-flow-graph is constructed, regions are computed (after
152 reload, each region is of one block), (2) region level: control
153 flow graph attributes required for interblock scheduling are
154 computed (dominators, reachability, etc.), data dependences and
155 priorities are computed, and (3) block level: insns in the block
156 are actually scheduled. */
163 #include "basic-block.h"
165 #include "function.h"
166 #include "hard-reg-set.h"
168 #include "insn-config.h"
169 #include "insn-attr.h"
174 extern char *reg_known_equiv_p;
175 extern rtx *reg_known_value;
177 #ifdef INSN_SCHEDULING
179 /* target_units bitmask has 1 for each unit in the cpu. It should be
180 possible to compute this variable from the machine description.
181 But currently it is computed by examining the insn list. Since
182 this is only needed for visualization, it seems an acceptable
183 solution. (For understanding the mapping of bits to units, see
184 definition of function_units[] in "insn-attrtab.c".) */
186 static int target_units = 0;
188 /* issue_rate is the number of insns that can be scheduled in the same
189 machine cycle. It can be defined in the config/mach/mach.h file,
190 otherwise we set it to 1. */
192 static int issue_rate;
198 /* sched-verbose controls the amount of debugging output the
199 scheduler prints. It is controlled by -fsched-verbose-N:
200 N>0 and no -DSR : the output is directed to stderr.
201 N>=10 will direct the printouts to stderr (regardless of -dSR).
203 N=2: bb's probabilities, detailed ready list info, unit/insn info.
204 N=3: rtl at abort point, control-flow, regions info.
205 N=5: dependences info. */
207 #define MAX_RGN_BLOCKS 10
208 #define MAX_RGN_INSNS 100
210 static int sched_verbose_param = 0;
211 static int sched_verbose = 0;
213 /* nr_inter/spec counts interblock/speculative motion for the function. */
214 static int nr_inter, nr_spec;
217 /* Debugging file. All printouts are sent to dump, which is always set,
218 either to stderr, or to the dump listing file (-dRS). */
219 static FILE *dump = 0;
221 /* fix_sched_param() is called from toplev.c upon detection
222 of the -fsched-***-N options. */
225 fix_sched_param (param, val)
226 const char *param, *val;
228 if (!strcmp (param, "verbose"))
229 sched_verbose_param = atoi (val);
231 warning ("fix_sched_param: unknown param: %s", param);
235 /* Element N is the next insn that sets (hard or pseudo) register
236 N within the current basic block; or zero, if there is no
237 such insn. Needed for new registers which may be introduced
238 by splitting insns. */
239 static rtx *reg_last_uses;
240 static rtx *reg_last_sets;
241 static rtx *reg_last_clobbers;
242 static regset reg_pending_sets;
243 static regset reg_pending_clobbers;
244 static int reg_pending_sets_all;
246 /* To speed up the test for duplicate dependency links we keep a record
247 of true dependencies created by add_dependence when the average number
248 of instructions in a basic block is very large.
250 Studies have shown that there is typically around 5 instructions between
251 branches for typical C code. So we can make a guess that the average
252 basic block is approximately 5 instructions long; we will choose 100X
253 the average size as a very large basic block.
255 Each insn has an associated bitmap for its dependencies. Each bitmap
256 has enough entries to represent a dependency on any other insn in the
258 static sbitmap *true_dependency_cache;
260 /* Indexed by INSN_UID, the collection of all data associated with
261 a single instruction. */
263 struct haifa_insn_data
265 /* A list of insns which depend on the instruction. Unlike LOG_LINKS,
266 it represents forward dependancies. */
269 /* The line number note in effect for each insn. For line number
270 notes, this indicates whether the note may be reused. */
273 /* Logical uid gives the original ordering of the insns. */
276 /* A priority for each insn. */
279 /* The number of incoming edges in the forward dependency graph.
280 As scheduling proceds, counts are decreased. An insn moves to
281 the ready queue when its counter reaches zero. */
284 /* An encoding of the blockage range function. Both unit and range
286 unsigned int blockage;
288 /* Number of instructions referring to this insn. */
291 /* The minimum clock tick at which the insn becomes ready. This is
292 used to note timing constraints for the insns in the pending list. */
297 /* An encoding of the function units used. */
300 /* This weight is an estimation of the insn's contribution to
301 register pressure. */
304 /* Some insns (e.g. call) are not allowed to move across blocks. */
305 unsigned int cant_move : 1;
307 /* Set if there's DEF-USE dependance between some speculatively
308 moved load insn and this one. */
309 unsigned int fed_by_spec_load : 1;
310 unsigned int is_load_insn : 1;
313 static struct haifa_insn_data *h_i_d;
315 #define INSN_DEPEND(INSN) (h_i_d[INSN_UID (INSN)].depend)
316 #define INSN_LUID(INSN) (h_i_d[INSN_UID (INSN)].luid)
317 #define INSN_PRIORITY(INSN) (h_i_d[INSN_UID (INSN)].priority)
318 #define INSN_DEP_COUNT(INSN) (h_i_d[INSN_UID (INSN)].dep_count)
319 #define INSN_COST(INSN) (h_i_d[INSN_UID (INSN)].cost)
320 #define INSN_UNIT(INSN) (h_i_d[INSN_UID (INSN)].units)
321 #define INSN_REG_WEIGHT(INSN) (h_i_d[INSN_UID (INSN)].reg_weight)
323 #define INSN_BLOCKAGE(INSN) (h_i_d[INSN_UID (INSN)].blockage)
325 #define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
326 #define ENCODE_BLOCKAGE(U, R) \
327 (((U) << BLOCKAGE_BITS \
328 | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \
329 | MAX_BLOCKAGE_COST (R))
330 #define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
331 #define BLOCKAGE_RANGE(B) \
332 (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
333 | ((B) & BLOCKAGE_MASK))
335 /* Encodings of the `<name>_unit_blockage_range' function. */
336 #define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
337 #define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
339 #define DONE_PRIORITY -1
340 #define MAX_PRIORITY 0x7fffffff
341 #define TAIL_PRIORITY 0x7ffffffe
342 #define LAUNCH_PRIORITY 0x7f000001
343 #define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
344 #define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)
346 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
347 #define LINE_NOTE(INSN) (h_i_d[INSN_UID (INSN)].line_note)
348 #define INSN_TICK(INSN) (h_i_d[INSN_UID (INSN)].tick)
349 #define CANT_MOVE(insn) (h_i_d[INSN_UID (insn)].cant_move)
350 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
351 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
353 /* Vector indexed by basic block number giving the starting line-number
354 for each basic block. */
355 static rtx *line_note_head;
357 /* List of important notes we must keep around. This is a pointer to the
358 last element in the list. */
359 static rtx note_list;
363 /* An instruction is ready to be scheduled when all insns preceding it
364 have already been scheduled. It is important to ensure that all
365 insns which use its result will not be executed until its result
366 has been computed. An insn is maintained in one of four structures:
368 (P) the "Pending" set of insns which cannot be scheduled until
369 their dependencies have been satisfied.
370 (Q) the "Queued" set of insns that can be scheduled when sufficient
372 (R) the "Ready" list of unscheduled, uncommitted insns.
373 (S) the "Scheduled" list of insns.
375 Initially, all insns are either "Pending" or "Ready" depending on
376 whether their dependencies are satisfied.
378 Insns move from the "Ready" list to the "Scheduled" list as they
379 are committed to the schedule. As this occurs, the insns in the
380 "Pending" list have their dependencies satisfied and move to either
381 the "Ready" list or the "Queued" set depending on whether
382 sufficient time has passed to make them ready. As time passes,
383 insns move from the "Queued" set to the "Ready" list. Insns may
384 move from the "Ready" list to the "Queued" set if they are blocked
385 due to a function unit conflict.
387 The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
388 insns, i.e., those that are ready, queued, and pending.
389 The "Queued" set (Q) is implemented by the variable `insn_queue'.
390 The "Ready" list (R) is implemented by the variables `ready' and
392 The "Scheduled" list (S) is the new insn chain built by this pass.
394 The transition (R->S) is implemented in the scheduling loop in
395 `schedule_block' when the best insn to schedule is chosen.
396 The transition (R->Q) is implemented in `queue_insn' when an
397 insn is found to have a function unit conflict with the already
399 The transitions (P->R and P->Q) are implemented in `schedule_insn' as
400 insns move from the ready list to the scheduled list.
401 The transition (Q->R) is implemented in 'queue_to_insn' as time
402 passes or stalls are introduced. */
404 /* Implement a circular buffer to delay instructions until sufficient
405 time has passed. INSN_QUEUE_SIZE is a power of two larger than
406 MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the
407 longest time an isnsn may be queued. */
408 static rtx insn_queue[INSN_QUEUE_SIZE];
409 static int q_ptr = 0;
410 static int q_size = 0;
411 #define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
412 #define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
414 /* Forward declarations. */
415 static void add_dependence PROTO ((rtx, rtx, enum reg_note));
417 static void remove_dependence PROTO ((rtx, rtx));
419 static rtx find_insn_list PROTO ((rtx, rtx));
420 static int insn_unit PROTO ((rtx));
421 static unsigned int blockage_range PROTO ((int, rtx));
422 static void clear_units PROTO ((void));
423 static int actual_hazard_this_instance PROTO ((int, int, rtx, int, int));
424 static void schedule_unit PROTO ((int, rtx, int));
425 static int actual_hazard PROTO ((int, rtx, int, int));
426 static int potential_hazard PROTO ((int, rtx, int));
427 static int insn_cost PROTO ((rtx, rtx, rtx));
428 static int priority PROTO ((rtx));
429 static void free_pending_lists PROTO ((void));
430 static void add_insn_mem_dependence PROTO ((rtx *, rtx *, rtx, rtx));
431 static void flush_pending_lists PROTO ((rtx, int));
432 static void sched_analyze_1 PROTO ((rtx, rtx));
433 static void sched_analyze_2 PROTO ((rtx, rtx));
434 static void sched_analyze_insn PROTO ((rtx, rtx, rtx));
435 static void sched_analyze PROTO ((rtx, rtx));
436 static int rank_for_schedule PROTO ((const PTR, const PTR));
437 static void swap_sort PROTO ((rtx *, int));
438 static void queue_insn PROTO ((rtx, int));
439 static int schedule_insn PROTO ((rtx, rtx *, int, int));
440 static void find_insn_reg_weight PROTO ((int));
441 static int schedule_block PROTO ((int, int));
442 static char *safe_concat PROTO ((char *, char *, const char *));
443 static int insn_issue_delay PROTO ((rtx));
444 static void adjust_priority PROTO ((rtx));
446 /* Control flow graph edges are kept in circular lists. */
455 static haifa_edge *edge_table;
457 #define NEXT_IN(edge) (edge_table[edge].next_in)
458 #define NEXT_OUT(edge) (edge_table[edge].next_out)
459 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
460 #define TO_BLOCK(edge) (edge_table[edge].to_block)
462 /* Number of edges in the control flow graph. (In fact, larger than
463 that by 1, since edge 0 is unused.) */
466 /* Circular list of incoming/outgoing edges of a block. */
467 static int *in_edges;
468 static int *out_edges;
470 #define IN_EDGES(block) (in_edges[block])
471 #define OUT_EDGES(block) (out_edges[block])
475 static int is_cfg_nonregular PROTO ((void));
476 static int build_control_flow PROTO ((int_list_ptr *, int_list_ptr *,
478 static void new_edge PROTO ((int, int));
481 /* A region is the main entity for interblock scheduling: insns
482 are allowed to move between blocks in the same region, along
483 control flow graph edges, in the 'up' direction. */
486 int rgn_nr_blocks; /* Number of blocks in region. */
487 int rgn_blocks; /* cblocks in the region (actually index in rgn_bb_table). */
491 /* Number of regions in the procedure. */
492 static int nr_regions;
494 /* Table of region descriptions. */
495 static region *rgn_table;
497 /* Array of lists of regions' blocks. */
498 static int *rgn_bb_table;
500 /* Topological order of blocks in the region (if b2 is reachable from
501 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
502 always referred to by either block or b, while its topological
503 order name (in the region) is refered to by bb. */
504 static int *block_to_bb;
506 /* The number of the region containing a block. */
507 static int *containing_rgn;
509 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
510 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
511 #define BLOCK_TO_BB(block) (block_to_bb[block])
512 #define CONTAINING_RGN(block) (containing_rgn[block])
514 void debug_regions PROTO ((void));
515 static void find_single_block_region PROTO ((void));
516 static void find_rgns PROTO ((int_list_ptr *, int_list_ptr *,
517 int *, int *, sbitmap *));
518 static int too_large PROTO ((int, int *, int *));
520 extern void debug_live PROTO ((int, int));
522 /* Blocks of the current region being scheduled. */
523 static int current_nr_blocks;
524 static int current_blocks;
526 /* The mapping from bb to block. */
527 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
530 /* Bit vectors and bitset operations are needed for computations on
531 the control flow graph. */
533 typedef unsigned HOST_WIDE_INT *bitset;
536 int *first_member; /* Pointer to the list start in bitlst_table. */
537 int nr_members; /* The number of members of the bit list. */
541 static int bitlst_table_last;
542 static int bitlst_table_size;
543 static int *bitlst_table;
545 static char bitset_member PROTO ((bitset, int, int));
546 static void extract_bitlst PROTO ((bitset, int, bitlst *));
548 /* Target info declarations.
550 The block currently being scheduled is referred to as the "target" block,
551 while other blocks in the region from which insns can be moved to the
552 target are called "source" blocks. The candidate structure holds info
553 about such sources: are they valid? Speculative? Etc. */
554 typedef bitlst bblst;
565 static candidate *candidate_table;
567 /* A speculative motion requires checking live information on the path
568 from 'source' to 'target'. The split blocks are those to be checked.
569 After a speculative motion, live information should be modified in
572 Lists of split and update blocks for each candidate of the current
573 target are in array bblst_table. */
574 static int *bblst_table, bblst_size, bblst_last;
576 #define IS_VALID(src) ( candidate_table[src].is_valid )
577 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
578 #define SRC_PROB(src) ( candidate_table[src].src_prob )
580 /* The bb being currently scheduled. */
581 static int target_bb;
584 typedef bitlst edgelst;
586 /* Target info functions. */
587 static void split_edges PROTO ((int, int, edgelst *));
588 static void compute_trg_info PROTO ((int));
589 void debug_candidate PROTO ((int));
590 void debug_candidates PROTO ((int));
593 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
594 typedef bitset bbset;
596 /* Number of words of the bbset. */
597 static int bbset_size;
599 /* Dominators array: dom[i] contains the bbset of dominators of
600 bb i in the region. */
603 /* bb 0 is the only region entry. */
604 #define IS_RGN_ENTRY(bb) (!bb)
606 /* Is bb_src dominated by bb_trg. */
607 #define IS_DOMINATED(bb_src, bb_trg) \
608 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
610 /* Probability: Prob[i] is a float in [0, 1] which is the probability
611 of bb i relative to the region entry. */
614 /* The probability of bb_src, relative to bb_trg. Note, that while the
615 'prob[bb]' is a float in [0, 1], this macro returns an integer
617 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
620 /* Bit-set of edges, where bit i stands for edge i. */
621 typedef bitset edgeset;
623 /* Number of edges in the region. */
624 static int rgn_nr_edges;
626 /* Array of size rgn_nr_edges. */
627 static int *rgn_edges;
629 /* Number of words in an edgeset. */
630 static int edgeset_size;
632 /* Mapping from each edge in the graph to its number in the rgn. */
633 static int *edge_to_bit;
634 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
636 /* The split edges of a source bb is different for each target
637 bb. In order to compute this efficiently, the 'potential-split edges'
638 are computed for each bb prior to scheduling a region. This is actually
639 the split edges of each bb relative to the region entry.
641 pot_split[bb] is the set of potential split edges of bb. */
642 static edgeset *pot_split;
644 /* For every bb, a set of its ancestor edges. */
645 static edgeset *ancestor_edges;
647 static void compute_dom_prob_ps PROTO ((int));
649 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
650 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
651 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
652 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
654 /* Parameters affecting the decision of rank_for_schedule(). */
655 #define MIN_DIFF_PRIORITY 2
656 #define MIN_PROBABILITY 40
657 #define MIN_PROB_DIFF 10
659 /* Speculative scheduling functions. */
660 static int check_live_1 PROTO ((int, rtx));
661 static void update_live_1 PROTO ((int, rtx));
662 static int check_live PROTO ((rtx, int));
663 static void update_live PROTO ((rtx, int));
664 static void set_spec_fed PROTO ((rtx));
665 static int is_pfree PROTO ((rtx, int, int));
666 static int find_conditional_protection PROTO ((rtx, int));
667 static int is_conditionally_protected PROTO ((rtx, int, int));
668 static int may_trap_exp PROTO ((rtx, int));
669 static int haifa_classify_insn PROTO ((rtx));
670 static int is_prisky PROTO ((rtx, int, int));
671 static int is_exception_free PROTO ((rtx, int, int));
673 static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx));
674 static void compute_block_forward_dependences PROTO ((int));
675 static void init_rgn_data_dependences PROTO ((int));
676 static void add_branch_dependences PROTO ((rtx, rtx));
677 static void compute_block_backward_dependences PROTO ((int));
678 void debug_dependencies PROTO ((void));
680 /* Notes handling mechanism:
681 =========================
682 Generally, NOTES are saved before scheduling and restored after scheduling.
683 The scheduler distinguishes between three types of notes:
685 (1) LINE_NUMBER notes, generated and used for debugging. Here,
686 before scheduling a region, a pointer to the LINE_NUMBER note is
687 added to the insn following it (in save_line_notes()), and the note
688 is removed (in rm_line_notes() and unlink_line_notes()). After
689 scheduling the region, this pointer is used for regeneration of
690 the LINE_NUMBER note (in restore_line_notes()).
692 (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
693 Before scheduling a region, a pointer to the note is added to the insn
694 that follows or precedes it. (This happens as part of the data dependence
695 computation). After scheduling an insn, the pointer contained in it is
696 used for regenerating the corresponding note (in reemit_notes).
698 (3) All other notes (e.g. INSN_DELETED): Before scheduling a block,
699 these notes are put in a list (in rm_other_notes() and
700 unlink_other_notes ()). After scheduling the block, these notes are
701 inserted at the beginning of the block (in schedule_block()). */
703 static rtx unlink_other_notes PROTO ((rtx, rtx));
704 static rtx unlink_line_notes PROTO ((rtx, rtx));
705 static void rm_line_notes PROTO ((int));
706 static void save_line_notes PROTO ((int));
707 static void restore_line_notes PROTO ((int));
708 static void rm_redundant_line_notes PROTO ((void));
709 static void rm_other_notes PROTO ((rtx, rtx));
710 static rtx reemit_notes PROTO ((rtx, rtx));
712 static void get_block_head_tail PROTO ((int, rtx *, rtx *));
713 static void get_bb_head_tail PROTO ((int, rtx *, rtx *));
715 static int queue_to_ready PROTO ((rtx [], int));
717 static void debug_ready_list PROTO ((rtx[], int));
718 static void init_target_units PROTO ((void));
719 static void insn_print_units PROTO ((rtx));
720 static int get_visual_tbl_length PROTO ((void));
721 static void init_block_visualization PROTO ((void));
722 static void print_block_visualization PROTO ((int, const char *));
723 static void visualize_scheduled_insns PROTO ((int, int));
724 static void visualize_no_unit PROTO ((rtx));
725 static void visualize_stall_cycles PROTO ((int, int));
726 static void print_exp PROTO ((char *, rtx, int));
727 static void print_value PROTO ((char *, rtx, int));
728 static void print_pattern PROTO ((char *, rtx, int));
729 static void print_insn PROTO ((char *, rtx, int));
730 void debug_reg_vector PROTO ((regset));
732 static rtx move_insn1 PROTO ((rtx, rtx));
733 static rtx move_insn PROTO ((rtx, rtx));
734 static rtx group_leader PROTO ((rtx));
735 static int set_priorities PROTO ((int));
736 static void init_rtx_vector PROTO ((rtx **, rtx *, int, int));
737 static void schedule_region PROTO ((int));
739 #endif /* INSN_SCHEDULING */
741 #define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
743 /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
744 LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type
745 of dependence that this link represents. */
748 add_dependence (insn, elem, dep_type)
751 enum reg_note dep_type;
755 /* Don't depend an insn on itself. */
759 /* We can get a dependency on deleted insns due to optimizations in
760 the register allocation and reloading or due to splitting. Any
761 such dependency is useless and can be ignored. */
762 if (GET_CODE (elem) == NOTE)
765 /* If elem is part of a sequence that must be scheduled together, then
766 make the dependence point to the last insn of the sequence.
767 When HAVE_cc0, it is possible for NOTEs to exist between users and
768 setters of the condition codes, so we must skip past notes here.
769 Otherwise, NOTEs are impossible here. */
771 next = NEXT_INSN (elem);
774 while (next && GET_CODE (next) == NOTE)
775 next = NEXT_INSN (next);
778 if (next && SCHED_GROUP_P (next)
779 && GET_CODE (next) != CODE_LABEL)
781 /* Notes will never intervene here though, so don't bother checking
783 /* We must reject CODE_LABELs, so that we don't get confused by one
784 that has LABEL_PRESERVE_P set, which is represented by the same
785 bit in the rtl as SCHED_GROUP_P. A CODE_LABEL can never be
787 while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next))
788 && GET_CODE (NEXT_INSN (next)) != CODE_LABEL)
789 next = NEXT_INSN (next);
791 /* Again, don't depend an insn on itself. */
795 /* Make the dependence to NEXT, the last insn of the group, instead
796 of the original ELEM. */
800 #ifdef INSN_SCHEDULING
801 /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
802 No need for interblock dependences with calls, since
803 calls are not moved between blocks. Note: the edge where
804 elem is a CALL is still required. */
805 if (GET_CODE (insn) == CALL_INSN
806 && (INSN_BB (elem) != INSN_BB (insn)))
810 /* If we already have a true dependency for ELEM, then we do not
811 need to do anything. Avoiding the list walk below can cut
812 compile times dramatically for some code. */
813 if (true_dependency_cache
814 && TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
818 /* Check that we don't already have this dependence. */
819 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
820 if (XEXP (link, 0) == elem)
822 /* If this is a more restrictive type of dependence than the existing
823 one, then change the existing dependence to this type. */
824 if ((int) dep_type < (int) REG_NOTE_KIND (link))
825 PUT_REG_NOTE_KIND (link, dep_type);
827 #ifdef INSN_SCHEDULING
828 /* If we are adding a true dependency to INSN's LOG_LINKs, then
829 note that in the bitmap cache of true dependency information. */
830 if ((int)dep_type == 0 && true_dependency_cache)
831 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
835 /* Might want to check one level of transitivity to save conses. */
837 link = alloc_INSN_LIST (elem, LOG_LINKS (insn));
838 LOG_LINKS (insn) = link;
840 /* Insn dependency, not data dependency. */
841 PUT_REG_NOTE_KIND (link, dep_type);
843 #ifdef INSN_SCHEDULING
844 /* If we are adding a true dependency to INSN's LOG_LINKs, then
845 note that in the bitmap cache of true dependency information. */
846 if ((int)dep_type == 0 && true_dependency_cache)
847 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
852 /* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
853 of INSN. Abort if not found. */
856 remove_dependence (insn, elem)
860 rtx prev, link, next;
863 for (prev = 0, link = LOG_LINKS (insn); link; link = next)
865 next = XEXP (link, 1);
866 if (XEXP (link, 0) == elem)
869 XEXP (prev, 1) = next;
871 LOG_LINKS (insn) = next;
873 #ifdef INSN_SCHEDULING
874 /* If we are removing a true dependency from the LOG_LINKS list,
875 make sure to remove it from the cache too. */
876 if (REG_NOTE_KIND (link) == 0 && true_dependency_cache)
877 RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
881 free_INSN_LIST_node (link);
893 #endif /* HAVE_cc0 */
895 #ifndef INSN_SCHEDULING
897 schedule_insns (dump_file)
907 #define HAIFA_INLINE __inline
910 /* Computation of memory dependencies. */
912 /* The *_insns and *_mems are paired lists. Each pending memory operation
913 will have a pointer to the MEM rtx on one list and a pointer to the
914 containing insn on the other list in the same place in the list. */
916 /* We can't use add_dependence like the old code did, because a single insn
917 may have multiple memory accesses, and hence needs to be on the list
918 once for each memory access. Add_dependence won't let you add an insn
919 to a list more than once. */
921 /* An INSN_LIST containing all insns with pending read operations. */
922 static rtx pending_read_insns;
924 /* An EXPR_LIST containing all MEM rtx's which are pending reads. */
925 static rtx pending_read_mems;
927 /* An INSN_LIST containing all insns with pending write operations. */
928 static rtx pending_write_insns;
930 /* An EXPR_LIST containing all MEM rtx's which are pending writes. */
931 static rtx pending_write_mems;
933 /* Indicates the combined length of the two pending lists. We must prevent
934 these lists from ever growing too large since the number of dependencies
935 produced is at least O(N*N), and execution time is at least O(4*N*N), as
936 a function of the length of these pending lists. */
938 static int pending_lists_length;
940 /* The last insn upon which all memory references must depend.
941 This is an insn which flushed the pending lists, creating a dependency
942 between it and all previously pending memory references. This creates
943 a barrier (or a checkpoint) which no memory reference is allowed to cross.
945 This includes all non constant CALL_INSNs. When we do interprocedural
946 alias analysis, this restriction can be relaxed.
947 This may also be an INSN that writes memory if the pending lists grow
950 static rtx last_pending_memory_flush;
952 /* The last function call we have seen. All hard regs, and, of course,
953 the last function call, must depend on this. */
955 static rtx last_function_call;
957 /* The LOG_LINKS field of this is a list of insns which use a pseudo register
958 that does not already cross a call. We create dependencies between each
959 of those insn and the next call insn, to ensure that they won't cross a call
960 after scheduling is done. */
962 static rtx sched_before_next_call;
964 /* Pointer to the last instruction scheduled. Used by rank_for_schedule,
965 so that insns independent of the last scheduled insn will be preferred
966 over dependent instructions. */
968 static rtx last_scheduled_insn;
970 /* Data structures for the computation of data dependences in a regions. We
971 keep one copy of each of the declared above variables for each bb in the
972 region. Before analyzing the data dependences for a bb, its variables
973 are initialized as a function of the variables of its predecessors. When
974 the analysis for a bb completes, we save the contents of each variable X
975 to a corresponding bb_X[bb] variable. For example, pending_read_insns is
976 copied to bb_pending_read_insns[bb]. Another change is that few
977 variables are now a list of insns rather than a single insn:
978 last_pending_memory_flash, last_function_call, reg_last_sets. The
979 manipulation of these variables was changed appropriately. */
981 static rtx **bb_reg_last_uses;
982 static rtx **bb_reg_last_sets;
983 static rtx **bb_reg_last_clobbers;
985 static rtx *bb_pending_read_insns;
986 static rtx *bb_pending_read_mems;
987 static rtx *bb_pending_write_insns;
988 static rtx *bb_pending_write_mems;
989 static int *bb_pending_lists_length;
991 static rtx *bb_last_pending_memory_flush;
992 static rtx *bb_last_function_call;
993 static rtx *bb_sched_before_next_call;
995 /* Functions for construction of the control flow graph. */
997 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
999 We decide not to build the control flow graph if there is possibly more
1000 than one entry to the function, if computed branches exist, of if we
1001 have nonlocal gotos. */
1004 is_cfg_nonregular ()
1010 /* If we have a label that could be the target of a nonlocal goto, then
1011 the cfg is not well structured. */
1012 if (nonlocal_goto_handler_labels)
1015 /* If we have any forced labels, then the cfg is not well structured. */
1019 /* If this function has a computed jump, then we consider the cfg
1020 not well structured. */
1021 if (current_function_has_computed_jump)
1024 /* If we have exception handlers, then we consider the cfg not well
1025 structured. ?!? We should be able to handle this now that flow.c
1026 computes an accurate cfg for EH. */
1027 if (exception_handler_labels)
1030 /* If we have non-jumping insns which refer to labels, then we consider
1031 the cfg not well structured. */
1032 /* Check for labels referred to other thn by jumps. */
1033 for (b = 0; b < n_basic_blocks; b++)
1034 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
1036 code = GET_CODE (insn);
1037 if (GET_RTX_CLASS (code) == 'i')
1041 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
1042 if (REG_NOTE_KIND (note) == REG_LABEL)
1046 if (insn == BLOCK_END (b))
1050 /* All the tests passed. Consider the cfg well structured. */
1054 /* Build the control flow graph and set nr_edges.
1056 Instead of trying to build a cfg ourselves, we rely on flow to
1057 do it for us. Stamp out useless code (and bug) duplication.
1059 Return nonzero if an irregularity in the cfg is found which would
1060 prevent cross block scheduling. */
1063 build_control_flow (s_preds, s_succs, num_preds, num_succs)
1064 int_list_ptr *s_preds;
1065 int_list_ptr *s_succs;
1073 /* Count the number of edges in the cfg. */
1076 for (i = 0; i < n_basic_blocks; i++)
1078 nr_edges += num_succs[i];
1080 /* Unreachable loops with more than one basic block are detected
1081 during the DFS traversal in find_rgns.
1083 Unreachable loops with a single block are detected here. This
1084 test is redundant with the one in find_rgns, but it's much
1085 cheaper to go ahead and catch the trivial case here. */
1086 if (num_preds[i] == 0
1087 || (num_preds[i] == 1 && INT_LIST_VAL (s_preds[i]) == i))
1091 /* Account for entry/exit edges. */
1094 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1095 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1096 edge_table = (haifa_edge *) xcalloc (nr_edges, sizeof (haifa_edge));
1099 for (i = 0; i < n_basic_blocks; i++)
1100 for (succ = s_succs[i]; succ; succ = succ->next)
1102 if (INT_LIST_VAL (succ) != EXIT_BLOCK)
1103 new_edge (i, INT_LIST_VAL (succ));
1106 /* Increment by 1, since edge 0 is unused. */
1113 /* Record an edge in the control flow graph from SOURCE to TARGET.
1115 In theory, this is redundant with the s_succs computed above, but
1116 we have not converted all of haifa to use information from the
1120 new_edge (source, target)
1124 int curr_edge, fst_edge;
1126 /* Check for duplicates. */
1127 fst_edge = curr_edge = OUT_EDGES (source);
1130 if (FROM_BLOCK (curr_edge) == source
1131 && TO_BLOCK (curr_edge) == target)
1136 curr_edge = NEXT_OUT (curr_edge);
1138 if (fst_edge == curr_edge)
1144 FROM_BLOCK (e) = source;
1145 TO_BLOCK (e) = target;
1147 if (OUT_EDGES (source))
1149 next_edge = NEXT_OUT (OUT_EDGES (source));
1150 NEXT_OUT (OUT_EDGES (source)) = e;
1151 NEXT_OUT (e) = next_edge;
1155 OUT_EDGES (source) = e;
1159 if (IN_EDGES (target))
1161 next_edge = NEXT_IN (IN_EDGES (target));
1162 NEXT_IN (IN_EDGES (target)) = e;
1163 NEXT_IN (e) = next_edge;
1167 IN_EDGES (target) = e;
1173 /* BITSET macros for operations on the control flow graph. */
1175 /* Compute bitwise union of two bitsets. */
1176 #define BITSET_UNION(set1, set2, len) \
1177 do { register bitset tp = set1, sp = set2; \
1179 for (i = 0; i < len; i++) \
1180 *(tp++) |= *(sp++); } while (0)
1182 /* Compute bitwise intersection of two bitsets. */
1183 #define BITSET_INTER(set1, set2, len) \
1184 do { register bitset tp = set1, sp = set2; \
1186 for (i = 0; i < len; i++) \
1187 *(tp++) &= *(sp++); } while (0)
1189 /* Compute bitwise difference of two bitsets. */
1190 #define BITSET_DIFFER(set1, set2, len) \
1191 do { register bitset tp = set1, sp = set2; \
1193 for (i = 0; i < len; i++) \
1194 *(tp++) &= ~*(sp++); } while (0)
1196 /* Inverts every bit of bitset 'set'. */
1197 #define BITSET_INVERT(set, len) \
1198 do { register bitset tmpset = set; \
1200 for (i = 0; i < len; i++, tmpset++) \
1201 *tmpset = ~*tmpset; } while (0)
1203 /* Turn on the index'th bit in bitset set. */
1204 #define BITSET_ADD(set, index, len) \
1206 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1209 set[index/HOST_BITS_PER_WIDE_INT] |= \
1210 1 << (index % HOST_BITS_PER_WIDE_INT); \
1213 /* Turn off the index'th bit in set. */
1214 #define BITSET_REMOVE(set, index, len) \
1216 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1219 set[index/HOST_BITS_PER_WIDE_INT] &= \
1220 ~(1 << (index%HOST_BITS_PER_WIDE_INT)); \
1224 /* Check if the index'th bit in bitset set is on. */
1227 bitset_member (set, index, len)
1231 if (index >= HOST_BITS_PER_WIDE_INT * len)
1233 return (set[index / HOST_BITS_PER_WIDE_INT] &
1234 1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
1238 /* Translate a bit-set SET to a list BL of the bit-set members. */
1241 extract_bitlst (set, len, bl)
1247 unsigned HOST_WIDE_INT word;
1249 /* bblst table space is reused in each call to extract_bitlst. */
1250 bitlst_table_last = 0;
1252 bl->first_member = &bitlst_table[bitlst_table_last];
1255 for (i = 0; i < len; i++)
1258 offset = i * HOST_BITS_PER_WIDE_INT;
1259 for (j = 0; word; j++)
1263 bitlst_table[bitlst_table_last++] = offset;
1274 /* Functions for the construction of regions. */
1276 /* Print the regions, for debugging purposes. Callable from debugger. */
1283 fprintf (dump, "\n;; ------------ REGIONS ----------\n\n");
1284 for (rgn = 0; rgn < nr_regions; rgn++)
1286 fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
1287 rgn_table[rgn].rgn_nr_blocks);
1288 fprintf (dump, ";;\tbb/block: ");
1290 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
1292 current_blocks = RGN_BLOCKS (rgn);
1294 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
1297 fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
1300 fprintf (dump, "\n\n");
1305 /* Build a single block region for each basic block in the function.
1306 This allows for using the same code for interblock and basic block
1310 find_single_block_region ()
1314 for (i = 0; i < n_basic_blocks; i++)
1316 rgn_bb_table[i] = i;
1317 RGN_NR_BLOCKS (i) = 1;
1319 CONTAINING_RGN (i) = i;
1320 BLOCK_TO_BB (i) = 0;
1322 nr_regions = n_basic_blocks;
1326 /* Update number of blocks and the estimate for number of insns
1327 in the region. Return 1 if the region is "too large" for interblock
1328 scheduling (compile time considerations), otherwise return 0. */
1331 too_large (block, num_bbs, num_insns)
1332 int block, *num_bbs, *num_insns;
1335 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
1336 INSN_LUID (BLOCK_HEAD (block)));
1337 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
1344 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
1345 is still an inner loop. Put in max_hdr[blk] the header of the most inner
1346 loop containing blk. */
1347 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
1349 if (max_hdr[blk] == -1) \
1350 max_hdr[blk] = hdr; \
1351 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
1352 RESET_BIT (inner, hdr); \
1353 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
1355 RESET_BIT (inner,max_hdr[blk]); \
1356 max_hdr[blk] = hdr; \
1361 /* Find regions for interblock scheduling.
1363 A region for scheduling can be:
1365 * A loop-free procedure, or
1367 * A reducible inner loop, or
1369 * A basic block not contained in any other region.
1372 ?!? In theory we could build other regions based on extended basic
1373 blocks or reverse extended basic blocks. Is it worth the trouble?
1375 Loop blocks that form a region are put into the region's block list
1376 in topological order.
1378 This procedure stores its results into the following global (ick) variables
1387 We use dominator relationships to avoid making regions out of non-reducible
1390 This procedure needs to be converted to work on pred/succ lists instead
1391 of edge tables. That would simplify it somewhat. */
1394 find_rgns (s_preds, s_succs, num_preds, num_succs, dom)
1395 int_list_ptr *s_preds;
1396 int_list_ptr *s_succs;
1401 int *max_hdr, *dfs_nr, *stack, *degree;
1403 int node, child, loop_head, i, head, tail;
1404 int count = 0, sp, idx = 0, current_edge = out_edges[0];
1405 int num_bbs, num_insns, unreachable;
1406 int too_large_failure;
1408 /* Note if an edge has been passed. */
1411 /* Note if a block is a natural loop header. */
1414 /* Note if a block is an natural inner loop header. */
1417 /* Note if a block is in the block queue. */
1420 /* Note if a block is in the block queue. */
1423 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
1424 and a mapping from block to its loop header (if the block is contained
1425 in a loop, else -1).
1427 Store results in HEADER, INNER, and MAX_HDR respectively, these will
1428 be used as inputs to the second traversal.
1430 STACK, SP and DFS_NR are only used during the first traversal. */
1432 /* Allocate and initialize variables for the first traversal. */
1433 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
1434 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
1435 stack = (int *) xmalloc (nr_edges * sizeof (int));
1437 inner = sbitmap_alloc (n_basic_blocks);
1438 sbitmap_ones (inner);
1440 header = sbitmap_alloc (n_basic_blocks);
1441 sbitmap_zero (header);
1443 passed = sbitmap_alloc (nr_edges);
1444 sbitmap_zero (passed);
1446 in_queue = sbitmap_alloc (n_basic_blocks);
1447 sbitmap_zero (in_queue);
1449 in_stack = sbitmap_alloc (n_basic_blocks);
1450 sbitmap_zero (in_stack);
1452 for (i = 0; i < n_basic_blocks; i++)
1455 /* DFS traversal to find inner loops in the cfg. */
1460 if (current_edge == 0 || TEST_BIT (passed, current_edge))
1462 /* We have reached a leaf node or a node that was already
1463 processed. Pop edges off the stack until we find
1464 an edge that has not yet been processed. */
1466 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
1468 /* Pop entry off the stack. */
1469 current_edge = stack[sp--];
1470 node = FROM_BLOCK (current_edge);
1471 child = TO_BLOCK (current_edge);
1472 RESET_BIT (in_stack, child);
1473 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1474 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1475 current_edge = NEXT_OUT (current_edge);
1478 /* See if have finished the DFS tree traversal. */
1479 if (sp < 0 && TEST_BIT (passed, current_edge))
1482 /* Nope, continue the traversal with the popped node. */
1486 /* Process a node. */
1487 node = FROM_BLOCK (current_edge);
1488 child = TO_BLOCK (current_edge);
1489 SET_BIT (in_stack, node);
1490 dfs_nr[node] = ++count;
1492 /* If the successor is in the stack, then we've found a loop.
1493 Mark the loop, if it is not a natural loop, then it will
1494 be rejected during the second traversal. */
1495 if (TEST_BIT (in_stack, child))
1498 SET_BIT (header, child);
1499 UPDATE_LOOP_RELATIONS (node, child);
1500 SET_BIT (passed, current_edge);
1501 current_edge = NEXT_OUT (current_edge);
1505 /* If the child was already visited, then there is no need to visit
1506 it again. Just update the loop relationships and restart
1510 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1511 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1512 SET_BIT (passed, current_edge);
1513 current_edge = NEXT_OUT (current_edge);
1517 /* Push an entry on the stack and continue DFS traversal. */
1518 stack[++sp] = current_edge;
1519 SET_BIT (passed, current_edge);
1520 current_edge = OUT_EDGES (child);
1522 /* This is temporary until haifa is converted to use rth's new
1523 cfg routines which have true entry/exit blocks and the
1524 appropriate edges from/to those blocks.
1526 Generally we update dfs_nr for a node when we process its
1527 out edge. However, if the node has no out edge then we will
1528 not set dfs_nr for that node. This can confuse the scheduler
1529 into thinking that we have unreachable blocks, which in turn
1530 disables cross block scheduling.
1532 So, if we have a node with no out edges, go ahead and mark it
1533 as reachable now. */
1534 if (current_edge == 0)
1535 dfs_nr[child] = ++count;
1538 /* Another check for unreachable blocks. The earlier test in
1539 is_cfg_nonregular only finds unreachable blocks that do not
1542 The DFS traversal will mark every block that is reachable from
1543 the entry node by placing a nonzero value in dfs_nr. Thus if
1544 dfs_nr is zero for any block, then it must be unreachable. */
1546 for (i = 0; i < n_basic_blocks; i++)
1553 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
1554 to hold degree counts. */
1557 /* Compute the in-degree of every block in the graph. */
1558 for (i = 0; i < n_basic_blocks; i++)
1559 degree[i] = num_preds[i];
1561 /* Do not perform region scheduling if there are any unreachable
1568 SET_BIT (header, 0);
1570 /* Second travsersal:find reducible inner loops and topologically sort
1571 block of each region. */
1573 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
1575 /* Find blocks which are inner loop headers. We still have non-reducible
1576 loops to consider at this point. */
1577 for (i = 0; i < n_basic_blocks; i++)
1579 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
1584 /* Now check that the loop is reducible. We do this separate
1585 from finding inner loops so that we do not find a reducible
1586 loop which contains an inner non-reducible loop.
1588 A simple way to find reducible/natural loops is to verify
1589 that each block in the loop is dominated by the loop
1592 If there exists a block that is not dominated by the loop
1593 header, then the block is reachable from outside the loop
1594 and thus the loop is not a natural loop. */
1595 for (j = 0; j < n_basic_blocks; j++)
1597 /* First identify blocks in the loop, except for the loop
1599 if (i == max_hdr[j] && i != j)
1601 /* Now verify that the block is dominated by the loop
1603 if (!TEST_BIT (dom[j], i))
1608 /* If we exited the loop early, then I is the header of
1609 a non-reducible loop and we should quit processing it
1611 if (j != n_basic_blocks)
1614 /* I is a header of an inner loop, or block 0 in a subroutine
1615 with no loops at all. */
1617 too_large_failure = 0;
1618 loop_head = max_hdr[i];
1620 /* Decrease degree of all I's successors for topological
1622 for (ps = s_succs[i]; ps; ps = ps->next)
1623 if (INT_LIST_VAL (ps) != EXIT_BLOCK
1624 && INT_LIST_VAL (ps) != ENTRY_BLOCK)
1625 --degree[INT_LIST_VAL(ps)];
1627 /* Estimate # insns, and count # blocks in the region. */
1629 num_insns = (INSN_LUID (BLOCK_END (i))
1630 - INSN_LUID (BLOCK_HEAD (i)));
1633 /* Find all loop latches (blocks with back edges to the loop
1634 header) or all the leaf blocks in the cfg has no loops.
1636 Place those blocks into the queue. */
1639 for (j = 0; j < n_basic_blocks; j++)
1640 /* Leaf nodes have only a single successor which must
1642 if (num_succs[j] == 1
1643 && INT_LIST_VAL (s_succs[j]) == EXIT_BLOCK)
1646 SET_BIT (in_queue, j);
1648 if (too_large (j, &num_bbs, &num_insns))
1650 too_large_failure = 1;
1659 for (ps = s_preds[i]; ps; ps = ps->next)
1661 node = INT_LIST_VAL (ps);
1663 if (node == ENTRY_BLOCK || node == EXIT_BLOCK)
1666 if (max_hdr[node] == loop_head && node != i)
1668 /* This is a loop latch. */
1669 queue[++tail] = node;
1670 SET_BIT (in_queue, node);
1672 if (too_large (node, &num_bbs, &num_insns))
1674 too_large_failure = 1;
1682 /* Now add all the blocks in the loop to the queue.
1684 We know the loop is a natural loop; however the algorithm
1685 above will not always mark certain blocks as being in the
1694 The algorithm in the DFS traversal may not mark B & D as part
1695 of the loop (ie they will not have max_hdr set to A).
1697 We know they can not be loop latches (else they would have
1698 had max_hdr set since they'd have a backedge to a dominator
1699 block). So we don't need them on the initial queue.
1701 We know they are part of the loop because they are dominated
1702 by the loop header and can be reached by a backwards walk of
1703 the edges starting with nodes on the initial queue.
1705 It is safe and desirable to include those nodes in the
1706 loop/scheduling region. To do so we would need to decrease
1707 the degree of a node if it is the target of a backedge
1708 within the loop itself as the node is placed in the queue.
1710 We do not do this because I'm not sure that the actual
1711 scheduling code will properly handle this case. ?!? */
1713 while (head < tail && !too_large_failure)
1716 child = queue[++head];
1718 for (ps = s_preds[child]; ps; ps = ps->next)
1720 node = INT_LIST_VAL (ps);
1722 /* See discussion above about nodes not marked as in
1723 this loop during the initial DFS traversal. */
1724 if (node == ENTRY_BLOCK || node == EXIT_BLOCK
1725 || max_hdr[node] != loop_head)
1730 else if (!TEST_BIT (in_queue, node) && node != i)
1732 queue[++tail] = node;
1733 SET_BIT (in_queue, node);
1735 if (too_large (node, &num_bbs, &num_insns))
1737 too_large_failure = 1;
1744 if (tail >= 0 && !too_large_failure)
1746 /* Place the loop header into list of region blocks. */
1748 rgn_bb_table[idx] = i;
1749 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1750 RGN_BLOCKS (nr_regions) = idx++;
1751 CONTAINING_RGN (i) = nr_regions;
1752 BLOCK_TO_BB (i) = count = 0;
1754 /* Remove blocks from queue[] when their in degree
1755 becomes zero. Repeat until no blocks are left on the
1756 list. This produces a topological list of blocks in
1764 child = queue[head];
1765 if (degree[child] == 0)
1768 rgn_bb_table[idx++] = child;
1769 BLOCK_TO_BB (child) = ++count;
1770 CONTAINING_RGN (child) = nr_regions;
1771 queue[head] = queue[tail--];
1773 for (ps = s_succs[child]; ps; ps = ps->next)
1774 if (INT_LIST_VAL (ps) != ENTRY_BLOCK
1775 && INT_LIST_VAL (ps) != EXIT_BLOCK)
1776 --degree[INT_LIST_VAL (ps)];
1788 /* Any block that did not end up in a region is placed into a region
1790 for (i = 0; i < n_basic_blocks; i++)
1793 rgn_bb_table[idx] = i;
1794 RGN_NR_BLOCKS (nr_regions) = 1;
1795 RGN_BLOCKS (nr_regions) = idx++;
1796 CONTAINING_RGN (i) = nr_regions++;
1797 BLOCK_TO_BB (i) = 0;
1811 /* Functions for regions scheduling information. */
1813 /* Compute dominators, probability, and potential-split-edges of bb.
1814 Assume that these values were already computed for bb's predecessors. */
1817 compute_dom_prob_ps (bb)
1820 int nxt_in_edge, fst_in_edge, pred;
1821 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1824 if (IS_RGN_ENTRY (bb))
1826 BITSET_ADD (dom[bb], 0, bbset_size);
1831 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1833 /* Intialize dom[bb] to '111..1'. */
1834 BITSET_INVERT (dom[bb], bbset_size);
1838 pred = FROM_BLOCK (nxt_in_edge);
1839 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1841 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1844 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1847 nr_rgn_out_edges = 0;
1848 fst_out_edge = OUT_EDGES (pred);
1849 nxt_out_edge = NEXT_OUT (fst_out_edge);
1850 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1853 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1855 /* The successor doesn't belong in the region? */
1856 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1857 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1860 while (fst_out_edge != nxt_out_edge)
1863 /* The successor doesn't belong in the region? */
1864 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1865 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1867 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1868 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1872 /* Now nr_rgn_out_edges is the number of region-exit edges from
1873 pred, and nr_out_edges will be the number of pred out edges
1874 not leaving the region. */
1875 nr_out_edges -= nr_rgn_out_edges;
1876 if (nr_rgn_out_edges > 0)
1877 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1879 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1880 nxt_in_edge = NEXT_IN (nxt_in_edge);
1882 while (fst_in_edge != nxt_in_edge);
1884 BITSET_ADD (dom[bb], bb, bbset_size);
1885 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1887 if (sched_verbose >= 2)
1888 fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb]));
1889 } /* compute_dom_prob_ps */
1891 /* Functions for target info. */
1893 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1894 Note that bb_trg dominates bb_src. */
1897 split_edges (bb_src, bb_trg, bl)
1902 int es = edgeset_size;
1903 edgeset src = (edgeset) xmalloc (es * sizeof (HOST_WIDE_INT));
1906 src[es] = (pot_split[bb_src])[es];
1907 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1908 extract_bitlst (src, edgeset_size, bl);
1913 /* Find the valid candidate-source-blocks for the target block TRG, compute
1914 their probability, and check if they are speculative or not.
1915 For speculative sources, compute their update-blocks and split-blocks. */
1918 compute_trg_info (trg)
1921 register candidate *sp;
1923 int check_block, update_idx;
1924 int i, j, k, fst_edge, nxt_edge;
1926 /* Define some of the fields for the target bb as well. */
1927 sp = candidate_table + trg;
1929 sp->is_speculative = 0;
1932 for (i = trg + 1; i < current_nr_blocks; i++)
1934 sp = candidate_table + i;
1936 sp->is_valid = IS_DOMINATED (i, trg);
1939 sp->src_prob = GET_SRC_PROB (i, trg);
1940 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1945 split_edges (i, trg, &el);
1946 sp->is_speculative = (el.nr_members) ? 1 : 0;
1947 if (sp->is_speculative && !flag_schedule_speculative)
1953 sp->split_bbs.first_member = &bblst_table[bblst_last];
1954 sp->split_bbs.nr_members = el.nr_members;
1955 for (j = 0; j < el.nr_members; bblst_last++, j++)
1956 bblst_table[bblst_last] =
1957 TO_BLOCK (rgn_edges[el.first_member[j]]);
1958 sp->update_bbs.first_member = &bblst_table[bblst_last];
1960 for (j = 0; j < el.nr_members; j++)
1962 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1963 fst_edge = nxt_edge = OUT_EDGES (check_block);
1966 for (k = 0; k < el.nr_members; k++)
1967 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1970 if (k >= el.nr_members)
1972 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1976 nxt_edge = NEXT_OUT (nxt_edge);
1978 while (fst_edge != nxt_edge);
1980 sp->update_bbs.nr_members = update_idx;
1985 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1987 sp->is_speculative = 0;
1991 } /* compute_trg_info */
1994 /* Print candidates info, for debugging purposes. Callable from debugger. */
2000 if (!candidate_table[i].is_valid)
2003 if (candidate_table[i].is_speculative)
2006 fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
2008 fprintf (dump, "split path: ");
2009 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
2011 int b = candidate_table[i].split_bbs.first_member[j];
2013 fprintf (dump, " %d ", b);
2015 fprintf (dump, "\n");
2017 fprintf (dump, "update path: ");
2018 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
2020 int b = candidate_table[i].update_bbs.first_member[j];
2022 fprintf (dump, " %d ", b);
2024 fprintf (dump, "\n");
2028 fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
2033 /* Print candidates info, for debugging purposes. Callable from debugger. */
2036 debug_candidates (trg)
2041 fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
2042 BB_TO_BLOCK (trg), trg);
2043 for (i = trg + 1; i < current_nr_blocks; i++)
2044 debug_candidate (i);
2048 /* Functions for speculative scheduing. */
2050 /* Return 0 if x is a set of a register alive in the beginning of one
2051 of the split-blocks of src, otherwise return 1. */
2054 check_live_1 (src, x)
2060 register rtx reg = SET_DEST (x);
2065 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2066 || GET_CODE (reg) == SIGN_EXTRACT
2067 || GET_CODE (reg) == STRICT_LOW_PART)
2068 reg = XEXP (reg, 0);
2070 if (GET_CODE (reg) == PARALLEL
2071 && GET_MODE (reg) == BLKmode)
2074 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2075 if (check_live_1 (src, XVECEXP (reg, 0, i)))
2080 if (GET_CODE (reg) != REG)
2083 regno = REGNO (reg);
2085 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
2087 /* Global registers are assumed live. */
2092 if (regno < FIRST_PSEUDO_REGISTER)
2094 /* Check for hard registers. */
2095 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2098 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2100 int b = candidate_table[src].split_bbs.first_member[i];
2102 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
2112 /* Check for psuedo registers. */
2113 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2115 int b = candidate_table[src].split_bbs.first_member[i];
2117 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
2129 /* If x is a set of a register R, mark that R is alive in the beginning
2130 of every update-block of src. */
2133 update_live_1 (src, x)
2139 register rtx reg = SET_DEST (x);
2144 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2145 || GET_CODE (reg) == SIGN_EXTRACT
2146 || GET_CODE (reg) == STRICT_LOW_PART)
2147 reg = XEXP (reg, 0);
2149 if (GET_CODE (reg) == PARALLEL
2150 && GET_MODE (reg) == BLKmode)
2153 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2154 update_live_1 (src, XVECEXP (reg, 0, i));
2158 if (GET_CODE (reg) != REG)
2161 /* Global registers are always live, so the code below does not apply
2164 regno = REGNO (reg);
2166 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
2168 if (regno < FIRST_PSEUDO_REGISTER)
2170 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2173 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2175 int b = candidate_table[src].update_bbs.first_member[i];
2177 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
2184 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2186 int b = candidate_table[src].update_bbs.first_member[i];
2188 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
2195 /* Return 1 if insn can be speculatively moved from block src to trg,
2196 otherwise return 0. Called before first insertion of insn to
2197 ready-list or before the scheduling. */
2200 check_live (insn, src)
2204 /* Find the registers set by instruction. */
2205 if (GET_CODE (PATTERN (insn)) == SET
2206 || GET_CODE (PATTERN (insn)) == CLOBBER)
2207 return check_live_1 (src, PATTERN (insn));
2208 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2211 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2212 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2213 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2214 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
2224 /* Update the live registers info after insn was moved speculatively from
2225 block src to trg. */
2228 update_live (insn, src)
2232 /* Find the registers set by instruction. */
2233 if (GET_CODE (PATTERN (insn)) == SET
2234 || GET_CODE (PATTERN (insn)) == CLOBBER)
2235 update_live_1 (src, PATTERN (insn));
2236 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2239 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2240 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2241 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2242 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
2246 /* Exception Free Loads:
2248 We define five classes of speculative loads: IFREE, IRISKY,
2249 PFREE, PRISKY, and MFREE.
2251 IFREE loads are loads that are proved to be exception-free, just
2252 by examining the load insn. Examples for such loads are loads
2253 from TOC and loads of global data.
2255 IRISKY loads are loads that are proved to be exception-risky,
2256 just by examining the load insn. Examples for such loads are
2257 volatile loads and loads from shared memory.
2259 PFREE loads are loads for which we can prove, by examining other
2260 insns, that they are exception-free. Currently, this class consists
2261 of loads for which we are able to find a "similar load", either in
2262 the target block, or, if only one split-block exists, in that split
2263 block. Load2 is similar to load1 if both have same single base
2264 register. We identify only part of the similar loads, by finding
2265 an insn upon which both load1 and load2 have a DEF-USE dependence.
2267 PRISKY loads are loads for which we can prove, by examining other
2268 insns, that they are exception-risky. Currently we have two proofs for
2269 such loads. The first proof detects loads that are probably guarded by a
2270 test on the memory address. This proof is based on the
2271 backward and forward data dependence information for the region.
2272 Let load-insn be the examined load.
2273 Load-insn is PRISKY iff ALL the following hold:
2275 - insn1 is not in the same block as load-insn
2276 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
2277 - test-insn is either a compare or a branch, not in the same block
2279 - load-insn is reachable from test-insn
2280 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
2282 This proof might fail when the compare and the load are fed
2283 by an insn not in the region. To solve this, we will add to this
2284 group all loads that have no input DEF-USE dependence.
2286 The second proof detects loads that are directly or indirectly
2287 fed by a speculative load. This proof is affected by the
2288 scheduling process. We will use the flag fed_by_spec_load.
2289 Initially, all insns have this flag reset. After a speculative
2290 motion of an insn, if insn is either a load, or marked as
2291 fed_by_spec_load, we will also mark as fed_by_spec_load every
2292 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
2293 load which is fed_by_spec_load is also PRISKY.
2295 MFREE (maybe-free) loads are all the remaining loads. They may be
2296 exception-free, but we cannot prove it.
2298 Now, all loads in IFREE and PFREE classes are considered
2299 exception-free, while all loads in IRISKY and PRISKY classes are
2300 considered exception-risky. As for loads in the MFREE class,
2301 these are considered either exception-free or exception-risky,
2302 depending on whether we are pessimistic or optimistic. We have
2303 to take the pessimistic approach to assure the safety of
2304 speculative scheduling, but we can take the optimistic approach
2305 by invoking the -fsched_spec_load_dangerous option. */
2307 enum INSN_TRAP_CLASS
2309 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
2310 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
2313 #define WORST_CLASS(class1, class2) \
2314 ((class1 > class2) ? class1 : class2)
2316 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
2317 #define IS_REACHABLE(bb_from, bb_to) \
2319 || IS_RGN_ENTRY (bb_from) \
2320 || (bitset_member (ancestor_edges[bb_to], \
2321 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
2324 /* Non-zero iff the address is comprised from at most 1 register. */
2325 #define CONST_BASED_ADDRESS_P(x) \
2326 (GET_CODE (x) == REG \
2327 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
2328 || (GET_CODE (x) == LO_SUM)) \
2329 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
2330 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
2332 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
2335 set_spec_fed (load_insn)
2340 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
2341 if (GET_MODE (link) == VOIDmode)
2342 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
2343 } /* set_spec_fed */
2345 /* On the path from the insn to load_insn_bb, find a conditional
2346 branch depending on insn, that guards the speculative load. */
2349 find_conditional_protection (insn, load_insn_bb)
2355 /* Iterate through DEF-USE forward dependences. */
2356 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2358 rtx next = XEXP (link, 0);
2359 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
2360 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
2361 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
2362 && load_insn_bb != INSN_BB (next)
2363 && GET_MODE (link) == VOIDmode
2364 && (GET_CODE (next) == JUMP_INSN
2365 || find_conditional_protection (next, load_insn_bb)))
2369 } /* find_conditional_protection */
2371 /* Returns 1 if the same insn1 that participates in the computation
2372 of load_insn's address is feeding a conditional branch that is
2373 guarding on load_insn. This is true if we find a the two DEF-USE
2375 insn1 -> ... -> conditional-branch
2376 insn1 -> ... -> load_insn,
2377 and if a flow path exist:
2378 insn1 -> ... -> conditional-branch -> ... -> load_insn,
2379 and if insn1 is on the path
2380 region-entry -> ... -> bb_trg -> ... load_insn.
2382 Locate insn1 by climbing on LOG_LINKS from load_insn.
2383 Locate the branch by following INSN_DEPEND from insn1. */
2386 is_conditionally_protected (load_insn, bb_src, bb_trg)
2392 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
2394 rtx insn1 = XEXP (link, 0);
2396 /* Must be a DEF-USE dependence upon non-branch. */
2397 if (GET_MODE (link) != VOIDmode
2398 || GET_CODE (insn1) == JUMP_INSN)
2401 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
2402 if (INSN_BB (insn1) == bb_src
2403 || (CONTAINING_RGN (BLOCK_NUM (insn1))
2404 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
2405 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
2406 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
2409 /* Now search for the conditional-branch. */
2410 if (find_conditional_protection (insn1, bb_src))
2413 /* Recursive step: search another insn1, "above" current insn1. */
2414 return is_conditionally_protected (insn1, bb_src, bb_trg);
2417 /* The chain does not exist. */
2419 } /* is_conditionally_protected */
2421 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
2422 load_insn can move speculatively from bb_src to bb_trg. All the
2423 following must hold:
2425 (1) both loads have 1 base register (PFREE_CANDIDATEs).
2426 (2) load_insn and load1 have a def-use dependence upon
2427 the same insn 'insn1'.
2428 (3) either load2 is in bb_trg, or:
2429 - there's only one split-block, and
2430 - load1 is on the escape path, and
2432 From all these we can conclude that the two loads access memory
2433 addresses that differ at most by a constant, and hence if moving
2434 load_insn would cause an exception, it would have been caused by
2438 is_pfree (load_insn, bb_src, bb_trg)
2443 register candidate *candp = candidate_table + bb_src;
2445 if (candp->split_bbs.nr_members != 1)
2446 /* Must have exactly one escape block. */
2449 for (back_link = LOG_LINKS (load_insn);
2450 back_link; back_link = XEXP (back_link, 1))
2452 rtx insn1 = XEXP (back_link, 0);
2454 if (GET_MODE (back_link) == VOIDmode)
2456 /* Found a DEF-USE dependence (insn1, load_insn). */
2459 for (fore_link = INSN_DEPEND (insn1);
2460 fore_link; fore_link = XEXP (fore_link, 1))
2462 rtx insn2 = XEXP (fore_link, 0);
2463 if (GET_MODE (fore_link) == VOIDmode)
2465 /* Found a DEF-USE dependence (insn1, insn2). */
2466 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
2467 /* insn2 not guaranteed to be a 1 base reg load. */
2470 if (INSN_BB (insn2) == bb_trg)
2471 /* insn2 is the similar load, in the target block. */
2474 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
2475 /* insn2 is a similar load, in a split-block. */
2482 /* Couldn't find a similar load. */
2486 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
2487 as found by analyzing insn's expression. */
2490 may_trap_exp (x, is_store)
2498 code = GET_CODE (x);
2508 /* The insn uses memory: a volatile load. */
2509 if (MEM_VOLATILE_P (x))
2511 /* An exception-free load. */
2512 if (!may_trap_p (x))
2514 /* A load with 1 base register, to be further checked. */
2515 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
2516 return PFREE_CANDIDATE;
2517 /* No info on the load, to be further checked. */
2518 return PRISKY_CANDIDATE;
2523 int i, insn_class = TRAP_FREE;
2525 /* Neither store nor load, check if it may cause a trap. */
2528 /* Recursive step: walk the insn... */
2529 fmt = GET_RTX_FORMAT (code);
2530 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2534 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
2535 insn_class = WORST_CLASS (insn_class, tmp_class);
2537 else if (fmt[i] == 'E')
2540 for (j = 0; j < XVECLEN (x, i); j++)
2542 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
2543 insn_class = WORST_CLASS (insn_class, tmp_class);
2544 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2548 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2553 } /* may_trap_exp */
2556 /* Classifies insn for the purpose of verifying that it can be
2557 moved speculatively, by examining it's patterns, returning:
2558 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
2559 TRAP_FREE: non-load insn.
2560 IFREE: load from a globaly safe location.
2561 IRISKY: volatile load.
2562 PFREE_CANDIDATE, PRISKY_CANDIDATE: load that need to be checked for
2563 being either PFREE or PRISKY. */
2566 haifa_classify_insn (insn)
2569 rtx pat = PATTERN (insn);
2570 int tmp_class = TRAP_FREE;
2571 int insn_class = TRAP_FREE;
2574 if (GET_CODE (pat) == PARALLEL)
2576 int i, len = XVECLEN (pat, 0);
2578 for (i = len - 1; i >= 0; i--)
2580 code = GET_CODE (XVECEXP (pat, 0, i));
2584 /* Test if it is a 'store'. */
2585 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
2588 /* Test if it is a store. */
2589 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
2590 if (tmp_class == TRAP_RISKY)
2592 /* Test if it is a load. */
2594 WORST_CLASS (tmp_class,
2595 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
2598 tmp_class = TRAP_RISKY;
2602 insn_class = WORST_CLASS (insn_class, tmp_class);
2603 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2609 code = GET_CODE (pat);
2613 /* Test if it is a 'store'. */
2614 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
2617 /* Test if it is a store. */
2618 tmp_class = may_trap_exp (SET_DEST (pat), 1);
2619 if (tmp_class == TRAP_RISKY)
2621 /* Test if it is a load. */
2623 WORST_CLASS (tmp_class,
2624 may_trap_exp (SET_SRC (pat), 0));
2627 tmp_class = TRAP_RISKY;
2631 insn_class = tmp_class;
2636 } /* haifa_classify_insn */
2638 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2639 a load moved speculatively, or if load_insn is protected by
2640 a compare on load_insn's address). */
2643 is_prisky (load_insn, bb_src, bb_trg)
2647 if (FED_BY_SPEC_LOAD (load_insn))
2650 if (LOG_LINKS (load_insn) == NULL)
2651 /* Dependence may 'hide' out of the region. */
2654 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2660 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2661 Return 1 if insn is exception-free (and the motion is valid)
2665 is_exception_free (insn, bb_src, bb_trg)
2669 int insn_class = haifa_classify_insn (insn);
2671 /* Handle non-load insns. */
2682 if (!flag_schedule_speculative_load)
2684 IS_LOAD_INSN (insn) = 1;
2691 case PFREE_CANDIDATE:
2692 if (is_pfree (insn, bb_src, bb_trg))
2694 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2695 case PRISKY_CANDIDATE:
2696 if (!flag_schedule_speculative_load_dangerous
2697 || is_prisky (insn, bb_src, bb_trg))
2703 return flag_schedule_speculative_load_dangerous;
2704 } /* is_exception_free */
2707 /* Process an insn's memory dependencies. There are four kinds of
2710 (0) read dependence: read follows read
2711 (1) true dependence: read follows write
2712 (2) anti dependence: write follows read
2713 (3) output dependence: write follows write
2715 We are careful to build only dependencies which actually exist, and
2716 use transitivity to avoid building too many links. */
2718 /* Return the INSN_LIST containing INSN in LIST, or NULL
2719 if LIST does not contain INSN. */
2721 HAIFA_INLINE static rtx
2722 find_insn_list (insn, list)
2728 if (XEXP (list, 0) == insn)
2730 list = XEXP (list, 1);
2736 /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
2739 HAIFA_INLINE static char
2740 find_insn_mem_list (insn, x, list, list1)
2746 if (XEXP (list, 0) == insn
2747 && XEXP (list1, 0) == x)
2749 list = XEXP (list, 1);
2750 list1 = XEXP (list1, 1);
2756 /* Compute the function units used by INSN. This caches the value
2757 returned by function_units_used. A function unit is encoded as the
2758 unit number if the value is non-negative and the compliment of a
2759 mask if the value is negative. A function unit index is the
2760 non-negative encoding. */
2762 HAIFA_INLINE static int
2766 register int unit = INSN_UNIT (insn);
2770 recog_memoized (insn);
2772 /* A USE insn, or something else we don't need to understand.
2773 We can't pass these directly to function_units_used because it will
2774 trigger a fatal error for unrecognizable insns. */
2775 if (INSN_CODE (insn) < 0)
2779 unit = function_units_used (insn);
2780 /* Increment non-negative values so we can cache zero. */
2784 /* We only cache 16 bits of the result, so if the value is out of
2785 range, don't cache it. */
2786 if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
2788 || (unit & ~((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
2789 INSN_UNIT (insn) = unit;
2791 return (unit > 0 ? unit - 1 : unit);
2794 /* Compute the blockage range for executing INSN on UNIT. This caches
2795 the value returned by the blockage_range_function for the unit.
2796 These values are encoded in an int where the upper half gives the
2797 minimum value and the lower half gives the maximum value. */
2799 HAIFA_INLINE static unsigned int
2800 blockage_range (unit, insn)
2804 unsigned int blockage = INSN_BLOCKAGE (insn);
2807 if ((int) UNIT_BLOCKED (blockage) != unit + 1)
2809 range = function_units[unit].blockage_range_function (insn);
2810 /* We only cache the blockage range for one unit and then only if
2812 if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
2813 INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
2816 range = BLOCKAGE_RANGE (blockage);
2821 /* A vector indexed by function unit instance giving the last insn to use
2822 the unit. The value of the function unit instance index for unit U
2823 instance I is (U + I * FUNCTION_UNITS_SIZE). */
2824 static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2826 /* A vector indexed by function unit instance giving the minimum time when
2827 the unit will unblock based on the maximum blockage cost. */
2828 static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2830 /* A vector indexed by function unit number giving the number of insns
2831 that remain to use the unit. */
2832 static int unit_n_insns[FUNCTION_UNITS_SIZE];
2834 /* Reset the function unit state to the null state. */
2839 bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
2840 bzero ((char *) unit_tick, sizeof (unit_tick));
2841 bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
2844 /* Return the issue-delay of an insn. */
2846 HAIFA_INLINE static int
2847 insn_issue_delay (insn)
2851 int unit = insn_unit (insn);
2853 /* Efficiency note: in fact, we are working 'hard' to compute a
2854 value that was available in md file, and is not available in
2855 function_units[] structure. It would be nice to have this
2856 value there, too. */
2859 if (function_units[unit].blockage_range_function &&
2860 function_units[unit].blockage_function)
2861 delay = function_units[unit].blockage_function (insn, insn);
2864 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2865 if ((unit & 1) != 0 && function_units[i].blockage_range_function
2866 && function_units[i].blockage_function)
2867 delay = MAX (delay, function_units[i].blockage_function (insn, insn));
2872 /* Return the actual hazard cost of executing INSN on the unit UNIT,
2873 instance INSTANCE at time CLOCK if the previous actual hazard cost
2876 HAIFA_INLINE static int
2877 actual_hazard_this_instance (unit, instance, insn, clock, cost)
2878 int unit, instance, clock, cost;
2881 int tick = unit_tick[instance]; /* Issue time of the last issued insn. */
2883 if (tick - clock > cost)
2885 /* The scheduler is operating forward, so unit's last insn is the
2886 executing insn and INSN is the candidate insn. We want a
2887 more exact measure of the blockage if we execute INSN at CLOCK
2888 given when we committed the execution of the unit's last insn.
2890 The blockage value is given by either the unit's max blockage
2891 constant, blockage range function, or blockage function. Use
2892 the most exact form for the given unit. */
2894 if (function_units[unit].blockage_range_function)
2896 if (function_units[unit].blockage_function)
2897 tick += (function_units[unit].blockage_function
2898 (unit_last_insn[instance], insn)
2899 - function_units[unit].max_blockage);
2901 tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
2902 - function_units[unit].max_blockage);
2904 if (tick - clock > cost)
2905 cost = tick - clock;
2910 /* Record INSN as having begun execution on the units encoded by UNIT at
2913 HAIFA_INLINE static void
2914 schedule_unit (unit, insn, clock)
2922 int instance = unit;
2923 #if MAX_MULTIPLICITY > 1
2924 /* Find the first free instance of the function unit and use that
2925 one. We assume that one is free. */
2926 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2928 if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
2930 instance += FUNCTION_UNITS_SIZE;
2933 unit_last_insn[instance] = insn;
2934 unit_tick[instance] = (clock + function_units[unit].max_blockage);
2937 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2938 if ((unit & 1) != 0)
2939 schedule_unit (i, insn, clock);
2942 /* Return the actual hazard cost of executing INSN on the units encoded by
2943 UNIT at time CLOCK if the previous actual hazard cost was COST. */
2945 HAIFA_INLINE static int
2946 actual_hazard (unit, insn, clock, cost)
2947 int unit, clock, cost;
2954 /* Find the instance of the function unit with the minimum hazard. */
2955 int instance = unit;
2956 int best_cost = actual_hazard_this_instance (unit, instance, insn,
2958 #if MAX_MULTIPLICITY > 1
2961 if (best_cost > cost)
2963 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2965 instance += FUNCTION_UNITS_SIZE;
2966 this_cost = actual_hazard_this_instance (unit, instance, insn,
2968 if (this_cost < best_cost)
2970 best_cost = this_cost;
2971 if (this_cost <= cost)
2977 cost = MAX (cost, best_cost);
2980 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2981 if ((unit & 1) != 0)
2982 cost = actual_hazard (i, insn, clock, cost);
2987 /* Return the potential hazard cost of executing an instruction on the
2988 units encoded by UNIT if the previous potential hazard cost was COST.
2989 An insn with a large blockage time is chosen in preference to one
2990 with a smaller time; an insn that uses a unit that is more likely
2991 to be used is chosen in preference to one with a unit that is less
2992 used. We are trying to minimize a subsequent actual hazard. */
2994 HAIFA_INLINE static int
2995 potential_hazard (unit, insn, cost)
3000 unsigned int minb, maxb;
3004 minb = maxb = function_units[unit].max_blockage;
3007 if (function_units[unit].blockage_range_function)
3009 maxb = minb = blockage_range (unit, insn);
3010 maxb = MAX_BLOCKAGE_COST (maxb);
3011 minb = MIN_BLOCKAGE_COST (minb);
3016 /* Make the number of instructions left dominate. Make the
3017 minimum delay dominate the maximum delay. If all these
3018 are the same, use the unit number to add an arbitrary
3019 ordering. Other terms can be added. */
3020 ncost = minb * 0x40 + maxb;
3021 ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
3028 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
3029 if ((unit & 1) != 0)
3030 cost = potential_hazard (i, insn, cost);
3035 /* Compute cost of executing INSN given the dependence LINK on the insn USED.
3036 This is the number of cycles between instruction issue and
3037 instruction results. */
3039 HAIFA_INLINE static int
3040 insn_cost (insn, link, used)
3041 rtx insn, link, used;
3043 register int cost = INSN_COST (insn);
3047 recog_memoized (insn);
3049 /* A USE insn, or something else we don't need to understand.
3050 We can't pass these directly to result_ready_cost because it will
3051 trigger a fatal error for unrecognizable insns. */
3052 if (INSN_CODE (insn) < 0)
3054 INSN_COST (insn) = 1;
3059 cost = result_ready_cost (insn);
3064 INSN_COST (insn) = cost;
3068 /* In this case estimate cost without caring how insn is used. */
3069 if (link == 0 && used == 0)
3072 /* A USE insn should never require the value used to be computed. This
3073 allows the computation of a function's result and parameter values to
3074 overlap the return and call. */
3075 recog_memoized (used);
3076 if (INSN_CODE (used) < 0)
3077 LINK_COST_FREE (link) = 1;
3079 /* If some dependencies vary the cost, compute the adjustment. Most
3080 commonly, the adjustment is complete: either the cost is ignored
3081 (in the case of an output- or anti-dependence), or the cost is
3082 unchanged. These values are cached in the link as LINK_COST_FREE
3083 and LINK_COST_ZERO. */
3085 if (LINK_COST_FREE (link))
3088 else if (!LINK_COST_ZERO (link))
3092 ADJUST_COST (used, link, insn, ncost);
3095 LINK_COST_FREE (link) = 1;
3099 LINK_COST_ZERO (link) = 1;
3106 /* Compute the priority number for INSN. */
3115 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3118 if ((this_priority = INSN_PRIORITY (insn)) == 0)
3120 if (INSN_DEPEND (insn) == 0)
3121 this_priority = insn_cost (insn, 0, 0);
3123 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
3128 if (RTX_INTEGRATED_P (link))
3131 next = XEXP (link, 0);
3133 /* Critical path is meaningful in block boundaries only. */
3134 if (BLOCK_NUM (next) != BLOCK_NUM (insn))
3137 next_priority = insn_cost (insn, link, next) + priority (next);
3138 if (next_priority > this_priority)
3139 this_priority = next_priority;
3141 INSN_PRIORITY (insn) = this_priority;
3143 return this_priority;
3147 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
3148 them to the unused_*_list variables, so that they can be reused. */
3151 free_pending_lists ()
3153 if (current_nr_blocks <= 1)
3155 free_INSN_LIST_list (&pending_read_insns);
3156 free_INSN_LIST_list (&pending_write_insns);
3157 free_EXPR_LIST_list (&pending_read_mems);
3158 free_EXPR_LIST_list (&pending_write_mems);
3162 /* Interblock scheduling. */
3165 for (bb = 0; bb < current_nr_blocks; bb++)
3167 free_INSN_LIST_list (&bb_pending_read_insns[bb]);
3168 free_INSN_LIST_list (&bb_pending_write_insns[bb]);
3169 free_EXPR_LIST_list (&bb_pending_read_mems[bb]);
3170 free_EXPR_LIST_list (&bb_pending_write_mems[bb]);
3175 /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
3176 The MEM is a memory reference contained within INSN, which we are saving
3177 so that we can do memory aliasing on it. */
3180 add_insn_mem_dependence (insn_list, mem_list, insn, mem)
3181 rtx *insn_list, *mem_list, insn, mem;
3185 link = alloc_INSN_LIST (insn, *insn_list);
3188 link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
3191 pending_lists_length++;
3195 /* Make a dependency between every memory reference on the pending lists
3196 and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush
3200 flush_pending_lists (insn, only_write)
3207 while (pending_read_insns && ! only_write)
3209 add_dependence (insn, XEXP (pending_read_insns, 0), REG_DEP_ANTI);
3211 link = pending_read_insns;
3212 pending_read_insns = XEXP (pending_read_insns, 1);
3213 free_INSN_LIST_node (link);
3215 link = pending_read_mems;
3216 pending_read_mems = XEXP (pending_read_mems, 1);
3217 free_EXPR_LIST_node (link);
3219 while (pending_write_insns)
3221 add_dependence (insn, XEXP (pending_write_insns, 0), REG_DEP_ANTI);
3223 link = pending_write_insns;
3224 pending_write_insns = XEXP (pending_write_insns, 1);
3225 free_INSN_LIST_node (link);
3227 link = pending_write_mems;
3228 pending_write_mems = XEXP (pending_write_mems, 1);
3229 free_EXPR_LIST_node (link);
3231 pending_lists_length = 0;
3233 /* last_pending_memory_flush is now a list of insns. */
3234 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
3235 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3237 free_INSN_LIST_list (&last_pending_memory_flush);
3238 last_pending_memory_flush = alloc_INSN_LIST (insn, NULL_RTX);
3241 /* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
3242 rtx, X, creating all dependencies generated by the write to the
3243 destination of X, and reads of everything mentioned. */
3246 sched_analyze_1 (x, insn)
3251 register rtx dest = XEXP (x, 0);
3252 enum rtx_code code = GET_CODE (x);
3257 if (GET_CODE (dest) == PARALLEL
3258 && GET_MODE (dest) == BLKmode)
3261 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
3262 sched_analyze_1 (XVECEXP (dest, 0, i), insn);
3263 if (GET_CODE (x) == SET)
3264 sched_analyze_2 (SET_SRC (x), insn);
3268 while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
3269 || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3271 if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3273 /* The second and third arguments are values read by this insn. */
3274 sched_analyze_2 (XEXP (dest, 1), insn);
3275 sched_analyze_2 (XEXP (dest, 2), insn);
3277 dest = XEXP (dest, 0);
3280 if (GET_CODE (dest) == REG)
3284 regno = REGNO (dest);
3286 /* A hard reg in a wide mode may really be multiple registers.
3287 If so, mark all of them just like the first. */
3288 if (regno < FIRST_PSEUDO_REGISTER)
3290 i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
3295 for (u = reg_last_uses[regno + i]; u; u = XEXP (u, 1))
3296 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3298 for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1))
3299 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3301 /* Clobbers need not be ordered with respect to one
3302 another, but sets must be ordered with respect to a
3306 free_INSN_LIST_list (®_last_uses[regno + i]);
3307 for (u = reg_last_clobbers[regno + i]; u; u = XEXP (u, 1))
3308 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3309 SET_REGNO_REG_SET (reg_pending_sets, regno + i);
3312 SET_REGNO_REG_SET (reg_pending_clobbers, regno + i);
3314 /* Function calls clobber all call_used regs. */
3315 if (global_regs[regno + i]
3316 || (code == SET && call_used_regs[regno + i]))
3317 for (u = last_function_call; u; u = XEXP (u, 1))
3318 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3325 for (u = reg_last_uses[regno]; u; u = XEXP (u, 1))
3326 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3328 for (u = reg_last_sets[regno]; u; u = XEXP (u, 1))
3329 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3333 free_INSN_LIST_list (®_last_uses[regno]);
3334 for (u = reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3335 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3336 SET_REGNO_REG_SET (reg_pending_sets, regno);
3339 SET_REGNO_REG_SET (reg_pending_clobbers, regno);
3341 /* Pseudos that are REG_EQUIV to something may be replaced
3342 by that during reloading. We need only add dependencies for
3343 the address in the REG_EQUIV note. */
3344 if (!reload_completed
3345 && reg_known_equiv_p[regno]
3346 && GET_CODE (reg_known_value[regno]) == MEM)
3347 sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn);
3349 /* Don't let it cross a call after scheduling if it doesn't
3350 already cross one. */
3352 if (REG_N_CALLS_CROSSED (regno) == 0)
3353 for (u = last_function_call; u; u = XEXP (u, 1))
3354 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3357 else if (GET_CODE (dest) == MEM)
3359 /* Writing memory. */
3361 if (pending_lists_length > 32)
3363 /* Flush all pending reads and writes to prevent the pending lists
3364 from getting any larger. Insn scheduling runs too slowly when
3365 these lists get long. The number 32 was chosen because it
3366 seems like a reasonable number. When compiling GCC with itself,
3367 this flush occurs 8 times for sparc, and 10 times for m88k using
3369 flush_pending_lists (insn, 0);
3374 rtx pending, pending_mem;
3376 pending = pending_read_insns;
3377 pending_mem = pending_read_mems;
3380 if (anti_dependence (XEXP (pending_mem, 0), dest))
3381 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3383 pending = XEXP (pending, 1);
3384 pending_mem = XEXP (pending_mem, 1);
3387 pending = pending_write_insns;
3388 pending_mem = pending_write_mems;
3391 if (output_dependence (XEXP (pending_mem, 0), dest))
3392 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
3394 pending = XEXP (pending, 1);
3395 pending_mem = XEXP (pending_mem, 1);
3398 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
3399 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3401 add_insn_mem_dependence (&pending_write_insns, &pending_write_mems,
3404 sched_analyze_2 (XEXP (dest, 0), insn);
3407 /* Analyze reads. */
3408 if (GET_CODE (x) == SET)
3409 sched_analyze_2 (SET_SRC (x), insn);
3412 /* Analyze the uses of memory and registers in rtx X in INSN. */
3415 sched_analyze_2 (x, insn)
3421 register enum rtx_code code;
3422 register const char *fmt;
3427 code = GET_CODE (x);
3436 /* Ignore constants. Note that we must handle CONST_DOUBLE here
3437 because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
3438 this does not mean that this insn is using cc0. */
3446 /* User of CC0 depends on immediately preceding insn. */
3447 SCHED_GROUP_P (insn) = 1;
3449 /* There may be a note before this insn now, but all notes will
3450 be removed before we actually try to schedule the insns, so
3451 it won't cause a problem later. We must avoid it here though. */
3452 prev = prev_nonnote_insn (insn);
3454 /* Make a copy of all dependencies on the immediately previous insn,
3455 and add to this insn. This is so that all the dependencies will
3456 apply to the group. Remove an explicit dependence on this insn
3457 as SCHED_GROUP_P now represents it. */
3459 if (find_insn_list (prev, LOG_LINKS (insn)))
3460 remove_dependence (insn, prev);
3462 for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
3463 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3472 int regno = REGNO (x);
3473 if (regno < FIRST_PSEUDO_REGISTER)
3477 i = HARD_REGNO_NREGS (regno, GET_MODE (x));
3480 reg_last_uses[regno + i]
3481 = alloc_INSN_LIST (insn, reg_last_uses[regno + i]);
3483 for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1))
3484 add_dependence (insn, XEXP (u, 0), 0);
3486 /* ??? This should never happen. */
3487 for (u = reg_last_clobbers[regno + i]; u; u = XEXP (u, 1))
3488 add_dependence (insn, XEXP (u, 0), 0);
3490 if ((call_used_regs[regno + i] || global_regs[regno + i]))
3491 /* Function calls clobber all call_used regs. */
3492 for (u = last_function_call; u; u = XEXP (u, 1))
3493 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3498 reg_last_uses[regno] = alloc_INSN_LIST (insn,
3499 reg_last_uses[regno]);
3501 for (u = reg_last_sets[regno]; u; u = XEXP (u, 1))
3502 add_dependence (insn, XEXP (u, 0), 0);
3504 /* ??? This should never happen. */
3505 for (u = reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3506 add_dependence (insn, XEXP (u, 0), 0);
3508 /* Pseudos that are REG_EQUIV to something may be replaced
3509 by that during reloading. We need only add dependencies for
3510 the address in the REG_EQUIV note. */
3511 if (!reload_completed
3512 && reg_known_equiv_p[regno]
3513 && GET_CODE (reg_known_value[regno]) == MEM)
3514 sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn);
3516 /* If the register does not already cross any calls, then add this
3517 insn to the sched_before_next_call list so that it will still
3518 not cross calls after scheduling. */
3519 if (REG_N_CALLS_CROSSED (regno) == 0)
3520 add_dependence (sched_before_next_call, insn, REG_DEP_ANTI);
3527 /* Reading memory. */
3529 rtx pending, pending_mem;
3531 pending = pending_read_insns;
3532 pending_mem = pending_read_mems;
3535 if (read_dependence (XEXP (pending_mem, 0), x))
3536 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3538 pending = XEXP (pending, 1);
3539 pending_mem = XEXP (pending_mem, 1);
3542 pending = pending_write_insns;
3543 pending_mem = pending_write_mems;
3546 if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
3548 add_dependence (insn, XEXP (pending, 0), 0);
3550 pending = XEXP (pending, 1);
3551 pending_mem = XEXP (pending_mem, 1);
3554 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
3555 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3557 /* Always add these dependencies to pending_reads, since
3558 this insn may be followed by a write. */
3559 add_insn_mem_dependence (&pending_read_insns, &pending_read_mems,
3562 /* Take advantage of tail recursion here. */
3563 sched_analyze_2 (XEXP (x, 0), insn);
3567 /* Force pending stores to memory in case a trap handler needs them. */
3569 flush_pending_lists (insn, 1);
3574 case UNSPEC_VOLATILE:
3578 /* Traditional and volatile asm instructions must be considered to use
3579 and clobber all hard registers, all pseudo-registers and all of
3580 memory. So must TRAP_IF and UNSPEC_VOLATILE operations.
3582 Consider for instance a volatile asm that changes the fpu rounding
3583 mode. An insn should not be moved across this even if it only uses
3584 pseudo-regs because it might give an incorrectly rounded result. */
3585 if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
3587 int max_reg = max_reg_num ();
3588 for (i = 0; i < max_reg; i++)
3590 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3591 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3592 free_INSN_LIST_list (®_last_uses[i]);
3594 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3595 add_dependence (insn, XEXP (u, 0), 0);
3597 for (u = reg_last_clobbers[i]; u; u = XEXP (u, 1))
3598 add_dependence (insn, XEXP (u, 0), 0);
3600 reg_pending_sets_all = 1;
3602 flush_pending_lists (insn, 0);
3605 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
3606 We can not just fall through here since then we would be confused
3607 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
3608 traditional asms unlike their normal usage. */
3610 if (code == ASM_OPERANDS)
3612 for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
3613 sched_analyze_2 (ASM_OPERANDS_INPUT (x, j), insn);
3623 /* These both read and modify the result. We must handle them as writes
3624 to get proper dependencies for following instructions. We must handle
3625 them as reads to get proper dependencies from this to previous
3626 instructions. Thus we need to pass them to both sched_analyze_1
3627 and sched_analyze_2. We must call sched_analyze_2 first in order
3628 to get the proper antecedent for the read. */
3629 sched_analyze_2 (XEXP (x, 0), insn);
3630 sched_analyze_1 (x, insn);
3637 /* Other cases: walk the insn. */
3638 fmt = GET_RTX_FORMAT (code);
3639 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3642 sched_analyze_2 (XEXP (x, i), insn);
3643 else if (fmt[i] == 'E')
3644 for (j = 0; j < XVECLEN (x, i); j++)
3645 sched_analyze_2 (XVECEXP (x, i, j), insn);
3649 /* Analyze an INSN with pattern X to find all dependencies. */
3652 sched_analyze_insn (x, insn, loop_notes)
3656 register RTX_CODE code = GET_CODE (x);
3658 int maxreg = max_reg_num ();
3661 if (code == SET || code == CLOBBER)
3662 sched_analyze_1 (x, insn);
3663 else if (code == PARALLEL)
3666 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
3668 code = GET_CODE (XVECEXP (x, 0, i));
3669 if (code == SET || code == CLOBBER)
3670 sched_analyze_1 (XVECEXP (x, 0, i), insn);
3672 sched_analyze_2 (XVECEXP (x, 0, i), insn);
3676 sched_analyze_2 (x, insn);
3678 /* Mark registers CLOBBERED or used by called function. */
3679 if (GET_CODE (insn) == CALL_INSN)
3680 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
3682 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
3683 sched_analyze_1 (XEXP (link, 0), insn);
3685 sched_analyze_2 (XEXP (link, 0), insn);
3688 /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
3689 block, then we must be sure that no instructions are scheduled across it.
3690 Otherwise, the reg_n_refs info (which depends on loop_depth) would
3691 become incorrect. */
3695 int max_reg = max_reg_num ();
3696 int schedule_barrier_found = 0;
3699 /* Update loop_notes with any notes from this insn. Also determine
3700 if any of the notes on the list correspond to instruction scheduling
3701 barriers (loop, eh & setjmp notes, but not range notes. */
3703 while (XEXP (link, 1))
3705 if (INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_BEG
3706 || INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_END
3707 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_BEG
3708 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_END
3709 || INTVAL (XEXP (link, 0)) == NOTE_INSN_SETJMP)
3710 schedule_barrier_found = 1;
3712 link = XEXP (link, 1);
3714 XEXP (link, 1) = REG_NOTES (insn);
3715 REG_NOTES (insn) = loop_notes;
3717 /* Add dependencies if a scheduling barrier was found. */
3718 if (schedule_barrier_found)
3720 for (i = 0; i < max_reg; i++)
3723 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3724 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3725 free_INSN_LIST_list (®_last_uses[i]);
3727 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3728 add_dependence (insn, XEXP (u, 0), 0);
3730 for (u = reg_last_clobbers[i]; u; u = XEXP (u, 1))
3731 add_dependence (insn, XEXP (u, 0), 0);
3733 reg_pending_sets_all = 1;
3735 flush_pending_lists (insn, 0);
3740 /* Accumulate clobbers until the next set so that it will be output dependent
3741 on all of them. At the next set we can clear the clobber list, since
3742 subsequent sets will be output dependent on it. */
3743 EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i,
3745 free_INSN_LIST_list (®_last_sets[i]);
3746 free_INSN_LIST_list (®_last_clobbers[i]);
3748 = alloc_INSN_LIST (insn, NULL_RTX);
3750 EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i,
3752 reg_last_clobbers[i]
3753 = alloc_INSN_LIST (insn,
3754 reg_last_clobbers[i]);
3756 CLEAR_REG_SET (reg_pending_sets);
3757 CLEAR_REG_SET (reg_pending_clobbers);
3759 if (reg_pending_sets_all)
3761 for (i = 0; i < maxreg; i++)
3763 free_INSN_LIST_list (®_last_sets[i]);
3764 free_INSN_LIST_list (®_last_clobbers[i]);
3765 reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
3768 reg_pending_sets_all = 0;
3771 /* Handle function calls and function returns created by the epilogue
3773 if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
3778 /* When scheduling instructions, we make sure calls don't lose their
3779 accompanying USE insns by depending them one on another in order.
3781 Also, we must do the same thing for returns created by the epilogue
3782 threading code. Note this code works only in this special case,
3783 because other passes make no guarantee that they will never emit
3784 an instruction between a USE and a RETURN. There is such a guarantee
3785 for USE instructions immediately before a call. */
3787 prev_dep_insn = insn;
3788 dep_insn = PREV_INSN (insn);
3789 while (GET_CODE (dep_insn) == INSN
3790 && GET_CODE (PATTERN (dep_insn)) == USE
3791 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == REG)
3793 SCHED_GROUP_P (prev_dep_insn) = 1;
3795 /* Make a copy of all dependencies on dep_insn, and add to insn.
3796 This is so that all of the dependencies will apply to the
3799 for (link = LOG_LINKS (dep_insn); link; link = XEXP (link, 1))
3800 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3802 prev_dep_insn = dep_insn;
3803 dep_insn = PREV_INSN (dep_insn);
3808 /* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
3809 for every dependency. */
3812 sched_analyze (head, tail)
3819 for (insn = head;; insn = NEXT_INSN (insn))
3821 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
3823 /* Clear out the stale LOG_LINKS from flow. */
3824 free_INSN_LIST_list (&LOG_LINKS (insn));
3826 /* Make each JUMP_INSN a scheduling barrier for memory
3828 if (GET_CODE (insn) == JUMP_INSN)
3829 last_pending_memory_flush
3830 = alloc_INSN_LIST (insn, last_pending_memory_flush);
3831 sched_analyze_insn (PATTERN (insn), insn, loop_notes);
3834 else if (GET_CODE (insn) == CALL_INSN)
3839 CANT_MOVE (insn) = 1;
3841 /* Clear out the stale LOG_LINKS from flow. */
3842 free_INSN_LIST_list (&LOG_LINKS (insn));
3844 /* Any instruction using a hard register which may get clobbered
3845 by a call needs to be marked as dependent on this call.
3846 This prevents a use of a hard return reg from being moved
3847 past a void call (i.e. it does not explicitly set the hard
3850 /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
3851 all registers, not just hard registers, may be clobbered by this
3854 /* Insn, being a CALL_INSN, magically depends on
3855 `last_function_call' already. */
3857 if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
3858 && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
3860 int max_reg = max_reg_num ();
3861 for (i = 0; i < max_reg; i++)
3863 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3864 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3865 free_INSN_LIST_list (®_last_uses[i]);
3867 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3868 add_dependence (insn, XEXP (u, 0), 0);
3870 for (u = reg_last_clobbers[i]; u; u = XEXP (u, 1))
3871 add_dependence (insn, XEXP (u, 0), 0);
3873 reg_pending_sets_all = 1;
3875 /* Add a pair of REG_SAVE_NOTEs which we will later
3876 convert back into a NOTE_INSN_SETJMP note. See
3877 reemit_notes for why we use a pair of NOTEs. */
3878 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3881 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3882 GEN_INT (NOTE_INSN_SETJMP),
3887 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3888 if (call_used_regs[i] || global_regs[i])
3890 for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
3891 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3893 for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
3894 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3896 SET_REGNO_REG_SET (reg_pending_clobbers, i);
3900 /* For each insn which shouldn't cross a call, add a dependence
3901 between that insn and this call insn. */
3902 x = LOG_LINKS (sched_before_next_call);
3905 add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
3908 free_INSN_LIST_list (&LOG_LINKS (sched_before_next_call));
3910 sched_analyze_insn (PATTERN (insn), insn, loop_notes);
3913 /* In the absence of interprocedural alias analysis, we must flush
3914 all pending reads and writes, and start new dependencies starting
3915 from here. But only flush writes for constant calls (which may
3916 be passed a pointer to something we haven't written yet). */
3917 flush_pending_lists (insn, CONST_CALL_P (insn));
3919 /* Depend this function call (actually, the user of this
3920 function call) on all hard register clobberage. */
3922 /* last_function_call is now a list of insns. */
3923 free_INSN_LIST_list(&last_function_call);
3924 last_function_call = alloc_INSN_LIST (insn, NULL_RTX);
3927 /* See comments on reemit_notes as to why we do this.
3928 ??? Actually, the reemit_notes just say what is done, not why. */
3930 else if (GET_CODE (insn) == NOTE
3931 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_START
3932 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END))
3934 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE, NOTE_RANGE_INFO (insn),
3936 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3937 GEN_INT (NOTE_LINE_NUMBER (insn)),
3940 else if (GET_CODE (insn) == NOTE
3941 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
3942 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
3943 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3944 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
3945 || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
3946 && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
3950 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3951 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
3952 rtx_region = GEN_INT (NOTE_EH_HANDLER (insn));
3954 rtx_region = GEN_INT (0);
3956 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3959 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3960 GEN_INT (NOTE_LINE_NUMBER (insn)),
3962 CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
3971 /* Macros and functions for keeping the priority queue sorted, and
3972 dealing with queueing and dequeueing of instructions. */
3974 #define SCHED_SORT(READY, N_READY) \
3975 do { if ((N_READY) == 2) \
3976 swap_sort (READY, N_READY); \
3977 else if ((N_READY) > 2) \
3978 qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \
3981 /* Returns a positive value if x is preferred; returns a negative value if
3982 y is preferred. Should never return 0, since that will make the sort
3986 rank_for_schedule (x, y)
3990 rtx tmp = *(rtx *)y;
3991 rtx tmp2 = *(rtx *)x;
3993 int tmp_class, tmp2_class, depend_count1, depend_count2;
3994 int val, priority_val, spec_val, prob_val, weight_val;
3997 /* Prefer insn with higher priority. */
3998 priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
4000 return priority_val;
4002 /* Prefer an insn with smaller contribution to registers-pressure. */
4003 if (!reload_completed &&
4004 (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2)))
4005 return (weight_val);
4007 /* Some comparison make sense in interblock scheduling only. */
4008 if (INSN_BB (tmp) != INSN_BB (tmp2))
4010 /* Prefer an inblock motion on an interblock motion. */
4011 if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb))
4013 if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb))
4016 /* Prefer a useful motion on a speculative one. */
4017 if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2)))
4020 /* Prefer a more probable (speculative) insn. */
4021 prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp);
4026 /* Compare insns based on their relation to the last-scheduled-insn. */
4027 if (last_scheduled_insn)
4029 /* Classify the instructions into three classes:
4030 1) Data dependent on last schedule insn.
4031 2) Anti/Output dependent on last scheduled insn.
4032 3) Independent of last scheduled insn, or has latency of one.
4033 Choose the insn from the highest numbered class if different. */
4034 link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn));
4035 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1)
4037 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4042 link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn));
4043 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1)
4045 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4050 if ((val = tmp2_class - tmp_class))
4054 /* Prefer the insn which has more later insns that depend on it.
4055 This gives the scheduler more freedom when scheduling later
4056 instructions at the expense of added register pressure. */
4058 for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
4062 for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
4065 val = depend_count2 - depend_count1;
4069 /* If insns are equally good, sort by INSN_LUID (original insn order),
4070 so that we make the sort stable. This minimizes instruction movement,
4071 thus minimizing sched's effect on debugging and cross-jumping. */
4072 return INSN_LUID (tmp) - INSN_LUID (tmp2);
4075 /* Resort the array A in which only element at index N may be out of order. */
4077 HAIFA_INLINE static void
4082 rtx insn = a[n - 1];
4085 while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
4093 static int max_priority;
4095 /* Add INSN to the insn queue so that it can be executed at least
4096 N_CYCLES after the currently executing insn. Preserve insns
4097 chain for debugging purposes. */
4099 HAIFA_INLINE static void
4100 queue_insn (insn, n_cycles)
4104 int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
4105 rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
4106 insn_queue[next_q] = link;
4109 if (sched_verbose >= 2)
4111 fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
4113 if (INSN_BB (insn) != target_bb)
4114 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4116 fprintf (dump, "queued for %d cycles.\n", n_cycles);
4121 /* PREV is an insn that is ready to execute. Adjust its priority if that
4122 will help shorten or lengthen register lifetimes as appropriate. Also
4123 provide a hook for the target to tweek itself. */
4125 HAIFA_INLINE static void
4126 adjust_priority (prev)
4127 rtx prev ATTRIBUTE_UNUSED;
4129 /* ??? There used to be code here to try and estimate how an insn
4130 affected register lifetimes, but it did it by looking at REG_DEAD
4131 notes, which we removed in schedule_region. Nor did it try to
4132 take into account register pressure or anything useful like that.
4134 Revisit when we have a machine model to work with and not before. */
4136 #ifdef ADJUST_PRIORITY
4137 ADJUST_PRIORITY (prev);
4141 /* Clock at which the previous instruction was issued. */
4142 static int last_clock_var;
4144 /* INSN is the "currently executing insn". Launch each insn which was
4145 waiting on INSN. READY is a vector of insns which are ready to fire.
4146 N_READY is the number of elements in READY. CLOCK is the current
4150 schedule_insn (insn, ready, n_ready, clock)
4159 unit = insn_unit (insn);
4161 if (sched_verbose >= 2)
4163 fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4165 insn_print_units (insn);
4166 fprintf (dump, "\n");
4169 if (sched_verbose && unit == -1)
4170 visualize_no_unit (insn);
4172 if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
4173 schedule_unit (unit, insn, clock);
4175 if (INSN_DEPEND (insn) == 0)
4178 /* This is used by the function adjust_priority above. */
4180 max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
4182 max_priority = INSN_PRIORITY (insn);
4184 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
4186 rtx next = XEXP (link, 0);
4187 int cost = insn_cost (insn, link, next);
4189 INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
4191 if ((INSN_DEP_COUNT (next) -= 1) == 0)
4193 int effective_cost = INSN_TICK (next) - clock;
4195 /* For speculative insns, before inserting to ready/queue,
4196 check live, exception-free, and issue-delay. */
4197 if (INSN_BB (next) != target_bb
4198 && (!IS_VALID (INSN_BB (next))
4200 || (IS_SPECULATIVE_INSN (next)
4201 && (insn_issue_delay (next) > 3
4202 || !check_live (next, INSN_BB (next))
4203 || !is_exception_free (next, INSN_BB (next), target_bb)))))
4206 if (sched_verbose >= 2)
4208 fprintf (dump, ";;\t\tdependences resolved: insn %d ",
4211 if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
4212 fprintf (dump, "/b%d ", BLOCK_NUM (next));
4214 if (effective_cost < 1)
4215 fprintf (dump, "into ready\n");
4217 fprintf (dump, "into queue with cost=%d\n", effective_cost);
4220 /* Adjust the priority of NEXT and either put it on the ready
4221 list or queue it. */
4222 adjust_priority (next);
4223 if (effective_cost < 1)
4224 ready[n_ready++] = next;
4226 queue_insn (next, effective_cost);
4230 /* Annotate the instruction with issue information -- TImode
4231 indicates that the instruction is expected not to be able
4232 to issue on the same cycle as the previous insn. A machine
4233 may use this information to decide how the instruction should
4235 if (reload_completed && issue_rate > 1)
4237 PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
4238 last_clock_var = clock;
4244 /* Functions for handling of notes. */
4246 /* Delete notes beginning with INSN and put them in the chain
4247 of notes ended by NOTE_LIST.
4248 Returns the insn following the notes. */
4251 unlink_other_notes (insn, tail)
4254 rtx prev = PREV_INSN (insn);
4256 while (insn != tail && GET_CODE (insn) == NOTE)
4258 rtx next = NEXT_INSN (insn);
4259 /* Delete the note from its current position. */
4261 NEXT_INSN (prev) = next;
4263 PREV_INSN (next) = prev;
4265 /* See sched_analyze to see how these are handled. */
4266 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
4267 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
4268 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
4269 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_START
4270 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
4271 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
4272 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
4274 /* Insert the note at the end of the notes list. */
4275 PREV_INSN (insn) = note_list;
4277 NEXT_INSN (note_list) = insn;
4286 /* Delete line notes beginning with INSN. Record line-number notes so
4287 they can be reused. Returns the insn following the notes. */
4290 unlink_line_notes (insn, tail)
4293 rtx prev = PREV_INSN (insn);
4295 while (insn != tail && GET_CODE (insn) == NOTE)
4297 rtx next = NEXT_INSN (insn);
4299 if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
4301 /* Delete the note from its current position. */
4303 NEXT_INSN (prev) = next;
4305 PREV_INSN (next) = prev;
4307 /* Record line-number notes so they can be reused. */
4308 LINE_NOTE (insn) = insn;
4318 /* Return the head and tail pointers of BB. */
4320 HAIFA_INLINE static void
4321 get_block_head_tail (b, headp, tailp)
4330 /* HEAD and TAIL delimit the basic block being scheduled. */
4331 head = BLOCK_HEAD (b);
4332 tail = BLOCK_END (b);
4334 /* Don't include any notes or labels at the beginning of the
4335 basic block, or notes at the ends of basic blocks. */
4336 while (head != tail)
4338 if (GET_CODE (head) == NOTE)
4339 head = NEXT_INSN (head);
4340 else if (GET_CODE (tail) == NOTE)
4341 tail = PREV_INSN (tail);
4342 else if (GET_CODE (head) == CODE_LABEL)
4343 head = NEXT_INSN (head);
4352 HAIFA_INLINE static void
4353 get_bb_head_tail (bb, headp, tailp)
4358 get_block_head_tail (BB_TO_BLOCK (bb), headp, tailp);
4361 /* Delete line notes from bb. Save them so they can be later restored
4362 (in restore_line_notes ()). */
4373 get_bb_head_tail (bb, &head, &tail);
4376 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4379 next_tail = NEXT_INSN (tail);
4380 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4384 /* Farm out notes, and maybe save them in NOTE_LIST.
4385 This is needed to keep the debugger from
4386 getting completely deranged. */
4387 if (GET_CODE (insn) == NOTE)
4390 insn = unlink_line_notes (insn, next_tail);
4396 if (insn == next_tail)
4402 /* Save line number notes for each insn in bb. */
4405 save_line_notes (bb)
4411 /* We must use the true line number for the first insn in the block
4412 that was computed and saved at the start of this pass. We can't
4413 use the current line number, because scheduling of the previous
4414 block may have changed the current line number. */
4416 rtx line = line_note_head[BB_TO_BLOCK (bb)];
4419 get_bb_head_tail (bb, &head, &tail);
4420 next_tail = NEXT_INSN (tail);
4422 for (insn = BLOCK_HEAD (BB_TO_BLOCK (bb));
4424 insn = NEXT_INSN (insn))
4425 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4428 LINE_NOTE (insn) = line;
4432 /* After bb was scheduled, insert line notes into the insns list. */
4435 restore_line_notes (bb)
4438 rtx line, note, prev, new;
4439 int added_notes = 0;
4441 rtx head, next_tail, insn;
4443 b = BB_TO_BLOCK (bb);
4445 head = BLOCK_HEAD (b);
4446 next_tail = NEXT_INSN (BLOCK_END (b));
4448 /* Determine the current line-number. We want to know the current
4449 line number of the first insn of the block here, in case it is
4450 different from the true line number that was saved earlier. If
4451 different, then we need a line number note before the first insn
4452 of this block. If it happens to be the same, then we don't want to
4453 emit another line number note here. */
4454 for (line = head; line; line = PREV_INSN (line))
4455 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
4458 /* Walk the insns keeping track of the current line-number and inserting
4459 the line-number notes as needed. */
4460 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4461 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4463 /* This used to emit line number notes before every non-deleted note.
4464 However, this confuses a debugger, because line notes not separated
4465 by real instructions all end up at the same address. I can find no
4466 use for line number notes before other notes, so none are emitted. */
4467 else if (GET_CODE (insn) != NOTE
4468 && (note = LINE_NOTE (insn)) != 0
4471 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
4472 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
4475 prev = PREV_INSN (insn);
4476 if (LINE_NOTE (note))
4478 /* Re-use the original line-number note. */
4479 LINE_NOTE (note) = 0;
4480 PREV_INSN (note) = prev;
4481 NEXT_INSN (prev) = note;
4482 PREV_INSN (insn) = note;
4483 NEXT_INSN (note) = insn;
4488 new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
4489 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
4490 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
4493 if (sched_verbose && added_notes)
4494 fprintf (dump, ";; added %d line-number notes\n", added_notes);
4497 /* After scheduling the function, delete redundant line notes from the
4501 rm_redundant_line_notes ()
4504 rtx insn = get_insns ();
4505 int active_insn = 0;
4508 /* Walk the insns deleting redundant line-number notes. Many of these
4509 are already present. The remainder tend to occur at basic
4510 block boundaries. */
4511 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4512 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4514 /* If there are no active insns following, INSN is redundant. */
4515 if (active_insn == 0)
4518 NOTE_SOURCE_FILE (insn) = 0;
4519 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4521 /* If the line number is unchanged, LINE is redundant. */
4523 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
4524 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
4527 NOTE_SOURCE_FILE (line) = 0;
4528 NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
4535 else if (!((GET_CODE (insn) == NOTE
4536 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
4537 || (GET_CODE (insn) == INSN
4538 && (GET_CODE (PATTERN (insn)) == USE
4539 || GET_CODE (PATTERN (insn)) == CLOBBER))))
4542 if (sched_verbose && notes)
4543 fprintf (dump, ";; deleted %d line-number notes\n", notes);
4546 /* Delete notes between head and tail and put them in the chain
4547 of notes ended by NOTE_LIST. */
4550 rm_other_notes (head, tail)
4558 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4561 next_tail = NEXT_INSN (tail);
4562 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4566 /* Farm out notes, and maybe save them in NOTE_LIST.
4567 This is needed to keep the debugger from
4568 getting completely deranged. */
4569 if (GET_CODE (insn) == NOTE)
4573 insn = unlink_other_notes (insn, next_tail);
4579 if (insn == next_tail)
4585 /* Functions for computation of registers live/usage info. */
4587 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4590 find_insn_reg_weight (b)
4593 rtx insn, next_tail, head, tail;
4595 get_block_head_tail (b, &head, &tail);
4596 next_tail = NEXT_INSN (tail);
4598 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4603 /* Handle register life information. */
4604 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4607 /* Increment weight for each register born here. */
4609 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4610 && register_operand (SET_DEST (x), VOIDmode))
4612 else if (GET_CODE (x) == PARALLEL)
4615 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
4617 x = XVECEXP (PATTERN (insn), 0, j);
4618 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4619 && register_operand (SET_DEST (x), VOIDmode))
4624 /* Decrement weight for each register that dies here. */
4625 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
4627 if (REG_NOTE_KIND (x) == REG_DEAD
4628 || REG_NOTE_KIND (x) == REG_UNUSED)
4632 INSN_REG_WEIGHT (insn) = reg_weight;
4636 /* Scheduling clock, modified in schedule_block() and queue_to_ready (). */
4637 static int clock_var;
4639 /* Move insns that became ready to fire from queue to ready list. */
4642 queue_to_ready (ready, n_ready)
4649 q_ptr = NEXT_Q (q_ptr);
4651 /* Add all pending insns that can be scheduled without stalls to the
4653 for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
4656 insn = XEXP (link, 0);
4659 if (sched_verbose >= 2)
4660 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4662 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4663 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4665 ready[n_ready++] = insn;
4666 if (sched_verbose >= 2)
4667 fprintf (dump, "moving to ready without stalls\n");
4669 insn_queue[q_ptr] = 0;
4671 /* If there are no ready insns, stall until one is ready and add all
4672 of the pending insns at that point to the ready list. */
4675 register int stalls;
4677 for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
4679 if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
4681 for (; link; link = XEXP (link, 1))
4683 insn = XEXP (link, 0);
4686 if (sched_verbose >= 2)
4687 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4689 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4690 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4692 ready[n_ready++] = insn;
4693 if (sched_verbose >= 2)
4694 fprintf (dump, "moving to ready with %d stalls\n", stalls);
4696 insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
4703 if (sched_verbose && stalls)
4704 visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
4705 q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
4706 clock_var += stalls;
4711 /* Print the ready list for debugging purposes. Callable from debugger. */
4714 debug_ready_list (ready, n_ready)
4720 for (i = 0; i < n_ready; i++)
4722 fprintf (dump, " %d", INSN_UID (ready[i]));
4723 if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
4724 fprintf (dump, "/b%d", BLOCK_NUM (ready[i]));
4726 fprintf (dump, "\n");
4729 /* Print names of units on which insn can/should execute, for debugging. */
4732 insn_print_units (insn)
4736 int unit = insn_unit (insn);
4739 fprintf (dump, "none");
4741 fprintf (dump, "%s", function_units[unit].name);
4744 fprintf (dump, "[");
4745 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
4748 fprintf (dump, "%s", function_units[i].name);
4750 fprintf (dump, " ");
4752 fprintf (dump, "]");
4756 /* MAX_VISUAL_LINES is the maximum number of lines in visualization table
4757 of a basic block. If more lines are needed, table is splitted to two.
4758 n_visual_lines is the number of lines printed so far for a block.
4759 visual_tbl contains the block visualization info.
4760 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4761 #define MAX_VISUAL_LINES 100
4766 rtx vis_no_unit[10];
4768 /* Finds units that are in use in this fuction. Required only
4769 for visualization. */
4772 init_target_units ()
4777 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4779 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4782 unit = insn_unit (insn);
4785 target_units |= ~unit;
4787 target_units |= (1 << unit);
4791 /* Return the length of the visualization table. */
4794 get_visual_tbl_length ()
4800 /* Compute length of one field in line. */
4801 s = (char *) alloca (INSN_LEN + 6);
4802 sprintf (s, " %33s", "uname");
4805 /* Compute length of one line. */
4808 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
4809 if (function_units[unit].bitmask & target_units)
4810 for (i = 0; i < function_units[unit].multiplicity; i++)
4813 n += strlen ("\n") + 2;
4815 /* Compute length of visualization string. */
4816 return (MAX_VISUAL_LINES * n);
4819 /* Init block visualization debugging info. */
4822 init_block_visualization ()
4824 strcpy (visual_tbl, "");
4832 safe_concat (buf, cur, str)
4837 char *end = buf + BUF_LEN - 2; /* Leave room for null. */
4846 while (cur < end && (c = *str++) != '\0')
4853 /* This recognizes rtx, I classified as expressions. These are always
4854 represent some action on values or results of other expression, that
4855 may be stored in objects representing values. */
4858 print_exp (buf, x, verbose)
4866 const char *fun = (char *)0;
4871 for (i = 0; i < 4; i++)
4877 switch (GET_CODE (x))
4880 op[0] = XEXP (x, 0);
4881 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4882 && INTVAL (XEXP (x, 1)) < 0)
4885 op[1] = GEN_INT (-INTVAL (XEXP (x, 1)));
4890 op[1] = XEXP (x, 1);
4894 op[0] = XEXP (x, 0);
4896 op[1] = XEXP (x, 1);
4900 op[0] = XEXP (x, 0);
4902 op[1] = XEXP (x, 1);
4906 op[0] = XEXP (x, 0);
4907 op[1] = XEXP (x, 1);
4911 op[0] = XEXP (x, 0);
4914 op[0] = XEXP (x, 0);
4916 op[1] = XEXP (x, 1);
4919 op[0] = XEXP (x, 0);
4921 op[1] = XEXP (x, 1);
4925 op[0] = XEXP (x, 0);
4926 op[1] = XEXP (x, 1);
4929 op[0] = XEXP (x, 0);
4931 op[1] = XEXP (x, 1);
4935 op[0] = XEXP (x, 0);
4936 op[1] = XEXP (x, 1);
4940 op[0] = XEXP (x, 0);
4941 op[1] = XEXP (x, 1);
4945 op[0] = XEXP (x, 0);
4946 op[1] = XEXP (x, 1);
4950 op[0] = XEXP (x, 0);
4951 op[1] = XEXP (x, 1);
4955 op[0] = XEXP (x, 0);
4956 op[1] = XEXP (x, 1);
4960 op[0] = XEXP (x, 0);
4963 op[0] = XEXP (x, 0);
4965 op[1] = XEXP (x, 1);
4968 op[0] = XEXP (x, 0);
4970 op[1] = XEXP (x, 1);
4973 op[0] = XEXP (x, 0);
4975 op[1] = XEXP (x, 1);
4978 op[0] = XEXP (x, 0);
4980 op[1] = XEXP (x, 1);
4983 op[0] = XEXP (x, 0);
4985 op[1] = XEXP (x, 1);
4988 op[0] = XEXP (x, 0);
4990 op[1] = XEXP (x, 1);
4993 op[0] = XEXP (x, 0);
4995 op[1] = XEXP (x, 1);
4998 op[0] = XEXP (x, 0);
5000 op[1] = XEXP (x, 1);
5004 op[0] = XEXP (x, 0);
5008 op[0] = XEXP (x, 0);
5012 op[0] = XEXP (x, 0);
5015 op[0] = XEXP (x, 0);
5017 op[1] = XEXP (x, 1);
5020 op[0] = XEXP (x, 0);
5022 op[1] = XEXP (x, 1);
5025 op[0] = XEXP (x, 0);
5027 op[1] = XEXP (x, 1);
5031 op[0] = XEXP (x, 0);
5032 op[1] = XEXP (x, 1);
5035 op[0] = XEXP (x, 0);
5037 op[1] = XEXP (x, 1);
5041 op[0] = XEXP (x, 0);
5042 op[1] = XEXP (x, 1);
5045 op[0] = XEXP (x, 0);
5047 op[1] = XEXP (x, 1);
5051 op[0] = XEXP (x, 0);
5052 op[1] = XEXP (x, 1);
5055 op[0] = XEXP (x, 0);
5057 op[1] = XEXP (x, 1);
5061 op[0] = XEXP (x, 0);
5062 op[1] = XEXP (x, 1);
5065 fun = (verbose) ? "sign_extract" : "sxt";
5066 op[0] = XEXP (x, 0);
5067 op[1] = XEXP (x, 1);
5068 op[2] = XEXP (x, 2);
5071 fun = (verbose) ? "zero_extract" : "zxt";
5072 op[0] = XEXP (x, 0);
5073 op[1] = XEXP (x, 1);
5074 op[2] = XEXP (x, 2);
5077 fun = (verbose) ? "sign_extend" : "sxn";
5078 op[0] = XEXP (x, 0);
5081 fun = (verbose) ? "zero_extend" : "zxn";
5082 op[0] = XEXP (x, 0);
5085 fun = (verbose) ? "float_extend" : "fxn";
5086 op[0] = XEXP (x, 0);
5089 fun = (verbose) ? "trunc" : "trn";
5090 op[0] = XEXP (x, 0);
5092 case FLOAT_TRUNCATE:
5093 fun = (verbose) ? "float_trunc" : "ftr";
5094 op[0] = XEXP (x, 0);
5097 fun = (verbose) ? "float" : "flt";
5098 op[0] = XEXP (x, 0);
5100 case UNSIGNED_FLOAT:
5101 fun = (verbose) ? "uns_float" : "ufl";
5102 op[0] = XEXP (x, 0);
5106 op[0] = XEXP (x, 0);
5109 fun = (verbose) ? "uns_fix" : "ufx";
5110 op[0] = XEXP (x, 0);
5114 op[0] = XEXP (x, 0);
5118 op[0] = XEXP (x, 0);
5121 op[0] = XEXP (x, 0);
5125 op[0] = XEXP (x, 0);
5130 op[0] = XEXP (x, 0);
5134 op[1] = XEXP (x, 1);
5139 op[0] = XEXP (x, 0);
5141 op[1] = XEXP (x, 1);
5143 op[2] = XEXP (x, 2);
5148 op[0] = TRAP_CONDITION (x);
5151 case UNSPEC_VOLATILE:
5153 cur = safe_concat (buf, cur, "unspec");
5154 if (GET_CODE (x) == UNSPEC_VOLATILE)
5155 cur = safe_concat (buf, cur, "/v");
5156 cur = safe_concat (buf, cur, "[");
5158 for (i = 0; i < XVECLEN (x, 0); i++)
5160 print_pattern (tmp, XVECEXP (x, 0, i), verbose);
5161 cur = safe_concat (buf, cur, sep);
5162 cur = safe_concat (buf, cur, tmp);
5165 cur = safe_concat (buf, cur, "] ");
5166 sprintf (tmp, "%d", XINT (x, 1));
5167 cur = safe_concat (buf, cur, tmp);
5171 /* If (verbose) debug_rtx (x); */
5172 st[0] = GET_RTX_NAME (GET_CODE (x));
5176 /* Print this as a function? */
5179 cur = safe_concat (buf, cur, fun);
5180 cur = safe_concat (buf, cur, "(");
5183 for (i = 0; i < 4; i++)
5186 cur = safe_concat (buf, cur, st[i]);
5191 cur = safe_concat (buf, cur, ",");
5193 print_value (tmp, op[i], verbose);
5194 cur = safe_concat (buf, cur, tmp);
5199 cur = safe_concat (buf, cur, ")");
5202 /* Prints rtxes, I customly classified as values. They're constants,
5203 registers, labels, symbols and memory accesses. */
5206 print_value (buf, x, verbose)
5214 switch (GET_CODE (x))
5217 sprintf (t, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
5218 cur = safe_concat (buf, cur, t);
5221 sprintf (t, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
5222 cur = safe_concat (buf, cur, t);
5225 cur = safe_concat (buf, cur, "\"");
5226 cur = safe_concat (buf, cur, XSTR (x, 0));
5227 cur = safe_concat (buf, cur, "\"");
5230 cur = safe_concat (buf, cur, "`");
5231 cur = safe_concat (buf, cur, XSTR (x, 0));
5232 cur = safe_concat (buf, cur, "'");
5235 sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
5236 cur = safe_concat (buf, cur, t);
5239 print_value (t, XEXP (x, 0), verbose);
5240 cur = safe_concat (buf, cur, "const(");
5241 cur = safe_concat (buf, cur, t);
5242 cur = safe_concat (buf, cur, ")");
5245 print_value (t, XEXP (x, 0), verbose);
5246 cur = safe_concat (buf, cur, "high(");
5247 cur = safe_concat (buf, cur, t);
5248 cur = safe_concat (buf, cur, ")");
5251 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
5253 int c = reg_names[ REGNO (x) ][0];
5254 if (c >= '0' && c <= '9')
5255 cur = safe_concat (buf, cur, "%");
5257 cur = safe_concat (buf, cur, reg_names[ REGNO (x) ]);
5261 sprintf (t, "r%d", REGNO (x));
5262 cur = safe_concat (buf, cur, t);
5266 print_value (t, SUBREG_REG (x), verbose);
5267 cur = safe_concat (buf, cur, t);
5268 sprintf (t, "#%d", SUBREG_WORD (x));
5269 cur = safe_concat (buf, cur, t);
5272 cur = safe_concat (buf, cur, "scratch");
5275 cur = safe_concat (buf, cur, "cc0");
5278 cur = safe_concat (buf, cur, "pc");
5281 print_value (t, XEXP (x, 0), verbose);
5282 cur = safe_concat (buf, cur, "[");
5283 cur = safe_concat (buf, cur, t);
5284 cur = safe_concat (buf, cur, "]");
5287 print_exp (t, x, verbose);
5288 cur = safe_concat (buf, cur, t);
5293 /* The next step in insn detalization, its pattern recognition. */
5296 print_pattern (buf, x, verbose)
5301 char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
5303 switch (GET_CODE (x))
5306 print_value (t1, SET_DEST (x), verbose);
5307 print_value (t2, SET_SRC (x), verbose);
5308 sprintf (buf, "%s=%s", t1, t2);
5311 sprintf (buf, "return");
5314 print_exp (buf, x, verbose);
5317 print_value (t1, XEXP (x, 0), verbose);
5318 sprintf (buf, "clobber %s", t1);
5321 print_value (t1, XEXP (x, 0), verbose);
5322 sprintf (buf, "use %s", t1);
5329 for (i = 0; i < XVECLEN (x, 0); i++)
5331 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5332 sprintf (t3, "%s%s;", t1, t2);
5335 sprintf (buf, "%s}", t1);
5342 sprintf (t1, "%%{");
5343 for (i = 0; i < XVECLEN (x, 0); i++)
5345 print_insn (t2, XVECEXP (x, 0, i), verbose);
5346 sprintf (t3, "%s%s;", t1, t2);
5349 sprintf (buf, "%s%%}", t1);
5353 sprintf (buf, "asm {%s}", XSTR (x, 0));
5358 print_value (buf, XEXP (x, 0), verbose);
5361 print_value (t1, TRAP_CONDITION (x), verbose);
5362 sprintf (buf, "trap_if %s", t1);
5368 sprintf (t1, "unspec{");
5369 for (i = 0; i < XVECLEN (x, 0); i++)
5371 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5372 sprintf (t3, "%s%s;", t1, t2);
5375 sprintf (buf, "%s}", t1);
5378 case UNSPEC_VOLATILE:
5382 sprintf (t1, "unspec/v{");
5383 for (i = 0; i < XVECLEN (x, 0); i++)
5385 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5386 sprintf (t3, "%s%s;", t1, t2);
5389 sprintf (buf, "%s}", t1);
5393 print_value (buf, x, verbose);
5395 } /* print_pattern */
5397 /* This is the main function in rtl visualization mechanism. It
5398 accepts an rtx and tries to recognize it as an insn, then prints it
5399 properly in human readable form, resembling assembler mnemonics.
5400 For every insn it prints its UID and BB the insn belongs too.
5401 (Probably the last "option" should be extended somehow, since it
5402 depends now on sched.c inner variables ...) */
5405 print_insn (buf, x, verbose)
5413 switch (GET_CODE (x))
5416 print_pattern (t, PATTERN (x), verbose);
5418 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x),
5421 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5424 print_pattern (t, PATTERN (x), verbose);
5426 sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x),
5429 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5433 if (GET_CODE (x) == PARALLEL)
5435 x = XVECEXP (x, 0, 0);
5436 print_pattern (t, x, verbose);
5439 strcpy (t, "call <...>");
5441 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn),
5442 INSN_UID (insn), t);
5444 sprintf (buf, "%-4d %s", INSN_UID (insn), t);
5447 sprintf (buf, "L%d:", INSN_UID (x));
5450 sprintf (buf, "i% 4d: barrier", INSN_UID (x));
5453 if (NOTE_LINE_NUMBER (x) > 0)
5454 sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
5455 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
5457 sprintf (buf, "%4d %s", INSN_UID (x),
5458 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
5463 sprintf (buf, "Not an INSN at all\n");
5467 sprintf (buf, "i%-4d <What?>", INSN_UID (x));
5471 /* Print visualization debugging info. */
5474 print_block_visualization (b, s)
5481 fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s);
5483 /* Print names of units. */
5484 fprintf (dump, ";; %-8s", "clock");
5485 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5486 if (function_units[unit].bitmask & target_units)
5487 for (i = 0; i < function_units[unit].multiplicity; i++)
5488 fprintf (dump, " %-33s", function_units[unit].name);
5489 fprintf (dump, " %-8s\n", "no-unit");
5491 fprintf (dump, ";; %-8s", "=====");
5492 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5493 if (function_units[unit].bitmask & target_units)
5494 for (i = 0; i < function_units[unit].multiplicity; i++)
5495 fprintf (dump, " %-33s", "==============================");
5496 fprintf (dump, " %-8s\n", "=======");
5498 /* Print insns in each cycle. */
5499 fprintf (dump, "%s\n", visual_tbl);
5502 /* Print insns in the 'no_unit' column of visualization. */
5505 visualize_no_unit (insn)
5508 vis_no_unit[n_vis_no_unit] = insn;
5512 /* Print insns scheduled in clock, for visualization. */
5515 visualize_scheduled_insns (b, clock)
5520 /* If no more room, split table into two. */
5521 if (n_visual_lines >= MAX_VISUAL_LINES)
5523 print_block_visualization (b, "(incomplete)");
5524 init_block_visualization ();
5529 sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock);
5530 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5531 if (function_units[unit].bitmask & target_units)
5532 for (i = 0; i < function_units[unit].multiplicity; i++)
5534 int instance = unit + i * FUNCTION_UNITS_SIZE;
5535 rtx insn = unit_last_insn[instance];
5537 /* Print insns that still keep the unit busy. */
5539 actual_hazard_this_instance (unit, instance, insn, clock, 0))
5542 print_insn (str, insn, 0);
5543 str[INSN_LEN] = '\0';
5544 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str);
5547 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------");
5550 /* Print insns that are not assigned to any unit. */
5551 for (i = 0; i < n_vis_no_unit; i++)
5552 sprintf (visual_tbl + strlen (visual_tbl), " %-8d",
5553 INSN_UID (vis_no_unit[i]));
5556 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5559 /* Print stalled cycles. */
5562 visualize_stall_cycles (b, stalls)
5567 /* If no more room, split table into two. */
5568 if (n_visual_lines >= MAX_VISUAL_LINES)
5570 print_block_visualization (b, "(incomplete)");
5571 init_block_visualization ();
5576 sprintf (visual_tbl + strlen (visual_tbl), ";; ");
5577 for (i = 0; i < stalls; i++)
5578 sprintf (visual_tbl + strlen (visual_tbl), ".");
5579 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5582 /* move_insn1: Remove INSN from insn chain, and link it after LAST insn. */
5585 move_insn1 (insn, last)
5588 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
5589 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
5591 NEXT_INSN (insn) = NEXT_INSN (last);
5592 PREV_INSN (NEXT_INSN (last)) = insn;
5594 NEXT_INSN (last) = insn;
5595 PREV_INSN (insn) = last;
5600 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5601 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5602 NOTEs. The REG_SAVE_NOTE note following first one is contains the
5603 saved value for NOTE_BLOCK_NUMBER which is useful for
5604 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5605 output by the instruction scheduler. Return the new value of LAST. */
5608 reemit_notes (insn, last)
5615 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
5617 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5619 int note_type = INTVAL (XEXP (note, 0));
5620 if (note_type == NOTE_INSN_SETJMP)
5622 retval = emit_note_after (NOTE_INSN_SETJMP, insn);
5623 CONST_CALL_P (retval) = CONST_CALL_P (note);
5624 remove_note (insn, note);
5625 note = XEXP (note, 1);
5627 else if (note_type == NOTE_INSN_RANGE_START
5628 || note_type == NOTE_INSN_RANGE_END)
5630 last = emit_note_before (note_type, last);
5631 remove_note (insn, note);
5632 note = XEXP (note, 1);
5633 NOTE_RANGE_INFO (last) = XEXP (note, 0);
5637 last = emit_note_before (note_type, last);
5638 remove_note (insn, note);
5639 note = XEXP (note, 1);
5640 if (note_type == NOTE_INSN_EH_REGION_BEG
5641 || note_type == NOTE_INSN_EH_REGION_END)
5642 NOTE_EH_HANDLER (last) = INTVAL (XEXP (note, 0));
5644 remove_note (insn, note);
5650 /* Move INSN, and all insns which should be issued before it,
5651 due to SCHED_GROUP_P flag. Reemit notes if needed.
5653 Return the last insn emitted by the scheduler, which is the
5654 return value from the first call to reemit_notes. */
5657 move_insn (insn, last)
5662 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5663 insns with SCHED_GROUP_P set first. */
5664 while (SCHED_GROUP_P (insn))
5666 rtx prev = PREV_INSN (insn);
5668 /* Move a SCHED_GROUP_P insn. */
5669 move_insn1 (insn, last);
5670 /* If this is the first call to reemit_notes, then record
5671 its return value. */
5672 if (retval == NULL_RTX)
5673 retval = reemit_notes (insn, insn);
5675 reemit_notes (insn, insn);
5679 /* Now move the first non SCHED_GROUP_P insn. */
5680 move_insn1 (insn, last);
5682 /* If this is the first call to reemit_notes, then record
5683 its return value. */
5684 if (retval == NULL_RTX)
5685 retval = reemit_notes (insn, insn);
5687 reemit_notes (insn, insn);
5692 /* Return an insn which represents a SCHED_GROUP, which is
5693 the last insn in the group. */
5704 insn = next_nonnote_insn (insn);
5706 while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL));
5711 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5712 possibly bringing insns from subsequent blocks in the same region.
5713 Return number of insns scheduled. */
5716 schedule_block (bb, rgn_n_insns)
5720 /* Local variables. */
5726 /* Flow block of this bb. */
5727 int b = BB_TO_BLOCK (bb);
5729 /* target_n_insns == number of insns in b before scheduling starts.
5730 sched_target_n_insns == how many of b's insns were scheduled.
5731 sched_n_insns == how many insns were scheduled in b. */
5732 int target_n_insns = 0;
5733 int sched_target_n_insns = 0;
5734 int sched_n_insns = 0;
5736 #define NEED_NOTHING 0
5741 /* Head/tail info for this block. */
5748 /* We used to have code to avoid getting parameters moved from hard
5749 argument registers into pseudos.
5751 However, it was removed when it proved to be of marginal benefit
5752 and caused problems because schedule_block and compute_forward_dependences
5753 had different notions of what the "head" insn was. */
5754 get_bb_head_tail (bb, &head, &tail);
5756 /* Interblock scheduling could have moved the original head insn from this
5757 block into a proceeding block. This may also cause schedule_block and
5758 compute_forward_dependences to have different notions of what the
5761 If the interblock movement happened to make this block start with
5762 some notes (LOOP, EH or SETJMP) before the first real insn, then
5763 HEAD will have various special notes attached to it which must be
5764 removed so that we don't end up with extra copies of the notes. */
5765 if (GET_RTX_CLASS (GET_CODE (head)) == 'i')
5769 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
5770 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5771 remove_note (head, note);
5774 next_tail = NEXT_INSN (tail);
5775 prev_head = PREV_INSN (head);
5777 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5778 to schedule this block. */
5780 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5781 return (sched_n_insns);
5786 fprintf (dump, ";; ======================================================\n");
5788 ";; -- basic block %d from %d to %d -- %s reload\n",
5789 b, INSN_UID (BLOCK_HEAD (b)), INSN_UID (BLOCK_END (b)),
5790 (reload_completed ? "after" : "before"));
5791 fprintf (dump, ";; ======================================================\n");
5792 fprintf (dump, "\n");
5794 visual_tbl = (char *) alloca (get_visual_tbl_length ());
5795 init_block_visualization ();
5798 /* Remove remaining note insns from the block, save them in
5799 note_list. These notes are restored at the end of
5800 schedule_block (). */
5802 rm_other_notes (head, tail);
5806 /* Prepare current target block info. */
5807 if (current_nr_blocks > 1)
5809 candidate_table = (candidate *) xmalloc (current_nr_blocks
5810 * sizeof (candidate));
5813 /* ??? It is not clear why bblst_size is computed this way. The original
5814 number was clearly too small as it resulted in compiler failures.
5815 Multiplying by the original number by 2 (to account for update_bbs
5816 members) seems to be a reasonable solution. */
5817 /* ??? Or perhaps there is a bug somewhere else in this file? */
5818 bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
5819 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
5821 bitlst_table_last = 0;
5822 bitlst_table_size = rgn_nr_edges;
5823 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
5825 compute_trg_info (bb);
5830 /* Allocate the ready list. */
5831 ready = (rtx *) xmalloc ((rgn_n_insns + 1) * sizeof (rtx));
5833 /* Print debugging information. */
5834 if (sched_verbose >= 5)
5835 debug_dependencies ();
5838 /* Initialize ready list with all 'ready' insns in target block.
5839 Count number of insns in the target block being scheduled. */
5841 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5845 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5847 next = NEXT_INSN (insn);
5849 if (INSN_DEP_COUNT (insn) == 0
5850 && (SCHED_GROUP_P (next) == 0 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5851 ready[n_ready++] = insn;
5852 if (!(SCHED_GROUP_P (insn)))
5856 /* Add to ready list all 'ready' insns in valid source blocks.
5857 For speculative insns, check-live, exception-free, and
5859 for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
5860 if (IS_VALID (bb_src))
5866 get_bb_head_tail (bb_src, &head, &tail);
5867 src_next_tail = NEXT_INSN (tail);
5871 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5874 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
5876 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5879 if (!CANT_MOVE (insn)
5880 && (!IS_SPECULATIVE_INSN (insn)
5881 || (insn_issue_delay (insn) <= 3
5882 && check_live (insn, bb_src)
5883 && is_exception_free (insn, bb_src, target_bb))))
5887 /* Note that we havn't squirrled away the notes for
5888 blocks other than the current. So if this is a
5889 speculative insn, NEXT might otherwise be a note. */
5890 next = next_nonnote_insn (insn);
5891 if (INSN_DEP_COUNT (insn) == 0
5893 || SCHED_GROUP_P (next) == 0
5894 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5895 ready[n_ready++] = insn;
5900 #ifdef MD_SCHED_INIT
5901 MD_SCHED_INIT (dump, sched_verbose);
5904 /* No insns scheduled in this block yet. */
5905 last_scheduled_insn = 0;
5907 /* Q_SIZE is the total number of insns in the queue. */
5911 bzero ((char *) insn_queue, sizeof (insn_queue));
5913 /* Start just before the beginning of time. */
5916 /* We start inserting insns after PREV_HEAD. */
5919 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
5920 new_needs = (NEXT_INSN (prev_head) == BLOCK_HEAD (b)
5921 ? NEED_HEAD : NEED_NOTHING);
5922 if (PREV_INSN (next_tail) == BLOCK_END (b))
5923 new_needs |= NEED_TAIL;
5925 /* Loop until all the insns in BB are scheduled. */
5926 while (sched_target_n_insns < target_n_insns)
5930 /* Add to the ready list all pending insns that can be issued now.
5931 If there are no ready insns, increment clock until one
5932 is ready and add all pending insns at that point to the ready
5934 n_ready = queue_to_ready (ready, n_ready);
5939 if (sched_verbose >= 2)
5941 fprintf (dump, ";;\t\tReady list after queue_to_ready: ");
5942 debug_ready_list (ready, n_ready);
5945 /* Sort the ready list based on priority. */
5946 SCHED_SORT (ready, n_ready);
5948 /* Allow the target to reorder the list, typically for
5949 better instruction bundling. */
5950 #ifdef MD_SCHED_REORDER
5951 MD_SCHED_REORDER (dump, sched_verbose, ready, n_ready, clock_var,
5954 can_issue_more = issue_rate;
5959 fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
5960 debug_ready_list (ready, n_ready);
5963 /* Issue insns from ready list. */
5964 while (n_ready != 0 && can_issue_more)
5966 /* Select and remove the insn from the ready list. */
5967 rtx insn = ready[--n_ready];
5968 int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
5972 queue_insn (insn, cost);
5976 /* An interblock motion? */
5977 if (INSN_BB (insn) != target_bb)
5982 if (IS_SPECULATIVE_INSN (insn))
5984 if (!check_live (insn, INSN_BB (insn)))
5986 update_live (insn, INSN_BB (insn));
5988 /* For speculative load, mark insns fed by it. */
5989 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
5990 set_spec_fed (insn);
5996 /* Find the beginning of the scheduling group. */
5997 /* ??? Ought to update basic block here, but later bits of
5998 schedule_block assumes the original insn block is
6002 while (SCHED_GROUP_P (temp))
6003 temp = PREV_INSN (temp);
6005 /* Update source block boundaries. */
6006 b1 = BLOCK_FOR_INSN (temp);
6007 if (temp == b1->head && insn == b1->end)
6009 /* We moved all the insns in the basic block.
6010 Emit a note after the last insn and update the
6011 begin/end boundaries to point to the note. */
6012 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
6016 else if (insn == b1->end)
6018 /* We took insns from the end of the basic block,
6019 so update the end of block boundary so that it
6020 points to the first insn we did not move. */
6021 b1->end = PREV_INSN (temp);
6023 else if (temp == b1->head)
6025 /* We took insns from the start of the basic block,
6026 so update the start of block boundary so that
6027 it points to the first insn we did not move. */
6028 b1->head = NEXT_INSN (insn);
6033 /* In block motion. */
6034 sched_target_n_insns++;
6037 last_scheduled_insn = insn;
6038 last = move_insn (insn, last);
6041 #ifdef MD_SCHED_VARIABLE_ISSUE
6042 MD_SCHED_VARIABLE_ISSUE (dump, sched_verbose, insn,
6048 n_ready = schedule_insn (insn, ready, n_ready, clock_var);
6050 /* Close this block after scheduling its jump. */
6051 if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
6057 visualize_scheduled_insns (b, clock_var);
6063 fprintf (dump, ";;\tReady list (final): ");
6064 debug_ready_list (ready, n_ready);
6065 print_block_visualization (b, "");
6068 /* Sanity check -- queue must be empty now. Meaningless if region has
6070 if (current_nr_blocks > 1)
6071 if (!flag_schedule_interblock && q_size != 0)
6074 /* Update head/tail boundaries. */
6075 head = NEXT_INSN (prev_head);
6078 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6079 previously found among the insns. Insert them at the beginning
6083 rtx note_head = note_list;
6085 while (PREV_INSN (note_head))
6087 note_head = PREV_INSN (note_head);
6090 PREV_INSN (note_head) = PREV_INSN (head);
6091 NEXT_INSN (PREV_INSN (head)) = note_head;
6092 PREV_INSN (head) = note_list;
6093 NEXT_INSN (note_list) = head;
6097 /* Update target block boundaries. */
6098 if (new_needs & NEED_HEAD)
6099 BLOCK_HEAD (b) = head;
6101 if (new_needs & NEED_TAIL)
6102 BLOCK_END (b) = tail;
6107 fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n",
6108 clock_var, INSN_UID (BLOCK_HEAD (b)));
6109 fprintf (dump, ";; new basic block end = %d\n\n",
6110 INSN_UID (BLOCK_END (b)));
6114 if (current_nr_blocks > 1)
6116 free (candidate_table);
6118 free (bitlst_table);
6122 return (sched_n_insns);
6123 } /* schedule_block () */
6126 /* Print the bit-set of registers, S, callable from debugger. */
6129 debug_reg_vector (s)
6134 EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
6136 fprintf (dump, " %d", regno);
6139 fprintf (dump, "\n");
6142 /* Use the backward dependences from LOG_LINKS to build
6143 forward dependences in INSN_DEPEND. */
6146 compute_block_forward_dependences (bb)
6152 enum reg_note dep_type;
6154 get_bb_head_tail (bb, &head, &tail);
6155 next_tail = NEXT_INSN (tail);
6156 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6158 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6161 insn = group_leader (insn);
6163 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6165 rtx x = group_leader (XEXP (link, 0));
6168 if (x != XEXP (link, 0))
6171 #ifdef ENABLE_CHECKING
6172 /* If add_dependence is working properly there should never
6173 be notes, deleted insns or duplicates in the backward
6174 links. Thus we need not check for them here.
6176 However, if we have enabled checking we might as well go
6177 ahead and verify that add_dependence worked properly. */
6178 if (GET_CODE (x) == NOTE
6179 || INSN_DELETED_P (x)
6180 || find_insn_list (insn, INSN_DEPEND (x)))
6184 new_link = alloc_INSN_LIST (insn, INSN_DEPEND (x));
6186 dep_type = REG_NOTE_KIND (link);
6187 PUT_REG_NOTE_KIND (new_link, dep_type);
6189 INSN_DEPEND (x) = new_link;
6190 INSN_DEP_COUNT (insn) += 1;
6195 /* Initialize variables for region data dependence analysis.
6196 n_bbs is the number of region blocks. */
6198 __inline static void
6199 init_rgn_data_dependences (n_bbs)
6204 /* Variables for which one copy exists for each block. */
6205 bzero ((char *) bb_pending_read_insns, n_bbs * sizeof (rtx));
6206 bzero ((char *) bb_pending_read_mems, n_bbs * sizeof (rtx));
6207 bzero ((char *) bb_pending_write_insns, n_bbs * sizeof (rtx));
6208 bzero ((char *) bb_pending_write_mems, n_bbs * sizeof (rtx));
6209 bzero ((char *) bb_pending_lists_length, n_bbs * sizeof (int));
6210 bzero ((char *) bb_last_pending_memory_flush, n_bbs * sizeof (rtx));
6211 bzero ((char *) bb_last_function_call, n_bbs * sizeof (rtx));
6212 bzero ((char *) bb_sched_before_next_call, n_bbs * sizeof (rtx));
6214 /* Create an insn here so that we can hang dependencies off of it later. */
6215 for (bb = 0; bb < n_bbs; bb++)
6217 bb_sched_before_next_call[bb] =
6218 gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6219 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6220 LOG_LINKS (bb_sched_before_next_call[bb]) = 0;
6224 /* Add dependences so that branches are scheduled to run last in their
6228 add_branch_dependences (head, tail)
6234 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
6235 to remain in order at the end of the block by adding dependencies and
6236 giving the last a high priority. There may be notes present, and
6237 prev_head may also be a note.
6239 Branches must obviously remain at the end. Calls should remain at the
6240 end since moving them results in worse register allocation. Uses remain
6241 at the end to ensure proper register allocation. cc0 setters remaim
6242 at the end because they can't be moved away from their cc0 user. */
6245 while (GET_CODE (insn) == CALL_INSN
6246 || GET_CODE (insn) == JUMP_INSN
6247 || (GET_CODE (insn) == INSN
6248 && (GET_CODE (PATTERN (insn)) == USE
6249 || GET_CODE (PATTERN (insn)) == CLOBBER
6251 || sets_cc0_p (PATTERN (insn))
6254 || GET_CODE (insn) == NOTE)
6256 if (GET_CODE (insn) != NOTE)
6259 && !find_insn_list (insn, LOG_LINKS (last)))
6261 add_dependence (last, insn, REG_DEP_ANTI);
6262 INSN_REF_COUNT (insn)++;
6265 CANT_MOVE (insn) = 1;
6268 /* Skip over insns that are part of a group.
6269 Make each insn explicitly depend on the previous insn.
6270 This ensures that only the group header will ever enter
6271 the ready queue (and, when scheduled, will automatically
6272 schedule the SCHED_GROUP_P block). */
6273 while (SCHED_GROUP_P (insn))
6275 rtx temp = prev_nonnote_insn (insn);
6276 add_dependence (insn, temp, REG_DEP_ANTI);
6281 /* Don't overrun the bounds of the basic block. */
6285 insn = PREV_INSN (insn);
6288 /* Make sure these insns are scheduled last in their block. */
6291 while (insn != head)
6293 insn = prev_nonnote_insn (insn);
6295 if (INSN_REF_COUNT (insn) != 0)
6298 add_dependence (last, insn, REG_DEP_ANTI);
6299 INSN_REF_COUNT (insn) = 1;
6301 /* Skip over insns that are part of a group. */
6302 while (SCHED_GROUP_P (insn))
6303 insn = prev_nonnote_insn (insn);
6307 /* Compute backward dependences inside bb. In a multiple blocks region:
6308 (1) a bb is analyzed after its predecessors, and (2) the lists in
6309 effect at the end of bb (after analyzing for bb) are inherited by
6312 Specifically for reg-reg data dependences, the block insns are
6313 scanned by sched_analyze () top-to-bottom. Two lists are
6314 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6315 and reg_last_uses[] for register USEs.
6317 When analysis is completed for bb, we update for its successors:
6318 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6319 ; - USES[succ] = Union (USES [succ], DEFS [bb])
6321 The mechanism for computing mem-mem data dependence is very
6322 similar, and the result is interblock dependences in the region. */
6325 compute_block_backward_dependences (bb)
6331 int max_reg = max_reg_num ();
6333 b = BB_TO_BLOCK (bb);
6335 if (current_nr_blocks == 1)
6337 reg_last_uses = (rtx *) xcalloc (max_reg, sizeof (rtx));
6338 reg_last_sets = (rtx *) xcalloc (max_reg, sizeof (rtx));
6339 reg_last_clobbers = (rtx *) xcalloc (max_reg, sizeof (rtx));
6341 pending_read_insns = 0;
6342 pending_read_mems = 0;
6343 pending_write_insns = 0;
6344 pending_write_mems = 0;
6345 pending_lists_length = 0;
6346 last_function_call = 0;
6347 last_pending_memory_flush = 0;
6348 sched_before_next_call
6349 = gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6350 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6351 LOG_LINKS (sched_before_next_call) = 0;
6355 reg_last_uses = bb_reg_last_uses[bb];
6356 reg_last_sets = bb_reg_last_sets[bb];
6357 reg_last_clobbers = bb_reg_last_clobbers[bb];
6359 pending_read_insns = bb_pending_read_insns[bb];
6360 pending_read_mems = bb_pending_read_mems[bb];
6361 pending_write_insns = bb_pending_write_insns[bb];
6362 pending_write_mems = bb_pending_write_mems[bb];
6363 pending_lists_length = bb_pending_lists_length[bb];
6364 last_function_call = bb_last_function_call[bb];
6365 last_pending_memory_flush = bb_last_pending_memory_flush[bb];
6367 sched_before_next_call = bb_sched_before_next_call[bb];
6370 /* Do the analysis for this block. */
6371 get_bb_head_tail (bb, &head, &tail);
6372 sched_analyze (head, tail);
6373 add_branch_dependences (head, tail);
6375 if (current_nr_blocks > 1)
6378 int b_succ, bb_succ;
6380 rtx link_insn, link_mem;
6383 /* These lists should point to the right place, for correct
6385 bb_pending_read_insns[bb] = pending_read_insns;
6386 bb_pending_read_mems[bb] = pending_read_mems;
6387 bb_pending_write_insns[bb] = pending_write_insns;
6388 bb_pending_write_mems[bb] = pending_write_mems;
6390 /* bb's structures are inherited by it's successors. */
6391 first_edge = e = OUT_EDGES (b);
6395 b_succ = TO_BLOCK (e);
6396 bb_succ = BLOCK_TO_BB (b_succ);
6398 /* Only bbs "below" bb, in the same region, are interesting. */
6399 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
6406 for (reg = 0; reg < max_reg; reg++)
6409 /* reg-last-uses lists are inherited by bb_succ. */
6410 for (u = reg_last_uses[reg]; u; u = XEXP (u, 1))
6412 if (find_insn_list (XEXP (u, 0),
6413 (bb_reg_last_uses[bb_succ])[reg]))
6416 (bb_reg_last_uses[bb_succ])[reg]
6417 = alloc_INSN_LIST (XEXP (u, 0),
6418 (bb_reg_last_uses[bb_succ])[reg]);
6421 /* reg-last-defs lists are inherited by bb_succ. */
6422 for (u = reg_last_sets[reg]; u; u = XEXP (u, 1))
6424 if (find_insn_list (XEXP (u, 0),
6425 (bb_reg_last_sets[bb_succ])[reg]))
6428 (bb_reg_last_sets[bb_succ])[reg]
6429 = alloc_INSN_LIST (XEXP (u, 0),
6430 (bb_reg_last_sets[bb_succ])[reg]);
6433 for (u = reg_last_clobbers[reg]; u; u = XEXP (u, 1))
6435 if (find_insn_list (XEXP (u, 0),
6436 (bb_reg_last_clobbers[bb_succ])[reg]))
6439 (bb_reg_last_clobbers[bb_succ])[reg]
6440 = alloc_INSN_LIST (XEXP (u, 0),
6441 (bb_reg_last_clobbers[bb_succ])[reg]);
6445 /* Mem read/write lists are inherited by bb_succ. */
6446 link_insn = pending_read_insns;
6447 link_mem = pending_read_mems;
6450 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6452 bb_pending_read_insns[bb_succ],
6453 bb_pending_read_mems[bb_succ])))
6454 add_insn_mem_dependence (&bb_pending_read_insns[bb_succ],
6455 &bb_pending_read_mems[bb_succ],
6456 XEXP (link_insn, 0), XEXP (link_mem, 0));
6457 link_insn = XEXP (link_insn, 1);
6458 link_mem = XEXP (link_mem, 1);
6461 link_insn = pending_write_insns;
6462 link_mem = pending_write_mems;
6465 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6467 bb_pending_write_insns[bb_succ],
6468 bb_pending_write_mems[bb_succ])))
6469 add_insn_mem_dependence (&bb_pending_write_insns[bb_succ],
6470 &bb_pending_write_mems[bb_succ],
6471 XEXP (link_insn, 0), XEXP (link_mem, 0));
6473 link_insn = XEXP (link_insn, 1);
6474 link_mem = XEXP (link_mem, 1);
6477 /* last_function_call is inherited by bb_succ. */
6478 for (u = last_function_call; u; u = XEXP (u, 1))
6480 if (find_insn_list (XEXP (u, 0),
6481 bb_last_function_call[bb_succ]))
6484 bb_last_function_call[bb_succ]
6485 = alloc_INSN_LIST (XEXP (u, 0),
6486 bb_last_function_call[bb_succ]);
6489 /* last_pending_memory_flush is inherited by bb_succ. */
6490 for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
6492 if (find_insn_list (XEXP (u, 0),
6493 bb_last_pending_memory_flush[bb_succ]))
6496 bb_last_pending_memory_flush[bb_succ]
6497 = alloc_INSN_LIST (XEXP (u, 0),
6498 bb_last_pending_memory_flush[bb_succ]);
6501 /* sched_before_next_call is inherited by bb_succ. */
6502 x = LOG_LINKS (sched_before_next_call);
6503 for (; x; x = XEXP (x, 1))
6504 add_dependence (bb_sched_before_next_call[bb_succ],
6505 XEXP (x, 0), REG_DEP_ANTI);
6509 while (e != first_edge);
6512 /* Free up the INSN_LISTs.
6514 Note this loop is executed max_reg * nr_regions times. It's first
6515 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6516 The list was empty for the vast majority of those calls. On the PA, not
6517 calling free_INSN_LIST_list in those cases improves -O2 compile times by
6519 for (b = 0; b < max_reg; ++b)
6521 if (reg_last_clobbers[b])
6522 free_INSN_LIST_list (®_last_clobbers[b]);
6523 if (reg_last_sets[b])
6524 free_INSN_LIST_list (®_last_sets[b]);
6525 if (reg_last_uses[b])
6526 free_INSN_LIST_list (®_last_uses[b]);
6529 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6530 if (current_nr_blocks > 1)
6532 bb_reg_last_uses[bb] = (rtx *) NULL_RTX;
6533 bb_reg_last_sets[bb] = (rtx *) NULL_RTX;
6534 bb_reg_last_clobbers[bb] = (rtx *) NULL_RTX;
6536 else if (current_nr_blocks == 1)
6538 free (reg_last_uses);
6539 free (reg_last_sets);
6540 free (reg_last_clobbers);
6544 /* Print dependences for debugging, callable from debugger. */
6547 debug_dependencies ()
6551 fprintf (dump, ";; --------------- forward dependences: ------------ \n");
6552 for (bb = 0; bb < current_nr_blocks; bb++)
6560 get_bb_head_tail (bb, &head, &tail);
6561 next_tail = NEXT_INSN (tail);
6562 fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n",
6563 BB_TO_BLOCK (bb), bb);
6565 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6566 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6567 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6568 "----", "----", "--", "---", "----", "----", "--------", "-----");
6569 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6574 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6577 fprintf (dump, ";; %6d ", INSN_UID (insn));
6578 if (GET_CODE (insn) == NOTE)
6580 n = NOTE_LINE_NUMBER (insn);
6582 fprintf (dump, "%s\n", GET_NOTE_INSN_NAME (n));
6584 fprintf (dump, "line %d, file %s\n", n,
6585 NOTE_SOURCE_FILE (insn));
6588 fprintf (dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
6592 unit = insn_unit (insn);
6594 || function_units[unit].blockage_range_function == 0) ? 0 :
6595 function_units[unit].blockage_range_function (insn);
6597 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6598 (SCHED_GROUP_P (insn) ? "+" : " "),
6602 INSN_DEP_COUNT (insn),
6603 INSN_PRIORITY (insn),
6604 insn_cost (insn, 0, 0),
6605 (int) MIN_BLOCKAGE_COST (range),
6606 (int) MAX_BLOCKAGE_COST (range));
6607 insn_print_units (insn);
6608 fprintf (dump, "\t: ");
6609 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
6610 fprintf (dump, "%d ", INSN_UID (XEXP (link, 0)));
6611 fprintf (dump, "\n");
6615 fprintf (dump, "\n");
6618 /* Set_priorities: compute priority of each insn in the block. */
6631 get_bb_head_tail (bb, &head, &tail);
6632 prev_head = PREV_INSN (head);
6635 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
6639 for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
6642 if (GET_CODE (insn) == NOTE)
6645 if (!(SCHED_GROUP_P (insn)))
6647 (void) priority (insn);
6653 /* Make each element of VECTOR point at an rtx-vector,
6654 taking the space for all those rtx-vectors from SPACE.
6655 SPACE is of type (rtx *), but it is really as long as NELTS rtx-vectors.
6656 BYTES_PER_ELT is the number of bytes in one rtx-vector.
6657 (this is the same as init_regset_vector () in flow.c) */
6660 init_rtx_vector (vector, space, nelts, bytes_per_elt)
6667 register rtx *p = space;
6669 for (i = 0; i < nelts; i++)
6672 p += bytes_per_elt / sizeof (*p);
6676 /* Schedule a region. A region is either an inner loop, a loop-free
6677 subroutine, or a single basic block. Each bb in the region is
6678 scheduled after its flow predecessors. */
6681 schedule_region (rgn)
6685 int rgn_n_insns = 0;
6686 int sched_rgn_n_insns = 0;
6687 rtx *bb_reg_last_uses_space = NULL;
6688 rtx *bb_reg_last_sets_space = NULL;
6689 rtx *bb_reg_last_clobbers_space = NULL;
6691 /* Set variables for the current region. */
6692 current_nr_blocks = RGN_NR_BLOCKS (rgn);
6693 current_blocks = RGN_BLOCKS (rgn);
6695 reg_pending_sets = ALLOCA_REG_SET ();
6696 reg_pending_clobbers = ALLOCA_REG_SET ();
6697 reg_pending_sets_all = 0;
6699 /* Initializations for region data dependence analyisis. */
6700 if (current_nr_blocks > 1)
6702 int maxreg = max_reg_num ();
6704 bb_reg_last_uses = (rtx **) xmalloc (current_nr_blocks * sizeof (rtx *));
6705 bb_reg_last_uses_space
6706 = (rtx *) xcalloc (current_nr_blocks * maxreg, sizeof (rtx));
6707 init_rtx_vector (bb_reg_last_uses, bb_reg_last_uses_space,
6708 current_nr_blocks, maxreg * sizeof (rtx *));
6710 bb_reg_last_sets = (rtx **) xmalloc (current_nr_blocks * sizeof (rtx *));
6711 bb_reg_last_sets_space
6712 = (rtx *) xcalloc (current_nr_blocks * maxreg, sizeof (rtx));
6713 init_rtx_vector (bb_reg_last_sets, bb_reg_last_sets_space,
6714 current_nr_blocks, maxreg * sizeof (rtx *));
6716 bb_reg_last_clobbers =
6717 (rtx **) xmalloc (current_nr_blocks * sizeof (rtx *));
6718 bb_reg_last_clobbers_space
6719 = (rtx *) xcalloc (current_nr_blocks * maxreg, sizeof (rtx));
6720 init_rtx_vector (bb_reg_last_clobbers, bb_reg_last_clobbers_space,
6721 current_nr_blocks, maxreg * sizeof (rtx *));
6723 bb_pending_read_insns
6724 = (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6725 bb_pending_read_mems
6726 = (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6727 bb_pending_write_insns =
6728 (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6729 bb_pending_write_mems
6730 = (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6731 bb_pending_lists_length =
6732 (int *) xmalloc (current_nr_blocks * sizeof (int));
6733 bb_last_pending_memory_flush =
6734 (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6735 bb_last_function_call
6736 = (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6737 bb_sched_before_next_call =
6738 (rtx *) xmalloc (current_nr_blocks * sizeof (rtx));
6740 init_rgn_data_dependences (current_nr_blocks);
6743 /* Compute LOG_LINKS. */
6744 for (bb = 0; bb < current_nr_blocks; bb++)
6745 compute_block_backward_dependences (bb);
6747 /* Compute INSN_DEPEND. */
6748 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6749 compute_block_forward_dependences (bb);
6751 /* Delete line notes and set priorities. */
6752 for (bb = 0; bb < current_nr_blocks; bb++)
6754 if (write_symbols != NO_DEBUG)
6756 save_line_notes (bb);
6760 rgn_n_insns += set_priorities (bb);
6763 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6764 if (current_nr_blocks > 1)
6768 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
6770 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
6771 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
6772 for (i = 0; i < current_nr_blocks; i++)
6773 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
6777 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
6778 for (i = 1; i < nr_edges; i++)
6779 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
6780 EDGE_TO_BIT (i) = rgn_nr_edges++;
6781 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
6784 for (i = 1; i < nr_edges; i++)
6785 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
6786 rgn_edges[rgn_nr_edges++] = i;
6789 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
6790 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6792 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6793 for (i = 0; i < current_nr_blocks; i++)
6796 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6798 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6801 /* Compute probabilities, dominators, split_edges. */
6802 for (bb = 0; bb < current_nr_blocks; bb++)
6803 compute_dom_prob_ps (bb);
6806 /* Now we can schedule all blocks. */
6807 for (bb = 0; bb < current_nr_blocks; bb++)
6808 sched_rgn_n_insns += schedule_block (bb, rgn_n_insns);
6810 /* Sanity check: verify that all region insns were scheduled. */
6811 if (sched_rgn_n_insns != rgn_n_insns)
6814 /* Restore line notes. */
6815 if (write_symbols != NO_DEBUG)
6817 for (bb = 0; bb < current_nr_blocks; bb++)
6818 restore_line_notes (bb);
6821 /* Done with this region. */
6822 free_pending_lists ();
6824 FREE_REG_SET (reg_pending_sets);
6825 FREE_REG_SET (reg_pending_clobbers);
6827 if (current_nr_blocks > 1)
6831 free (bb_reg_last_uses_space);
6832 free (bb_reg_last_uses);
6833 free (bb_reg_last_sets_space);
6834 free (bb_reg_last_sets);
6835 free (bb_reg_last_clobbers_space);
6836 free (bb_reg_last_clobbers);
6837 free (bb_pending_read_insns);
6838 free (bb_pending_read_mems);
6839 free (bb_pending_write_insns);
6840 free (bb_pending_write_mems);
6841 free (bb_pending_lists_length);
6842 free (bb_last_pending_memory_flush);
6843 free (bb_last_function_call);
6844 free (bb_sched_before_next_call);
6846 for (i = 0; i < current_nr_blocks; ++i)
6849 free (pot_split[i]);
6850 free (ancestor_edges[i]);
6856 free (ancestor_edges);
6860 /* The one entry point in this file. DUMP_FILE is the dump file for
6864 schedule_insns (dump_file)
6867 int *deaths_in_region;
6868 sbitmap blocks, large_region_blocks;
6874 int any_large_regions;
6876 /* Disable speculative loads in their presence if cc0 defined. */
6878 flag_schedule_speculative_load = 0;
6881 /* Taking care of this degenerate case makes the rest of
6882 this code simpler. */
6883 if (n_basic_blocks == 0)
6886 /* Set dump and sched_verbose for the desired debugging output. If no
6887 dump-file was specified, but -fsched-verbose-N (any N), print to stderr.
6888 For -fsched-verbose-N, N>=10, print everything to stderr. */
6889 sched_verbose = sched_verbose_param;
6890 if (sched_verbose_param == 0 && dump_file)
6892 dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);
6897 /* Initialize issue_rate. */
6898 issue_rate = ISSUE_RATE;
6900 split_all_insns (1);
6902 /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
6903 pseudos which do not cross calls. */
6904 max_uid = get_max_uid () + 1;
6906 h_i_d = (struct haifa_insn_data *) xcalloc (max_uid, sizeof (*h_i_d));
6910 for (b = 0; b < n_basic_blocks; b++)
6911 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
6913 INSN_LUID (insn) = luid;
6915 /* Increment the next luid, unless this is a note. We don't
6916 really need separate IDs for notes and we don't want to
6917 schedule differently depending on whether or not there are
6918 line-number notes, i.e., depending on whether or not we're
6919 generating debugging information. */
6920 if (GET_CODE (insn) != NOTE)
6923 if (insn == BLOCK_END (b))
6927 /* ?!? We could save some memory by computing a per-region luid mapping
6928 which could reduce both the number of vectors in the cache and the size
6929 of each vector. Instead we just avoid the cache entirely unless the
6930 average number of instructions in a basic block is very high. See
6931 the comment before the declaration of true_dependency_cache for
6932 what we consider "very high". */
6933 if (luid / n_basic_blocks > 100 * 5)
6935 true_dependency_cache = sbitmap_vector_alloc (luid, luid);
6936 sbitmap_vector_zero (true_dependency_cache, luid);
6940 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
6941 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6942 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6943 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6945 blocks = sbitmap_alloc (n_basic_blocks);
6946 large_region_blocks = sbitmap_alloc (n_basic_blocks);
6948 compute_bb_for_insn (max_uid);
6950 /* Compute regions for scheduling. */
6951 if (reload_completed
6952 || n_basic_blocks == 1
6953 || !flag_schedule_interblock)
6955 find_single_block_region ();
6959 /* Verify that a 'good' control flow graph can be built. */
6960 if (is_cfg_nonregular ())
6962 find_single_block_region ();
6966 int_list_ptr *s_preds, *s_succs;
6967 int *num_preds, *num_succs;
6968 sbitmap *dom, *pdom;
6970 s_preds = (int_list_ptr *) xmalloc (n_basic_blocks
6971 * sizeof (int_list_ptr));
6972 s_succs = (int_list_ptr *) xmalloc (n_basic_blocks
6973 * sizeof (int_list_ptr));
6974 num_preds = (int *) xmalloc (n_basic_blocks * sizeof (int));
6975 num_succs = (int *) xmalloc (n_basic_blocks * sizeof (int));
6976 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
6977 pdom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
6979 /* The scheduler runs after flow; therefore, we can't blindly call
6980 back into find_basic_blocks since doing so could invalidate the
6981 info in global_live_at_start.
6983 Consider a block consisting entirely of dead stores; after life
6984 analysis it would be a block of NOTE_INSN_DELETED notes. If
6985 we call find_basic_blocks again, then the block would be removed
6986 entirely and invalidate our the register live information.
6988 We could (should?) recompute register live information. Doing
6989 so may even be beneficial. */
6991 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
6993 /* Compute the dominators and post dominators. We don't
6994 currently use post dominators, but we should for
6995 speculative motion analysis. */
6996 compute_dominators (dom, pdom, s_preds, s_succs);
6998 /* build_control_flow will return nonzero if it detects unreachable
6999 blocks or any other irregularity with the cfg which prevents
7000 cross block scheduling. */
7001 if (build_control_flow (s_preds, s_succs, num_preds, num_succs) != 0)
7002 find_single_block_region ();
7004 find_rgns (s_preds, s_succs, num_preds, num_succs, dom);
7006 if (sched_verbose >= 3)
7009 /* For now. This will move as more and more of haifa is converted
7010 to using the cfg code in flow.c. */
7021 deaths_in_region = (int *) xmalloc (sizeof(int) * nr_regions);
7023 init_alias_analysis ();
7025 if (write_symbols != NO_DEBUG)
7029 line_note_head = (rtx *) xcalloc (n_basic_blocks, sizeof (rtx));
7031 /* Save-line-note-head:
7032 Determine the line-number at the start of each basic block.
7033 This must be computed and saved now, because after a basic block's
7034 predecessor has been scheduled, it is impossible to accurately
7035 determine the correct line number for the first insn of the block. */
7037 for (b = 0; b < n_basic_blocks; b++)
7038 for (line = BLOCK_HEAD (b); line; line = PREV_INSN (line))
7039 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
7041 line_note_head[b] = line;
7046 /* Find units used in this fuction, for visualization. */
7048 init_target_units ();
7050 /* ??? Add a NOTE after the last insn of the last basic block. It is not
7051 known why this is done. */
7053 insn = BLOCK_END (n_basic_blocks - 1);
7054 if (NEXT_INSN (insn) == 0
7055 || (GET_CODE (insn) != NOTE
7056 && GET_CODE (insn) != CODE_LABEL
7057 /* Don't emit a NOTE if it would end up between an unconditional
7058 jump and a BARRIER. */
7059 && !(GET_CODE (insn) == JUMP_INSN
7060 && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
7061 emit_note_after (NOTE_INSN_DELETED, BLOCK_END (n_basic_blocks - 1));
7063 /* Compute INSN_REG_WEIGHT for all blocks. We must do this before
7064 removing death notes. */
7065 for (b = n_basic_blocks - 1; b >= 0; b--)
7066 find_insn_reg_weight (b);
7068 /* Remove all death notes from the subroutine. */
7069 for (rgn = 0; rgn < nr_regions; rgn++)
7071 sbitmap_zero (blocks);
7072 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
7073 SET_BIT (blocks, rgn_bb_table [RGN_BLOCKS (rgn) + b]);
7075 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
7078 /* Schedule every region in the subroutine. */
7079 for (rgn = 0; rgn < nr_regions; rgn++)
7080 schedule_region (rgn);
7082 /* Update life analysis for the subroutine. Do single block regions
7083 first so that we can verify that live_at_start didn't change. Then
7084 do all other blocks. */
7085 /* ??? There is an outside possibility that update_life_info, or more
7086 to the point propagate_block, could get called with non-zero flags
7087 more than once for one basic block. This would be kinda bad if it
7088 were to happen, since REG_INFO would be accumulated twice for the
7089 block, and we'd have twice the REG_DEAD notes.
7091 I'm fairly certain that this _shouldn't_ happen, since I don't think
7092 that live_at_start should change at region heads. Not sure what the
7093 best way to test for this kind of thing... */
7095 allocate_reg_life_data ();
7096 compute_bb_for_insn (max_uid);
7098 any_large_regions = 0;
7099 sbitmap_ones (large_region_blocks);
7101 for (rgn = 0; rgn < nr_regions; rgn++)
7102 if (RGN_NR_BLOCKS (rgn) > 1)
7103 any_large_regions = 1;
7106 sbitmap_zero (blocks);
7107 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
7108 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
7110 update_life_info (blocks, UPDATE_LIFE_LOCAL,
7111 PROP_DEATH_NOTES | PROP_REG_INFO);
7113 /* In the single block case, the count of registers that died should
7114 not have changed during the schedule. */
7115 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
7119 if (any_large_regions)
7121 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
7122 PROP_DEATH_NOTES | PROP_REG_INFO);
7125 /* Reposition the prologue and epilogue notes in case we moved the
7126 prologue/epilogue insns. */
7127 if (reload_completed)
7128 reposition_prologue_and_epilogue_notes (get_insns ());
7130 /* Delete redundant line notes. */
7131 if (write_symbols != NO_DEBUG)
7132 rm_redundant_line_notes ();
7136 if (reload_completed == 0 && flag_schedule_interblock)
7138 fprintf (dump, "\n;; Procedure interblock/speculative motions == %d/%d \n",
7146 fprintf (dump, "\n\n");
7150 end_alias_analysis ();
7152 if (true_dependency_cache)
7154 free (true_dependency_cache);
7155 true_dependency_cache = NULL;
7158 free (rgn_bb_table);
7160 free (containing_rgn);
7164 if (write_symbols != NO_DEBUG)
7165 free (line_note_head);
7184 sbitmap_free (blocks);
7185 sbitmap_free (large_region_blocks);
7187 free (deaths_in_region);
7190 #endif /* INSN_SCHEDULING */