1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
27 #include "diagnostic-core.h"
30 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
38 #include "sched-int.h"
40 #include "cfglayout.h"
48 #include "tree-pass.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops (1) whose control part can be easily
88 decoupled from the rest of the loop and (2) whose loop count can
89 be easily adjusted. This is because we peel a constant number of
90 iterations into a prologue and epilogue for which we want to avoid
91 emitting the control part, and a kernel which is to iterate that
92 constant number of iterations less than the original loop. So the
93 control part should be a set of insns clearly identified and having
94 its own iv, not otherwise used in the loop (at-least for now), which
95 initializes a register before the loop to the number of iterations.
96 Currently SMS relies on the do-loop pattern to recognize such loops,
97 where (1) the control part comprises all insns defining and/or
98 using a certain 'count' register and (2) the loop count can be
99 adjusted by modifying this register prior to the loop.
100 TODO: Rely on cfgloop analysis instead. */
102 /* This page defines partial-schedule structures and functions for
103 modulo scheduling. */
105 typedef struct partial_schedule *partial_schedule_ptr;
106 typedef struct ps_insn *ps_insn_ptr;
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
114 /* Perform signed modulo, always returning a non-negative value. */
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
117 /* The number of different iterations the nodes in ps span, assuming
118 the stage boundaries are placed efficiently. */
119 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
121 /* The stage count of ps. */
122 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
124 /* A single instruction in the partial schedule. */
127 /* Identifies the instruction to be scheduled. Values smaller than
128 the ddg's num_nodes refer directly to ddg nodes. A value of
129 X - num_nodes refers to register move X. */
132 /* The (absolute) cycle in which the PS instruction is scheduled.
133 Same as SCHED_TIME (node). */
136 /* The next/prev PS_INSN in the same row. */
137 ps_insn_ptr next_in_row,
142 /* Information about a register move that has been added to a partial
144 struct ps_reg_move_info
146 /* The source of the move is defined by the ps_insn with id DEF.
147 The destination is used by the ps_insns with the ids in USES. */
151 /* The original form of USES' instructions used OLD_REG, but they
152 should now use NEW_REG. */
156 /* An instruction that sets NEW_REG to the correct value. The first
157 move associated with DEF will have an rhs of OLD_REG; later moves
158 use the result of the previous move. */
162 typedef struct ps_reg_move_info ps_reg_move_info;
163 DEF_VEC_O (ps_reg_move_info);
164 DEF_VEC_ALLOC_O (ps_reg_move_info, heap);
166 /* Holds the partial schedule as an array of II rows. Each entry of the
167 array points to a linked list of PS_INSNs, which represents the
168 instructions that are scheduled for that row. */
169 struct partial_schedule
171 int ii; /* Number of rows in the partial schedule. */
172 int history; /* Threshold for conflict checking using DFA. */
174 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
177 /* All the moves added for this partial schedule. Index X has
178 a ps_insn id of X + g->num_nodes. */
179 VEC (ps_reg_move_info, heap) *reg_moves;
181 /* rows_length[i] holds the number of instructions in the row.
182 It is used only (as an optimization) to back off quickly from
183 trying to schedule a node in a full row; that is, to avoid running
184 through futile DFA state transitions. */
187 /* The earliest absolute cycle of an insn in the partial schedule. */
190 /* The latest absolute cycle of an insn in the partial schedule. */
193 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
195 int stage_count; /* The stage count of the partial schedule. */
199 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
200 static void free_partial_schedule (partial_schedule_ptr);
201 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
202 void print_partial_schedule (partial_schedule_ptr, FILE *);
203 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
204 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
205 int, int, sbitmap, sbitmap);
206 static void rotate_partial_schedule (partial_schedule_ptr, int);
207 void set_row_column_for_ps (partial_schedule_ptr);
208 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
209 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
212 /* This page defines constants and structures for the modulo scheduling
215 static int sms_order_nodes (ddg_ptr, int, int *, int *);
216 static void set_node_sched_params (ddg_ptr);
217 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
218 static void permute_partial_schedule (partial_schedule_ptr, rtx);
219 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
221 static void duplicate_insns_of_cycles (partial_schedule_ptr,
223 static int calculate_stage_count (partial_schedule_ptr, int);
224 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
225 int, int, sbitmap, sbitmap, sbitmap);
226 static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
227 sbitmap, int, int *, int *, int *);
228 static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
229 sbitmap, int *, sbitmap, sbitmap);
230 static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
232 #define NODE_ASAP(node) ((node)->aux.count)
234 #define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x)
235 #define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
236 #define SCHED_FIRST_REG_MOVE(x) (SCHED_PARAMS (x)->first_reg_move)
237 #define SCHED_NREG_MOVES(x) (SCHED_PARAMS (x)->nreg_moves)
238 #define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
239 #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
240 #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
242 /* The scheduling parameters held for each node. */
243 typedef struct node_sched_params
245 int time; /* The absolute scheduling cycle. */
247 /* The following field (first_reg_move) is the ps_insn id of the first
248 register-move instruction added to handle the modulo-variable-expansion
249 of the register defined by this node. This register-move copies the
250 original register defined by the node. */
253 /* The number of register-move instructions added. */
256 int row; /* Holds time % ii. */
257 int stage; /* Holds time / ii. */
259 /* The column of a node inside the ps. If nodes u, v are on the same row,
260 u will precede v if column (u) < column (v). */
262 } *node_sched_params_ptr;
264 typedef struct node_sched_params node_sched_params;
265 DEF_VEC_O (node_sched_params);
266 DEF_VEC_ALLOC_O (node_sched_params, heap);
268 /* The following three functions are copied from the current scheduler
269 code in order to use sched_analyze() for computing the dependencies.
270 They are used when initializing the sched_info structure. */
272 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
276 sprintf (tmp, "i%4d", INSN_UID (insn));
281 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
282 regset used ATTRIBUTE_UNUSED)
286 static struct common_sched_info_def sms_common_sched_info;
288 static struct sched_deps_info_def sms_sched_deps_info =
290 compute_jump_reg_dependencies,
291 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
296 static struct haifa_sched_info sms_sched_info =
305 NULL, /* insn_finishes_block_p */
310 NULL, NULL, NULL, NULL,
315 /* Partial schedule instruction ID in PS is a register move. Return
316 information about it. */
317 static struct ps_reg_move_info *
318 ps_reg_move (partial_schedule_ptr ps, int id)
320 gcc_checking_assert (id >= ps->g->num_nodes);
321 return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes);
324 /* Return the rtl instruction that is being scheduled by partial schedule
325 instruction ID, which belongs to schedule PS. */
327 ps_rtl_insn (partial_schedule_ptr ps, int id)
329 if (id < ps->g->num_nodes)
330 return ps->g->nodes[id].insn;
332 return ps_reg_move (ps, id)->insn;
335 /* Partial schedule instruction ID, which belongs to PS, occurred in
336 the original (unscheduled) loop. Return the first instruction
337 in the loop that was associated with ps_rtl_insn (PS, ID).
338 If the instruction had some notes before it, this is the first
341 ps_first_note (partial_schedule_ptr ps, int id)
343 gcc_assert (id < ps->g->num_nodes);
344 return ps->g->nodes[id].first_note;
347 /* Given HEAD and TAIL which are the first and last insns in a loop;
348 return the register which controls the loop. Return zero if it has
349 more than one occurrence in the loop besides the control part or the
350 do-loop pattern is not of the form we expect. */
352 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
354 #ifdef HAVE_doloop_end
355 rtx reg, condition, insn, first_insn_not_to_check;
360 /* TODO: Free SMS's dependence on doloop_condition_get. */
361 condition = doloop_condition_get (tail);
365 if (REG_P (XEXP (condition, 0)))
366 reg = XEXP (condition, 0);
367 else if (GET_CODE (XEXP (condition, 0)) == PLUS
368 && REG_P (XEXP (XEXP (condition, 0), 0)))
369 reg = XEXP (XEXP (condition, 0), 0);
373 /* Check that the COUNT_REG has no other occurrences in the loop
374 until the decrement. We assume the control part consists of
375 either a single (parallel) branch-on-count or a (non-parallel)
376 branch immediately preceded by a single (decrement) insn. */
377 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
378 : prev_nondebug_insn (tail));
380 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
381 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
385 fprintf (dump_file, "SMS count_reg found ");
386 print_rtl_single (dump_file, reg);
387 fprintf (dump_file, " outside control in insn:\n");
388 print_rtl_single (dump_file, insn);
400 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
401 that the number of iterations is a compile-time constant. If so,
402 return the rtx that sets COUNT_REG to a constant, and set COUNT to
403 this constant. Otherwise return 0. */
405 const_iteration_count (rtx count_reg, basic_block pre_header,
406 HOST_WIDEST_INT * count)
414 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
416 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
417 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
418 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
420 rtx pat = single_set (insn);
422 if (CONST_INT_P (SET_SRC (pat)))
424 *count = INTVAL (SET_SRC (pat));
434 /* A very simple resource-based lower bound on the initiation interval.
435 ??? Improve the accuracy of this bound by considering the
436 utilization of various units. */
440 if (targetm.sched.sms_res_mii)
441 return targetm.sched.sms_res_mii (g);
443 return ((g->num_nodes - g->num_debug) / issue_rate);
447 /* A vector that contains the sched data for each ps_insn. */
448 static VEC (node_sched_params, heap) *node_sched_param_vec;
450 /* Allocate sched_params for each node and initialize it. */
452 set_node_sched_params (ddg_ptr g)
454 VEC_truncate (node_sched_params, node_sched_param_vec, 0);
455 VEC_safe_grow_cleared (node_sched_params, heap,
456 node_sched_param_vec, g->num_nodes);
460 print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
466 for (i = 0; i < num_nodes; i++)
468 node_sched_params_ptr nsp = SCHED_PARAMS (i);
471 fprintf (file, "Node = %d; INSN = %d\n", i,
472 INSN_UID (ps_rtl_insn (ps, i)));
473 fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
474 fprintf (file, " time = %d:\n", nsp->time);
475 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
476 for (j = 0; j < nsp->nreg_moves; j++)
478 ps_reg_move_info *move = ps_reg_move (ps, nsp->first_reg_move + j);
480 fprintf (file, " reg_move = ");
481 print_rtl_single (file, move->insn);
487 Breaking intra-loop register anti-dependences:
488 Each intra-loop register anti-dependence implies a cross-iteration true
489 dependence of distance 1. Therefore, we can remove such false dependencies
490 and figure out if the partial schedule broke them by checking if (for a
491 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
492 if so generate a register move. The number of such moves is equal to:
493 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
494 nreg_moves = ----------------------------------- + 1 - { dependence.
498 schedule_reg_moves (partial_schedule_ptr ps)
504 for (i = 0; i < g->num_nodes; i++)
506 ddg_node_ptr u = &g->nodes[i];
508 int nreg_moves = 0, i_reg_move;
509 rtx prev_reg, old_reg;
511 rtx set = single_set (u->insn);
513 /* Skip instructions that do not set a register. */
514 if ((set && !REG_P (SET_DEST (set))))
517 /* Compute the number of reg_moves needed for u, by looking at life
518 ranges started at u (excluding self-loops). */
519 for (e = u->out; e; e = e->next_out)
520 if (e->type == TRUE_DEP && e->dest != e->src)
522 int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
523 - SCHED_TIME (e->src->cuid)) / ii;
525 if (e->distance == 1)
526 nreg_moves4e = (SCHED_TIME (e->dest->cuid)
527 - SCHED_TIME (e->src->cuid) + ii) / ii;
529 /* If dest precedes src in the schedule of the kernel, then dest
530 will read before src writes and we can save one reg_copy. */
531 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
532 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
535 if (nreg_moves4e >= 1)
537 /* !single_set instructions are not supported yet and
538 thus we do not expect to encounter them in the loop
539 except from the doloop part. For the latter case
540 we assume no regmoves are generated as the doloop
541 instructions are tied to the branch with an edge. */
543 /* If the instruction contains auto-inc register then
544 validate that the regmov is being generated for the
545 target register rather than the inc'ed register. */
546 gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
549 nreg_moves = MAX (nreg_moves, nreg_moves4e);
555 /* Create NREG_MOVES register moves. */
556 first_move = VEC_length (ps_reg_move_info, ps->reg_moves);
557 VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves,
558 first_move + nreg_moves);
560 /* Record the moves associated with this node. */
561 first_move += ps->g->num_nodes;
562 SCHED_FIRST_REG_MOVE (i) = first_move;
563 SCHED_NREG_MOVES (i) = nreg_moves;
565 /* Generate each move. */
566 old_reg = prev_reg = SET_DEST (single_set (u->insn));
567 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
569 ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
571 move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
572 move->uses = sbitmap_alloc (g->num_nodes);
573 move->old_reg = old_reg;
574 move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
575 move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
576 sbitmap_zero (move->uses);
578 prev_reg = move->new_reg;
581 /* Every use of the register defined by node may require a different
582 copy of this register, depending on the time the use is scheduled.
583 Record which uses require which move results. */
584 for (e = u->out; e; e = e->next_out)
585 if (e->type == TRUE_DEP && e->dest != e->src)
587 int dest_copy = (SCHED_TIME (e->dest->cuid)
588 - SCHED_TIME (e->src->cuid)) / ii;
590 if (e->distance == 1)
591 dest_copy = (SCHED_TIME (e->dest->cuid)
592 - SCHED_TIME (e->src->cuid) + ii) / ii;
594 if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
595 && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
600 ps_reg_move_info *move;
602 move = ps_reg_move (ps, first_move + dest_copy - 1);
603 SET_BIT (move->uses, e->dest->cuid);
610 /* Emit the moves associated with PS. Apply the substitutions
611 associated with them. */
613 apply_reg_moves (partial_schedule_ptr ps)
615 ps_reg_move_info *move;
618 FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
621 sbitmap_iterator sbi;
623 EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
625 replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
626 df_insn_rescan (ps->g->nodes[i_use].insn);
630 FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
631 add_insn_before (move->insn, ps_first_note (ps, move->def), NULL);
634 /* Update the sched_params (time, row and stage) for node U using the II,
635 the CYCLE of U and MIN_CYCLE.
636 We're not simply taking the following
637 SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
638 because the stages may not be aligned on cycle 0. */
640 update_node_sched_params (int u, int ii, int cycle, int min_cycle)
642 int sc_until_cycle_zero;
645 SCHED_TIME (u) = cycle;
646 SCHED_ROW (u) = SMODULO (cycle, ii);
648 /* The calculation of stage count is done adding the number
649 of stages before cycle zero and after cycle zero. */
650 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
652 if (SCHED_TIME (u) < 0)
654 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
655 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
659 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
660 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
664 /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
665 SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
666 will move to cycle zero. */
668 reset_sched_times (partial_schedule_ptr ps, int amount)
672 ps_insn_ptr crr_insn;
674 for (row = 0; row < ii; row++)
675 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
677 int u = crr_insn->id;
678 int normalized_time = SCHED_TIME (u) - amount;
679 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
683 /* Print the scheduling times after the rotation. */
684 rtx insn = ps_rtl_insn (ps, u);
686 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
687 "crr_insn->cycle=%d, min_cycle=%d", u,
688 INSN_UID (insn), normalized_time, new_min_cycle);
690 fprintf (dump_file, " (branch)");
691 fprintf (dump_file, "\n");
694 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
695 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
697 crr_insn->cycle = normalized_time;
698 update_node_sched_params (u, ii, normalized_time, new_min_cycle);
702 /* Set SCHED_COLUMN of each node according to its position in PS. */
704 set_columns_for_ps (partial_schedule_ptr ps)
708 for (row = 0; row < ps->ii; row++)
710 ps_insn_ptr cur_insn = ps->rows[row];
713 for (; cur_insn; cur_insn = cur_insn->next_in_row)
714 SCHED_COLUMN (cur_insn->id) = column++;
718 /* Permute the insns according to their order in PS, from row 0 to
719 row ii-1, and position them right before LAST. This schedules
720 the insns of the loop kernel. */
722 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
728 for (row = 0; row < ii ; row++)
729 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
731 rtx insn = ps_rtl_insn (ps, ps_ij->id);
733 if (PREV_INSN (last) != insn)
734 reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
739 /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
740 respectively only if cycle C falls on the border of the scheduling
741 window boundaries marked by START and END cycles. STEP is the
742 direction of the window. */
744 set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
745 sbitmap *tmp_precede, sbitmap must_precede, int c,
746 int start, int end, int step)
754 *tmp_precede = must_precede;
755 else /* step == -1. */
756 *tmp_follow = must_follow;
761 *tmp_follow = must_follow;
762 else /* step == -1. */
763 *tmp_precede = must_precede;
768 /* Return True if the branch can be moved to row ii-1 while
769 normalizing the partial schedule PS to start from cycle zero and thus
770 optimize the SC. Otherwise return False. */
772 optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
774 int amount = PS_MIN_CYCLE (ps);
775 sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
776 int start, end, step;
779 int stage_count, stage_count_curr;
781 /* Compare the SC after normalization and SC after bringing the branch
782 to row ii-1. If they are equal just bail out. */
783 stage_count = calculate_stage_count (ps, amount);
785 calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
787 if (stage_count == stage_count_curr)
790 fprintf (dump_file, "SMS SC already optimized.\n");
798 fprintf (dump_file, "SMS Trying to optimize branch location\n");
799 fprintf (dump_file, "SMS partial schedule before trial:\n");
800 print_partial_schedule (ps, dump_file);
803 /* First, normalize the partial scheduling. */
804 reset_sched_times (ps, amount);
805 rotate_partial_schedule (ps, amount);
809 "SMS partial schedule after normalization (ii, %d, SC %d):\n",
811 print_partial_schedule (ps, dump_file);
814 if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
820 sbitmap_ones (sched_nodes);
822 /* Calculate the new placement of the branch. It should be in row
823 ii-1 and fall into its scheduling window. */
824 if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
828 ps_insn_ptr next_ps_i;
829 int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
830 int row = SMODULO (branch_cycle, ps->ii);
832 sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
836 fprintf (dump_file, "\nTrying to schedule node %d "
837 "INSN = %d in (%d .. %d) step %d\n",
838 g->closing_branch->cuid,
839 (INSN_UID (g->closing_branch->insn)), start, end, step);
841 gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
844 c = start + ii - SMODULO (start, ii) - 1;
845 gcc_assert (c >= start);
851 "SMS failed to schedule branch at cycle: %d\n", c);
857 c = start - SMODULO (start, ii) - 1;
858 gcc_assert (c <= start);
864 "SMS failed to schedule branch at cycle: %d\n", c);
870 must_precede = sbitmap_alloc (g->num_nodes);
871 must_follow = sbitmap_alloc (g->num_nodes);
873 /* Try to schedule the branch in its new cycle. */
874 calculate_must_precede_follow (g->closing_branch, start, end,
875 step, ii, sched_nodes,
876 must_precede, must_follow);
878 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
879 must_precede, c, start, end, step);
881 /* Find the element in the partial schedule related to the closing
882 branch so we can remove it from its current cycle. */
883 for (next_ps_i = ps->rows[row];
884 next_ps_i; next_ps_i = next_ps_i->next_in_row)
885 if (next_ps_i->id == g->closing_branch->cuid)
888 remove_node_from_ps (ps, next_ps_i);
890 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
891 sched_nodes, &num_splits,
892 tmp_precede, tmp_follow);
893 gcc_assert (num_splits == 0);
898 "SMS failed to schedule branch at cycle: %d, "
899 "bringing it back to cycle %d\n", c, branch_cycle);
901 /* We failed to place the branch in row ii - 1.
902 Put it back in its original place in the partial
904 set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
905 must_precede, branch_cycle, start, end,
908 try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
909 branch_cycle, sched_nodes,
910 &num_splits, tmp_precede,
912 gcc_assert (success && (num_splits == 0));
917 /* The branch is placed in row ii - 1. */
920 "SMS success in moving branch to cycle %d\n", c);
922 update_node_sched_params (g->closing_branch->cuid, ii, c,
937 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
938 int to_stage, int for_prolog, rtx count_reg)
943 for (row = 0; row < ps->ii; row++)
944 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
947 int j, i_reg_moves, i_reg_move;
950 /* Do not duplicate any insn which refers to count_reg as it
951 belongs to the control part.
952 The closing branch is scheduled as well and thus should
954 TODO: This should be done by analyzing the control part of
956 u_insn = ps_rtl_insn (ps, u);
957 if (reg_mentioned_p (count_reg, u_insn)
963 /* SCHED_STAGE (u) >= from_stage == 0. Generate increasing
964 number of reg_moves starting with the second occurrence of
965 u, which is generated if its SCHED_STAGE <= to_stage. */
966 i_reg_moves = to_stage - SCHED_STAGE (u) + 1;
967 i_reg_moves = MAX (i_reg_moves, 0);
968 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u));
970 /* The reg_moves start from the *first* reg_move backwards. */
971 i_reg_move = SCHED_FIRST_REG_MOVE (u) + (i_reg_moves - 1);
973 else /* It's for the epilog. */
975 /* SCHED_STAGE (u) <= to_stage. Generate all reg_moves,
976 starting to decrease one stage after u no longer occurs;
977 that is, generate all reg_moves until
978 SCHED_STAGE (u) == from_stage - 1. */
979 i_reg_moves = (SCHED_NREG_MOVES (u)
980 - (from_stage - SCHED_STAGE (u) - 1));
981 i_reg_moves = MAX (i_reg_moves, 0);
982 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u));
984 /* The reg_moves start from the *last* reg_move forwards. */
985 i_reg_move = SCHED_FIRST_REG_MOVE (u) + (SCHED_NREG_MOVES (u) - 1);
988 for (j = 0; j < i_reg_moves; j++)
990 ps_reg_move_info *move = ps_reg_move (ps, i_reg_move - j);
992 emit_insn (copy_rtx (PATTERN (move->insn)));
994 if (SCHED_STAGE (u) >= from_stage
995 && SCHED_STAGE (u) <= to_stage)
996 duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1001 /* Generate the instructions (including reg_moves) for prolog & epilog. */
1003 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1004 rtx count_reg, rtx count_init)
1007 int last_stage = PS_STAGE_COUNT (ps) - 1;
1010 /* Generate the prolog, inserting its insns on the loop-entry edge. */
1015 /* Generate instructions at the beginning of the prolog to
1016 adjust the loop count by STAGE_COUNT. If loop count is constant
1017 (count_init), this constant is adjusted by STAGE_COUNT in
1018 generate_prolog_epilog function. */
1019 rtx sub_reg = NULL_RTX;
1021 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
1022 count_reg, GEN_INT (last_stage),
1023 count_reg, 1, OPTAB_DIRECT);
1024 gcc_assert (REG_P (sub_reg));
1025 if (REGNO (sub_reg) != REGNO (count_reg))
1026 emit_move_insn (count_reg, sub_reg);
1029 for (i = 0; i < last_stage; i++)
1030 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
1032 /* Put the prolog on the entry edge. */
1033 e = loop_preheader_edge (loop);
1034 split_edge_and_insert (e, get_insns ());
1038 /* Generate the epilog, inserting its insns on the loop-exit edge. */
1041 for (i = 0; i < last_stage; i++)
1042 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
1044 /* Put the epilogue on the exit edge. */
1045 gcc_assert (single_exit (loop));
1046 e = single_exit (loop);
1047 split_edge_and_insert (e, get_insns ());
1051 /* Return true if all the BBs of the loop are empty except the
1054 loop_single_full_bb_p (struct loop *loop)
1057 basic_block *bbs = get_loop_body (loop);
1059 for (i = 0; i < loop->num_nodes ; i++)
1062 bool empty_bb = true;
1064 if (bbs[i] == loop->header)
1067 /* Make sure that basic blocks other than the header
1068 have only notes labels or jumps. */
1069 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1070 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1072 if (NOTE_P (head) || LABEL_P (head)
1073 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1089 /* A simple loop from SMS point of view; it is a loop that is composed of
1090 either a single basic block or two BBs - a header and a latch. */
1091 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
1092 && (EDGE_COUNT (loop->latch->preds) == 1) \
1093 && (EDGE_COUNT (loop->latch->succs) == 1))
1095 /* Return true if the loop is in its canonical form and false if not.
1096 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
1098 loop_canon_p (struct loop *loop)
1101 if (loop->inner || !loop_outer (loop))
1104 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1108 if (!single_exit (loop))
1112 rtx insn = BB_END (loop->header);
1114 fprintf (dump_file, "SMS loop many exits ");
1115 fprintf (dump_file, " %s %d (file, line)\n",
1116 insn_file (insn), insn_line (insn));
1121 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1125 rtx insn = BB_END (loop->header);
1127 fprintf (dump_file, "SMS loop many BBs. ");
1128 fprintf (dump_file, " %s %d (file, line)\n",
1129 insn_file (insn), insn_line (insn));
1137 /* If there is more than one entry to the loop,
1138    make it one by splitting the first entry edge and
1139    redirecting the others to the new BB.  */
1141 canon_loop (struct loop *loop)
/* Split fallthrough edges into the exit block when their source has
   multiple successors, to avoid awkward CFG special cases later.  */
1146   /* Avoid annoying special cases of edges going to exit
1148   FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
1149     if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
/* Ensure the latch is a dedicated block with a single successor; if the
   header doubles as the latch (or the latch branches), fix that up by
   splitting the back edge found below.  */
1152   if (loop->latch == loop->header
1153       || EDGE_COUNT (loop->latch->succs) > 1)
1155       FOR_EACH_EDGE (e, i, loop->header->preds)
1156         if (e->src == loop->latch)
/* Point the global Haifa scheduler hooks at the SMS-specific descriptors
   so that dependence analysis and scheduling callbacks behave as this
   pass requires.  Must run before haifa_sched_init.  */
1164 setup_sched_infos (void)
/* Start from the common Haifa settings, then override the pass id.  */
1166   memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1167           sizeof (sms_common_sched_info));
1168   sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1169   common_sched_info = &sms_common_sched_info;
1171   sched_deps_info = &sms_sched_deps_info;
1172   current_sched_info = &sms_sched_info;
1175 /* Probability in % that the sms-ed loop rolls enough so that optimized
1176    version may be entered.  Just a guess.  */
1177 #define PROB_SMS_ENOUGH_ITERATIONS 80
1179 /* Used to calculate the upper bound of ii: maxii = MAX (max_asap,
     MAXII_FACTOR * mii).  */
1180 #define MAXII_FACTOR 2
1182 /* Main entry point, perform SMS scheduling on the loops of the function
1183    that consist of single basic blocks.
     Phase 1 builds a DDG per candidate loop; phase 2 schedules each DDG
     and rewrites the loop RTL (kernel, prolog, epilog).  */
1190   int maxii, max_asap;
1192   partial_schedule_ptr ps;
1193   basic_block bb = NULL;
1195   basic_block condition_bb = NULL;
1197   gcov_type trip_count = 0;
1199   loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1200                        | LOOPS_HAVE_RECORDED_EXITS);
1201   if (number_of_loops () <= 1)
1203       loop_optimizer_finalize ();
1204       return;  /* There are no loops to schedule.  */
1207   /* Initialize issue_rate.  */
1208   if (targetm.sched.issue_rate)
/* Temporarily pretend reload has completed; some targets make
   issue_rate depend on it.  Restored immediately after the query.  */
1210       int temp = reload_completed;
1212       reload_completed = 1;
1213       issue_rate = targetm.sched.issue_rate ();
1214       reload_completed = temp;
1219   /* Initialize the scheduler.  */
1220   setup_sched_infos ();
1221   haifa_sched_init ();
1223   /* Allocate memory to hold the DDG array one entry for each loop.
1224      We use loop->num as index into this array.  */
1225   g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
1229       fprintf (dump_file, "\n\nSMS analysis phase\n");
1230       fprintf (dump_file, "===================\n\n");
1233   /* Build DDGs for all the relevant loops and hold them in G_ARR
1234      indexed by the loop index.  */
1235   FOR_EACH_LOOP (li, loop, 0)
1240       /* For debugging.  */
1241       if (dbg_cnt (sms_sched_loop) == false)
1244             fprintf (dump_file, "SMS reached max limit... \n");
1251           rtx insn = BB_END (loop->header);
1253           fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1254                    loop->num, insn_file (insn), insn_line (insn));
/* Filter out loops that are not in canonical SMS form.  */
1258       if (! loop_canon_p (loop))
1261       if (! loop_single_full_bb_p (loop))
1264             fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1270       get_ebb_head_tail (bb, bb, &head, &tail);
1271       latch_edge = loop_latch_edge (loop);
1272       gcc_assert (single_exit (loop));
1273       if (single_exit (loop)->count)
1274         trip_count = latch_edge->count / single_exit (loop)->count;
1276       /* Perform SMS only on loops whose average count is above threshold.  */
1278       if ( latch_edge->count
1279           && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1283               fprintf (dump_file, " %s %d (file, line)\n",
1284                        insn_file (tail), insn_line (tail));
1285               fprintf (dump_file, "SMS single-bb-loop\n");
1286               if (profile_info && flag_branch_probabilities)
1288                   fprintf (dump_file, "SMS loop-count ");
1289                   fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1290                            (HOST_WIDEST_INT) bb->count);
1291                   fprintf (dump_file, "\n");
1292                   fprintf (dump_file, "SMS trip-count ");
1293                   fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1294                            (HOST_WIDEST_INT) trip_count);
1295                   fprintf (dump_file, "\n");
1296                   fprintf (dump_file, "SMS profile-sum-max ");
1297                   fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1298                            (HOST_WIDEST_INT) profile_info->sum_max);
1299                   fprintf (dump_file, "\n");
1305       /* Make sure this is a doloop.  */
1306       if ( !(count_reg = doloop_register_get (head, tail)))
1309             fprintf (dump_file, "SMS doloop_register_get failed\n");
1313       /* Don't handle BBs with calls or barriers
1314          or !single_set with the exception of instructions that include
1315          count_reg---these instructions are part of the control part
1316          that do-loop recognizes.
1317          ??? Should handle insns defining subregs.  */
1318       for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1324               || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1325                   && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1326                   && !reg_mentioned_p (count_reg, insn))
1327               || (INSN_P (insn) && (set = single_set (insn))
1328                   && GET_CODE (SET_DEST (set)) == SUBREG))
/* The loop above breaks early on the first offending insn; report which
   kind of insn disqualified the loop.  */
1332       if (insn != NEXT_INSN (tail))
1337                 fprintf (dump_file, "SMS loop-with-call\n");
1338               else if (BARRIER_P (insn))
1339                 fprintf (dump_file, "SMS loop-with-barrier\n");
1340               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1341                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1342                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1344                 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1345               print_rtl_single (dump_file, insn);
1351       /* Always schedule the closing branch with the rest of the
1352          instructions.  The branch is rotated to be in row ii-1 at the
1353          end of the scheduling procedure to make sure it's the last
1354          instruction in the iteration.  */
1355       if (! (g = create_ddg (bb, 1)))
1358             fprintf (dump_file, "SMS create_ddg failed\n");
1362       g_arr[loop->num] = g;
1364         fprintf (dump_file, "...OK\n");
1369       fprintf (dump_file, "\nSMS transformation phase\n");
1370       fprintf (dump_file, "=========================\n\n");
1373   /* We don't want to perform SMS on new loops - created by versioning.  */
1374   FOR_EACH_LOOP (li, loop, 0)
1377       rtx count_reg, count_init;
1379       unsigned stage_count;
1380       HOST_WIDEST_INT loop_count = 0;
/* Only loops that survived the analysis phase have a DDG here.  */
1383       if (! (g = g_arr[loop->num]))
1388           rtx insn = BB_END (loop->header);
1390           fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1391                    loop->num, insn_file (insn), insn_line (insn));
1393           print_ddg (dump_file, g);
1396       get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1398       latch_edge = loop_latch_edge (loop);
1399       gcc_assert (single_exit (loop));
1400       if (single_exit (loop)->count)
1401         trip_count = latch_edge->count / single_exit (loop)->count;
1405           fprintf (dump_file, " %s %d (file, line)\n",
1406                    insn_file (tail), insn_line (tail));
1407           fprintf (dump_file, "SMS single-bb-loop\n");
1408           if (profile_info && flag_branch_probabilities)
1410               fprintf (dump_file, "SMS loop-count ");
1411               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1412                        (HOST_WIDEST_INT) bb->count);
1413               fprintf (dump_file, "\n");
1414               fprintf (dump_file, "SMS profile-sum-max ");
1415               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1416                        (HOST_WIDEST_INT) profile_info->sum_max);
1417               fprintf (dump_file, "\n");
1419           fprintf (dump_file, "SMS doloop\n");
1420           fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1421           fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1422           fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1426       /* If the loop has a doloop register it gets special
1428       count_init = NULL_RTX;
1429       if ((count_reg = doloop_register_get (head, tail)))
1431           basic_block pre_header;
/* Try to derive a compile-time constant iteration count from the
   preheader's initialization of the doloop register.  */
1433           pre_header = loop_preheader_edge (loop)->src;
1434           count_init = const_iteration_count (count_reg, pre_header,
1437       gcc_assert (count_reg);
1439       if (dump_file && count_init)
1441           fprintf (dump_file, "SMS const-doloop ");
1442           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1444           fprintf (dump_file, "\n");
1447       node_order = XNEWVEC (int, g->num_nodes);
1449       mii = 1; /* Need to pass some estimate of mii.  */
1450       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
/* MII is the max of the resource-bound and recurrence-bound lower
   bounds; MAXII caps how far II may grow during scheduling.  */
1451       mii = MAX (res_MII (g), rec_mii);
1452       maxii = MAX (max_asap, MAXII_FACTOR * mii);
1455         fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1456                  rec_mii, mii, maxii);
1460       set_node_sched_params (g);
1464       ps = sms_schedule_by_order (g, mii, maxii, node_order);
1468           /* Try to achieve optimized SC by normalizing the partial
1469              schedule (having the cycles start from cycle zero).
1470              The branch location must be placed in row ii-1 in the
1471              final scheduling.  If failed, shift all instructions to
1472              position the branch in row ii-1.  */
1473           opt_sc_p = optimize_sc (ps, g);
1475             stage_count = calculate_stage_count (ps, 0);
1478               /* Bring the branch to cycle ii-1.  */
1479               int amount = (SCHED_TIME (g->closing_branch->cuid)
1483                 fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1485               stage_count = calculate_stage_count (ps, amount);
1488           gcc_assert (stage_count >= 1);
1489           PS_STAGE_COUNT (ps) = stage_count;
1492       /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1493          1 means that there is no interleaving between iterations thus
1494          we let the scheduling passes do the job in this case.  */
1495       if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
1496           || (count_init && (loop_count <= stage_count))
1497           || (flag_branch_probabilities && (trip_count <= stage_count)))
1501               fprintf (dump_file, "SMS failed... \n");
1502               fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1503                        " loop-count=", stage_count);
1504               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1505               fprintf (dump_file, ", trip-count=");
1506               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1507               fprintf (dump_file, ")\n");
1514               /* Rotate the partial schedule to have the branch in row ii-1.  */
1515               int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1517               reset_sched_times (ps, amount);
1518               rotate_partial_schedule (ps, amount);
1521           set_columns_for_ps (ps);
/* Register moves bridge cross-stage register lifetimes; if they cannot
   be generated the schedule is abandoned.  */
1523           if (!schedule_reg_moves (ps))
1526               free_partial_schedule (ps);
1535                        "%s:%d SMS succeeded %d %d (with ii, sc)\n",
1536                        insn_file (tail), insn_line (tail), ps->ii, stage_count);
1537               print_partial_schedule (ps, dump_file);
1540           /* In case the BCT count is not known, do loop-versioning.  */
1541           if (count_reg && ! count_init)
/* Guard the pipelined kernel with "count > stage_count"; otherwise the
   original (unscheduled) loop version runs.  */
1543               rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1544                                              GEN_INT(stage_count));
1545               unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1546                                * REG_BR_PROB_BASE) / 100;
1548               loop_version (loop, comp_rtx, &condition_bb,
1549                             prob, prob, REG_BR_PROB_BASE - prob,
1553           /* Set new iteration count of loop kernel.  */
1554           if (count_reg && count_init)
1555             SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1558           /* Now apply the scheduled kernel to the RTL of the loop.  */
1559           permute_partial_schedule (ps, g->closing_branch->first_note);
1561           /* Mark this loop as software pipelined so the later
1562              scheduling passes don't touch it.  */
1563           if (! flag_resched_modulo_sched)
1564             g->bb->flags |= BB_DISABLE_SCHEDULE;
1565           /* The life-info is not valid any more.  */
1566           df_set_bb_dirty (g->bb);
1568           apply_reg_moves (ps);
1570             print_node_sched_params (dump_file, g->num_nodes, ps);
1571           /* Generate prolog and epilog.  */
1572           generate_prolog_epilog (ps, loop, count_reg, count_init);
1576       free_partial_schedule (ps);
1577       VEC_free (node_sched_params, heap, node_sched_param_vec);
1584   /* Release scheduler data, needed until now because of DFA.  */
1585   haifa_sched_finish ();
1586   loop_optimizer_finalize ();
1589 /* The SMS scheduling algorithm itself
1590 -----------------------------------
1591 Input: 'O' an ordered list of insns of a loop.
1592 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1594 'Q' is the empty Set
1595 'PS' is the partial schedule; it holds the currently scheduled nodes with
1597 'PSP' previously scheduled predecessors.
1598 'PSS' previously scheduled successors.
1599 't(u)' the cycle where u is scheduled.
1600 'l(u)' is the latency of u.
1601 'd(v,u)' is the dependence distance from v to u.
1602 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1603 the node ordering phase.
1604 'check_hardware_resources_conflicts(u, PS, c)'
1605 run a trace around cycle/slot through DFA model
1606 to check resource conflicts involving instruction u
1607 at cycle c given the partial schedule PS.
1608 'add_to_partial_schedule_at_time(u, PS, c)'
1609 Add the node/instruction u to the partial schedule
1611 'calculate_register_pressure(PS)'
1612 Given a schedule of instructions, calculate the register
1613 pressure it implies. One implementation could be the
1614 maximum number of overlapping live ranges.
1615    'maxRP' The maximum allowed register pressure, it is usually derived from the number
1616            of registers available in the hardware.
1620 3. for each node u in O in pre-computed order
1621 4. if (PSP(u) != Q && PSS(u) == Q) then
1622    5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1623 6. start = Early_start; end = Early_start + II - 1; step = 1
1624 11. else if (PSP(u) == Q && PSS(u) != Q) then
1625    12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1626 13. start = Late_start; end = Late_start - II + 1; step = -1
1627 14. else if (PSP(u) != Q && PSS(u) != Q) then
1628    15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
1629    16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1630 17. start = Early_start;
1631 18. end = min(Early_start + II - 1 , Late_start);
1633 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1634 21. start = ASAP(u); end = start + II - 1; step = 1
1638 24. for (c = start ; c != end ; c += step)
1639 25. if check_hardware_resources_conflicts(u, PS, c) then
1640 26. add_to_partial_schedule_at_time(u, PS, c)
1645 31. if (success == false) then
1647 33. if (II > maxII) then
1648 34. finish - failed to schedule
1653 39. if (calculate_register_pressure(PS) > maxRP) then
1656 42. compute epilogue & prologue
1657 43. finish - succeeded to schedule
1660 /* A limit on the number of cycles that resource conflicts can span.  ??? Should
1661    be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1662    set to 0 to save compile time.  */
1663 #define DFA_HISTORY SMS_DFA_HISTORY
1665 /* A threshold for the number of repeated unsuccessful attempts to insert
1666    an empty row, before we flush the partial schedule and start over.
     Consulted in sms_schedule_by_order.  */
1667 #define MAX_SPLIT_NUM 10
1668 /* Given the partial schedule PS, this function calculates and returns the
1669    cycles in which we can schedule the node with the given index I.
1670    NOTE: Here we do the backtracking in SMS, in some special cases.  We have
1671    noticed that there are several cases in which we fail to SMS the loop
1672    because the sched window of a node is empty due to tight data-deps.  In
1673    such cases we want to unschedule some of the predecessors/successors
1674    until we get non-empty scheduling window.  It returns -1 if the
1675    scheduling window is empty and zero otherwise.  */
1678 get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1679                   sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1682   int start, step, end;
1683   int early_start, late_start;
1685   sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1686   sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1687   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1688   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1694   /* 1. compute sched window for u (start, end, step).  */
/* PSP/PSS: predecessors/successors of U that are already scheduled.  */
1697   psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1698   pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1700   /* We first compute a forward range (start <= end), then decide whether
/* INT_MIN/INT_MAX act as "unconstrained" sentinels until a scheduled
   predecessor/successor tightens the bound.  */
1702   early_start = INT_MIN;
1703   late_start = INT_MAX;
1711   if (dump_file && (psp_not_empty || pss_not_empty))
1713       fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1714                "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1715       fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1716                "start", "early start", "late start", "end", "time");
1717       fprintf (dump_file, "=========== =========== =========== ==========="
1720   /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1722   for (e = u_node->in; e != 0; e = e->next_in)
1724       int v = e->src->cuid;
1726       if (TEST_BIT (sched_nodes, v))
1728           int p_st = SCHED_TIME (v);
/* A scheduled predecessor forces U no earlier than earliest; a memory
   dependence additionally caps the window at one II past it.  */
1729           int earliest = p_st + e->latency - (e->distance * ii);
1730           int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1734               fprintf (dump_file, "%11s %11d %11s %11d %5d",
1735                        "", earliest, "", latest, p_st);
1736               print_ddg_edge (dump_file, e);
1737               fprintf (dump_file, "\n");
1740           early_start = MAX (early_start, earliest);
1741           end = MIN (end, latest);
1743           if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1748   /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1750   for (e = u_node->out; e != 0; e = e->next_out)
1752       int v = e->dest->cuid;
1754       if (TEST_BIT (sched_nodes, v))
1756           int s_st = SCHED_TIME (v);
/* Mirror image of the predecessor case: a scheduled successor forces U
   no later than latest.  */
1757           int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1758           int latest = s_st - e->latency + (e->distance * ii);
1762               fprintf (dump_file, "%11d %11s %11d %11s %5d",
1763                        earliest, "", latest, "", s_st);
1764               print_ddg_edge (dump_file, e);
1765               fprintf (dump_file, "\n");
1768           start = MAX (start, earliest);
1769           late_start = MIN (late_start, latest);
1771           if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1776   if (dump_file && (psp_not_empty || pss_not_empty))
1778       fprintf (dump_file, "----------- ----------- ----------- -----------"
1780       fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1781                start, early_start, late_start, end, "",
1782                "(max, max, min, min)");
1785   /* Get a target scheduling window no bigger than ii.  */
/* No constraints at all: start from the node's ASAP time.  */
1786   if (early_start == INT_MIN && late_start == INT_MAX)
1787     early_start = NODE_ASAP (u_node);
1788   else if (early_start == INT_MIN)
1789     early_start = late_start - (ii - 1);
1790   late_start = MIN (late_start, early_start + (ii - 1));
1792   /* Apply memory dependence limits.  */
1793   start = MAX (start, early_start);
1794   end = MIN (end, late_start);
1796   if (dump_file && (psp_not_empty || pss_not_empty))
1797     fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1798              "", start, end, "", "");
1800   /* If there are at least as many successors as predecessors, schedule the
1801      node close to its successors.  */
1802   if (pss_not_empty && count_succs >= count_preds)
1810   /* Now that we've finalized the window, make END an exclusive rather
1811      than an inclusive bound.  */
/* An inverted range relative to the step direction means no cycle is
   available -- the caller treats that as an empty window.  */
1820   if ((start >= end && step == 1) || (start <= end && step == -1))
1823         fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1831 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1832    node currently being scheduled.  At the end of the calculation
1833    MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1834    U_NODE which are (1) already scheduled in the first/last row of
1835    U_NODE's scheduling window, (2) whose dependence inequality with U
1836    becomes an equality when U is scheduled in this same row, and (3)
1837    whose dependence latency is zero.
1839    The first and last rows are calculated using the following parameters:
1840    START/END rows - The cycles that begin/end the traversal on the window;
1841    searching for an empty cycle to schedule U_NODE.
1842    STEP - The direction in which we traverse the window.
1843    II - The initiation interval.  */
1846 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1847                                int step, int ii, sbitmap sched_nodes,
1848                                sbitmap must_precede, sbitmap must_follow)
1851   int first_cycle_in_window, last_cycle_in_window;
1853   gcc_assert (must_precede && must_follow);
1855   /* Consider the following scheduling window:
1856      {first_cycle_in_window, first_cycle_in_window+1, ...,
1857      last_cycle_in_window}.  If step is 1 then the following will be
1858      the order we traverse the window: {start=first_cycle_in_window,
1859      first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1860      or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1861      end=first_cycle_in_window-1} if step is -1.  */
1862   first_cycle_in_window = (step == 1) ? start : end - step;
1863   last_cycle_in_window = (step == 1) ? end - step : start;
1865   sbitmap_zero (must_precede);
1866   sbitmap_zero (must_follow);
1869     fprintf (dump_file, "\nmust_precede: ");
1871   /* Instead of checking if:
1872      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1873      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1874          first_cycle_in_window)
1876      we use the fact that latency is non-negative:
1877      SCHED_TIME (e->src) - (e->distance * ii) <=
1878      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1879      first_cycle_in_window
1881      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1882   for (e = u_node->in; e != 0; e = e->next_in)
1883     if (TEST_BIT (sched_nodes, e->src->cuid)
1884         && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
1885              first_cycle_in_window))
1888           fprintf (dump_file, "%d ", e->src->cuid);
1890         SET_BIT (must_precede, e->src->cuid);
1894     fprintf (dump_file, "\nmust_follow: ");
1896   /* Instead of checking if:
1897      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1898      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1899          last_cycle_in_window)
1901      we use the fact that latency is non-negative:
1902      SCHED_TIME (e->dest) + (e->distance * ii) >=
1903      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1904      last_cycle_in_window
1906      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1907   for (e = u_node->out; e != 0; e = e->next_out)
1908     if (TEST_BIT (sched_nodes, e->dest->cuid)
1909         && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
1910              last_cycle_in_window))
1913           fprintf (dump_file, "%d ", e->dest->cuid);
1915         SET_BIT (must_follow, e->dest->cuid);
1919     fprintf (dump_file, "\n");
1922 /* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
1923    parameters to decide if that's possible:
1924    PS - The partial schedule.
1925    U - The serial number of U_NODE.
1926    NUM_SPLITS - The number of row splits made so far.
1927    MUST_PRECEDE - The nodes that must precede U_NODE.  (only valid at
1928    the first row of the scheduling window)
1929    MUST_FOLLOW - The nodes that must follow U_NODE.  (only valid at the
1930    last row of the scheduling window) */
1933 try_scheduling_node_in_cycle (partial_schedule_ptr ps,
1934                               int u, int cycle, sbitmap sched_nodes,
1935                               int *num_splits, sbitmap must_precede,
1936                               sbitmap must_follow)
1941   verify_partial_schedule (ps, sched_nodes);
/* ps_add_node_check_conflicts performs the DFA resource check; on
   success the node is committed to the schedule below.  */
1942   psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
1945       SCHED_TIME (u) = cycle;
1946       SET_BIT (sched_nodes, u);
1950         fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1957 /* This function implements the scheduling algorithm for SMS according to the
     Attempt to place each node (in the precomputed order) within its
     scheduling window; on repeated failure either split a row or flush
     the whole schedule and retry with a larger II, up to MAXII.  Returns
     the partial schedule, or frees it on failure.  */
1959 static partial_schedule_ptr
1960 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1963   int i, c, success, num_splits = 0;
1964   int flush_and_start_over = true;
1965   int num_nodes = g->num_nodes;
1966   int start, end, step; /* Place together into one struct?  */
1967   sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1968   sbitmap must_precede = sbitmap_alloc (num_nodes);
1969   sbitmap must_follow = sbitmap_alloc (num_nodes);
1970   sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1972   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1974   sbitmap_ones (tobe_scheduled);
1975   sbitmap_zero (sched_nodes);
/* Outer loop: each iteration is a fresh scheduling attempt at the
   current II; II grows until MAXII on failure.  */
1977   while (flush_and_start_over && (ii < maxii))
1981           fprintf (dump_file, "Starting with ii=%d\n", ii);
1982       flush_and_start_over = false;
1983       sbitmap_zero (sched_nodes);
1985       for (i = 0; i < num_nodes; i++)
1987           int u = nodes_order[i];
1988           ddg_node_ptr u_node = &ps->g->nodes[u];
1989           rtx insn = u_node->insn;
/* Debug insns are not scheduled; drop them from the "to do" set.  */
1991           if (!NONDEBUG_INSN_P (insn))
1993               RESET_BIT (tobe_scheduled, u);
1997           if (TEST_BIT (sched_nodes, u))
2000           /* Try to get non-empty scheduling window.  */
2002           if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2006                 fprintf (dump_file, "\nTrying to schedule node %d "
2007                          "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
2008                          (g->nodes[u].insn)), start, end, step);
2010               gcc_assert ((step > 0 && start < end)
2011                           || (step < 0 && start > end));
2013               calculate_must_precede_follow (u_node, start, end, step, ii,
2014                                              sched_nodes, must_precede,
/* Walk the window cycle by cycle until one accepts the node.  */
2017               for (c = start; c != end; c += step)
2019                   sbitmap tmp_precede, tmp_follow;
2021                   set_must_precede_follow (&tmp_follow, must_follow,
2022                                            &tmp_precede, must_precede,
2023                                            c, start, end, step);
2025                     try_scheduling_node_in_cycle (ps, u, c,
2027                                                   &num_splits, tmp_precede,
2033                 verify_partial_schedule (ps, sched_nodes);
/* Failure path: either give up on this II entirely (too many splits)
   or insert an empty row to widen the window and retry node i.  */
2042               if (num_splits >= MAX_SPLIT_NUM)
2045                   flush_and_start_over = true;
2046                   verify_partial_schedule (ps, sched_nodes);
2047                   reset_partial_schedule (ps, ii);
2048                   verify_partial_schedule (ps, sched_nodes);
2053                   /* The scheduling window is exclusive of 'end'
2054                      whereas compute_split_window() expects an inclusive,
2057                     split_row = compute_split_row (sched_nodes, start, end - 1,
2060                     split_row = compute_split_row (sched_nodes, end + 1, start,
2063                   ps_insert_empty_row (ps, split_row, sched_nodes);
2064                   i--;          /* Go back and retry node i.  */
2067                 fprintf (dump_file, "num_splits=%d\n", num_splits);
2070           /* ??? If (success), check register pressure estimates.  */
2071         }                       /* Continue with next node.  */
2072     }                           /* While flush_and_start_over.  */
2075       free_partial_schedule (ps);
2079   gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
2081   sbitmap_free (sched_nodes);
2082   sbitmap_free (must_precede);
2083   sbitmap_free (must_follow);
2084   sbitmap_free (tobe_scheduled);
2089 /* This function inserts a new empty row into PS at the position
2090    according to SPLITROW, keeping all already scheduled instructions
2091    intact and updating their SCHED_TIME and cycle accordingly.  */
2093 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2094                      sbitmap sched_nodes)
2096   ps_insn_ptr crr_insn;
2097   ps_insn_ptr *rows_new;
2099   int new_ii = ii + 1;
2101   int *rows_length_new;
2103   verify_partial_schedule (ps, sched_nodes);
2105   /* We normalize sched_time and rotate ps to have only non-negative sched
2106      times, for simplicity of updating cycles after inserting new row.  */
2107   split_row -= ps->min_cycle;
2108   split_row = SMODULO (split_row, ii);
2110     fprintf (dump_file, "split_row=%d\n", split_row);
2112   reset_sched_times (ps, PS_MIN_CYCLE (ps));
2113   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
/* Build a new row array of size ii+1 and migrate each existing row,
   adjusting every insn's absolute cycle for the extra row per wrap.  */
2115   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2116   rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
/* Rows before the split keep their index; their insns gain one cycle
   per full II already elapsed (SCHED_TIME / ii).  */
2117   for (row = 0; row < split_row; row++)
2119       rows_new[row] = ps->rows[row];
2120       rows_length_new[row] = ps->rows_length[row];
2121       ps->rows[row] = NULL;
2122       for (crr_insn = rows_new[row];
2123            crr_insn; crr_insn = crr_insn->next_in_row)
2125           int u = crr_insn->id;
2126           int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2128           SCHED_TIME (u) = new_time;
2129           crr_insn->cycle = new_time;
2130           SCHED_ROW (u) = new_time % new_ii;
2131           SCHED_STAGE (u) = new_time / new_ii;
/* The freshly inserted row starts empty.  */
2136   rows_new[split_row] = NULL;
/* Rows at or after the split shift down by one; their insns gain one
   extra cycle on top of the per-wrap adjustment.  */
2138   for (row = split_row; row < ii; row++)
2140       rows_new[row + 1] = ps->rows[row];
2141       rows_length_new[row + 1] = ps->rows_length[row];
2142       ps->rows[row] = NULL;
2143       for (crr_insn = rows_new[row + 1];
2144            crr_insn; crr_insn = crr_insn->next_in_row)
2146           int u = crr_insn->id;
2147           int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2149           SCHED_TIME (u) = new_time;
2150           crr_insn->cycle = new_time;
2151           SCHED_ROW (u) = new_time % new_ii;
2152           SCHED_STAGE (u) = new_time / new_ii;
/* Translate the schedule's cycle span into the new (ii+1)-row space.  */
2157   ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2158                   + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2159   ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2160                   + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2162   ps->rows = rows_new;
2163   free (ps->rows_length);
2164   ps->rows_length = rows_length_new;
2166   gcc_assert (ps->min_cycle >= 0);
2168   verify_partial_schedule (ps, sched_nodes);
2171     fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2175 /* Given U_NODE which is the node that failed to be scheduled; LOW and
2176    UP which are the boundaries of its scheduling window; compute using
2177    SCHED_NODES and II a row in the partial schedule that can be split
2178    which will separate a critical predecessor from a critical successor
2179    thereby expanding the window, and return it.  */
2181 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2182                    ddg_node_ptr u_node)
2185   int lower = INT_MIN, upper = INT_MAX;
/* Find the critical predecessor: the latest-scheduled predecessor whose
   dependence pins the window's lower bound LOW.  */
2190   for (e = u_node->in; e != 0; e = e->next_in)
2192       int v = e->src->cuid;
2194       if (TEST_BIT (sched_nodes, v)
2195           && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2196         if (SCHED_TIME (v) > lower)
2199             lower = SCHED_TIME (v);
/* Split just after the critical predecessor.  */
2205       crit_cycle = SCHED_TIME (crit_pred) + 1;
2206       return SMODULO (crit_cycle, ii);
/* Otherwise look for the critical successor: the earliest-scheduled
   successor whose dependence pins the window's upper bound UP.  */
2209   for (e = u_node->out; e != 0; e = e->next_out)
2211       int v = e->dest->cuid;
2213       if (TEST_BIT (sched_nodes, v)
2214           && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2215         if (SCHED_TIME (v) < upper)
2218             upper = SCHED_TIME (v);
/* Split right at the critical successor's cycle.  */
2224       crit_cycle = SCHED_TIME (crit_succ);
2225       return SMODULO (crit_cycle, ii);
2229     fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
/* Fallback: split at the middle of the window.  */
2231   return SMODULO ((low + up + 1) / 2, ii);
/* Consistency check: every insn recorded in PS must be marked in
   SCHED_NODES, lie within [min_cycle, max_cycle], and each row's cached
   length must match its actual population.  Asserts on violation.  */
2235 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2238   ps_insn_ptr crr_insn;
2240   for (row = 0; row < ps->ii; row++)
2244       for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2246           int u = crr_insn->id;
2249           gcc_assert (TEST_BIT (sched_nodes, u));
2250           /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2251              popcount (sched_nodes) == number of insns in ps.  */
2252           gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2253           gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2256       gcc_assert (ps->rows_length[row] == length);
2261 /* This page implements the algorithm for ordering the nodes of a DDG
2262    for modulo scheduling, activated through the
2263    "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
/* Accessors for the per-node ordering parameters stashed in aux.info.  */
2265 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2266 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2267 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2268 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: the slack between a node's latest and earliest start.  */
2269 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
/* Depth is represented by the ASAP time here.  */
2270 #define DEPTH(x) (ASAP ((x)))
2272 typedef struct node_order_params * nopa;
2274 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2275 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2276 static nopa  calculate_order_params (ddg_ptr, int, int *);
2277 static int find_max_asap (ddg_ptr, sbitmap);
2278 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2279 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
/* Direction in which an SCC's nodes are swept during ordering.  */
2281 enum sms_direction {BOTTOMUP, TOPDOWN};
2283 struct node_order_params
2290 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.
     Asserts on any out-of-range or duplicated entry; TMP tracks the
     values already seen.  */
2292 check_nodes_order (int *node_order, int num_nodes)
2295   sbitmap tmp = sbitmap_alloc (num_nodes);
2300     fprintf (dump_file, "SMS final nodes order: \n");
2302   for (i = 0; i < num_nodes; i++)
2304       int u = node_order[i];
2307         fprintf (dump_file, "%d ", u);
2308       gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2314     fprintf (dump_file, "\n");
2319 /* Order the nodes of G for scheduling and pass the result in
2320    NODE_ORDER.  Also set aux.count of each node to ASAP.
2321    Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2323 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2327   ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2329   nopa nops = calculate_order_params (g, mii, pmax_asap);
2332     print_sccs (dump_file, sccs, g);
2334   order_nodes_of_sccs (sccs, node_order);
2336   if (sccs->num_sccs > 0)
2337     /* First SCC has the largest recurrence_length.  */
2338     rec_mii = sccs->sccs[0]->recurrence_length;
2340   /* Save ASAP before destroying node_order_params.  */
2341   for (i = 0; i < g->num_nodes; i++)
2343       ddg_node_ptr v = &g->nodes[i];
2344       v->aux.count = ASAP (v);
2348   free_ddg_all_sccs (sccs);
/* Sanity: the order must be a permutation of all node indices.  */
2349   check_nodes_order (node_order, g->num_nodes);
/* Produce NODE_ORDER by visiting the SCCs of ALL_SCCS in decreasing
   recMII order; each SCC is extended with the nodes lying on paths
   to/from previously ordered SCCs before being ordered itself.  */
2355 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2358   ddg_ptr g = all_sccs->ddg;
2359   int num_nodes = g->num_nodes;
2360   sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2361   sbitmap on_path = sbitmap_alloc (num_nodes);
2362   sbitmap tmp = sbitmap_alloc (num_nodes);
2363   sbitmap ones = sbitmap_alloc (num_nodes);
2365   sbitmap_zero (prev_sccs);
2366   sbitmap_ones (ones);
2368   /* Perform the node ordering starting from the SCC with the highest recMII.
2369      For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2370   for (i = 0; i < all_sccs->num_sccs; i++)
2372       ddg_scc_ptr scc = all_sccs->sccs[i];
2374       /* Add nodes on paths from previous SCCs to the current SCC.  */
2375       find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2376       sbitmap_a_or_b (tmp, scc->nodes, on_path);
2378       /* Add nodes on paths from the current SCC to previous SCCs.  */
2379       find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2380       sbitmap_a_or_b (tmp, tmp, on_path);
2382       /* Remove nodes of previous SCCs from current extended SCC.  */
2383       sbitmap_difference (tmp, tmp, prev_sccs);
2385       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2386       /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2389   /* Handle the remaining nodes that do not belong to any scc.  Each call
2390      to order_nodes_in_scc handles a single connected component.  */
2391   while (pos < g->num_nodes)
2393       sbitmap_difference (tmp, ones, prev_sccs);
2394       pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2396   sbitmap_free (prev_sccs);
2397   sbitmap_free (on_path);
2399   sbitmap_free (ones);
2402 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
/* Compute per-node ordering parameters (ASAP, ALAP, HEIGHT) over the DDG
   with distance-0 edges only (i.e. ignoring inter-iteration edges, which
   turns the graph into a DAG).  Stores the maximal ASAP in *PMAX_ASAP and
   returns the freshly xcalloc'd parameter array (caller frees).
   NOTE(review): braces and the declarations of u/e/max_asap are elided
   in this extract.  */
2403 static struct node_order_params *
2404 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2408 int num_nodes = g->num_nodes;
2410 /* Allocate a place to hold ordering params for each node in the DDG. */
2411 nopa node_order_params_arr;
2413 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2414 node_order_params_arr = (nopa) xcalloc (num_nodes,
2415 sizeof (struct node_order_params));
2417 /* Set the aux pointer of each node to point to its order_params structure. */
2418 for (u = 0; u < num_nodes; u++)
2419 g->nodes[u].aux.info = &node_order_params_arr[u];
2421 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2422 calculate ASAP, ALAP, mobility, distance, and height for each node
2423 in the dependence (direct acyclic) graph. */
2425 /* We assume that the nodes in the array are in topological order. */
/* Forward pass: ASAP of a node is the max over its incoming distance-0
   edges of (source ASAP + latency).  */
2428 for (u = 0; u < num_nodes; u++)
2430 ddg_node_ptr u_node = &g->nodes[u];
2433 for (e = u_node->in; e; e = e->next_in)
2434 if (e->distance == 0)
2435 ASAP (u_node) = MAX (ASAP (u_node),
2436 ASAP (e->src) + e->latency);
2437 max_asap = MAX (max_asap, ASAP (u_node));
/* Backward pass: ALAP starts at max_asap and is tightened from the
   outgoing edges; HEIGHT is the longest latency path to a sink.  */
2440 for (u = num_nodes - 1; u > -1; u--)
2442 ddg_node_ptr u_node = &g->nodes[u];
2444 ALAP (u_node) = max_asap;
2445 HEIGHT (u_node) = 0;
2446 for (e = u_node->out; e; e = e->next_out)
2447 if (e->distance == 0)
2449 ALAP (u_node) = MIN (ALAP (u_node),
2450 ALAP (e->dest) - e->latency);
2451 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2452 HEIGHT (e->dest) + e->latency);
2457 fprintf (dump_file, "\nOrder params\n");
2458 for (u = 0; u < num_nodes; u++)
2460 ddg_node_ptr u_node = &g->nodes[u];
2462 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2463 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2467 *pmax_asap = max_asap;
2468 return node_order_params_arr;
/* Return the index of the node in NODES with the maximal ASAP value,
   or the initial sentinel if NODES is empty.  NOTE(review): return
   type, braces, and the declarations/initializations of u, max_asap
   and the result variable are elided in this extract.  */
2472 find_max_asap (ddg_ptr g, sbitmap nodes)
2477 sbitmap_iterator sbi;
2479 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2481 ddg_node_ptr u_node = &g->nodes[u];
2483 if (max_asap < ASAP (u_node))
2485 max_asap = ASAP (u_node);
/* Return the node in NODES with maximal HEIGHT, breaking ties by
   minimal mobility (MOB).  Used when ordering TOPDOWN.  NOTE(review):
   return type, braces, max_hv initialization and the return statement
   are elided in this extract.  */
2493 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2497 int min_mob = INT_MAX;
2499 sbitmap_iterator sbi;
2501 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2503 ddg_node_ptr u_node = &g->nodes[u];
/* New maximal height: take it and reset the mobility tie-breaker.  */
2505 if (max_hv < HEIGHT (u_node))
2507 max_hv = HEIGHT (u_node);
2508 min_mob = MOB (u_node);
/* Equal height: prefer the node with smaller mobility.  */
2511 else if ((max_hv == HEIGHT (u_node))
2512 && (min_mob > MOB (u_node)))
2514 min_mob = MOB (u_node);
/* Return the node in NODES with maximal DEPTH, breaking ties by
   minimal mobility (MOB).  Mirror image of find_max_hv_min_mob, used
   when ordering BOTTOMUP.  NOTE(review): return type, braces, max_dv
   initialization and the return statement are elided in this extract.  */
2522 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2526 int min_mob = INT_MAX;
2528 sbitmap_iterator sbi;
2530 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2532 ddg_node_ptr u_node = &g->nodes[u];
/* New maximal depth: take it and reset the mobility tie-breaker.  */
2534 if (max_dv < DEPTH (u_node))
2536 max_dv = DEPTH (u_node);
2537 min_mob = MOB (u_node);
/* Equal depth: prefer the node with smaller mobility.  */
2540 else if ((max_dv == DEPTH (u_node))
2541 && (min_mob > MOB (u_node)))
2543 min_mob = MOB (u_node);
2550 /* Places the nodes of SCC into the NODE_ORDER array starting
2551 at position POS, according to the SMS ordering algorithm.
2552 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2553 the NODE_ORDER array, starting from position zero. */
/* NOTE(review): the return type (int), braces, several declarations
   (u, v), the dir assignments in each branch, and the final
   "sbitmap_a_or_b (nodes_ordered, ...)" / free(tmp) / return pos lines
   are elided in this extract — verify against the full file.  */
2555 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2556 int * node_order, int pos)
2558 enum sms_direction dir;
2559 int num_nodes = g->num_nodes;
2560 sbitmap workset = sbitmap_alloc (num_nodes);
2561 sbitmap tmp = sbitmap_alloc (num_nodes);
2562 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2563 sbitmap predecessors = sbitmap_alloc (num_nodes);
2564 sbitmap successors = sbitmap_alloc (num_nodes);
2566 sbitmap_zero (predecessors);
2567 find_predecessors (predecessors, g, nodes_ordered);
2569 sbitmap_zero (successors);
2570 find_successors (successors, g, nodes_ordered);
/* Seed the workset: prefer SCC nodes that are predecessors of already
   ordered nodes, then successors; if neither, start from the node with
   maximal ASAP.  */
2573 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2575 sbitmap_copy (workset, tmp);
2578 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2580 sbitmap_copy (workset, tmp);
2587 sbitmap_zero (workset);
2588 if ((u = find_max_asap (g, scc)) >= 0)
2589 SET_BIT (workset, u);
2593 sbitmap_zero (zero_bitmap);
/* Outer loop: alternate TOPDOWN/BOTTOMUP passes until the workset is
   exhausted.  */
2594 while (!sbitmap_equal (workset, zero_bitmap))
2597 ddg_node_ptr v_node;
2598 sbitmap v_node_preds;
2599 sbitmap v_node_succs;
/* TOPDOWN pass: repeatedly emit the workset node with maximal height
   (ties by mobility) and feed its unordered SCC successors back in.  */
2603 while (!sbitmap_equal (workset, zero_bitmap))
2605 v = find_max_hv_min_mob (g, workset);
2606 v_node = &g->nodes[v];
2607 node_order[pos++] = v;
2608 v_node_succs = NODE_SUCCESSORS (v_node);
2609 sbitmap_a_and_b (tmp, v_node_succs, scc);
2611 /* Don't consider the already ordered successors again. */
2612 sbitmap_difference (tmp, tmp, nodes_ordered);
2613 sbitmap_a_or_b (workset, workset, tmp);
2614 RESET_BIT (workset, v);
2615 SET_BIT (nodes_ordered, v);
/* Switch direction: refill the workset with SCC predecessors of what
   has been ordered so far.  */
2618 sbitmap_zero (predecessors);
2619 find_predecessors (predecessors, g, nodes_ordered);
2620 sbitmap_a_and_b (workset, predecessors, scc);
/* BOTTOMUP pass: mirror of the above using maximal depth and the
   nodes' predecessors.  */
2624 while (!sbitmap_equal (workset, zero_bitmap))
2626 v = find_max_dv_min_mob (g, workset);
2627 v_node = &g->nodes[v];
2628 node_order[pos++] = v;
2629 v_node_preds = NODE_PREDECESSORS (v_node);
2630 sbitmap_a_and_b (tmp, v_node_preds, scc);
2632 /* Don't consider the already ordered predecessors again. */
2633 sbitmap_difference (tmp, tmp, nodes_ordered);
2634 sbitmap_a_or_b (workset, workset, tmp);
2635 RESET_BIT (workset, v);
2636 SET_BIT (nodes_ordered, v);
/* Switch direction again: refill with SCC successors of the ordered
   set before the next outer iteration.  */
2639 sbitmap_zero (successors);
2640 find_successors (successors, g, nodes_ordered);
2641 sbitmap_a_and_b (workset, successors, scc);
/* NOTE(review): sbitmap_free (tmp) appears to be elided among the
   dropped lines here.  */
2645 sbitmap_free (workset);
2646 sbitmap_free (zero_bitmap);
2647 sbitmap_free (predecessors);
2648 sbitmap_free (successors);
2653 /* This page contains functions for manipulating partial-schedules during
2654 modulo scheduling. */
2656 /* Create a partial schedule and allocate a memory to hold II rows. */
/* NOTE(review): braces, the ps->ii / ps->g assignments and the final
   "return ps;" are elided in this extract.  */
2658 static partial_schedule_ptr
2659 create_partial_schedule (int ii, ddg_ptr g, int history)
2661 partial_schedule_ptr ps = XNEW (struct partial_schedule);
/* One insn list and one length counter per row (cycle modulo II).  */
2662 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2663 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2664 ps->reg_moves = NULL;
2666 ps->history = history;
/* min/max cycle start inverted so the first scheduled insn sets both.  */
2667 ps->min_cycle = INT_MAX;
2668 ps->max_cycle = INT_MIN;
2674 /* Free the PS_INSNs in rows array of the given partial schedule.
2675 ??? Consider caching the PS_INSN's. */
/* NOTE(review): the static void line, braces, the inner while loop and
   the free() of the list head are elided in this extract; only the
   skeleton of the per-row walk is visible.  */
2677 free_ps_insns (partial_schedule_ptr ps)
2681 for (i = 0; i < ps->ii; i++)
/* Detach the head's successor before the (elided) free of the head.  */
2685 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2688 ps->rows[i] = ps_insn;
2694 /* Free all the memory allocated to the partial schedule. */
/* NOTE(review): the static void line, braces, the early-return on null
   PS, the free_ps_insns call and the free (ps->rows) / free (ps) lines
   are elided in this extract.  */
2697 free_partial_schedule (partial_schedule_ptr ps)
2699 ps_reg_move_info *move;
/* Release the per-move USES bitmaps before freeing the vector itself.  */
2705 FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
2706 sbitmap_free (move->uses);
2707 VEC_free (ps_reg_move_info, heap, ps->reg_moves);
2711 free (ps->rows_length);
2715 /* Clear the rows array with its PS_INSNs, and create a new one with
/* Reset PS to an empty schedule of NEW_II rows, reusing (reallocating)
   the existing arrays.  NOTE(review): the static void line, braces,
   the free_ps_insns call and the ps->ii update are elided in this
   extract.  */
2719 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
/* Fast path: same II, nothing to resize (body elided here).  */
2724 if (new_ii == ps->ii)
2726 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2727 * sizeof (ps_insn_ptr));
2728 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2729 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2730 memset (ps->rows_length, 0, new_ii * sizeof (int));
/* Back to the "empty schedule" sentinel values.  */
2732 ps->min_cycle = INT_MAX;
2733 ps->max_cycle = INT_MIN;
2736 /* Prints the partial schedule as an ii rows array, for each rows
2737 print the ids of the insns in it. */
/* NOTE(review): the static void line, braces and the inner while-loop
   header are elided in this extract.  */
2739 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2743 for (i = 0; i < ps->ii; i++)
2745 ps_insn_ptr ps_i = ps->rows[i];
2747 fprintf (dump, "\n[ROW %d ]: ", i);
2750 rtx insn = ps_rtl_insn (ps, ps_i->id);
/* Branches are tagged so the row's closing jump is visible in dumps.  */
2753 fprintf (dump, "%d (branch), ", INSN_UID (insn));
2755 fprintf (dump, "%d, ", INSN_UID (insn));
2757 ps_i = ps_i->next_in_row;
2762 /* Creates an object of PS_INSN and initializes it to the given parameters. */
/* NOTE(review): the static return-type line, braces, the ps_i->id
   assignment and "return ps_i;" are elided in this extract.  */
2764 create_ps_insn (int id, int cycle)
2766 ps_insn_ptr ps_i = XNEW (struct ps_insn);
/* Fresh node: not yet linked into any row's doubly linked list.  */
2769 ps_i->next_in_row = NULL;
2770 ps_i->prev_in_row = NULL;
2771 ps_i->cycle = cycle;
2777 /* Removes the given PS_INSN from the partial schedule. */
/* Unlink PS_I from its row's doubly linked list and free it.
   NOTE(review): the return type, braces, the "else" keyword, the
   free (ps_i) call and the return statement are elided in this
   extract.  */
2779 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2783 gcc_assert (ps && ps_i);
/* The row is the insn's cycle modulo II.  */
2785 row = SMODULO (ps_i->cycle, ps->ii);
/* Head of the row: advance the row pointer.  */
2786 if (! ps_i->prev_in_row)
2788 gcc_assert (ps_i == ps->rows[row]);
2789 ps->rows[row] = ps_i->next_in_row;
2791 ps->rows[row]->prev_in_row = NULL;
/* Interior node: splice prev and next around PS_I.  */
2795 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2796 if (ps_i->next_in_row)
2797 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
/* Keep the per-row population count in sync for the issue-rate check.  */
2800 ps->rows_length[row] -= 1;
2805 /* Unlike what literature describes for modulo scheduling (which focuses
2806 on VLIW machines) the order of the instructions inside a cycle is
2807 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2808 where the current instruction should go relative to the already
2809 scheduled instructions in the given cycle. Go over these
2810 instructions and find the first possible column to put it in. */
/* NOTE(review): the static bool line, braces, several condition lines
   (loop condition, null checks) and the "return true/false" statements
   are elided in this extract.  */
2812 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2813 sbitmap must_precede, sbitmap must_follow)
2815 ps_insn_ptr next_ps_i;
2816 ps_insn_ptr first_must_follow = NULL;
2817 ps_insn_ptr last_must_precede = NULL;
2818 ps_insn_ptr last_in_row = NULL;
2824 row = SMODULO (ps_i->cycle, ps->ii);
2826 /* Find the first must follow and the last must precede
2827 and insert the node immediately after the must precede
2828 but make sure that it there is no must follow after it. */
2829 for (next_ps_i = ps->rows[row];
2831 next_ps_i = next_ps_i->next_in_row)
2834 && TEST_BIT (must_follow, next_ps_i->id)
2835 && ! first_must_follow)
2836 first_must_follow = next_ps_i;
2837 if (must_precede && TEST_BIT (must_precede, next_ps_i->id))
2839 /* If we have already met a node that must follow, then
2840 there is no possible column. */
2841 if (first_must_follow)
2844 last_must_precede = next_ps_i;
2846 /* The closing branch must be the last in the row. */
2848 && TEST_BIT (must_precede, next_ps_i->id)
2849 && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
/* Remember the tail of the row for the branch-insertion case below.  */
2852 last_in_row = next_ps_i;
2855 /* The closing branch is scheduled as well. Make sure there is no
2856 dependent instruction after it as the branch should be the last
2857 instruction in the row. */
2858 if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
/* A must-follow insn already in the row makes a trailing branch
   impossible (body of the failure branch elided).  */
2860 if (first_must_follow)
2864 /* Make the branch the last in the row. New instructions
2865 will be inserted at the beginning of the row or after the
2866 last must_precede instruction thus the branch is guaranteed
2867 to remain the last instruction in the row. */
2868 last_in_row->next_in_row = ps_i;
2869 ps_i->prev_in_row = last_in_row;
2870 ps_i->next_in_row = NULL;
/* Empty row: the branch becomes the row head.  */
2873 ps->rows[row] = ps_i;
2877 /* Now insert the node after INSERT_AFTER_PSI. */
/* No must-precede constraint: insert at the head of the row.  */
2879 if (! last_must_precede)
2881 ps_i->next_in_row = ps->rows[row];
2882 ps_i->prev_in_row = NULL;
2883 if (ps_i->next_in_row)
2884 ps_i->next_in_row->prev_in_row = ps_i;
2885 ps->rows[row] = ps_i;
/* Otherwise splice PS_I right after the last must-precede insn.  */
2889 ps_i->next_in_row = last_must_precede->next_in_row;
2890 last_must_precede->next_in_row = ps_i;
2891 ps_i->prev_in_row = last_must_precede;
2892 if (ps_i->next_in_row)
2893 ps_i->next_in_row->prev_in_row = ps_i;
2899 /* Advances the PS_INSN one column in its current row; returns false
2900 in failure and true in success. Bit N is set in MUST_FOLLOW if
2901 the node with cuid N must be come after the node pointed to by
2902 PS_I when scheduled in the same cycle. */
/* NOTE(review): the static bool line, braces, and the explicit
   "return false;" / "return true;" statements are elided in this
   extract.  */
2904 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2905 sbitmap must_follow)
2907 ps_insn_ptr prev, next;
2913 row = SMODULO (ps_i->cycle, ps->ii);
/* Already last in the row: cannot advance (failure return elided).  */
2915 if (! ps_i->next_in_row)
2918 /* Check if next_in_row is dependent on ps_i, both having same sched
2919 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2920 if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id))
2923 /* Advance PS_I over its next_in_row in the doubly linked list. */
2924 prev = ps_i->prev_in_row;
2925 next = ps_i->next_in_row;
/* If PS_I was the row head, NEXT becomes the new head.  */
2927 if (ps_i == ps->rows[row])
2928 ps->rows[row] = next;
/* Swap PS_I and NEXT: PS_I now follows NEXT in the list.  */
2930 ps_i->next_in_row = next->next_in_row;
2932 if (next->next_in_row)
2933 next->next_in_row->prev_in_row = ps_i;
2935 next->next_in_row = ps_i;
2936 ps_i->prev_in_row = next;
2938 next->prev_in_row = prev;
2940 prev->next_in_row = next;
2945 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2946 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2947 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2948 before/after (respectively) the node pointed to by PS_I when scheduled
2949 in the same cycle. */
/* NOTE(review): the static return-type line, braces, the failure
   returns (NULL) and the final "return ps_i;" are elided in this
   extract.  */
2951 add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
2952 sbitmap must_precede, sbitmap must_follow)
2955 int row = SMODULO (cycle, ps->ii);
/* Row already full with respect to the machine's issue rate: refuse.  */
2957 if (ps->rows_length[row] >= issue_rate)
2960 ps_i = create_ps_insn (id, cycle);
2962 /* Finds and inserts PS_I according to MUST_FOLLOW and
/* On failure the freshly created PS_I is discarded (elided branch).  */
2964 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2970 ps->rows_length[row] += 1;
2974 /* Advance time one cycle. Assumes DFA is being used. */
/* NOTE(review): the static void line and braces are elided in this
   extract.  */
2976 advance_one_cycle (void)
/* Optional target hooks bracket the cycle-advancing NULL transition.  */
2978 if (targetm.sched.dfa_pre_cycle_insn)
2979 state_transition (curr_state,
2980 targetm.sched.dfa_pre_cycle_insn ());
2982 state_transition (curr_state, NULL);
2984 if (targetm.sched.dfa_post_cycle_insn)
2985 state_transition (curr_state,
2986 targetm.sched.dfa_post_cycle_insn ());
2991 /* Checks if PS has resource conflicts according to DFA, starting from
2992 FROM cycle to TO cycle; returns true if there are conflicts and false
2993 if there are no conflicts. Assumes DFA is being used. */
/* NOTE(review): the static bool line, braces, the early "return true"
   on conflict, the can_issue_more decrement and the final
   "return false" are elided in this extract.  */
2995 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2999 state_reset (curr_state);
3001 for (cycle = from; cycle <= to; cycle++)
3003 ps_insn_ptr crr_insn;
3004 /* Holds the remaining issue slots in the current row. */
3005 int can_issue_more = issue_rate;
3007 /* Walk through the DFA for the current row. */
3008 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3010 crr_insn = crr_insn->next_in_row)
3012 rtx insn = ps_rtl_insn (ps, crr_insn->id);
/* Debug insns and notes do not occupy functional units.  */
3014 if (!NONDEBUG_INSN_P (insn))
3017 /* Check if there is room for the current insn. */
3018 if (!can_issue_more || state_dead_lock_p (curr_state))
3021 /* Update the DFA state and return with failure if the DFA found
3022 resource conflicts. */
3023 if (state_transition (curr_state, insn) >= 0)
3026 if (targetm.sched.variable_issue)
3028 targetm.sched.variable_issue (sched_dump, sched_verbose,
3029 insn, can_issue_more);
3030 /* A naked CLOBBER or USE generates no instruction, so don't
3031 let them consume issue slots. */
3032 else if (GET_CODE (PATTERN (insn)) != USE
3033 && GET_CODE (PATTERN (insn)) != CLOBBER)
3037 /* Advance the DFA to the next cycle. */
3038 advance_one_cycle ();
3043 /* Checks if the given node causes resource conflicts when added to PS at
3044 cycle C. If not the node is added to PS and returned; otherwise zero
3045 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3046 cuid N must be come before/after (respectively) the node pointed to by
3047 PS_I when scheduled in the same cycle. */
/* NOTE(review): the static return-type line, braces, the history-window
   bounds of the second ps_has_conflicts call, the "return NULL" after
   removal, and the final "return ps_i" are elided in this extract.  */
3049 ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3050 int c, sbitmap must_precede,
3051 sbitmap must_follow)
3053 int has_conflicts = 0;
3056 /* First add the node to the PS, if this succeeds check for
3057 conflicts, trying different issue slots in the same row. */
3058 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3059 return NULL; /* Failed to insert the node at the given cycle. */
/* Check cycle C alone, then (per the elided condition) a window of
   ps->history cycles around it.  */
3061 has_conflicts = ps_has_conflicts (ps, c, c)
3063 && ps_has_conflicts (ps,
3067 /* Try different issue slots to find one that the given node can be
3068 scheduled in without conflicts. */
3069 while (has_conflicts)
/* No further column to try: give up (removal + NULL return elided).  */
3071 if (! ps_insn_advance_column (ps, ps_i, must_follow))
3073 has_conflicts = ps_has_conflicts (ps, c, c)
3075 && ps_has_conflicts (ps,
3082 remove_node_from_ps (ps, ps_i);
/* Success: widen the schedule's occupied cycle range to include C.  */
3086 ps->min_cycle = MIN (ps->min_cycle, c);
3087 ps->max_cycle = MAX (ps->max_cycle, c);
3091 /* Calculate the stage count of the partial schedule PS. The calculation
3092 takes into account the rotation amount passed in ROTATION_AMOUNT. */
/* NOTE(review): the static int line, braces and the final
   "return stage_count;" are elided in this extract.  */
3094 calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
/* Shift the occupied cycle range by the rotation before counting.  */
3096 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3097 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3098 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3100 /* The calculation of stage count is done adding the number of stages
3101 before cycle zero and after cycle zero. */
3102 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3107 /* Rotate the rows of PS such that insns scheduled at time
3108 START_CYCLE will appear in row 0. Updates max/min_cycles. */
/* NOTE(review): the static void line, braces and the early "return;"
   for the START_CYCLE == 0 case are elided in this extract.  */
3110 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3112 int i, row, backward_rotates;
3113 int last_row = ps->ii - 1;
/* Nothing to do when row 0 is already the start cycle.  */
3115 if (start_cycle == 0)
/* SMODULO keeps the rotate count in [0, ii) even for negative cycles.  */
3118 backward_rotates = SMODULO (start_cycle, ps->ii);
3120 /* Revisit later and optimize this into a single loop. */
/* Each iteration rotates all rows (and their lengths) backward by one.  */
3121 for (i = 0; i < backward_rotates; i++)
3123 ps_insn_ptr first_row = ps->rows[0];
3124 int first_row_length = ps->rows_length[0];
3126 for (row = 0; row < last_row; row++)
3128 ps->rows[row] = ps->rows[row + 1];
3129 ps->rows_length[row] = ps->rows_length[row + 1];
3132 ps->rows[last_row] = first_row;
3133 ps->rows_length[last_row] = first_row_length;
/* Re-base the occupied cycle range on the new row-0 origin.  */
3136 ps->max_cycle -= start_cycle;
3137 ps->min_cycle -= start_cycle;
3140 #endif /* INSN_SCHEDULING */
/* Pass gate: run SMS only when optimizing and -fmodulo-sched was given.
   NOTE(review): the static bool line and braces are elided in this
   extract.  */
3143 gate_handle_sms (void)
3145 return (optimize > 0 && flag_modulo_sched);
3149 /* Run instruction scheduler. */
3150 /* Perform SMS module scheduling. */
/* Pass entry point.  NOTE(review): the return-type line, braces, the
   sms_schedule () call, the FOR_EACH_BB loop header around the bb->aux
   chaining, and the return value are elided in this extract.  */
3152 rest_of_handle_sms (void)
3154 #ifdef INSN_SCHEDULING
3157 /* Collect loop information to be used in SMS. */
3158 cfg_layout_initialize (0);
3161 /* Update the life information, because we add pseudos. */
3162 max_regno = max_reg_num ();
3164 /* Finalize layout changes. */
/* Chain basic blocks via bb->aux so cfg_layout_finalize keeps the
   current order.  */
3166 if (bb->next_bb != EXIT_BLOCK_PTR)
3167 bb->aux = bb->next_bb;
/* Dominators are stale after layout changes; drop them first.  */
3168 free_dominance_info (CDI_DOMINATORS);
3169 cfg_layout_finalize ();
3170 #endif /* INSN_SCHEDULING */
3174 struct rtl_opt_pass pass_sms =
3179 gate_handle_sms, /* gate */
3180 rest_of_handle_sms, /* execute */
3183 0, /* static_pass_number */
3185 0, /* properties_required */
3186 0, /* properties_provided */
3187 0, /* properties_destroyed */
3188 0, /* todo_flags_start */
3191 | TODO_verify_rtl_sharing
3192 | TODO_ggc_collect /* todo_flags_finish */