1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
26 #include "coretypes.h"
31 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "insn-attr.h"
40 #include "sched-int.h"
42 #include "cfglayout.h"
51 #ifdef INSN_SCHEDULING
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56 Lifetime--sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
here the insns are not scheduled monotonically top-down (nor bottom-up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

/* Opaque handles used throughout this file; the structs are defined
   below.  */
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;
/* The minimum (absolute) cycle that a node of ps was scheduled in.
   PS may be handed around as an opaque pointer, hence the cast.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
/* Perform signed modulo, always returning a non-negative value.
   Assumes Y is positive (it is always an initiation interval, ii > 0,
   at the call sites).  This branchless form evaluates X only once,
   unlike the previous conditional expansion which expanded X twice
   (a multiple-evaluation hazard for arguments with side effects).  */
#define SMODULO(x,y) (((x) % (y) + (y)) % (y))
/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  This is
   ceil ((PS_MAX_CYCLE - PS_MIN_CYCLE + 1) / ii), computed with the
   usual add-(ii - 1)-then-divide idiom.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
                             + 1 + (ps)->ii - 1) / (ps)->ii)
108 /* A single instruction in the partial schedule. */
111 /* The corresponding DDG_NODE. */
114 /* The (absolute) cycle in which the PS instruction is scheduled.
115 Same as SCHED_TIME (node). */
118 /* The next/prev PS_INSN in the same row. */
119 ps_insn_ptr next_in_row,
122 /* The number of nodes in the same row that come after this node. */
126 /* Holds the partial schedule as an array of II rows. Each entry of the
127 array points to a linked list of PS_INSNs, which represents the
128 instructions that are scheduled for that row. */
129 struct partial_schedule
131 int ii; /* Number of rows in the partial schedule. */
132 int history; /* Threshold for conflict checking using DFA. */
134 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
137 /* The earliest absolute cycle of an insn in the partial schedule. */
140 /* The latest absolute cycle of an insn in the partial schedule. */
143 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
146 /* We use this to record all the register replacements we do in
147 the kernel so we can undo SMS if it is not profitable. */
148 struct undo_replace_buff_elem
153 struct undo_replace_buff_elem *next;
/* Forward declarations for the partial-schedule interface defined
   later in this file.  */
partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
void free_partial_schedule (partial_schedule_ptr);
void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						ddg_node_ptr node, int cycle,
						sbitmap must_precede,
						sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
/* This page defines constants and structures for the modulo-scheduling
   driver.  */

/* As in haifa-sched.c: */
/* issue_rate is the number of insns that can be scheduled in the same
   machine cycle. It can be defined in the config/mach/mach.h file,
   otherwise we set it to 1. */

static int issue_rate;

/* For printing statistics.  Set from the dump file by sms_schedule.  */
static FILE *stats_file;
/* Forward declarations of the main SMS driver routines.
   NOTE (review): the parameter lists of sms_schedule_by_order and
   duplicate_insns_of_cycles are truncated in this extract — their
   trailing parameter lines are missing.  */
static int sms_order_nodes (ddg_ptr, int, int * result);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
				       int from_stage, int to_stage,
/* Accessors for the node_sched_params structure that hangs off each
   DDG node's aux.info pointer (installed by set_node_sched_params).  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
  (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
  (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
/* The scheduling parameters held for each node.
   NOTE (review): this extract is missing the opening brace and the
   declarations of first_reg_move, nreg_moves and column; only their
   comments survive.  */
typedef struct node_sched_params
  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
  int time;	/* The absolute scheduling cycle (time >= asap).  */

  /* The following field (first_reg_move) is a pointer to the first
     register-move instruction added to handle the modulo-variable-expansion
     of the register defined by this node.  This register-move copies the
     original register defined by the node.  */

  /* The number of register-move instructions added, immediately preceding
     first_reg_move (declaration missing from this extract).  */

  int row;	/* Holds time % ii.  */
  int stage;	/* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
} *node_sched_params_ptr;
/* The following three functions are copied from the current scheduler
   code in order to use sched_analyze() for computing the dependencies.
   They are used when initializing the sched_info structure.  */

/* Format INSN as "i<uid>" for scheduler dumps.
   NOTE (review): the return type, the declaration of the buffer TMP
   and the return statement are missing from this extract.  */
sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
  sprintf (tmp, "i%4d", INSN_UID (insn));
/* NEXT contributes to the priority of INSN only when both are in the
   same basic block (compared via BLOCK_NUM).
   NOTE (review): the return type and braces are missing from this
   extract.  */
contributes_to_priority (rtx next, rtx insn)
  return BLOCK_NUM (next) == BLOCK_NUM (insn);
/* Scheduler hook for extra register dependencies of jumps; every
   parameter is deliberately unused, so this is presumably a no-op —
   the body is not visible in this extract.  */
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
/* Hook table handed to the haifa scheduler machinery while building
   dependencies for SMS.
   NOTE (review): most initializer entries are missing from this
   extract; only the two hooks defined above are visible.  */
static struct sched_info sms_sched_info =
  contributes_to_priority,
  compute_jump_reg_dependencies,
/* Return the register decremented and tested in INSN,
   or zero if it is not a decrement-and-branch insn.
   NOTE (review): the return type, jump-insn guard, closing code and
   returns are missing from this extract.  */
doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
#ifdef HAVE_doloop_end
  rtx pattern, reg, condition;

  pattern = PATTERN (insn);
  condition = doloop_condition_get (pattern);

  /* The counter is either the bare first operand of the condition, or
     the register inside a (plus (reg) ...) first operand.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
   that the number of iterations is a compile-time constant.  If so,
   return the rtx that sets COUNT_REG to a constant, and set COUNT to
   this constant.  Otherwise return 0.
   NOTE (review): the return type, local declarations, braces and
   return statements are missing from this extract.  */
const_iteration_count (rtx count_reg, basic_block pre_header,
		       HOST_WIDEST_INT * count)
  get_block_head_tail (pre_header->index, &head, &tail);

  /* Scan the pre-header backwards for a single_set whose destination
     is COUNT_REG.  */
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
    if (INSN_P (insn) && single_set (insn) &&
	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
	rtx pat = single_set (insn);

	if (GET_CODE (SET_SRC (pat)) == CONST_INT)
	    *count = INTVAL (SET_SRC (pat));
/* A very simple resource-based lower bound on the initiation interval:
   total node count divided by the machine's issue rate.
   ??? Improve the accuracy of this bound by considering the
   utilization of various units.
   NOTE (review): the function header is missing from this extract.  */
  return (g->num_nodes / issue_rate);
/* Points to the array that contains the sched data for each node;
   allocated by set_node_sched_params below.  */
static node_sched_params_ptr node_sched_params;
/* Allocate sched_params for each node and initialize it.  Assumes that
   the aux field of each node contain the asap bound (computed earlier),
   and copies it into the sched_params field.
   NOTE (review): the function header line, declaration of I and braces
   are missing from this extract.  */
set_node_sched_params (ddg_ptr g)
  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
  /* Initialize ASAP/ALAP/HIGHT to zero.  */
  node_sched_params = (node_sched_params_ptr)
		       xcalloc (g->num_nodes,
				sizeof (struct node_sched_params));

  /* Set the pointer of the general data of the node to point to the
     appropriate sched_params structure.  */
  for (i = 0; i < g->num_nodes; i++)
      /* Watch out for aliasing problems?  */
      node_sched_params[i].asap = g->nodes[i].aux.count;
      g->nodes[i].aux.info = &node_sched_params[i];
/* Dump the scheduling parameters (asap, time, reg-moves) of the first
   NUM_NODES entries of node_sched_params to DUMP_FILE.
   NOTE (review): the return type, declarations of I and J, and braces
   are missing from this extract.  */
print_node_sched_params (FILE * dump_file, int num_nodes)
  for (i = 0; i < num_nodes; i++)
      node_sched_params_ptr nsp = &node_sched_params[i];
      rtx reg_move = nsp->first_reg_move;

      fprintf (dump_file, "Node %d:\n", i);
      fprintf (dump_file, " asap = %d:\n", nsp->asap);
      fprintf (dump_file, " time = %d:\n", nsp->time);
      fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);

      /* Reg-moves are chained backwards from first_reg_move through
	 PREV_INSN.  */
      for (j = 0; j < nsp->nreg_moves; j++)
	  fprintf (dump_file, " reg_move = ");
	  print_rtl_single (dump_file, reg_move);
	  reg_move = PREV_INSN (reg_move);
/* Calculate an upper bound for II.  SMS should not schedule the loop if it
   requires more cycles than this bound.  Currently set to the sum of the
   longest latency edge for each node.  Reset based on experiments.
   NOTE (review): the function header, declarations and the return of
   MAXII are missing from this extract.  */
calculate_maxii (ddg_ptr g)
  for (i = 0; i < g->num_nodes; i++)
      ddg_node_ptr u = &g->nodes[i];
      int max_edge_latency = 0;

      /* Longest latency among U's outgoing edges.  */
      for (e = u->out; e; e = e->next_out)
	max_edge_latency = MAX (max_edge_latency, e->latency);
      maxii += max_edge_latency;
/* Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1.  Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.  Per the code below, the number of
   moves for an edge is (SCHED_TIME (use) - SCHED_TIME (def)) / ii,
   with ii added to the difference for a distance-1 dependence.

   Returns a list recording every register replacement performed, so
   the transformation can be undone if SMS turns out unprofitable.
   NOTE (review): several declarations (G, II, E, LAST_REG_MOVE, I_USE)
   and braces are missing from this extract.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps)
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
      ddg_node_ptr u = &g->nodes[i];
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))

	    nreg_moves = MAX (nreg_moves, nreg_moves4e);

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))

	    SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      last_reg_move = u->insn;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);

	  /* Chain each new move immediately before the previous one.  */
	  add_insn_before (reg_move, last_reg_move);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Record each replacement so undo_generate_reg_moves can
	     restore the original registers later.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use,
	      struct undo_replace_buff_elem *rep;

	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);

  return reg_move_replaces;
/* We call this when we want to undo the SMS schedule for a given loop.
   One of the things that we do is to delete the register moves generated
   for the sake of SMS; this function deletes the register move instructions
   recorded in the undo buffer.
   NOTE (review): the function header, declarations and the actual
   delete call inside the inner loop are missing from this extract.  */
undo_generate_reg_moves (partial_schedule_ptr ps,
			 struct undo_replace_buff_elem *reg_move_replaces)
  for (i = 0; i < ps->g->num_nodes; i++)
      ddg_node_ptr u = &ps->g->nodes[i];
      rtx crr = SCHED_FIRST_REG_MOVE (u);

      /* Walk the chain of reg-moves hanging off U.  */
      for (j = 0; j < SCHED_NREG_MOVES (u); j++)
	  prev = PREV_INSN (crr);
      SCHED_FIRST_REG_MOVE (u) = NULL_RTX;

  /* Revert every recorded register replacement in the original insns.  */
  while (reg_move_replaces)
      struct undo_replace_buff_elem *rep = reg_move_replaces;

      reg_move_replaces = reg_move_replaces->next;
      replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
/* Free memory allocated for the undo buffer.
   NOTE (review): the function header, braces and the free (rep) call
   are missing from this extract.  */
free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
  while (reg_move_replaces)
      struct undo_replace_buff_elem *rep = reg_move_replaces;

      reg_move_replaces = reg_move_replaces->next;
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.
   NOTE (review): the function header and declarations of G, II and I
   are missing from this extract.  */
normalize_sched_times (partial_schedule_ptr ps)
  int amount = PS_MIN_CYCLE (ps);

  /* Don't include the closing branch assuming that it is the last node.  */
  for (i = 0; i < g->num_nodes - 1; i++)
      ddg_node_ptr u = &g->nodes[i];
      int normalized_time = SCHED_TIME (u) - amount;

      /* Times are shifted by the minimum, so none may go negative.  */
      gcc_assert (normalized_time >= 0);

      SCHED_TIME (u) = normalized_time;
      SCHED_ROW (u) = normalized_time % ii;
      SCHED_STAGE (u) = normalized_time / ii;
/* Set SCHED_COLUMN of each node according to its position in PS:
   consecutive numbers within each row, restarting per row.
   NOTE (review): the function header, declarations of ROW/COLUMN and
   braces are missing from this extract.  */
set_columns_for_ps (partial_schedule_ptr ps)
  for (row = 0; row < ps->ii; row++)
      ps_insn_ptr cur_insn = ps->rows[row];

      for (; cur_insn; cur_insn = cur_insn->next_in_row)
	SCHED_COLUMN (cur_insn->node) = column++;
/* Permute the insns according to their order in PS, from row 0 to
   row ii-1, and position them right before LAST.  This schedules
   the insns of the loop kernel.
   NOTE (review): the function header, declarations and the trailing
   argument of reorder_insns_nobb are missing from this extract.  */
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
  for (row = 0; row < ii ; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      if (PREV_INSN (last) != ps_ij->node->insn)
	reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
/* As part of undoing SMS we return to the original ordering of the
   instructions inside the loop kernel.  Given the partial schedule PS, this
   function returns the ordering of the instruction according to their CUID
   in the DDG (PS->G), which is the original order of the instruction before
   scheduling.
   NOTE (review): the function header, the body of the first if-arm and
   the trailing argument of reorder_insns_nobb are missing from this
   extract.  */
undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
  for (i = 0 ; i < ps->g->num_nodes; i++)
    if (last == ps->g->nodes[i].insn
	|| last == ps->g->nodes[i].first_note)
    else if (PREV_INSN (last) != ps->g->nodes[i].insn)
      reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
/* Used to generate the prologue & epilogue.  Duplicate the subset of
   nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
   of both), together with a prefix/suffix of their reg_moves.
   FOR_PROLOG selects which end of the reg-move chain is emitted.
   NOTE (review): the function header's brace, declarations and the
   `if (for_prolog)` line matching the `else` below are missing from
   this extract.  */
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog)
  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
	ddg_node_ptr u_node = ps_ij->node;
	rtx reg_move = NULL_RTX;

	/* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	   number of reg_moves starting with the second occurrence of
	   u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;

	/* Clamp to [0, SCHED_NREG_MOVES].  */
	i_reg_moves = MAX (i_reg_moves, 0);
	i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	/* The reg_moves start from the *first* reg_move backwards.  */
	    reg_move = SCHED_FIRST_REG_MOVE (u_node);
	    for (j = 1; j < i_reg_moves; j++)
	      reg_move = PREV_INSN (reg_move);
      else /* It's for the epilog.  */
	  /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	     starting to decrease one stage after u_node no longer occurs;
	     that is, generate all reg_moves until
	     SCHED_STAGE (u_node) == from_stage - 1.  */
	  i_reg_moves = SCHED_NREG_MOVES (u_node)
			- (from_stage - SCHED_STAGE (u_node) - 1);
	  i_reg_moves = MAX (i_reg_moves, 0);
	  i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	  /* The reg_moves start from the *last* reg_move forwards.  */
	      reg_move = SCHED_FIRST_REG_MOVE (u_node);
	      for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		reg_move = PREV_INSN (reg_move);

	/* Emit the selected run of reg_moves, then the insn itself if
	   its stage falls inside [from_stage, to_stage].  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
/* Generate the instructions (including reg_moves) for prolog & epilog.
   The prolog peels the first LAST_STAGE iscopies onto the loop-entry
   edge; the epilog completes the in-flight iterations on the exit edge.
   NOTE (review): the function header's brace, declarations, the
   start/end-sequence bracketing and the COUNT_REG guard appear to be
   missing from this extract.  */
generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
  int last_stage = PS_STAGE_COUNT (ps) - 1;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */

  /* Generate a subtract instruction at the beginning of the prolog to
     adjust the loop count by STAGE_COUNT.  */
  emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1);

  /* Put the prolog , on the one and only entry edge.  */
  e = loop_preheader_edge (loop);
  loop_split_edge_with(e , get_insns());

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);

  /* Put the epilogue on the one and only one exit edge.  */
  gcc_assert (loop->single_exit);
  e = loop->single_exit;
  loop_split_edge_with(e , get_insns());
/* Return the line note insn preceding INSN, for debugging.  Taken from
   another file per the original (truncated) comment.
   NOTE (review): the return type, the NOTE_P half of the condition and
   the return statements are missing from this extract.  */
find_line_note (rtx insn)
  for (; insn; insn = PREV_INSN (insn))
	&& NOTE_LINE_NUMBER (insn) >= 0)
/* Return true if all the BBs of the loop are empty except the
   loop header.
   NOTE (review): the return type, declarations of I/HEAD/TAIL, the
   continue/empty_bb updates and the final return are missing from this
   extract.  */
loop_single_full_bb_p (struct loop *loop)
  basic_block *bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes ; i++)
      bool empty_bb = true;

      /* The header itself is allowed to hold real insns.  */
      if (bbs[i] == loop->header)

      /* Make sure that basic blocks other than the header
	 have only notes labels or jumps.  */
      get_block_head_tail (bbs[i]->index, &head, &tail);
      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
	  if (NOTE_P (head) || LABEL_P (head)
	      || (INSN_P (head) && JUMP_P (head)))
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch - where
   the latch has exactly one predecessor and one successor.  The LOOP
   argument is parenthesized in the expansion so the macro stays safe
   for any pointer-valued expression, not just a plain identifier.  */
#define SIMPLE_SMS_LOOP_P(loop) (((loop)->num_nodes < 3 ) \
				 && (EDGE_COUNT ((loop)->latch->preds) == 1) \
				 && (EDGE_COUNT ((loop)->latch->succs) == 1))
/* Return true if the loop is in its canonical form and false if not.
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.
   NOTE (review): the return type, the return statements, the
   stats_file guards and several braces are missing from this extract.  */
loop_canon_p (struct loop *loop, FILE *dump_file)
  /* Reject nested loops and the dummy outermost "loop".  */
  if (loop->inner || ! loop->outer)

  if (!loop->single_exit)
      rtx line_note = find_line_note (BB_END (loop->header));

      fprintf (dump_file, "SMS loop many exits ");
	  expanded_location xloc;
	  NOTE_EXPANDED_LOCATION (xloc, line_note);
	  fprintf (stats_file, " %s %d (file, line)\n",
		   xloc.file, xloc.line);

  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
      rtx line_note = find_line_note (BB_END (loop->header));

      fprintf (dump_file, "SMS loop many BBs. ");
	  expanded_location xloc;
	  NOTE_EXPANDED_LOCATION (xloc, line_note);
	  fprintf (stats_file, " %s %d (file, line)\n",
		   xloc.file, xloc.line);
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.
   NOTE (review): the function header, declarations of E and I, and
   several braces are missing from this extract.  */
canon_loop (struct loop *loop)
  /* Avoid annoying special cases of edges going to exit
     block (comment truncated in this extract).  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      loop_split_edge_with (e, NULL_RTX);

  /* Split the latch->header edge when the latch doubles as the header
     or has multiple successors.  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	    loop_split_edge_with (e, NULL_RTX);
/* Build the loop information without loop
   canonization, the loop canonization will
   be performed if the loop is SMSable.
   Returns the loops structure, or (presumably) NULL when no real loops
   were found — the early-return path and final return are missing from
   this extract, as are the opening brace and the #endif.  */
static struct loops *
build_loops_structure (FILE *dumpfile)
  struct loops *loops = xcalloc (1, sizeof (struct loops));

  /* Find the loops.  */

  if (flow_loops_find (loops) <= 1)
      /* Only the fake outermost loop (or none): clean up and bail.  */
      flow_loops_free (loops);

  /* Not going to update these.  */
  free (loops->cfg.rc_order);
  loops->cfg.rc_order = NULL;
  free (loops->cfg.dfs_order);
  loops->cfg.dfs_order = NULL;

  create_preheaders (loops, CP_SIMPLE_PREHEADERS);
  mark_single_exit_loops (loops);

  flow_loops_dump (loops, dumpfile, NULL, 1);

#ifdef ENABLE_CHECKING
  verify_dominators (CDI_DOMINATORS);
  verify_loop_structure (loops);
932 /* Main entry point, perform SMS scheduling on the loops of the function
933 that consist of single basic blocks. */
935 sms_schedule (FILE *dump_file)
937 static int passes = 0;
942 unsigned i,num_loops;
943 partial_schedule_ptr ps;
946 basic_block bb = NULL;
947 /* vars to the versioning only if needed*/
949 basic_block condition_bb = NULL;
951 gcov_type trip_count = 0;
953 if (! (loops = build_loops_structure (dump_file)))
954 return; /* There is no loops to schedule. */
957 stats_file = dump_file;
959 /* Initialize issue_rate. */
960 if (targetm.sched.issue_rate)
962 int temp = reload_completed;
964 reload_completed = 1;
965 issue_rate = targetm.sched.issue_rate ();
966 reload_completed = temp;
971 /* Initialize the scheduler. */
972 current_sched_info = &sms_sched_info;
975 /* Init Data Flow analysis, to be used in interloop dep calculation. */
977 df_analyze (df, 0, DF_ALL);
979 /* Allocate memory to hold the DDG array one entry for each loop.
980 We use loop->num as index into this array. */
981 g_arr = xcalloc (loops->num, sizeof (ddg_ptr));
984 /* Build DDGs for all the relevant loops and hold them in G_ARR
985 indexed by the loop index. */
986 for (i = 0; i < loops->num; i++)
990 struct loop *loop = loops->parray[i];
993 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
996 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
1001 if (! loop_canon_p (loop, dump_file))
1004 if (! loop_single_full_bb_p (loop))
1009 get_block_head_tail (bb->index, &head, &tail);
1010 latch_edge = loop_latch_edge (loop);
1011 gcc_assert (loop->single_exit);
1012 if (loop->single_exit->count)
1013 trip_count = latch_edge->count / loop->single_exit->count;
1015 /* Perfrom SMS only on loops that their average count is above threshold. */
1017 if ( latch_edge->count
1018 && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1022 rtx line_note = find_line_note (tail);
1026 expanded_location xloc;
1027 NOTE_EXPANDED_LOCATION (xloc, line_note);
1028 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1029 xloc.file, xloc.line);
1031 fprintf (stats_file, "SMS single-bb-loop\n");
1032 if (profile_info && flag_branch_probabilities)
1034 fprintf (stats_file, "SMS loop-count ");
1035 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1036 (HOST_WIDEST_INT) bb->count);
1037 fprintf (stats_file, "\n");
1038 fprintf (stats_file, "SMS trip-count ");
1039 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1040 (HOST_WIDEST_INT) trip_count);
1041 fprintf (stats_file, "\n");
1042 fprintf (stats_file, "SMS profile-sum-max ");
1043 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1044 (HOST_WIDEST_INT) profile_info->sum_max);
1045 fprintf (stats_file, "\n");
1051 /* Make sure this is a doloop. */
1052 if ( !(count_reg = doloop_register_get (tail)))
1055 /* Don't handle BBs with calls or barriers, or !single_set insns. */
1056 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1059 || (INSN_P (insn) && !JUMP_P (insn)
1060 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1063 if (insn != NEXT_INSN (tail))
1068 fprintf (stats_file, "SMS loop-with-call\n");
1069 else if (BARRIER_P (insn))
1070 fprintf (stats_file, "SMS loop-with-barrier\n");
1072 fprintf (stats_file, "SMS loop-with-not-single-set\n");
1073 print_rtl_single (stats_file, insn);
1079 if (! (g = create_ddg (bb, df, 0)))
1082 fprintf (stats_file, "SMS doloop\n");
1089 /* Release Data Flow analysis data structures. */
1092 /* We don't want to perform SMS on new loops - created by versioning. */
1093 num_loops = loops->num;
1094 /* Go over the built DDGs and perfrom SMS for each one of them. */
1095 for (i = 0; i < num_loops; i++)
1098 rtx count_reg, count_init;
1100 unsigned stage_count = 0;
1101 HOST_WIDEST_INT loop_count = 0;
1102 struct loop *loop = loops->parray[i];
1104 if (! (g = g_arr[i]))
1108 print_ddg (dump_file, g);
1110 get_block_head_tail (loop->header->index, &head, &tail);
1112 latch_edge = loop_latch_edge (loop);
1113 gcc_assert (loop->single_exit);
1114 if (loop->single_exit->count)
1115 trip_count = latch_edge->count / loop->single_exit->count;
1119 rtx line_note = find_line_note (tail);
1123 expanded_location xloc;
1124 NOTE_EXPANDED_LOCATION (xloc, line_note);
1125 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1126 xloc.file, xloc.line);
1128 fprintf (stats_file, "SMS single-bb-loop\n");
1129 if (profile_info && flag_branch_probabilities)
1131 fprintf (stats_file, "SMS loop-count ");
1132 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1133 (HOST_WIDEST_INT) bb->count);
1134 fprintf (stats_file, "\n");
1135 fprintf (stats_file, "SMS profile-sum-max ");
1136 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1137 (HOST_WIDEST_INT) profile_info->sum_max);
1138 fprintf (stats_file, "\n");
1140 fprintf (stats_file, "SMS doloop\n");
1141 fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1142 fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1143 fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1147 /* In case of th loop have doloop register it gets special
1149 count_init = NULL_RTX;
1150 if ((count_reg = doloop_register_get (tail)))
1152 basic_block pre_header;
1154 pre_header = loop_preheader_edge (loop)->src;
1155 count_init = const_iteration_count (count_reg, pre_header,
1158 gcc_assert (count_reg);
1160 if (stats_file && count_init)
1162 fprintf (stats_file, "SMS const-doloop ");
1163 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1165 fprintf (stats_file, "\n");
1168 node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1170 mii = 1; /* Need to pass some estimate of mii. */
1171 rec_mii = sms_order_nodes (g, mii, node_order);
1172 mii = MAX (res_MII (g), rec_mii);
1173 maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1176 fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1177 rec_mii, mii, maxii);
1179 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1181 set_node_sched_params (g);
1183 ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1186 stage_count = PS_STAGE_COUNT (ps);
1188 /* Stage count of 1 means that there is no interleaving between
1189 iterations, let the scheduling passes do the job. */
1191 || (count_init && (loop_count <= stage_count))
1192 || (flag_branch_probabilities && (trip_count <= stage_count)))
1196 fprintf (dump_file, "SMS failed... \n");
1197 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1198 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1199 fprintf (dump_file, ", trip-count=");
1200 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1201 fprintf (dump_file, ")\n");
1207 int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1209 struct undo_replace_buff_elem *reg_move_replaces;
1213 fprintf (stats_file,
1214 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1216 print_partial_schedule (ps, stats_file);
1217 fprintf (stats_file,
1218 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1219 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1222 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1223 the closing_branch was scheduled and should appear in the last (ii-1)
1224 row. Otherwise, we are free to schedule the branch, and we let nodes
1225 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1226 row; this should reduce stage_count to minimum. */
1227 normalize_sched_times (ps);
1228 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1229 set_columns_for_ps (ps);
1231 /* Generate the kernel just to be able to measure its cycles. */
1232 permute_partial_schedule (ps, g->closing_branch->first_note);
1233 reg_move_replaces = generate_reg_moves (ps);
1235 /* Get the number of cycles the new kernel expect to execute in. */
1236 new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1238 /* Get back to the original loop so we can do loop versioning. */
1239 undo_permute_partial_schedule (ps, g->closing_branch->first_note);
1240 if (reg_move_replaces)
1241 undo_generate_reg_moves (ps, reg_move_replaces);
1243 if ( new_cycles >= orig_cycles)
1245 /* SMS is not profitable so undo the permutation and reg move generation
1246 and return the kernel to its original state. */
1248 fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
1255 /* case the BCT count is not known , Do loop-versioning */
1256 if (count_reg && ! count_init)
1258 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1259 GEN_INT(stage_count));
1261 nloop = loop_version (loops, loop, comp_rtx, &condition_bb);
1264 /* Set new iteration count of loop kernel. */
1265 if (count_reg && count_init)
1266 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1269 /* Now apply the scheduled kernel to the RTL of the loop. */
1270 permute_partial_schedule (ps, g->closing_branch->first_note);
1272 /* Mark this loop as software pipelined so the later
1273 scheduling passes doesn't touch it. */
1274 if (! flag_resched_modulo_sched)
1275 g->bb->flags |= BB_DISABLE_SCHEDULE;
1276 /* The life-info is not valid any more. */
1277 g->bb->flags |= BB_DIRTY;
1279 reg_move_replaces = generate_reg_moves (ps);
1281 print_node_sched_params (dump_file, g->num_nodes);
1282 /* Generate prolog and epilog. */
1283 if (count_reg && !count_init)
1284 generate_prolog_epilog (ps, loop, count_reg);
1286 generate_prolog_epilog (ps, loop, NULL_RTX);
1288 free_undo_replace_buff (reg_move_replaces);
1291 free_partial_schedule (ps);
1292 free (node_sched_params);
1297 /* Release scheduler data, needed until now because of DFA. */
1299 loop_optimizer_finalize (loops, dump_file);
1302 /* The SMS scheduling algorithm itself
1303 -----------------------------------
1304 Input: 'O' an ordered list of insns of a loop.
1305 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1307 'Q' is the empty Set
1308 'PS' is the partial schedule; it holds the currently scheduled nodes with
1310 'PSP' previously scheduled predecessors.
1311 'PSS' previously scheduled successors.
1312 't(u)' the cycle where u is scheduled.
1313 'l(u)' is the latency of u.
1314 'd(v,u)' is the dependence distance from v to u.
1315 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1316 the node ordering phase.
1317 'check_hardware_resources_conflicts(u, PS, c)'
1318 run a trace around cycle/slot through DFA model
1319 to check resource conflicts involving instruction u
1320 at cycle c given the partial schedule PS.
1321 'add_to_partial_schedule_at_time(u, PS, c)'
1322 Add the node/instruction u to the partial schedule
1324 'calculate_register_pressure(PS)'
1325 Given a schedule of instructions, calculate the register
1326 pressure it implies. One implementation could be the
1327 maximum number of overlapping live ranges.
1328 'maxRP' The maximum allowed register pressure; it is usually derived from the
1329 number of registers available in the hardware.
1333 3. for each node u in O in pre-computed order
1334 4. if (PSP(u) != Q && PSS(u) == Q) then
1335 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1336 6. start = Early_start; end = Early_start + II - 1; step = 1
1337 11. else if (PSP(u) == Q && PSS(u) != Q) then
1338 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1339 13. start = Late_start; end = Late_start - II + 1; step = -1
1340 14. else if (PSP(u) != Q && PSS(u) != Q) then
1341 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over every v in PSP(u).
1342 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over every v in PSS(u).
1343 17. start = Early_start;
1344 18. end = min(Early_start + II - 1 , Late_start);
1346 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1347 21. start = ASAP(u); end = start + II - 1; step = 1
1351 24. for (c = start ; c != end ; c += step)
1352 25. if check_hardware_resources_conflicts(u, PS, c) then
1353 26. add_to_partial_schedule_at_time(u, PS, c)
1358 31. if (success == false) then
1360 33. if (II > maxII) then
1361 34. finish - failed to schedule
1366 39. if (calculate_register_pressure(PS) > maxRP) then
1369 42. compute epilogue & prologue
1370 43. finish - succeeded to schedule
1373 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1374 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1375 set to 0 to save compile time. */
1376 #define DFA_HISTORY SMS_DFA_HISTORY
1378 /* Given the partial schedule PS, this function calculates and returns the
1379 cycles in which we can schedule the node with the given index I.
1380 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1381 noticed that there are several cases in which we fail to SMS the loop
1382 because the sched window of a node is empty due to tight data-deps. In
1383 such cases we want to unschedule some of the predecessors/successors
1384 until we get non-empty scheduling window. It returns -1 if the
1385 scheduling window is empty and zero otherwise. */
1388 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1389 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1391 int start, step, end;
1393 int u = nodes_order [i];
1394 ddg_node_ptr u_node = &ps->g->nodes[u];
/* Scratch bitmaps holding the already-scheduled predecessors (PSP) and
   successors (PSS) of U.  NOTE(review): they should be sbitmap_free'd on
   every return path -- confirm in the elided tail of this function.  */
1395 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1396 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1397 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1398 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1402 /* 1. compute sched window for u (start, end, step). */
/* The _cg variants compute the intersection and also report whether the
   result is non-empty.  */
1405 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1406 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
/* Steps 5-6 of the algorithm above: only predecessors are scheduled,
   scan forward from the earliest feasible cycle.  */
1408 if (psp_not_empty && !pss_not_empty)
1410 int early_start = INT_MIN;
1413 for (e = u_node->in; e != 0; e = e->next_in)
1415 ddg_node_ptr v_node = e->src;
1416 if (TEST_BIT (sched_nodes, v_node->cuid))
/* Earliest cycle implied by scheduled predecessor V:
   t(v) + l(v) - d(v,u)*II.  */
1418 int node_st = SCHED_TIME (v_node)
1419 + e->latency - (e->distance * ii);
1421 early_start = MAX (early_start, node_st);
/* A memory dependence must not be stretched across a full iteration;
   clamp the window accordingly.  */
1423 if (e->data_type == MEM_DEP)
1424 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1427 start = early_start;
1428 end = MIN (end, early_start + ii);
/* Steps 12-13: only successors are scheduled; scan backward (step -1)
   from the latest feasible cycle.  */
1432 else if (!psp_not_empty && pss_not_empty)
1434 int late_start = INT_MAX;
1437 for (e = u_node->out; e != 0; e = e->next_out)
1439 ddg_node_ptr v_node = e->dest;
1440 if (TEST_BIT (sched_nodes, v_node->cuid))
/* Latest cycle implied by scheduled successor V:
   t(v) - l(v) + d(v,u)*II.  */
1442 late_start = MIN (late_start,
1443 SCHED_TIME (v_node) - e->latency
1444 + (e->distance * ii));
1445 if (e->data_type == MEM_DEP)
1446 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1450 end = MAX (end, late_start - ii);
/* Steps 15-18: both predecessors and successors are scheduled; the
   window is bounded on both sides.  */
1454 else if (psp_not_empty && pss_not_empty)
1456 int early_start = INT_MIN;
1457 int late_start = INT_MAX;
1461 for (e = u_node->in; e != 0; e = e->next_in)
1463 ddg_node_ptr v_node = e->src;
1465 if (TEST_BIT (sched_nodes, v_node->cuid))
1467 early_start = MAX (early_start,
1468 SCHED_TIME (v_node) + e->latency
1469 - (e->distance * ii));
1470 if (e->data_type == MEM_DEP)
1471 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1474 for (e = u_node->out; e != 0; e = e->next_out)
1476 ddg_node_ptr v_node = e->dest;
1478 if (TEST_BIT (sched_nodes, v_node->cuid))
1480 late_start = MIN (late_start,
1481 SCHED_TIME (v_node) - e->latency
1482 + (e->distance * ii));
1483 if (e->data_type == MEM_DEP)
1484 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1487 start = MAX (start, early_start);
1488 end = MIN (end, MIN (early_start + ii, late_start + 1));
/* Step 21: nothing scheduled around U yet; start at its ASAP time.  */
1491 else /* psp is empty && pss is empty. */
1493 start = SCHED_ASAP (u_node);
/* An empty window -- start meets end in the scan direction -- means
   failure; the caller will unschedule neighbors and retry.  */
1504 if ((start >= end && step == 1) || (start <= end && step == -1))
1510 /* This function implements the scheduling algorithm for SMS according to the
/* Schedule the nodes of G in the pre-computed NODES_ORDER, starting at
   II = MII and (in elided code) enlarging II up to MAXII whenever some
   node cannot be placed.  Returns the resulting partial schedule.  */
1512 static partial_schedule_ptr
1513 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
1517 int try_again_with_larger_ii = true;
1518 int num_nodes = g->num_nodes;
1520 int start, end, step; /* Place together into one struct? */
1521 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1522 sbitmap must_precede = sbitmap_alloc (num_nodes);
1523 sbitmap must_follow = sbitmap_alloc (num_nodes);
1524 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1526 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1528 sbitmap_ones (tobe_scheduled);
1529 sbitmap_zero (sched_nodes);
/* Iterate until every node that must be scheduled has been, or II
   reaches MAXII.  */
1531 while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
1532 || try_again_with_larger_ii ) && ii < maxii)
1535 bool unscheduled_nodes = false;
1538 fprintf(dump_file, "Starting with ii=%d\n", ii);
/* A larger II invalidates the previous placement; start over.  */
1539 if (try_again_with_larger_ii)
1541 try_again_with_larger_ii = false;
1542 sbitmap_zero (sched_nodes);
1545 for (i = 0; i < num_nodes; i++)
1547 int u = nodes_order[i];
1548 ddg_node_ptr u_node = &ps->g->nodes[u];
1549 rtx insn = u_node->insn;
1553 RESET_BIT (tobe_scheduled, u);
1557 if (JUMP_P (insn)) /* Closing branch handled later. */
1559 RESET_BIT (tobe_scheduled, u);
1563 if (TEST_BIT (sched_nodes, u))
1566 /* Try to get non-empty scheduling window. */
1568 while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
/* Backtracking: unschedule already-placed neighbors of U (walking
   backwards through the order) to open up a window.  */
1571 unscheduled_nodes = true;
1572 if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
1573 || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
1575 ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
1576 RESET_BIT (sched_nodes, nodes_order [j - 1]);
1582 /* ??? Try backtracking instead of immediately ii++? */
1584 try_again_with_larger_ii = true;
1585 reset_partial_schedule (ps, ii);
1588 /* 2. Try scheduling u in window. */
1590 fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1591 u, start, end, step);
1593 /* use must_follow & must_precede bitmaps to determine order
1594 of nodes within the cycle. */
1595 sbitmap_zero (must_precede);
1596 sbitmap_zero (must_follow);
/* A scheduled predecessor with zero slack (latency exactly covered by
   distance * II) whose time equals START must come earlier in the row.  */
1597 for (e = u_node->in; e != 0; e = e->next_in)
1598 if (TEST_BIT (sched_nodes, e->src->cuid)
1599 && e->latency == (ii * e->distance)
1600 && start == SCHED_TIME (e->src))
1601 SET_BIT (must_precede, e->src->cuid);
/* Symmetrically, a zero-slack successor scheduled at END must come
   later in the row.  */
1603 for (e = u_node->out; e != 0; e = e->next_out)
1604 if (TEST_BIT (sched_nodes, e->dest->cuid)
1605 && e->latency == (ii * e->distance)
1606 && end == SCHED_TIME (e->dest))
1607 SET_BIT (must_follow, e->dest->cuid);
/* Scan the window cycle by cycle in the direction chosen by
   get_sched_window.  */
1610 if ((step > 0 && start < end) || (step < 0 && start > end))
1611 for (c = start; c != end; c += step)
1615 psi = ps_add_node_check_conflicts (ps, u_node, c,
1621 SCHED_TIME (u_node) = c;
1622 SET_BIT (sched_nodes, u);
1625 fprintf(dump_file, "Schedule in %d\n", c);
1631 /* ??? Try backtracking instead of immediately ii++? */
1633 try_again_with_larger_ii = true;
1634 reset_partial_schedule (ps, ii);
1637 if (unscheduled_nodes)
1640 /* ??? If (success), check register pressure estimates. */
1641 } /* Continue with next node. */
1642 } /* While try_again_with_larger_ii. */
1644 sbitmap_free (sched_nodes);
/* NOTE(review): ps is freed here on the failure path (II reached MAXII);
   the success path's return is in elided lines -- confirm.  */
1648 free_partial_schedule (ps);
1655 /* This page implements the algorithm for ordering the nodes of a DDG
1656 for modulo scheduling, activated through the
1657 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1659 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1660 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1661 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1662 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1663 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1664 #define DEPTH(x) (ASAP ((x)))
1666 typedef struct node_order_params * nopa;
1668 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1669 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1670 static nopa calculate_order_params (ddg_ptr, int mii);
1671 static int find_max_asap (ddg_ptr, sbitmap);
1672 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1673 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1675 enum sms_direction {BOTTOMUP, TOPDOWN};
1677 struct node_order_params
1684 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1686 check_nodes_order (int *node_order, int num_nodes)
/* TMP marks indices already seen, so TEST_BIT below catches duplicates
   (assumes each U's bit is set as it is visited -- TODO confirm in the
   elided loop body).  */
1689 sbitmap tmp = sbitmap_alloc (num_nodes);
1693 for (i = 0; i < num_nodes; i++)
1695 int u = node_order[i];
/* Abort compilation on an out-of-range or repeated node index.  */
1697 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1705 /* Order the nodes of G for scheduling and pass the result in
1706 NODE_ORDER. Also set aux.count of each node to ASAP.
1707 Return the recMII for the given DDG. */
1709 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1713 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
/* Compute ASAP/ALAP/HEIGHT for every node; stored via each node's
   aux.info pointer (see ORDER_PARAMS).  */
1715 nopa nops = calculate_order_params (g, mii);
1717 order_nodes_of_sccs (sccs, node_order);
1719 if (sccs->num_sccs > 0)
1720 /* First SCC has the largest recurrence_length. */
1721 rec_mii = sccs->sccs[0]->recurrence_length;
1723 /* Save ASAP before destroying node_order_params. */
1724 for (i = 0; i < g->num_nodes; i++)
1726 ddg_node_ptr v = &g->nodes[i];
1727 v->aux.count = ASAP (v);
1731 free_ddg_all_sccs (sccs);
/* Sanity check: the computed order must be a permutation of the nodes.  */
1732 check_nodes_order (node_order, g->num_nodes);
/* Fill NODE_ORDER by ordering the SCCs of the DDG from highest to lowest
   recMII, extending each SCC with the nodes lying on paths to/from the
   previously ordered SCCs, then ordering any remaining nodes.  */
1738 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1741 ddg_ptr g = all_sccs->ddg;
1742 int num_nodes = g->num_nodes;
1743 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1744 sbitmap on_path = sbitmap_alloc (num_nodes);
1745 sbitmap tmp = sbitmap_alloc (num_nodes);
1746 sbitmap ones = sbitmap_alloc (num_nodes);
1748 sbitmap_zero (prev_sccs);
1749 sbitmap_ones (ones);
1751 /* Perform the node ordering starting from the SCC with the highest recMII.
1752 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1753 for (i = 0; i < all_sccs->num_sccs; i++)
1755 ddg_scc_ptr scc = all_sccs->sccs[i];
1757 /* Add nodes on paths from previous SCCs to the current SCC. */
1758 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1759 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1761 /* Add nodes on paths from the current SCC to previous SCCs. */
1762 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1763 sbitmap_a_or_b (tmp, tmp, on_path);
1765 /* Remove nodes of previous SCCs from current extended SCC. */
1766 sbitmap_difference (tmp, tmp, prev_sccs);
1768 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1769 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1772 /* Handle the remaining nodes that do not belong to any scc. Each call
1773 to order_nodes_in_scc handles a single connected component. */
1774 while (pos < g->num_nodes)
1776 sbitmap_difference (tmp, ones, prev_sccs);
1777 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1779 sbitmap_free (prev_sccs);
1780 sbitmap_free (on_path);
1782 sbitmap_free (ones);
1785 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
/* Compute ASAP, ALAP and HEIGHT for every node of G over the acyclic
   subgraph (edges with distance 0 only).  Returns the freshly allocated
   array of per-node params; each node's aux.info points into it.  */
1786 static struct node_order_params *
1787 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1791 int num_nodes = g->num_nodes;
1793 /* Allocate a place to hold ordering params for each node in the DDG. */
1794 nopa node_order_params_arr;
1796 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1797 node_order_params_arr = (nopa) xcalloc (num_nodes,
1798 sizeof (struct node_order_params));
1800 /* Set the aux pointer of each node to point to its order_params structure. */
1801 for (u = 0; u < num_nodes; u++)
1802 g->nodes[u].aux.info = &node_order_params_arr[u];
1804 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1805 calculate ASAP, ALAP, mobility, distance, and height for each node
1806 in the dependence (direct acyclic) graph. */
1808 /* We assume that the nodes in the array are in topological order. */
/* Forward pass: ASAP(u) = max over intra-iteration predecessors of
   ASAP(pred) + latency.  */
1811 for (u = 0; u < num_nodes; u++)
1813 ddg_node_ptr u_node = &g->nodes[u];
1816 for (e = u_node->in; e; e = e->next_in)
1817 if (e->distance == 0)
1818 ASAP (u_node) = MAX (ASAP (u_node),
1819 ASAP (e->src) + e->latency);
1820 max_asap = MAX (max_asap, ASAP (u_node));
/* Backward pass: ALAP from the critical-path length, HEIGHT from the
   intra-iteration successors.  */
1823 for (u = num_nodes - 1; u > -1; u--)
1825 ddg_node_ptr u_node = &g->nodes[u];
1827 ALAP (u_node) = max_asap;
1828 HEIGHT (u_node) = 0;
1829 for (e = u_node->out; e; e = e->next_out)
1830 if (e->distance == 0)
1832 ALAP (u_node) = MIN (ALAP (u_node),
1833 ALAP (e->dest) - e->latency);
1834 HEIGHT (u_node) = MAX (HEIGHT (u_node),
1835 HEIGHT (e->dest) + e->latency);
1839 return node_order_params_arr;
/* Find the node in NODES with the maximal ASAP.  Callers treat the
   return value as that node's index (>= 0), with a negative value
   presumably meaning NODES is empty -- TODO confirm (the variable
   tracking the chosen index is in elided lines).  */
1843 find_max_asap (ddg_ptr g, sbitmap nodes)
1849 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1851 ddg_node_ptr u_node = &g->nodes[u];
1853 if (max_asap < ASAP (u_node))
1855 max_asap = ASAP (u_node);
/* Among NODES, pick the node with maximal HEIGHT, breaking ties by
   minimal mobility (MOB = ALAP - ASAP).  Used for the BOTTOMUP
   direction of the SMS node ordering.  */
1863 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1867 int min_mob = INT_MAX;
1870 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1872 ddg_node_ptr u_node = &g->nodes[u];
1874 if (max_hv < HEIGHT (u_node))
1876 max_hv = HEIGHT (u_node);
1877 min_mob = MOB (u_node);
/* Same height: prefer the less mobile (more constrained) node.  */
1880 else if ((max_hv == HEIGHT (u_node))
1881 && (min_mob > MOB (u_node)))
1883 min_mob = MOB (u_node);
/* Among NODES, pick the node with maximal DEPTH (== ASAP), breaking
   ties by minimal mobility.  Used for the TOPDOWN direction of the SMS
   node ordering.  */
1891 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1895 int min_mob = INT_MAX;
1898 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1900 ddg_node_ptr u_node = &g->nodes[u];
1902 if (max_dv < DEPTH (u_node))
1904 max_dv = DEPTH (u_node);
1905 min_mob = MOB (u_node);
/* Same depth: prefer the less mobile (more constrained) node.  */
1908 else if ((max_dv == DEPTH (u_node))
1909 && (min_mob > MOB (u_node)))
1911 min_mob = MOB (u_node);
1918 /* Places the nodes of SCC into the NODE_ORDER array starting
1919 at position POS, according to the SMS ordering algorithm.
1920 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1921 the NODE_ORDER array, starting from position zero. */
1923 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1924 int * node_order, int pos)
1926 enum sms_direction dir;
1927 int num_nodes = g->num_nodes;
1928 sbitmap workset = sbitmap_alloc (num_nodes);
1929 sbitmap tmp = sbitmap_alloc (num_nodes);
1930 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1931 sbitmap predecessors = sbitmap_alloc (num_nodes);
1932 sbitmap successors = sbitmap_alloc (num_nodes);
1934 sbitmap_zero (predecessors);
1935 find_predecessors (predecessors, g, nodes_ordered);
1937 sbitmap_zero (successors);
1938 find_successors (successors, g, nodes_ordered);
/* Choose the initial workset and direction: prefer nodes of SCC that
   are predecessors/successors of what is already ordered.  */
1941 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1943 sbitmap_copy (workset, tmp);
1946 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1948 sbitmap_copy (workset, tmp);
/* Neither: seed with the max-ASAP node of the SCC.  */
1955 sbitmap_zero (workset);
1956 if ((u = find_max_asap (g, scc)) >= 0)
1957 SET_BIT (workset, u);
1961 sbitmap_zero (zero_bitmap);
/* Outer loop: alternate direction until the workset is exhausted.  */
1962 while (!sbitmap_equal (workset, zero_bitmap))
1965 ddg_node_ptr v_node;
1966 sbitmap v_node_preds;
1967 sbitmap v_node_succs;
/* BOTTOMUP sweep: repeatedly take the max-height / min-mobility node,
   append it to the order, and grow the workset with its in-SCC
   successors.  */
1971 while (!sbitmap_equal (workset, zero_bitmap))
1973 v = find_max_hv_min_mob (g, workset);
1974 v_node = &g->nodes[v];
1975 node_order[pos++] = v;
1976 v_node_succs = NODE_SUCCESSORS (v_node);
1977 sbitmap_a_and_b (tmp, v_node_succs, scc);
1979 /* Don't consider the already ordered successors again. */
1980 sbitmap_difference (tmp, tmp, nodes_ordered);
1981 sbitmap_a_or_b (workset, workset, tmp);
1982 RESET_BIT (workset, v);
1983 SET_BIT (nodes_ordered, v);
/* Flip to the other direction: refill the workset with in-SCC
   predecessors of everything ordered so far.  */
1986 sbitmap_zero (predecessors);
1987 find_predecessors (predecessors, g, nodes_ordered);
1988 sbitmap_a_and_b (workset, predecessors, scc);
/* TOPDOWN sweep: max-depth / min-mobility node, growing the workset
   with in-SCC predecessors.  */
1992 while (!sbitmap_equal (workset, zero_bitmap))
1994 v = find_max_dv_min_mob (g, workset);
1995 v_node = &g->nodes[v];
1996 node_order[pos++] = v;
1997 v_node_preds = NODE_PREDECESSORS (v_node);
1998 sbitmap_a_and_b (tmp, v_node_preds, scc);
2000 /* Don't consider the already ordered predecessors again. */
2001 sbitmap_difference (tmp, tmp, nodes_ordered);
2002 sbitmap_a_or_b (workset, workset, tmp);
2003 RESET_BIT (workset, v);
2004 SET_BIT (nodes_ordered, v);
2007 sbitmap_zero (successors);
2008 find_successors (successors, g, nodes_ordered);
2009 sbitmap_a_and_b (workset, successors, scc);
2013 sbitmap_free (workset);
2014 sbitmap_free (zero_bitmap);
2015 sbitmap_free (predecessors);
2016 sbitmap_free (successors);
2021 /* This page contains functions for manipulating partial-schedules during
2022 modulo scheduling. */
2024 /* Create a partial schedule and allocate a memory to hold II rows. */
2025 partial_schedule_ptr
2026 create_partial_schedule (int ii, ddg_ptr g, int history)
2028 partial_schedule_ptr ps = (partial_schedule_ptr)
2029 xmalloc (sizeof (struct partial_schedule));
/* One (initially empty) row of ps_insn chains per cycle of the II.  */
2030 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2032 ps->history = history;
/* min/max_cycle start inverted so the first scheduled node sets both.  */
2033 ps->min_cycle = INT_MAX;
2034 ps->max_cycle = INT_MIN;
2040 /* Free the PS_INSNs in rows array of the given partial schedule.
2041 ??? Consider caching the PS_INSN's. */
2043 free_ps_insns (partial_schedule_ptr ps)
2047 for (i = 0; i < ps->ii; i++)
/* Pop the head of row I's chain and free it until the row is empty
   (the free call itself is in elided lines -- TODO confirm).  */
2051 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2054 ps->rows[i] = ps_insn;
2060 /* Free all the memory allocated to the partial schedule. */
/* Body elided here; presumably frees the per-row ps_insns (via
   free_ps_insns), the rows array, and the ps struct itself -- confirm.  */
2062 free_partial_schedule (partial_schedule_ptr ps)
2071 /* Clear the rows array with its PS_INSNs, and create a new one with
/* Reset PS to an empty schedule of NEW_II rows.  */
2074 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2079 if (new_ii == ps->ii)
/* Resize the rows array for the new II and clear every row.  */
2081 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2082 * sizeof (ps_insn_ptr));
2083 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
/* Re-invert the cycle bounds, as in create_partial_schedule.  */
2085 ps->min_cycle = INT_MAX;
2086 ps->max_cycle = INT_MIN;
2089 /* Prints the partial schedule as an ii rows array, for each rows
2090 print the ids of the insns in it. */
2092 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2096 for (i = 0; i < ps->ii; i++)
2098 ps_insn_ptr ps_i = ps->rows[i];
2100 fprintf (dump, "\n[CYCLE %d ]: ", i);
/* Walk row I's chain and dump each insn's UID.  */
2103 fprintf (dump, "%d, ",
2104 INSN_UID (ps_i->node->insn));
2105 ps_i = ps_i->next_in_row;
2110 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2112 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2114 ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
/* Not linked into any row yet; the caller inserts it.  */
2117 ps_i->next_in_row = NULL;
2118 ps_i->prev_in_row = NULL;
2119 ps_i->row_rest_count = rest_count;
2120 ps_i->cycle = cycle;
2126 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2127 node is not found in the partial schedule, else returns true. */
2129 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
/* The row holding PS_I is determined by its cycle modulo II.  */
2136 row = SMODULO (ps_i->cycle, ps->ii);
2137 if (! ps_i->prev_in_row)
/* PS_I has no predecessor, so it must be the row head; unlink it
   from the head of the chain.  */
2139 if (ps_i != ps->rows[row])
2142 ps->rows[row] = ps_i->next_in_row;
2144 ps->rows[row]->prev_in_row = NULL;
/* Interior (or tail) node: splice it out of the doubly linked row.  */
2148 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2149 if (ps_i->next_in_row)
2150 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2156 /* Unlike what literature describes for modulo scheduling (which focuses
2157 on VLIW machines) the order of the instructions inside a cycle is
2158 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2159 where the current instruction should go relative to the already
2160 scheduled instructions in the given cycle. Go over these
2161 instructions and find the first possible column to put it in. */
2163 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2164 sbitmap must_precede, sbitmap must_follow)
2166 ps_insn_ptr next_ps_i;
2167 ps_insn_ptr first_must_follow = NULL;
2168 ps_insn_ptr last_must_precede = NULL;
2174 row = SMODULO (ps_i->cycle, ps->ii);
2176 /* Find the first must-follow and the last must-precede
2177 and insert the node immediately after the must-precede
2178 but make sure that there is no must-follow after it. */
2179 for (next_ps_i = ps->rows[row];
2181 next_ps_i = next_ps_i->next_in_row)
2183 if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2184 && ! first_must_follow)
2185 first_must_follow = next_ps_i;
2186 if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2188 /* If we have already met a node that must follow, then
2189 there is no possible column. */
2190 if (first_must_follow)
2193 last_must_precede = next_ps_i;
2197 /* Now insert the node after INSERT_AFTER_PSI. */
/* No must-precede constraint: place PS_I at the head of the row.  */
2199 if (! last_must_precede)
2201 ps_i->next_in_row = ps->rows[row];
2202 ps_i->prev_in_row = NULL;
2203 if (ps_i->next_in_row)
2204 ps_i->next_in_row->prev_in_row = ps_i;
2205 ps->rows[row] = ps_i;
/* Otherwise splice PS_I in right after the last must-precede insn.  */
2209 ps_i->next_in_row = last_must_precede->next_in_row;
2210 last_must_precede->next_in_row = ps_i;
2211 ps_i->prev_in_row = last_must_precede;
2212 if (ps_i->next_in_row)
2213 ps_i->next_in_row->prev_in_row = ps_i;
2219 /* Advances the PS_INSN one column in its current row; returns false
2220 in failure and true in success. Bit N is set in MUST_FOLLOW if
2221 the node with cuid N must come after the node pointed to by
2222 PS_I when scheduled in the same cycle. */
2224 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2225 sbitmap must_follow)
2227 ps_insn_ptr prev, next;
2229 ddg_node_ptr next_node;
2234 row = SMODULO (ps_i->cycle, ps->ii);
/* Already at the last column of the row: cannot advance.  */
2236 if (! ps_i->next_in_row)
2239 next_node = ps_i->next_in_row->node;
2241 /* Check if next_in_row is dependent on ps_i, both having same sched
2242 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2243 if (TEST_BIT (must_follow, next_node->cuid))
2246 /* Advance PS_I over its next_in_row in the doubly linked list. */
2247 prev = ps_i->prev_in_row;
2248 next = ps_i->next_in_row;
/* If PS_I was the row head, NEXT becomes the new head.  */
2250 if (ps_i == ps->rows[row])
2251 ps->rows[row] = next;
/* Swap PS_I and NEXT by relinking the four surrounding pointers.  */
2253 ps_i->next_in_row = next->next_in_row;
2255 if (next->next_in_row)
2256 next->next_in_row->prev_in_row = ps_i;
2258 next->next_in_row = ps_i;
2259 ps_i->prev_in_row = next;
2261 next->prev_in_row = prev;
2263 prev->next_in_row = next;
2268 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2269 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2270 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2271 before/after (respectively) the node pointed to by PS_I when scheduled
2272 in the same cycle. */
2274 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2275 sbitmap must_precede, sbitmap must_follow)
2279 int row = SMODULO (cycle, ps->ii);
/* Reject the cycle if its row has already consumed all issue slots
   (row_rest_count tracks insns issued in this row).  */
2282 && ps->rows[row]->row_rest_count >= issue_rate)
2286 rest_count += ps->rows[row]->row_rest_count;
2288 ps_i = create_ps_insn (node, rest_count, cycle);
2290 /* Finds and inserts PS_I according to MUST_FOLLOW and
/* No legal column for PS_I in this row; insertion fails.  */
2292 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2301 /* Advance time one cycle. Assumes DFA is being used. */
2303 advance_one_cycle (void)
/* Feed the target's pre-cycle pseudo-insn to the DFA, if it has one.  */
2305 if (targetm.sched.dfa_pre_cycle_insn)
2306 state_transition (curr_state,
2307 targetm.sched.dfa_pre_cycle_insn ());
/* A NULL insn advances the DFA state by one cycle.  */
2309 state_transition (curr_state, NULL);
/* Likewise for the post-cycle pseudo-insn.  */
2311 if (targetm.sched.dfa_post_cycle_insn)
2312 state_transition (curr_state,
2313 targetm.sched.dfa_post_cycle_insn ());
2316 /* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
2317 the number of cycles according to DFA that the kernel fits in,
2318 we use this to check whether we did well with SMS after we add
2319 register moves. In some cases register moves overhead makes
2320 it even worse than the original loop. We want SMS to be performed
2321 when it gives fewer cycles after register moves are added. */
2323 kernel_number_of_cycles (rtx first_insn, rtx last_insn)
2327 int can_issue_more = issue_rate;
2329 state_reset (curr_state);
2331 for (insn = first_insn;
2332 insn != NULL_RTX && insn != last_insn;
2333 insn = NEXT_INSN (insn))
/* Skip notes, labels and USEs -- they consume no issue slots.  */
2335 if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
2338 /* Check if there is room for the current insn. */
2339 if (!can_issue_more || state_dead_lock_p (curr_state))
/* Current cycle is full (or the DFA is dead-locked): advance.  */
2342 advance_one_cycle ();
2343 can_issue_more = issue_rate;
2346 /* Update the DFA state and advance a cycle if the DFA found
2347 resource conflicts. */
2348 if (state_transition (curr_state, insn) >= 0)
2351 advance_one_cycle ();
2352 can_issue_more = issue_rate;
/* Let the target adjust the remaining issue slots, if it wants to.  */
2355 if (targetm.sched.variable_issue)
2357 targetm.sched.variable_issue (sched_dump, sched_verbose,
2358 insn, can_issue_more);
2359 /* A naked CLOBBER or USE generates no instruction, so don't
2360 let them consume issue slots. */
2361 else if (GET_CODE (PATTERN (insn)) != USE
2362 && GET_CODE (PATTERN (insn)) != CLOBBER)
2368 /* Checks if PS has resource conflicts according to DFA, starting from
2369 FROM cycle to TO cycle; returns true if there are conflicts and false
2370 if there are no conflicts. Assumes DFA is being used. */
2372 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2376 state_reset (curr_state);
2378 for (cycle = from; cycle <= to; cycle++)
2380 ps_insn_ptr crr_insn;
2381 /* Holds the remaining issue slots in the current row. */
2382 int can_issue_more = issue_rate;
2384 /* Walk through the DFA for the current row. */
2385 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2387 crr_insn = crr_insn->next_in_row)
2389 rtx insn = crr_insn->node->insn;
2394 /* Check if there is room for the current insn. */
2395 if (!can_issue_more || state_dead_lock_p (curr_state))
2398 /* Update the DFA state and return with failure if the DFA found
2399 resource conflicts. */
2400 if (state_transition (curr_state, insn) >= 0)
/* Target hook may adjust the remaining issue slots for this row.  */
2403 if (targetm.sched.variable_issue)
2405 targetm.sched.variable_issue (sched_dump, sched_verbose,
2406 insn, can_issue_more);
2407 /* A naked CLOBBER or USE generates no instruction, so don't
2408 let them consume issue slots. */
2409 else if (GET_CODE (PATTERN (insn)) != USE
2410 && GET_CODE (PATTERN (insn)) != CLOBBER)
2414 /* Advance the DFA to the next cycle. */
2415 advance_one_cycle ();
2420 /* Checks if the given node causes resource conflicts when added to PS at
2421 cycle C. If not the node is added to PS and returned; otherwise zero
2422 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2423 cuid N must come before/after (respectively) the node pointed to by
2424 PS_I when scheduled in the same cycle. */
2426 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2427 int c, sbitmap must_precede,
2428 sbitmap must_follow)
2430 int has_conflicts = 0;
2433 /* First add the node to the PS, if this succeeds check for
2434 conflicts, trying different issue slots in the same row. */
2435 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2436 return NULL; /* Failed to insert the node at the given cycle. */
/* Check cycle C alone, then (per the elided condition) a wider span
   around C when DFA_HISTORY is non-zero.  */
2438 has_conflicts = ps_has_conflicts (ps, c, c)
2440 && ps_has_conflicts (ps,
2444 /* Try different issue slots to find one that the given node can be
2445 scheduled in without conflicts. */
2446 while (has_conflicts)
/* Ran out of columns in this row: undo the insertion and fail.  */
2448 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2450 has_conflicts = ps_has_conflicts (ps, c, c)
2452 && ps_has_conflicts (ps,
2459 remove_node_from_ps (ps, ps_i);
/* Success: widen the schedule's cycle span to include C.  */
2463 ps->min_cycle = MIN (ps->min_cycle, c);
2464 ps->max_cycle = MAX (ps->max_cycle, c);
2468 /* Rotate the rows of PS such that insns scheduled at time
2469 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2471 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2473 int i, row, backward_rotates;
2474 int last_row = ps->ii - 1;
/* Nothing to rotate.  */
2476 if (start_cycle == 0)
2479 backward_rotates = SMODULO (start_cycle, ps->ii);
2481 /* Revisit later and optimize this into a single loop. */
2482 for (i = 0; i < backward_rotates; i++)
/* One backward rotation: shift every row up and wrap row 0 to the end.  */
2484 ps_insn_ptr first_row = ps->rows[0];
2486 for (row = 0; row < last_row; row++)
2487 ps->rows[row] = ps->rows[row+1];
2489 ps->rows[last_row] = first_row;
/* Re-base the recorded cycle span so START_CYCLE becomes cycle 0.  */
2492 ps->max_cycle -= start_cycle;
2493 ps->min_cycle -= start_cycle;
2496 /* Remove the node N from the partial schedule PS; because we restart the DFA
2497 each time we want to check for resource conflicts; this is equivalent to
2498 unscheduling the node N. */
2500 ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2503 int row = SMODULO (SCHED_TIME (n), ps->ii);
/* NOTE(review): SMODULO should yield 0 .. ii-1, so this guard looks
   unreachable, and the tight bound would be 'row >= ps->ii'; verify
   before changing.  */
2505 if (row < 0 || row > ps->ii)
/* Find N's ps_insn in its row (empty-bodied search loop).  */
2508 for (ps_i = ps->rows[row];
2509 ps_i && ps_i->node != n;
2510 ps_i = ps_i->next_in_row);
2514 return remove_node_from_ps (ps, ps_i);
2516 #endif /* INSN_SCHEDULING*/