1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
27 #include "diagnostic-core.h"
30 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "insn-attr.h"
38 #include "sched-int.h"
40 #include "cfglayout.h"
48 #include "tree-pass.h"
52 #ifdef INSN_SCHEDULING
54 /* This file contains the implementation of the Swing Modulo Scheduler,
55 described in the following references:
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57 Lifetime--sensitive modulo scheduling in a production environment.
58 IEEE Trans. on Comps., 50(3), March 2001
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63 The basic structure is:
64 1. Build a data-dependence graph (DDG) for each loop.
65 2. Use the DDG to order the insns of a loop (not in topological order
66 necessarily, but rather) trying to place each insn after all its
67 predecessors _or_ after all its successors.
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69 4. Use the ordering to perform list-scheduling of the loop:
70 1. Set II = MII. We will try to schedule the loop within II cycles.
71 2. Try to schedule the insns one by one according to the ordering.
72 For each insn compute an interval of cycles by considering already-
73 scheduled preds and succs (and associated latencies); try to place
74 the insn in the cycles of this window checking for potential
75 resource conflicts (using the DFA interface).
76 Note: this is different from the cycle-scheduling of schedule_insns;
77 here the insns are not scheduled monotonically top-down (nor bottom-up).
79 3. If failed in scheduling all insns - bump II++ and try again, unless
80 II reaches an upper bound MaxII, in which case report failure.
81 5. If we succeeded in scheduling the loop within II cycles, we now
82 generate prolog and epilog, decrease the counter of the loop, and
83 perform modulo variable expansion for live ranges that span more than
84 II cycles (i.e. use register copies to prevent a def from overwriting
85 itself before reaching the use).
87 SMS works with countable loops whose loop count can be easily
88 adjusted. This is because we peel a constant number of iterations
89 into a prologue and epilogue for which we want to avoid emitting
90 the control part, and a kernel which is to iterate that constant
91 number of iterations less than the original loop. So the control
92 part should be a set of insns clearly identified and having its
93 own iv, not otherwise used in the loop (at-least for now), which
94 initializes a register before the loop to the number of iterations.
95 Currently SMS relies on the do-loop pattern to recognize such loops,
96 where (1) the control part comprises of all insns defining and/or
97 using a certain 'count' register and (2) the loop count can be
98 adjusted by modifying this register prior to the loop.
99 TODO: Rely on cfgloop analysis instead. */
101 /* This page defines partial-schedule structures and functions for
102 modulo scheduling. */
104 typedef struct partial_schedule *partial_schedule_ptr;
105 typedef struct ps_insn *ps_insn_ptr;
107 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
108 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
110 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
111 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
113 /* Perform signed modulo, always returning a non-negative value. */
/* NOTE(review): SMODULO evaluates both X and Y more than once; do not
   pass expressions with side effects.  */
114 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
116 /* The number of different iterations the nodes in ps span, assuming
117 the stage boundaries are placed efficiently. */
/* NOTE(review): the body of CALC_STAGE_COUNT continues on a line not
   present in this extract -- verify against the complete source.  */
118 #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
120 /* The stage count of ps. */
121 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
123 /* A single instruction in the partial schedule. */
/* NOTE(review): several field declarations of struct ps_insn appear to be
   elided from this extract (e.g. the node and cycle fields the comments
   below describe).  */
126 /* The corresponding DDG_NODE. */
129 /* The (absolute) cycle in which the PS instruction is scheduled.
130 Same as SCHED_TIME (node). */
133 /* The next/prev PS_INSN in the same row. */
134 ps_insn_ptr next_in_row,
139 /* Holds the partial schedule as an array of II rows. Each entry of the
140 array points to a linked list of PS_INSNs, which represents the
141 instructions that are scheduled for that row. */
142 struct partial_schedule
144 int ii; /* Number of rows in the partial schedule. */
145 int history; /* Threshold for conflict checking using DFA. */
147 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
150 /* rows_length[i] holds the number of instructions in the row.
151 It is used only (as an optimization) to back off quickly from
152 trying to schedule a node in a full row; that is, to avoid running
153 through futile DFA state transitions. */
156 /* The earliest absolute cycle of an insn in the partial schedule. */
159 /* The latest absolute cycle of an insn in the partial schedule. */
162 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
164 int stage_count; /* The stage count of the partial schedule. */
167 /* We use this to record all the register replacements we do in
168 the kernel so we can undo SMS if it is not profitable. */
/* NOTE(review): the insn/orig_reg/new_reg members referenced by
   generate_reg_moves below are not visible here -- presumably elided.  */
169 struct undo_replace_buff_elem
174 struct undo_replace_buff_elem *next;
/* Forward declarations for the partial-schedule manipulation routines
   defined later in this file.  */
179 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
180 static void free_partial_schedule (partial_schedule_ptr);
181 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
182 void print_partial_schedule (partial_schedule_ptr, FILE *);
183 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
184 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
185 ddg_node_ptr node, int cycle,
186 sbitmap must_precede,
187 sbitmap must_follow);
188 static void rotate_partial_schedule (partial_schedule_ptr, int);
189 void set_row_column_for_ps (partial_schedule_ptr);
190 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
191 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
194 /* This page defines constants and structures for the modulo scheduling
197 static int sms_order_nodes (ddg_ptr, int, int *, int *);
198 static void set_node_sched_params (ddg_ptr);
199 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
200 static void permute_partial_schedule (partial_schedule_ptr, rtx);
/* NOTE(review): the parameter lists of the next two prototypes continue on
   lines missing from this extract.  */
201 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
203 static void duplicate_insns_of_cycles (partial_schedule_ptr,
205 static int calculate_stage_count (partial_schedule_ptr ps);
/* Accessors for the per-node scheduling parameters, stored via the
   aux.info pointer of each DDG node (see set_node_sched_params).  */
206 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
207 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
208 #define SCHED_FIRST_REG_MOVE(x) \
209 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
210 #define SCHED_NREG_MOVES(x) \
211 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
212 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
213 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
214 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
216 /* The scheduling parameters held for each node. */
217 typedef struct node_sched_params
219 int asap; /* A lower-bound on the absolute scheduling cycle. */
220 int time; /* The absolute scheduling cycle (time >= asap). */
222 /* The following field (first_reg_move) is a pointer to the first
223 register-move instruction added to handle the modulo-variable-expansion
224 of the register defined by this node. This register-move copies the
225 original register defined by the node. */
/* NOTE(review): the first_reg_move, nreg_moves and column field
   declarations described by the surrounding comments are not visible in
   this extract -- presumably elided.  */
228 /* The number of register-move instructions added, immediately preceding
232 int row; /* Holds time % ii. */
233 int stage; /* Holds time / ii. */
235 /* The column of a node inside the ps. If nodes u, v are on the same row,
236 u will precede v if column (u) < column (v). */
238 } *node_sched_params_ptr;
241 /* The following three functions are copied from the current scheduler
242 code in order to use sched_analyze() for computing the dependencies.
243 They are used when initializing the sched_info structure. */
/* Return a string naming INSN (its UID) for scheduler dump output.
   NOTE(review): the return type line and the static buffer declaration
   are missing from this extract.  */
245 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
249 sprintf (tmp, "i%4d", INSN_UID (insn));
/* No-op hook: SMS does not need jump register dependencies.  */
254 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
255 regset cond_exec ATTRIBUTE_UNUSED,
256 regset used ATTRIBUTE_UNUSED,
257 regset set ATTRIBUTE_UNUSED)
261 static struct common_sched_info_def sms_common_sched_info;
263 static struct sched_deps_info_def sms_sched_deps_info =
265 compute_jump_reg_dependencies,
266 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
271 static struct haifa_sched_info sms_sched_info =
280 NULL, /* insn_finishes_block_p */
285 NULL, NULL, NULL, NULL,
290 /* Given HEAD and TAIL which are the first and last insns in a loop;
291 return the register which controls the loop. Return zero if it has
292 more than one occurrence in the loop besides the control part or the
293 do-loop pattern is not of the form we expect. */
/* NOTE(review): the return type line, braces and the early-return /
   final-return paths of this function are missing from this extract.  */
295 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
297 #ifdef HAVE_doloop_end
298 rtx reg, condition, insn, first_insn_not_to_check;
303 /* TODO: Free SMS's dependence on doloop_condition_get. */
304 condition = doloop_condition_get (tail);
/* Extract the count register from the condition: either a plain REG
   or the REG operand of a (plus ...) decrement.  */
308 if (REG_P (XEXP (condition, 0)))
309 reg = XEXP (condition, 0);
310 else if (GET_CODE (XEXP (condition, 0)) == PLUS
311 && REG_P (XEXP (XEXP (condition, 0), 0)))
312 reg = XEXP (XEXP (condition, 0), 0);
316 /* Check that the COUNT_REG has no other occurrences in the loop
317 until the decrement. We assume the control part consists of
318 either a single (parallel) branch-on-count or a (non-parallel)
319 branch immediately preceded by a single (decrement) insn. */
320 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
321 : prev_nondebug_insn (tail))
323 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
324 if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
328 fprintf (dump_file, "SMS count_reg found ");
329 print_rtl_single (dump_file, reg);
330 fprintf (dump_file, " outside control in insn:\n");
331 print_rtl_single (dump_file, insn);
343 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
344 that the number of iterations is a compile-time constant. If so,
345 return the rtx that sets COUNT_REG to a constant, and set COUNT to
346 this constant. Otherwise return 0. */
/* NOTE(review): the return type, local declarations of head/tail/insn,
   and the return statements are missing from this extract.  */
348 const_iteration_count (rtx count_reg, basic_block pre_header,
349 HOST_WIDEST_INT * count)
357 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
/* Scan backwards so the last assignment to COUNT_REG wins.  */
359 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
360 if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
361 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
363 rtx pat = single_set (insn);
365 if (CONST_INT_P (SET_SRC (pat)))
367 *count = INTVAL (SET_SRC (pat));
377 /* A very simple resource-based lower bound on the initiation interval.
378 ??? Improve the accuracy of this bound by considering the
379 utilization of various units. */
/* NOTE(review): the function signature line (res_MII) is missing from
   this extract.  Prefers a target-specific bound when the target
   provides one; otherwise divides node count by issue rate.  */
383 if (targetm.sched.sms_res_mii)
384 return targetm.sched.sms_res_mii (g);
386 return ((g->num_nodes - g->num_debug) / issue_rate);
390 /* Points to the array that contains the sched data for each node. */
391 static node_sched_params_ptr node_sched_params;
393 /* Allocate sched_params for each node and initialize it. Assumes that
394 the aux field of each node contain the asap bound (computed earlier),
395 and copies it into the sched_params field. */
397 set_node_sched_params (ddg_ptr g)
/* xcalloc zero-fills, so time/row/stage/column start at zero.  */
401 /* Allocate for each node in the DDG a place to hold the "sched_data". */
402 /* Initialize ASAP/ALAP/HIGHT to zero. */
403 node_sched_params = (node_sched_params_ptr)
404 xcalloc (g->num_nodes,
405 sizeof (struct node_sched_params))
407 /* Set the pointer of the general data of the node to point to the
408 appropriate sched_params structure. */
409 for (i = 0; i < g->num_nodes; i++)
411 /* Watch out for aliasing problems? */
412 node_sched_params[i].asap = g->nodes[i].aux.count;
413 g->nodes[i].aux.info = &node_sched_params[i];
/* Dump the per-node scheduling parameters (asap, time, reg-moves) of the
   first NUM_NODES nodes of G to FILE.  NOTE(review): the return type,
   the early-return on a null FILE, and the braces are missing from this
   extract.  */
418 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
424 for (i = 0; i < num_nodes; i++)
426 node_sched_params_ptr nsp = &node_sched_params[i];
427 rtx reg_move = nsp->first_reg_move;
430 fprintf (file, "Node = %d; INSN = %d\n", i,
431 (INSN_UID (g->nodes[i].insn)));
432 fprintf (file, " asap = %d:\n", nsp->asap);
433 fprintf (file, " time = %d:\n", nsp->time);
434 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
/* The reg-moves chain backwards from first_reg_move via PREV_INSN.  */
435 for (j = 0; j < nsp->nreg_moves; j++)
437 fprintf (file, " reg_move = ");
438 print_rtl_single (file, reg_move);
439 reg_move = PREV_INSN (reg_move);
445 Breaking intra-loop register anti-dependences:
446 Each intra-loop register anti-dependence implies a cross-iteration true
447 dependence of distance 1. Therefore, we can remove such false dependencies
448 and figure out if the partial schedule broke them by checking if (for a
449 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
450 if so generate a register move. The number of such moves is equal to:
451 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
452 nreg_moves = ----------------------------------- + 1 - { dependence.
/* Generate the register moves needed for modulo variable expansion and
   record every register replacement in a list so the transformation can
   be undone.  NOTE(review): a number of lines (declarations of g/ii/e/
   last_reg_move, several braces and continue paths) are missing from
   this extract.  */
455 static struct undo_replace_buff_elem *
456 generate_reg_moves (partial_schedule_ptr ps, bool rescan)
461 struct undo_replace_buff_elem *reg_move_replaces = NULL;
463 for (i = 0; i < g->num_nodes; i++)
465 ddg_node_ptr u = &g->nodes[i];
467 int nreg_moves = 0, i_reg_move;
468 sbitmap *uses_of_defs;
470 rtx prev_reg, old_reg;
472 /* Compute the number of reg_moves needed for u, by looking at life
473 ranges started at u (excluding self-loops). */
474 for (e = u->out; e; e = e->next_out)
475 if (e->type == TRUE_DEP && e->dest != e->src)
477 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
/* A distance-1 dependence reads the value produced one iteration
   earlier, hence the extra II in the numerator.  */
479 if (e->distance == 1)
480 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
482 /* If dest precedes src in the schedule of the kernel, then dest
483 will read before src writes and we can save one reg_copy. */
484 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
485 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
488 nreg_moves = MAX (nreg_moves, nreg_moves4e);
494 /* Every use of the register defined by node may require a different
495 copy of this register, depending on the time the use is scheduled.
496 Set a bitmap vector, telling which nodes use each copy of this
498 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
499 sbitmap_vector_zero (uses_of_defs, nreg_moves);
500 for (e = u->out; e; e = e->next_out)
501 if (e->type == TRUE_DEP && e->dest != e->src)
503 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
505 if (e->distance == 1)
506 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
508 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
509 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
513 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
516 /* Now generate the reg_moves, attaching relevant uses to them. */
517 SCHED_NREG_MOVES (u) = nreg_moves;
518 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
519 /* Insert the reg-moves right before the notes which precede
520 the insn they relates to. */
521 last_reg_move = u->first_note;
523 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
525 unsigned int i_use = 0;
526 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
527 rtx reg_move = gen_move_insn (new_reg, prev_reg);
528 sbitmap_iterator sbi;
530 add_insn_before (reg_move, last_reg_move, NULL);
531 last_reg_move = reg_move;
533 if (!SCHED_FIRST_REG_MOVE (u))
534 SCHED_FIRST_REG_MOVE (u) = reg_move;
/* For each use that should read this copy, record an undo entry and
   rewrite the use to reference the new register.  */
536 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
538 struct undo_replace_buff_elem *rep;
540 rep = (struct undo_replace_buff_elem *)
541 xcalloc (1, sizeof (struct undo_replace_buff_elem));
542 rep->insn = g->nodes[i_use].insn;
543 rep->orig_reg = old_reg;
544 rep->new_reg = new_reg;
546 if (! reg_move_replaces)
547 reg_move_replaces = rep;
550 rep->next = reg_move_replaces;
551 reg_move_replaces = rep;
554 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
556 df_insn_rescan (g->nodes[i_use].insn);
561 sbitmap_vector_free (uses_of_defs);
563 return reg_move_replaces;
566 /* Free memory allocated for the undo buffer. */
/* NOTE(review): the return type, braces and the free (rep) call are
   missing from this extract.  Walks the singly-linked list, releasing
   each element.  */
568 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
571 while (reg_move_replaces)
573 struct undo_replace_buff_elem *rep = reg_move_replaces;
575 reg_move_replaces = reg_move_replaces->next;
580 /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
581 SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
582 will move to cycle zero. */
/* NOTE(review): the return type, the row/ii declarations, braces, and
   the `if (dump_file)' guard around the dump prints are missing from
   this extract.  */
584 reset_sched_times (partial_schedule_ptr ps, int amount)
588 ps_insn_ptr crr_insn;
590 for (row = 0; row < ii; row++)
591 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
593 ddg_node_ptr u = crr_insn->node;
594 int normalized_time = SCHED_TIME (u) - amount;
595 int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
596 int sc_until_cycle_zero, stage;
600 /* Print the scheduling times after the rotation. */
601 fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
602 "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
603 INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
605 if (JUMP_P (crr_insn->node->insn))
606 fprintf (dump_file, " (branch)");
607 fprintf (dump_file, "\n");
/* Each scheduled time must lie inside the recorded cycle window.  */
610 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
611 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
612 SCHED_TIME (u) = normalized_time;
613 SCHED_ROW (u) = SMODULO (normalized_time, ii);
615 /* The calculation of stage count is done adding the number
616 of stages before cycle zero and after cycle zero. */
617 sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
619 if (SCHED_TIME (u) < 0)
621 stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
622 SCHED_STAGE (u) = sc_until_cycle_zero - stage;
626 stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
627 SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
632 /* Set SCHED_COLUMN of each node according to its position in PS. */
/* NOTE(review): the return type, the row declaration, braces, and the
   `column = 0' initialization are missing from this extract.  Columns
   number insns left-to-right within each row.  */
634 set_columns_for_ps (partial_schedule_ptr ps)
638 for (row = 0; row < ps->ii; row++)
640 ps_insn_ptr cur_insn = ps->rows[row];
643 for (; cur_insn; cur_insn = cur_insn->next_in_row)
644 SCHED_COLUMN (cur_insn->node) = column++;
648 /* Permute the insns according to their order in PS, from row 0 to
649 row ii-1, and position them right before LAST. This schedules
650 the insns of the loop kernel. */
/* NOTE(review): the return type, the ii/row/ps_ij declarations, and the
   trailing argument of reorder_insns_nobb are missing from this
   extract.  */
652 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
658 for (row = 0; row < ii ; row++)
659 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
660 if (PREV_INSN (last) != ps_ij->node->insn)
661 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
/* Emit copies of the kernel insns (and their reg-moves) that belong to
   stages FROM_STAGE..TO_STAGE, for the prolog (FOR_PROLOG nonzero) or
   the epilog.  Insns referring to COUNT_REG (the control part) and the
   closing branch are skipped.  NOTE(review): the return type, several
   local declarations (row, ps_ij, j, i_reg_moves), braces and continue
   statements are missing from this extract.  */
666 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
667 int to_stage, int for_prolog, rtx count_reg)
672 for (row = 0; row < ps->ii; row++)
673 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
675 ddg_node_ptr u_node = ps_ij->node;
677 rtx reg_move = NULL_RTX;
679 /* Do not duplicate any insn which refers to count_reg as it
680 belongs to the control part.
681 The closing branch is scheduled as well and thus should
683 TODO: This should be done by analyzing the control part of
685 if (reg_mentioned_p (count_reg, u_node->insn)
686 || JUMP_P (ps_ij->node->insn))
691 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
692 number of reg_moves starting with the second occurrence of
693 u_node, which is generated if its SCHED_STAGE <= to_stage. */
694 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
695 i_reg_moves = MAX (i_reg_moves, 0);
696 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
698 /* The reg_moves start from the *first* reg_move backwards. */
701 reg_move = SCHED_FIRST_REG_MOVE (u_node);
702 for (j = 1; j < i_reg_moves; j++)
703 reg_move = PREV_INSN (reg_move);
706 else /* It's for the epilog. */
708 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
709 starting to decrease one stage after u_node no longer occurs;
710 that is, generate all reg_moves until
711 SCHED_STAGE (u_node) == from_stage - 1. */
712 i_reg_moves = SCHED_NREG_MOVES (u_node)
713 - (from_stage - SCHED_STAGE (u_node) - 1);
714 i_reg_moves = MAX (i_reg_moves, 0);
715 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
717 /* The reg_moves start from the *last* reg_move forwards. */
720 reg_move = SCHED_FIRST_REG_MOVE (u_node);
721 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
722 reg_move = PREV_INSN (reg_move);
726 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
727 emit_insn (copy_rtx (PATTERN (reg_move)));
728 if (SCHED_STAGE (u_node) >= from_stage
729 && SCHED_STAGE (u_node) <= to_stage)
730 duplicate_insn_chain (u_node->first_note, u_node->insn);
735 /* Generate the instructions (including reg_moves) for prolog & epilog. */
/* NOTE(review): the return type, the i/e declarations, start_sequence/
   end_sequence bracketing and several braces are missing from this
   extract.  */
737 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
738 rtx count_reg, rtx count_init)
741 int last_stage = PS_STAGE_COUNT (ps) - 1;
744 /* Generate the prolog, inserting its insns on the loop-entry edge. */
749 /* Generate instructions at the beginning of the prolog to
750 adjust the loop count by STAGE_COUNT. If loop count is constant
751 (count_init), this constant is adjusted by STAGE_COUNT in
752 generate_prolog_epilog function. */
753 rtx sub_reg = NULL_RTX;
/* Peel LAST_STAGE iterations: count_reg -= last_stage.  */
755 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
756 count_reg, GEN_INT (last_stage),
757 count_reg, 1, OPTAB_DIRECT);
758 gcc_assert (REG_P (sub_reg));
759 if (REGNO (sub_reg) != REGNO (count_reg))
760 emit_move_insn (count_reg, sub_reg);
763 for (i = 0; i < last_stage; i++)
764 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
766 /* Put the prolog on the entry edge. */
767 e = loop_preheader_edge (loop);
768 split_edge_and_insert (e, get_insns ());
772 /* Generate the epilog, inserting its insns on the loop-exit edge. */
775 for (i = 0; i < last_stage; i++)
776 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
778 /* Put the epilogue on the exit edge. */
779 gcc_assert (single_exit (loop));
780 e = single_exit (loop);
781 split_edge_and_insert (e, get_insns ());
785 /* Return true if all the BBs of the loop are empty except the
/* NOTE(review): the rest of the leading comment, the return type, the
   i/head/tail declarations, the free (bbs) call and the return
   statements are missing from this extract.  */
788 loop_single_full_bb_p (struct loop *loop)
791 basic_block *bbs = get_loop_body (loop);
793 for (i = 0; i < loop->num_nodes ; i++)
796 bool empty_bb = true;
798 if (bbs[i] == loop->header)
801 /* Make sure that basic blocks other than the header
802 have only notes labels or jumps. */
803 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
804 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
806 if (NOTE_P (head) || LABEL_P (head)
807 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
823 /* A simple loop from SMS point of view; it is a loop that is composed of
824 either a single basic block or two BBs - a header and a latch. */
/* NOTE(review): the LOOP argument is not parenthesized inside the macro
   body -- safe only because callers pass a plain identifier.  */
825 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
826 && (EDGE_COUNT (loop->latch->preds) == 1) \
827 && (EDGE_COUNT (loop->latch->succs) == 1))
829 /* Return true if the loop is in its canonical form and false if not.
830 i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */
/* NOTE(review): the return type, dump_file guards, braces and the
   return false/true statements are missing from this extract.  */
832 loop_canon_p (struct loop *loop)
835 if (loop->inner || !loop_outer (loop))
838 fprintf (dump_file, "SMS loop inner or !loop_outer\n");
842 if (!single_exit (loop))
846 rtx insn = BB_END (loop->header);
848 fprintf (dump_file, "SMS loop many exits ");
849 fprintf (dump_file, " %s %d (file, line)\n",
850 insn_file (insn), insn_line (insn));
855 if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
859 rtx insn = BB_END (loop->header);
861 fprintf (dump_file, "SMS loop many BBs. ");
862 fprintf (dump_file, " %s %d (file, line)\n",
863 insn_file (insn), insn_line (insn));
871 /* If there are more than one entry for the loop,
872 make it one by splitting the first entry edge and
873 redirecting the others to the new BB. */
/* NOTE(review): the return type, the e/i declarations, the split_edge
   calls guarded by the conditions below, and braces are missing from
   this extract.  */
875 canon_loop (struct loop *loop)
880 /* Avoid annoying special cases of edges going to exit
882 FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
883 if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
886 if (loop->latch == loop->header
887 || EDGE_COUNT (loop->latch->succs) > 1)
889 FOR_EACH_EDGE (e, i, loop->header->preds)
890 if (e->src == loop->latch)
/* Point the global scheduler hooks at the SMS-specific descriptors so
   sched_analyze works for this pass.  NOTE(review): the `static void'
   line is missing from this extract.  */
898 setup_sched_infos (void)
900 memcpy (&sms_common_sched_info, &haifa_common_sched_info,
901 sizeof (sms_common_sched_info));
902 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
903 common_sched_info = &sms_common_sched_info;
905 sched_deps_info = &sms_sched_deps_info;
906 current_sched_info = &sms_sched_info;
909 /* Probability in % that the sms-ed loop rolls enough so that optimized
910 version may be entered. Just a guess. */
911 #define PROB_SMS_ENOUGH_ITERATIONS 80
913 /* Used to calculate the upper bound of ii. */
914 #define MAXII_FACTOR 2
916 /* Main entry point, perform SMS scheduling on the loops of the function
917 that consist of single basic blocks. */
926 partial_schedule_ptr ps;
927 basic_block bb = NULL;
929 basic_block condition_bb = NULL;
931 gcov_type trip_count = 0;
933 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
934 | LOOPS_HAVE_RECORDED_EXITS);
935 if (number_of_loops () <= 1)
937 loop_optimizer_finalize ();
938 return; /* There are no loops to schedule. */
941 /* Initialize issue_rate. */
942 if (targetm.sched.issue_rate)
944 int temp = reload_completed;
946 reload_completed = 1;
947 issue_rate = targetm.sched.issue_rate ();
948 reload_completed = temp;
953 /* Initialize the scheduler. */
954 setup_sched_infos ();
957 /* Allocate memory to hold the DDG array one entry for each loop.
958 We use loop->num as index into this array. */
959 g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
963 fprintf (dump_file, "\n\nSMS analysis phase\n");
964 fprintf (dump_file, "===================\n\n");
967 /* Build DDGs for all the relevant loops and hold them in G_ARR
968 indexed by the loop index. */
969 FOR_EACH_LOOP (li, loop, 0)
975 if (dbg_cnt (sms_sched_loop) == false)
978 fprintf (dump_file, "SMS reached max limit... \n");
985 rtx insn = BB_END (loop->header);
987 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
988 loop->num, insn_file (insn), insn_line (insn));
992 if (! loop_canon_p (loop))
995 if (! loop_single_full_bb_p (loop))
998 fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1004 get_ebb_head_tail (bb, bb, &head, &tail);
1005 latch_edge = loop_latch_edge (loop);
1006 gcc_assert (single_exit (loop));
1007 if (single_exit (loop)->count)
1008 trip_count = latch_edge->count / single_exit (loop)->count;
1010 /* Perform SMS only on loops that their average count is above threshold. */
1012 if ( latch_edge->count
1013 && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1017 fprintf (dump_file, " %s %d (file, line)\n",
1018 insn_file (tail), insn_line (tail));
1019 fprintf (dump_file, "SMS single-bb-loop\n");
1020 if (profile_info && flag_branch_probabilities)
1022 fprintf (dump_file, "SMS loop-count ");
1023 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1024 (HOST_WIDEST_INT) bb->count);
1025 fprintf (dump_file, "\n");
1026 fprintf (dump_file, "SMS trip-count ");
1027 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1028 (HOST_WIDEST_INT) trip_count);
1029 fprintf (dump_file, "\n");
1030 fprintf (dump_file, "SMS profile-sum-max ");
1031 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1032 (HOST_WIDEST_INT) profile_info->sum_max);
1033 fprintf (dump_file, "\n");
1039 /* Make sure this is a doloop. */
1040 if ( !(count_reg = doloop_register_get (head, tail)))
1043 fprintf (dump_file, "SMS doloop_register_get failed\n");
1047 /* Don't handle BBs with calls or barriers or auto-increment insns
1048 (to avoid creating invalid reg-moves for the auto-increment insns),
1049 or !single_set with the exception of instructions that include
1050 count_reg---these instructions are part of the control part
1051 that do-loop recognizes.
1052 ??? Should handle auto-increment insns.
1053 ??? Should handle insns defining subregs. */
1054 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1060 || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1061 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1062 && !reg_mentioned_p (count_reg, insn))
1063 || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1064 || (INSN_P (insn) && (set = single_set (insn))
1065 && GET_CODE (SET_DEST (set)) == SUBREG))
1069 if (insn != NEXT_INSN (tail))
1074 fprintf (dump_file, "SMS loop-with-call\n");
1075 else if (BARRIER_P (insn))
1076 fprintf (dump_file, "SMS loop-with-barrier\n");
1077 else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
1078 fprintf (dump_file, "SMS reg inc\n");
1079 else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1080 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1081 fprintf (dump_file, "SMS loop-with-not-single-set\n");
1083 fprintf (dump_file, "SMS loop with subreg in lhs\n");
1084 print_rtl_single (dump_file, insn);
1090 /* Always schedule the closing branch with the rest of the
1091 instructions. The branch is rotated to be in row ii-1 at the
1092 end of the scheduling procedure to make sure it's the last
1093 instruction in the iteration. */
1094 if (! (g = create_ddg (bb, 1)))
1097 fprintf (dump_file, "SMS create_ddg failed\n");
1101 g_arr[loop->num] = g;
1103 fprintf (dump_file, "...OK\n");
1108 fprintf (dump_file, "\nSMS transformation phase\n");
1109 fprintf (dump_file, "=========================\n\n");
1112 /* We don't want to perform SMS on new loops - created by versioning. */
1113 FOR_EACH_LOOP (li, loop, 0)
1116 rtx count_reg, count_init;
1118 unsigned stage_count = 0;
1119 HOST_WIDEST_INT loop_count = 0;
1121 if (! (g = g_arr[loop->num]))
1126 rtx insn = BB_END (loop->header);
1128 fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
1129 loop->num, insn_file (insn), insn_line (insn));
1131 print_ddg (dump_file, g);
1134 get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1136 latch_edge = loop_latch_edge (loop);
1137 gcc_assert (single_exit (loop));
1138 if (single_exit (loop)->count)
1139 trip_count = latch_edge->count / single_exit (loop)->count;
1143 fprintf (dump_file, " %s %d (file, line)\n",
1144 insn_file (tail), insn_line (tail));
1145 fprintf (dump_file, "SMS single-bb-loop\n");
1146 if (profile_info && flag_branch_probabilities)
1148 fprintf (dump_file, "SMS loop-count ");
1149 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1150 (HOST_WIDEST_INT) bb->count);
1151 fprintf (dump_file, "\n");
1152 fprintf (dump_file, "SMS profile-sum-max ");
1153 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1154 (HOST_WIDEST_INT) profile_info->sum_max);
1155 fprintf (dump_file, "\n");
1157 fprintf (dump_file, "SMS doloop\n");
1158 fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1159 fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1160 fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1164 /* In case the loop has a doloop register, it gets special
1166 count_init = NULL_RTX;
1167 if ((count_reg = doloop_register_get (head, tail)))
1169 basic_block pre_header;
1171 pre_header = loop_preheader_edge (loop)->src;
1172 count_init = const_iteration_count (count_reg, pre_header,
1175 gcc_assert (count_reg);
1177 if (dump_file && count_init)
1179 fprintf (dump_file, "SMS const-doloop ");
1180 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1182 fprintf (dump_file, "\n");
1185 node_order = XNEWVEC (int, g->num_nodes);
1187 mii = 1; /* Need to pass some estimate of mii. */
1188 rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1189 mii = MAX (res_MII (g), rec_mii);
1190 maxii = MAX (max_asap, MAXII_FACTOR * mii);
1193 fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1194 rec_mii, mii, maxii);
1196 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1198 set_node_sched_params (g);
1200 ps = sms_schedule_by_order (g, mii, maxii, node_order);
1204 stage_count = calculate_stage_count (ps);
1205 gcc_assert(stage_count >= 1);
1206 PS_STAGE_COUNT(ps) = stage_count;
1209 /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1210 1 means that there is no interleaving between iterations thus
1211 we let the scheduling passes do the job in this case. */
1212 if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
1213 || (count_init && (loop_count <= stage_count))
1214 || (flag_branch_probabilities && (trip_count <= stage_count)))
1218 fprintf (dump_file, "SMS failed... \n");
1219 fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1220 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1221 fprintf (dump_file, ", trip-count=");
1222 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1223 fprintf (dump_file, ")\n");
1228 struct undo_replace_buff_elem *reg_move_replaces;
1229 int amount = SCHED_TIME (g->closing_branch) + 1;
1231 /* Set the stage boundaries. The closing_branch was scheduled
1232 and should appear in the last (ii-1) row. */
1233 reset_sched_times (ps, amount);
1234 rotate_partial_schedule (ps, amount);
1235 set_columns_for_ps (ps);
1242 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1244 print_partial_schedule (ps, dump_file);
1247 /* In case the BCT count is not known, do loop-versioning. */
1248 if (count_reg && ! count_init)
1250 rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1251 GEN_INT(stage_count));
1252 unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1253 * REG_BR_PROB_BASE) / 100;
1255 loop_version (loop, comp_rtx, &condition_bb,
1256 prob, prob, REG_BR_PROB_BASE - prob,
1260 /* Set new iteration count of loop kernel. */
1261 if (count_reg && count_init)
1262 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1265 /* Now apply the scheduled kernel to the RTL of the loop. */
1266 permute_partial_schedule (ps, g->closing_branch->first_note);
1268 /* Mark this loop as software pipelined so the later
1269 scheduling passes doesn't touch it. */
1270 if (! flag_resched_modulo_sched)
1271 g->bb->flags |= BB_DISABLE_SCHEDULE;
1272 /* The life-info is not valid any more. */
1273 df_set_bb_dirty (g->bb);
1275 reg_move_replaces = generate_reg_moves (ps, true);
1277 print_node_sched_params (dump_file, g->num_nodes, g);
1278 /* Generate prolog and epilog. */
1279 generate_prolog_epilog (ps, loop, count_reg, count_init);
1281 free_undo_replace_buff (reg_move_replaces);
1284 free_partial_schedule (ps);
1285 free (node_sched_params);
1292 /* Release scheduler data, needed until now because of DFA. */
1293 haifa_sched_finish ();
1294 loop_optimizer_finalize ();
1297 /* The SMS scheduling algorithm itself
1298 -----------------------------------
1299 Input: 'O' an ordered list of insns of a loop.
1300 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1302 'Q' is the empty Set
1303 'PS' is the partial schedule; it holds the currently scheduled nodes with
1305 'PSP' previously scheduled predecessors.
1306 'PSS' previously scheduled successors.
1307 't(u)' the cycle where u is scheduled.
1308 'l(u)' is the latency of u.
1309 'd(v,u)' is the dependence distance from v to u.
1310 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1311 the node ordering phase.
1312 'check_hardware_resources_conflicts(u, PS, c)'
1313 run a trace around cycle/slot through DFA model
1314 to check resource conflicts involving instruction u
1315 at cycle c given the partial schedule PS.
1316 'add_to_partial_schedule_at_time(u, PS, c)'
1317 Add the node/instruction u to the partial schedule
1319 'calculate_register_pressure(PS)'
1320 Given a schedule of instructions, calculate the register
1321 pressure it implies. One implementation could be the
1322 maximum number of overlapping live ranges.
1323 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1324 registers available in the hardware.
1328 3. for each node u in O in pre-computed order
1329 4. if (PSP(u) != Q && PSS(u) == Q) then
1330 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1331 6. start = Early_start; end = Early_start + II - 1; step = 1
1332 11. else if (PSP(u) == Q && PSS(u) != Q) then
1333 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1334 13. start = Late_start; end = Late_start - II + 1; step = -1
1335 14. else if (PSP(u) != Q && PSS(u) != Q) then
1336 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1337 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1338 17. start = Early_start;
1339 18. end = min(Early_start + II - 1 , Late_start);
1341 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1342 21. start = ASAP(u); end = start + II - 1; step = 1
1346 24. for (c = start ; c != end ; c += step)
1347 25. if check_hardware_resources_conflicts(u, PS, c) then
1348 26. add_to_partial_schedule_at_time(u, PS, c)
1353 31. if (success == false) then
1355 33. if (II > maxII) then
1356 34. finish - failed to schedule
1361 39. if (calculate_register_pressure(PS) > maxRP) then
1364 42. compute epilogue & prologue
1365 43. finish - succeeded to schedule
1368 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1369 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1370 set to 0 to save compile time. */
1371 #define DFA_HISTORY SMS_DFA_HISTORY
1373 /* A threshold for the number of repeated unsuccessful attempts to insert
1374 an empty row, before we flush the partial schedule and start over. */
1375 #define MAX_SPLIT_NUM 10
1376 /* Given the partial schedule PS, this function calculates and returns the
1377 cycles in which we can schedule the node with the given index I.
1378 NOTE: Here we do the backtracking in SMS, in some special cases. We have
1379 noticed that there are several cases in which we fail to SMS the loop
1380 because the sched window of a node is empty due to tight data-deps. In
1381 such cases we want to unschedule some of the predecessors/successors
1382 until we get a non-empty scheduling window. It returns -1 if the
1383 scheduling window is empty and zero otherwise. */
1386 get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1387 sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1389 int start, step, end;
1391 int u = nodes_order [i];
1392 ddg_node_ptr u_node = &ps->g->nodes[u];
1393 sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1394 sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1395 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1396 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1400 /* 1. compute sched window for u (start, end, step). */
1403 psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1404 pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1406 if (psp_not_empty && !pss_not_empty)
1408 int early_start = INT_MIN;
1411 for (e = u_node->in; e != 0; e = e->next_in)
1413 ddg_node_ptr v_node = e->src;
1417 fprintf (dump_file, "\nProcessing edge: ");
1418 print_ddg_edge (dump_file, e);
1420 "\nScheduling %d (%d) in psp_not_empty,"
1421 " checking p %d (%d): ", u_node->cuid,
1422 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1426 if (TEST_BIT (sched_nodes, v_node->cuid))
1428 int p_st = SCHED_TIME (v_node);
1431 MAX (early_start, p_st + e->latency - (e->distance * ii));
1435 "pred st = %d; early_start = %d; latency: %d",
1436 p_st, early_start, e->latency);
1438 if (e->data_type == MEM_DEP)
1439 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1442 fprintf (dump_file, "the node is not scheduled\n");
1444 start = early_start;
1445 end = MIN (end, early_start + ii);
1446 /* Schedule the node close to it's predecessors. */
1451 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1452 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1455 else if (!psp_not_empty && pss_not_empty)
1457 int late_start = INT_MAX;
1460 for (e = u_node->out; e != 0; e = e->next_out)
1462 ddg_node_ptr v_node = e->dest;
1466 fprintf (dump_file, "\nProcessing edge:");
1467 print_ddg_edge (dump_file, e);
1469 "\nScheduling %d (%d) in pss_not_empty,"
1470 " checking s %d (%d): ", u_node->cuid,
1471 INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1475 if (TEST_BIT (sched_nodes, v_node->cuid))
1477 int s_st = SCHED_TIME (v_node);
1479 late_start = MIN (late_start,
1480 s_st - e->latency + (e->distance * ii));
1484 "succ st = %d; late_start = %d; latency = %d",
1485 s_st, late_start, e->latency);
1487 if (e->data_type == MEM_DEP)
1488 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1490 fprintf (dump_file, "end = %d\n", end);
1494 fprintf (dump_file, "the node is not scheduled\n");
1498 end = MAX (end, late_start - ii);
1499 /* Schedule the node close to it's successors. */
1504 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1505 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1509 else if (psp_not_empty && pss_not_empty)
1511 int early_start = INT_MIN;
1512 int late_start = INT_MAX;
1513 int count_preds = 0;
1514 int count_succs = 0;
1518 for (e = u_node->in; e != 0; e = e->next_in)
1520 ddg_node_ptr v_node = e->src;
1524 fprintf (dump_file, "\nProcessing edge:");
1525 print_ddg_edge (dump_file, e);
1527 "\nScheduling %d (%d) in psp_pss_not_empty,"
1528 " checking p %d (%d): ", u_node->cuid, INSN_UID
1529 (u_node->insn), v_node->cuid, INSN_UID
1533 if (TEST_BIT (sched_nodes, v_node->cuid))
1535 int p_st = SCHED_TIME (v_node);
1537 early_start = MAX (early_start,
1539 - (e->distance * ii));
1543 "pred st = %d; early_start = %d; latency = %d",
1544 p_st, early_start, e->latency);
1546 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1549 if (e->data_type == MEM_DEP)
1550 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1553 fprintf (dump_file, "the node is not scheduled\n");
1556 for (e = u_node->out; e != 0; e = e->next_out)
1558 ddg_node_ptr v_node = e->dest;
1562 fprintf (dump_file, "\nProcessing edge:");
1563 print_ddg_edge (dump_file, e);
1565 "\nScheduling %d (%d) in psp_pss_not_empty,"
1566 " checking s %d (%d): ", u_node->cuid, INSN_UID
1567 (u_node->insn), v_node->cuid, INSN_UID
1571 if (TEST_BIT (sched_nodes, v_node->cuid))
1573 int s_st = SCHED_TIME (v_node);
1575 late_start = MIN (late_start,
1577 + (e->distance * ii));
1581 "succ st = %d; late_start = %d; latency = %d",
1582 s_st, late_start, e->latency);
1584 if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1587 if (e->data_type == MEM_DEP)
1588 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1591 fprintf (dump_file, "the node is not scheduled\n");
1594 start = MAX (start, early_start);
1595 end = MIN (end, MIN (early_start + ii, late_start + 1));
1597 /* If there are more successors than predecessors schedule the
1598 node close to it's successors. */
1599 if (count_succs >= count_preds)
1601 int old_start = start;
1604 end = old_start - 1;
1608 else /* psp is empty && pss is empty. */
1610 start = SCHED_ASAP (u_node);
1621 if ((start >= end && step == 1) || (start <= end && step == -1))
1624 fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1632 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1633 node currently been scheduled. At the end of the calculation
1634 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1635 U_NODE which are (1) already scheduled in the first/last row of
1636 U_NODE's scheduling window, (2) whose dependence inequality with U
1637 becomes an equality when U is scheduled in this same row, and (3)
1638 whose dependence latency is zero.
1640 The first and last rows are calculated using the following parameters:
1641 START/END rows - The cycles that begins/ends the traversal on the window;
1642 searching for an empty cycle to schedule U_NODE.
1643 STEP - The direction in which we traverse the window.
1644 II - The initiation interval. */
1647 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1648 int step, int ii, sbitmap sched_nodes,
1649 sbitmap must_precede, sbitmap must_follow)
1652 int first_cycle_in_window, last_cycle_in_window;
1654 gcc_assert (must_precede && must_follow);
1656 /* Consider the following scheduling window:
1657 {first_cycle_in_window, first_cycle_in_window+1, ...,
1658 last_cycle_in_window}. If step is 1 then the following will be
1659 the order we traverse the window: {start=first_cycle_in_window,
1660 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1661 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1662 end=first_cycle_in_window-1} if step is -1. */
1663 first_cycle_in_window = (step == 1) ? start : end - step;
1664 last_cycle_in_window = (step == 1) ? end - step : start;
1666 sbitmap_zero (must_precede);
1667 sbitmap_zero (must_follow);
1670 fprintf (dump_file, "\nmust_precede: ");
1672 /* Instead of checking if:
1673 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1674 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1675 first_cycle_in_window)
1677 we use the fact that latency is non-negative:
1678 SCHED_TIME (e->src) - (e->distance * ii) <=
1679 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1680 first_cycle_in_window
1682 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
1683 for (e = u_node->in; e != 0; e = e->next_in)
1684 if (TEST_BIT (sched_nodes, e->src->cuid)
1685 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1686 first_cycle_in_window))
1689 fprintf (dump_file, "%d ", e->src->cuid);
1691 SET_BIT (must_precede, e->src->cuid);
1695 fprintf (dump_file, "\nmust_follow: ");
1697 /* Instead of checking if:
1698 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1699 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1700 last_cycle_in_window)
1702 we use the fact that latency is non-negative:
1703 SCHED_TIME (e->dest) + (e->distance * ii) >=
1704 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1705 last_cycle_in_window
1707 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
1708 for (e = u_node->out; e != 0; e = e->next_out)
1709 if (TEST_BIT (sched_nodes, e->dest->cuid)
1710 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1711 last_cycle_in_window))
1714 fprintf (dump_file, "%d ", e->dest->cuid);
1716 SET_BIT (must_follow, e->dest->cuid);
1720 fprintf (dump_file, "\n");
1723 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
1724 parameters to decide if that's possible:
1725 PS - The partial schedule.
1726 U - The serial number of U_NODE.
1727 NUM_SPLITS - The number of row splits made so far.
1728 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1729 the first row of the scheduling window)
1730 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1731 last row of the scheduling window) */
1734 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1735 int u, int cycle, sbitmap sched_nodes,
1736 int *num_splits, sbitmap must_precede,
1737 sbitmap must_follow)
1742 verify_partial_schedule (ps, sched_nodes);
1743 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1744 must_precede, must_follow);
1747 SCHED_TIME (u_node) = cycle;
1748 SET_BIT (sched_nodes, u);
1752 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1759 /* This function implements the scheduling algorithm for SMS according to the
1761 static partial_schedule_ptr
1762 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1765 int i, c, success, num_splits = 0;
1766 int flush_and_start_over = true;
1767 int num_nodes = g->num_nodes;
1768 int start, end, step; /* Place together into one struct? */
1769 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1770 sbitmap must_precede = sbitmap_alloc (num_nodes);
1771 sbitmap must_follow = sbitmap_alloc (num_nodes);
1772 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1774 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1776 sbitmap_ones (tobe_scheduled);
1777 sbitmap_zero (sched_nodes);
1779 while (flush_and_start_over && (ii < maxii))
1783 fprintf (dump_file, "Starting with ii=%d\n", ii);
1784 flush_and_start_over = false;
1785 sbitmap_zero (sched_nodes);
1787 for (i = 0; i < num_nodes; i++)
1789 int u = nodes_order[i];
1790 ddg_node_ptr u_node = &ps->g->nodes[u];
1791 rtx insn = u_node->insn;
1793 if (!NONDEBUG_INSN_P (insn))
1795 RESET_BIT (tobe_scheduled, u);
1799 if (TEST_BIT (sched_nodes, u))
1802 /* Try to get non-empty scheduling window. */
1804 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1808 fprintf (dump_file, "\nTrying to schedule node %d \
1809 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
1810 (g->nodes[u].insn)), start, end, step);
1812 gcc_assert ((step > 0 && start < end)
1813 || (step < 0 && start > end));
1815 calculate_must_precede_follow (u_node, start, end, step, ii,
1816 sched_nodes, must_precede,
1819 for (c = start; c != end; c += step)
1821 sbitmap tmp_precede = NULL;
1822 sbitmap tmp_follow = NULL;
1827 tmp_precede = must_precede;
1828 else /* step == -1. */
1829 tmp_follow = must_follow;
1831 if (c == end - step)
1834 tmp_follow = must_follow;
1835 else /* step == -1. */
1836 tmp_precede = must_precede;
1840 try_scheduling_node_in_cycle (ps, u_node, u, c,
1842 &num_splits, tmp_precede,
1848 verify_partial_schedule (ps, sched_nodes);
1857 if (num_splits >= MAX_SPLIT_NUM)
1860 flush_and_start_over = true;
1861 verify_partial_schedule (ps, sched_nodes);
1862 reset_partial_schedule (ps, ii);
1863 verify_partial_schedule (ps, sched_nodes);
1868 /* The scheduling window is exclusive of 'end'
1869 whereas compute_split_window() expects an inclusive,
1872 split_row = compute_split_row (sched_nodes, start, end - 1,
1875 split_row = compute_split_row (sched_nodes, end + 1, start,
1878 ps_insert_empty_row (ps, split_row, sched_nodes);
1879 i--; /* Go back and retry node i. */
1882 fprintf (dump_file, "num_splits=%d\n", num_splits);
1885 /* ??? If (success), check register pressure estimates. */
1886 } /* Continue with next node. */
1887 } /* While flush_and_start_over. */
1890 free_partial_schedule (ps);
1894 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1896 sbitmap_free (sched_nodes);
1897 sbitmap_free (must_precede);
1898 sbitmap_free (must_follow);
1899 sbitmap_free (tobe_scheduled);
1904 /* This function inserts a new empty row into PS at the position
1905 according to SPLITROW, keeping all already scheduled instructions
1906 intact and updating their SCHED_TIME and cycle accordingly. */
1908 ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1909 sbitmap sched_nodes)
1911 ps_insn_ptr crr_insn;
1912 ps_insn_ptr *rows_new;
1914 int new_ii = ii + 1;
1916 int *rows_length_new;
1918 verify_partial_schedule (ps, sched_nodes);
1920 /* We normalize sched_time and rotate ps to have only non-negative sched
1921 times, for simplicity of updating cycles after inserting new row. */
1922 split_row -= ps->min_cycle;
1923 split_row = SMODULO (split_row, ii);
1925 fprintf (dump_file, "split_row=%d\n", split_row);
1927 reset_sched_times (ps, PS_MIN_CYCLE (ps));
1928 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1930 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1931 rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
1932 for (row = 0; row < split_row; row++)
1934 rows_new[row] = ps->rows[row];
1935 rows_length_new[row] = ps->rows_length[row];
1936 ps->rows[row] = NULL;
1937 for (crr_insn = rows_new[row];
1938 crr_insn; crr_insn = crr_insn->next_in_row)
1940 ddg_node_ptr u = crr_insn->node;
1941 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1943 SCHED_TIME (u) = new_time;
1944 crr_insn->cycle = new_time;
1945 SCHED_ROW (u) = new_time % new_ii;
1946 SCHED_STAGE (u) = new_time / new_ii;
1951 rows_new[split_row] = NULL;
1953 for (row = split_row; row < ii; row++)
1955 rows_new[row + 1] = ps->rows[row];
1956 rows_length_new[row + 1] = ps->rows_length[row];
1957 ps->rows[row] = NULL;
1958 for (crr_insn = rows_new[row + 1];
1959 crr_insn; crr_insn = crr_insn->next_in_row)
1961 ddg_node_ptr u = crr_insn->node;
1962 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1964 SCHED_TIME (u) = new_time;
1965 crr_insn->cycle = new_time;
1966 SCHED_ROW (u) = new_time % new_ii;
1967 SCHED_STAGE (u) = new_time / new_ii;
1972 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1973 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1974 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1975 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1977 ps->rows = rows_new;
1978 free (ps->rows_length);
1979 ps->rows_length = rows_length_new;
1981 gcc_assert (ps->min_cycle >= 0);
1983 verify_partial_schedule (ps, sched_nodes);
1986 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1990 /* Given U_NODE which is the node that failed to be scheduled; LOW and
1991 UP which are the boundaries of its scheduling window; compute using
1992 SCHED_NODES and II a row in the partial schedule that can be split
1993 which will separate a critical predecessor from a critical successor
1994 thereby expanding the window, and return it. */
1996 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1997 ddg_node_ptr u_node)
2000 int lower = INT_MIN, upper = INT_MAX;
2001 ddg_node_ptr crit_pred = NULL;
2002 ddg_node_ptr crit_succ = NULL;
2005 for (e = u_node->in; e != 0; e = e->next_in)
2007 ddg_node_ptr v_node = e->src;
2009 if (TEST_BIT (sched_nodes, v_node->cuid)
2010 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
2011 if (SCHED_TIME (v_node) > lower)
2014 lower = SCHED_TIME (v_node);
2018 if (crit_pred != NULL)
2020 crit_cycle = SCHED_TIME (crit_pred) + 1;
2021 return SMODULO (crit_cycle, ii);
2024 for (e = u_node->out; e != 0; e = e->next_out)
2026 ddg_node_ptr v_node = e->dest;
2027 if (TEST_BIT (sched_nodes, v_node->cuid)
2028 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
2029 if (SCHED_TIME (v_node) < upper)
2032 upper = SCHED_TIME (v_node);
2036 if (crit_succ != NULL)
2038 crit_cycle = SCHED_TIME (crit_succ);
2039 return SMODULO (crit_cycle, ii);
2043 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2045 return SMODULO ((low + up + 1) / 2, ii);
2049 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2052 ps_insn_ptr crr_insn;
2054 for (row = 0; row < ps->ii; row++)
2058 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2060 ddg_node_ptr u = crr_insn->node;
2063 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2064 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2065 popcount (sched_nodes) == number of insns in ps. */
2066 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2067 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2070 gcc_assert (ps->rows_length[row] == length);
2075 /* This page implements the algorithm for ordering the nodes of a DDG
2076 for modulo scheduling, activated through the
2077 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
2079 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2080 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
2081 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
2082 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2083 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
2084 #define DEPTH(x) (ASAP ((x)))
2086 typedef struct node_order_params * nopa;
2088 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2089 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2090 static nopa calculate_order_params (ddg_ptr, int, int *);
2091 static int find_max_asap (ddg_ptr, sbitmap);
2092 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2093 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2095 enum sms_direction {BOTTOMUP, TOPDOWN};
2097 struct node_order_params
2104 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
2106 check_nodes_order (int *node_order, int num_nodes)
2109 sbitmap tmp = sbitmap_alloc (num_nodes);
2114 fprintf (dump_file, "SMS final nodes order: \n");
2116 for (i = 0; i < num_nodes; i++)
2118 int u = node_order[i];
2121 fprintf (dump_file, "%d ", u);
2122 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2128 fprintf (dump_file, "\n");
2133 /* Order the nodes of G for scheduling and pass the result in
2134 NODE_ORDER. Also set aux.count of each node to ASAP.
2135 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */
2137 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2141 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2143 nopa nops = calculate_order_params (g, mii, pmax_asap);
2146 print_sccs (dump_file, sccs, g);
2148 order_nodes_of_sccs (sccs, node_order);
2150 if (sccs->num_sccs > 0)
2151 /* First SCC has the largest recurrence_length. */
2152 rec_mii = sccs->sccs[0]->recurrence_length;
2154 /* Save ASAP before destroying node_order_params. */
2155 for (i = 0; i < g->num_nodes; i++)
2157 ddg_node_ptr v = &g->nodes[i];
2158 v->aux.count = ASAP (v);
2162 free_ddg_all_sccs (sccs);
2163 check_nodes_order (node_order, g->num_nodes);
2169 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2172 ddg_ptr g = all_sccs->ddg;
2173 int num_nodes = g->num_nodes;
2174 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2175 sbitmap on_path = sbitmap_alloc (num_nodes);
2176 sbitmap tmp = sbitmap_alloc (num_nodes);
2177 sbitmap ones = sbitmap_alloc (num_nodes);
2179 sbitmap_zero (prev_sccs);
2180 sbitmap_ones (ones);
2182 /* Perform the node ordering starting from the SCC with the highest recMII.
2183 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
2184 for (i = 0; i < all_sccs->num_sccs; i++)
2186 ddg_scc_ptr scc = all_sccs->sccs[i];
2188 /* Add nodes on paths from previous SCCs to the current SCC. */
2189 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2190 sbitmap_a_or_b (tmp, scc->nodes, on_path);
2192 /* Add nodes on paths from the current SCC to previous SCCs. */
2193 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2194 sbitmap_a_or_b (tmp, tmp, on_path);
2196 /* Remove nodes of previous SCCs from current extended SCC. */
2197 sbitmap_difference (tmp, tmp, prev_sccs);
2199 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2200 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
2203 /* Handle the remaining nodes that do not belong to any scc. Each call
2204 to order_nodes_in_scc handles a single connected component. */
2205 while (pos < g->num_nodes)
2207 sbitmap_difference (tmp, ones, prev_sccs);
2208 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2210 sbitmap_free (prev_sccs);
2211 sbitmap_free (on_path);
2213 sbitmap_free (ones);
2216 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
2217 static struct node_order_params *
2218 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2222 int num_nodes = g->num_nodes;
2224 /* Allocate a place to hold ordering params for each node in the DDG. */
2225 nopa node_order_params_arr;
2227 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
2228 node_order_params_arr = (nopa) xcalloc (num_nodes,
2229 sizeof (struct node_order_params));
2231 /* Set the aux pointer of each node to point to its order_params structure. */
2232 for (u = 0; u < num_nodes; u++)
2233 g->nodes[u].aux.info = &node_order_params_arr[u];
2235 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2236 calculate ASAP, ALAP, mobility, distance, and height for each node
2237 in the dependence (direct acyclic) graph. */
2239 /* We assume that the nodes in the array are in topological order. */
2242 for (u = 0; u < num_nodes; u++)
2244 ddg_node_ptr u_node = &g->nodes[u];
2247 for (e = u_node->in; e; e = e->next_in)
2248 if (e->distance == 0)
2249 ASAP (u_node) = MAX (ASAP (u_node),
2250 ASAP (e->src) + e->latency);
2251 max_asap = MAX (max_asap, ASAP (u_node));
2254 for (u = num_nodes - 1; u > -1; u--)
2256 ddg_node_ptr u_node = &g->nodes[u];
2258 ALAP (u_node) = max_asap;
2259 HEIGHT (u_node) = 0;
2260 for (e = u_node->out; e; e = e->next_out)
2261 if (e->distance == 0)
2263 ALAP (u_node) = MIN (ALAP (u_node),
2264 ALAP (e->dest) - e->latency);
2265 HEIGHT (u_node) = MAX (HEIGHT (u_node),
2266 HEIGHT (e->dest) + e->latency);
2271 fprintf (dump_file, "\nOrder params\n");
2272 for (u = 0; u < num_nodes; u++)
2274 ddg_node_ptr u_node = &g->nodes[u];
2276 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2277 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2281 *pmax_asap = max_asap;
2282 return node_order_params_arr;
2286 find_max_asap (ddg_ptr g, sbitmap nodes)
2291 sbitmap_iterator sbi;
2293 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2295 ddg_node_ptr u_node = &g->nodes[u];
2297 if (max_asap < ASAP (u_node))
2299 max_asap = ASAP (u_node);
2307 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2311 int min_mob = INT_MAX;
2313 sbitmap_iterator sbi;
2315 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2317 ddg_node_ptr u_node = &g->nodes[u];
2319 if (max_hv < HEIGHT (u_node))
2321 max_hv = HEIGHT (u_node);
2322 min_mob = MOB (u_node);
2325 else if ((max_hv == HEIGHT (u_node))
2326 && (min_mob > MOB (u_node)))
2328 min_mob = MOB (u_node);
2336 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2340 int min_mob = INT_MAX;
2342 sbitmap_iterator sbi;
2344 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2346 ddg_node_ptr u_node = &g->nodes[u];
2348 if (max_dv < DEPTH (u_node))
2350 max_dv = DEPTH (u_node);
2351 min_mob = MOB (u_node);
2354 else if ((max_dv == DEPTH (u_node))
2355 && (min_mob > MOB (u_node)))
2357 min_mob = MOB (u_node);
2364 /* Places the nodes of SCC into the NODE_ORDER array starting
2365 at position POS, according to the SMS ordering algorithm.
2366 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2367 the NODE_ORDER array, starting from position zero. */
2369 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2370 int * node_order, int pos)
2372 enum sms_direction dir;
2373 int num_nodes = g->num_nodes;
/* Scratch bitmaps, all sized by the number of DDG nodes.  */
2374 sbitmap workset = sbitmap_alloc (num_nodes);
2375 sbitmap tmp = sbitmap_alloc (num_nodes);
2376 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2377 sbitmap predecessors = sbitmap_alloc (num_nodes);
2378 sbitmap successors = sbitmap_alloc (num_nodes);
/* Compute the predecessor/successor sets of everything ordered so far.  */
2380 sbitmap_zero (predecessors);
2381 find_predecessors (predecessors, g, nodes_ordered);
2383 sbitmap_zero (successors);
2384 find_successors (successors, g, nodes_ordered);
/* Seed the working set: the in-SCC predecessors of the already-ordered
   nodes if that intersection is non-empty (sbitmap_a_and_b_cg reports
   whether it changed/is non-empty), else the in-SCC successors, else
   fall back to the single node chosen by find_max_asap.  The
   assignments to DIR in each branch are elided in this excerpt.  */
2387 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2389 sbitmap_copy (workset, tmp);
2392 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2394 sbitmap_copy (workset, tmp);
2401 sbitmap_zero (workset);
2402 if ((u = find_max_asap (g, scc)) >= 0)
2403 SET_BIT (workset, u);
2407 sbitmap_zero (zero_bitmap);
/* Keep ordering until the working set empties, alternating between a
   height-driven sweep and a depth-driven sweep.  */
2408 while (!sbitmap_equal (workset, zero_bitmap))
2411 ddg_node_ptr v_node;
2412 sbitmap v_node_preds;
2413 sbitmap v_node_succs;
/* Sweep 1: repeatedly take the max-height / min-mobility node and
   propagate its in-SCC successors into the working set.  */
2417 while (!sbitmap_equal (workset, zero_bitmap))
2419 v = find_max_hv_min_mob (g, workset);
2420 v_node = &g->nodes[v];
2421 node_order[pos++] = v;
2422 v_node_succs = NODE_SUCCESSORS (v_node);
2423 sbitmap_a_and_b (tmp, v_node_succs, scc);
2425 /* Don't consider the already ordered successors again. */
2426 sbitmap_difference (tmp, tmp, nodes_ordered);
2427 sbitmap_a_or_b (workset, workset, tmp);
2428 RESET_BIT (workset, v);
2429 SET_BIT (nodes_ordered, v);
/* Switch direction: the next sweep works on the in-SCC predecessors
   of everything ordered so far.  */
2432 sbitmap_zero (predecessors);
2433 find_predecessors (predecessors, g, nodes_ordered);
2434 sbitmap_a_and_b (workset, predecessors, scc);
/* Sweep 2: repeatedly take the max-depth / min-mobility node and
   propagate its in-SCC predecessors into the working set.  */
2438 while (!sbitmap_equal (workset, zero_bitmap))
2440 v = find_max_dv_min_mob (g, workset);
2441 v_node = &g->nodes[v];
2442 node_order[pos++] = v;
2443 v_node_preds = NODE_PREDECESSORS (v_node);
2444 sbitmap_a_and_b (tmp, v_node_preds, scc);
2446 /* Don't consider the already ordered predecessors again. */
2447 sbitmap_difference (tmp, tmp, nodes_ordered);
2448 sbitmap_a_or_b (workset, workset, tmp);
2449 RESET_BIT (workset, v);
2450 SET_BIT (nodes_ordered, v);
/* And back again: refill the working set with in-SCC successors.  */
2453 sbitmap_zero (successors);
2454 find_successors (successors, g, nodes_ordered);
2455 sbitmap_a_and_b (workset, successors, scc);
/* NOTE(review): TMP is not freed in the lines visible here -- verify
   against the full source whether it leaks.  */
2459 sbitmap_free (workset);
2460 sbitmap_free (zero_bitmap);
2461 sbitmap_free (predecessors);
2462 sbitmap_free (successors);
2467 /* This page contains functions for manipulating partial-schedules during
2468 modulo scheduling. */
2470 /* Create a partial schedule and allocate a memory to hold II rows. */
2472 static partial_schedule_ptr
2473 create_partial_schedule (int ii, ddg_ptr g, int history)
2475 partial_schedule_ptr ps = XNEW (struct partial_schedule);
/* One insn list and one length counter per row; xcalloc zeroes both.  */
2476 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2477 ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2479 ps->history = history;
/* Empty-range sentinels: the first scheduled cycle will narrow these.  */
2480 ps->min_cycle = INT_MAX;
2481 ps->max_cycle = INT_MIN;
/* NOTE(review): the initialization of the remaining fields (presumably
   ps->ii and ps->g) and the return statement are elided here.  */
2487 /* Free the PS_INSNs in rows array of the given partial schedule.
2488 ??? Consider caching the PS_INSN's. */
2490 free_ps_insns (partial_schedule_ptr ps)
/* For each of the II rows, walk its linked list of PS_INSNs, releasing
   each head element in turn -- the lines that free the head and the
   loop-termination test are elided in this excerpt.  */
2494 for (i = 0; i < ps->ii; i++)
2498 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2501 ps->rows[i] = ps_insn;
2507 /* Free all the memory allocated to the partial schedule. */
2510 free_partial_schedule (partial_schedule_ptr ps)
/* Presumably also frees the row lists, ps->rows and PS itself -- those
   lines are elided; only the rows_length release is visible here.  */
2516 free (ps->rows_length);
2520 /* Clear the rows array with its PS_INSNs, and create a new one with
2524 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
/* Nothing to resize when the initiation interval is unchanged (the
   early-return body after this test is elided in this excerpt).  */
2529 if (new_ii == ps->ii)
/* Resize both per-row arrays and clear them.  xrealloc aborts on
   allocation failure, so overwriting the pointer directly is safe.  */
2531 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2532 * sizeof (ps_insn_ptr))
2533 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2534 ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2535 memset (ps->rows_length, 0, new_ii * sizeof (int));
/* Reset to the empty-range sentinels used at creation time.  */
2537 ps->min_cycle = INT_MAX;
2538 ps->max_cycle = INT_MIN;
2541 /* Prints the partial schedule as an ii rows array; for each row,
2542 print the UIDs of the insns in it. */
2544 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2548 for (i = 0; i < ps->ii; i++)
2550 ps_insn_ptr ps_i = ps->rows[i];
2552 fprintf (dump, "\n[ROW %d ]: ", i);
/* Walk the row's linked list, printing each insn's UID (the inner
   loop header is elided in this excerpt).  */
2555 fprintf (dump, "%d, ",
2556 INSN_UID (ps_i->node->insn));
2557 ps_i = ps_i->next_in_row;
2562 /* Creates an object of PS_INSN and initializes it to the given parameters. */
2564 create_ps_insn (ddg_node_ptr node, int cycle)
2566 ps_insn_ptr ps_i = XNEW (struct ps_insn);
/* A fresh ps_insn is not yet linked into any row.  */
2569 ps_i->next_in_row = NULL;
2570 ps_i->prev_in_row = NULL;
2571 ps_i->cycle = cycle;
/* NOTE(review): the assignment of NODE into ps_i and the return
   statement are elided in this excerpt.  */
2577 /* Removes the given PS_INSN from the partial schedule. Returns false if the
2578 node is not found in the partial schedule, else returns true. */
2580 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
/* The row holding PS_I is its cycle modulo the initiation interval.  */
2587 row = SMODULO (ps_i->cycle, ps->ii);
/* No predecessor: PS_I should be the row head; consistency check,
   then advance the head past it.  */
2588 if (! ps_i->prev_in_row)
2590 if (ps_i != ps->rows[row])
2593 ps->rows[row] = ps_i->next_in_row;
2595 ps->rows[row]->prev_in_row = NULL;
/* Otherwise unlink PS_I from the middle/end of the doubly linked row.  */
2599 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2600 if (ps_i->next_in_row)
2601 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
/* Keep the per-row insn count in sync with the list.  */
2604 ps->rows_length[row] -= 1;
2609 /* Unlike what literature describes for modulo scheduling (which focuses
2610 on VLIW machines) the order of the instructions inside a cycle is
2611 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2612 where the current instruction should go relative to the already
2613 scheduled instructions in the given cycle. Go over these
2614 instructions and find the first possible column to put it in. */
2616 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2617 sbitmap must_precede, sbitmap must_follow)
2619 ps_insn_ptr next_ps_i;
2620 ps_insn_ptr first_must_follow = NULL;
2621 ps_insn_ptr last_must_precede = NULL;
2622 ps_insn_ptr last_in_row = NULL;
2628 row = SMODULO (ps_i->cycle, ps->ii);
2630 /* Find the first must follow and the last must precede
2631 and insert the node immediately after the must precede
2632 but make sure that there is no must follow after it. */
2633 for (next_ps_i = ps->rows[row];
2635 next_ps_i = next_ps_i->next_in_row)
/* Remember the first insn that must come after PS_I...  */
2637 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2638 && ! first_must_follow)
2639 first_must_follow = next_ps_i;
/* ...and the last insn that must come before it.  */
2640 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2642 /* If we have already met a node that must follow, then
2643 there is no possible column. */
2644 if (first_must_follow)
2647 last_must_precede = next_ps_i;
2649 /* The closing branch must be the last in the row.  (The enclosing
   condition's first line is elided in this excerpt.)  */
2651 && TEST_BIT (must_precede, next_ps_i->node->cuid)
2652 && JUMP_P (next_ps_i->node->insn))
2655 last_in_row = next_ps_i;
2658 /* The closing branch is scheduled as well. Make sure there is no
2659 dependent instruction after it as the branch should be the last
2660 instruction in the row. */
2661 if (JUMP_P (ps_i->node->insn))
2663 if (first_must_follow)
2667 /* Make the branch the last in the row. New instructions
2668 will be inserted at the beginning of the row or after the
2669 last must_precede instruction thus the branch is guaranteed
2670 to remain the last instruction in the row. */
2671 last_in_row->next_in_row = ps_i;
2672 ps_i->prev_in_row = last_in_row;
2673 ps_i->next_in_row = NULL;
/* Empty row case: the branch becomes the row head (the surrounding
   else-branch lines are elided in this excerpt).  */
2676 ps->rows[row] = ps_i;
2680 /* Now insert the node after INSERT_AFTER_PSI. */
/* No must-precede insn in this row: insert PS_I at the row head.  */
2682 if (! last_must_precede)
2684 ps_i->next_in_row = ps->rows[row];
2685 ps_i->prev_in_row = NULL;
2686 if (ps_i->next_in_row)
2687 ps_i->next_in_row->prev_in_row = ps_i;
2688 ps->rows[row] = ps_i;
/* Otherwise splice PS_I right after the last must-precede insn.  */
2692 ps_i->next_in_row = last_must_precede->next_in_row;
2693 last_must_precede->next_in_row = ps_i;
2694 ps_i->prev_in_row = last_must_precede;
2695 if (ps_i->next_in_row)
2696 ps_i->next_in_row->prev_in_row = ps_i;
2702 /* Advances the PS_INSN one column in its current row; returns false
2703 in failure and true in success. Bit N is set in MUST_FOLLOW if
2704 the node with cuid N must come after the node pointed to by
2705 PS_I when scheduled in the same cycle. */
2707 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2708 sbitmap must_follow)
2710 ps_insn_ptr prev, next;
2712 ddg_node_ptr next_node;
2717 row = SMODULO (ps_i->cycle, ps->ii);
/* Already last in its row -- nowhere to advance to.  */
2719 if (! ps_i->next_in_row)
2722 next_node = ps_i->next_in_row->node;
2724 /* Check if next_in_row is dependent on ps_i, both having same sched
2725 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2726 if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2729 /* Advance PS_I over its next_in_row in the doubly linked list. */
2730 prev = ps_i->prev_in_row;
2731 next = ps_i->next_in_row;
/* If PS_I headed the row, NEXT becomes the new head.  */
2733 if (ps_i == ps->rows[row])
2734 ps->rows[row] = next;
/* Swap PS_I and NEXT by relinking the surrounding pointers.  (The
   NULL check guarding the PREV relink is elided in this excerpt.)  */
2736 ps_i->next_in_row = next->next_in_row;
2738 if (next->next_in_row)
2739 next->next_in_row->prev_in_row = ps_i;
2741 next->next_in_row = ps_i;
2742 ps_i->prev_in_row = next;
2744 next->prev_in_row = prev;
2746 prev->next_in_row = next;
2751 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2752 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2753 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2754 before/after (respectively) the node pointed to by PS_I when scheduled
2755 in the same cycle. */
2757 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2758 sbitmap must_precede, sbitmap must_follow)
2761 int row = SMODULO (cycle, ps->ii);
/* Refuse to overfill a row beyond the machine's issue rate.  */
2763 if (ps->rows_length[row] >= issue_rate)
2766 ps_i = create_ps_insn (node, cycle)
2768 /* Finds and inserts PS_I according to MUST_FOLLOW and
/* Ordering-constraint failure (cleanup lines elided in this excerpt).  */
2770 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
/* Keep the per-row insn count in sync with the row list.  */
2776 ps->rows_length[row] += 1;
2780 /* Advance time one cycle. Assumes DFA is being used. */
2782 advance_one_cycle (void)
/* Let the target emit its pre-cycle DFA transition if it defines one.  */
2784 if (targetm.sched.dfa_pre_cycle_insn)
2785 state_transition (curr_state,
2786 targetm.sched.dfa_pre_cycle_insn ());
/* A NULL insn advances the DFA state to the next cycle.  */
2788 state_transition (curr_state, NULL);
/* And the matching post-cycle transition, if defined.  */
2790 if (targetm.sched.dfa_post_cycle_insn)
2791 state_transition (curr_state,
2792 targetm.sched.dfa_post_cycle_insn ());
2797 /* Checks if PS has resource conflicts according to DFA, starting from
2798 FROM cycle to TO cycle; returns true if there are conflicts and false
2799 if there are no conflicts. Assumes DFA is being used. */
2801 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2805 state_reset (curr_state);
2807 for (cycle = from; cycle <= to; cycle++)
2809 ps_insn_ptr crr_insn;
2810 /* Holds the remaining issue slots in the current row. */
2811 int can_issue_more = issue_rate;
2813 /* Walk through the DFA for the current row. */
2814 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2816 crr_insn = crr_insn->next_in_row)
2818 rtx insn = crr_insn->node->insn;
/* Debug insns consume no machine resources; skip them.  */
2820 if (!NONDEBUG_INSN_P (insn))
2823 /* Check if there is room for the current insn. */
2824 if (!can_issue_more || state_dead_lock_p (curr_state))
2827 /* Update the DFA state and return with failure if the DFA found
2828 resource conflicts. */
2829 if (state_transition (curr_state, insn) >= 0)
/* Let the target decide how many issue slots the insn consumed.  */
2832 if (targetm.sched.variable_issue)
2834 targetm.sched.variable_issue (sched_dump, sched_verbose,
2835 insn, can_issue_more);
2836 /* A naked CLOBBER or USE generates no instruction, so don't
2837 let them consume issue slots. */
2838 else if (GET_CODE (PATTERN (insn)) != USE
2839 && GET_CODE (PATTERN (insn)) != CLOBBER)
2843 /* Advance the DFA to the next cycle. */
2844 advance_one_cycle ();
2849 /* Checks if the given node causes resource conflicts when added to PS at
2850 cycle C. If not the node is added to PS and returned; otherwise zero
2851 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2852 cuid N must come before/after (respectively) the node pointed to by
2853 PS_I when scheduled in the same cycle. */
2855 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2856 int c, sbitmap must_precede,
2857 sbitmap must_follow)
2859 int has_conflicts = 0;
2862 /* First add the node to the PS, if this succeeds check for
2863 conflicts, trying different issue slots in the same row. */
2864 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2865 return NULL; /* Failed to insert the node at the given cycle. */
/* Check cycle C alone, and -- behind an elided condition, presumably
   when a history window is configured -- a wider range around C.  */
2867 has_conflicts = ps_has_conflicts (ps, c, c)
2869 && ps_has_conflicts (ps,
2873 /* Try different issue slots to find one that the given node can be
2874 scheduled in without conflicts. */
2875 while (has_conflicts)
/* No further column to try in this row: give up on cycle C
   (the failure-path lines are elided in this excerpt).  */
2877 if (! ps_insn_advance_column (ps, ps_i, must_follow))
/* Re-check in the new column, same structure as above.  */
2879 has_conflicts = ps_has_conflicts (ps, c, c)
2881 && ps_has_conflicts (ps,
/* All columns conflicted: undo the insertion.  */
2888 remove_node_from_ps (ps, ps_i);
/* Success: widen the occupied cycle range to include C.  */
2892 ps->min_cycle = MIN (ps->min_cycle, c);
2893 ps->max_cycle = MAX (ps->max_cycle, c);
2897 /* Calculate the stage count of the partial schedule PS. The calculation
2898 takes into account the rotation to bring the closing branch to row
2901 calculate_stage_count (partial_schedule_ptr ps)
/* Shift all cycles so the closing branch would land in row zero.  */
2903 int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
2904 int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
2905 int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
/* Stages spanned by the cycles before cycle zero...  */
2906 int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
2908 /* The calculation of stage count is done adding the number of stages
2909 before cycle zero and after cycle zero. */
2910 stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
2915 /* Rotate the rows of PS such that insns scheduled at time
2916 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2918 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2920 int i, row, backward_rotates;
2921 int last_row = ps->ii - 1;
/* Already rotated as requested -- nothing to do.  */
2923 if (start_cycle == 0)
/* Number of single-row rotations, normalized into [0, ii).  */
2926 backward_rotates = SMODULO (start_cycle, ps->ii);
2928 /* Revisit later and optimize this into a single loop. */
2929 for (i = 0; i < backward_rotates; i++)
/* One backward rotation: shift every row up by one and wrap the old
   row 0 (saved here) around to the last row.  */
2931 ps_insn_ptr first_row = ps->rows[0];
2932 int first_row_length = ps->rows_length[0];
2934 for (row = 0; row < last_row; row++)
2936 ps->rows[row] = ps->rows[row + 1];
2937 ps->rows_length[row] = ps->rows_length[row + 1];
2940 ps->rows[last_row] = first_row;
2941 ps->rows_length[last_row] = first_row_length;
/* Re-base the occupied cycle range on the new row numbering.  */
2944 ps->max_cycle -= start_cycle;
2945 ps->min_cycle -= start_cycle;
2948 #endif /* INSN_SCHEDULING */
/* Pass gate: run SMS only when optimizing and modulo scheduling was
   requested (-fmodulo-sched).  */
2951 gate_handle_sms (void)
2953 return (optimize > 0 && flag_modulo_sched);
2957 /* Run instruction scheduler. */
2958 /* Perform SMS modulo scheduling. */
2960 rest_of_handle_sms (void)
2962 #ifdef INSN_SCHEDULING
2965 /* Collect loop information to be used in SMS. */
2966 cfg_layout_initialize (0);
/* The call into the scheduler itself (presumably sms_schedule) is
   elided in this excerpt.  */
2969 /* Update the life information, because we add pseudos. */
2970 max_regno = max_reg_num ();
2972 /* Finalize layout changes. */
/* Chain each basic block to its successor via bb->aux for cfglayout
   (the FOR_EACH_BB loop header is elided); the block preceding the
   exit block keeps a NULL aux.  */
2974 if (bb->next_bb != EXIT_BLOCK_PTR)
2975 bb->aux = bb->next_bb;
/* Dominator info is stale after the CFG rewrite; drop it before
   leaving cfglayout mode.  */
2976 free_dominance_info (CDI_DOMINATORS);
2977 cfg_layout_finalize ();
2978 #endif /* INSN_SCHEDULING */
2982 struct rtl_opt_pass pass_sms =
2987 gate_handle_sms, /* gate */
2988 rest_of_handle_sms, /* execute */
2991 0, /* static_pass_number */
2993 0, /* properties_required */
2994 0, /* properties_provided */
2995 0, /* properties_destroyed */
2996 0, /* todo_flags_start */
2999 | TODO_verify_rtl_sharing
3000 | TODO_ggc_collect /* todo_flags_finish */