/* Swing Modulo Scheduling implementation.
- Copyright (C) 2004
+ Copyright (C) 2004, 2005
Free Software Foundation, Inc.
Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
-#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
here the insns are not scheduled monotonically top-down (nor bottom-
up).
3. If failed in scheduling all insns - bump II++ and try again, unless
- II reaches an upper bound MaxII, inwhich case report failure.
+ II reaches an upper bound MaxII, in which case report failure.
5. If we succeeded in scheduling the loop within II cycles, we now
generate prolog and epilog, decrease the counter of the loop, and
perform modulo variable expansion for live ranges that span more than
};
-partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
-void free_partial_schedule (partial_schedule_ptr);
-void reset_partial_schedule (partial_schedule_ptr, int new_ii);
+static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
+static void free_partial_schedule (partial_schedule_ptr);
+static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
-ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
- ddg_node_ptr node, int cycle);
-void rotate_partial_schedule (partial_schedule_ptr, int);
-void set_row_column_for_ps (partial_schedule_ptr);
-
+static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
+ ddg_node_ptr node, int cycle,
+ sbitmap must_precede,
+ sbitmap must_follow);
+static void rotate_partial_schedule (partial_schedule_ptr, int);
\f
/* This page defines constants and structures for the modulo scheduling
driver. */
{
rtx pattern, cmp, inc, reg, condition;
- if (GET_CODE (insn) != JUMP_INSN)
+ if (!JUMP_P (insn))
return NULL_RTX;
pattern = PATTERN (insn);
}
-/* Given the partial schdule, generate register moves when the length
+/* Given the partial schedule, generate register moves when the length
of the register live range is more than ii; the number of moves is
determined according to the following equation:
SCHED_TIME (use) - SCHED_TIME (def) { 1 broken loop-carried
rtx orig_loop_bct = NULL_RTX;
/* Loop header edge. */
- e = ps->g->bb->pred;
+ e = EDGE_PRED (ps->g->bb, 0);
if (e->src == ps->g->bb)
- e = e->pred_next;
+ e = EDGE_PRED (ps->g->bb, 1);
/* Generate the prolog, inserting its insns on the loop-entry edge. */
start_sequence ();
loop_exit_label_insn = emit_label (loop_exit_label);
}
- e = ps->g->bb->succ;
+ e = EDGE_SUCC (ps->g->bb, 0);
if (e->dest == ps->g->bb)
- e = e->succ_next;
+ e = EDGE_SUCC (ps->g->bb, 1);
e->insns.r = get_insns ();
end_sequence ();
basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn);
basic_block precond_bb = BLOCK_FOR_INSN (precond_jump);
basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn);
- edge epilog_exit_edge = epilog_bb->succ;
+ edge epilog_exit_edge = EDGE_SUCC (epilog_bb, 0);
/* Do loop preconditioning to take care of cases were the loop count is
less than the stage count. Update the CFG properly. */
find_line_note (rtx insn)
{
for (; insn; insn = PREV_INSN (insn))
- if (GET_CODE (insn) == NOTE
+ if (NOTE_P (insn)
&& NOTE_LINE_NUMBER (insn) >= 0)
break;
int max_bb_index = last_basic_block;
struct df *df;
- /* SMS uses the DFA interface. */
- if (! targetm.sched.use_dfa_pipeline_interface
- || ! (*targetm.sched.use_dfa_pipeline_interface) ())
- return;
-
stats_file = dump_file;
-
/* Initialize issue_rate. */
if (targetm.sched.issue_rate)
{
continue;
/* Check if bb has two successors, one being itself. */
- e = bb->succ;
- if (!e || !e->succ_next || e->succ_next->succ_next)
+ if (EDGE_COUNT (bb->succs) != 2)
continue;
- if (e->dest != bb && e->succ_next->dest != bb)
+ if (EDGE_SUCC (bb, 0)->dest != bb && EDGE_SUCC (bb, 1)->dest != bb)
continue;
- if ((e->flags & EDGE_COMPLEX)
- || (e->succ_next->flags & EDGE_COMPLEX))
+ if ((EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
+ || (EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
continue;
/* Check if bb has two predecessors, one being itself. */
- /* In view of above tests, suffices to check e->pred_next->pred_next? */
- e = bb->pred;
- if (!e || !e->pred_next || e->pred_next->pred_next)
+ if (EDGE_COUNT (bb->preds) != 2)
continue;
- if (e->src != bb && e->pred_next->src != bb)
+ if (EDGE_PRED (bb, 0)->src != bb && EDGE_PRED (bb, 1)->src != bb)
continue;
- if ((e->flags & EDGE_COMPLEX)
- || (e->pred_next->flags & EDGE_COMPLEX))
+ if ((EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
+ || (EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX))
continue;
/* For debugging. */
- if (passes++ > MAX_SMS_LOOP_NUMBER && MAX_SMS_LOOP_NUMBER != -1)
+ if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
{
if (dump_file)
fprintf (dump_file, "SMS reached MAX_PASSES... \n");
}
get_block_head_tail (bb->index, &head, &tail);
- pre_header_edge = bb->pred;
- if (bb->pred->src != bb)
- pre_header_edge = bb->pred->pred_next;
+ pre_header_edge = EDGE_PRED (bb, 0);
+ if (EDGE_PRED (bb, 0)->src != bb)
+ pre_header_edge = EDGE_PRED (bb, 1);
/* Perfrom SMS only on loops that their average count is above threshold. */
if (bb->count < pre_header_edge->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
if ( !(count_reg = doloop_register_get (tail, &comp)))
continue;
- e = bb->pred;
+ e = EDGE_PRED (bb, 0);
if (e->src == bb)
- pre_header = e->pred_next->src;
+ pre_header = EDGE_PRED (bb, 1)->src;
else
pre_header = e->src;
/* Don't handle BBs with calls or barriers, or !single_set insns. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
- if (GET_CODE (insn) == CALL_INSN
- || GET_CODE (insn) == BARRIER
- || (INSN_P (insn) && GET_CODE (insn) != JUMP_INSN
+ if (CALL_P (insn)
+ || BARRIER_P (insn)
+ || (INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
break;
{
if (stats_file)
{
- if (GET_CODE (insn) == CALL_INSN)
+ if (CALL_P (insn))
fprintf (stats_file, "SMS loop-with-call\n");
- else if (GET_CODE (insn) == BARRIER)
+ else if (BARRIER_P (insn))
fprintf (stats_file, "SMS loop-with-barrier\n");
else
fprintf (stats_file, "SMS loop-with-not-single-set\n");
get_block_head_tail (g->bb->index, &head, &tail);
- pre_header_edge = g->bb->pred;
- if (g->bb->pred->src != g->bb)
- pre_header_edge = g->bb->pred->pred_next;
+ pre_header_edge = EDGE_PRED (g->bb, 0);
+ if (EDGE_PRED (g->bb, 0)->src != g->bb)
+ pre_header_edge = EDGE_PRED (g->bb, 1);
if (stats_file)
{
int i;
start_sequence ();
- /* Copy the original loop code before modifying it - so we can use
- it later. */
+ /* Copy the original loop code before modifying it -
+ so we can use it later. */
for (i = 0; i < ps->g->num_nodes; i++)
duplicate_insn_chain (ps->g->nodes[i].first_note,
ps->g->nodes[i].insn);
set_columns_for_ps (ps);
permute_partial_schedule (ps, g->closing_branch->first_note);
+
+ /* Mark this loop as software pipelined so the later
+ scheduling passes doesn't touch it. */
+ if (! flag_resched_modulo_sched)
+ g->bb->flags |= BB_DISABLE_SCHEDULE;
+
generate_reg_moves (ps);
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes);
ddg_edge_ptr e;
int start, end, step; /* Place together into one struct? */
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
- sbitmap psp = sbitmap_alloc (num_nodes);
- sbitmap pss = sbitmap_alloc (num_nodes);
+ sbitmap must_precede = sbitmap_alloc (num_nodes);
+ sbitmap must_follow = sbitmap_alloc (num_nodes);
+
partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
while (try_again_with_larger_ii && ii < maxii)
if (!INSN_P (insn))
continue;
- if (GET_CODE (insn) == JUMP_INSN) /* Closing branch handled later. */
+ if (JUMP_P (insn)) /* Closing branch handled later. */
continue;
/* 1. compute sched window for u (start, end, step). */
- sbitmap_zero (psp);
- sbitmap_zero (pss);
- psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
- pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
+ psp_not_empty = sbitmap_any_common_bits (u_node_preds, sched_nodes);
+ pss_not_empty = sbitmap_any_common_bits (u_node_succs, sched_nodes);
if (psp_not_empty && !pss_not_empty)
{
ddg_node_ptr v_node = e->src;
if (TEST_BIT (sched_nodes, v_node->cuid))
{
- early_start = MAX (early_start,
- SCHED_TIME (v_node)
- + e->latency - (e->distance * ii));
+ int node_st = SCHED_TIME (v_node)
+ + e->latency - (e->distance * ii);
+
+ early_start = MAX (early_start, node_st);
+
if (e->data_type == MEM_DEP)
end = MIN (end, SCHED_TIME (v_node) + ii - 1);
}
fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
u, start, end, step);
+ /* use must_follow & must_precede bitmaps to determine order
+ of nodes within the cycle. */
+ sbitmap_zero (must_precede);
+ sbitmap_zero (must_follow);
+ for (e = u_node->in; e != 0; e = e->next_in)
+ if (TEST_BIT (sched_nodes, e->src->cuid)
+ && e->latency == (ii * e->distance)
+ && start == SCHED_TIME (e->src))
+ SET_BIT (must_precede, e->src->cuid);
+
+ for (e = u_node->out; e != 0; e = e->next_out)
+ if (TEST_BIT (sched_nodes, e->dest->cuid)
+ && e->latency == (ii * e->distance)
+ && end == SCHED_TIME (e->dest))
+ SET_BIT (must_follow, e->dest->cuid);
+
success = 0;
if ((step > 0 && start < end) || (step < 0 && start > end))
for (c = start; c != end; c += step)
{
- ps_insn_ptr psi = ps_add_node_check_conflicts (ps, u_node, c);
+ ps_insn_ptr psi;
+
+ psi = ps_add_node_check_conflicts (ps, u_node, c,
+ must_precede,
+ must_follow);
if (psi)
{
} /* While try_again_with_larger_ii. */
sbitmap_free (sched_nodes);
- sbitmap_free (psp);
- sbitmap_free (pss);
if (ii >= maxii)
{
modulo scheduling. */
/* Create a partial schedule and allocate a memory to hold II rows. */
-partial_schedule_ptr
+static partial_schedule_ptr
create_partial_schedule (int ii, ddg_ptr g, int history)
{
partial_schedule_ptr ps = (partial_schedule_ptr)
}
/* Free all the memory allocated to the partial schedule. */
-void
+static void
free_partial_schedule (partial_schedule_ptr ps)
{
if (!ps)
/* Clear the rows array with its PS_INSNs, and create a new one with
NEW_II rows. */
-void
+static void
reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
{
if (!ps)
return true;
}
+/* Unlike what literature describes for modulo scheduling (which focuses
+ on VLIW machines) the order of the instructions inside a cycle is
+ important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
+ where the current instruction should go relative to the already
+ scheduled instructions in the given cycle. Go over these
+ instructions and find the first possible column to put it in. */
+static bool
+ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
+ sbitmap must_precede, sbitmap must_follow)
+{
+ ps_insn_ptr next_ps_i;
+ ps_insn_ptr first_must_follow = NULL;
+ ps_insn_ptr last_must_precede = NULL;
+ int row;
+
+ if (! ps_i)
+ return false;
+
+ row = SMODULO (ps_i->cycle, ps->ii);
+
+ /* Find the first must follow and the last must precede
+ and insert the node immediately after the must precede
+ but make sure that it there is no must follow after it. */
+ for (next_ps_i = ps->rows[row];
+ next_ps_i;
+ next_ps_i = next_ps_i->next_in_row)
+ {
+ if (TEST_BIT (must_follow, next_ps_i->node->cuid)
+ && ! first_must_follow)
+ first_must_follow = next_ps_i;
+ if (TEST_BIT (must_precede, next_ps_i->node->cuid))
+ {
+ /* If we have already met a node that must follow, then
+ there is no possible column. */
+ if (first_must_follow)
+ return false;
+ else
+ last_must_precede = next_ps_i;
+ }
+ }
+
+ /* Now insert the node after INSERT_AFTER_PSI. */
+
+ if (! last_must_precede)
+ {
+ ps_i->next_in_row = ps->rows[row];
+ ps_i->prev_in_row = NULL;
+ if (ps_i->next_in_row)
+ ps_i->next_in_row->prev_in_row = ps_i;
+ ps->rows[row] = ps_i;
+ }
+ else
+ {
+ ps_i->next_in_row = last_must_precede->next_in_row;
+ last_must_precede->next_in_row = ps_i;
+ ps_i->prev_in_row = last_must_precede;
+ if (ps_i->next_in_row)
+ ps_i->next_in_row->prev_in_row = ps_i;
+ }
+
+ return true;
+}
+
/* Advances the PS_INSN one column in its current row; returns false
- in failure and true in success. */
+ in failure and true in success. Bit N is set in MUST_FOLLOW if
+ the node with cuid N must be come after the node pointed to by
+ PS_I when scheduled in the same cycle. */
static int
-ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
+ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
+ sbitmap must_follow)
{
ps_insn_ptr prev, next;
int row;
+ ddg_node_ptr next_node;
if (!ps || !ps_i)
return false;
if (! ps_i->next_in_row)
return false;
+ next_node = ps_i->next_in_row->node;
+
/* Check if next_in_row is dependent on ps_i, both having same sched
times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
- if (ps_i->cycle == ps_i->next_in_row->cycle)
- {
- ddg_edge_ptr e;
- ddg_node_ptr next_node = ps_i->next_in_row->node;
-
- for (e = ps_i->node->out; e; e = e->next_out)
- if (e->dest == next_node)
- return false;
- }
+ if (TEST_BIT (must_follow, next_node->cuid))
+ return false;
- /* Advace PS_I over its next_in_row in the doubly linked list. */
+ /* Advance PS_I over its next_in_row in the doubly linked list. */
prev = ps_i->prev_in_row;
next = ps_i->next_in_row;
}
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
- Returns 0 if this is not possible and a PS_INSN otherwise. */
+ Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
+ set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
+ before/after (respectively) the node pointed to by PS_I when scheduled
+ in the same cycle. */
static ps_insn_ptr
-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
+add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
+ sbitmap must_precede, sbitmap must_follow)
{
- ps_insn_ptr ps_i, next_ps_i, advance_after;
+ ps_insn_ptr ps_i;
int rest_count = 1;
int row = SMODULO (cycle, ps->ii);
- ddg_edge_ptr e;
if (ps->rows[row]
&& ps->rows[row]->row_rest_count >= issue_rate)
rest_count += ps->rows[row]->row_rest_count;
ps_i = create_ps_insn (node, rest_count, cycle);
- ps_i->next_in_row = ps->rows[row];
- ps_i->prev_in_row = NULL;
- if (ps_i->next_in_row)
- ps_i->next_in_row->prev_in_row = ps_i;
- ps->rows[row] = ps_i;
-
- /* Check if n is dependent on an insn already in row, having same cycle
- (typically ANTI_DEP). If so, n must skip over it. */
- advance_after = NULL;
- for (next_ps_i = ps_i->next_in_row;
- next_ps_i;
- next_ps_i = next_ps_i->next_in_row)
- if (next_ps_i->cycle == cycle)
- for (e = node->in; e; e = e->next_in)
- if (e->src == next_ps_i->node)
- advance_after = next_ps_i;
-
- if (advance_after)
- while (ps_i->prev_in_row != advance_after)
- if (!ps_insn_advance_column (ps, ps_i))
- {
- remove_node_from_ps (ps, ps_i);
- return NULL;
- }
+
+ /* Finds and inserts PS_I according to MUST_FOLLOW and
+ MUST_PRECEDE. */
+ if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
+ {
+ free (ps_i);
+ return NULL;
+ }
return ps_i;
}
static void
advance_one_cycle (void)
{
- if (targetm.sched.use_dfa_pipeline_interface
- && (*targetm.sched.use_dfa_pipeline_interface) ())
- {
- if (targetm.sched.dfa_pre_cycle_insn)
- state_transition (curr_state,
- (*targetm.sched.dfa_pre_cycle_insn) ());
+ if (targetm.sched.dfa_pre_cycle_insn)
+ state_transition (curr_state,
+ (*targetm.sched.dfa_pre_cycle_insn) ());
- state_transition (curr_state, NULL);
+ state_transition (curr_state, NULL);
- if (targetm.sched.dfa_post_cycle_insn)
- state_transition (curr_state,
- (*targetm.sched.dfa_post_cycle_insn) ());
- }
+ if (targetm.sched.dfa_post_cycle_insn)
+ state_transition (curr_state,
+ (*targetm.sched.dfa_post_cycle_insn) ());
}
/* Checks if PS has resource conflicts according to DFA, starting from
{
int cycle;
- if (! targetm.sched.use_dfa_pipeline_interface
- || ! (*targetm.sched.use_dfa_pipeline_interface) ())
- return true;
-
state_reset (curr_state);
for (cycle = from; cycle <= to; cycle++)
/* Checks if the given node causes resource conflicts when added to PS at
cycle C. If not the node is added to PS and returned; otherwise zero
- is returned. */
-ps_insn_ptr
-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
+ is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
+ cuid N must be come before/after (respectively) the node pointed to by
+ PS_I when scheduled in the same cycle. */
+static ps_insn_ptr
+ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
+ int c, sbitmap must_precede,
+ sbitmap must_follow)
{
int has_conflicts = 0;
ps_insn_ptr ps_i;
- /* First add the node to the PS, if this succeeds check for conflicts,
- trying different issue slots in the same row. */
- if (! (ps_i = add_node_to_ps (ps, n, c)))
+ /* First add the node to the PS, if this succeeds check for
+ conflicts, trying different issue slots in the same row. */
+ if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
return NULL; /* Failed to insert the node at the given cycle. */
has_conflicts = ps_has_conflicts (ps, c, c)
scheduled in without conflicts. */
while (has_conflicts)
{
- if (! ps_insn_advance_column (ps, ps_i))
+ if (! ps_insn_advance_column (ps, ps_i, must_follow))
break;
has_conflicts = ps_has_conflicts (ps, c, c)
|| (ps->history > 0
/* Rotate the rows of PS such that insns scheduled at time
START_CYCLE will appear in row 0. Updates max/min_cycles. */
-void
+static void
rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
{
int i, row, backward_rotates;