/* Swing Modulo Scheduling implementation.
- Copyright (C) 2004
+ Copyright (C) 2004, 2005
Free Software Foundation, Inc.
Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
sbitmap must_precede,
sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
-void set_row_column_for_ps (partial_schedule_ptr);
-
\f
/* This page defines constants and structures for the modulo scheduling
driver. */
{
rtx insn;
rtx head, tail;
+
+ if (! pre_header)
+ return NULL_RTX;
+
get_block_head_tail (pre_header->index, &head, &tail);
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
{
int i;
+ if (! dump_file)
+ return;
for (i = 0; i < num_nodes; i++)
{
node_sched_params_ptr nsp = &node_sched_params[i];
return maxii;
}
-
-/* Given the partial schedule, generate register moves when the length
- of the register live range is more than ii; the number of moves is
- determined according to the following equation:
- SCHED_TIME (use) - SCHED_TIME (def) { 1 broken loop-carried
- nreg_moves = ----------------------------------- - { dependence.
- ii { 0 if not.
- This handles the modulo-variable-expansions (mve's) needed for the ps. */
+/*
+ Breaking intra-loop register anti-dependences:
+ Each intra-loop register anti-dependence implies a cross-iteration true
+ dependence of distance 1. Therefore, we can remove such false dependencies
+ and figure out if the partial schedule broke them by checking if (for a
+ true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
+ if so generate a register move. The number of such moves is equal to:
+ SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
+   nreg_moves = ----------------------------------- + 1 - {   dependence.
+ ii { 1 if not.
+*/
static void
generate_reg_moves (partial_schedule_ptr ps)
{
{
int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
+ if (e->distance == 1)
+ nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
+
/* If dest precedes src in the schedule of the kernel, then dest
will read before src writes and we can save one reg_copy. */
if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
{
int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
+ if (e->distance == 1)
+ dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
+
if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
dest_copy--;
int amount = PS_MIN_CYCLE (ps);
int ii = ps->ii;
- for (i = 0; i < g->num_nodes; i++)
+ /* Don't include the closing branch assuming that it is the last node. */
+ for (i = 0; i < g->num_nodes - 1; i++)
{
ddg_node_ptr u = &g->nodes[i];
int normalized_time = SCHED_TIME (u) - amount;
/* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
number of reg_moves starting with the second occurrence of
u_node, which is generated if its SCHED_STAGE <= to_stage. */
- i_reg_moves = to_stage - SCHED_STAGE (u_node);
+ i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
i_reg_moves = MAX (i_reg_moves, 0);
i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn);
basic_block precond_bb = BLOCK_FOR_INSN (precond_jump);
basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn);
- edge epilog_exit_edge = EDGE_SUCC (epilog_bb, 0);
+ edge epilog_exit_edge = single_succ_edge (epilog_bb);
/* Do loop preconditioning to take care of cases were the loop count is
less than the stage count. Update the CFG properly. */
scheduling passes doesn't touch it. */
if (! flag_resched_modulo_sched)
g->bb->flags |= BB_DISABLE_SCHEDULE;
+ /* The life-info is not valid any more. */
+ g->bb->flags |= BB_DIRTY;
generate_reg_moves (ps);
if (dump_file)
ddg_edge_ptr e;
int start, end, step; /* Place together into one struct? */
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
- sbitmap psp = sbitmap_alloc (num_nodes);
- sbitmap pss = sbitmap_alloc (num_nodes);
sbitmap must_precede = sbitmap_alloc (num_nodes);
sbitmap must_follow = sbitmap_alloc (num_nodes);
continue;
/* 1. compute sched window for u (start, end, step). */
- sbitmap_zero (psp);
- sbitmap_zero (pss);
- psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
- pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
+ psp_not_empty = sbitmap_any_common_bits (u_node_preds, sched_nodes);
+ pss_not_empty = sbitmap_any_common_bits (u_node_succs, sched_nodes);
if (psp_not_empty && !pss_not_empty)
{
} /* While try_again_with_larger_ii. */
sbitmap_free (sched_nodes);
- sbitmap_free (psp);
- sbitmap_free (pss);
if (ii >= maxii)
{