/* Swing Modulo Scheduling implementation.
- Copyright (C) 2004, 2005, 2006, 2007
+ Copyright (C) 2004, 2005, 2006, 2007, 2008
Free Software Foundation, Inc.
Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
/* This page defines constants and structures for the modulo scheduling
driver. */
-/* As in haifa-sched.c: */
-/* issue_rate is the number of insns that can be scheduled in the same
- machine cycle. It can be defined in the config/mach/mach.h file,
- otherwise we set it to 1. */
-
-static int issue_rate;
-
static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
code in order to use sched_analyze() for computing the dependencies.
They are used when initializing the sched_info structure. */
static const char *
-sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
+sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
{
static char tmp[80];
{
}
-static struct sched_info sms_sched_info =
+static struct common_sched_info_def sms_common_sched_info;
+
+static struct sched_deps_info_def sms_sched_deps_info =
+ {
+ compute_jump_reg_dependencies,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL,
+ 0, 0, 0
+ };
+
+static struct haifa_sched_info sms_sched_info =
{
NULL,
NULL,
NULL,
sms_print_insn,
NULL,
- compute_jump_reg_dependencies,
+ NULL, /* insn_finishes_block_p */
NULL, NULL,
NULL, NULL,
- 0, 0, 0,
+ 0, 0,
- NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL,
0
};
-
/* Given HEAD and TAIL which are the first and last insns in a loop;
return the register which controls the loop. Return zero if it has
more than one occurrence in the loop besides the control part or the
{
rtx pat = single_set (insn);
- if (GET_CODE (SET_SRC (pat)) == CONST_INT)
+ if (CONST_INT_P (SET_SRC (pat)))
{
*count = INTVAL (SET_SRC (pat));
return insn;
}
}
+/* Setup infos. */
+static void
+setup_sched_infos (void)
+{
+ memcpy (&sms_common_sched_info, &haifa_common_sched_info,
+ sizeof (sms_common_sched_info));
+ sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
+ common_sched_info = &sms_common_sched_info;
+
+ sched_deps_info = &sms_sched_deps_info;
+ current_sched_info = &sms_sched_info;
+}
+
/* Probability in % that the sms-ed loop rolls enough so that optimized
version may be entered. Just a guess. */
#define PROB_SMS_ENOUGH_ITERATIONS 80
issue_rate = 1;
/* Initialize the scheduler. */
- current_sched_info = &sms_sched_info;
-
- /* Init Data Flow analysis, to be used in interloop dep calculation. */
- df_set_flags (DF_LR_RUN_DCE);
- df_rd_add_problem ();
- df_note_add_problem ();
- df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
- df_analyze ();
- regstat_compute_calls_crossed ();
- sched_init ();
+ setup_sched_infos ();
+ haifa_sched_init ();
/* Allocate memory to hold the DDG array one entry for each loop.
We use loop->num as index into this array. */
if (single_exit (loop)->count)
trip_count = latch_edge->count / single_exit (loop)->count;
- /* Perfrom SMS only on loops that their average count is above threshold. */
+ /* Perform SMS only on loops that their average count is above threshold. */
if ( latch_edge->count
&& (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
if (! (g = create_ddg (bb, 0)))
{
if (dump_file)
- fprintf (dump_file, "SMS doloop\n");
+ fprintf (dump_file, "SMS create_ddg failed\n");
continue;
}
ps = sms_schedule_by_order (g, mii, maxii, node_order);
- if (ps)
+ if (ps){
stage_count = PS_STAGE_COUNT (ps);
+ gcc_assert(stage_count >= 1);
+ }
/* Stage count of 1 means that there is no interleaving between
iterations, let the scheduling passes do the job. */
- if (stage_count < 1
+ if (stage_count <= 1
|| (count_init && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
free_ddg (g);
}
- regstat_free_calls_crossed ();
free (g_arr);
/* Release scheduler data, needed until now because of DFA. */
- sched_finish ();
+ haifa_sched_finish ();
loop_optimizer_finalize ();
}
/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
node currently been scheduled. At the end of the calculation
- MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
- which are in SCHED_NODES (already scheduled nodes) and scheduled at
- the same row as the first/last row of U_NODE's scheduling window.
+ MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
+ U_NODE which are (1) already scheduled in the first/last row of
+ U_NODE's scheduling window, (2) whose dependence inequality with U
+ becomes an equality when U is scheduled in this same row, and (3)
+ whose dependence latency is zero.
+
The first and last rows are calculated using the following parameters:
START/END rows - The cycles that begins/ends the traversal on the window;
searching for an empty cycle to schedule U_NODE.
STEP - The direction in which we traverse the window.
- II - The initiation interval.
- TODO: We can add an insn to the must_precede/must_follow bitmap only
- if it has tight dependence to U and they are both scheduled in the
- same row. The current check is more conservative and content with
- the fact that both U and the insn are scheduled in the same row. */
+ II - The initiation interval. */
static void
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
{
ddg_edge_ptr e;
int first_cycle_in_window, last_cycle_in_window;
- int first_row_in_window, last_row_in_window;
gcc_assert (must_precede && must_follow);
first_cycle_in_window = (step == 1) ? start : end - step;
last_cycle_in_window = (step == 1) ? end - step : start;
- first_row_in_window = SMODULO (first_cycle_in_window, ii);
- last_row_in_window = SMODULO (last_cycle_in_window, ii);
-
sbitmap_zero (must_precede);
sbitmap_zero (must_follow);
if (dump_file)
fprintf (dump_file, "\nmust_precede: ");
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
+ && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
+ first_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->src) - (e->distance * ii) <=
+ SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
+ first_cycle_in_window
+ and check only if
+ SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
for (e = u_node->in; e != 0; e = e->next_in)
if (TEST_BIT (sched_nodes, e->src->cuid)
- && (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window))
+ && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
+ first_cycle_in_window))
{
if (dump_file)
fprintf (dump_file, "%d ", e->src->cuid);
if (dump_file)
fprintf (dump_file, "\nmust_follow: ");
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
+ && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
+ last_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->dest) + (e->distance * ii) >=
+ SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
+ last_cycle_in_window
+ and check only if
+ SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
for (e = u_node->out; e != 0; e = e->next_out)
if (TEST_BIT (sched_nodes, e->dest->cuid)
- && (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window))
+ && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
+ last_cycle_in_window))
{
if (dump_file)
fprintf (dump_file, "%d ", e->dest->cuid);
parameters to decide if that's possible:
PS - The partial schedule.
U - The serial number of U_NODE.
- NUM_SPLITS - The number of row spilts made so far.
+ NUM_SPLITS - The number of row splits made so far.
MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
the first row of the scheduling window)
MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
static bool
try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
- int u, int row, sbitmap sched_nodes,
+ int u, int cycle, sbitmap sched_nodes,
int *num_splits, sbitmap must_precede,
sbitmap must_follow)
{
bool success = 0;
verify_partial_schedule (ps, sched_nodes);
- psi = ps_add_node_check_conflicts (ps, u_node, row,
+ psi = ps_add_node_check_conflicts (ps, u_node, cycle,
must_precede, must_follow);
if (psi)
{
- SCHED_TIME (u_node) = row;
+ SCHED_TIME (u_node) = cycle;
SET_BIT (sched_nodes, u);
success = 1;
*num_splits = 0;
if (dump_file)
- fprintf (dump_file, "Scheduled w/o split in %d\n", row);
+ fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
}
}
num_splits++;
+ /* The scheduling window is exclusive of 'end'
+ whereas compute_split_window() expects an inclusive,
+ ordered range. */
if (step == 1)
- split_row = compute_split_row (sched_nodes, start, end,
+ split_row = compute_split_row (sched_nodes, start, end - 1,
ps->ii, u_node);
else
- split_row = compute_split_row (sched_nodes, end, start,
+ split_row = compute_split_row (sched_nodes, end + 1, start,
ps->ii, u_node);
ps_insert_empty_row (ps, split_row, sched_nodes);
sbitmap_zero (prev_sccs);
sbitmap_ones (ones);
- /* Perfrom the node ordering starting from the SCC with the highest recMII.
+ /* Perform the node ordering starting from the SCC with the highest recMII.
For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
for (i = 0; i < all_sccs->num_sccs; i++)
{
{
ps_insn_ptr ps_i = ps->rows[i];
- fprintf (dump, "\n[CYCLE %d ]: ", i);
+ fprintf (dump, "\n[ROW %d ]: ", i);
while (ps_i)
{
fprintf (dump, "%d, ",
return true;
/* Update the DFA state and return with failure if the DFA found
- recource conflicts. */
+ resource conflicts. */
if (state_transition (curr_state, insn) >= 0)
return true;
return 0;
}
-struct tree_opt_pass pass_sms =
+struct rtl_opt_pass pass_sms =
{
+ {
+ RTL_PASS,
"sms", /* name */
gate_handle_sms, /* gate */
rest_of_handle_sms, /* execute */
TODO_dump_func, /* todo_flags_start */
TODO_df_finish | TODO_verify_rtl_sharing |
TODO_dump_func |
- TODO_ggc_collect, /* todo_flags_finish */
- 'm' /* letter */
+ TODO_ggc_collect /* todo_flags_finish */
+ }
};