/* Swing Modulo Scheduling implementation.
- Copyright (C) 2004, 2005, 2006, 2007
+ Copyright (C) 2004, 2005, 2006, 2007, 2008
Free Software Foundation, Inc.
Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
/* This page defines constants and structures for the modulo scheduling
driver. */
-/* As in haifa-sched.c: */
-/* issue_rate is the number of insns that can be scheduled in the same
- machine cycle. It can be defined in the config/mach/mach.h file,
- otherwise we set it to 1. */
-
-static int issue_rate;
-
static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
code in order to use sched_analyze() for computing the dependencies.
They are used when initializing the sched_info structure. */
static const char *
-sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
+sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
{
static char tmp[80];
{
}
-static struct sched_info sms_sched_info =
+static struct common_sched_info_def sms_common_sched_info;
+
+static struct sched_deps_info_def sms_sched_deps_info =
+ {
+ compute_jump_reg_dependencies,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL,
+ 0, 0, 0
+ };
+
+static struct haifa_sched_info sms_sched_info =
{
NULL,
NULL,
NULL,
sms_print_insn,
NULL,
- compute_jump_reg_dependencies,
+ NULL, /* insn_finishes_block_p */
NULL, NULL,
NULL, NULL,
- 0, 0, 0,
+ 0, 0,
- NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL,
0
};
-
/* Given HEAD and TAIL which are the first and last insns in a loop;
return the register which controls the loop. Return zero if it has
more than one occurrence in the loop besides the control part or the
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
- if (INSN_P (insn) && single_set (insn) &&
+ if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
{
rtx pat = single_set (insn);
- if (GET_CODE (SET_SRC (pat)) == CONST_INT)
+ if (CONST_INT_P (SET_SRC (pat)))
{
*count = INTVAL (SET_SRC (pat));
return insn;
if (targetm.sched.sms_res_mii)
return targetm.sched.sms_res_mii (g);
- return (g->num_nodes / issue_rate);
+ return ((g->num_nodes - g->num_debug) / issue_rate);
}
for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
{
if (NOTE_P (head) || LABEL_P (head)
- || (INSN_P (head) && JUMP_P (head)))
+ || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
continue;
empty_bb = false;
break;
}
}
+/* Setup infos. */
+static void
+setup_sched_infos (void)
+{
+ memcpy (&sms_common_sched_info, &haifa_common_sched_info,
+ sizeof (sms_common_sched_info));
+ sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
+ common_sched_info = &sms_common_sched_info;
+
+ sched_deps_info = &sms_sched_deps_info;
+ current_sched_info = &sms_sched_info;
+}
+
/* Probability in % that the sms-ed loop rolls enough so that optimized
version may be entered. Just a guess. */
#define PROB_SMS_ENOUGH_ITERATIONS 80
issue_rate = 1;
/* Initialize the scheduler. */
- current_sched_info = &sms_sched_info;
-
- /* Init Data Flow analysis, to be used in interloop dep calculation. */
- df_set_flags (DF_LR_RUN_DCE);
- df_rd_add_problem ();
- df_note_add_problem ();
- df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
- df_analyze ();
- regstat_compute_calls_crossed ();
- sched_init ();
+ setup_sched_infos ();
+ haifa_sched_init ();
/* Allocate memory to hold the DDG array one entry for each loop.
We use loop->num as index into this array. */
if (single_exit (loop)->count)
trip_count = latch_edge->count / single_exit (loop)->count;
- /* Perfrom SMS only on loops that their average count is above threshold. */
+ /* Perform SMS only on loops that their average count is above threshold. */
if ( latch_edge->count
&& (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
if (CALL_P (insn)
|| BARRIER_P (insn)
- || (INSN_P (insn) && !JUMP_P (insn)
+ || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
fprintf (dump_file, "SMS loop-with-barrier\n");
else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
fprintf (dump_file, "SMS reg inc\n");
- else if ((INSN_P (insn) && !JUMP_P (insn)
+ else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
fprintf (dump_file, "SMS loop-with-not-single-set\n");
else
ps = sms_schedule_by_order (g, mii, maxii, node_order);
- if (ps)
+ if (ps){
stage_count = PS_STAGE_COUNT (ps);
+ gcc_assert(stage_count >= 1);
+ }
/* Stage count of 1 means that there is no interleaving between
iterations, let the scheduling passes do the job. */
- if (stage_count < 1
+ if (stage_count <= 1
|| (count_init && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
free_ddg (g);
}
- regstat_free_calls_crossed ();
free (g_arr);
/* Release scheduler data, needed until now because of DFA. */
- sched_finish ();
+ haifa_sched_finish ();
loop_optimizer_finalize ();
}
/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
node currently been scheduled. At the end of the calculation
- MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
- which are in SCHED_NODES (already scheduled nodes) and scheduled at
- the same row as the first/last row of U_NODE's scheduling window.
+ MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
+ U_NODE which are (1) already scheduled in the first/last row of
+ U_NODE's scheduling window, (2) whose dependence inequality with U
+ becomes an equality when U is scheduled in this same row, and (3)
+ whose dependence latency is zero.
+
The first and last rows are calculated using the following parameters:
START/END rows - The cycles that begins/ends the traversal on the window;
searching for an empty cycle to schedule U_NODE.
STEP - The direction in which we traverse the window.
- II - The initiation interval.
- TODO: We can add an insn to the must_precede/must_follow bitmap only
- if it has tight dependence to U and they are both scheduled in the
- same row. The current check is more conservative and content with
- the fact that both U and the insn are scheduled in the same row. */
+ II - The initiation interval. */
static void
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
{
ddg_edge_ptr e;
int first_cycle_in_window, last_cycle_in_window;
- int first_row_in_window, last_row_in_window;
gcc_assert (must_precede && must_follow);
first_cycle_in_window = (step == 1) ? start : end - step;
last_cycle_in_window = (step == 1) ? end - step : start;
- first_row_in_window = SMODULO (first_cycle_in_window, ii);
- last_row_in_window = SMODULO (last_cycle_in_window, ii);
-
sbitmap_zero (must_precede);
sbitmap_zero (must_follow);
if (dump_file)
fprintf (dump_file, "\nmust_precede: ");
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
+ && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
+ first_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->src) - (e->distance * ii) <=
+ SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
+ first_cycle_in_window
+ and check only if
+ SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
for (e = u_node->in; e != 0; e = e->next_in)
if (TEST_BIT (sched_nodes, e->src->cuid)
- && (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window))
+ && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
+ first_cycle_in_window))
{
if (dump_file)
fprintf (dump_file, "%d ", e->src->cuid);
if (dump_file)
fprintf (dump_file, "\nmust_follow: ");
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
+ && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
+ last_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->dest) + (e->distance * ii) >=
+ SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
+ last_cycle_in_window
+ and check only if
+ SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
for (e = u_node->out; e != 0; e = e->next_out)
if (TEST_BIT (sched_nodes, e->dest->cuid)
- && (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window))
+ && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
+ last_cycle_in_window))
{
if (dump_file)
fprintf (dump_file, "%d ", e->dest->cuid);
parameters to decide if that's possible:
PS - The partial schedule.
U - The serial number of U_NODE.
- NUM_SPLITS - The number of row spilts made so far.
+ NUM_SPLITS - The number of row splits made so far.
MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
the first row of the scheduling window)
MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
ddg_node_ptr u_node = &ps->g->nodes[u];
rtx insn = u_node->insn;
- if (!INSN_P (insn))
+ if (!NONDEBUG_INSN_P (insn))
{
RESET_BIT (tobe_scheduled, u);
continue;
}
num_splits++;
+ /* The scheduling window is exclusive of 'end'
+ whereas compute_split_window() expects an inclusive,
+ ordered range. */
if (step == 1)
- split_row = compute_split_row (sched_nodes, start, end,
+ split_row = compute_split_row (sched_nodes, start, end - 1,
ps->ii, u_node);
else
- split_row = compute_split_row (sched_nodes, end, start,
+ split_row = compute_split_row (sched_nodes, end + 1, start,
ps->ii, u_node);
ps_insert_empty_row (ps, split_row, sched_nodes);
sbitmap_zero (prev_sccs);
sbitmap_ones (ones);
- /* Perfrom the node ordering starting from the SCC with the highest recMII.
+ /* Perform the node ordering starting from the SCC with the highest recMII.
For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
for (i = 0; i < all_sccs->num_sccs; i++)
{
{
rtx insn = crr_insn->node->insn;
- if (!INSN_P (insn))
+ if (!NONDEBUG_INSN_P (insn))
continue;
/* Check if there is room for the current insn. */
return true;
/* Update the DFA state and return with failure if the DFA found
- recource conflicts. */
+ resource conflicts. */
if (state_transition (curr_state, insn) >= 0)
return true;
return 0;
}
-struct tree_opt_pass pass_sms =
+struct rtl_opt_pass pass_sms =
{
+ {
+ RTL_PASS,
"sms", /* name */
gate_handle_sms, /* gate */
rest_of_handle_sms, /* execute */
TODO_dump_func, /* todo_flags_start */
TODO_df_finish | TODO_verify_rtl_sharing |
TODO_dump_func |
- TODO_ggc_collect, /* todo_flags_finish */
- 'm' /* letter */
+ TODO_ggc_collect /* todo_flags_finish */
+ }
};