/* Swing Modulo Scheduling implementation.
- Copyright (C) 2004, 2005, 2006, 2007
+ Copyright (C) 2004, 2005, 2006, 2007, 2008
Free Software Foundation, Inc.
Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
using a certain 'count' register and (2) the loop count can be
- adjusted by modifying this register prior to the loop.
+ adjusted by modifying this register prior to the loop.
TODO: Rely on cfgloop analysis instead. */
\f
/* This page defines partial-schedule structures and functions for
};
-
+
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
/* This page defines constants and structures for the modulo scheduling
driver. */
-/* As in haifa-sched.c: */
-/* issue_rate is the number of insns that can be scheduled in the same
- machine cycle. It can be defined in the config/mach/mach.h file,
- otherwise we set it to 1. */
-
-static int issue_rate;
-
static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
code in order to use sched_analyze() for computing the dependencies.
They are used when initializing the sched_info structure. */
static const char *
-sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
+sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
{
static char tmp[80];
{
}
-static struct sched_info sms_sched_info =
+static struct common_sched_info_def sms_common_sched_info;
+
+static struct sched_deps_info_def sms_sched_deps_info = /* Installed as
+   sched_deps_info by setup_sched_infos.  */
+ {
+ compute_jump_reg_dependencies, /* The only non-null entry.  */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, /* Every remaining pointer entry is left null.  */
+ 0, 0, 0 /* Trailing entries are all zero.  */
+ };
+
+static struct haifa_sched_info sms_sched_info = /* Installed as
+   current_sched_info by setup_sched_infos; sms_print_insn is the only
+   non-null entry visible in this initializer.  */
{
NULL,
NULL,
NULL,
sms_print_insn,
NULL,
- compute_jump_reg_dependencies,
+ NULL, /* insn_finishes_block_p */
NULL, NULL,
NULL, NULL,
- 0, 0, 0,
+ 0, 0,
- NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL,
0
};
-
/* Given HEAD and TAIL which are the first and last insns in a loop;
return the register which controls the loop. Return zero if it has
more than one occurrence in the loop besides the control part or the
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
- if (INSN_P (insn) && single_set (insn) &&
+ if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
{
rtx pat = single_set (insn);
- if (GET_CODE (SET_SRC (pat)) == CONST_INT)
+ if (CONST_INT_P (SET_SRC (pat)))
{
*count = INTVAL (SET_SRC (pat));
return insn;
res_MII (ddg_ptr g)
{
if (targetm.sched.sms_res_mii)
- return targetm.sched.sms_res_mii (g);
-
- return (g->num_nodes / issue_rate);
+ return targetm.sched.sms_res_mii (g);
+
+ return ((g->num_nodes - g->num_debug) / issue_rate);
}
int i;
int last_stage = PS_STAGE_COUNT (ps) - 1;
edge e;
-
+
/* Generate the prolog, inserting its insns on the loop-entry edge. */
start_sequence ();
for (i = 0; i < last_stage; i++)
duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
-
+
/* Put the prolog on the entry edge. */
e = loop_preheader_edge (loop);
split_edge_and_insert (e, get_insns ());
for (i = 0; i < last_stage; i++)
duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
-
+
/* Put the epilogue on the exit edge. */
gcc_assert (single_exit (loop));
e = single_exit (loop);
for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
{
if (NOTE_P (head) || LABEL_P (head)
- || (INSN_P (head) && JUMP_P (head)))
+ || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
continue;
empty_bb = false;
break;
if (dump_file)
{
rtx insn = BB_END (loop->header);
-
+
fprintf (dump_file, "SMS loop many exits ");
fprintf (dump_file, " %s %d (file, line)\n",
insn_file (insn), insn_line (insn));
if (dump_file)
{
rtx insn = BB_END (loop->header);
-
+
fprintf (dump_file, "SMS loop many BBs. ");
fprintf (dump_file, " %s %d (file, line)\n",
insn_file (insn), insn_line (insn));
}
}
+/* Install the scheduler info structures used by SMS.  Copy
+   haifa_common_sched_info into sms_common_sched_info, set the copy's
+   sched_pass_id to SCHED_SMS_PASS, and then point common_sched_info,
+   sched_deps_info and current_sched_info at the SMS instances
+   (sms_common_sched_info, sms_sched_deps_info and sms_sched_info).
+   Takes no arguments and returns nothing.  */
+static void
+setup_sched_infos (void)
+{
+ memcpy (&sms_common_sched_info, &haifa_common_sched_info,
+ sizeof (sms_common_sched_info));
+ sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS; /* Tag the copy.  */
+ common_sched_info = &sms_common_sched_info;
+
+ sched_deps_info = &sms_sched_deps_info;
+ current_sched_info = &sms_sched_info;
+}
+
/* Probability in % that the sms-ed loop rolls enough so that optimized
version may be entered. Just a guess. */
#define PROB_SMS_ENOUGH_ITERATIONS 80
issue_rate = 1;
/* Initialize the scheduler. */
- current_sched_info = &sms_sched_info;
-
- /* Init Data Flow analysis, to be used in interloop dep calculation. */
- df_set_flags (DF_LR_RUN_DCE);
- df_rd_add_problem ();
- df_note_add_problem ();
- df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
- df_analyze ();
- regstat_compute_calls_crossed ();
- sched_init ();
+ setup_sched_infos ();
+ haifa_sched_init ();
/* Allocate memory to hold the DDG array one entry for each loop.
We use loop->num as index into this array. */
if (single_exit (loop)->count)
trip_count = latch_edge->count / single_exit (loop)->count;
- /* Perfrom SMS only on loops that their average count is above threshold. */
+ /* Perform SMS only on loops whose average count is above the threshold. */
if ( latch_edge->count
&& (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
/* Don't handle BBs with calls or barriers, or !single_set insns,
or auto-increment insns (to avoid creating invalid reg-moves
- for the auto-increment insns).
+ for the auto-increment insns).
??? Should handle auto-increment insns.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
if (CALL_P (insn)
|| BARRIER_P (insn)
- || (INSN_P (insn) && !JUMP_P (insn)
+ || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
fprintf (dump_file, "SMS loop-with-barrier\n");
else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
fprintf (dump_file, "SMS reg inc\n");
- else if ((INSN_P (insn) && !JUMP_P (insn)
+ else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
fprintf (dump_file, "SMS loop-with-not-single-set\n");
else
ps = sms_schedule_by_order (g, mii, maxii, node_order);
- if (ps)
+ if (ps){
stage_count = PS_STAGE_COUNT (ps);
+ gcc_assert(stage_count >= 1);
+ }
/* Stage count of 1 means that there is no interleaving between
iterations, let the scheduling passes do the job. */
- if (stage_count < 1
+ if (stage_count <= 1
|| (count_init && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
the closing_branch was scheduled and should appear in the last (ii-1)
row. Otherwise, we are free to schedule the branch, and we let nodes
that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
- row; this should reduce stage_count to minimum.
+ row; this should reduce stage_count to minimum.
TODO: Revisit the issue of scheduling the insns of the
control part relative to the branch when the control part
has more than one insn. */
normalize_sched_times (ps);
rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
set_columns_for_ps (ps);
-
+
canon_loop (loop);
/* case the BCT count is not known , Do loop-versioning */
print_node_sched_params (dump_file, g->num_nodes, g);
/* Generate prolog and epilog. */
generate_prolog_epilog (ps, loop, count_reg, count_init);
-
+
free_undo_replace_buff (reg_move_replaces);
}
free_ddg (g);
}
- regstat_free_calls_crossed ();
free (g_arr);
/* Release scheduler data, needed until now because of DFA. */
- sched_finish ();
+ haifa_sched_finish ();
loop_optimizer_finalize ();
}
ddg_node_ptr v_node = e->src;
if (dump_file)
- {
+ {
fprintf (dump_file, "\nProcessing edge: ");
print_ddg_edge (dump_file, e);
fprintf (dump_file,
MAX (early_start, p_st + e->latency - (e->distance * ii));
if (dump_file)
- fprintf (dump_file,
+ fprintf (dump_file,
"pred st = %d; early_start = %d; latency: %d",
p_st, early_start, e->latency);
s_st - e->latency + (e->distance * ii));
if (dump_file)
- fprintf (dump_file,
+ fprintf (dump_file,
"succ st = %d; late_start = %d; latency = %d",
s_st, late_start, e->latency);
- (e->distance * ii));
if (dump_file)
- fprintf (dump_file,
+ fprintf (dump_file,
"pred st = %d; early_start = %d; latency = %d",
p_st, early_start, e->latency);
+ (e->distance * ii));
if (dump_file)
- fprintf (dump_file,
+ fprintf (dump_file,
"succ st = %d; late_start = %d; latency = %d",
s_st, late_start, e->latency);
&& e->latency == 0
we use the fact that latency is non-negative:
SCHED_TIME (e->dest) + (e->distance * ii) >=
- SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
+ SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
last_cycle_in_window
and check only if
SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
parameters to decide if that's possible:
PS - The partial schedule.
U - The serial number of U_NODE.
- NUM_SPLITS - The number of row spilts made so far.
+ NUM_SPLITS - The number of row splits made so far.
MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
the first row of the scheduling window)
MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
ddg_node_ptr u_node = &ps->g->nodes[u];
rtx insn = u_node->insn;
- if (!INSN_P (insn))
+ if (!NONDEBUG_INSN_P (insn))
{
RESET_BIT (tobe_scheduled, u);
continue;
}
num_splits++;
+ /* The scheduling window is exclusive of 'end'
+ whereas compute_split_row() expects an inclusive,
+ ordered range. */
if (step == 1)
- split_row = compute_split_row (sched_nodes, start, end,
+ split_row = compute_split_row (sched_nodes, start, end - 1,
ps->ii, u_node);
else
- split_row = compute_split_row (sched_nodes, end, start,
+ split_row = compute_split_row (sched_nodes, end + 1, start,
ps->ii, u_node);
ps_insert_empty_row (ps, split_row, sched_nodes);
SET_BIT (tmp, u);
}
-
+
if (dump_file)
fprintf (dump_file, "\n");
-
+
sbitmap_free (tmp);
}
sbitmap_zero (prev_sccs);
sbitmap_ones (ones);
- /* Perfrom the node ordering starting from the SCC with the highest recMII.
+ /* Perform the node ordering starting from the SCC with the highest recMII.
For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
for (i = 0; i < all_sccs->num_sccs; i++)
{
}
/* Advances the PS_INSN one column in its current row; returns false
- in failure and true in success. Bit N is set in MUST_FOLLOW if
- the node with cuid N must be come after the node pointed to by
+ in failure and true in success. Bit N is set in MUST_FOLLOW if
+ the node with cuid N must come after the node pointed to by
PS_I when scheduled in the same cycle. */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
}
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
- Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
- set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
- before/after (respectively) the node pointed to by PS_I when scheduled
+ Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
+ set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
+ before/after (respectively) the node pointed to by PS_I when scheduled
in the same cycle. */
static ps_insn_ptr
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
{
rtx insn = crr_insn->node->insn;
- if (!INSN_P (insn))
+ if (!NONDEBUG_INSN_P (insn))
continue;
/* Check if there is room for the current insn. */
return true;
/* Update the DFA state and return with failure if the DFA found
- recource conflicts. */
+ resource conflicts. */
if (state_transition (curr_state, insn) >= 0)
return true;
/* Checks if the given node causes resource conflicts when added to PS at
cycle C. If not the node is added to PS and returned; otherwise zero
- is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
- cuid N must be come before/after (respectively) the node pointed to by
+ is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
+ cuid N must come before/after (respectively) the node pointed to by
PS_I when scheduled in the same cycle. */
ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,