static int issue_rate;
-static int sms_order_nodes (ddg_ptr, int, int * result);
+static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
-static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
-static void generate_prolog_epilog (partial_schedule_ptr, struct loop *loop,
+static void permute_partial_schedule (partial_schedule_ptr, rtx);
+static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
rtx, rtx);
-static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
- int from_stage, int to_stage,
- int is_prolog, rtx count_reg);
+static void duplicate_insns_of_cycles (partial_schedule_ptr,
+ int, int, int, rtx);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
/* Now generate the reg_moves, attaching relevant uses to them. */
SCHED_NREG_MOVES (u) = nreg_moves;
old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
- last_reg_move = u->insn;
+ /* Insert the reg-moves right before the notes which precede
+ the insn they relate to. */
+ last_reg_move = u->first_note;
for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
{
{
if (loop->inner || !loop_outer (loop))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS loop inner or !loop_outer\n");
return false;
+ }
if (!single_exit (loop))
{
rtx insn;
ddg_ptr *g_arr, g;
int * node_order;
- int maxii;
+ int maxii, max_asap;
loop_iterator li;
partial_schedule_ptr ps;
basic_block bb = NULL;
df_set_flags (DF_LR_RUN_DCE);
df_rd_add_problem ();
df_note_add_problem ();
- df_chain_add_problem (DF_DU_CHAIN);
+ df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
df_analyze ();
regstat_compute_calls_crossed ();
sched_init ();
We use loop->num as index into this array. */
g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n\nSMS analysis phase\n");
+ fprintf (dump_file, "===================\n\n");
+ }
+
/* Build DDGs for all the relevant loops and hold them in G_ARR
indexed by the loop index. */
FOR_EACH_LOOP (li, loop, 0)
break;
}
+ if (dump_file)
+ {
+ rtx insn = BB_END (loop->header);
+
+ fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
+ loop->num, insn_file (insn), insn_line (insn));
+
+ }
+
if (! loop_canon_p (loop))
continue;
if (! loop_single_full_bb_p (loop))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
continue;
+ }
bb = loop->header;
/* Make sure this is a doloop. */
if ( !(count_reg = doloop_register_get (head, tail)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS doloop_register_get failed\n");
continue;
+ }
/* Don't handle BBs with calls or barriers, or !single_set insns,
or auto-increment insns (to avoid creating invalid reg-moves
}
g_arr[loop->num] = g;
+ if (dump_file)
+ fprintf (dump_file, "...OK\n");
+
}
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nSMS transformation phase\n");
+ fprintf (dump_file, "=========================\n\n");
+ }
/* We don't want to perform SMS on new loops - created by versioning. */
FOR_EACH_LOOP (li, loop, 0)
continue;
if (dump_file)
- print_ddg (dump_file, g);
+ {
+ rtx insn = BB_END (loop->header);
+
+ fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
+ loop->num, insn_file (insn), insn_line (insn));
+
+ print_ddg (dump_file, g);
+ }
get_ebb_head_tail (loop->header, loop->header, &head, &tail);
node_order = XNEWVEC (int, g->num_nodes);
mii = 1; /* Need to pass some estimate of mii. */
- rec_mii = sms_order_nodes (g, mii, node_order);
+ rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
mii = MAX (res_MII (g), rec_mii);
- maxii = MAXII_FACTOR * mii;
+ maxii = MAX (max_asap, MAXII_FACTOR * mii);
if (dump_file)
fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
MAX (early_start, p_st + e->latency - (e->distance * ii));
if (dump_file)
- fprintf (dump_file, "pred st = %d; early_start = %d; ", p_st,
- early_start);
+ fprintf (dump_file,
+ "pred st = %d; early_start = %d; latency: %d",
+ p_st, early_start, e->latency);
if (e->data_type == MEM_DEP)
end = MIN (end, SCHED_TIME (v_node) + ii - 1);
}
start = early_start;
end = MIN (end, early_start + ii);
+ /* Schedule the node close to its predecessors. */
step = 1;
if (dump_file)
s_st - e->latency + (e->distance * ii));
if (dump_file)
- fprintf (dump_file, "succ st = %d; late_start = %d;", s_st,
- late_start);
+ fprintf (dump_file,
+ "succ st = %d; late_start = %d; latency = %d",
+ s_st, late_start, e->latency);
if (e->data_type == MEM_DEP)
end = MAX (end, SCHED_TIME (v_node) - ii + 1);
}
start = late_start;
end = MAX (end, late_start - ii);
+ /* Schedule the node close to its successors. */
step = -1;
if (dump_file)
{
int early_start = INT_MIN;
int late_start = INT_MAX;
+ int count_preds = 0;
+ int count_succs = 0;
start = INT_MIN;
end = INT_MAX;
- (e->distance * ii));
if (dump_file)
- fprintf (dump_file, "pred st = %d; early_start = %d;", p_st,
- early_start);
+ fprintf (dump_file,
+ "pred st = %d; early_start = %d; latency = %d",
+ p_st, early_start, e->latency);
+
+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+ count_preds++;
if (e->data_type == MEM_DEP)
end = MIN (end, SCHED_TIME (v_node) + ii - 1);
s_st - e->latency
+ (e->distance * ii));
- if (dump_file)
- fprintf (dump_file, "succ st = %d; late_start = %d;", s_st,
- late_start);
+ if (dump_file)
+ fprintf (dump_file,
+ "succ st = %d; late_start = %d; latency = %d",
+ s_st, late_start, e->latency);
+
+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+ count_succs++;
if (e->data_type == MEM_DEP)
start = MAX (start, SCHED_TIME (v_node) - ii + 1);
start = MAX (start, early_start);
end = MIN (end, MIN (early_start + ii, late_start + 1));
step = 1;
+ /* If there are at least as many successors as predecessors, schedule
+ the node close to its successors. */
+ if (count_succs >= count_preds)
+ {
+ int old_start = start;
+
+ start = end - 1;
+ end = old_start - 1;
+ step = -1;
+ }
}
else /* psp is empty && pss is empty. */
{
return 0;
}
+/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
+ node currently being scheduled. At the end of the calculation
+ MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of U_NODE
+ which are in SCHED_NODES (already scheduled nodes) and scheduled at
+ the same row as the first/last row of U_NODE's scheduling window.
+ The first and last rows are calculated using the following parameters:
+ START/END rows - The cycles that begin/end the traversal on the window;
+ searching for an empty cycle to schedule U_NODE.
+ STEP - The direction in which we traverse the window.
+ II - The initiation interval.
+ Both MUST_PRECEDE and MUST_FOLLOW must be non-NULL; they are cleared
+ before being filled. When a dump file is active, the cuid of every
+ node added to either bitmap is printed.
+ TODO: We can add an insn to the must_precede/must_follow bitmap only
+ if it has tight dependence to U and they are both scheduled in the
+ same row. The current check is more conservative and is content with
+ the fact that both U and the insn are scheduled in the same row. */
+
+static void
+calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
+ int step, int ii, sbitmap sched_nodes,
+ sbitmap must_precede, sbitmap must_follow)
+{
+ ddg_edge_ptr e;
+ int first_cycle_in_window, last_cycle_in_window;
+ int first_row_in_window, last_row_in_window;
+
+ gcc_assert (must_precede && must_follow);
+
+ /* Consider the following scheduling window:
+ {first_cycle_in_window, first_cycle_in_window+1, ...,
+ last_cycle_in_window}. If step is 1 then the following will be
+ the order we traverse the window: {start=first_cycle_in_window,
+ first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
+ or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
+ end=first_cycle_in_window-1} if step is -1.
+ In both directions END lies one step past the window, so END - STEP
+ is the last cycle the traversal visits. */
+ first_cycle_in_window = (step == 1) ? start : end - step;
+ last_cycle_in_window = (step == 1) ? end - step : start;
+
+ first_row_in_window = SMODULO (first_cycle_in_window, ii);
+ last_row_in_window = SMODULO (last_cycle_in_window, ii);
+
+ sbitmap_zero (must_precede);
+ sbitmap_zero (must_follow);
+
+ if (dump_file)
+ fprintf (dump_file, "\nmust_precede: ");
+
+ for (e = u_node->in; e != 0; e = e->next_in)
+ if (TEST_BIT (sched_nodes, e->src->cuid)
+ && (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window))
+ {
+ if (dump_file)
+ fprintf (dump_file, "%d ", e->src->cuid);
+
+ SET_BIT (must_precede, e->src->cuid);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\nmust_follow: ");
+
+ for (e = u_node->out; e != 0; e = e->next_out)
+ if (TEST_BIT (sched_nodes, e->dest->cuid)
+ && (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window))
+ {
+ if (dump_file)
+ fprintf (dump_file, "%d ", e->dest->cuid);
+
+ SET_BIT (must_follow, e->dest->cuid);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\n");
+}
+
+/* Try to schedule U_NODE in cycle ROW and return true on success,
+ false otherwise. The attempt succeeds when
+ ps_add_node_check_conflicts returns a non-NULL ps_insn; in that case
+ ROW is recorded as SCHED_TIME of U_NODE, bit U is set in SCHED_NODES
+ and *NUM_SPLITS is reset to 0. On failure none of these are modified
+ here. The following parameters are used:
+ PS - The partial schedule.
+ U_NODE - The node to schedule.
+ U - The serial number of U_NODE.
+ ROW - The cycle in which to try placing U_NODE (the caller passes
+ each cycle of the scheduling window in turn).
+ SCHED_NODES - The set of already scheduled nodes.
+ NUM_SPLITS - The number of row splits made so far.
+ MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
+ the first row of the scheduling window; the caller passes NULL
+ for other rows)
+ MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
+ last row of the scheduling window; the caller passes NULL for
+ other rows) */
+
+static bool
+try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
+ int u, int row, sbitmap sched_nodes,
+ int *num_splits, sbitmap must_precede,
+ sbitmap must_follow)
+{
+ ps_insn_ptr psi;
+ bool success = 0;
+
+ verify_partial_schedule (ps, sched_nodes);
+ psi = ps_add_node_check_conflicts (ps, u_node, row,
+ must_precede, must_follow);
+ if (psi)
+ {
+ SCHED_TIME (u_node) = row;
+ SET_BIT (sched_nodes, u);
+ success = 1;
+ *num_splits = 0;
+ if (dump_file)
+ fprintf (dump_file, "Scheduled w/o split in %d\n", row);
+
+ }
+
+ return success;
+}
+
/* This function implements the scheduling algorithm for SMS according to the
above algorithm. */
static partial_schedule_ptr
int i, c, success, num_splits = 0;
int flush_and_start_over = true;
int num_nodes = g->num_nodes;
- ddg_edge_ptr e;
- ps_insn_ptr psi;
int start, end, step; /* Place together into one struct? */
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
sbitmap must_precede = sbitmap_alloc (num_nodes);
fprintf (dump_file, "\nTrying to schedule node %d \
INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
(g->nodes[u].insn)), start, end, step);
- /* Use must_follow & must_precede bitmaps to determine order
- of nodes within the cycle. */
-
- /* use must_follow & must_precede bitmaps to determine order
- of nodes within the cycle. */
- sbitmap_zero (must_precede);
- sbitmap_zero (must_follow);
- /* TODO: We can add an insn to the must_precede or must_follow
- bitmaps only if it has tight dependence to U and they
- both scheduled in the same row. The current check is less
- conservative and content with the fact that both U and the
- insn are scheduled in the same row. */
- for (e = u_node->in; e != 0; e = e->next_in)
- if (TEST_BIT (sched_nodes, e->src->cuid)
- && (SMODULO (SCHED_TIME (e->src), ii) ==
- SMODULO (start, ii)))
- SET_BIT (must_precede, e->src->cuid);
-
- for (e = u_node->out; e != 0; e = e->next_out)
- if (TEST_BIT (sched_nodes, e->dest->cuid)
- && (SMODULO (SCHED_TIME (e->dest), ii) ==
- SMODULO ((end - step), ii)))
- SET_BIT (must_follow, e->dest->cuid);
gcc_assert ((step > 0 && start < end)
|| (step < 0 && start > end));
+ calculate_must_precede_follow (u_node, start, end, step, ii,
+ sched_nodes, must_precede,
+ must_follow);
+
for (c = start; c != end; c += step)
{
- verify_partial_schedule (ps, sched_nodes);
+ sbitmap tmp_precede = NULL;
+ sbitmap tmp_follow = NULL;
- psi = ps_add_node_check_conflicts (ps, u_node, c,
- must_precede,
- must_follow);
-
- if (psi)
+ if (c == start)
+ {
+ if (step == 1)
+ tmp_precede = must_precede;
+ else /* step == -1. */
+ tmp_follow = must_follow;
+ }
+ if (c == end - step)
{
- SCHED_TIME (u_node) = c;
- SET_BIT (sched_nodes, u);
- success = 1;
- num_splits = 0;
- if (dump_file)
- fprintf (dump_file, "Scheduled w/o split in %d\n", c);
-
- break;
+ if (step == 1)
+ tmp_follow = must_follow;
+ else /* step == -1. */
+ tmp_precede = must_precede;
}
+
+ success =
+ try_scheduling_node_in_cycle (ps, u_node, u, c,
+ sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ if (success)
+ break;
}
+
verify_partial_schedule (ps, sched_nodes);
}
if (!success)
static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
-static nopa calculate_order_params (ddg_ptr, int mii);
+static nopa calculate_order_params (ddg_ptr, int, int *);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);
/* Order the nodes of G for scheduling and pass the result in
NODE_ORDER. Also set aux.count of each node to ASAP.
- Return the recMII for the given DDG. */
+ Store the maximal ASAP in *PMAX_ASAP. Return the recMII for the given DDG. */
static int
-sms_order_nodes (ddg_ptr g, int mii, int * node_order)
+sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
{
int i;
int rec_mii = 0;
ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
- nopa nops = calculate_order_params (g, mii);
+ nopa nops = calculate_order_params (g, mii, pmax_asap);
if (dump_file)
print_sccs (dump_file, sccs, g);
/* MII is needed if we consider backarcs (that do not close recursive cycles). */
static struct node_order_params *
-calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
+calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
int u;
int max_asap;
}
}
+ *pmax_asap = max_asap;
return node_order_params_arr;
}
next_ps_i;
next_ps_i = next_ps_i->next_in_row)
{
- if (TEST_BIT (must_follow, next_ps_i->node->cuid)
+ if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
&& ! first_must_follow)
first_must_follow = next_ps_i;
- if (TEST_BIT (must_precede, next_ps_i->node->cuid))
+ if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
{
/* If we have already met a node that must follow, then
there is no possible column. */
/* Check if next_in_row is dependent on ps_i, both having same sched
times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
- if (TEST_BIT (must_follow, next_node->cuid))
+ if (must_follow && TEST_BIT (must_follow, next_node->cuid))
return false;
/* Advance PS_I over its next_in_row in the doubly linked list. */