+/* A threshold for the number of repeated unsuccessful attempts to insert
+ an empty row, before we flush the partial schedule and start over. */
+#define MAX_SPLIT_NUM 10
+/* Given the partial schedule PS, this function calculates and returns the
+ cycles in which we can schedule the node with the given index I.
+ NOTE: Here we do the backtracking in SMS, in some special cases. We have
+ noticed that there are several cases in which we fail to SMS the loop
+ because the sched window of a node is empty due to tight data-deps. In
+ such cases we want to unschedule some of the predecessors/successors
+ until we get non-empty scheduling window. It returns -1 if the
+ scheduling window is empty and zero otherwise. */
+
+static int
+get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
+ sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
+{
+ int start, step, end;
+ ddg_edge_ptr e;
+ int u = nodes_order [i];
+ ddg_node_ptr u_node = &ps->g->nodes[u];
+ sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
+ sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
+ sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
+ sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
+ int psp_not_empty;
+ int pss_not_empty;
+
+ /* 1. compute sched window for u (start, end, step). */
+ sbitmap_zero (psp);
+ sbitmap_zero (pss);
+ psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
+ pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
+
+ if (psp_not_empty && !pss_not_empty)
+ {
+ int early_start = INT_MIN;
+
+ end = INT_MAX;
+ for (e = u_node->in; e != 0; e = e->next_in)
+ {
+ ddg_node_ptr v_node = e->src;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nProcessing edge: ");
+ print_ddg_edge (dump_file, e);
+ fprintf (dump_file,
+ "\nScheduling %d (%d) in psp_not_empty,"
+ " checking p %d (%d): ", u_node->cuid,
+ INSN_UID (u_node->insn), v_node->cuid, INSN_UID
+ (v_node->insn));
+ }
+
+ if (TEST_BIT (sched_nodes, v_node->cuid))
+ {
+ int p_st = SCHED_TIME (v_node);
+
+ early_start =
+ MAX (early_start, p_st + e->latency - (e->distance * ii));
+
+ if (dump_file)
+ fprintf (dump_file,
+ "pred st = %d; early_start = %d; latency: %d",
+ p_st, early_start, e->latency);
+
+ if (e->data_type == MEM_DEP)
+ end = MIN (end, SCHED_TIME (v_node) + ii - 1);
+ }
+ else if (dump_file)
+ fprintf (dump_file, "the node is not scheduled\n");
+ }
+ start = early_start;
+ end = MIN (end, early_start + ii);
+ /* Schedule the node close to it's predecessors. */
+ step = 1;
+
+ if (dump_file)
+ fprintf (dump_file,
+ "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
+ u_node->cuid, INSN_UID (u_node->insn), start, end, step);
+ }
+
+ else if (!psp_not_empty && pss_not_empty)
+ {
+ int late_start = INT_MAX;
+
+ end = INT_MIN;
+ for (e = u_node->out; e != 0; e = e->next_out)
+ {
+ ddg_node_ptr v_node = e->dest;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nProcessing edge:");
+ print_ddg_edge (dump_file, e);
+ fprintf (dump_file,
+ "\nScheduling %d (%d) in pss_not_empty,"
+ " checking s %d (%d): ", u_node->cuid,
+ INSN_UID (u_node->insn), v_node->cuid, INSN_UID
+ (v_node->insn));
+ }
+
+ if (TEST_BIT (sched_nodes, v_node->cuid))
+ {
+ int s_st = SCHED_TIME (v_node);
+
+ late_start = MIN (late_start,
+ s_st - e->latency + (e->distance * ii));
+
+ if (dump_file)
+ fprintf (dump_file,
+ "succ st = %d; late_start = %d; latency = %d",
+ s_st, late_start, e->latency);
+
+ if (e->data_type == MEM_DEP)
+ end = MAX (end, SCHED_TIME (v_node) - ii + 1);
+ if (dump_file)
+ fprintf (dump_file, "end = %d\n", end);
+
+ }
+ else if (dump_file)
+ fprintf (dump_file, "the node is not scheduled\n");
+
+ }
+ start = late_start;
+ end = MAX (end, late_start - ii);
+ /* Schedule the node close to it's successors. */
+ step = -1;
+
+ if (dump_file)
+ fprintf (dump_file,
+ "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
+ u_node->cuid, INSN_UID (u_node->insn), start, end, step);
+
+ }
+
+ else if (psp_not_empty && pss_not_empty)
+ {
+ int early_start = INT_MIN;
+ int late_start = INT_MAX;
+ int count_preds = 0;
+ int count_succs = 0;
+
+ start = INT_MIN;
+ end = INT_MAX;
+ for (e = u_node->in; e != 0; e = e->next_in)
+ {
+ ddg_node_ptr v_node = e->src;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nProcessing edge:");
+ print_ddg_edge (dump_file, e);
+ fprintf (dump_file,
+ "\nScheduling %d (%d) in psp_pss_not_empty,"
+ " checking p %d (%d): ", u_node->cuid, INSN_UID
+ (u_node->insn), v_node->cuid, INSN_UID
+ (v_node->insn));
+ }
+
+ if (TEST_BIT (sched_nodes, v_node->cuid))
+ {
+ int p_st = SCHED_TIME (v_node);
+
+ early_start = MAX (early_start,
+ p_st + e->latency
+ - (e->distance * ii));
+
+ if (dump_file)
+ fprintf (dump_file,
+ "pred st = %d; early_start = %d; latency = %d",
+ p_st, early_start, e->latency);
+
+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+ count_preds++;
+
+ if (e->data_type == MEM_DEP)
+ end = MIN (end, SCHED_TIME (v_node) + ii - 1);
+ }
+ else if (dump_file)
+ fprintf (dump_file, "the node is not scheduled\n");
+
+ }
+ for (e = u_node->out; e != 0; e = e->next_out)
+ {
+ ddg_node_ptr v_node = e->dest;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nProcessing edge:");
+ print_ddg_edge (dump_file, e);
+ fprintf (dump_file,
+ "\nScheduling %d (%d) in psp_pss_not_empty,"
+ " checking s %d (%d): ", u_node->cuid, INSN_UID
+ (u_node->insn), v_node->cuid, INSN_UID
+ (v_node->insn));
+ }
+
+ if (TEST_BIT (sched_nodes, v_node->cuid))
+ {
+ int s_st = SCHED_TIME (v_node);
+
+ late_start = MIN (late_start,
+ s_st - e->latency
+ + (e->distance * ii));
+
+ if (dump_file)
+ fprintf (dump_file,
+ "succ st = %d; late_start = %d; latency = %d",
+ s_st, late_start, e->latency);
+
+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+ count_succs++;
+
+ if (e->data_type == MEM_DEP)
+ start = MAX (start, SCHED_TIME (v_node) - ii + 1);
+ }
+ else if (dump_file)
+ fprintf (dump_file, "the node is not scheduled\n");
+
+ }
+ start = MAX (start, early_start);
+ end = MIN (end, MIN (early_start + ii, late_start + 1));
+ step = 1;
+ /* If there are more successors than predecessors schedule the
+ node close to it's successors. */
+ if (count_succs >= count_preds)
+ {
+ int old_start = start;
+
+ start = end - 1;
+ end = old_start - 1;
+ step = -1;
+ }
+ }
+ else /* psp is empty && pss is empty. */
+ {
+ start = SCHED_ASAP (u_node);
+ end = start + ii;
+ step = 1;
+ }
+
+ *start_p = start;
+ *step_p = step;
+ *end_p = end;
+ sbitmap_free (psp);
+ sbitmap_free (pss);
+
+ if ((start >= end && step == 1) || (start <= end && step == -1))
+ {
+ if (dump_file)
+ fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
+ start, end, step);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
+ node currently been scheduled. At the end of the calculation
+ MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
+ U_NODE which are (1) already scheduled in the first/last row of
+ U_NODE's scheduling window, (2) whose dependence inequality with U
+ becomes an equality when U is scheduled in this same row, and (3)
+ whose dependence latency is zero.
+
+ The first and last rows are calculated using the following parameters:
+ START/END rows - The cycles that begins/ends the traversal on the window;
+ searching for an empty cycle to schedule U_NODE.
+ STEP - The direction in which we traverse the window.
+ II - The initiation interval. */
+
+static void
+calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
+ int step, int ii, sbitmap sched_nodes,
+ sbitmap must_precede, sbitmap must_follow)
+{
+ ddg_edge_ptr e;
+ int first_cycle_in_window, last_cycle_in_window;
+
+ gcc_assert (must_precede && must_follow);
+
+ /* Consider the following scheduling window:
+ {first_cycle_in_window, first_cycle_in_window+1, ...,
+ last_cycle_in_window}. If step is 1 then the following will be
+ the order we traverse the window: {start=first_cycle_in_window,
+ first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
+ or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
+ end=first_cycle_in_window-1} if step is -1. */
+ first_cycle_in_window = (step == 1) ? start : end - step;
+ last_cycle_in_window = (step == 1) ? end - step : start;
+
+ sbitmap_zero (must_precede);
+ sbitmap_zero (must_follow);
+
+ if (dump_file)
+ fprintf (dump_file, "\nmust_precede: ");
+
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
+ && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
+ first_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->src) - (e->distance * ii) <=
+ SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
+ first_cycle_in_window
+ and check only if
+ SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
+ for (e = u_node->in; e != 0; e = e->next_in)
+ if (TEST_BIT (sched_nodes, e->src->cuid)
+ && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
+ first_cycle_in_window))
+ {
+ if (dump_file)
+ fprintf (dump_file, "%d ", e->src->cuid);
+
+ SET_BIT (must_precede, e->src->cuid);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\nmust_follow: ");
+
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
+ && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
+ last_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->dest) + (e->distance * ii) >=
+ SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
+ last_cycle_in_window
+ and check only if
+ SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
+ for (e = u_node->out; e != 0; e = e->next_out)
+ if (TEST_BIT (sched_nodes, e->dest->cuid)
+ && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
+ last_cycle_in_window))
+ {
+ if (dump_file)
+ fprintf (dump_file, "%d ", e->dest->cuid);
+
+ SET_BIT (must_follow, e->dest->cuid);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\n");
+}
+
+/* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
+ parameters to decide if that's possible:
+ PS - The partial schedule.
+ U - The serial number of U_NODE.
+ NUM_SPLITS - The number of row splits made so far.
+ MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
+ the first row of the scheduling window)
+ MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
+ last row of the scheduling window) */
+
+static bool
+try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
+ int u, int cycle, sbitmap sched_nodes,
+ int *num_splits, sbitmap must_precede,
+ sbitmap must_follow)
+{
+ ps_insn_ptr psi;
+ bool success = 0;
+
+ verify_partial_schedule (ps, sched_nodes);
+ psi = ps_add_node_check_conflicts (ps, u_node, cycle,
+ must_precede, must_follow);
+ if (psi)
+ {
+ SCHED_TIME (u_node) = cycle;
+ SET_BIT (sched_nodes, u);
+ success = 1;
+ *num_splits = 0;
+ if (dump_file)
+ fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
+
+ }
+
+ return success;
+}
+
+/* This function implements the scheduling algorithm for SMS according to the
+ above algorithm. */