/* Swing Modulo Scheduling implementation.
- Copyright (C) 2004, 2005, 2006, 2007
+ Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
#include "system.h"
#include "coretypes.h"
#include "tm.h"
-#include "toplev.h"
+#include "diagnostic-core.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
-#include "toplev.h"
#include "recog.h"
#include "sched-int.h"
#include "target.h"
#include "timevar.h"
#include "tree-pass.h"
#include "dbgcnt.h"
+#include "df.h"
#ifdef INSN_SCHEDULING
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
using a certain 'count' register and (2) the loop count can be
- adjusted by modifying this register prior to the loop.
+ adjusted by modifying this register prior to the loop.
TODO: Rely on cfgloop analysis instead. */
\f
/* This page defines partial-schedule structures and functions for
/* The number of different iterations the nodes in ps span, assuming
the stage boundaries are placed efficiently. */
-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
- + 1 + (ps)->ii - 1) / (ps)->ii)
+/* Number of II-cycle stages needed to cover the cycles MIN_CYCLE through
+ MAX_CYCLE inclusive. Every parameter is parenthesized so that
+ expression arguments expand with the intended precedence; note that
+ II is still evaluated more than once. */
+#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) (((max_cycle) - (min_cycle) \
+ + 1 + (ii) - 1) / (ii))
+/* The stage count of ps. */
+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
/* A single instruction in the partial schedule. */
struct ps_insn
ps_insn_ptr next_in_row,
prev_in_row;
- /* The number of nodes in the same row that come after this node. */
- int row_rest_count;
};
/* Holds the partial schedule as an array of II rows. Each entry of the
/* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
ps_insn_ptr *rows;
+ /* rows_length[i] holds the number of instructions in the row.
+ It is used only (as an optimization) to back off quickly from
+ trying to schedule a node in a full row; that is, to avoid running
+ through futile DFA state transitions. */
+ int *rows_length;
+
/* The earliest absolute cycle of an insn in the partial schedule. */
int min_cycle;
int max_cycle;
ddg_ptr g; /* The DDG of the insns in the partial schedule. */
+
+ int stage_count; /* The stage count of the partial schedule. */
};
/* We use this to record all the register replacements we do in
};
-
+
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
/* This page defines constants and structures for the modulo scheduling
driver. */
-/* As in haifa-sched.c: */
-/* issue_rate is the number of insns that can be scheduled in the same
- machine cycle. It can be defined in the config/mach/mach.h file,
- otherwise we set it to 1. */
-
-static int issue_rate;
-
-static int sms_order_nodes (ddg_ptr, int, int * result);
+static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
-static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
-static void generate_prolog_epilog (partial_schedule_ptr, struct loop *loop,
+static void permute_partial_schedule (partial_schedule_ptr, rtx);
+static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
rtx, rtx);
-static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
- int from_stage, int to_stage,
- int is_prolog, rtx count_reg);
+static void duplicate_insns_of_cycles (partial_schedule_ptr,
+ int, int, int, rtx);
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+ int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
+ sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
+ int, int, sbitmap, int *, sbitmap,
+ sbitmap);
+static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
code in order to use sched_analyze() for computing the dependencies.
They are used when initializing the sched_info structure. */
static const char *
-sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
+sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
{
static char tmp[80];
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
- regset cond_exec ATTRIBUTE_UNUSED,
- regset used ATTRIBUTE_UNUSED,
- regset set ATTRIBUTE_UNUSED)
+ regset used ATTRIBUTE_UNUSED)
{
}
-static struct sched_info sms_sched_info =
+static struct common_sched_info_def sms_common_sched_info;
+
+static struct sched_deps_info_def sms_sched_deps_info =
+ {
+ compute_jump_reg_dependencies,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL,
+ 0, 0, 0
+ };
+
+static struct haifa_sched_info sms_sched_info =
{
NULL,
NULL,
NULL,
sms_print_insn,
NULL,
- compute_jump_reg_dependencies,
+ NULL, /* insn_finishes_block_p */
NULL, NULL,
NULL, NULL,
- 0, 0, 0,
+ 0, 0,
- NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL,
0
};
-
/* Given HEAD and TAIL which are the first and last insns in a loop;
return the register which controls the loop. Return zero if it has
more than one occurrence in the loop besides the control part or the
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
- rtx reg, condition, insn;
- bool found = false;
+ rtx reg, condition, insn, first_insn_not_to_check;
if (!JUMP_P (tail))
return NULL_RTX;
until the decrement. We assume the control part consists of
either a single (parallel) branch-on-count or a (non-parallel)
branch immediately preceded by a single (decrement) insn. */
- for (insn = head; insn != PREV_INSN (tail); insn = NEXT_INSN (insn))
- if ((found = reg_mentioned_p (reg, insn)) == true)
- break;
- if (found)
- {
- if (dump_file)
- fprintf (dump_file, "SMS count_reg found outside control\n");
+ first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
+ : prev_nondebug_insn (tail));
- return NULL_RTX;
- }
- /* One last check in case the do-loop pattern is parallel. */
- if (GET_CODE (PATTERN (tail)) == PARALLEL)
- if (reg_mentioned_p (reg, PREV_INSN (tail)))
+ for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
+ if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
{
if (dump_file)
- fprintf (dump_file, "SMS count_reg found outside control\n");
+ {
+ fprintf (dump_file, "SMS count_reg found ");
+ print_rtl_single (dump_file, reg);
+ fprintf (dump_file, " outside control in insn:\n");
+ print_rtl_single (dump_file, insn);
+ }
return NULL_RTX;
}
+
return reg;
#else
return NULL_RTX;
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
- if (INSN_P (insn) && single_set (insn) &&
+ if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
{
rtx pat = single_set (insn);
- if (GET_CODE (SET_SRC (pat)) == CONST_INT)
+ if (CONST_INT_P (SET_SRC (pat)))
{
*count = INTVAL (SET_SRC (pat));
return insn;
static int
res_MII (ddg_ptr g)
{
- return (g->num_nodes / issue_rate);
+ if (targetm.sched.sms_res_mii) /* A target-supplied hook, when set, provides the value. */
+ return targetm.sched.sms_res_mii (g);
+
+ return ((g->num_nodes - g->num_debug) / issue_rate); /* Nodes counted in num_debug are excluded; integer division truncates. */
}
/* Now generate the reg_moves, attaching relevant uses to them. */
SCHED_NREG_MOVES (u) = nreg_moves;
old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
- last_reg_move = u->insn;
+ /* Insert the reg-moves right before the notes which precede
+ the insn they relate to. */
+ last_reg_move = u->first_note;
for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
{
}
}
-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
- of SCHED_ROW and SCHED_STAGE. */
+/* Update the sched_params (time, row and stage) for node U using the II,
+ the CYCLE of U and MIN_CYCLE.
+ We're not simply taking the following
+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
+ because the stages may not be aligned on cycle 0. */
static void
-normalize_sched_times (partial_schedule_ptr ps)
+update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
+{
+ int sc_until_cycle_zero;
+ int stage;
+
+ SCHED_TIME (u) = cycle;
+ SCHED_ROW (u) = SMODULO (cycle, ii);
+
+ /* The calculation of stage count is done adding the number
+ of stages before cycle zero and after cycle zero.
+ SC_UNTIL_CYCLE_ZERO is the number of stages needed to cover the
+ cycles MIN_CYCLE .. -1. For a node at a negative cycle, STAGE is
+ the number of stages covering its own cycle .. -1, so its stage
+ index is the difference of the two. For a node at a non-negative
+ cycle, STAGE is the number of stages covering 0 .. its cycle, and
+ the index is offset by the stages before cycle zero, minus one to
+ make it zero-based. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); /* Stages spanned by cycles SCHED_TIME (u) .. -1. */
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); /* Stages spanned by cycles 0 .. SCHED_TIME (u). */
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
+}
+
+/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
+ SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
+ will move to cycle zero. */
+static void
+reset_sched_times (partial_schedule_ptr ps, int amount)
{
int row;
- int amount = PS_MIN_CYCLE (ps);
int ii = ps->ii;
ps_insn_ptr crr_insn;
{
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
+ int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
- if (dump_file)
- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
- (u), ps->min_cycle);
+ if (dump_file)
+ {
+ /* Print the scheduling times after the rotation. */
+ fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
+ "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
+ INSN_UID (crr_insn->node->insn), normalized_time,
+ new_min_cycle);
+ if (JUMP_P (crr_insn->node->insn))
+ fprintf (dump_file, " (branch)");
+ fprintf (dump_file, "\n");
+ }
+
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = normalized_time % ii;
- SCHED_STAGE (u) = normalized_time / ii;
+
+ crr_insn->cycle = normalized_time;
+ update_node_sched_params (u, ii, normalized_time, new_min_cycle);
}
}
-
+
/* Set SCHED_COLUMN of each node according to its position in PS. */
static void
set_columns_for_ps (partial_schedule_ptr ps)
PREV_INSN (last));
}
+/* Choose the ordering constraints that apply when trying cycle C of a
+ scheduling window which begins at START and stops before END, advancing
+ by STEP. *TMP_PRECEDE receives MUST_PRECEDE and *TMP_FOLLOW receives
+ MUST_FOLLOW only when C lies on the corresponding border of the window;
+ otherwise each of them is set to NULL. */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+ sbitmap *tmp_precede, sbitmap must_precede, int c,
+ int start, int end, int step)
+{
+ /* Any STEP other than 1 is treated as a backward (-1) window. */
+ bool forward = (step == 1);
+
+ /* What gets written when C is the first cycle of the window ... */
+ sbitmap *first_slot = forward ? tmp_precede : tmp_follow;
+ sbitmap first_value = forward ? must_precede : must_follow;
+
+ /* ... and what gets written when C is its last cycle. */
+ sbitmap *last_slot = forward ? tmp_follow : tmp_precede;
+ sbitmap last_value = forward ? must_follow : must_precede;
+
+ *tmp_precede = NULL;
+ *tmp_follow = NULL;
+
+ if (c == start)
+ *first_slot = first_value;
+ if (c == end - step)
+ *last_slot = last_value;
+}
+
+/* Try to reduce the stage count (SC) of partial schedule PS, built for
+ DDG G, by normalizing PS to start from cycle zero and then moving the
+ closing branch of G to row ii-1.
+
+ Return true if, on return, PS is normalized and the branch sits in
+ row ii-1. Return false otherwise: either the SC cannot be improved
+ (PS is left untouched), or PS was normalized but the branch could not
+ be placed in row ii-1 (it is then put back in the cycle it had after
+ normalization). */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+ int amount = PS_MIN_CYCLE (ps);
+ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+ int start, end, step;
+ int ii = ps->ii;
+ bool ok = false;
+ int stage_count, stage_count_curr;
+
+ /* Compare the SC after normalization and SC after bringing the branch
+ to row ii-1. If they are equal just bail out. */
+ stage_count = calculate_stage_count (ps, amount);
+ stage_count_curr =
+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1));
+
+ if (stage_count == stage_count_curr)
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS SC already optimized.\n");
+
+ ok = false;
+ goto clear;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS Trying to optimize branch location\n");
+ fprintf (dump_file, "SMS partial schedule before trial:\n");
+ print_partial_schedule (ps, dump_file);
+ }
+
+ /* First, normalize the partial scheduling. */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+ ii, stage_count);
+ print_partial_schedule (ps, dump_file);
+ }
+
+ /* Normalization alone may already have put the branch in row ii-1. */
+ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
+ {
+ ok = true;
+ goto clear;
+ }
+
+ /* Treat every node as scheduled when computing the branch's window. */
+ sbitmap_ones (sched_nodes);
+
+ /* Calculate the new placement of the branch. It should be in row
+ ii-1 and fall into its scheduling window. */
+ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+ &step, &end) == 0)
+ {
+ bool success;
+ ps_insn_ptr next_ps_i;
+ int branch_cycle = SCHED_TIME (g->closing_branch);
+ int row = SMODULO (branch_cycle, ps->ii);
+ int num_splits = 0;
+ sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+ int c;
+
+ if (dump_file)
+ fprintf (dump_file, "\nTrying to schedule node %d "
+ "INSN = %d in (%d .. %d) step %d\n",
+ g->closing_branch->cuid,
+ (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+ if (step == 1)
+ {
+ /* First cycle at or after START that falls in row ii-1. */
+ c = start + ii - SMODULO (start, ii) - 1;
+ gcc_assert (c >= start);
+ if (c >= end)
+ {
+ ok = false;
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ goto clear;
+ }
+ }
+ else
+ {
+ /* Closest cycle below START that falls in row ii-1. */
+ c = start - SMODULO (start, ii) - 1;
+ gcc_assert (c <= start);
+
+ if (c <= end)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ ok = false;
+ goto clear;
+ }
+ }
+
+ must_precede = sbitmap_alloc (g->num_nodes);
+ must_follow = sbitmap_alloc (g->num_nodes);
+
+ /* Try to schedule the branch in its new cycle. */
+ calculate_must_precede_follow (g->closing_branch, start, end,
+ step, ii, sched_nodes,
+ must_precede, must_follow);
+
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, c, start, end, step);
+
+ /* Find the element in the partial schedule related to the closing
+ branch so we can remove it from its current cycle. */
+ for (next_ps_i = ps->rows[row];
+ next_ps_i; next_ps_i = next_ps_i->next_in_row)
+ if (next_ps_i->node->cuid == g->closing_branch->cuid)
+ break;
+
+ gcc_assert (next_ps_i);
+ /* Perform the removal outside of gcc_assert: the operand of an
+ assertion may not be evaluated when assert checking is disabled,
+ which would leave the branch in its old row while it is inserted
+ again below. */
+ success = remove_node_from_ps (ps, next_ps_i);
+ gcc_assert (success);
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid, c,
+ sched_nodes, &num_splits,
+ tmp_precede, tmp_follow);
+ gcc_assert (num_splits == 0);
+ if (!success)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d, "
+ "bringing it back to cycle %d\n", c, branch_cycle);
+
+ /* The branch could not be placed in row ii - 1.
+ Put it back in its original place in the partial
+ schedule. */
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, branch_cycle, start, end,
+ step);
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid,
+ branch_cycle, sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ gcc_assert (success && (num_splits == 0));
+ ok = false;
+ }
+ else
+ {
+ /* The branch is placed in row ii - 1. */
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS success in moving branch to cycle %d\n", c);
+
+ update_node_sched_params (g->closing_branch, ii, c,
+ PS_MIN_CYCLE (ps));
+ ok = true;
+ }
+
+ free (must_precede);
+ free (must_follow);
+ }
+
+clear:
+ free (sched_nodes);
+ return ok;
+}
+
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int to_stage, int for_prolog, rtx count_reg)
/* Do not duplicate any insn which refers to count_reg as it
belongs to the control part.
+ The closing branch is scheduled as well and thus should
+ be ignored.
TODO: This should be done by analyzing the control part of
the loop. */
- if (reg_mentioned_p (count_reg, u_node->insn))
+ if (reg_mentioned_p (count_reg, u_node->insn)
+ || JUMP_P (ps_ij->node->insn))
continue;
if (for_prolog)
int i;
int last_stage = PS_STAGE_COUNT (ps) - 1;
edge e;
-
+
/* Generate the prolog, inserting its insns on the loop-entry edge. */
start_sequence ();
for (i = 0; i < last_stage; i++)
duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
-
+
/* Put the prolog on the entry edge. */
e = loop_preheader_edge (loop);
split_edge_and_insert (e, get_insns ());
for (i = 0; i < last_stage; i++)
duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
-
+
/* Put the epilogue on the exit edge. */
gcc_assert (single_exit (loop));
e = single_exit (loop);
for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
{
if (NOTE_P (head) || LABEL_P (head)
- || (INSN_P (head) && JUMP_P (head)))
+ || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
continue;
empty_bb = false;
break;
{
if (loop->inner || !loop_outer (loop))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS loop inner or !loop_outer\n");
return false;
+ }
if (!single_exit (loop))
{
if (dump_file)
{
rtx insn = BB_END (loop->header);
-
+
fprintf (dump_file, "SMS loop many exits ");
fprintf (dump_file, " %s %d (file, line)\n",
insn_file (insn), insn_line (insn));
if (dump_file)
{
rtx insn = BB_END (loop->header);
-
+
fprintf (dump_file, "SMS loop many BBs. ");
fprintf (dump_file, " %s %d (file, line)\n",
insn_file (insn), insn_line (insn));
}
}
+/* Point the scheduler's global info pointers at the SMS-specific
+ descriptors defined in this file. The common info starts as a copy
+ of the haifa one and is then tagged with the SMS pass id. */
+static void
+setup_sched_infos (void)
+{
+ /* Dependence-analysis and scheduler hooks used by SMS. */
+ current_sched_info = &sms_sched_info;
+ sched_deps_info = &sms_sched_deps_info;
+
+ /* Common info: haifa's settings, with our own pass id. */
+ memcpy (&sms_common_sched_info, &haifa_common_sched_info,
+ sizeof (sms_common_sched_info));
+ sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
+ common_sched_info = &sms_common_sched_info;
+}
+
/* Probability in % that the sms-ed loop rolls enough so that optimized
version may be entered. Just a guess. */
#define PROB_SMS_ENOUGH_ITERATIONS 80
rtx insn;
ddg_ptr *g_arr, g;
int * node_order;
- int maxii;
+ int maxii, max_asap;
loop_iterator li;
partial_schedule_ptr ps;
basic_block bb = NULL;
issue_rate = 1;
/* Initialize the scheduler. */
- current_sched_info = &sms_sched_info;
-
- /* Init Data Flow analysis, to be used in interloop dep calculation. */
- df_set_flags (DF_LR_RUN_DCE);
- df_rd_add_problem ();
- df_note_add_problem ();
- df_chain_add_problem (DF_DU_CHAIN);
- df_analyze ();
- regstat_compute_calls_crossed ();
- sched_init ();
+ setup_sched_infos ();
+ haifa_sched_init ();
/* Allocate memory to hold the DDG array one entry for each loop.
We use loop->num as index into this array. */
g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n\nSMS analysis phase\n");
+ fprintf (dump_file, "===================\n\n");
+ }
+
/* Build DDGs for all the relevant loops and hold them in G_ARR
indexed by the loop index. */
FOR_EACH_LOOP (li, loop, 0)
break;
}
+ if (dump_file)
+ {
+ rtx insn = BB_END (loop->header);
+
+ fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
+ loop->num, insn_file (insn), insn_line (insn));
+
+ }
+
if (! loop_canon_p (loop))
continue;
if (! loop_single_full_bb_p (loop))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
continue;
+ }
bb = loop->header;
if (single_exit (loop)->count)
trip_count = latch_edge->count / single_exit (loop)->count;
- /* Perfrom SMS only on loops that their average count is above threshold. */
+ /* Perform SMS only on loops that their average count is above threshold. */
if ( latch_edge->count
&& (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
/* Make sure this is a doloop. */
if ( !(count_reg = doloop_register_get (head, tail)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS doloop_register_get failed\n");
continue;
+ }
- /* Don't handle BBs with calls or barriers, or !single_set insns,
- or auto-increment insns (to avoid creating invalid reg-moves
- for the auto-increment insns).
+ /* Don't handle BBs with calls or barriers or auto-increment insns
+ (to avoid creating invalid reg-moves for the auto-increment insns),
+ or !single_set with the exception of instructions that include
+ count_reg---these instructions are part of the control part
+ that do-loop recognizes.
??? Should handle auto-increment insns.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
if (CALL_P (insn)
|| BARRIER_P (insn)
- || (INSN_P (insn) && !JUMP_P (insn)
- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
+ || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
+ && !reg_mentioned_p (count_reg, insn))
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
fprintf (dump_file, "SMS loop-with-barrier\n");
else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
fprintf (dump_file, "SMS reg inc\n");
- else if ((INSN_P (insn) && !JUMP_P (insn)
+ else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
fprintf (dump_file, "SMS loop-with-not-single-set\n");
else
continue;
}
- if (! (g = create_ddg (bb, 0)))
+ /* Always schedule the closing branch with the rest of the
+ instructions. The branch is rotated to be in row ii-1 at the
+ end of the scheduling procedure to make sure it's the last
+ instruction in the iteration. */
+ if (! (g = create_ddg (bb, 1)))
{
if (dump_file)
- fprintf (dump_file, "SMS doloop\n");
+ fprintf (dump_file, "SMS create_ddg failed\n");
continue;
}
g_arr[loop->num] = g;
+ if (dump_file)
+ fprintf (dump_file, "...OK\n");
+
}
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nSMS transformation phase\n");
+ fprintf (dump_file, "=========================\n\n");
+ }
/* We don't want to perform SMS on new loops - created by versioning. */
FOR_EACH_LOOP (li, loop, 0)
int mii, rec_mii;
unsigned stage_count = 0;
HOST_WIDEST_INT loop_count = 0;
+ bool opt_sc_p = false;
if (! (g = g_arr[loop->num]))
continue;
if (dump_file)
- print_ddg (dump_file, g);
+ {
+ rtx insn = BB_END (loop->header);
+
+ fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
+ loop->num, insn_file (insn), insn_line (insn));
+
+ print_ddg (dump_file, g);
+ }
get_ebb_head_tail (loop->header, loop->header, &head, &tail);
node_order = XNEWVEC (int, g->num_nodes);
mii = 1; /* Need to pass some estimate of mii. */
- rec_mii = sms_order_nodes (g, mii, node_order);
+ rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
mii = MAX (res_MII (g), rec_mii);
- maxii = MAXII_FACTOR * mii;
+ maxii = MAX (max_asap, MAXII_FACTOR * mii);
if (dump_file)
fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
set_node_sched_params (g);
ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
+
if (ps)
- stage_count = PS_STAGE_COUNT (ps);
-
- /* Stage count of 1 means that there is no interleaving between
- iterations, let the scheduling passes do the job. */
- if (stage_count < 1
+ {
+ /* Try to achieve optimized SC by normalizing the partial
+ schedule (having the cycles start from cycle zero).
+ The branch location must be placed in row ii-1 in the
+ final scheduling. If failed, shift all instructions to
+ position the branch in row ii-1. */
+ opt_sc_p = optimize_sc (ps, g);
+ if (opt_sc_p)
+ stage_count = calculate_stage_count (ps, 0);
+ else
+ {
+ /* Bring the branch to cycle ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ if (dump_file)
+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
+
+ stage_count = calculate_stage_count (ps, amount);
+ }
+
+ gcc_assert (stage_count >= 1);
+ PS_STAGE_COUNT (ps) = stage_count;
+ }
+
+ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
+ 1 means that there is no interleaving between iterations thus
+ we let the scheduling passes do the job in this case. */
+ if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
|| (count_init && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
fprintf (dump_file, ")\n");
}
- continue;
}
else
{
struct undo_replace_buff_elem *reg_move_replaces;
- if (dump_file)
- {
+ if (!opt_sc_p)
+ {
+ /* Rotate the partial schedule to have the branch in row ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ }
+
+ set_columns_for_ps (ps);
+
+ canon_loop (loop);
+
+ if (dump_file)
+ {
fprintf (dump_file,
"SMS succeeded %d %d (with ii, sc)\n", ps->ii,
stage_count);
print_partial_schedule (ps, dump_file);
- fprintf (dump_file,
- "SMS Branch (%d) will later be scheduled at cycle %d.\n",
- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
}
-
- /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
- the closing_branch was scheduled and should appear in the last (ii-1)
- row. Otherwise, we are free to schedule the branch, and we let nodes
- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
- row; this should reduce stage_count to minimum.
- TODO: Revisit the issue of scheduling the insns of the
- control part relative to the branch when the control part
- has more than one insn. */
- normalize_sched_times (ps);
- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
- set_columns_for_ps (ps);
-
- canon_loop (loop);
-
+
/* case the BCT count is not known , Do loop-versioning */
if (count_reg && ! count_init)
{
print_node_sched_params (dump_file, g->num_nodes, g);
/* Generate prolog and epilog. */
generate_prolog_epilog (ps, loop, count_reg, count_init);
-
+
free_undo_replace_buff (reg_move_replaces);
}
free_ddg (g);
}
- regstat_free_calls_crossed ();
free (g_arr);
/* Release scheduler data, needed until now because of DFA. */
- sched_finish ();
+ haifa_sched_finish ();
loop_optimizer_finalize ();
}
scheduling window is empty and zero otherwise. */
static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
+ sbitmap sched_nodes, int ii, int *start_p, int *step_p,
+ int *end_p)
{
int start, step, end;
+ int early_start, late_start;
ddg_edge_ptr e;
- int u = nodes_order [i];
- ddg_node_ptr u_node = &ps->g->nodes[u];
sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
int psp_not_empty;
int pss_not_empty;
+ int count_preds;
+ int count_succs;
/* 1. compute sched window for u (start, end, step). */
sbitmap_zero (psp);
psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
- if (psp_not_empty && !pss_not_empty)
- {
- int early_start = INT_MIN;
-
- end = INT_MAX;
- for (e = u_node->in; e != 0; e = e->next_in)
- {
- ddg_node_ptr v_node = e->src;
-
- if (dump_file)
- {
- fprintf (dump_file, "\nProcessing edge: ");
- print_ddg_edge (dump_file, e);
- fprintf (dump_file,
- "\nScheduling %d (%d) in psp_not_empty,"
- " checking node %d (%d): ", u_node->cuid,
- INSN_UID (u_node->insn), v_node->cuid, INSN_UID
- (v_node->insn));
- }
-
- if (TEST_BIT (sched_nodes, v_node->cuid))
- {
- int node_st = SCHED_TIME (v_node)
- + e->latency - (e->distance * ii);
+ /* We first compute a forward range (start <= end), then decide whether
+ to reverse it. */
+ early_start = INT_MIN;
+ late_start = INT_MAX;
+ start = INT_MIN;
+ end = INT_MAX;
+ step = 1;
- early_start = MAX (early_start, node_st);
+ count_preds = 0;
+ count_succs = 0;
- if (e->data_type == MEM_DEP)
- end = MIN (end, SCHED_TIME (v_node) + ii - 1);
- }
- }
- start = early_start;
- end = MIN (end, early_start + ii);
- step = 1;
-
- if (dump_file)
- fprintf (dump_file,
- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
- u_node->cuid, INSN_UID (u_node->insn), start, end, step);
+ if (dump_file && (psp_not_empty || pss_not_empty))
+ {
+ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
+ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
+ fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
+ "start", "early start", "late start", "end", "time");
+ fprintf (dump_file, "=========== =========== =========== ==========="
+ " =====\n");
}
+ /* Calculate early_start and limit end. Both bounds are inclusive. */
+ if (psp_not_empty)
+ for (e = u_node->in; e != 0; e = e->next_in)
+ {
+ ddg_node_ptr v_node = e->src;
- else if (!psp_not_empty && pss_not_empty)
- {
- int late_start = INT_MAX;
+ if (TEST_BIT (sched_nodes, v_node->cuid))
+ {
+ int p_st = SCHED_TIME (v_node);
+ int earliest = p_st + e->latency - (e->distance * ii);
+ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
- end = INT_MIN;
- for (e = u_node->out; e != 0; e = e->next_out)
- {
- ddg_node_ptr v_node = e->dest;
+ if (dump_file)
+ {
+ fprintf (dump_file, "%11s %11d %11s %11d %5d",
+ "", earliest, "", latest, p_st);
+ print_ddg_edge (dump_file, e);
+ fprintf (dump_file, "\n");
+ }
- if (dump_file)
- {
- fprintf (dump_file, "\nProcessing edge:");
- print_ddg_edge (dump_file, e);
- fprintf (dump_file,
- "\nScheduling %d (%d) in pss_not_empty,"
- " checking node %d (%d): ", u_node->cuid,
- INSN_UID (u_node->insn), v_node->cuid, INSN_UID
- (v_node->insn));
- }
+ early_start = MAX (early_start, earliest);
+ end = MIN (end, latest);
- if (TEST_BIT (sched_nodes, v_node->cuid))
- {
- late_start = MIN (late_start,
- SCHED_TIME (v_node) - e->latency
- + (e->distance * ii));
- if (dump_file)
- fprintf (dump_file, "late_start = %d;", late_start);
+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+ count_preds++;
+ }
+ }
- if (e->data_type == MEM_DEP)
- end = MAX (end, SCHED_TIME (v_node) - ii + 1);
- if (dump_file)
- fprintf (dump_file, "end = %d\n", end);
+ /* Calculate late_start and limit start. Both bounds are inclusive. */
+ if (pss_not_empty)
+ for (e = u_node->out; e != 0; e = e->next_out)
+ {
+ ddg_node_ptr v_node = e->dest;
- }
- else if (dump_file)
- fprintf (dump_file, "the node is not scheduled\n");
+ if (TEST_BIT (sched_nodes, v_node->cuid))
+ {
+ int s_st = SCHED_TIME (v_node);
+ int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
+ int latest = s_st - e->latency + (e->distance * ii);
- }
- start = late_start;
- end = MAX (end, late_start - ii);
- step = -1;
+ if (dump_file)
+ {
+ fprintf (dump_file, "%11d %11s %11d %11s %5d",
+ earliest, "", latest, "", s_st);
+ print_ddg_edge (dump_file, e);
+ fprintf (dump_file, "\n");
+ }
- if (dump_file)
- fprintf (dump_file,
- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
- u_node->cuid, INSN_UID (u_node->insn), start, end, step);
+ start = MAX (start, earliest);
+ late_start = MIN (late_start, latest);
- }
+ if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+ count_succs++;
+ }
+ }
- else if (psp_not_empty && pss_not_empty)
+ if (dump_file && (psp_not_empty || pss_not_empty))
{
- int early_start = INT_MIN;
- int late_start = INT_MAX;
-
- start = INT_MIN;
- end = INT_MAX;
- for (e = u_node->in; e != 0; e = e->next_in)
- {
- ddg_node_ptr v_node = e->src;
+ fprintf (dump_file, "----------- ----------- ----------- -----------"
+ " -----\n");
+ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
+ start, early_start, late_start, end, "",
+ "(max, max, min, min)");
+ }
- if (dump_file)
- {
- fprintf (dump_file, "\nProcessing edge:");
- print_ddg_edge (dump_file, e);
- fprintf (dump_file,
- "\nScheduling %d (%d) in psp_pss_not_empty,"
- " checking p %d (%d): ", u_node->cuid, INSN_UID
- (u_node->insn), v_node->cuid, INSN_UID
- (v_node->insn));
- }
+ /* Get a target scheduling window no bigger than ii. */
+ if (early_start == INT_MIN && late_start == INT_MAX)
+ early_start = SCHED_ASAP (u_node);
+ else if (early_start == INT_MIN)
+ early_start = late_start - (ii - 1);
+ late_start = MIN (late_start, early_start + (ii - 1));
- if (TEST_BIT (sched_nodes, v_node->cuid))
- {
- early_start = MAX (early_start,
- SCHED_TIME (v_node) + e->latency
- - (e->distance * ii));
- if (e->data_type == MEM_DEP)
- end = MIN (end, SCHED_TIME (v_node) + ii - 1);
- }
- }
- for (e = u_node->out; e != 0; e = e->next_out)
- {
- ddg_node_ptr v_node = e->dest;
+ /* Apply memory dependence limits. */
+ start = MAX (start, early_start);
+ end = MIN (end, late_start);
- if (dump_file)
- {
- fprintf (dump_file, "\nProcessing edge:");
- print_ddg_edge (dump_file, e);
- fprintf (dump_file,
- "\nScheduling %d (%d) in psp_pss_not_empty,"
- " checking s %d (%d): ", u_node->cuid, INSN_UID
- (u_node->insn), v_node->cuid, INSN_UID
- (v_node->insn));
- }
+ if (dump_file && (psp_not_empty || pss_not_empty))
+ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
+ "", start, end, "", "");
- if (TEST_BIT (sched_nodes, v_node->cuid))
- {
- late_start = MIN (late_start,
- SCHED_TIME (v_node) - e->latency
- + (e->distance * ii));
- if (e->data_type == MEM_DEP)
- start = MAX (start, SCHED_TIME (v_node) - ii + 1);
- }
- }
- start = MAX (start, early_start);
- end = MIN (end, MIN (early_start + ii, late_start + 1));
- step = 1;
- }
- else /* psp is empty && pss is empty. */
+ /* If there are at least as many successors as predecessors, schedule the
+ node close to its successors. */
+ if (pss_not_empty && count_succs >= count_preds)
{
- start = SCHED_ASAP (u_node);
- end = start + ii;
- step = 1;
+ int tmp = end;
+ end = start;
+ start = tmp;
+ step = -1;
}
+ /* Now that we've finalized the window, make END an exclusive rather
+ than an inclusive bound. */
+ end += step;
+
*start_p = start;
*step_p = step;
*end_p = end;
if (dump_file)
fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
start, end, step);
- return -1;
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE, which is the
+ node currently being scheduled. At the end of the calculation
+ MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
+ U_NODE which are (1) already scheduled in the first/last row of
+ U_NODE's scheduling window, (2) whose dependence inequality with U
+ becomes an equality when U is scheduled in this same row, and (3)
+ whose dependence latency is zero.
+
+ The first and last rows are calculated using the following parameters:
+ START/END rows - The cycles that begin/end the traversal on the window;
+ searching for an empty cycle to schedule U_NODE. END is exclusive:
+ it lies one STEP beyond the last cycle that is actually visited.
+ STEP - The direction in which we traverse the window (1 or -1).
+ II - The initiation interval.
+
+ Only edges whose other endpoint has its bit set in SCHED_NODES are
+ considered. MUST_PRECEDE and MUST_FOLLOW must both be non-null; they
+ are cleared here before being filled in. When DUMP_FILE is set, the
+ cuids added to each bitmap are also printed to it. */
+
+static void
+calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
+ int step, int ii, sbitmap sched_nodes,
+ sbitmap must_precede, sbitmap must_follow)
+{
+ ddg_edge_ptr e;
+ int first_cycle_in_window, last_cycle_in_window;
+
+ gcc_assert (must_precede && must_follow);
+
+ /* Consider the following scheduling window:
+ {first_cycle_in_window, first_cycle_in_window+1, ...,
+ last_cycle_in_window}. If step is 1 then the following will be
+ the order we traverse the window: {start=first_cycle_in_window,
+ first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
+ or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
+ end=first_cycle_in_window-1} if step is -1. In both cases
+ END - STEP is the last cycle visited, because END is exclusive. */
+ first_cycle_in_window = (step == 1) ? start : end - step;
+ last_cycle_in_window = (step == 1) ? end - step : start;
+
+ sbitmap_zero (must_precede);
+ sbitmap_zero (must_follow);
+
+ if (dump_file)
+ fprintf (dump_file, "\nmust_precede: ");
+
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
+ && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
+ first_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->src) - (e->distance * ii) <=
+ SCHED_TIME (e->src) + e->latency - (e->distance * ii) <=
+ first_cycle_in_window
+ and check only if
+ SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
+ for (e = u_node->in; e != 0; e = e->next_in)
+ if (TEST_BIT (sched_nodes, e->src->cuid)
+ && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
+ first_cycle_in_window))
+ {
+ if (dump_file)
+ fprintf (dump_file, "%d ", e->src->cuid);
+
+ SET_BIT (must_precede, e->src->cuid);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\nmust_follow: ");
+
+ /* Instead of checking if:
+ (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
+ && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
+ last_cycle_in_window)
+ && e->latency == 0
+ we use the fact that latency is non-negative:
+ SCHED_TIME (e->dest) + (e->distance * ii) >=
+ SCHED_TIME (e->dest) - e->latency + (e->distance * ii) >=
+ last_cycle_in_window
+ and check only if
+ SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
+ for (e = u_node->out; e != 0; e = e->next_out)
+ if (TEST_BIT (sched_nodes, e->dest->cuid)
+ && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
+ last_cycle_in_window))
+ {
+ if (dump_file)
+ fprintf (dump_file, "%d ", e->dest->cuid);
+
+ SET_BIT (must_follow, e->dest->cuid);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\n");
+}
+
+/* Try to place U_NODE in CYCLE of PS and return true on success, false
+ otherwise. On success SCHED_TIME (U_NODE) is set to CYCLE, bit U is
+ set in SCHED_NODES and *NUM_SPLITS is reset to zero; on failure none
+ of these is modified by this function. The partial schedule is
+ verified against SCHED_NODES before the attempt. Parameters:
+ PS - The partial schedule.
+ U_NODE - The node to schedule.
+ U - The serial number of U_NODE.
+ CYCLE - The cycle in which to try scheduling U_NODE.
+ SCHED_NODES - The set of nodes scheduled so far.
+ NUM_SPLITS - The number of row splits made so far.
+ MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
+ the first row of the scheduling window)
+ MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
+ last row of the scheduling window) */
+
+static bool
+try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
+ int u, int cycle, sbitmap sched_nodes,
+ int *num_splits, sbitmap must_precede,
+ sbitmap must_follow)
+{
+ ps_insn_ptr psi;
+ bool success = 0;
+
+ verify_partial_schedule (ps, sched_nodes);
+ psi = ps_add_node_check_conflicts (ps, u_node, cycle,
+ must_precede, must_follow);
+ if (psi)
+ {
+ SCHED_TIME (u_node) = cycle;
+ SET_BIT (sched_nodes, u);
+ success = 1;
+ *num_splits = 0;
+ if (dump_file)
+ fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
+
 }
- return 0;
+ return success;
 }
/* This function implements the scheduling algorithm for SMS according to the
int i, c, success, num_splits = 0;
int flush_and_start_over = true;
int num_nodes = g->num_nodes;
- ddg_edge_ptr e;
- ps_insn_ptr psi;
int start, end, step; /* Place together into one struct? */
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
sbitmap must_precede = sbitmap_alloc (num_nodes);
ddg_node_ptr u_node = &ps->g->nodes[u];
rtx insn = u_node->insn;
- if (!INSN_P (insn))
- {
- RESET_BIT (tobe_scheduled, u);
- continue;
- }
-
- if (JUMP_P (insn)) /* Closing branch handled later. */
+ if (!NONDEBUG_INSN_P (insn))
{
RESET_BIT (tobe_scheduled, u);
continue;
/* Try to get non-empty scheduling window. */
success = 0;
- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+ if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
&step, &end) == 0)
{
if (dump_file)
- fprintf (dump_file, "\nTrying to schedule node %d \
- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
+ fprintf (dump_file, "\nTrying to schedule node %d "
+ "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
(g->nodes[u].insn)), start, end, step);
- /* Use must_follow & must_precede bitmaps to determine order
- of nodes within the cycle. */
-
- /* use must_follow & must_precede bitmaps to determine order
- of nodes within the cycle. */
- sbitmap_zero (must_precede);
- sbitmap_zero (must_follow);
- /* TODO: We can add an insn to the must_precede or must_follow
- bitmaps only if it has tight dependence to U and they
- both scheduled in the same row. The current check is less
- conservative and content with the fact that both U and the
- insn are scheduled in the same row. */
- for (e = u_node->in; e != 0; e = e->next_in)
- if (TEST_BIT (sched_nodes, e->src->cuid)
- && (SMODULO (SCHED_TIME (e->src), ii) ==
- SMODULO (start, ii)))
- SET_BIT (must_precede, e->src->cuid);
-
- for (e = u_node->out; e != 0; e = e->next_out)
- if (TEST_BIT (sched_nodes, e->dest->cuid)
- && (SMODULO (SCHED_TIME (e->dest), ii) ==
- SMODULO ((end - step), ii)))
- SET_BIT (must_follow, e->dest->cuid);
gcc_assert ((step > 0 && start < end)
|| (step < 0 && start > end));
+ calculate_must_precede_follow (u_node, start, end, step, ii,
+ sched_nodes, must_precede,
+ must_follow);
+
for (c = start; c != end; c += step)
{
- verify_partial_schedule (ps, sched_nodes);
-
- psi = ps_add_node_check_conflicts (ps, u_node, c,
- must_precede,
- must_follow);
-
- if (psi)
- {
- SCHED_TIME (u_node) = c;
- SET_BIT (sched_nodes, u);
- success = 1;
- num_splits = 0;
- if (dump_file)
- fprintf (dump_file, "Scheduled w/o split in %d\n", c);
-
- break;
- }
+ sbitmap tmp_precede, tmp_follow;
+
+ set_must_precede_follow (&tmp_follow, must_follow,
+ &tmp_precede, must_precede,
+ c, start, end, step);
+ success =
+ try_scheduling_node_in_cycle (ps, u_node, u, c,
+ sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ if (success)
+ break;
}
+
verify_partial_schedule (ps, sched_nodes);
}
if (!success)
}
num_splits++;
+ /* The scheduling window is exclusive of 'end'
+ whereas compute_split_row () expects an inclusive,
+ ordered range. */
if (step == 1)
- split_row = compute_split_row (sched_nodes, start, end,
+ split_row = compute_split_row (sched_nodes, start, end - 1,
ps->ii, u_node);
else
- split_row = compute_split_row (sched_nodes, end, start,
+ split_row = compute_split_row (sched_nodes, end + 1, start,
ps->ii, u_node);
ps_insert_empty_row (ps, split_row, sched_nodes);
int ii = ps->ii;
int new_ii = ii + 1;
int row;
+ int *rows_length_new;
verify_partial_schedule (ps, sched_nodes);
if (dump_file)
fprintf (dump_file, "split_row=%d\n", split_row);
- normalize_sched_times (ps);
- rotate_partial_schedule (ps, ps->min_cycle);
+ reset_sched_times (ps, PS_MIN_CYCLE (ps));
+ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
+ rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
for (row = 0; row < split_row; row++)
{
rows_new[row] = ps->rows[row];
+ rows_length_new[row] = ps->rows_length[row];
ps->rows[row] = NULL;
for (crr_insn = rows_new[row];
crr_insn; crr_insn = crr_insn->next_in_row)
for (row = split_row; row < ii; row++)
{
rows_new[row + 1] = ps->rows[row];
+ rows_length_new[row + 1] = ps->rows_length[row];
ps->rows[row] = NULL;
for (crr_insn = rows_new[row + 1];
crr_insn; crr_insn = crr_insn->next_in_row)
+ (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
free (ps->rows);
ps->rows = rows_new;
+ free (ps->rows_length);
+ ps->rows_length = rows_length_new;
ps->ii = new_ii;
gcc_assert (ps->min_cycle >= 0);
/* Given U_NODE which is the node that failed to be scheduled; LOW and
UP which are the boundaries of it's scheduling window; compute using
- SCHED_NODES and II a row in the partial schedule that can be splitted
+ SCHED_NODES and II a row in the partial schedule that can be split
which will separate a critical predecessor from a critical successor
thereby expanding the window, and return it. */
static int
ps_insn_ptr crr_insn;
for (row = 0; row < ps->ii; row++)
- for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
- {
- ddg_node_ptr u = crr_insn->node;
-
- gcc_assert (TEST_BIT (sched_nodes, u->cuid));
- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
- popcount (sched_nodes) == number of insns in ps. */
- gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
- gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- }
+ {
+ int length = 0;
+
+ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
+ {
+ ddg_node_ptr u = crr_insn->node;
+
+ length++;
+ gcc_assert (TEST_BIT (sched_nodes, u->cuid));
+ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
+ popcount (sched_nodes) == number of insns in ps. */
+ gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
+ gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
+ }
+
+ gcc_assert (ps->rows_length[row] == length);
+ }
}
\f
static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
-static nopa calculate_order_params (ddg_ptr, int mii);
+static nopa calculate_order_params (ddg_ptr, int, int *);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);
sbitmap_zero (tmp);
+ if (dump_file)
+ fprintf (dump_file, "SMS final nodes order: \n");
+
for (i = 0; i < num_nodes; i++)
{
int u = node_order[i];
+ if (dump_file)
+ fprintf (dump_file, "%d ", u);
gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
SET_BIT (tmp, u);
}
+ if (dump_file)
+ fprintf (dump_file, "\n");
+
sbitmap_free (tmp);
}
/* Order the nodes of G for scheduling and pass the result in
NODE_ORDER. Also set aux.count of each node to ASAP.
- Return the recMII for the given DDG. */
+ Store the maximal ASAP in *PMAX_ASAP. Return the recMII for the given DDG. */
static int
-sms_order_nodes (ddg_ptr g, int mii, int * node_order)
+sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
{
int i;
int rec_mii = 0;
ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
- nopa nops = calculate_order_params (g, mii);
+ nopa nops = calculate_order_params (g, mii, pmax_asap);
if (dump_file)
print_sccs (dump_file, sccs, g);
sbitmap_zero (prev_sccs);
sbitmap_ones (ones);
- /* Perfrom the node ordering starting from the SCC with the highest recMII.
+ /* Perform the node ordering starting from the SCC with the highest recMII.
For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
for (i = 0; i < all_sccs->num_sccs; i++)
{
/* MII is needed if we consider backarcs (that do not close recursive cycles). */
static struct node_order_params *
-calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
+calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
int u;
int max_asap;
HEIGHT (e->dest) + e->latency);
}
}
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nOrder params\n");
+ for (u = 0; u < num_nodes; u++)
+ {
+ ddg_node_ptr u_node = &g->nodes[u];
+ fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
+ ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
+ }
+ }
+
+ *pmax_asap = max_asap;
return node_order_params_arr;
}
{
partial_schedule_ptr ps = XNEW (struct partial_schedule);
ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
+ ps->rows_length = (int *) xcalloc (ii, sizeof (int));
ps->ii = ii;
ps->history = history;
ps->min_cycle = INT_MAX;
return;
free_ps_insns (ps);
free (ps->rows);
+ free (ps->rows_length);
free (ps);
}
ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
* sizeof (ps_insn_ptr));
memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
+ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
+ memset (ps->rows_length, 0, new_ii * sizeof (int));
ps->ii = new_ii;
ps->min_cycle = INT_MAX;
ps->max_cycle = INT_MIN;
{
ps_insn_ptr ps_i = ps->rows[i];
- fprintf (dump, "\n[CYCLE %d ]: ", i);
+ fprintf (dump, "\n[ROW %d ]: ", i);
while (ps_i)
{
- fprintf (dump, "%d, ",
- INSN_UID (ps_i->node->insn));
+ if (JUMP_P (ps_i->node->insn))
+ fprintf (dump, "%d (branch), ",
+ INSN_UID (ps_i->node->insn));
+ else
+ fprintf (dump, "%d, ",
+ INSN_UID (ps_i->node->insn));
+
ps_i = ps_i->next_in_row;
}
}
/* Creates an object of PS_INSN and initializes it to the given parameters. */
static ps_insn_ptr
-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
+create_ps_insn (ddg_node_ptr node, int cycle)
{
ps_insn_ptr ps_i = XNEW (struct ps_insn);
ps_i->node = node;
ps_i->next_in_row = NULL;
ps_i->prev_in_row = NULL;
- ps_i->row_rest_count = rest_count;
ps_i->cycle = cycle;
return ps_i;
if (ps_i->next_in_row)
ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
}
+
+ ps->rows_length[row] -= 1;
free (ps_i);
return true;
}
ps_insn_ptr next_ps_i;
ps_insn_ptr first_must_follow = NULL;
ps_insn_ptr last_must_precede = NULL;
+ ps_insn_ptr last_in_row = NULL;
int row;
if (! ps_i)
next_ps_i;
next_ps_i = next_ps_i->next_in_row)
{
- if (TEST_BIT (must_follow, next_ps_i->node->cuid)
+ if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
&& ! first_must_follow)
first_must_follow = next_ps_i;
- if (TEST_BIT (must_precede, next_ps_i->node->cuid))
+ if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
{
/* If we have already met a node that must follow, then
there is no possible column. */
else
last_must_precede = next_ps_i;
}
+ /* The closing branch must be the last in the row. */
+ if (must_precede
+ && TEST_BIT (must_precede, next_ps_i->node->cuid)
+ && JUMP_P (next_ps_i->node->insn))
+ return false;
+
+ last_in_row = next_ps_i;
}
+ /* The closing branch is scheduled as well. Make sure there is no
+ dependent instruction after it as the branch should be the last
+ instruction in the row. */
+ if (JUMP_P (ps_i->node->insn))
+ {
+ if (first_must_follow)
+ return false;
+ if (last_in_row)
+ {
+ /* Make the branch the last in the row. New instructions
+ will be inserted at the beginning of the row or after the
+ last must_precede instruction thus the branch is guaranteed
+ to remain the last instruction in the row. */
+ last_in_row->next_in_row = ps_i;
+ ps_i->prev_in_row = last_in_row;
+ ps_i->next_in_row = NULL;
+ }
+ else
+ ps->rows[row] = ps_i;
+ return true;
+ }
+
/* Now insert the node after INSERT_AFTER_PSI. */
if (! last_must_precede)
}
/* Advances the PS_INSN one column in its current row; returns false
- in failure and true in success. Bit N is set in MUST_FOLLOW if
- the node with cuid N must be come after the node pointed to by
+ in failure and true in success. Bit N is set in MUST_FOLLOW if
+ the node with cuid N must come after the node pointed to by
PS_I when scheduled in the same cycle. */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
/* Check if next_in_row is dependent on ps_i, both having same sched
times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
- if (TEST_BIT (must_follow, next_node->cuid))
+ if (must_follow && TEST_BIT (must_follow, next_node->cuid))
return false;
/* Advance PS_I over its next_in_row in the doubly linked list. */
}
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
- Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
- set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
- before/after (respectively) the node pointed to by PS_I when scheduled
+ Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
+ set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
+ before/after (respectively) the node pointed to by PS_I when scheduled
in the same cycle. */
static ps_insn_ptr
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
sbitmap must_precede, sbitmap must_follow)
{
ps_insn_ptr ps_i;
- int rest_count = 1;
int row = SMODULO (cycle, ps->ii);
- if (ps->rows[row]
- && ps->rows[row]->row_rest_count >= issue_rate)
+ if (ps->rows_length[row] >= issue_rate)
return NULL;
- if (ps->rows[row])
- rest_count += ps->rows[row]->row_rest_count;
-
- ps_i = create_ps_insn (node, rest_count, cycle);
+ ps_i = create_ps_insn (node, cycle);
/* Finds and inserts PS_I according to MUST_FOLLOW and
MUST_PRECEDE. */
return NULL;
}
+ ps->rows_length[row] += 1;
return ps_i;
}
{
rtx insn = crr_insn->node->insn;
- if (!INSN_P (insn))
+ if (!NONDEBUG_INSN_P (insn))
continue;
/* Check if there is room for the current insn. */
return true;
/* Update the DFA state and return with failure if the DFA found
- recource conflicts. */
+ resource conflicts. */
if (state_transition (curr_state, insn) >= 0)
return true;
/* Checks if the given node causes resource conflicts when added to PS at
cycle C. If not the node is added to PS and returned; otherwise zero
- is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
- cuid N must be come before/after (respectively) the node pointed to by
+ is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
+ cuid N must come before/after (respectively) the node pointed to by
PS_I when scheduled in the same cycle. */
ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
return ps_i;
}
+/* Calculate the stage count of the partial schedule PS. The calculation
+ takes into account the rotation amount passed in ROTATION_AMOUNT: the
+ minimum and maximum cycles of PS are first shifted down by
+ ROTATION_AMOUNT, and the result is the sum of CALC_STAGE_COUNT over
+ the cycles before zero (new minimum .. -1) and over the cycles from
+ zero onwards (0 .. new maximum). PS itself is not modified. */
+int
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
+{
+ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
+ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
+ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
+
+ /* The calculation of stage count is done by adding the number of
+ stages before cycle zero and the number of stages from cycle zero
+ onwards. */
+ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
+
+ return stage_count;
+}
+
/* Rotate the rows of PS such that insns scheduled at time
START_CYCLE will appear in row 0. Updates max/min_cycles. */
void
for (i = 0; i < backward_rotates; i++)
{
ps_insn_ptr first_row = ps->rows[0];
+ int first_row_length = ps->rows_length[0];
for (row = 0; row < last_row; row++)
- ps->rows[row] = ps->rows[row+1];
+ {
+ ps->rows[row] = ps->rows[row + 1];
+ ps->rows_length[row] = ps->rows_length[row + 1];
+ }
ps->rows[last_row] = first_row;
+ ps->rows_length[last_row] = first_row_length;
}
ps->max_cycle -= start_cycle;
return 0;
}
-struct tree_opt_pass pass_sms =
+struct rtl_opt_pass pass_sms =
{
+ {
+ RTL_PASS,
"sms", /* name */
gate_handle_sms, /* gate */
rest_of_handle_sms, /* execute */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
- TODO_dump_func, /* todo_flags_start */
- TODO_df_finish | TODO_verify_rtl_sharing |
- TODO_dump_func |
- TODO_ggc_collect, /* todo_flags_finish */
- 'm' /* letter */
+ 0, /* todo_flags_start */
+ TODO_df_finish
+ | TODO_verify_flow
+ | TODO_verify_rtl_sharing
+ | TODO_ggc_collect /* todo_flags_finish */
+ }
};
-