broken by
6. choose insn with the least dependences upon the previously
scheduled insn, or finally
- 7. choose insn with lowest UID.
+ 7. choose the insn which has the most insns dependent on it.
+ 8. choose insn with lowest UID.
Memory references complicate matters. Only if we can be certain
that memory references are not part of the data dependency graph
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
+#include "toplev.h"
extern char *reg_known_equiv_p;
extern rtx *reg_known_value;
#ifdef INSN_SCHEDULING
-/* enable interblock scheduling code */
-
-/* define INTERBLOCK_DEBUG for using the -fsched-max debugging facility */
-/* #define INTERBLOCK_DEBUG */
-
/* target_units bitmask has 1 for each unit in the cpu. It should be
possible to compute this variable from the machine description.
But currently it is computed by examinning the insn list. Since
#define ISSUE_RATE 1
#endif
-/* sched_debug_count is used for debugging the scheduler by limiting
- the number of scheduled insns. It is controlled by the option
- -fsched-max-N (N is a number).
-
- sched-verbose controls the amount of debugging output the
+/* sched-verbose controls the amount of debugging output the
scheduler prints. It is controlled by -fsched-verbose-N:
N>0 and no -DSR : the output is directed to stderr.
N>=10 will direct the printouts to stderr (regardless of -dSR).
N=1: same as -dSR.
N=2: bb's probabilities, detailed ready list info, unit/insn info.
N=3: rtl at abort point, control-flow, regions info.
- N=5: dependences info.
-
- max_rgn_blocks and max_region_insns limit region size for
- interblock scheduling. They are controlled by
- -fsched-interblock-max-blocks-N, -fsched-interblock-max-insns-N */
+ N=5: dependences info. */
#define MAX_RGN_BLOCKS 10
#define MAX_RGN_INSNS 100
-static int sched_debug_count = -1;
static int sched_verbose_param = 0;
static int sched_verbose = 0;
-static int max_rgn_blocks = MAX_RGN_BLOCKS;
-static int max_rgn_insns = MAX_RGN_INSNS;
/* nr_inter/spec counts interblock/speculative motion for the function */
static int nr_inter, nr_spec;
fix_sched_param (param, val)
char *param, *val;
{
- if (!strcmp (param, "max"))
- sched_debug_count = ((sched_debug_count == -1) ?
- atoi (val) : sched_debug_count);
- else if (!strcmp (param, "verbose"))
+ if (!strcmp (param, "verbose"))
sched_verbose_param = atoi (val);
- else if (!strcmp (param, "interblock-max-blocks"))
- max_rgn_blocks = atoi (val);
- else if (!strcmp (param, "interblock-max-insns"))
- max_rgn_insns = atoi (val);
else
warning ("fix_sched_param: unknown param: %s", param);
}
The transition (R->S) is implemented in the scheduling loop in
`schedule_block' when the best insn to schedule is chosen.
The transition (R->Q) is implemented in `queue_insn' when an
- insn is found to to have a function unit conflict with the already
+ insn is found to have a function unit conflict with the already
committed insns.
The transitions (P->R and P->Q) are implemented in `schedule_insn' as
insns move from the ready list to the scheduled list.
static void new_insn_dead_notes PROTO ((rtx, rtx, rtx, rtx));
static void update_n_sets PROTO ((rtx, int));
static void update_flow_info PROTO ((rtx, rtx, rtx, rtx));
+static char *safe_concat PROTO ((char *, char *, char *));
/* Main entry point of this file. */
void schedule_insns PROTO ((FILE *));
static int is_cfg_nonregular PROTO ((void));
-void debug_control_flow PROTO ((void));
-static int build_control_flow PROTO ((void));
+static int build_control_flow PROTO ((int_list_ptr *, int_list_ptr *,
+ int *, int *));
static void new_edge PROTO ((int, int));
void debug_regions PROTO ((void));
static void find_single_block_region PROTO ((void));
-static void find_rgns PROTO ((void));
+static void find_rgns PROTO ((int_list_ptr *, int_list_ptr *,
+ int *, int *, sbitmap *));
static int too_large PROTO ((int, int *, int *));
extern void debug_live PROTO ((int, int));
static int is_conditionally_protected PROTO ((rtx, int, int));
static int may_trap_exp PROTO ((rtx, int));
static int haifa_classify_insn PROTO ((rtx));
+static int is_prisky PROTO ((rtx, int, int));
static int is_exception_free PROTO ((rtx, int, int));
static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx));
static void find_pre_sched_live PROTO ((int));
static void find_post_sched_live PROTO ((int));
static void update_reg_usage PROTO ((void));
+static int queue_to_ready PROTO ((rtx [], int));
void debug_ready_list PROTO ((rtx[], int));
static void init_target_units PROTO (());
*listp = 0;
}
-rtx
+static rtx
alloc_INSN_LIST (val, next)
rtx val, next;
{
return r;
}
-rtx
+static rtx
alloc_EXPR_LIST (kind, val, next)
int kind;
rtx val, next;
#define __inline
#endif
+#ifndef HAIFA_INLINE
+#define HAIFA_INLINE __inline
+#endif
+
/* Computation of memory dependencies. */
/* The *_insns and *_mems are paired lists. Each pending memory operation
return 0;
}
-/* Print the control flow graph, for debugging purposes.
- Callable from the debugger. */
-
-void
-debug_control_flow ()
-{
- int i, e, next;
-
- fprintf (dump, ";; --------- CONTROL FLOW GRAPH --------- \n\n");
-
- for (i = 0; i < n_basic_blocks; i++)
- {
- fprintf (dump, ";;\tBasic block %d: first insn %d, last %d.\n",
- i,
- INSN_UID (basic_block_head[i]),
- INSN_UID (basic_block_end[i]));
-
- fprintf (dump, ";;\tPredecessor blocks:");
- for (e = IN_EDGES (i); e; e = next)
- {
- fprintf (dump, " %d", FROM_BLOCK (e));
-
- next = NEXT_IN (e);
-
- if (next == IN_EDGES (i))
- break;
- }
-
- fprintf (dump, "\n;;\tSuccesor blocks:");
- for (e = OUT_EDGES (i); e; e = next)
- {
- fprintf (dump, " %d", TO_BLOCK (e));
-
- next = NEXT_OUT (e);
-
- if (next == OUT_EDGES (i))
- break;
- }
-
- fprintf (dump, " \n\n");
-
- }
-}
-
-
/* Build the control flow graph and set nr_edges.
Instead of trying to build a cfg ourselves, we rely on flow to
prevent cross block scheduling. */
static int
-build_control_flow ()
+build_control_flow (s_preds, s_succs, num_preds, num_succs)
+ int_list_ptr *s_preds;
+ int_list_ptr *s_succs;
+ int *num_preds;
+ int *num_succs;
{
int i;
- int_list_ptr *s_preds;
- int_list_ptr *s_succs;
int_list_ptr succ;
- int *num_preds;
- int *num_succs;
int unreachable;
- /* The scheduler runs after flow; therefore, we can't blindly call
- back into find_basic_blocks since doing so could invalidate the
- info in basic_block_live_at_start.
-
- Consider a block consisting entirely of dead stores; after life
- analysis it would be a block of NOTE_INSN_DELETED notes. If
- we call find_basic_blocks again, then the block would be removed
- entirely and invalidate our the register live information.
-
- We could (should?) recompute register live information. Doing
- so may even be beneficial. */
- s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
- s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
- num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
- num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
- compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
-
/* Count the number of edges in the cfg. */
nr_edges = 0;
unreachable = 0;
for (i = 0; i < n_basic_blocks; i++)
{
nr_edges += num_succs[i];
- if (num_preds[i] == 0)
+
+ /* Unreachable loops with more than one basic block are detected
+ during the DFS traversal in find_rgns.
+
+ Unreachable loops with a single block are detected here. This
+ test is redundant with the one in find_rgns, but it's much
+ cheaper to go ahead and catch the trivial case here. */
+ if (num_preds[i] == 0
+ || (num_preds[i] == 1 && INT_LIST_VAL (s_preds[i]) == i))
unreachable = 1;
}
/* increment by 1, since edge 0 is unused. */
nr_edges++;
- /* For now. This will move as more and more of haifa is converted
- to using the cfg code in flow.c */
- free_bb_mem ();
return unreachable;
}
(*num_bbs)++;
(*num_insns) += (INSN_LUID (basic_block_end[block]) -
INSN_LUID (basic_block_head[block]));
- if ((*num_bbs > max_rgn_blocks) || (*num_insns > max_rgn_insns))
+ if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
return 1;
else
return 0;
if (max_hdr[blk] == -1) \
max_hdr[blk] = hdr; \
else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
- inner[hdr] = 0; \
+ RESET_BIT (inner, hdr); \
else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
{ \
- inner[max_hdr[blk]] = 0; \
+ RESET_BIT (inner,max_hdr[blk]); \
max_hdr[blk] = hdr; \
} \
}
-/* Find regions for interblock scheduling: a loop-free procedure, a reducible
- inner loop, or a basic block not contained in any other region.
- The procedures control flow graph is traversed twice.
- First traversal, a DFS, finds the headers of inner loops in the graph,
- and verifies that there are no unreacable blocks.
- Second traversal processes headers of inner loops, checking that the
- loop is reducible. The loop blocks that form a region are put into the
- region's blocks list in topological order.
+/* Find regions for interblock scheduling.
+
+ A region for scheduling can be:
+
+ * A loop-free procedure, or
- The following variables are changed by the function: rgn_nr, rgn_table,
- rgn_bb_table, block_to_bb and containing_rgn. */
+ * A reducible inner loop, or
+
+ * A basic block not contained in any other region.
+
+
+ ?!? In theory we could build other regions based on extended basic
+ blocks or reverse extended basic blocks. Is it worth the trouble?
+
+ Loop blocks that form a region are put into the region's block list
+ in topological order.
+
+ This procedure stores its results into the following global (ick) variables
+
+ * rgn_nr
+ * rgn_table
+ * rgn_bb_table
+ * block_to_bb
+ * containing_rgn
+
+
+ We use dominator relationships to avoid making regions out of non-reducible
+ loops.
+
+ This procedure needs to be converted to work on pred/succ lists instead
+ of edge tables. That would simplify it somewhat. */
static void
-find_rgns ()
+find_rgns (s_preds, s_succs, num_preds, num_succs, dom)
+ int_list_ptr *s_preds;
+ int_list_ptr *s_succs;
+ int *num_preds;
+ int *num_succs;
+ sbitmap *dom;
{
int *max_hdr, *dfs_nr, *stack, *queue, *degree;
- char *header, *inner, *passed, *in_stack, *in_queue, no_loops = 1;
- int node, child, loop_head, i, j, fst_edge, head, tail;
+ char no_loops = 1;
+ int node, child, loop_head, i, head, tail;
int count = 0, sp, idx = 0, current_edge = out_edges[0];
- int num_bbs, num_insns;
+ int num_bbs, num_insns, unreachable;
int too_large_failure;
- /*
- The following data structures are computed by the first traversal and
- are used by the second traversal:
- header[i] - flag set if the block i is the header of a loop.
- inner[i] - initially set. It is reset if the the block i is the header
- of a non-inner loop.
- max_hdr[i] - the header of the inner loop containing block i.
- (for a block i not in an inner loop it may be -1 or the
- header of the most inner loop containing the block).
-
- These data structures are used by the first traversal only:
- stack - non-recursive DFS implementation which uses a stack of edges.
- sp - top of the stack of edges
- dfs_nr[i] - the DFS ordering of block i.
- in_stack[i] - flag set if the block i is in the DFS stack.
-
- These data structures are used by the second traversal only:
- queue - queue containing the blocks of the current region.
- head and tail - queue boundaries.
- in_queue[i] - flag set if the block i is in queue */
-
- /* function's inner arrays allocation and initialization */
+ /* Note if an edge has been passed. */
+ sbitmap passed;
+
+ /* Note if a block is a natural loop header. */
+ sbitmap header;
+
+ /* Note if a block is a natural inner loop header. */
+ sbitmap inner;
+
+ /* Note if a block is in the block queue. */
+ sbitmap in_queue;
+
+ /* Note if a block is in the DFS stack. */
+ sbitmap in_stack;
+
+ /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
+ and a mapping from block to its loop header (if the block is contained
+ in a loop, else -1).
+
+ Store results in HEADER, INNER, and MAX_HDR respectively, these will
+ be used as inputs to the second traversal.
+
+ STACK, SP and DFS_NR are only used during the first traversal. */
+
+ /* Allocate and initialize variables for the first traversal. */
max_hdr = (int *) alloca (n_basic_blocks * sizeof (int));
dfs_nr = (int *) alloca (n_basic_blocks * sizeof (int));
bzero ((char *) dfs_nr, n_basic_blocks * sizeof (int));
stack = (int *) alloca (nr_edges * sizeof (int));
- queue = (int *) alloca (n_basic_blocks * sizeof (int));
- inner = (char *) alloca (n_basic_blocks * sizeof (char));
- header = (char *) alloca (n_basic_blocks * sizeof (char));
- bzero ((char *) header, n_basic_blocks * sizeof (char));
- passed = (char *) alloca (nr_edges * sizeof (char));
- bzero ((char *) passed, nr_edges * sizeof (char));
- in_stack = (char *) alloca (nr_edges * sizeof (char));
- bzero ((char *) in_stack, nr_edges * sizeof (char));
+ inner = sbitmap_alloc (n_basic_blocks);
+ sbitmap_ones (inner);
+
+ header = sbitmap_alloc (n_basic_blocks);
+ sbitmap_zero (header);
+
+ passed = sbitmap_alloc (nr_edges);
+ sbitmap_zero (passed);
+
+ in_queue = sbitmap_alloc (n_basic_blocks);
+ sbitmap_zero (in_queue);
- in_queue = (char *) alloca (n_basic_blocks * sizeof (char));
+ in_stack = sbitmap_alloc (n_basic_blocks);
+ sbitmap_zero (in_stack);
for (i = 0; i < n_basic_blocks; i++)
- {
- inner[i] = 1;
- max_hdr[i] = -1;
- }
+ max_hdr[i] = -1;
- /* First traversal: DFS, finds inner loops in control flow graph */
+ /* DFS traversal to find inner loops in the cfg. */
sp = -1;
while (1)
{
- if (current_edge == 0 || passed[current_edge])
+ if (current_edge == 0 || TEST_BIT (passed, current_edge))
{
- /* Here, if current_edge < 0, this is a leaf block.
- Otherwise current_edge was already passed. Note that in
- the latter case, not only current_edge but also all its
- NEXT_OUT edges are also passed. We have to "climb up on
- edges in the stack", looking for the first (already
- passed) edge whose NEXT_OUT was not passed yet. */
-
- while (sp >= 0 && (current_edge == 0 || passed[current_edge]))
+ /* We have reached a leaf node or a node that was already
+ processed. Pop edges off the stack until we find
+ an edge that has not yet been processed. */
+ while (sp >= 0
+ && (current_edge == 0 || TEST_BIT (passed, current_edge)))
{
+ /* Pop entry off the stack. */
current_edge = stack[sp--];
node = FROM_BLOCK (current_edge);
child = TO_BLOCK (current_edge);
- in_stack[child] = 0;
- if (max_hdr[child] >= 0 && in_stack[max_hdr[child]])
+ RESET_BIT (in_stack, child);
+ if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
current_edge = NEXT_OUT (current_edge);
}
- /* stack empty - the whole graph is traversed. */
- if (sp < 0 && passed[current_edge])
+ /* See if have finished the DFS tree traversal. */
+ if (sp < 0 && TEST_BIT (passed, current_edge))
break;
+
+ /* Nope, continue the traversal with the popped node. */
continue;
}
+ /* Process a node. */
node = FROM_BLOCK (current_edge);
- dfs_nr[node] = ++count;
- in_stack[node] = 1;
child = TO_BLOCK (current_edge);
+ SET_BIT (in_stack, node);
+ dfs_nr[node] = ++count;
- /* found a loop header */
- if (in_stack[child])
+ /* If the successor is in the stack, then we've found a loop.
+ Mark the loop, if it is not a natural loop, then it will
+ be rejected during the second traversal. */
+ if (TEST_BIT (in_stack, child))
{
no_loops = 0;
- header[child] = 1;
- max_hdr[child] = child;
+ SET_BIT (header, child);
UPDATE_LOOP_RELATIONS (node, child);
- passed[current_edge] = 1;
+ SET_BIT (passed, current_edge);
current_edge = NEXT_OUT (current_edge);
continue;
}
- /* the child was already visited once, no need to go down from
- it, everything is traversed there. */
+ /* If the child was already visited, then there is no need to visit
+ it again. Just update the loop relationships and restart
+ with a new edge. */
if (dfs_nr[child])
{
- if (max_hdr[child] >= 0 && in_stack[max_hdr[child]])
+ if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
- passed[current_edge] = 1;
+ SET_BIT (passed, current_edge);
current_edge = NEXT_OUT (current_edge);
continue;
}
- /* this is a step down in the dfs traversal */
+ /* Push an entry on the stack and continue DFS traversal. */
stack[++sp] = current_edge;
- passed[current_edge] = 1;
+ SET_BIT (passed, current_edge);
current_edge = OUT_EDGES (child);
- } /* while (1); */
-
- /* Second travsersal: find reducible inner loops, and sort
- topologically the blocks of each region */
- degree = dfs_nr; /* reuse dfs_nr array - it is not needed anymore */
- bzero ((char *) in_queue, n_basic_blocks * sizeof (char));
+ }
- if (no_loops)
- header[0] = 1;
+ /* Another check for unreachable blocks. The earlier test in
+ is_cfg_nonregular only finds unreachable blocks that do not
+ form a loop.
- /* compute the in-degree of every block in the graph */
+ The DFS traversal will mark every block that is reachable from
+ the entry node by placing a nonzero value in dfs_nr. Thus if
+ dfs_nr is zero for any block, then it must be unreachable. */
+ unreachable = 0;
for (i = 0; i < n_basic_blocks; i++)
- {
- fst_edge = IN_EDGES (i);
- if (fst_edge > 0)
- {
- degree[i] = 1;
- current_edge = NEXT_IN (fst_edge);
- while (fst_edge != current_edge)
- {
- ++degree[i];
- current_edge = NEXT_IN (current_edge);
- }
- }
- else
- degree[i] = 0;
- }
+ if (dfs_nr[i] == 0)
+ {
+ unreachable = 1;
+ break;
+ }
- /* pass through all graph blocks, looking for headers of inner loops */
+ /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
+ to hold degree counts. */
+ degree = dfs_nr;
+
+ /* Compute the in-degree of every block in the graph */
for (i = 0; i < n_basic_blocks; i++)
+ degree[i] = num_preds[i];
+
+ /* Do not perform region scheduling if there are any unreachable
+ blocks. */
+ if (!unreachable)
{
+ if (no_loops)
+ SET_BIT (header, 0);
- if (header[i] && inner[i])
- {
+ /* Second traversal: find reducible inner loops and topologically sort
+ blocks of each region. */
- /* i is a header of a potentially reducible inner loop, or
- block 0 in a subroutine with no loops at all */
- head = tail = -1;
- too_large_failure = 0;
- loop_head = max_hdr[i];
+ queue = (int *) alloca (n_basic_blocks * sizeof (int));
- /* decrease in_degree of all i's successors, (this is needed
- for the topological ordering) */
- fst_edge = current_edge = OUT_EDGES (i);
- if (fst_edge > 0)
+ /* Find blocks which are inner loop headers. We still have non-reducible
+ loops to consider at this point. */
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ if (TEST_BIT (header, i) && TEST_BIT (inner, i))
{
- do
+ int_list_ptr ps;
+ int j;
+
+ /* Now check that the loop is reducible. We do this separate
+ from finding inner loops so that we do not find a reducible
+ loop which contains an inner non-reducible loop.
+
+ A simple way to find reducible/natural loops is to verify
+ that each block in the loop is dominated by the loop
+ header.
+
+ If there exists a block that is not dominated by the loop
+ header, then the block is reachable from outside the loop
+ and thus the loop is not a natural loop. */
+ for (j = 0; j < n_basic_blocks; j++)
{
- --degree[TO_BLOCK (current_edge)];
- current_edge = NEXT_OUT (current_edge);
+ /* First identify blocks in the loop, except for the loop
+ entry block. */
+ if (i == max_hdr[j] && i != j)
+ {
+ /* Now verify that the block is dominated by the loop
+ header. */
+ if (!TEST_BIT (dom[j], i))
+ break;
+ }
}
- while (fst_edge != current_edge);
- }
- /* estimate # insns, and count # blocks in the region. */
- num_bbs = 1;
- num_insns = INSN_LUID (basic_block_end[i]) - INSN_LUID (basic_block_head[i]);
+ /* If we exited the loop early, then I is the header of a non
+ reducible loop and we should quit processing it now. */
+ if (j != n_basic_blocks)
+ continue;
+ /* I is a header of an inner loop, or block 0 in a subroutine
+ with no loops at all. */
+ head = tail = -1;
+ too_large_failure = 0;
+ loop_head = max_hdr[i];
+
+ /* Decrease degree of all I's successors for topological
+ ordering. */
+ for (ps = s_succs[i]; ps; ps = ps->next)
+ if (INT_LIST_VAL (ps) != EXIT_BLOCK
+ && INT_LIST_VAL (ps) != ENTRY_BLOCK)
+ --degree[INT_LIST_VAL(ps)];
+
+ /* Estimate # insns, and count # blocks in the region. */
+ num_bbs = 1;
+ num_insns = (INSN_LUID (basic_block_end[i])
+ - INSN_LUID (basic_block_head[i]));
- /* find all loop latches, if it is a true loop header, or
- all leaves if the graph has no loops at all */
- if (no_loops)
- {
- for (j = 0; j < n_basic_blocks; j++)
- if (out_edges[j] == 0) /* a leaf */
- {
- queue[++tail] = j;
- in_queue[j] = 1;
- if (too_large (j, &num_bbs, &num_insns))
+ /* Find all loop latches (blocks with back edges to the loop
+ header), or all the leaf blocks if the cfg has no loops.
+
+ Place those blocks into the queue. */
+ if (no_loops)
+ {
+ for (j = 0; j < n_basic_blocks; j++)
+ /* Leaf nodes have only a single successor which must
+ be EXIT_BLOCK. */
+ if (num_succs[j] == 1
+ && INT_LIST_VAL (s_succs[j]) == EXIT_BLOCK)
{
- too_large_failure = 1;
- break;
+ queue[++tail] = j;
+ SET_BIT (in_queue, j);
+
+ if (too_large (j, &num_bbs, &num_insns))
+ {
+ too_large_failure = 1;
+ break;
+ }
}
- }
- }
- else
- {
- fst_edge = current_edge = IN_EDGES (i);
- do
+ }
+ else
{
- node = FROM_BLOCK (current_edge);
- if (max_hdr[node] == loop_head && node != i) /* a latch */
+ int_list_ptr ps;
+
+ for (ps = s_preds[i]; ps; ps = ps->next)
{
- queue[++tail] = node;
- in_queue[node] = 1;
+ node = INT_LIST_VAL (ps);
- if (too_large (node, &num_bbs, &num_insns))
+ if (node == ENTRY_BLOCK || node == EXIT_BLOCK)
+ continue;
+
+ if (max_hdr[node] == loop_head && node != i)
{
- too_large_failure = 1;
- break;
+ /* This is a loop latch. */
+ queue[++tail] = node;
+ SET_BIT (in_queue, node);
+
+ if (too_large (node, &num_bbs, &num_insns))
+ {
+ too_large_failure = 1;
+ break;
+ }
}
+
}
- current_edge = NEXT_IN (current_edge);
}
- while (fst_edge != current_edge);
- }
- /* Put in queue[] all blocks that belong to the loop. Check
- that the loop is reducible, traversing back from the loop
- latches up to the loop header. */
- while (head < tail && !too_large_failure)
- {
- child = queue[++head];
- fst_edge = current_edge = IN_EDGES (child);
- do
+ /* Now add all the blocks in the loop to the queue.
+
+ We know the loop is a natural loop; however the algorithm
+ above will not always mark certain blocks as being in the
+ loop. Consider:
+ node children
+ a b,c
+ b c
+ c a,d
+ d b
+
+
+ The algorithm in the DFS traversal may not mark B & D as part
+ of the loop (ie they will not have max_hdr set to A).
+
+ We know they can not be loop latches (else they would have
+ had max_hdr set since they'd have a backedge to a dominator
+ block). So we don't need them on the initial queue.
+
+ We know they are part of the loop because they are dominated
+ by the loop header and can be reached by a backwards walk of
+ the edges starting with nodes on the initial queue.
+
+ It is safe and desirable to include those nodes in the
+ loop/scheduling region. To do so we would need to decrease
+ the degree of a node if it is the target of a backedge
+ within the loop itself as the node is placed in the queue.
+
+ We do not do this because I'm not sure that the actual
+ scheduling code will properly handle this case. ?!? */
+
+ while (head < tail && !too_large_failure)
{
- node = FROM_BLOCK (current_edge);
+ int_list_ptr ps;
+ child = queue[++head];
- if (max_hdr[node] != loop_head)
- { /* another entry to loop, it is irreducible */
- tail = -1;
- break;
- }
- else if (!in_queue[node] && node != i)
+ for (ps = s_preds[child]; ps; ps = ps->next)
{
- queue[++tail] = node;
- in_queue[node] = 1;
+ node = INT_LIST_VAL (ps);
- if (too_large (node, &num_bbs, &num_insns))
+ /* See discussion above about nodes not marked as in
+ this loop during the initial DFS traversal. */
+ if (node == ENTRY_BLOCK || node == EXIT_BLOCK
+ || max_hdr[node] != loop_head)
{
- too_large_failure = 1;
+ tail = -1;
break;
}
+ else if (!TEST_BIT (in_queue, node) && node != i)
+ {
+ queue[++tail] = node;
+ SET_BIT (in_queue, node);
+
+ if (too_large (node, &num_bbs, &num_insns))
+ {
+ too_large_failure = 1;
+ break;
+ }
+ }
}
- current_edge = NEXT_IN (current_edge);
}
- while (fst_edge != current_edge);
- }
- if (tail >= 0 && !too_large_failure)
- {
- /* Place the loop header into list of region blocks */
- degree[i] = -1;
- rgn_bb_table[idx] = i;
- RGN_NR_BLOCKS (nr_regions) = num_bbs;
- RGN_BLOCKS (nr_regions) = idx++;
- CONTAINING_RGN (i) = nr_regions;
- BLOCK_TO_BB (i) = count = 0;
-
- /* remove blocks from queue[], (in topological order), when
- their in_degree becomes 0. We scan the queue over and
- over again until it is empty. Note: there may be a more
- efficient way to do it. */
- while (tail >= 0)
+ if (tail >= 0 && !too_large_failure)
{
- if (head < 0)
- head = tail;
- child = queue[head];
- if (degree[child] == 0)
+ /* Place the loop header into list of region blocks. */
+ degree[i] = -1;
+ rgn_bb_table[idx] = i;
+ RGN_NR_BLOCKS (nr_regions) = num_bbs;
+ RGN_BLOCKS (nr_regions) = idx++;
+ CONTAINING_RGN (i) = nr_regions;
+ BLOCK_TO_BB (i) = count = 0;
+
+ /* Remove blocks from queue[] when their in degree becomes
+ zero. Repeat until no blocks are left on the list. This
+ produces a topological list of blocks in the region. */
+ while (tail >= 0)
{
- degree[child] = -1;
- rgn_bb_table[idx++] = child;
- BLOCK_TO_BB (child) = ++count;
- CONTAINING_RGN (child) = nr_regions;
- queue[head] = queue[tail--];
- fst_edge = current_edge = OUT_EDGES (child);
-
- if (fst_edge > 0)
+ int_list_ptr ps;
+
+ if (head < 0)
+ head = tail;
+ child = queue[head];
+ if (degree[child] == 0)
{
- do
- {
- --degree[TO_BLOCK (current_edge)];
- current_edge = NEXT_OUT (current_edge);
- }
- while (fst_edge != current_edge);
+ degree[child] = -1;
+ rgn_bb_table[idx++] = child;
+ BLOCK_TO_BB (child) = ++count;
+ CONTAINING_RGN (child) = nr_regions;
+ queue[head] = queue[tail--];
+
+ for (ps = s_succs[child]; ps; ps = ps->next)
+ if (INT_LIST_VAL (ps) != ENTRY_BLOCK
+ && INT_LIST_VAL (ps) != EXIT_BLOCK)
+ --degree[INT_LIST_VAL (ps)];
}
+ else
+ --head;
}
- else
- --head;
+ ++nr_regions;
}
- ++nr_regions;
}
}
}
- /* define each of all other blocks as a region itself */
+ /* Any block that did not end up in a region is placed into a region
+ by itself. */
for (i = 0; i < n_basic_blocks; i++)
if (degree[i] >= 0)
{
BLOCK_TO_BB (i) = 0;
}
-} /* find_rgns */
+ free (passed);
+ free (header);
+ free (inner);
+ free (in_queue);
+ free (in_stack);
+}
/* functions for regions scheduling information */
/* Return the INSN_LIST containing INSN in LIST, or NULL
if LIST does not contain INSN. */
-__inline static rtx
+HAIFA_INLINE static rtx
find_insn_list (insn, list)
rtx insn;
rtx list;
/* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0 otherwise. */
-__inline static char
+HAIFA_INLINE static char
find_insn_mem_list (insn, x, list, list1)
rtx insn, x;
rtx list, list1;
mask if the value is negative. A function unit index is the
non-negative encoding. */
-__inline static int
+HAIFA_INLINE static int
insn_unit (insn)
rtx insn;
{
These values are encoded in an int where the upper half gives the
minimum value and the lower half gives the maximum value. */
-__inline static unsigned int
+HAIFA_INLINE static unsigned int
blockage_range (unit, insn)
int unit;
rtx insn;
/* Return the issue-delay of an insn */
-__inline static int
+HAIFA_INLINE static int
insn_issue_delay (insn)
rtx insn;
{
instance INSTANCE at time CLOCK if the previous actual hazard cost
was COST. */
-__inline static int
+HAIFA_INLINE static int
actual_hazard_this_instance (unit, instance, insn, clock, cost)
int unit, instance, clock, cost;
rtx insn;
/* Record INSN as having begun execution on the units encoded by UNIT at
time CLOCK. */
-__inline static void
+HAIFA_INLINE static void
schedule_unit (unit, insn, clock)
int unit, clock;
rtx insn;
/* Return the actual hazard cost of executing INSN on the units encoded by
UNIT at time CLOCK if the previous actual hazard cost was COST. */
-__inline static int
+HAIFA_INLINE static int
actual_hazard (unit, insn, clock, cost)
int unit, clock, cost;
rtx insn;
to be used is chosen in preference to one with a unit that is less
used. We are trying to minimize a subsequent actual hazard. */
-__inline static int
+HAIFA_INLINE static int
potential_hazard (unit, insn, cost)
int unit, cost;
rtx insn;
This is the number of cycles between instruction issue and
instruction results. */
-__inline static int
+HAIFA_INLINE static int
insn_cost (insn, link, used)
rtx insn, link, used;
{
|| NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
|| NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
|| NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
+ || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_START
+ || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END
|| (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
&& GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
{
rtx tmp = *(rtx *)y;
rtx tmp2 = *(rtx *)x;
rtx link;
- int tmp_class, tmp2_class;
+ int tmp_class, tmp2_class, depend_count1, depend_count2;
int val, priority_val, spec_val, prob_val, weight_val;
return val;
}
+ /* Prefer the insn which has more later insns that depend on it.
+ This gives the scheduler more freedom when scheduling later
+ instructions at the expense of added register pressure. */
+ depend_count1 = 0;
+ for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
+ depend_count1++;
+
+ depend_count2 = 0;
+ for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
+ depend_count2++;
+
+ val = depend_count2 - depend_count1;
+ if (val)
+ return val;
+
/* If insns are equally good, sort by INSN_LUID (original insn order),
so that we make the sort stable. This minimizes instruction movement,
thus minimizing sched's effect on debugging and cross-jumping. */
/* Resort the array A in which only element at index N may be out of order. */
-__inline static void
+HAIFA_INLINE static void
swap_sort (a, n)
rtx *a;
int n;
N_CYCLES after the currently executing insn. Preserve insns
chain for debugging purposes. */
-__inline static void
+HAIFA_INLINE static void
queue_insn (insn, n_cycles)
rtx insn;
int n_cycles;
/* Return nonzero if PAT is the pattern of an insn which makes a
register live. */
-__inline static int
+HAIFA_INLINE static int
birthing_insn_p (pat)
rtx pat;
{
/* PREV is an insn that is ready to execute. Adjust its priority if that
will help shorten register lifetimes. */
-__inline static void
+HAIFA_INLINE static void
adjust_priority (prev)
rtx prev;
{
if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_START
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
{
/* Return the head and tail pointers of BB. */
-__inline static void
+HAIFA_INLINE static void
get_block_head_tail (bb, headp, tailp)
int bb;
rtx *headp;
#define BUF_LEN 256
+static char *
+safe_concat (buf, cur, str)
+ char *buf;
+ char *cur;
+ char *str;
+{
+ char *end = buf + BUF_LEN - 2; /* leave room for null */
+ int c;
+
+ if (cur > end)
+ {
+ *end = '\0';
+ return end;
+ }
+
+ while (cur < end && (c = *str++) != '\0')
+ *cur++ = c;
+
+ *cur = '\0';
+ return cur;
+}
+
/* This recognizes rtx, I classified as expressions. These are always */
/* represent some action on values or results of other expression, */
/* that may be stored in objects representing values. */
rtx x;
int verbose;
{
- char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
+ char tmp[BUF_LEN];
+ char *st[4];
+ char *cur = buf;
+ char *fun = (char *)0;
+ char *sep;
+ rtx op[4];
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ st[i] = (char *)0;
+ op[i] = NULL_RTX;
+ }
switch (GET_CODE (x))
{
case PLUS:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s+%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "+";
+ op[1] = XEXP (x, 1);
break;
case LO_SUM:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%sl+%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "+low(";
+ op[1] = XEXP (x, 1);
+ st[2] = ")";
break;
case MINUS:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s-%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "-";
+ op[1] = XEXP (x, 1);
break;
case COMPARE:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s??%s", t1, t2);
+ fun = "cmp";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case NEG:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "-%s", t1);
+ st[0] = "-";
+ op[0] = XEXP (x, 0);
break;
case MULT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s*%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "*";
+ op[1] = XEXP (x, 1);
break;
case DIV:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s/%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "/";
+ op[1] = XEXP (x, 1);
break;
case UDIV:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%su/%s", t1, t2);
+ fun = "udiv";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case MOD:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s%%%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "%";
+ op[1] = XEXP (x, 1);
break;
case UMOD:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%su%%%s", t1, t2);
+ fun = "umod";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case SMIN:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "smin (%s, %s)", t1, t2);
+ fun = "smin";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case SMAX:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "smax(%s,%s)", t1, t2);
+ fun = "smax";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case UMIN:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "umin (%s, %s)", t1, t2);
+ fun = "umin";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case UMAX:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "umax(%s,%s)", t1, t2);
+ fun = "umax";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case NOT:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "!%s", t1);
+ st[0] = "!";
+ op[0] = XEXP (x, 0);
break;
case AND:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s&%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "&";
+ op[1] = XEXP (x, 1);
break;
case IOR:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s|%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "|";
+ op[1] = XEXP (x, 1);
break;
case XOR:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s^%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "^";
+ op[1] = XEXP (x, 1);
break;
case ASHIFT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s<<%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "<<";
+ op[1] = XEXP (x, 1);
break;
case LSHIFTRT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s0>%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = " 0>>";
+ op[1] = XEXP (x, 1);
break;
case ASHIFTRT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s>>%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = ">>";
+ op[1] = XEXP (x, 1);
break;
case ROTATE:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s<-<%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "<-<";
+ op[1] = XEXP (x, 1);
break;
case ROTATERT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s>->%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = ">->";
+ op[1] = XEXP (x, 1);
break;
case ABS:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "abs(%s)", t1);
+ fun = "abs";
+ op[0] = XEXP (x, 0);
break;
case SQRT:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "sqrt(%s)", t1);
+ fun = "sqrt";
+ op[0] = XEXP (x, 0);
break;
case FFS:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "ffs(%s)", t1);
+ fun = "ffs";
+ op[0] = XEXP (x, 0);
break;
case EQ:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s == %s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "==";
+ op[1] = XEXP (x, 1);
break;
case NE:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s!=%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "!=";
+ op[1] = XEXP (x, 1);
break;
case GT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s>%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = ">";
+ op[1] = XEXP (x, 1);
break;
case GTU:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s>u%s", t1, t2);
+ fun = "gtu";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case LT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s<%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "<";
+ op[1] = XEXP (x, 1);
break;
case LTU:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s<u%s", t1, t2);
+ fun = "ltu";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case GE:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s>=%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = ">=";
+ op[1] = XEXP (x, 1);
break;
case GEU:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s>=u%s", t1, t2);
+ fun = "geu";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case LE:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s<=%s", t1, t2);
+ op[0] = XEXP (x, 0);
+ st[1] = "<=";
+ op[1] = XEXP (x, 1);
break;
case LEU:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "%s<=u%s", t1, t2);
+ fun = "leu";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
break;
case SIGN_EXTRACT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- print_value (t3, XEXP (x, 2), verbose);
- if (verbose)
- sprintf (buf, "sign_extract(%s,%s,%s)", t1, t2, t3);
- else
- sprintf (buf, "sxt(%s,%s,%s)", t1, t2, t3);
+ fun = (verbose) ? "sign_extract" : "sxt";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
+ op[2] = XEXP (x, 2);
break;
case ZERO_EXTRACT:
- print_value (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- print_value (t3, XEXP (x, 2), verbose);
- if (verbose)
- sprintf (buf, "zero_extract(%s,%s,%s)", t1, t2, t3);
- else
- sprintf (buf, "zxt(%s,%s,%s)", t1, t2, t3);
+ fun = (verbose) ? "zero_extract" : "zxt";
+ op[0] = XEXP (x, 0);
+ op[1] = XEXP (x, 1);
+ op[2] = XEXP (x, 2);
break;
case SIGN_EXTEND:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "sign_extend(%s)", t1);
- else
- sprintf (buf, "sxn(%s)", t1);
+ fun = (verbose) ? "sign_extend" : "sxn";
+ op[0] = XEXP (x, 0);
break;
case ZERO_EXTEND:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "zero_extend(%s)", t1);
- else
- sprintf (buf, "zxn(%s)", t1);
+ fun = (verbose) ? "zero_extend" : "zxn";
+ op[0] = XEXP (x, 0);
break;
case FLOAT_EXTEND:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "float_extend(%s)", t1);
- else
- sprintf (buf, "fxn(%s)", t1);
+ fun = (verbose) ? "float_extend" : "fxn";
+ op[0] = XEXP (x, 0);
break;
case TRUNCATE:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "trunc(%s)", t1);
- else
- sprintf (buf, "trn(%s)", t1);
+ fun = (verbose) ? "trunc" : "trn";
+ op[0] = XEXP (x, 0);
break;
case FLOAT_TRUNCATE:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "float_trunc(%s)", t1);
- else
- sprintf (buf, "ftr(%s)", t1);
+ fun = (verbose) ? "float_trunc" : "ftr";
+ op[0] = XEXP (x, 0);
break;
case FLOAT:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "float(%s)", t1);
- else
- sprintf (buf, "flt(%s)", t1);
+ fun = (verbose) ? "float" : "flt";
+ op[0] = XEXP (x, 0);
break;
case UNSIGNED_FLOAT:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "uns_float(%s)", t1);
- else
- sprintf (buf, "ufl(%s)", t1);
+ fun = (verbose) ? "uns_float" : "ufl";
+ op[0] = XEXP (x, 0);
break;
case FIX:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "fix(%s)", t1);
+ fun = "fix";
+ op[0] = XEXP (x, 0);
break;
case UNSIGNED_FIX:
- print_value (t1, XEXP (x, 0), verbose);
- if (verbose)
- sprintf (buf, "uns_fix(%s)", t1);
- else
- sprintf (buf, "ufx(%s)", t1);
+ fun = (verbose) ? "uns_fix" : "ufx";
+ op[0] = XEXP (x, 0);
break;
case PRE_DEC:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "--%s", t1);
+ st[0] = "--";
+ op[0] = XEXP (x, 0);
break;
case PRE_INC:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "++%s", t1);
+ st[0] = "++";
+ op[0] = XEXP (x, 0);
break;
case POST_DEC:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "%s--", t1);
+ op[0] = XEXP (x, 0);
+ st[1] = "--";
break;
case POST_INC:
- print_value (t1, XEXP (x, 0), verbose);
- sprintf (buf, "%s++", t1);
+ op[0] = XEXP (x, 0);
+ st[1] = "++";
break;
case CALL:
- print_value (t1, XEXP (x, 0), verbose);
+ st[0] = "call ";
+ op[0] = XEXP (x, 0);
if (verbose)
{
- print_value (t2, XEXP (x, 1), verbose);
- sprintf (buf, "call %s argc:%s", t1, t2);
+ st[1] = " argc:";
+ op[1] = XEXP (x, 1);
}
- else
- sprintf (buf, "call %s", t1);
break;
case IF_THEN_ELSE:
- print_exp (t1, XEXP (x, 0), verbose);
- print_value (t2, XEXP (x, 1), verbose);
- print_value (t3, XEXP (x, 2), verbose);
- sprintf (buf, "{(%s)?%s:%s}", t1, t2, t3);
+ st[0] = "{(";
+ op[0] = XEXP (x, 0);
+ st[1] = ")?";
+ op[1] = XEXP (x, 1);
+ st[2] = ":";
+ op[2] = XEXP (x, 2);
+ st[3] = "}";
break;
case TRAP_IF:
- print_value (t1, TRAP_CONDITION (x), verbose);
- sprintf (buf, "trap_if %s", t1);
+ fun = "trap_if";
+ op[0] = TRAP_CONDITION (x);
break;
case UNSPEC:
- {
- int i;
-
- sprintf (t1, "unspec{");
- for (i = 0; i < XVECLEN (x, 0); i++)
- {
- print_pattern (t2, XVECEXP (x, 0, i), verbose);
- sprintf (t3, "%s%s;", t1, t2);
- strcpy (t1, t3);
- }
- sprintf (buf, "%s}", t1);
- }
- break;
case UNSPEC_VOLATILE:
{
- int i;
-
- sprintf (t1, "unspec/v{");
+ cur = safe_concat (buf, cur, "unspec");
+ if (GET_CODE (x) == UNSPEC_VOLATILE)
+ cur = safe_concat (buf, cur, "/v");
+ cur = safe_concat (buf, cur, "[");
+ sep = "";
for (i = 0; i < XVECLEN (x, 0); i++)
{
- print_pattern (t2, XVECEXP (x, 0, i), verbose);
- sprintf (t3, "%s%s;", t1, t2);
- strcpy (t1, t3);
+ print_pattern (tmp, XVECEXP (x, 0, i), verbose);
+ cur = safe_concat (buf, cur, sep);
+ cur = safe_concat (buf, cur, tmp);
+ sep = ",";
}
- sprintf (buf, "%s}", t1);
+ cur = safe_concat (buf, cur, "] ");
+ sprintf (tmp, "%d", XINT (x, 1));
+ cur = safe_concat (buf, cur, tmp);
}
break;
default:
-/* if (verbose) debug_rtx (x); else sprintf (buf, "$$$"); */
- sprintf (buf, "$$$");
+ /* if (verbose) debug_rtx (x); */
+ st[0] = GET_RTX_NAME (GET_CODE (x));
+ break;
}
-} /* print_exp */
+
+ /* Print this as a function? */
+ if (fun)
+ {
+ cur = safe_concat (buf, cur, fun);
+ cur = safe_concat (buf, cur, "(");
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ if (st[i])
+ cur = safe_concat (buf, cur, st[i]);
+
+ if (op[i])
+ {
+ if (fun && i != 0)
+ cur = safe_concat (buf, cur, ",");
+
+ print_value (tmp, op[i], verbose);
+ cur = safe_concat (buf, cur, tmp);
+ }
+ }
+
+ if (fun)
+ cur = safe_concat (buf, cur, ")");
+} /* print_exp */
/* Prints rtx's which I have classified as values.  These are constants, */
/* registers, labels, symbols and memory accesses.  */
int verbose;
{
char t[BUF_LEN];
+ char *cur = buf;
switch (GET_CODE (x))
{
case CONST_INT:
- sprintf (buf, "%Xh", INTVAL (x));
+ sprintf (t, "0x%lx", (long)INTVAL (x));
+ cur = safe_concat (buf, cur, t);
break;
case CONST_DOUBLE:
- print_value (t, XEXP (x, 0), verbose);
- sprintf (buf, "<%s>", t);
+ sprintf (t, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
+ cur = safe_concat (buf, cur, t);
break;
case CONST_STRING:
- sprintf (buf, "\"%s\"", (char *) XEXP (x, 0));
+ cur = safe_concat (buf, cur, "\"");
+ cur = safe_concat (buf, cur, XSTR (x, 0));
+ cur = safe_concat (buf, cur, "\"");
break;
case SYMBOL_REF:
- sprintf (buf, "`%s'", (char *) XEXP (x, 0));
+ cur = safe_concat (buf, cur, "`");
+ cur = safe_concat (buf, cur, XSTR (x, 0));
+ cur = safe_concat (buf, cur, "'");
break;
case LABEL_REF:
- sprintf (buf, "L%d", INSN_UID (XEXP (x, 0)));
+ sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
+ cur = safe_concat (buf, cur, t);
break;
case CONST:
- print_value (buf, XEXP (x, 0), verbose);
+ print_value (t, XEXP (x, 0), verbose);
+ cur = safe_concat (buf, cur, "const(");
+ cur = safe_concat (buf, cur, t);
+ cur = safe_concat (buf, cur, ")");
break;
case HIGH:
- print_value (buf, XEXP (x, 0), verbose);
+ print_value (t, XEXP (x, 0), verbose);
+ cur = safe_concat (buf, cur, "high(");
+ cur = safe_concat (buf, cur, t);
+ cur = safe_concat (buf, cur, ")");
break;
case REG:
- if (GET_MODE (x) == SFmode
- || GET_MODE (x) == DFmode
- || GET_MODE (x) == XFmode
- || GET_MODE (x) == TFmode)
- strcpy (t, "fr");
+ if (REGNO (x) < FIRST_PSEUDO_REGISTER)
+ {
+ int c = reg_names[ REGNO (x) ][0];
+ if (c >= '0' && c <= '9')
+ cur = safe_concat (buf, cur, "%");
+
+ cur = safe_concat (buf, cur, reg_names[ REGNO (x) ]);
+ }
else
- strcpy (t, "r");
- sprintf (buf, "%s%d", t, REGNO (x));
+ {
+ sprintf (t, "r%d", REGNO (x));
+ cur = safe_concat (buf, cur, t);
+ }
break;
case SUBREG:
- print_value (t, XEXP (x, 0), verbose);
- sprintf (buf, "%s#%d", t, SUBREG_WORD (x));
+ print_value (t, SUBREG_REG (x), verbose);
+ cur = safe_concat (buf, cur, t);
+ sprintf (t, "#%d", SUBREG_WORD (x));
+ cur = safe_concat (buf, cur, t);
break;
case SCRATCH:
- sprintf (buf, "scratch");
+ cur = safe_concat (buf, cur, "scratch");
break;
case CC0:
- sprintf (buf, "cc0");
+ cur = safe_concat (buf, cur, "cc0");
break;
case PC:
- sprintf (buf, "pc");
+ cur = safe_concat (buf, cur, "pc");
break;
case MEM:
print_value (t, XEXP (x, 0), verbose);
- sprintf (buf, "[%s]", t);
+ cur = safe_concat (buf, cur, "[");
+ cur = safe_concat (buf, cur, t);
+ cur = safe_concat (buf, cur, "]");
break;
default:
- print_exp (buf, x, verbose);
+ print_exp (t, x, verbose);
+ cur = safe_concat (buf, cur, t);
+ break;
}
} /* print_value */
INSN_UID (basic_block_end[b]),
(reload_completed ? "after" : "before"));
fprintf (dump, ";; ======================================================\n");
- if (sched_debug_count >= 0)
- fprintf (dump, ";;\t -- sched_debug_count=%d\n", sched_debug_count);
fprintf (dump, "\n");
visual_tbl = (char *) alloca (get_visual_tbl_length ());
if (sched_verbose >= 2)
{
- fprintf (dump, ";;\t\tReady list initially: ");
+ fprintf (dump, ";;\t\tReady list initially: ");
debug_ready_list (ready, n_ready);
}
{
int b1;
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count == 0)
- break;
-#endif
clock_var++;
/* Add to the ready list all pending insns that can be issued now.
if (sched_verbose)
{
- fprintf (dump, ";;\tReady list (t =%3d): ", clock_var);
+ fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
debug_ready_list (ready, n_ready);
}
}
else if (cost == 0)
{
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count == 0)
- break;
-#endif
-
/* an interblock motion? */
if (INSN_BB (insn) != target_bb)
{
can_issue_more--;
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count > 0)
- sched_debug_count--;
-#endif
-
n_ready = schedule_insn (insn, ready, n_ready, clock_var);
/* remove insn from ready list */
if (sched_verbose)
{
visualize_scheduled_insns (b, clock_var);
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count == 0)
- fprintf (dump, "........ sched_debug_count == 0 .................\n");
-#endif
}
}
}
/* Sanity check -- queue must be empty now. Meaningless if region has
- multiple bbs, or if scheduling stopped by sched_debug_count. */
+ multiple bbs. */
if (current_nr_blocks > 1)
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count != 0)
-#endif
- if (!flag_schedule_interblock && q_size != 0)
- abort ();
+ if (!flag_schedule_interblock && q_size != 0)
+ abort ();
/* update head/tail boundaries. */
head = NEXT_INSN (prev_head);
tail = last;
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count == 0)
- /* compensate for stopping scheduling prematurely */
- for (i = sched_target_n_insns; i < target_n_insns; i++)
- tail = move_insn (group_leader (NEXT_INSN (tail)), tail);
-#endif
-
/* Restore-other-notes: NOTE_LIST is the end of a chain of notes
previously found among the insns. Insert them at the beginning
of the insns. */
#endif
}
-#ifdef INTERBLOCK_DEBUG
- if (sched_debug_count != 0)
-#endif
- /* sanity check: verify that all region insns were scheduled */
- if (sched_rgn_n_insns != rgn_n_insns)
- abort ();
+ /* sanity check: verify that all region insns were scheduled */
+ if (sched_rgn_n_insns != rgn_n_insns)
+ abort ();
/* update register life and usage information */
if (reload_completed == 0)
if (GET_CODE (dest) == REG)
{
+ /* If the original insn already used this register, we may not add new
+ notes for it. One example for a split that needs this test is
+ when a multi-word memory access with register-indirect addressing
+ is split into multiple memory accesses with auto-increment and
+ one adjusting add instruction for the address register. */
+ if (reg_referenced_p (dest, PATTERN (orig_insn)))
+ return;
for (tem = last; tem != insn; tem = PREV_INSN (tem))
{
if (GET_RTX_CLASS (GET_CODE (tem)) == 'i'
for (insn = basic_block_head[b];; insn = next)
{
- rtx prev;
- rtx set;
+ rtx set, last, first, notes;
/* Can't use `next_real_insn' because that
might go across CODE_LABELS and short-out basic blocks. */
}
/* Split insns here to get max fine-grain parallelism. */
- prev = PREV_INSN (insn);
- /* It is probably not worthwhile to try to split again in
- the second pass. However, if flag_schedule_insns is not set,
- the first and only (if any) scheduling pass is after reload. */
- if (reload_completed == 0 || ! flag_schedule_insns)
+ first = PREV_INSN (insn);
+ notes = REG_NOTES (insn);
+ last = try_split (PATTERN (insn), insn, 1);
+ if (last != insn)
{
- rtx last, first = PREV_INSN (insn);
- rtx notes = REG_NOTES (insn);
- last = try_split (PATTERN (insn), insn, 1);
- if (last != insn)
+ /* try_split returns the NOTE that INSN became. */
+ first = NEXT_INSN (first);
+ update_flow_info (notes, first, last, insn);
+
+ PUT_CODE (insn, NOTE);
+ NOTE_SOURCE_FILE (insn) = 0;
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ if (insn == basic_block_head[b])
+ basic_block_head[b] = first;
+ if (insn == basic_block_end[b])
{
- /* try_split returns the NOTE that INSN became. */
- first = NEXT_INSN (first);
- update_flow_info (notes, first, last, insn);
-
- PUT_CODE (insn, NOTE);
- NOTE_SOURCE_FILE (insn) = 0;
- NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
- if (insn == basic_block_head[b])
- basic_block_head[b] = first;
- if (insn == basic_block_end[b])
- {
- basic_block_end[b] = last;
- break;
- }
+ basic_block_end[b] = last;
+ break;
}
}
}
else
{
+ int_list_ptr *s_preds, *s_succs;
+ int *num_preds, *num_succs;
+ sbitmap *dom, *pdom;
+
+ s_preds = (int_list_ptr *) alloca (n_basic_blocks
+ * sizeof (int_list_ptr));
+ s_succs = (int_list_ptr *) alloca (n_basic_blocks
+ * sizeof (int_list_ptr));
+ num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
+ num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
+ dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
+ pdom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
+
+ /* The scheduler runs after flow; therefore, we can't blindly call
+ back into find_basic_blocks since doing so could invalidate the
+ info in basic_block_live_at_start.
+
+ Consider a block consisting entirely of dead stores; after life
+ analysis it would be a block of NOTE_INSN_DELETED notes. If
+ we call find_basic_blocks again, then the block would be removed
+ entirely and invalidate our register live information.
+
+ We could (should?) recompute register live information. Doing
+ so may even be beneficial. */
+
+ compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
+
+ /* Compute the dominators and post dominators. We don't currently use
+ post dominators, but we should for speculative motion analysis. */
+ compute_dominators (dom, pdom, s_preds, s_succs);
+
/* build_control_flow will return nonzero if it detects unreachable
blocks or any other irregularity with the cfg which prevents
cross block scheduling. */
- if (build_control_flow () != 0)
+ if (build_control_flow (s_preds, s_succs, num_preds, num_succs) != 0)
find_single_block_region ();
else
- find_rgns ();
+ find_rgns (s_preds, s_succs, num_preds, num_succs, dom);
if (sched_verbose >= 3)
- {
- debug_control_flow ();
- debug_regions ();
- }
+ debug_regions ();
+ /* For now. This will move as more and more of haifa is converted
+ to using the cfg code in flow.c */
+ free_bb_mem ();
+ free (dom);
+ free (pdom);
}
}