Update.

[pf3gnuchains/gcc-fork.git] / gcc / haifa-sched.c
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c

index b5f4ace..fed2a12 100644 (file)
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -87,7 +87,8 @@
     broken by
     6.  choose insn with the least dependences upon the previously
     scheduled insn, or finally
-   7.  choose insn with lowest UID.
+   7   choose the insn which has the most insns dependent on it.
+   8.  choose insn with lowest UID.
  
     Memory references complicate matters.  Only if we can be certain
     that memory references are not part of the data dependency graph
@@ -164,17 +165,13 @@
  #include "insn-config.h"
  #include "insn-attr.h"
  #include "except.h"
+#include "toplev.h"
  
  extern char *reg_known_equiv_p;
  extern rtx *reg_known_value;
  
  #ifdef INSN_SCHEDULING
  
-/* enable interblock scheduling code */
-
-/* define INTERBLOCK_DEBUG for using the -fsched-max debugging facility */
-/* #define INTERBLOCK_DEBUG */
-
  /* target_units bitmask has 1 for each unit in the cpu.  It should be
     possible to compute this variable from the machine description.
     But currently it is computed by examinning the insn list.  Since
@@ -194,31 +191,20 @@ static int issue_rate;
  #define ISSUE_RATE 1
  #endif
  
-/* sched_debug_count is used for debugging the scheduler by limiting
-   the number of scheduled insns.  It is controlled by the option
-   -fsched-max-N (N is a number).
-
-   sched-verbose controls the amount of debugging output the
+/* sched-verbose controls the amount of debugging output the
     scheduler prints.  It is controlled by -fsched-verbose-N:
     N>0 and no -DSR : the output is directed to stderr.
     N>=10 will direct the printouts to stderr (regardless of -dSR).
     N=1: same as -dSR.
     N=2: bb's probabilities, detailed ready list info, unit/insn info.
     N=3: rtl at abort point, control-flow, regions info.
-   N=5: dependences info.
-
-   max_rgn_blocks and max_region_insns limit region size for
-   interblock scheduling.  They are controlled by
-   -fsched-interblock-max-blocks-N, -fsched-interblock-max-insns-N */
+   N=5: dependences info.  */
  
  #define MAX_RGN_BLOCKS 10
  #define MAX_RGN_INSNS 100
  
-static int sched_debug_count = -1;
  static int sched_verbose_param = 0;
  static int sched_verbose = 0;
-static int max_rgn_blocks = MAX_RGN_BLOCKS;
-static int max_rgn_insns = MAX_RGN_INSNS;
  
  /* nr_inter/spec counts interblock/speculative motion for the function */
  static int nr_inter, nr_spec;
@@ -235,15 +221,8 @@ void
  fix_sched_param (param, val)
       char *param, *val;
  {
-  if (!strcmp (param, "max"))
-    sched_debug_count = ((sched_debug_count == -1) ?
-                        atoi (val) : sched_debug_count);
-  else if (!strcmp (param, "verbose"))
+  if (!strcmp (param, "verbose"))
      sched_verbose_param = atoi (val);
-  else if (!strcmp (param, "interblock-max-blocks"))
-    max_rgn_blocks = atoi (val);
-  else if (!strcmp (param, "interblock-max-insns"))
-    max_rgn_insns = atoi (val);
    else
      warning ("fix_sched_param: unknown param: %s", param);
  }
@@ -404,7 +383,7 @@ static rtx dead_notes;
     The transition (R->S) is implemented in the scheduling loop in
     `schedule_block' when the best insn to schedule is chosen.
     The transition (R->Q) is implemented in `queue_insn' when an
-   insn is found to to have a function unit conflict with the already
+   insn is found to have a function unit conflict with the already
     committed insns.
     The transitions (P->R and P->Q) are implemented in `schedule_insn' as
     insns move from the ready list to the scheduled list.
@@ -473,6 +452,7 @@ static void split_hard_reg_notes PROTO ((rtx, rtx, rtx));
  static void new_insn_dead_notes PROTO ((rtx, rtx, rtx, rtx));
  static void update_n_sets PROTO ((rtx, int));
  static void update_flow_info PROTO ((rtx, rtx, rtx, rtx));
+static char *safe_concat PROTO ((char *, char *, char *));
  
  /* Main entry point of this file.  */
  void schedule_insns PROTO ((FILE *));
@@ -518,8 +498,8 @@ extern rtx forced_labels;
  
  
  static int is_cfg_nonregular PROTO ((void));
-void debug_control_flow PROTO ((void));
-static int build_control_flow PROTO ((void));
+static int build_control_flow PROTO ((int_list_ptr *, int_list_ptr *,
+                                     int *, int *));
  static void new_edge PROTO ((int, int));
  
  
@@ -560,7 +540,8 @@ static int *containing_rgn;
  
  void debug_regions PROTO ((void));
  static void find_single_block_region PROTO ((void));
-static void find_rgns PROTO ((void));
+static void find_rgns PROTO ((int_list_ptr *, int_list_ptr *,
+                             int *, int *, sbitmap *));
  static int too_large PROTO ((int, int *, int *));
  
  extern void debug_live PROTO ((int, int));
@@ -713,6 +694,7 @@ static int find_conditional_protection PROTO ((rtx, int));
  static int is_conditionally_protected PROTO ((rtx, int, int));
  static int may_trap_exp PROTO ((rtx, int));
  static int haifa_classify_insn PROTO ((rtx));
+static int is_prisky PROTO ((rtx, int, int));
  static int is_exception_free PROTO ((rtx, int, int));
  
  static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx));
@@ -759,6 +741,7 @@ static void get_block_head_tail PROTO ((int, rtx *, rtx *));
  static void find_pre_sched_live PROTO ((int));
  static void find_post_sched_live PROTO ((int));
  static void update_reg_usage PROTO ((void));
+static int queue_to_ready PROTO ((rtx [], int));
  
  void debug_ready_list PROTO ((rtx[], int));
  static void init_target_units PROTO (());
@@ -822,7 +805,7 @@ free_list (listp, unused_listp)
    *listp = 0;
  }
  
-rtx
+static rtx
  alloc_INSN_LIST (val, next)
       rtx val, next;
  {
@@ -842,7 +825,7 @@ alloc_INSN_LIST (val, next)
    return r;
  }
  
-rtx
+static rtx
  alloc_EXPR_LIST (kind, val, next)
       int kind;
       rtx val, next;
@@ -990,6 +973,10 @@ schedule_insns (dump_file)
  #define __inline
  #endif
  
+#ifndef HAIFA_INLINE
+#define HAIFA_INLINE __inline
+#endif
+
  /* Computation of memory dependencies.  */
  
  /* The *_insns and *_mems are paired lists.  Each pending memory operation
@@ -1133,51 +1120,6 @@ is_cfg_nonregular ()
    return 0;
  }
  
-/* Print the control flow graph, for debugging purposes.
-   Callable from the debugger.  */
-
-void
-debug_control_flow ()
-{
-  int i, e, next;
-
-  fprintf (dump, ";;   --------- CONTROL FLOW GRAPH --------- \n\n");
-
-  for (i = 0; i < n_basic_blocks; i++)
-    {
-      fprintf (dump, ";;\tBasic block %d: first insn %d, last %d.\n",
-              i,
-              INSN_UID (basic_block_head[i]),
-              INSN_UID (basic_block_end[i]));
-
-      fprintf (dump, ";;\tPredecessor blocks:");
-      for (e = IN_EDGES (i); e; e = next)
-       {
-         fprintf (dump, " %d", FROM_BLOCK (e));
-
-         next = NEXT_IN (e);
-
-         if (next == IN_EDGES (i))
-           break;
-       }
-
-      fprintf (dump, "\n;;\tSuccesor blocks:");
-      for (e = OUT_EDGES (i); e; e = next)
-       {
-         fprintf (dump, " %d", TO_BLOCK (e));
-
-         next = NEXT_OUT (e);
-
-         if (next == OUT_EDGES (i))
-           break;
-       }
-
-      fprintf (dump, " \n\n");
-
-    }
-}
-
-
  /* Build the control flow graph and set nr_edges.
  
     Instead of trying to build a cfg ourselves, we rely on flow to
@@ -1187,40 +1129,31 @@ debug_control_flow ()
     prevent cross block scheduling.  */
  
  static int
-build_control_flow ()
+build_control_flow (s_preds, s_succs, num_preds, num_succs)
+     int_list_ptr *s_preds;
+     int_list_ptr *s_succs;
+     int *num_preds;
+     int *num_succs;
  {
    int i;
-  int_list_ptr *s_preds;
-  int_list_ptr *s_succs;
    int_list_ptr succ;
-  int *num_preds;
-  int *num_succs;
    int unreachable;
  
-  /* The scheduler runs after flow; therefore, we can't blindly call
-     back into find_basic_blocks since doing so could invalidate the
-     info in basic_block_live_at_start.
-
-     Consider a block consisting entirely of dead stores; after life
-     analysis it would be a block of NOTE_INSN_DELETED notes.  If
-     we call find_basic_blocks again, then the block would be removed
-     entirely and invalidate our the register live information.
-
-     We could (should?) recompute register live information.  Doing
-     so may even be beneficial.  */
-  s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
-  s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
-  num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
-  num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
-  compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
-
    /* Count the number of edges in the cfg.  */
    nr_edges = 0;
    unreachable = 0;
    for (i = 0; i < n_basic_blocks; i++)
      {
        nr_edges += num_succs[i];
-      if (num_preds[i] == 0)
+
+      /* Unreachable loops with more than one basic block are detected
+        during the DFS traversal in find_rgns.
+
+        Unreachable loops with a single block are detected here.  This
+        test is redundant with the one in find_rgns, but it's much
+        cheaper to go ahead and catch the trivial case here.  */
+      if (num_preds[i] == 0
+         || (num_preds[i] == 1 && INT_LIST_VAL (s_preds[i]) == i))
         unreachable = 1;
      }
  
@@ -1246,9 +1179,6 @@ build_control_flow ()
    /* increment by 1, since edge 0 is unused.  */
    nr_edges++;
  
-  /* For now.  This will move as more and more of haifa is converted
-     to using the cfg code in flow.c  */
-  free_bb_mem ();
    return unreachable;
  }
  
@@ -1477,7 +1407,7 @@ too_large (block, num_bbs, num_insns)
    (*num_bbs)++;
    (*num_insns) += (INSN_LUID (basic_block_end[block]) -
                    INSN_LUID (basic_block_head[block]));
-  if ((*num_bbs > max_rgn_blocks) || (*num_insns > max_rgn_insns))
+  if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
      return 1;
    else
      return 0;
@@ -1492,321 +1422,425 @@ too_large (block, num_bbs, num_insns)
    if (max_hdr[blk] == -1)                                            \
      max_hdr[blk] = hdr;                                              \
    else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])                       \
-         inner[hdr] = 0;                                             \
+         RESET_BIT (inner, hdr);                                     \
    else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])                       \
           {                                                           \
-            inner[max_hdr[blk]] = 0;                                 \
+            RESET_BIT (inner,max_hdr[blk]);                         \
              max_hdr[blk] = hdr;                                      \
           }                                                           \
  }
  
  
-/* Find regions for interblock scheduling: a loop-free procedure, a reducible
-   inner loop, or a basic block not contained in any other region.
-   The procedures control flow graph is traversed twice.
-   First traversal, a DFS, finds the headers of inner loops  in the graph,
-   and verifies that there are no unreacable blocks.
-   Second traversal processes headers of inner loops, checking that the
-   loop is reducible.  The loop blocks that form a region are put into the
-   region's blocks list in topological order.
+/* Find regions for interblock scheduling.
+
+   A region for scheduling can be:
+
+     * A loop-free procedure, or
  
-   The following variables are changed by the function: rgn_nr, rgn_table,
-   rgn_bb_table, block_to_bb and containing_rgn.  */
+     * A reducible inner loop, or
+
+     * A basic block not contained in any other region.
+
+
+   ?!? In theory we could build other regions based on extended basic
+   blocks or reverse extended basic blocks.  Is it worth the trouble?
+
+   Loop blocks that form a region are put into the region's block list
+   in topological order.
+
+   This procedure stores its results into the following global (ick) variables
+
+     * rgn_nr
+     * rgn_table
+     * rgn_bb_table
+     * block_to_bb
+     * containing region
+
+
+   We use dominator relationships to avoid making regions out of non-reducible
+   loops.
+
+   This procedure needs to be converted to work on pred/succ lists instead
+   of edge tables.  That would simplify it somewhat.  */
  
  static void
-find_rgns ()
+find_rgns (s_preds, s_succs, num_preds, num_succs, dom)
+     int_list_ptr *s_preds;
+     int_list_ptr *s_succs;
+     int *num_preds;
+     int *num_succs;
+     sbitmap *dom;
  {
    int *max_hdr, *dfs_nr, *stack, *queue, *degree;
-  char *header, *inner, *passed, *in_stack, *in_queue, no_loops = 1;
-  int node, child, loop_head, i, j, fst_edge, head, tail;
+  char no_loops = 1;
+  int node, child, loop_head, i, head, tail;
    int count = 0, sp, idx = 0, current_edge = out_edges[0];
-  int num_bbs, num_insns;
+  int num_bbs, num_insns, unreachable;
    int too_large_failure;
  
-  /*
-     The following data structures are computed by the first traversal and
-     are used by the second traversal:
-     header[i] - flag set if the block i is the header of a loop.
-     inner[i] - initially set. It is reset if the the block i is the header
-     of a non-inner loop.
-     max_hdr[i] - the header of the inner loop containing block i.
-     (for a block i not in an inner loop it may be -1 or the
-     header of the most inner loop containing the block).
-
-     These data structures are used by the first traversal only:
-     stack - non-recursive DFS implementation which uses a stack of edges.
-     sp - top of the stack of edges
-     dfs_nr[i] - the DFS ordering of block i.
-     in_stack[i] - flag set if the block i is in the DFS stack.
-
-     These data structures are used by the second traversal only:
-     queue - queue containing the blocks of the current region.
-     head and tail - queue boundaries.
-     in_queue[i] - flag set if the block i is in queue */
-
-  /* function's inner arrays allocation and initialization */
+  /* Note if an edge has been passed.  */
+  sbitmap passed;
+
+  /* Note if a block is a natural loop header.  */
+  sbitmap header;
+
+  /* Note if a block is an natural inner loop header.  */
+  sbitmap inner;
+
+  /* Note if a block is in the block queue. */
+  sbitmap in_queue;
+
+  /* Note if a block is in the block queue. */
+  sbitmap in_stack;
+
+  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
+     and a mapping from block to its loop header (if the block is contained
+     in a loop, else -1).
+
+     Store results in HEADER, INNER, and MAX_HDR respectively, these will
+     be used as inputs to the second traversal.
+
+     STACK, SP and DFS_NR are only used during the first traversal.  */
+
+  /* Allocate and initialize variables for the first traversal.  */
    max_hdr = (int *) alloca (n_basic_blocks * sizeof (int));
    dfs_nr = (int *) alloca (n_basic_blocks * sizeof (int));
    bzero ((char *) dfs_nr, n_basic_blocks * sizeof (int));
    stack = (int *) alloca (nr_edges * sizeof (int));
-  queue = (int *) alloca (n_basic_blocks * sizeof (int));
  
-  inner = (char *) alloca (n_basic_blocks * sizeof (char));
-  header = (char *) alloca (n_basic_blocks * sizeof (char));
-  bzero ((char *) header, n_basic_blocks * sizeof (char));
-  passed = (char *) alloca (nr_edges * sizeof (char));
-  bzero ((char *) passed, nr_edges * sizeof (char));
-  in_stack = (char *) alloca (nr_edges * sizeof (char));
-  bzero ((char *) in_stack, nr_edges * sizeof (char));
+  inner = sbitmap_alloc (n_basic_blocks);
+  sbitmap_ones (inner);
+
+  header = sbitmap_alloc (n_basic_blocks);
+  sbitmap_zero (header);
+
+  passed = sbitmap_alloc (nr_edges);
+  sbitmap_zero (passed);
+
+  in_queue = sbitmap_alloc (n_basic_blocks);
+  sbitmap_zero (in_queue);
  
-  in_queue = (char *) alloca (n_basic_blocks * sizeof (char));
+  in_stack = sbitmap_alloc (n_basic_blocks);
+  sbitmap_zero (in_stack);
  
    for (i = 0; i < n_basic_blocks; i++)
-    {
-      inner[i] = 1;
-      max_hdr[i] = -1;
-    }
+    max_hdr[i] = -1;
  
-  /* First traversal: DFS, finds inner loops in control flow graph */
+  /* DFS traversal to find inner loops in the cfg.  */
  
    sp = -1;
    while (1)
      {
-      if (current_edge == 0 || passed[current_edge])
+      if (current_edge == 0 || TEST_BIT (passed, current_edge))
         {
-         /*  Here, if  current_edge <  0, this is  a leaf  block.
-            Otherwise current_edge  was already passed.  Note that in
-            the latter case, not  only current_edge but also  all its
-            NEXT_OUT edges are also passed.   We have to "climb up on
-            edges in  the  stack", looking for the  first  (already
-            passed) edge whose NEXT_OUT was not passed yet.  */
-
-         while (sp >= 0 && (current_edge == 0 || passed[current_edge]))
+         /* We have reached a leaf node or a node that was already
+            processed.  Pop edges off the stack until we find
+            an edge that has not yet been processed.  */
+         while (sp >= 0
+                && (current_edge == 0 || TEST_BIT (passed, current_edge)))
             {
+             /* Pop entry off the stack.  */
               current_edge = stack[sp--];
               node = FROM_BLOCK (current_edge);
               child = TO_BLOCK (current_edge);
-             in_stack[child] = 0;
-             if (max_hdr[child] >= 0 && in_stack[max_hdr[child]])
+             RESET_BIT (in_stack, child);
+             if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
                 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
               current_edge = NEXT_OUT (current_edge);
             }
  
-         /* stack empty - the whole graph is traversed.  */
-         if (sp < 0 && passed[current_edge])
+         /* See if have finished the DFS tree traversal.  */
+         if (sp < 0 && TEST_BIT (passed, current_edge))
             break;
+
+         /* Nope, continue the traversal with the popped node.  */
           continue;
         }
  
+      /* Process a node.  */
        node = FROM_BLOCK (current_edge);
-      dfs_nr[node] = ++count;
-      in_stack[node] = 1;
        child = TO_BLOCK (current_edge);
+      SET_BIT (in_stack, node);
+      dfs_nr[node] = ++count;
  
-      /* found a loop header */
-      if (in_stack[child])
+      /* If the successor is in the stack, then we've found a loop.
+        Mark the loop, if it is not a natural loop, then it will
+        be rejected during the second traversal.  */
+      if (TEST_BIT (in_stack, child))
         {
           no_loops = 0;
-         header[child] = 1;
-         max_hdr[child] = child;
+         SET_BIT (header, child);
           UPDATE_LOOP_RELATIONS (node, child);
-         passed[current_edge] = 1;
+         SET_BIT (passed, current_edge);
           current_edge = NEXT_OUT (current_edge);
           continue;
         }
  
-      /* the  child was already visited once, no need to go down from
-         it, everything is traversed there.  */
+      /* If the child was already visited, then there is no need to visit
+        it again.  Just update the loop relationships and restart
+        with a new edge.  */
        if (dfs_nr[child])
         {
-         if (max_hdr[child] >= 0 && in_stack[max_hdr[child]])
+         if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
             UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
-         passed[current_edge] = 1;
+         SET_BIT (passed, current_edge);
           current_edge = NEXT_OUT (current_edge);
           continue;
         }
  
-      /* this is a step down in the dfs traversal */
+      /* Push an entry on the stack and continue DFS traversal.  */
        stack[++sp] = current_edge;
-      passed[current_edge] = 1;
+      SET_BIT (passed, current_edge);
        current_edge = OUT_EDGES (child);
-    }                          /* while (1); */
-
-  /* Second travsersal: find reducible inner loops, and sort
-     topologically the blocks of each region */
-  degree = dfs_nr;             /* reuse dfs_nr array - it is not needed anymore */
-  bzero ((char *) in_queue, n_basic_blocks * sizeof (char));
+    }
  
-  if (no_loops)
-    header[0] = 1;
+  /* Another check for unreachable blocks.  The earlier test in
+     is_cfg_nonregular only finds unreachable blocks that do not
+     form a loop.
  
-  /* compute the in-degree of every block in the graph */
+     The DFS traversal will mark every block that is reachable from
+     the entry node by placing a nonzero value in dfs_nr.  Thus if
+     dfs_nr is zero for any block, then it must be unreachable.  */
+  unreachable = 0;
    for (i = 0; i < n_basic_blocks; i++)
-    {
-      fst_edge = IN_EDGES (i);
-      if (fst_edge > 0)
-       {
-         degree[i] = 1;
-         current_edge = NEXT_IN (fst_edge);
-         while (fst_edge != current_edge)
-           {
-             ++degree[i];
-             current_edge = NEXT_IN (current_edge);
-           }
-       }
-      else
-       degree[i] = 0;
-    }
+    if (dfs_nr[i] == 0)
+      {
+       unreachable = 1;
+       break;
+      }
  
-  /* pass through all graph blocks, looking for headers of inner loops */
+  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
+     to hold degree counts.  */
+  degree = dfs_nr;
+
+  /* Compute the in-degree of every block in the graph */
    for (i = 0; i < n_basic_blocks; i++)
+    degree[i] = num_preds[i];
+
+  /* Do not perform region scheduling if there are any unreachable
+     blocks.  */
+  if (!unreachable)
      {
+      if (no_loops)
+       SET_BIT (header, 0);
  
-      if (header[i] && inner[i])
-       {
+      /* Second travsersal:find reducible inner loops and topologically sort
+        block of each region.  */
  
-         /* i is a header of a potentially reducible inner loop, or
-            block 0 in a subroutine with no loops at all */
-         head = tail = -1;
-         too_large_failure = 0;
-         loop_head = max_hdr[i];
+      queue = (int *) alloca (n_basic_blocks * sizeof (int));
  
-         /* decrease in_degree of all i's successors, (this is needed
-            for the topological ordering) */
-         fst_edge = current_edge = OUT_EDGES (i);
-         if (fst_edge > 0)
+      /* Find blocks which are inner loop headers.  We still have non-reducible
+        loops to consider at this point.  */
+      for (i = 0; i < n_basic_blocks; i++)
+       {
+         if (TEST_BIT (header, i) && TEST_BIT (inner, i))
             {
-             do
+             int_list_ptr ps;
+             int j;
+
+             /* Now check that the loop is reducible.  We do this separate
+                from finding inner loops so that we do not find a reducible
+                loop which contains an inner  non-reducible loop.
+
+                A simple way to find reducible/natrual loops is to verify
+                that each block in the loop is dominated by the loop
+                header.
+
+                If there exists a block that is not dominated by the loop
+                header, then the block is reachable from outside the loop
+                and thus the loop is not a natural loop.  */
+             for (j = 0; j < n_basic_blocks; j++)      
                 {
-                 --degree[TO_BLOCK (current_edge)];
-                 current_edge = NEXT_OUT (current_edge);
+                 /* First identify blocks in the loop, except for the loop
+                    entry block.  */
+                 if (i == max_hdr[j] && i != j)
+                   {
+                     /* Now verify that the block is dominated by the loop
+                        header.  */
+                     if (!TEST_BIT (dom[j], i))
+                       break;
+                   }
                 }
-             while (fst_edge != current_edge);
-           }
  
-         /* estimate # insns, and count # blocks in the region.  */
-         num_bbs = 1;
-         num_insns = INSN_LUID (basic_block_end[i]) - INSN_LUID (basic_block_head[i]);
+             /* If we exited the loop early, then I is the header of a non
+                reducible loop and we should quit processing it now.  */
+             if (j != n_basic_blocks)
+               continue;
  
+             /* I is a header of an inner loop, or block 0 in a subroutine
+                with no loops at all.  */
+             head = tail = -1;
+             too_large_failure = 0;
+             loop_head = max_hdr[i];
+
+             /* Decrease degree of all I's successors for topological
+                ordering.  */
+             for (ps = s_succs[i]; ps; ps = ps->next)
+               if (INT_LIST_VAL (ps) != EXIT_BLOCK
+                   && INT_LIST_VAL (ps) != ENTRY_BLOCK)
+                 --degree[INT_LIST_VAL(ps)];
+
+             /* Estimate # insns, and count # blocks in the region.  */
+             num_bbs = 1;
+             num_insns = (INSN_LUID (basic_block_end[i])
+                          - INSN_LUID (basic_block_head[i]));
  
-         /* find all loop latches, if it is a true loop header, or
-            all leaves if the graph has no loops at all */
-         if (no_loops)
-           {
-             for (j = 0; j < n_basic_blocks; j++)
-               if (out_edges[j] == 0)  /* a leaf */
-                 {
-                   queue[++tail] = j;
-                   in_queue[j] = 1;
  
-                   if (too_large (j, &num_bbs, &num_insns))
+             /* Find all loop latches (blocks which back edges to the loop
+                header) or all the leaf blocks in the cfg has no loops.
+
+                Place those blocks into the queue.  */
+             if (no_loops)
+               {
+                 for (j = 0; j < n_basic_blocks; j++)
+                   /* Leaf nodes have only a single successor which must
+                      be EXIT_BLOCK.  */
+                   if (num_succs[j] == 1
+                       && INT_LIST_VAL (s_succs[j]) == EXIT_BLOCK)
                       {
-                       too_large_failure = 1;
-                       break;
+                       queue[++tail] = j;
+                       SET_BIT (in_queue, j);
+
+                       if (too_large (j, &num_bbs, &num_insns))
+                         {
+                           too_large_failure = 1;
+                           break;
+                         }
                       }
-                 }
-           }
-         else
-           {
-             fst_edge = current_edge = IN_EDGES (i);
-             do
+               }
+             else
                 {
-                 node = FROM_BLOCK (current_edge);
-                 if (max_hdr[node] == loop_head && node != i)  /* a latch */
+                 int_list_ptr ps;
+
+                 for (ps = s_preds[i]; ps; ps = ps->next)
                     {
-                     queue[++tail] = node;
-                     in_queue[node] = 1;
+                     node = INT_LIST_VAL (ps);
  
-                     if (too_large (node, &num_bbs, &num_insns))
+                     if (node == ENTRY_BLOCK || node == EXIT_BLOCK)
+                       continue;
+ 
+                     if (max_hdr[node] == loop_head && node != i)
                         {
-                         too_large_failure = 1;
-                         break;
+                         /* This is a loop latch.  */
+                         queue[++tail] = node;
+                         SET_BIT (in_queue, node);
+
+                         if (too_large (node, &num_bbs, &num_insns))
+                           {
+                             too_large_failure = 1;
+                             break;
+                           }
                         }
+                     
                     }
-                 current_edge = NEXT_IN (current_edge);
                 }
-             while (fst_edge != current_edge);
-           }
  
-         /* Put in queue[] all blocks that belong to the loop.  Check
-            that the loop is reducible, traversing back from the loop
-            latches up to the loop header.  */
-         while (head < tail && !too_large_failure)
-           {
-             child = queue[++head];
-             fst_edge = current_edge = IN_EDGES (child);
-             do
+             /* Now add all the blocks in the loop to the queue.
+
+            We know the loop is a natural loop; however the algorithm
+            above will not always mark certain blocks as being in the
+            loop.  Consider:
+               node   children
+                a        b,c
+                b        c
+                c        a,d
+                d        b
+
+
+            The algorithm in the DFS traversal may not mark B & D as part
+            of the loop (ie they will not have max_hdr set to A).
+
+            We know they can not be loop latches (else they would have
+            had max_hdr set since they'd have a backedge to a dominator
+            block).  So we don't need them on the initial queue.
+
+            We know they are part of the loop because they are dominated
+            by the loop header and can be reached by a backwards walk of
+            the edges starting with nodes on the initial queue.
+
+            It is safe and desirable to include those nodes in the
+            loop/scheduling region.  To do so we would need to decrease
+            the degree of a node if it is the target of a backedge
+            within the loop itself as the node is placed in the queue.
+
+            We do not do this because I'm not sure that the actual
+            scheduling code will properly handle this case. ?!? */
+       
+             while (head < tail && !too_large_failure)
                 {
-                 node = FROM_BLOCK (current_edge);
+                 int_list_ptr ps;
+                 child = queue[++head];
  
-                 if (max_hdr[node] != loop_head)
-                   {           /* another entry to loop, it is irreducible */
-                     tail = -1;
-                     break;
-                   }
-                 else if (!in_queue[node] && node != i)
+                 for (ps = s_preds[child]; ps; ps = ps->next)
                     {
-                     queue[++tail] = node;
-                     in_queue[node] = 1;
+                     node = INT_LIST_VAL (ps);
  
-                     if (too_large (node, &num_bbs, &num_insns))
+                     /* See discussion above about nodes not marked as in
+                        this loop during the initial DFS traversal.  */
+                     if (node == ENTRY_BLOCK || node == EXIT_BLOCK
+                         || max_hdr[node] != loop_head)
                         {
-                         too_large_failure = 1;
+                         tail = -1;
                           break;
                         }
+                     else if (!TEST_BIT (in_queue, node) && node != i)
+                       {
+                         queue[++tail] = node;
+                         SET_BIT (in_queue, node);
+
+                         if (too_large (node, &num_bbs, &num_insns))
+                           {
+                             too_large_failure = 1;
+                             break;
+                           }
+                       }
                     }
-                 current_edge = NEXT_IN (current_edge);
                 }
-             while (fst_edge != current_edge);
-           }
  
-         if (tail >= 0 && !too_large_failure)
-           {
-             /* Place the loop header into list of region blocks */
-             degree[i] = -1;
-             rgn_bb_table[idx] = i;
-             RGN_NR_BLOCKS (nr_regions) = num_bbs;
-             RGN_BLOCKS (nr_regions) = idx++;
-             CONTAINING_RGN (i) = nr_regions;
-             BLOCK_TO_BB (i) = count = 0;
-
-             /* remove blocks from queue[], (in topological order), when
-                their  in_degree becomes 0.  We  scan  the queue over and
-                over  again until   it is empty.   Note: there may be a more
-                efficient way to do it.  */
-             while (tail >= 0)
+             if (tail >= 0 && !too_large_failure)
                 {
-                 if (head < 0)
-                   head = tail;
-                 child = queue[head];
-                 if (degree[child] == 0)
+                 /* Place the loop header into list of region blocks.  */
+                 degree[i] = -1;
+                 rgn_bb_table[idx] = i;
+                 RGN_NR_BLOCKS (nr_regions) = num_bbs;
+                 RGN_BLOCKS (nr_regions) = idx++;
+                 CONTAINING_RGN (i) = nr_regions;
+                 BLOCK_TO_BB (i) = count = 0;
+
+                 /* Remove blocks from queue[] when their in degree becomes
+                zero.  Repeat until no blocks are left on the list.  This
+                produces a topological list of blocks in the region.  */
+                 while (tail >= 0)
                     {
-                     degree[child] = -1;
-                     rgn_bb_table[idx++] = child;
-                     BLOCK_TO_BB (child) = ++count;
-                     CONTAINING_RGN (child) = nr_regions;
-                     queue[head] = queue[tail--];
-                     fst_edge = current_edge = OUT_EDGES (child);
-
-                     if (fst_edge > 0)
+                     int_list_ptr ps;
+
+                     if (head < 0)
+                       head = tail;
+                     child = queue[head];
+                     if (degree[child] == 0)
                         {
-                         do
-                           {
-                             --degree[TO_BLOCK (current_edge)];
-                             current_edge = NEXT_OUT (current_edge);
-                           }
-                         while (fst_edge != current_edge);
+                         degree[child] = -1;
+                         rgn_bb_table[idx++] = child;
+                         BLOCK_TO_BB (child) = ++count;
+                         CONTAINING_RGN (child) = nr_regions;
+                         queue[head] = queue[tail--];
+
+                         for (ps = s_succs[child]; ps; ps = ps->next)
+                           if (INT_LIST_VAL (ps) != ENTRY_BLOCK
+                               && INT_LIST_VAL (ps) != EXIT_BLOCK)
+                             --degree[INT_LIST_VAL (ps)];
                         }
+                     else
+                       --head;
                     }
-                 else
-                   --head;
+                 ++nr_regions;
                 }
-             ++nr_regions;
             }
         }
      }
  
-  /* define each of all other blocks as a region itself */
+  /* Any block that did not end up in a region is placed into a region
+     by itself.  */
    for (i = 0; i < n_basic_blocks; i++)
      if (degree[i] >= 0)
        {
@@ -1817,7 +1851,12 @@ find_rgns ()
         BLOCK_TO_BB (i) = 0;
        }
  
-}                              /* find_rgns */
+  free (passed);
+  free (header);
+  free (inner);
+  free (in_queue);
+  free (in_stack);
+}
  
  
  /* functions for regions scheduling information */
@@ -2707,7 +2746,7 @@ is_exception_free (insn, bb_src, bb_trg)
  /* Return the INSN_LIST containing INSN in LIST, or NULL
     if LIST does not contain INSN.  */
  
-__inline static rtx
+HAIFA_INLINE static rtx
  find_insn_list (insn, list)
       rtx insn;
       rtx list;
@@ -2724,7 +2763,7 @@ find_insn_list (insn, list)
  
  /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0 otherwise.  */
  
-__inline static char
+HAIFA_INLINE static char
  find_insn_mem_list (insn, x, list, list1)
       rtx insn, x;
       rtx list, list1;
@@ -2747,7 +2786,7 @@ find_insn_mem_list (insn, x, list, list1)
     mask if the value is negative.  A function unit index is the
     non-negative encoding.  */
  
-__inline static int
+HAIFA_INLINE static int
  insn_unit (insn)
       rtx insn;
  {
@@ -2784,7 +2823,7 @@ insn_unit (insn)
     These values are encoded in an int where the upper half gives the
     minimum value and the lower half gives the maximum value.  */
  
-__inline static unsigned int
+HAIFA_INLINE static unsigned int
  blockage_range (unit, insn)
       int unit;
       rtx insn;
@@ -2831,7 +2870,7 @@ clear_units ()
  
  /* Return the issue-delay of an insn */
  
-__inline static int
+HAIFA_INLINE static int
  insn_issue_delay (insn)
       rtx insn;
  {
@@ -2861,7 +2900,7 @@ insn_issue_delay (insn)
     instance INSTANCE at time CLOCK if the previous actual hazard cost
     was COST.  */
  
-__inline static int
+HAIFA_INLINE static int
  actual_hazard_this_instance (unit, instance, insn, clock, cost)
       int unit, instance, clock, cost;
       rtx insn;
@@ -2898,7 +2937,7 @@ actual_hazard_this_instance (unit, instance, insn, clock, cost)
  /* Record INSN as having begun execution on the units encoded by UNIT at
     time CLOCK.  */
  
-__inline static void
+HAIFA_INLINE static void
  schedule_unit (unit, insn, clock)
       int unit, clock;
       rtx insn;
@@ -2930,7 +2969,7 @@ schedule_unit (unit, insn, clock)
  /* Return the actual hazard cost of executing INSN on the units encoded by
     UNIT at time CLOCK if the previous actual hazard cost was COST.  */
  
-__inline static int
+HAIFA_INLINE static int
  actual_hazard (unit, insn, clock, cost)
       int unit, clock, cost;
       rtx insn;
@@ -2979,7 +3018,7 @@ actual_hazard (unit, insn, clock, cost)
     to be used is chosen in preference to one with a unit that is less
     used.  We are trying to minimize a subsequent actual hazard.  */
  
-__inline static int
+HAIFA_INLINE static int
  potential_hazard (unit, insn, cost)
       int unit, cost;
       rtx insn;
@@ -3024,7 +3063,7 @@ potential_hazard (unit, insn, cost)
     This is the number of cycles between instruction issue and
     instruction results.  */
  
-__inline static int
+HAIFA_INLINE static int
  insn_cost (insn, link, used)
       rtx insn, link, used;
  {
@@ -3862,6 +3901,8 @@ sched_analyze (head, tail)
                    || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
                    || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
                    || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
+                  || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_START
+                  || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END
                    || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
                        && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
         {
@@ -3989,7 +4030,7 @@ rank_for_schedule (x, y)
    rtx tmp = *(rtx *)y;
    rtx tmp2 = *(rtx *)x;
    rtx link;
-  int tmp_class, tmp2_class;
+  int tmp_class, tmp2_class, depend_count1, depend_count2;
    int val, priority_val, spec_val, prob_val, weight_val;
  
  
@@ -4050,6 +4091,21 @@ rank_for_schedule (x, y)
         return val;
      }
  
+  /* Prefer the insn which has more later insns that depend on it. 
+     This gives the scheduler more freedom when scheduling later
+     instructions at the expense of added register pressure.  */
+  depend_count1 = 0;
+  for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
+    depend_count1++;
+
+  depend_count2 = 0;
+  for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
+    depend_count2++;
+
+  val = depend_count2 - depend_count1;
+  if (val)
+    return val;
+  
    /* If insns are equally good, sort by INSN_LUID (original insn order),
       so that we make the sort stable.  This minimizes instruction movement,
       thus minimizing sched's effect on debugging and cross-jumping.  */
@@ -4058,7 +4114,7 @@ rank_for_schedule (x, y)
  
  /* Resort the array A in which only element at index N may be out of order.  */
  
-__inline static void
+HAIFA_INLINE static void
  swap_sort (a, n)
       rtx *a;
       int n;
@@ -4080,7 +4136,7 @@ static int max_priority;
     N_CYCLES after the currently executing insn.  Preserve insns
     chain for debugging purposes.  */
  
-__inline static void
+HAIFA_INLINE static void
  queue_insn (insn, n_cycles)
       rtx insn;
       int n_cycles;
@@ -4105,7 +4161,7 @@ queue_insn (insn, n_cycles)
  /* Return nonzero if PAT is the pattern of an insn which makes a
     register live.  */
  
-__inline static int
+HAIFA_INLINE static int
  birthing_insn_p (pat)
       rtx pat;
  {
@@ -4141,7 +4197,7 @@ birthing_insn_p (pat)
  /* PREV is an insn that is ready to execute.  Adjust its priority if that
     will help shorten register lifetimes.  */
  
-__inline static void
+HAIFA_INLINE static void
  adjust_priority (prev)
       rtx prev;
  {
@@ -4619,6 +4675,8 @@ unlink_other_notes (insn, tail)
        if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
           && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
           && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
+         && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_START
+         && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
           && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
           && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
         {
@@ -4668,7 +4726,7 @@ unlink_line_notes (insn, tail)
  
  /* Return the head and tail pointers of BB.  */
  
-__inline static void
+HAIFA_INLINE static void
  get_block_head_tail (bb, headp, tailp)
       int bb;
       rtx *headp;
@@ -5575,6 +5633,28 @@ init_block_visualization ()
  
  #define BUF_LEN 256
  
+static char *
+safe_concat (buf, cur, str)
+     char *buf;
+     char *cur;
+     char *str;
+{
+  char *end = buf + BUF_LEN - 2;       /* leave room for null */
+  int c;
+
+  if (cur > end)
+    {
+      *end = '\0';
+      return end;
+    }
+
+  while (cur < end && (c = *str++) != '\0')
+    *cur++ = c;
+
+  *cur = '\0';
+  return cur;
+}
+
  /* This recognizes rtx, I classified as expressions. These are always */
  /* represent some action on values or results of other expression, */
  /* that may be stored in objects representing values.  */
@@ -5585,332 +5665,335 @@ print_exp (buf, x, verbose)
       rtx x;
       int verbose;
  {
-  char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
+  char tmp[BUF_LEN];
+  char *st[4];
+  char *cur = buf;
+  char *fun = (char *)0;
+  char *sep;
+  rtx op[4];
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      st[i] = (char *)0;
+      op[i] = NULL_RTX;
+    }
  
    switch (GET_CODE (x))
      {
      case PLUS:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s+%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "+";
+      op[1] = XEXP (x, 1);
        break;
      case LO_SUM:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%sl+%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "+low(";
+      op[1] = XEXP (x, 1);
+      st[2] = ")";
        break;
      case MINUS:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s-%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "-";
+      op[1] = XEXP (x, 1);
        break;
      case COMPARE:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s??%s", t1, t2);
+      fun = "cmp";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case NEG:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "-%s", t1);
+      st[0] = "-";
+      op[0] = XEXP (x, 0);
        break;
      case MULT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s*%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "*";
+      op[1] = XEXP (x, 1);
        break;
      case DIV:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s/%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "/";
+      op[1] = XEXP (x, 1);
        break;
      case UDIV:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%su/%s", t1, t2);
+      fun = "udiv";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case MOD:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s%%%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "%";
+      op[1] = XEXP (x, 1);
        break;
      case UMOD:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%su%%%s", t1, t2);
+      fun = "umod";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case SMIN:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "smin (%s, %s)", t1, t2);
+      fun = "smin";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case SMAX:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "smax(%s,%s)", t1, t2);
+      fun = "smax";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case UMIN:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "umin (%s, %s)", t1, t2);
+      fun = "umin";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case UMAX:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "umax(%s,%s)", t1, t2);
+      fun = "umax";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case NOT:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "!%s", t1);
+      st[0] = "!";
+      op[0] = XEXP (x, 0);
        break;
      case AND:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s&%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "&";
+      op[1] = XEXP (x, 1);
        break;
      case IOR:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s|%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "|";
+      op[1] = XEXP (x, 1);
        break;
      case XOR:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s^%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "^";
+      op[1] = XEXP (x, 1);
        break;
      case ASHIFT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s<<%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "<<";
+      op[1] = XEXP (x, 1);
        break;
      case LSHIFTRT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s0>%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = " 0>>";
+      op[1] = XEXP (x, 1);
        break;
      case ASHIFTRT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s>>%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = ">>";
+      op[1] = XEXP (x, 1);
        break;
      case ROTATE:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s<-<%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "<-<";
+      op[1] = XEXP (x, 1);
        break;
      case ROTATERT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s>->%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = ">->";
+      op[1] = XEXP (x, 1);
        break;
      case ABS:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "abs(%s)", t1);
+      fun = "abs";
+      op[0] = XEXP (x, 0);
        break;
      case SQRT:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "sqrt(%s)", t1);
+      fun = "sqrt";
+      op[0] = XEXP (x, 0);
        break;
      case FFS:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "ffs(%s)", t1);
+      fun = "ffs";
+      op[0] = XEXP (x, 0);
        break;
      case EQ:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s == %s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "==";
+      op[1] = XEXP (x, 1);
        break;
      case NE:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s!=%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "!=";
+      op[1] = XEXP (x, 1);
        break;
      case GT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s>%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = ">";
+      op[1] = XEXP (x, 1);
        break;
      case GTU:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s>u%s", t1, t2);
+      fun = "gtu";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case LT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s<%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "<";
+      op[1] = XEXP (x, 1);
        break;
      case LTU:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s<u%s", t1, t2);
+      fun = "ltu";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case GE:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s>=%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = ">=";
+      op[1] = XEXP (x, 1);
        break;
      case GEU:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s>=u%s", t1, t2);
+      fun = "geu";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case LE:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s<=%s", t1, t2);
+      op[0] = XEXP (x, 0);
+      st[1] = "<=";
+      op[1] = XEXP (x, 1);
        break;
      case LEU:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      sprintf (buf, "%s<=u%s", t1, t2);
+      fun = "leu";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
        break;
      case SIGN_EXTRACT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      print_value (t3, XEXP (x, 2), verbose);
-      if (verbose)
-       sprintf (buf, "sign_extract(%s,%s,%s)", t1, t2, t3);
-      else
-       sprintf (buf, "sxt(%s,%s,%s)", t1, t2, t3);
+      fun = (verbose) ? "sign_extract" : "sxt";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
+      op[2] = XEXP (x, 2);
        break;
      case ZERO_EXTRACT:
-      print_value (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      print_value (t3, XEXP (x, 2), verbose);
-      if (verbose)
-       sprintf (buf, "zero_extract(%s,%s,%s)", t1, t2, t3);
-      else
-       sprintf (buf, "zxt(%s,%s,%s)", t1, t2, t3);
+      fun = (verbose) ? "zero_extract" : "zxt";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
+      op[2] = XEXP (x, 2);
        break;
      case SIGN_EXTEND:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "sign_extend(%s)", t1);
-      else
-       sprintf (buf, "sxn(%s)", t1);
+      fun = (verbose) ? "sign_extend" : "sxn";
+      op[0] = XEXP (x, 0);
        break;
      case ZERO_EXTEND:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "zero_extend(%s)", t1);
-      else
-       sprintf (buf, "zxn(%s)", t1);
+      fun = (verbose) ? "zero_extend" : "zxn";
+      op[0] = XEXP (x, 0);
        break;
      case FLOAT_EXTEND:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "float_extend(%s)", t1);
-      else
-       sprintf (buf, "fxn(%s)", t1);
+      fun = (verbose) ? "float_extend" : "fxn";
+      op[0] = XEXP (x, 0);
        break;
      case TRUNCATE:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "trunc(%s)", t1);
-      else
-       sprintf (buf, "trn(%s)", t1);
+      fun = (verbose) ? "trunc" : "trn";
+      op[0] = XEXP (x, 0);
        break;
      case FLOAT_TRUNCATE:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "float_trunc(%s)", t1);
-      else
-       sprintf (buf, "ftr(%s)", t1);
+      fun = (verbose) ? "float_trunc" : "ftr";
+      op[0] = XEXP (x, 0);
        break;
      case FLOAT:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "float(%s)", t1);
-      else
-       sprintf (buf, "flt(%s)", t1);
+      fun = (verbose) ? "float" : "flt";
+      op[0] = XEXP (x, 0);
        break;
      case UNSIGNED_FLOAT:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "uns_float(%s)", t1);
-      else
-       sprintf (buf, "ufl(%s)", t1);
+      fun = (verbose) ? "uns_float" : "ufl";
+      op[0] = XEXP (x, 0);
        break;
      case FIX:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "fix(%s)", t1);
+      fun = "fix";
+      op[0] = XEXP (x, 0);
        break;
      case UNSIGNED_FIX:
-      print_value (t1, XEXP (x, 0), verbose);
-      if (verbose)
-       sprintf (buf, "uns_fix(%s)", t1);
-      else
-       sprintf (buf, "ufx(%s)", t1);
+      fun = (verbose) ? "uns_fix" : "ufx";
+      op[0] = XEXP (x, 0);
        break;
      case PRE_DEC:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "--%s", t1);
+      st[0] = "--";
+      op[0] = XEXP (x, 0);
        break;
      case PRE_INC:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "++%s", t1);
+      st[0] = "++";
+      op[0] = XEXP (x, 0);
        break;
      case POST_DEC:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "%s--", t1);
+      op[0] = XEXP (x, 0);
+      st[1] = "--";
        break;
      case POST_INC:
-      print_value (t1, XEXP (x, 0), verbose);
-      sprintf (buf, "%s++", t1);
+      op[0] = XEXP (x, 0);
+      st[1] = "++";
        break;
      case CALL:
-      print_value (t1, XEXP (x, 0), verbose);
+      st[0] = "call ";
+      op[0] = XEXP (x, 0);
        if (verbose)
         {
-         print_value (t2, XEXP (x, 1), verbose);
-         sprintf (buf, "call %s argc:%s", t1, t2);
+         st[1] = " argc:";
+         op[1] = XEXP (x, 1);
         }
-      else
-       sprintf (buf, "call %s", t1);
        break;
      case IF_THEN_ELSE:
-      print_exp (t1, XEXP (x, 0), verbose);
-      print_value (t2, XEXP (x, 1), verbose);
-      print_value (t3, XEXP (x, 2), verbose);
-      sprintf (buf, "{(%s)?%s:%s}", t1, t2, t3);
+      st[0] = "{(";
+      op[0] = XEXP (x, 0);
+      st[1] = ")?";
+      op[1] = XEXP (x, 1);
+      st[2] = ":";
+      op[2] = XEXP (x, 2);
+      st[3] = "}";
        break;
      case TRAP_IF:
-      print_value (t1, TRAP_CONDITION (x), verbose);
-      sprintf (buf, "trap_if %s", t1);
+      fun = "trap_if";
+      op[0] = TRAP_CONDITION (x);
        break;
      case UNSPEC:
-      {
-       int i;
-
-       sprintf (t1, "unspec{");
-       for (i = 0; i < XVECLEN (x, 0); i++)
-         {
-           print_pattern (t2, XVECEXP (x, 0, i), verbose);
-           sprintf (t3, "%s%s;", t1, t2);
-           strcpy (t1, t3);
-         }
-       sprintf (buf, "%s}", t1);
-      }
-      break;
      case UNSPEC_VOLATILE:
        {
-       int i;
-
-       sprintf (t1, "unspec/v{");
+       cur = safe_concat (buf, cur, "unspec");
+       if (GET_CODE (x) == UNSPEC_VOLATILE)
+         cur = safe_concat (buf, cur, "/v");
+       cur = safe_concat (buf, cur, "[");
+       sep = "";
         for (i = 0; i < XVECLEN (x, 0); i++)
           {
-           print_pattern (t2, XVECEXP (x, 0, i), verbose);
-           sprintf (t3, "%s%s;", t1, t2);
-           strcpy (t1, t3);
+           print_pattern (tmp, XVECEXP (x, 0, i), verbose);
+           cur = safe_concat (buf, cur, sep);
+           cur = safe_concat (buf, cur, tmp);
+           sep = ",";
           }
-       sprintf (buf, "%s}", t1);
+       cur = safe_concat (buf, cur, "] ");
+       sprintf (tmp, "%d", XINT (x, 1));
+       cur = safe_concat (buf, cur, tmp);
        }
        break;
      default:
-/*    if (verbose) debug_rtx (x); else sprintf (buf, "$$$"); */
-      sprintf (buf, "$$$");
+      /* if (verbose) debug_rtx (x); */
+      st[0] = GET_RTX_NAME (GET_CODE (x));
+      break;
      }
-}                              /* print_exp */
+
+  /* Print this as a function? */
+  if (fun)
+    {
+      cur = safe_concat (buf, cur, fun);
+      cur = safe_concat (buf, cur, "(");
+    }
+
+  for (i = 0; i < 4; i++)
+    {
+      if (st[i])
+       cur = safe_concat (buf, cur, st[i]);
+
+      if (op[i])
+       {
+         if (fun && i != 0)
+           cur = safe_concat (buf, cur, ",");
+
+         print_value (tmp, op[i], verbose);
+         cur = safe_concat (buf, cur, tmp);
+       }
+    }
+
+  if (fun)
+    cur = safe_concat (buf, cur, ")");
+}              /* print_exp */
  
  /* Prints rtxes, i customly classified as values. They're constants, */
  /* registers, labels, symbols and memory accesses.  */
@@ -5922,60 +6005,84 @@ print_value (buf, x, verbose)
       int verbose;
  {
    char t[BUF_LEN];
+  char *cur = buf;
  
    switch (GET_CODE (x))
      {
      case CONST_INT:
-      sprintf (buf, "%Xh", INTVAL (x));
+      sprintf (t, "0x%lx", (long)INTVAL (x));
+      cur = safe_concat (buf, cur, t);
        break;
      case CONST_DOUBLE:
-      print_value (t, XEXP (x, 0), verbose);
-      sprintf (buf, "<%s>", t);
+      sprintf (t, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
+      cur = safe_concat (buf, cur, t);
        break;
      case CONST_STRING:
-      sprintf (buf, "\"%s\"", (char *) XEXP (x, 0));
+      cur = safe_concat (buf, cur, "\"");
+      cur = safe_concat (buf, cur, XSTR (x, 0));
+      cur = safe_concat (buf, cur, "\"");
        break;
      case SYMBOL_REF:
-      sprintf (buf, "`%s'", (char *) XEXP (x, 0));
+      cur = safe_concat (buf, cur, "`");
+      cur = safe_concat (buf, cur, XSTR (x, 0));
+      cur = safe_concat (buf, cur, "'");
        break;
      case LABEL_REF:
-      sprintf (buf, "L%d", INSN_UID (XEXP (x, 0)));
+      sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
+      cur = safe_concat (buf, cur, t);
        break;
      case CONST:
-      print_value (buf, XEXP (x, 0), verbose);
+      print_value (t, XEXP (x, 0), verbose);
+      cur = safe_concat (buf, cur, "const(");
+      cur = safe_concat (buf, cur, t);
+      cur = safe_concat (buf, cur, ")");
        break;
      case HIGH:
-      print_value (buf, XEXP (x, 0), verbose);
+      print_value (t, XEXP (x, 0), verbose);
+      cur = safe_concat (buf, cur, "high(");
+      cur = safe_concat (buf, cur, t);
+      cur = safe_concat (buf, cur, ")");
        break;
      case REG:
-      if (GET_MODE (x) == SFmode
-         || GET_MODE (x) == DFmode
-         || GET_MODE (x) == XFmode
-         || GET_MODE (x) == TFmode)
-       strcpy (t, "fr");
+      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
+       {
+         int c = reg_names[ REGNO (x) ][0];
+         if (c >= '0' && c <= '9')
+           cur = safe_concat (buf, cur, "%");
+
+         cur = safe_concat (buf, cur, reg_names[ REGNO (x) ]);
+       }
        else
-       strcpy (t, "r");
-      sprintf (buf, "%s%d", t, REGNO (x));
+       {
+         sprintf (t, "r%d", REGNO (x));
+         cur = safe_concat (buf, cur, t);
+       }
        break;
      case SUBREG:
-      print_value (t, XEXP (x, 0), verbose);
-      sprintf (buf, "%s#%d", t, SUBREG_WORD (x));
+      print_value (t, SUBREG_REG (x), verbose);
+      cur = safe_concat (buf, cur, t);
+      sprintf (t, "#%d", SUBREG_WORD (x));
+      cur = safe_concat (buf, cur, t);
        break;
      case SCRATCH:
-      sprintf (buf, "scratch");
+      cur = safe_concat (buf, cur, "scratch");
        break;
      case CC0:
-      sprintf (buf, "cc0");
+      cur = safe_concat (buf, cur, "cc0");
        break;
      case PC:
-      sprintf (buf, "pc");
+      cur = safe_concat (buf, cur, "pc");
        break;
      case MEM:
        print_value (t, XEXP (x, 0), verbose);
-      sprintf (buf, "[%s]", t);
+      cur = safe_concat (buf, cur, "[");
+      cur = safe_concat (buf, cur, t);
+      cur = safe_concat (buf, cur, "]");
        break;
      default:
-      print_exp (buf, x, verbose);
+      print_exp (t, x, verbose);
+      cur = safe_concat (buf, cur, t);
+      break;
      }
  }                              /* print_value */
  
@@ -6501,8 +6608,6 @@ schedule_block (bb, rgn_n_insns)
                INSN_UID (basic_block_end[b]),
                (reload_completed ? "after" : "before"));
        fprintf (dump, ";;   ======================================================\n");
-      if (sched_debug_count >= 0)
-       fprintf (dump, ";;\t -- sched_debug_count=%d\n", sched_debug_count);
        fprintf (dump, "\n");
  
        visual_tbl = (char *) alloca (get_visual_tbl_length ());
@@ -6615,7 +6720,7 @@ schedule_block (bb, rgn_n_insns)
  
    if (sched_verbose >= 2)
      {
-      fprintf (dump, ";;\t\tReady list initially:  ");
+      fprintf (dump, ";;\t\tReady list initially:             ");
        debug_ready_list (ready, n_ready);
      }
  
@@ -6639,10 +6744,6 @@ schedule_block (bb, rgn_n_insns)
      {
        int b1;
  
-#ifdef INTERBLOCK_DEBUG
-      if (sched_debug_count == 0)
-       break;
-#endif
        clock_var++;
  
        /* Add to the ready list all pending insns that can be issued now.
@@ -6665,7 +6766,7 @@ schedule_block (bb, rgn_n_insns)
  
        if (sched_verbose)
         {
-         fprintf (dump, ";;\tReady list (t =%3d):  ", clock_var);
+         fprintf (dump, "\n;;\tReady list (t =%3d):  ", clock_var);
           debug_ready_list (ready, n_ready);
         }
  
@@ -6685,11 +6786,6 @@ schedule_block (bb, rgn_n_insns)
             }
           else if (cost == 0)
             {
-#ifdef INTERBLOCK_DEBUG
-             if (sched_debug_count == 0)
-               break;
-#endif
-
               /* an interblock motion? */
               if (INSN_BB (insn) != target_bb)
                 {
@@ -6758,11 +6854,6 @@ schedule_block (bb, rgn_n_insns)
  
               can_issue_more--;
  
-#ifdef INTERBLOCK_DEBUG
-             if (sched_debug_count > 0)
-               sched_debug_count--;
-#endif
-
               n_ready = schedule_insn (insn, ready, n_ready, clock_var);
  
               /* remove insn from ready list */
@@ -6778,10 +6869,6 @@ schedule_block (bb, rgn_n_insns)
        if (sched_verbose)
         {
           visualize_scheduled_insns (b, clock_var);
-#ifdef INTERBLOCK_DEBUG
-         if (sched_debug_count == 0)
-           fprintf (dump, "........   sched_debug_count == 0  .................\n");
-#endif
         }
      }
  
@@ -6794,25 +6881,15 @@ schedule_block (bb, rgn_n_insns)
      }
  
    /* Sanity check -- queue must be empty now.  Meaningless if region has
-     multiple bbs, or if scheduling stopped by sched_debug_count.  */
+     multiple bbs.  */
    if (current_nr_blocks > 1)
-#ifdef INTERBLOCK_DEBUG
-    if (sched_debug_count != 0)
-#endif
-      if (!flag_schedule_interblock && q_size != 0)
-       abort ();
+    if (!flag_schedule_interblock && q_size != 0)
+      abort ();
  
    /* update head/tail boundaries.  */
    head = NEXT_INSN (prev_head);
    tail = last;
  
-#ifdef INTERBLOCK_DEBUG
-  if (sched_debug_count == 0)
-    /* compensate for stopping scheduling prematurely */
-    for (i = sched_target_n_insns; i < target_n_insns; i++)
-      tail = move_insn (group_leader (NEXT_INSN (tail)), tail);
-#endif
-
    /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
       previously found among the insns.  Insert them at the beginning
       of the insns.  */
@@ -7497,12 +7574,9 @@ schedule_region (rgn)
  #endif
      }
  
-#ifdef INTERBLOCK_DEBUG
-  if (sched_debug_count != 0)
-#endif
-    /* sanity check: verify that all region insns were scheduled */
-    if (sched_rgn_n_insns != rgn_n_insns)
-      abort ();
+  /* sanity check: verify that all region insns were scheduled */
+  if (sched_rgn_n_insns != rgn_n_insns)
+    abort ();
  
    /* update register life and usage information */
    if (reload_completed == 0)
@@ -7632,6 +7706,13 @@ new_insn_dead_notes (pat, insn, last, orig_insn)
  
    if (GET_CODE (dest) == REG)
      {
+      /* If the original insn already used this register, we may not add new
+         notes for it.  One example for a split that needs this test is
+        when a multi-word memory access with register-indirect addressing
+        is split into multiple memory accesses with auto-increment and
+        one adjusting add instruction for the address register.  */
+      if (reg_referenced_p (dest, PATTERN (orig_insn)))
+       return;
        for (tem = last; tem != insn; tem = PREV_INSN (tem))
         {
           if (GET_RTX_CLASS (GET_CODE (tem)) == 'i'
@@ -8234,8 +8315,7 @@ split_block_insns (b)
  
    for (insn = basic_block_head[b];; insn = next)
      {
-      rtx prev;
-      rtx set;
+      rtx set, last, first, notes;
  
        /* Can't use `next_real_insn' because that
           might go across CODE_LABELS and short-out basic blocks.  */
@@ -8272,31 +8352,24 @@ split_block_insns (b)
         }
  
        /* Split insns here to get max fine-grain parallelism.  */
-      prev = PREV_INSN (insn);
-      /* It is probably not worthwhile to try to split again in
-        the second pass.  However, if flag_schedule_insns is not set,
-        the first and only (if any) scheduling pass is after reload.  */
-      if (reload_completed == 0 || ! flag_schedule_insns)
+      first = PREV_INSN (insn);
+      notes = REG_NOTES (insn);
+      last = try_split (PATTERN (insn), insn, 1);
+      if (last != insn)
         {
-         rtx last, first = PREV_INSN (insn);
-         rtx notes = REG_NOTES (insn);
-         last = try_split (PATTERN (insn), insn, 1);
-         if (last != insn)
+         /* try_split returns the NOTE that INSN became.  */
+         first = NEXT_INSN (first);
+         update_flow_info (notes, first, last, insn);
+
+         PUT_CODE (insn, NOTE);
+         NOTE_SOURCE_FILE (insn) = 0;
+         NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+         if (insn == basic_block_head[b])
+           basic_block_head[b] = first;
+         if (insn == basic_block_end[b])
             {
-             /* try_split returns the NOTE that INSN became.  */
-             first = NEXT_INSN (first);
-             update_flow_info (notes, first, last, insn);
-
-             PUT_CODE (insn, NOTE);
-             NOTE_SOURCE_FILE (insn) = 0;
-             NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
-             if (insn == basic_block_head[b])
-               basic_block_head[b] = first;
-             if (insn == basic_block_end[b])
-               {
-                 basic_block_end[b] = last;
-                 break;
-               }
+             basic_block_end[b] = last;
+             break;
             }
         }
  
@@ -8441,20 +8514,53 @@ schedule_insns (dump_file)
         }
        else
         {
+         int_list_ptr *s_preds, *s_succs;
+         int *num_preds, *num_succs;
+         sbitmap *dom, *pdom;
+
+         s_preds = (int_list_ptr *) alloca (n_basic_blocks
+                                            * sizeof (int_list_ptr));
+         s_succs = (int_list_ptr *) alloca (n_basic_blocks
+                                            * sizeof (int_list_ptr));
+         num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
+         num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
+         dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
+         pdom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
+
+         /* The scheduler runs after flow; therefore, we can't blindly call
+            back into find_basic_blocks since doing so could invalidate the
+            info in basic_block_live_at_start.
+
+            Consider a block consisting entirely of dead stores; after life
+            analysis it would be a block of NOTE_INSN_DELETED notes.  If
+            we call find_basic_blocks again, then the block would be removed
+            entirely and invalidate our the register live information.
+
+            We could (should?) recompute register live information.  Doing
+            so may even be beneficial.  */
+
+         compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
+
+         /* Compute the dominators and post dominators.  We don't currently use
+            post dominators, but we should for speculative motion analysis.  */
+         compute_dominators (dom, pdom, s_preds, s_succs);
+
           /* build_control_flow will return nonzero if it detects unreachable
              blocks or any other irregularity with the cfg which prevents
              cross block scheduling.  */
-         if (build_control_flow () != 0)
+         if (build_control_flow (s_preds, s_succs, num_preds, num_succs) != 0)
             find_single_block_region ();
           else
-           find_rgns ();
+           find_rgns (s_preds, s_succs, num_preds, num_succs, dom);
  
           if (sched_verbose >= 3)
-           {
-             debug_control_flow ();
-             debug_regions ();
-           }
+           debug_regions ();
  
+         /* For now.  This will move as more and more of haifa is converted
+            to using the cfg code in flow.c  */
+         free_bb_mem ();
+         free (dom);
+         free (pdom);
         }
      }