2000-07-11 Zack Weinberg <zack@wolery.cumb.org>

[pf3gnuchains/gcc-fork.git] / gcc / ssa.c
diff --git a/gcc/ssa.c b/gcc/ssa.c

index 8a36bbd..e4bdc9b 100644 (file)
--- a/gcc/ssa.c
+++ b/gcc/ssa.c
@@ -1,22 +1,22 @@
  /* Static Single Assignment conversion routines for the GNU compiler.
     Copyright (C) 2000 Free Software Foundation, Inc.
  
-   This file is part of GNU CC.
+This file is part of GNU CC.
  
-   GNU CC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+GNU CC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
  
-   GNU CC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+GNU CC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
  
-   You should have received a copy of the GNU General Public License
-   along with GNU CC; see the file COPYING.  If not, write to
-   the Free Software Foundation, 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
  
  /* References:
  
@@ -48,6 +48,13 @@
  
  /* TODO: 
  
+   Handle subregs better, maybe.  For now, if a reg that's set in a
+   subreg expression is duplicated going into SSA form, an extra copy
+   is inserted first that copies the entire reg into the duplicate, so
+   that the other bits are preserved.  This isn't strictly SSA, since
+   at least part of the reg is assigned in more than one place (though
+   they are adjacent).
+
     ??? What to do about strict_low_part.  Probably I'll have to split
     them out of their current instructions first thing.
  
@@ -55,9 +62,18 @@
     in which the RTL encodes exactly what we want, without exposing a
     lot of niggling processor details.  At some later point we lower
     the representation, calling back into optabs to finish any necessary
-   expansion.
-*/
+   expansion.  */
+
  
+/* If conservative_reg_partition is non-zero, use a conservative
+   register partitioning algorithm (which leaves more regs after
+   emerging from SSA) instead of the coalescing one.  This is being
+   left in for a limited time only, as a debugging tool until the
+   coalescing algorithm is validated.  */
+static int conservative_reg_partition;
+
+/* This flag is set when the CFG is in SSA form.  */
+int in_ssa_form = 0;
  
  /* Element I is the single instruction that sets register I+PSEUDO.  */
  varray_type ssa_definition;
@@ -73,10 +89,12 @@ varray_type ssa_rename_from;
  static rtx *ssa_rename_to;
  
  /* The number of registers that were live on entry to the SSA routines.  */
-static int ssa_max_reg_num;
+static unsigned int ssa_max_reg_num;
  
  /* Local function prototypes.  */
  
+struct rename_context;
+
  static inline rtx * phi_alternative
    PARAMS ((rtx, int));
  
@@ -98,6 +116,10 @@ static void insert_phi_node
    PARAMS ((int regno, int b));
  static void insert_phi_nodes
    PARAMS ((sbitmap *idfs, sbitmap *evals, int nregs));
+static void create_delayed_rename 
+  PARAMS ((struct rename_context *, rtx *));
+static void apply_delayed_renames 
+  PARAMS ((struct rename_context *));
  static int rename_insn_1 
    PARAMS ((rtx *ptr, void *data));
  static void rename_block 
@@ -115,25 +137,40 @@ static void ephi_create
    PARAMS ((int t, sbitmap visited, sbitmap *pred, sbitmap *succ, rtx *nodes));
  static void eliminate_phi
    PARAMS ((edge e, partition reg_partition));
-
  static int make_regs_equivalent_over_bad_edges 
    PARAMS ((int bb, partition reg_partition));
+
+/* These are used only in the conservative register partitioning
+   algorithms.  */
  static int make_equivalent_phi_alternatives_equivalent 
    PARAMS ((int bb, partition reg_partition));
  static partition compute_conservative_reg_partition 
-  PARAMS (());
+  PARAMS ((void));
+static int rename_equivalent_regs_in_insn 
+  PARAMS ((rtx *ptr, void *data));
+
+/* These are used in the register coalescing algorithm.  */
+static int coalesce_if_unconflicting
+  PARAMS ((partition p, conflict_graph conflicts, int reg1, int reg2));
+static int coalesce_regs_in_copies
+  PARAMS ((basic_block bb, partition p, conflict_graph conflicts));
+static int coalesce_reg_in_phi
+  PARAMS ((rtx, int dest_regno, int src_regno, void *data));
+static int coalesce_regs_in_successor_phi_nodes
+  PARAMS ((basic_block bb, partition p, conflict_graph conflicts));
+static partition compute_coalesced_reg_partition
+  PARAMS ((void));
+static int mark_reg_in_phi 
+  PARAMS ((rtx *ptr, void *data));
+static void mark_phi_and_copy_regs
+  PARAMS ((regset phi_set));
+
  static int rename_equivalent_regs_in_insn 
    PARAMS ((rtx *ptr, void *data));
  static void rename_equivalent_regs 
    PARAMS ((partition reg_partition));
  
  
-/* Determine if the insn is a PHI node.  */
-#define PHI_NODE_P(X)                          \
-  (X && GET_CODE (X) == INSN                   \
-   && GET_CODE (PATTERN (X)) == SET            \
-   && GET_CODE (SET_SRC (PATTERN (X))) == PHI)
-
  /* Given the SET of a PHI node, return the address of the alternative
     for predecessor block C.  */
  
@@ -490,15 +527,49 @@ insert_phi_nodes (idfs, evals, nregs)
  struct rename_set_data
  {
    struct rename_set_data *next;
+  /* This is the SET_DEST of the (first) SET that sets the REG.  */
    rtx *reg_loc;
-  rtx set_dest;
+  /* This is what used to be at *REG_LOC.  */
+  rtx old_reg;
+  /* This is the REG that will replace OLD_REG.  It's set only
+     when the rename data is moved onto the DONE_RENAMES queue.  */
    rtx new_reg;
+  /* This is what to restore ssa_rename_to[REGNO (old_reg)] to. 
+     It is usually the previous contents of ssa_rename_to[REGNO (old_reg)].  */
    rtx prev_reg;
+  /* This is the insn that contains all the SETs of the REG.  */
+  rtx set_insn;
  };
  
-static void new_registers_for_updates 
-  PARAMS ((struct rename_set_data *set_data,
-          struct rename_set_data *old_set_data, rtx insn));
+/* This struct is used to pass information to callback functions while
+   renaming registers.  */
+struct rename_context
+{
+  struct rename_set_data *new_renames;
+  struct rename_set_data *done_renames;
+  rtx current_insn;
+};
+
+/* Queue the rename of *REG_LOC.  */
+static void
+create_delayed_rename (c, reg_loc)
+     struct rename_context *c;
+     rtx *reg_loc;
+{
+  struct rename_set_data *r;
+  r = (struct rename_set_data *) xmalloc (sizeof(*r));
+  
+  if (GET_CODE (*reg_loc) != REG
+      || REGNO (*reg_loc) < FIRST_PSEUDO_REGISTER)
+    abort();
+
+  r->reg_loc = reg_loc;
+  r->old_reg = *reg_loc;
+  r->prev_reg = ssa_rename_to [REGNO (r->old_reg) - FIRST_PSEUDO_REGISTER];
+  r->set_insn = c->current_insn;
+  r->next = c->new_renames;
+  c->new_renames = r;
+}
  
  /* This is part of a rather ugly hack to allow the pre-ssa regno to be
     reused.  If, during processing, a register has not yet been touched,
@@ -509,6 +580,60 @@ static void new_registers_for_updates
     already been reused.  */
  #define RENAME_NO_RTX  pc_rtx
  
+/* Move all the entries from NEW_RENAMES onto DONE_RENAMES by
+   applying all the renames on NEW_RENAMES.  */
+
+static void
+apply_delayed_renames (c)
+       struct rename_context *c;
+{
+  struct rename_set_data *r;
+  struct rename_set_data *last_r = NULL;
+  
+  for (r = c->new_renames; r != NULL; r = r->next)
+    {
+      int regno = REGNO (r->old_reg);
+      int new_regno;
+      
+      /* Failure here means that someone has a PARALLEL that sets
+        a register twice (bad!).  */
+      if (ssa_rename_to [regno - FIRST_PSEUDO_REGISTER] != r->prev_reg)
+       abort();
+      /* Failure here means we have changed REG_LOC before applying
+        the rename.  */
+      /* For the first set we come across, reuse the original regno.  */
+      if (r->prev_reg == NULL_RTX)
+       {
+         r->new_reg = r->old_reg;
+         /* We want to restore RENAME_NO_RTX rather than NULL_RTX. */
+         r->prev_reg = RENAME_NO_RTX;
+       }
+      else
+       r->new_reg = gen_reg_rtx (GET_MODE (r->old_reg));
+      new_regno = REGNO (r->new_reg);
+      ssa_rename_to[regno - FIRST_PSEUDO_REGISTER] = r->new_reg;
+
+      if (new_regno >= (int) ssa_definition->num_elements)
+       {
+         int new_limit = new_regno * 5 / 4;
+         ssa_definition = VARRAY_GROW (ssa_definition, new_limit);
+         ssa_uses = VARRAY_GROW (ssa_uses, new_limit);
+         ssa_rename_from = VARRAY_GROW (ssa_rename_from, new_limit);
+       }
+
+      VARRAY_RTX (ssa_definition, new_regno) = r->set_insn;
+      VARRAY_RTX (ssa_rename_from, new_regno) = r->old_reg;
+
+      last_r = r;
+    }
+  if (last_r != NULL)
+    {
+      last_r->next = c->done_renames;
+      c->done_renames = c->new_renames;
+      c->new_renames = NULL;
+    }
+}
+
  /* Part one of the first step of rename_block, called through for_each_rtx. 
     Mark pseudos that are set for later update.  Transform uses of pseudos.  */
  
@@ -518,46 +643,77 @@ rename_insn_1 (ptr, data)
       void *data;
  {
    rtx x = *ptr;
-  struct rename_set_data **set_datap = data;
+  struct rename_context *context = data;
  
    if (x == NULL_RTX)
      return 0;
  
    switch (GET_CODE (x))
      {
+    case CLOBBER:
      case SET:
        {
         rtx *destp = &SET_DEST (x);
         rtx dest = SET_DEST (x);
  
-       /* Subregs at word 0 are interesting.  Subregs at word != 0 are
-          presumed to be part of a contiguous multi-word set sequence.  */
-       while (GET_CODE (dest) == SUBREG
-              && SUBREG_WORD (dest) == 0)
+       /* Some SETs also use the REG specified in their LHS.
+          These can be detected by the presence of
+          STRICT_LOW_PART, SUBREG, SIGN_EXTRACT, and ZERO_EXTRACT
+          in the LHS.  Handle these by changing
+          (set (subreg (reg foo)) ...)
+          into
+          (sequence [(set (reg foo_1) (reg foo))
+                     (set (subreg (reg foo_1)) ...)])  
+
+          FIXME: Much of the time this is too much.  For many libcalls,
+          paradoxical SUBREGs, etc., the input register is dead.  We should
+          recognise this in rename_block or here and not make a false
+          dependency.  */
+          
+       if (GET_CODE (dest) == STRICT_LOW_PART
+           || GET_CODE (dest) == SUBREG
+           || GET_CODE (dest) == SIGN_EXTRACT
+           || GET_CODE (dest) == ZERO_EXTRACT)
           {
-           destp = &SUBREG_REG (dest);
-           dest = SUBREG_REG (dest);
+           rtx i, reg;
+           reg = dest;
+           
+           while (GET_CODE (reg) == STRICT_LOW_PART
+                  || GET_CODE (reg) == SUBREG
+                  || GET_CODE (reg) == SIGN_EXTRACT
+                  || GET_CODE (reg) == ZERO_EXTRACT)
+               reg = XEXP (reg, 0);
+           
+           if (GET_CODE (reg) == REG
+               && REGNO (reg) >= FIRST_PSEUDO_REGISTER)
+             {
+               /* Generate (set reg reg), and do renaming on it so
+                  that it becomes (set reg_1 reg_0), and we will
+                  replace reg with reg_1 in the SUBREG.  */
+
+               struct rename_set_data *saved_new_renames;
+               saved_new_renames = context->new_renames;
+               context->new_renames = NULL;
+               i = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
+               for_each_rtx (&i, rename_insn_1, data);
+               apply_delayed_renames (context);
+               context->new_renames = saved_new_renames;
+             }
           }
-
-       if (GET_CODE (dest) == REG
-           && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
+       else if (GET_CODE (dest) == REG
+                && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
           {
             /* We found a genuine set of an interesting register.  Tag
                it so that we can create a new name for it after we finish
                processing this insn.  */
  
-           struct rename_set_data *r;
-           r = (struct rename_set_data *) xmalloc (sizeof(*r));
-
-           r->reg_loc = destp;
-           r->set_dest = SET_DEST (x);
-           r->next = *set_datap;
-           *set_datap = r;
+           create_delayed_rename (context, destp);
  
             /* Since we do not wish to (directly) traverse the
                SET_DEST, recurse through for_each_rtx for the SET_SRC
                and return.  */
-           for_each_rtx (&SET_SRC (x), rename_insn_1, data);
+           if (GET_CODE (x) == SET)
+             for_each_rtx (&SET_SRC (x), rename_insn_1, data);
             return -1;
           }
  
@@ -577,7 +733,6 @@ rename_insn_1 (ptr, data)
               if (GET_MODE (x) != GET_MODE (new_reg))
                 abort ();
               *ptr = new_reg;
-             /* ??? Mark for a new ssa_uses entry.  */
             }
           /* Else this is a use before a set.  Warn?  */
         }
@@ -593,55 +748,6 @@ rename_insn_1 (ptr, data)
      }
  }
  
-/* Second part of the first step of rename_block.  The portion of the list
-   beginning at SET_DATA through OLD_SET_DATA contain the sets present in
-   INSN.  Update data structures accordingly.  */
-
-static void
-new_registers_for_updates (set_data, old_set_data, insn)
-     struct rename_set_data *set_data, *old_set_data;
-     rtx insn;
-{
-  while (set_data != old_set_data)
-    {
-      int regno, new_regno;
-      rtx old_reg, new_reg, prev_reg;
-
-      old_reg = *set_data->reg_loc;
-      regno = REGNO (*set_data->reg_loc);
-
-      /* For the first set we come across, reuse the original regno.  */
-      if (ssa_rename_to[regno - FIRST_PSEUDO_REGISTER] == NULL_RTX)
-       {
-         new_reg = old_reg;
-         prev_reg = RENAME_NO_RTX;
-       }
-      else
-       {
-         prev_reg = ssa_rename_to[regno - FIRST_PSEUDO_REGISTER];
-         new_reg = gen_reg_rtx (GET_MODE (old_reg));
-       }
-
-      set_data->new_reg = new_reg;
-      set_data->prev_reg = prev_reg;
-      new_regno = REGNO (new_reg);
-      ssa_rename_to[regno - FIRST_PSEUDO_REGISTER] = new_reg;
-
-      if (new_regno >= (int) ssa_definition->num_elements)
-       {
-         int new_limit = new_regno * 5 / 4;
-         ssa_definition = VARRAY_GROW (ssa_definition, new_limit);
-         ssa_uses = VARRAY_GROW (ssa_uses, new_limit);
-         ssa_rename_from = VARRAY_GROW (ssa_rename_from, new_limit);
-       }
-
-      VARRAY_RTX (ssa_definition, new_regno) = insn;
-      VARRAY_RTX (ssa_rename_from, new_regno) = old_reg;
-
-      set_data = set_data->next;
-    }
-}
-
  static void
  rename_block (bb, idom)
       int bb;
@@ -663,12 +769,36 @@ rename_block (bb, idom)
        insn = next;
        if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
         {
-         struct rename_set_data *old_set_data = set_data;
-
-         for_each_rtx (&PATTERN (insn), rename_insn_1, &set_data);
-         for_each_rtx (&REG_NOTES (insn), rename_insn_1, &set_data);
+         struct rename_context context;
+         context.done_renames = set_data;
+         context.new_renames = NULL;
+         context.current_insn = insn;
+
+         start_sequence ();
+         for_each_rtx (&PATTERN (insn), rename_insn_1, &context);
+         for_each_rtx (&REG_NOTES (insn), rename_insn_1, &context);
+
+         /* Sometimes, we end up with a sequence of insns that
+            SSA needs to treat as a single insn.  Wrap these in a
+            SEQUENCE.  (Any notes now get attached to the SEQUENCE,
+            not to the old version inner insn.)  */
+         if (get_insns () != NULL_RTX)
+           {
+             rtx seq;
+             int i;
+             
+             emit (PATTERN (insn));
+             seq = gen_sequence ();
+             /* We really want a SEQUENCE of SETs, not a SEQUENCE
+                of INSNs.  */
+             for (i = 0; i < XVECLEN (seq, 0); i++)
+               XVECEXP (seq, 0, i) = PATTERN (XVECEXP (seq, 0, i));
+             PATTERN (insn) = seq;
+           }
+         end_sequence ();
           
-         new_registers_for_updates (set_data, old_set_data, insn);
+         apply_delayed_renames (&context);
+         set_data = context.done_renames;
         }
  
        next = NEXT_INSN (insn);
@@ -689,7 +819,7 @@ rename_block (bb, idom)
        while (PHI_NODE_P (insn))
         {
           rtx phi = PATTERN (insn);
-         int regno;
+         unsigned int regno;
           rtx reg;
  
           /* Find out which of our outgoing registers this node is
@@ -736,15 +866,18 @@ rename_block (bb, idom)
      if (idom[c] == bb)
        rename_block (c, idom);
  
-  /* Step Four: Update the sets to refer to their new register.  */
+  /* Step Four: Update the sets to refer to their new register,
+     and restore ssa_rename_to to its previous state.  */
  
    while (set_data)
      {
        struct rename_set_data *next;
-      rtx old_reg;
+      rtx old_reg = *set_data->reg_loc;
  
-      old_reg = *set_data->reg_loc;
+      if (*set_data->reg_loc != set_data->old_reg)
+       abort();
        *set_data->reg_loc = set_data->new_reg;
+
        ssa_rename_to[REGNO (old_reg)-FIRST_PSEUDO_REGISTER]
         = set_data->prev_reg;
  
@@ -793,11 +926,12 @@ convert_to_ssa()
  
    int nregs;
  
-  find_basic_blocks (get_insns (), max_reg_num(), NULL);
-  /* The dominator algorithms assume all blocks are reachable, clean
-     up first.  */
-  cleanup_cfg (get_insns ());
-  life_analysis (get_insns (), max_reg_num (), NULL, 1);
+  /* Don't do it twice.  */
+  if (in_ssa_form)
+    abort ();
+
+  /* Need global_live_at_{start,end} up to date.  */
+  life_analysis (get_insns (), NULL, PROP_KILL_DEAD_CODE | PROP_SCAN_DEAD_CODE);
  
    /* Compute dominators.  */
    dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
@@ -862,38 +996,11 @@ convert_to_ssa()
    sbitmap_vector_free (dfs);
    sbitmap_vector_free (evals);
    sbitmap_vector_free (idfs);
-}
-
-
-/* This is intended to be the FIND of a UNION/FIND algorithm managing
-   the partitioning of the pseudos.  Glancing through the rest of the
-   global optimizations, it seems things will work out best if the
-   partition is set up just before convert_from_ssa is called.  See
-   section 11.4 of Morgan.
-
-   ??? Morgan's algorithm, perhaps with some care, may allow copy
-   propagation to happen concurrently with the conversion from SSA.
-
-   However, it presents potential problems with critical edges -- to
-   split or not to split.  He mentions beginning the partitioning by
-   unioning registers associated by a PHI across abnormal critical
-   edges.  This is the approache taken here.  It is unclear to me how
-   we are able to do that arbitrarily, though.
-
-   Alternately, Briggs presents an algorithm in which critical edges
-   need not be split, at the expense of the creation of new pseudos,
-   and the need for some concurrent register renaming.  Moreover, it
-   is ameanable for modification such that the instructions can be
-   placed anywhere in the target block, which solves the before-call
-   placement problem.  However, I don't immediately see how we could
-   do that concurrently with copy propoagation.
-
-   More study is required.  */
+  in_ssa_form = 1;
  
+  reg_scan (get_insns (), max_reg_num (), 1);
+}
  
-/*
- * Eliminate the PHI across the edge from C to B.
- */
  
  /* REG is the representative temporary of its partition.  Add it to the
     set of nodes to be processed, if it hasn't been already.  Return the
@@ -1086,10 +1193,11 @@ eliminate_phi (e, reg_partition)
        if (GET_CODE (reg) != REG || GET_CODE (tgt) != REG)
         abort();
  
+      reg = regno_reg_rtx[partition_find (reg_partition, REGNO (reg))];
+      tgt = regno_reg_rtx[partition_find (reg_partition, REGNO (tgt))];
        /* If the two registers are already in the same partition, 
          nothing will need to be done.  */
-      if (partition_find (reg_partition, REGNO (reg)) 
-         != partition_find (reg_partition, REGNO (tgt)))
+      if (reg != tgt)
         {
           int ireg, itgt;
  
@@ -1188,7 +1296,8 @@ make_regs_equivalent_over_bad_edges (bb, reg_partition)
  
        /* Scan incoming abnormal critical edges.  */
        for (e = b->pred; e; e = e->pred_next)
-       if (e->flags & (EDGE_ABNORMAL | EDGE_CRITICAL))
+       if ((e->flags & (EDGE_ABNORMAL | EDGE_CRITICAL)) 
+               == (EDGE_ABNORMAL | EDGE_CRITICAL))
           {
             rtx *alt = phi_alternative (set, e->src->index);
             int alt_regno;
@@ -1305,7 +1414,6 @@ make_equivalent_phi_alternatives_equivalent (bb, reg_partition)
    return changed;
  }
  
-
  /* Compute a conservative partition of outstanding pseudo registers.
     See Morgan 7.3.1.  */
  
@@ -1341,6 +1449,316 @@ compute_conservative_reg_partition ()
    return p;
  }
  
+/* The following functions compute a register partition that attempts
+   to eliminate as many reg copies and phi node copies as possible by
+   coalescing registers.   This is the strategy:
+
+    1. As in the conservative case, the top priority is to coalesce
+       registers that otherwise would cause copies to be placed on
+       abnormal critical edges (which isn't possible).
+
+    2. Figure out which regs are involved (in the LHS or RHS) of
+       copies and phi nodes.  Compute conflicts among these regs.  
+
+    3. Walk around the instruction stream, placing two regs in the
+       same class of the partition if one appears on the LHS and the
+       other on the RHS of a copy or phi node and the two regs don't
+       conflict.  The conflict information of course needs to be
+       updated.  
+
+    4. If anything has changed, there may be new opportunities to
+       coalesce regs, so go back to 2.
+*/
+
+/* If REG1 and REG2 don't conflict in CONFLICTS, place them in the
+   same class of partition P, if they aren't already.  Update
+   CONFLICTS appropriately.  
+
+   Returns one if REG1 and REG2 were placed in the same class but were
+   not previously; zero otherwise.  
+
+   See Morgan figure 11.15.  */
+
+static int 
+coalesce_if_unconflicting (p, conflicts, reg1, reg2)
+     partition p;
+     conflict_graph conflicts;
+     int reg1;
+     int reg2;
+{
+  int reg;
+
+  /* Don't mess with hard regs.  */
+  if (reg1 < FIRST_PSEUDO_REGISTER || reg2 < FIRST_PSEUDO_REGISTER)
+    return 0;
+
+  /* Find the canonical regs for the classes containing REG1 and
+     REG2.  */
+  reg1 = partition_find (p, reg1);
+  reg2 = partition_find (p, reg2);
+  
+  /* If they're already in the same class, there's nothing to do.  */
+  if (reg1 == reg2)
+    return 0;
+
+  /* If the regs conflict, our hands are tied.  */
+  if (conflict_graph_conflict_p (conflicts, reg1, reg2))
+    return 0;
+
+  /* We're good to go.  Put the regs in the same partition.  */
+  partition_union (p, reg1, reg2);
+
+  /* Find the new canonical reg for the merged class.  */
+  reg = partition_find (p, reg1);
+  
+  /* Merge conflicts from the two previous classes.  */
+  conflict_graph_merge_regs (conflicts, reg, reg1);
+  conflict_graph_merge_regs (conflicts, reg, reg2);
+
+  return 1;
+}
+
+/* For each register copy insn in basic block BB, place the LHS and
+   RHS regs in the same class in partition P if they do not conflict
+   according to CONFLICTS.
+
+   Returns the number of changes that were made to P.
+
+   See Morgan figure 11.14.  */
+
+static int
+coalesce_regs_in_copies (bb, p, conflicts)
+     basic_block bb;
+     partition p;
+     conflict_graph conflicts;
+{
+  int changed = 0;
+  rtx insn;
+  rtx end = bb->end;
+
+  /* Scan the instruction stream of the block.  */
+  for (insn = bb->head; insn != end; insn = NEXT_INSN (insn))
+    {
+      rtx pattern;
+      rtx src;
+      rtx dest;
+
+      /* If this isn't a set insn, go to the next insn.  */
+      if (GET_CODE (insn) != INSN)
+       continue;
+      pattern = PATTERN (insn);
+      if (GET_CODE (pattern) != SET)
+       continue;
+
+      src = SET_SRC (pattern);
+      dest = SET_DEST (pattern);
+
+      /* We're only looking for copies.  */
+      if (GET_CODE (src) != REG || GET_CODE (dest) != REG)
+       continue;
+
+      /* Coalesce only if the reg modes are the same.  As long as
+        each reg's rtx is unique, it can have only one mode, so two
+        pseudos of different modes can't be coalesced into one.  
+
+         FIXME: We can probably get around this by inserting SUBREGs
+         where appropriate, but for now we don't bother.  */
+      if (GET_MODE (src) != GET_MODE (dest))
+       continue;
+
+      /* Found a copy; see if we can use the same reg for both the
+        source and destination (and thus eliminate the copy,
+        ultimately).  */
+      changed += coalesce_if_unconflicting (p, conflicts, 
+                                           REGNO (src), REGNO (dest));
+    }
+
+  return changed;
+}
+
+
+struct phi_coalesce_context
+{
+  partition p;
+  conflict_graph conflicts;
+  int changed;
+};
+
+/* Callback function for for_each_successor_phi.  If the set
+   destination and the phi alternative regs do not conflict, place
+   them in the same paritition class.  DATA is a pointer to a
+   phi_coalesce_context struct.  */
+
+static int
+coalesce_reg_in_phi (insn, dest_regno, src_regno, data)
+     rtx insn ATTRIBUTE_UNUSED;
+     int dest_regno;
+     int src_regno;
+     void *data;
+{
+  struct phi_coalesce_context *context = 
+    (struct phi_coalesce_context *) data;
+  
+  /* Attempt to use the same reg, if they don't conflict.  */
+  context->changed 
+    += coalesce_if_unconflicting (context->p, context->conflicts, 
+                                 dest_regno, src_regno);
+  return 0;
+}
+
+/* For each alternative in a phi function corresponding to basic block
+   BB (in phi nodes in successor block to BB), place the reg in the
+   phi alternative and the reg to which the phi value is set into the
+   same class in partition P, if allowed by CONFLICTS.  
+
+   Return the number of changes that were made to P.
+   
+   See Morgan figure 11.14.  */
+
+static int
+coalesce_regs_in_successor_phi_nodes (bb, p, conflicts)
+     basic_block bb;
+     partition p;
+     conflict_graph conflicts;
+{
+  struct phi_coalesce_context context;
+  context.p = p;
+  context.conflicts = conflicts;
+  context.changed = 0;
+
+  for_each_successor_phi (bb, &coalesce_reg_in_phi, &context);
+
+  return context.changed;
+}
+
+/* Compute and return a partition of pseudos.  Where possible,
+   non-conflicting pseudos are placed in the same class.  
+
+   The caller is responsible for deallocating the returned partition.  */
+
+static partition
+compute_coalesced_reg_partition ()
+{
+  int bb;
+  int changed = 0;
+
+  /* We don't actually work with hard registers, but it's easier to
+     carry them around anyway rather than constantly doing register
+     number arithmetic.  */
+  partition p = 
+    partition_new (ssa_definition->num_elements + FIRST_PSEUDO_REGISTER);
+
+  /* The first priority is to make sure registers that might have to
+     be copied on abnormal critical edges are placed in the same
+     partition.  This saves us from having to split abnormal critical
+     edges (which can't be done).  */
+  for (bb = n_basic_blocks; --bb >= 0; )
+    make_regs_equivalent_over_bad_edges (bb, p);
+
+  do
+    {
+      regset_head phi_set;
+      conflict_graph conflicts;
+
+      changed = 0;
+
+      /* Build the set of registers involved in phi nodes, either as
+        arguments to the phi function or as the target of a set.  */
+      INITIALIZE_REG_SET (phi_set);
+      mark_phi_and_copy_regs (&phi_set);
+
+      /* Compute conflicts.  */
+      conflicts = conflict_graph_compute (&phi_set, p);
+
+      /* FIXME: Better would be to process most frequently executed
+        blocks first, so that most frequently executed copies would
+        be more likely to be removed by register coalescing.  But any
+        order will generate correct, if non-optimal, results.  */
+      for (bb = n_basic_blocks; --bb >= 0; )
+       {
+         basic_block block = BASIC_BLOCK (bb);
+         changed += coalesce_regs_in_copies (block, p, conflicts);
+         changed += 
+           coalesce_regs_in_successor_phi_nodes (block, p, conflicts);
+       }
+
+      conflict_graph_delete (conflicts);
+    }
+  while (changed > 0);
+
+  return p;
+}
+
+/* Mark the regs in a phi node.  PTR is a phi expression or one of its
+   components (a REG or a CONST_INT).  DATA is a reg set in which to
+   set all regs.  Called from for_each_rtx.  */
+
+static int
+mark_reg_in_phi (ptr, data)
+     rtx *ptr;
+     void *data;
+{
+  rtx expr = *ptr;
+  regset set = (regset) data;
+
+  switch (GET_CODE (expr))
+    {
+    case REG:
+      SET_REGNO_REG_SET (set, REGNO (expr));
+      /* Fall through.  */
+    case CONST_INT:
+    case PHI:
+      return 0;
+    default:
+      abort ();
+    }
+}
+
+/* Mark in PHI_SET all pseudos that are used in a phi node -- either
+   set from a phi expression, or used as an argument in one.  Also
+   mark regs that are the source or target of a reg copy.  Uses
+   ssa_definition.  */
+
+static void
+mark_phi_and_copy_regs (phi_set)
+     regset phi_set;
+{
+  int reg;
+
+  /* Scan the definitions of all regs.  */
+  for (reg = VARRAY_SIZE (ssa_definition); 
+       --reg >= FIRST_PSEUDO_REGISTER; 
+       ) 
+    {
+      rtx insn = VARRAY_RTX (ssa_definition, reg);
+      rtx pattern;
+      rtx src;
+
+      if (insn == NULL)
+       continue;
+      pattern = PATTERN (insn);
+      /* Sometimes we get PARALLEL insns.  These aren't phi nodes or
+        copies.  */
+      if (GET_CODE (pattern) != SET)
+       continue;
+      src = SET_SRC (pattern);
+
+      if (GET_CODE (src) == REG)
+       {
+         /* It's a reg copy.  */
+         SET_REGNO_REG_SET (phi_set, reg);
+         SET_REGNO_REG_SET (phi_set, REGNO (src));
+       }
+      else if (GET_CODE (src) == PHI)
+       {
+         /* It's a phi node.  Mark the reg being set.  */
+         SET_REGNO_REG_SET (phi_set, reg);
+         /* Mark the regs used in the phi function.  */
+         for_each_rtx (&src, mark_reg_in_phi, phi_set);
+       }
+      /* ... else nothing to do.  */
+    }
+}
  
  /* Rename regs in insn PTR that are equivalent.  DATA is the register
     partition which specifies equivalences.  */
@@ -1358,40 +1776,6 @@ rename_equivalent_regs_in_insn (ptr, data)
  
    switch (GET_CODE (x))
      {
-    case SET:
-      {
-       rtx *destp = &SET_DEST (x);
-       rtx dest = SET_DEST (x);
-
-       /* Subregs at word 0 are interesting.  Subregs at word != 0 are
-          presumed to be part of a contiguous multi-word set sequence.  */
-       while (GET_CODE (dest) == SUBREG
-              && SUBREG_WORD (dest) == 0)
-         {
-           destp = &SUBREG_REG (dest);
-           dest = SUBREG_REG (dest);
-         }
-
-       if (GET_CODE (dest) == REG
-           && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
-         {
-           /* Got a pseudo; replace it.  */
-           int regno = REGNO (dest);
-           int new_regno = partition_find (reg_partition, regno);
-           if (regno != new_regno)
-             *destp = regno_reg_rtx [new_regno];
-
-           for_each_rtx (&SET_SRC (x), 
-                         rename_equivalent_regs_in_insn, 
-                         data);
-           return -1;
-         }
-
-       /* Otherwise, this was not an interesting destination.  Continue
-          on, marking uses as normal.  */
-       return 0;
-      }
-
      case REG:
        if (REGNO (x) >= FIRST_PSEUDO_REGISTER)
         {
@@ -1418,8 +1802,8 @@ rename_equivalent_regs_in_insn (ptr, data)
      }
  }
  
-
-/* Rename regs that are equivalent in REG_PARTITION.  */
+/* Rename regs that are equivalent in REG_PARTITION.  
+   Also collapse any SEQUENCE insns.  */
  
  static void
  rename_equivalent_regs (reg_partition)
@@ -1445,6 +1829,20 @@ rename_equivalent_regs (reg_partition)
               for_each_rtx (&REG_NOTES (insn), 
                             rename_equivalent_regs_in_insn, 
                             reg_partition);
+
+             if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+               {
+                 rtx s = PATTERN (insn);
+                 int slen = XVECLEN (s, 0);
+                 int i;
+
+                 if (slen <= 1)
+                   abort();
+
+                 PATTERN (insn) = XVECEXP (s, 0, slen-1);
+                 for (i = 0; i < slen - 1; i++)
+                   emit_block_insn_before (XVECEXP (s, 0, i), insn, b);
+               }
             }
  
           next = NEXT_INSN (insn);
@@ -1453,7 +1851,6 @@ rename_equivalent_regs (reg_partition)
      }
  }
  
-
  /* The main entry point for moving from SSA.  */
  
  void
@@ -1461,8 +1858,19 @@ convert_from_ssa()
  {
    int bb;
    partition reg_partition;
-  
-  reg_partition = compute_conservative_reg_partition ();
+  rtx insns = get_insns ();
+    
+  /* Need global_live_at_{start,end} up to date.  */
+  life_analysis (insns, NULL, 
+         PROP_KILL_DEAD_CODE | PROP_SCAN_DEAD_CODE | PROP_DEATH_NOTES);
+
+  /* Figure out which regs in copies and phi nodes don't conflict and
+     therefore can be coalesced.  */
+  if (conservative_reg_partition)
+    reg_partition = compute_conservative_reg_partition ();
+  else
+    reg_partition = compute_coalesced_reg_partition ();
+
    rename_equivalent_regs (reg_partition);
  
    /* Eliminate the PHI nodes.  */
@@ -1488,6 +1896,11 @@ convert_from_ssa()
         insn = next_nonnote_insn (insn);
        while (PHI_NODE_P (insn))
         {
+         /* If a phi node is the last insn in the block, there must
+            have been nothing else.  Set the block end to the block
+            head.  */
+         if (insn == BLOCK_END (bb))
+           BLOCK_END (bb) = BLOCK_HEAD (bb);
           insn = delete_insn (insn);
           if (GET_CODE (insn) == NOTE)
             insn = next_nonnote_insn (insn);
@@ -1499,5 +1912,75 @@ convert_from_ssa()
    /* Commit all the copy nodes needed to convert out of SSA form.  */
    commit_edge_insertions ();
  
+  in_ssa_form = 0;
+
    count_or_remove_death_notes (NULL, 1);
  }
+
+/* Scan phi nodes in successors to BB.  For each such phi node that
+   has a phi alternative value corresponding to BB, invoke FN.  FN
+   is passed the entire phi node insn, the regno of the set
+   destination, the regno of the phi argument corresponding to BB,
+   and DATA.
+
+   If FN ever returns non-zero, stops immediately and returns this
+   value.  Otherwise, returns zero.  */
+
+int
+for_each_successor_phi (bb, fn, data)
+     basic_block bb;
+     successor_phi_fn fn;
+     void *data;
+{
+  edge e;
+  
+  if (bb == EXIT_BLOCK_PTR)
+    return 0;
+
+  /* Scan outgoing edges.  */
+  for (e = bb->succ; e != NULL; e = e->succ_next)
+    {
+      rtx insn;
+
+      basic_block successor = e->dest;
+      if (successor == ENTRY_BLOCK_PTR 
+         || successor == EXIT_BLOCK_PTR)
+       continue;
+
+      /* Advance to the first non-label insn of the successor block.  */
+      insn = successor->head;
+      while (insn != NULL 
+            && (GET_CODE (insn) == CODE_LABEL
+                || GET_CODE (insn) == NOTE))
+       insn = NEXT_INSN (insn);
+
+      if (insn == NULL)
+       continue;
+
+      /* Scan phi nodes in the successor.  */
+      for ( ; PHI_NODE_P (insn); insn = NEXT_INSN (insn))
+       {
+         int result;
+         rtx phi_set = PATTERN (insn);
+         rtx *alternative = phi_alternative (phi_set, bb->index);
+         rtx phi_src;
+         
+         /* This phi function may not have an alternative
+            corresponding to the incoming edge, indicating the
+            assigned variable is not defined along the edge.  */
+         if (alternative == NULL)
+           continue;
+         phi_src = *alternative;
+
+         /* Invoke the callback.  */
+         result = (*fn) (insn, REGNO (SET_DEST (phi_set)), 
+                         REGNO (phi_src), data);
+
+         /* Terminate if requested.  */
+         if (result != 0)
+           return result;
+       }
+    }
+
+  return 0;
+}