2006-02-15 Paolo Bonzini <bonzini@gnu.org>

[pf3gnuchains/gcc-fork.git] / gcc / cse.c
diff --git a/gcc/cse.c b/gcc/cse.c

index 1313399..7d6f46b 100644 (file)
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -1,6 +1,6 @@
  /* Common subexpression elimination for GNU compiler.
     Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998
-   1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -16,19 +16,18 @@ for more details.
  
  You should have received a copy of the GNU General Public License
  along with GCC; see the file COPYING.  If not, write to the Free
-Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.  */
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.  */
  
  #include "config.h"
  /* stdio.h must precede rtl.h for FFS.  */
  #include "system.h"
  #include "coretypes.h"
  #include "tm.h"
-
  #include "rtl.h"
  #include "tm_p.h"
-#include "regs.h"
  #include "hard-reg-set.h"
+#include "regs.h"
  #include "basic-block.h"
  #include "flags.h"
  #include "real.h"
@@ -44,6 +43,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "target.h"
  #include "params.h"
  #include "rtlhooks-def.h"
+#include "tree-pass.h"
  
  /* The basic idea of common subexpression elimination is to go
     through the code, keeping a record of expressions that would
@@ -81,11 +81,11 @@ Registers and "quantity numbers":
     copies one register into another, we copy the quantity number.
     When a register is loaded in any other way, we allocate a new
     quantity number to describe the value generated by this operation.
-   `reg_qty' records what quantity a register is currently thought
+   `REG_QTY (N)' records what quantity register N is currently thought
     of as containing.
  
     All real quantity numbers are greater than or equal to zero.
-   If register N has not been assigned a quantity, reg_qty[N] will
+   If register N has not been assigned a quantity, `REG_QTY (N)' will
     equal -N - 1, which is always negative.
  
     Quantity numbers below zero do not exist and none of the `qty_table'
@@ -173,18 +173,20 @@ Other expressions:
     the register's new value.  This sequence of circumstances is rare
     within any one basic block.
  
-   The vectors `reg_tick' and `reg_in_table' are used to detect this case.
-   reg_tick[i] is incremented whenever a value is stored in register i.
-   reg_in_table[i] holds -1 if no references to register i have been
-   entered in the table; otherwise, it contains the value reg_tick[i] had
-   when the references were entered.  If we want to enter a reference
-   and reg_in_table[i] != reg_tick[i], we must scan and remove old references.
-   Until we want to enter a new entry, the mere fact that the two vectors
-   don't match makes the entries be ignored if anyone tries to match them.
+   `REG_TICK' and `REG_IN_TABLE', accessors for members of
+   cse_reg_info, are used to detect this case.  REG_TICK (i) is
+   incremented whenever a value is stored in register i.
+   REG_IN_TABLE (i) holds -1 if no references to register i have been
+   entered in the table; otherwise, it contains the value REG_TICK (i)
+   had when the references were entered.  If we want to enter a
+   reference and REG_IN_TABLE (i) != REG_TICK (i), we must scan and
+   remove old references.  Until we want to enter a new entry, the
+   mere fact that the two vectors don't match makes the entries be
+   ignored if anyone tries to match them.
  
     Registers themselves are entered in the hash table as well as in
-   the equivalent-register chains.  However, the vectors `reg_tick'
-   and `reg_in_table' do not apply to expressions which are simple
+   the equivalent-register chains.  However, `REG_TICK' and
+   `REG_IN_TABLE' do not apply to expressions which are simple
     register references.  These expressions are removed from the table
     immediately when they become invalid, and this can be done even if
     we do not immediately search for all the expressions that refer to
@@ -203,15 +205,6 @@ Related expressions:
     so that it is possible to find out if there exists any
     register equivalent to an expression related to a given expression.  */
  
-/* One plus largest register number used in this function.  */
-
-static int max_reg;
-
-/* One plus largest instruction UID used in this function at time of
-   cse_main call.  */
-
-static int max_insn_uid;
-
  /* Length of qty_table vector.  We know in advance we will not need
     a quantity number this big.  */
  
@@ -263,6 +256,14 @@ struct qty_table_elem
  /* The table of all qtys, indexed by qty number.  */
  static struct qty_table_elem *qty_table;
  
+/* Structure used to pass arguments via for_each_rtx to function
+   cse_change_cc_mode.  */
+struct change_cc_mode_args
+{
+  rtx insn;
+  rtx newreg;
+};
+
  #ifdef HAVE_cc0
  /* For machines that have a CC0, we do not record its value in the hash
     table since its use is guaranteed to be the insn immediately following
@@ -291,7 +292,7 @@ static rtx this_insn;
  
     Or -1 if this register is at the end of the chain.
  
-   If reg_qty[N] == N, reg_eqv_table[N].next is undefined.  */
+   If REG_QTY (N) == -N - 1, reg_eqv_table[N].next is undefined.  */
  
  /* Per-register equivalence chain.  */
  struct reg_eqv_elem
@@ -304,14 +305,8 @@ static struct reg_eqv_elem *reg_eqv_table;
  
  struct cse_reg_info
  {
-  /* Next in hash chain.  */
-  struct cse_reg_info *hash_next;
-
-  /* The next cse_reg_info structure in the free or used list.  */
-  struct cse_reg_info *next;
-
-  /* Search key */
-  unsigned int regno;
+  /* The timestamp at which this register is initialized.  */
+  unsigned int timestamp;
  
    /* The quantity number of the register's current contents.  */
    int reg_qty;
@@ -331,26 +326,22 @@ struct cse_reg_info
    unsigned int subreg_ticked;
  };
  
-/* A free list of cse_reg_info entries.  */
-static struct cse_reg_info *cse_reg_info_free_list;
+/* A table of cse_reg_info indexed by register numbers.  */
+static struct cse_reg_info *cse_reg_info_table;
  
-/* A used list of cse_reg_info entries.  */
-static struct cse_reg_info *cse_reg_info_used_list;
-static struct cse_reg_info *cse_reg_info_used_list_end;
+/* The size of the above table.  */
+static unsigned int cse_reg_info_table_size;
  
-/* A mapping from registers to cse_reg_info data structures.  */
-#define REGHASH_SHIFT  7
-#define REGHASH_SIZE   (1 << REGHASH_SHIFT)
-#define REGHASH_MASK   (REGHASH_SIZE - 1)
-static struct cse_reg_info *reg_hash[REGHASH_SIZE];
+/* The index of the first entry that has not been initialized.  */
+static unsigned int cse_reg_info_table_first_uninitialized;
  
-#define REGHASH_FN(REGNO)      \
-       (((REGNO) ^ ((REGNO) >> REGHASH_SHIFT)) & REGHASH_MASK)
-
-/* The last lookup we did into the cse_reg_info_tree.  This allows us
-   to cache repeated lookups.  */
-static unsigned int cached_regno;
-static struct cse_reg_info *cached_cse_reg_info;
+/* The timestamp at the beginning of the current run of
+   cse_basic_block.  We increment this variable at the beginning of
+   the current run of cse_basic_block.  The timestamp field of a
+   cse_reg_info entry matches the value of this variable if and only
+   if the entry has been initialized during the current run of
+   cse_basic_block.  */
+static unsigned int cse_reg_info_timestamp;
  
  /* A HARD_REG_SET containing all the hard registers for which there is
     currently a REG expression in the hash table.  Note the difference
@@ -400,12 +391,6 @@ static int recorded_label_ref;
  
  static int do_not_record;
  
-#ifdef LOAD_EXTEND_OP
-
-/* Scratch rtl used when looking for load-extended copy of a MEM.  */
-static rtx memory_extend_rtx;
-#endif
-
  /* canon_hash stores 1 in hash_arg_in_memory
     if it notices a reference to memory within the expression being hashed.  */
  
@@ -510,39 +495,31 @@ struct table_elt
     of 0.  Next come pseudos with a cost of one and other hard registers with
     a cost of 2.  Aside from these special cases, call `rtx_cost'.  */
  
-#define CHEAP_REGNO(N) \
-  ((N) == FRAME_POINTER_REGNUM || (N) == HARD_FRAME_POINTER_REGNUM     \
-   || (N) == STACK_POINTER_REGNUM || (N) == ARG_POINTER_REGNUM         \
-   || ((N) >= FIRST_VIRTUAL_REGISTER && (N) <= LAST_VIRTUAL_REGISTER)  \
-   || ((N) < FIRST_PSEUDO_REGISTER                                     \
+#define CHEAP_REGNO(N)                                                 \
+  (REGNO_PTR_FRAME_P(N)                                                        \
+   || (HARD_REGISTER_NUM_P (N)                                         \
         && FIXED_REGNO_P (N) && REGNO_REG_CLASS (N) != NO_REGS))
  
  #define COST(X) (REG_P (X) ? 0 : notreg_cost (X, SET))
  #define COST_IN(X,OUTER) (REG_P (X) ? 0 : notreg_cost (X, OUTER))
  
-/* Get the info associated with register N.  */
-
-#define GET_CSE_REG_INFO(N)                    \
-  (((N) == cached_regno && cached_cse_reg_info)        \
-   ? cached_cse_reg_info : get_cse_reg_info ((N)))
-
  /* Get the number of times this register has been updated in this
     basic block.  */
  
-#define REG_TICK(N) ((GET_CSE_REG_INFO (N))->reg_tick)
+#define REG_TICK(N) (get_cse_reg_info (N)->reg_tick)
  
  /* Get the point at which REG was recorded in the table.  */
  
-#define REG_IN_TABLE(N) ((GET_CSE_REG_INFO (N))->reg_in_table)
+#define REG_IN_TABLE(N) (get_cse_reg_info (N)->reg_in_table)
  
  /* Get the SUBREG set at the last increment to REG_TICK (-1 if not a
     SUBREG).  */
  
-#define SUBREG_TICKED(N) ((GET_CSE_REG_INFO (N))->subreg_ticked)
+#define SUBREG_TICKED(N) (get_cse_reg_info (N)->subreg_ticked)
  
  /* Get the quantity number for REG.  */
  
-#define REG_QTY(N) ((GET_CSE_REG_INFO (N))->reg_qty)
+#define REG_QTY(N) (get_cse_reg_info (N)->reg_qty)
  
  /* Determine if the quantity number for register X represents a valid index
     into the qty_table.  */
@@ -556,15 +533,6 @@ static struct table_elt *table[HASH_SIZE];
  
  static struct table_elt *free_element_chain;
  
-/* Number of `struct table_elt' structures made so far for this function.  */
-
-static int n_elements_made;
-
-/* Maximum value `n_elements_made' has had so far in this compilation
-   for functions previously processed.  */
-
-static int max_elements_made;
-
  /* Set to the cost of a constant pool reference if one was found for a
     symbolic constant.  If this was found, it means we should try to
     convert constants into constant pool entries if they don't fit in
@@ -650,10 +618,11 @@ static rtx cse_process_notes (rtx, rtx);
  static void invalidate_skipped_set (rtx, rtx, void *);
  static void invalidate_skipped_block (rtx);
  static rtx cse_basic_block (rtx, rtx, struct branch_path *);
-static void count_reg_usage (rtx, int *, int);
+static void count_reg_usage (rtx, int *, rtx, int);
  static int check_for_label_ref (rtx *, void *);
  extern void dump_class (struct table_elt*);
-static struct cse_reg_info * get_cse_reg_info (unsigned int);
+static void get_cse_reg_info_1 (unsigned int regno);
+static struct cse_reg_info * get_cse_reg_info (unsigned int regno);
  static int check_dependence (rtx *, void *);
  
  static void flush_hash_table (void);
@@ -661,6 +630,7 @@ static bool insn_live_p (rtx, int *);
  static bool set_live_p (rtx, rtx, int *);
  static bool dead_libcall_p (rtx, int *);
  static int cse_change_cc_mode (rtx *, void *);
+static void cse_change_cc_mode_insn (rtx, rtx);
  static void cse_change_cc_mode_insns (rtx, rtx, rtx);
  static enum machine_mode cse_cc_succs (basic_block, rtx, rtx, bool);
  \f
@@ -761,6 +731,57 @@ approx_reg_cost (rtx x)
    return cost;
  }
  
+/* Returns a canonical version of X for the address, from the point of view,
+   that all multiplications are represented as MULT instead of the multiply
+   by a power of 2 being represented as ASHIFT.  */
+
+static rtx
+canon_for_address (rtx x)
+{
+  enum rtx_code code;
+  enum machine_mode mode;
+  rtx new = 0;
+  int i;
+  const char *fmt;
+  
+  if (!x)
+    return x;
+  
+  code = GET_CODE (x);
+  mode = GET_MODE (x);
+  
+  switch (code)
+    {
+    case ASHIFT:
+      if (GET_CODE (XEXP (x, 1)) == CONST_INT
+         && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (mode)
+         && INTVAL (XEXP (x, 1)) >= 0)
+        {
+         new = canon_for_address (XEXP (x, 0));
+         new = gen_rtx_MULT (mode, new,
+                             gen_int_mode ((HOST_WIDE_INT) 1
+                                           << INTVAL (XEXP (x, 1)),
+                                           mode));
+       }
+      break;
+    default:
+      break;
+      
+    }
+  if (new)
+    return new;
+  
+  /* Now recursively process each operand of this operation.  */
+  fmt = GET_RTX_FORMAT (code);
+  for (i = 0; i < GET_RTX_LENGTH (code); i++)
+    if (fmt[i] == 'e')
+      {
+       new = canon_for_address (XEXP (x, i));
+       XEXP (x, i) = new;
+      }
+  return x;
+}
+
  /* Return a negative value if an rtx A, whose costs are given by COST_A
     and REGCOST_A, is more desirable than an rtx B.
     Return a positive value if A is less desirable, or 0 if the two are
@@ -816,47 +837,85 @@ notreg_cost (rtx x, enum rtx_code outer)
  }
  
  \f
-static struct cse_reg_info *
-get_cse_reg_info (unsigned int regno)
-{
-  struct cse_reg_info **hash_head = &reg_hash[REGHASH_FN (regno)];
-  struct cse_reg_info *p;
+/* Initialize CSE_REG_INFO_TABLE.  */
  
-  for (p = *hash_head; p != NULL; p = p->hash_next)
-    if (p->regno == regno)
-      break;
-
-  if (p == NULL)
+static void
+init_cse_reg_info (unsigned int nregs)
+{
+  /* Do we need to grow the table?  */
+  if (nregs > cse_reg_info_table_size)
      {
-      /* Get a new cse_reg_info structure.  */
-      if (cse_reg_info_free_list)
+      unsigned int new_size;
+
+      if (cse_reg_info_table_size < 2048)
         {
-         p = cse_reg_info_free_list;
-         cse_reg_info_free_list = p->next;
+         /* Compute a new size that is a power of 2 and no smaller
+            than the large of NREGS and 64.  */
+         new_size = (cse_reg_info_table_size
+                     ? cse_reg_info_table_size : 64);
+
+         while (new_size < nregs)
+           new_size *= 2;
         }
        else
-       p = xmalloc (sizeof (struct cse_reg_info));
-
-      /* Insert into hash table.  */
-      p->hash_next = *hash_head;
-      *hash_head = p;
-
-      /* Initialize it.  */
-      p->reg_tick = 1;
-      p->reg_in_table = -1;
-      p->subreg_ticked = -1;
-      p->reg_qty = -regno - 1;
-      p->regno = regno;
-      p->next = cse_reg_info_used_list;
-      cse_reg_info_used_list = p;
-      if (!cse_reg_info_used_list_end)
-       cse_reg_info_used_list_end = p;
+       {
+         /* If we need a big table, allocate just enough to hold
+            NREGS registers.  */
+         new_size = nregs;
+       }
+
+      /* Reallocate the table with NEW_SIZE entries.  */
+      if (cse_reg_info_table)
+       free (cse_reg_info_table);
+      cse_reg_info_table = XNEWVEC (struct cse_reg_info, new_size);
+      cse_reg_info_table_size = new_size;
+      cse_reg_info_table_first_uninitialized = 0;
+    }
+
+  /* Do we have all of the first NREGS entries initialized?  */
+  if (cse_reg_info_table_first_uninitialized < nregs)
+    {
+      unsigned int old_timestamp = cse_reg_info_timestamp - 1;
+      unsigned int i;
+
+      /* Put the old timestamp on newly allocated entries so that they
+        will all be considered out of date.  We do not touch those
+        entries beyond the first NREGS entries to be nice to the
+        virtual memory.  */
+      for (i = cse_reg_info_table_first_uninitialized; i < nregs; i++)
+       cse_reg_info_table[i].timestamp = old_timestamp;
+
+      cse_reg_info_table_first_uninitialized = nregs;
      }
+}
+
+/* Given REGNO, initialize the cse_reg_info entry for REGNO.  */
  
-  /* Cache this lookup; we tend to be looking up information about the
-     same register several times in a row.  */
-  cached_regno = regno;
-  cached_cse_reg_info = p;
+static void
+get_cse_reg_info_1 (unsigned int regno)
+{
+  /* Set TIMESTAMP field to CSE_REG_INFO_TIMESTAMP so that this
+     entry will be considered to have been initialized.  */
+  cse_reg_info_table[regno].timestamp = cse_reg_info_timestamp;
+
+  /* Initialize the rest of the entry.  */
+  cse_reg_info_table[regno].reg_tick = 1;
+  cse_reg_info_table[regno].reg_in_table = -1;
+  cse_reg_info_table[regno].subreg_ticked = -1;
+  cse_reg_info_table[regno].reg_qty = -regno - 1;
+}
+
+/* Find a cse_reg_info entry for REGNO.  */
+
+static inline struct cse_reg_info *
+get_cse_reg_info (unsigned int regno)
+{
+  struct cse_reg_info *p = &cse_reg_info_table[regno];
+
+  /* If this entry has not been initialized, go ahead and initialize
+     it.  */
+  if (p->timestamp != cse_reg_info_timestamp)
+    get_cse_reg_info_1 (regno);
  
    return p;
  }
@@ -871,18 +930,10 @@ new_basic_block (void)
  
    next_qty = 0;
  
-  /* Clear out hash table state for this pass.  */
-
-  memset (reg_hash, 0, sizeof reg_hash);
-
-  if (cse_reg_info_used_list)
-    {
-      cse_reg_info_used_list_end->next = cse_reg_info_free_list;
-      cse_reg_info_free_list = cse_reg_info_used_list;
-      cse_reg_info_used_list = cse_reg_info_used_list_end = 0;
-    }
-  cached_cse_reg_info = 0;
+  /* Invalidate cse_reg_info_table.  */
+  cse_reg_info_timestamp++;
  
+  /* Clear out hash table state for this pass.  */
    CLEAR_HARD_REG_SET (hard_regs_in_table);
  
    /* The per-quantity values used to be initialized here, but it is
@@ -1183,7 +1234,24 @@ insert_regs (rtx x, struct table_elt *classp, int modified)
               if (REG_P (classp->exp)
                   && GET_MODE (classp->exp) == GET_MODE (x))
                 {
-                 make_regs_eqv (regno, REGNO (classp->exp));
+                 unsigned c_regno = REGNO (classp->exp);
+
+                 gcc_assert (REGNO_QTY_VALID_P (c_regno));
+
+                 /* Suppose that 5 is hard reg and 100 and 101 are
+                    pseudos.  Consider
+
+                    (set (reg:si 100) (reg:si 5))
+                    (set (reg:si 5) (reg:si 100))
+                    (set (reg:di 101) (reg:di 5))
+
+                    We would now set REG_QTY (101) = REG_QTY (5), but the
+                    entry for 5 is in SImode.  When we use this later in
+                    copy propagation, we get the register in wrong mode.  */
+                 if (qty_table[REG_QTY (c_regno)].mode != GET_MODE (x))
+                   continue;
+
+                 make_regs_eqv (regno, c_regno);
                   return 1;
                 }
  
@@ -1442,10 +1510,7 @@ insert (rtx x, struct table_elt *classp, unsigned int hash, enum machine_mode mo
    if (elt)
      free_element_chain = elt->next_same_hash;
    else
-    {
-      n_elements_made++;
-      elt = xmalloc (sizeof (struct table_elt));
-    }
+    elt = XNEW (struct table_elt);
  
    elt->exp = x;
    elt->canon_exp = NULL_RTX;
@@ -2432,6 +2497,7 @@ exp_equiv_p (rtx x, rtx y, int validate, bool for_gcse)
      case PC:
      case CC0:
      case CONST_INT:
+    case CONST_DOUBLE:
        return x == y;
  
      case LABEL_REF:
@@ -2471,16 +2537,26 @@ exp_equiv_p (rtx x, rtx y, int validate, bool for_gcse)
      case MEM:
        if (for_gcse)
         {
-         /* Can't merge two expressions in different alias sets, since we
-            can decide that the expression is transparent in a block when
-            it isn't, due to it being set with the different alias set.  */
-         if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
-           return 0;
-
           /* A volatile mem should not be considered equivalent to any
              other.  */
           if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
             return 0;
+
+         /* Can't merge two expressions in different alias sets, since we
+            can decide that the expression is transparent in a block when
+            it isn't, due to it being set with the different alias set.
+
+            Also, can't merge two expressions with different MEM_ATTRS.
+            They could e.g. be two different entities allocated into the
+            same space on the stack (see e.g. PR25130).  In that case, the
+            MEM addresses can be the same, even though the two MEMs are
+            absolutely not equivalent.  
+   
+            But because really all MEM attributes should be the same for
+            equivalent MEMs, we just use the invariant that MEMs that have
+            the same attributes share the same mem_attrs data structure.  */
+         if (MEM_ATTRS (x) != MEM_ATTRS (y))
+           return 0;
         }
        break;
  
@@ -2802,18 +2878,22 @@ find_best_addr (rtx insn, rtx *loc, enum machine_mode mode)
       be valid and produce better code.  */
    if (!REG_P (addr))
      {
-      rtx folded = fold_rtx (copy_rtx (addr), NULL_RTX);
-      int addr_folded_cost = address_cost (folded, mode);
-      int addr_cost = address_cost (addr, mode);
-
-      if ((addr_folded_cost < addr_cost
-          || (addr_folded_cost == addr_cost
-              /* ??? The rtx_cost comparison is left over from an older
-                 version of this code.  It is probably no longer helpful.  */
-              && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM)
-                  || approx_reg_cost (folded) < approx_reg_cost (addr))))
-         && validate_change (insn, loc, folded, 0))
-       addr = folded;
+      rtx folded = canon_for_address (fold_rtx (addr, NULL_RTX));
+
+      if (folded != addr)
+       {
+         int addr_folded_cost = address_cost (folded, mode);
+         int addr_cost = address_cost (addr, mode);
+
+         if ((addr_folded_cost < addr_cost
+              || (addr_folded_cost == addr_cost
+                  /* ??? The rtx_cost comparison is left over from an older
+                     version of this code.  It is probably no longer helpful.*/
+                  && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM)
+                      || approx_reg_cost (folded) < approx_reg_cost (addr))))
+             && validate_change (insn, loc, folded, 0))
+           addr = folded;
+       }
      }
  
    /* If this address is not in the hash table, we can't look for equivalences
@@ -2928,11 +3008,18 @@ find_best_addr (rtx insn, rtx *loc, enum machine_mode mode)
                p = p->next_same_value, count++)
             if (! p->flag
                 && (REG_P (p->exp)
-                   || exp_equiv_p (p->exp, p->exp, 1, false)))
+                   || (GET_CODE (p->exp) != EXPR_LIST
+                       && exp_equiv_p (p->exp, p->exp, 1, false))))
+
               {
                 rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode,
                                                p->exp, op1);
                 int new_cost;
+               
+               /* Get the canonical version of the address so we can accept
+                  more.  */
+               new = canon_for_address (new);
+               
                 new_cost = address_cost (new, mode);
  
                 if (new_cost < best_addr_cost
@@ -3010,7 +3097,7 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2,
               || (GET_MODE_CLASS (GET_MODE (arg1)) == MODE_INT
                   && code == LT && STORE_FLAG_VALUE == -1)
  #ifdef FLOAT_STORE_FLAG_VALUE
-             || (GET_MODE_CLASS (GET_MODE (arg1)) == MODE_FLOAT
+             || (SCALAR_FLOAT_MODE_P (GET_MODE (arg1))
                   && (fsfv = FLOAT_STORE_FLAG_VALUE (GET_MODE (arg1)),
                       REAL_VALUE_NEGATIVE (fsfv)))
  #endif
@@ -3020,7 +3107,7 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2,
                    || (GET_MODE_CLASS (GET_MODE (arg1)) == MODE_INT
                        && code == GE && STORE_FLAG_VALUE == -1)
  #ifdef FLOAT_STORE_FLAG_VALUE
-                  || (GET_MODE_CLASS (GET_MODE (arg1)) == MODE_FLOAT
+                  || (SCALAR_FLOAT_MODE_P (GET_MODE (arg1))
                        && (fsfv = FLOAT_STORE_FLAG_VALUE (GET_MODE (arg1)),
                            REAL_VALUE_NEGATIVE (fsfv)))
  #endif
@@ -3083,7 +3170,7 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2,
                               << (GET_MODE_BITSIZE (inner_mode) - 1))))
  #ifdef FLOAT_STORE_FLAG_VALUE
                    || (code == LT
-                      && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
+                      && SCALAR_FLOAT_MODE_P (inner_mode)
                        && (fsfv = FLOAT_STORE_FLAG_VALUE (GET_MODE (arg1)),
                            REAL_VALUE_NEGATIVE (fsfv)))
  #endif
@@ -3103,7 +3190,7 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2,
                                << (GET_MODE_BITSIZE (inner_mode) - 1))))
  #ifdef FLOAT_STORE_FLAG_VALUE
                     || (code == GE
-                       && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
+                       && SCALAR_FLOAT_MODE_P (inner_mode)
                         && (fsfv = FLOAT_STORE_FLAG_VALUE (GET_MODE (arg1)),
                             REAL_VALUE_NEGATIVE (fsfv)))
  #endif
@@ -3154,6 +3241,371 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2,
    return code;
  }
  \f
+/* Fold SUBREG.  */
+
+static rtx
+fold_rtx_subreg (rtx x, rtx insn)
+{
+  enum machine_mode mode = GET_MODE (x);
+  rtx folded_arg0;
+  rtx const_arg0;
+  rtx new;
+
+  /* See if we previously assigned a constant value to this SUBREG.  */
+  if ((new = lookup_as_function (x, CONST_INT)) != 0
+      || (new = lookup_as_function (x, CONST_DOUBLE)) != 0)
+    return new;
+
+  /* If this is a paradoxical SUBREG, we have no idea what value the
+     extra bits would have.  However, if the operand is equivalent to
+     a SUBREG whose operand is the same as our mode, and all the modes
+     are within a word, we can just use the inner operand because
+     these SUBREGs just say how to treat the register.
+
+     Similarly if we find an integer constant.  */
+
+  if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))
+    {
+      enum machine_mode imode = GET_MODE (SUBREG_REG (x));
+      struct table_elt *elt;
+
+      if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
+         && GET_MODE_SIZE (imode) <= UNITS_PER_WORD
+         && (elt = lookup (SUBREG_REG (x), HASH (SUBREG_REG (x), imode),
+                           imode)) != 0)
+       for (elt = elt->first_same_value; elt; elt = elt->next_same_value)
+         {
+           if (CONSTANT_P (elt->exp)
+               && GET_MODE (elt->exp) == VOIDmode)
+             return elt->exp;
+
+           if (GET_CODE (elt->exp) == SUBREG
+               && GET_MODE (SUBREG_REG (elt->exp)) == mode
+               && exp_equiv_p (elt->exp, elt->exp, 1, false))
+             return copy_rtx (SUBREG_REG (elt->exp));
+         }
+
+      return x;
+    }
+
+  /* Fold SUBREG_REG.  If it changed, see if we can simplify the
+     SUBREG.  We might be able to if the SUBREG is extracting a single
+     word in an integral mode or extracting the low part.  */
+
+  folded_arg0 = fold_rtx (SUBREG_REG (x), insn);
+  const_arg0 = equiv_constant (folded_arg0);
+  if (const_arg0)
+    folded_arg0 = const_arg0;
+
+  if (folded_arg0 != SUBREG_REG (x))
+    {
+      new = simplify_subreg (mode, folded_arg0,
+                            GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x));
+      if (new)
+       return new;
+    }
+
+  if (REG_P (folded_arg0)
+      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (folded_arg0)))
+    {
+      struct table_elt *elt;
+
+      elt = lookup (folded_arg0,
+                   HASH (folded_arg0, GET_MODE (folded_arg0)),
+                   GET_MODE (folded_arg0));
+
+      if (elt)
+       elt = elt->first_same_value;
+
+      if (subreg_lowpart_p (x))
+       /* If this is a narrowing SUBREG and our operand is a REG, see
+          if we can find an equivalence for REG that is an arithmetic
+          operation in a wider mode where both operands are
+          paradoxical SUBREGs from objects of our result mode.  In
+          that case, we couldn-t report an equivalent value for that
+          operation, since we don't know what the extra bits will be.
+          But we can find an equivalence for this SUBREG by folding
+          that operation in the narrow mode.  This allows us to fold
+          arithmetic in narrow modes when the machine only supports
+          word-sized arithmetic.
+
+          Also look for a case where we have a SUBREG whose operand
+          is the same as our result.  If both modes are smaller than
+          a word, we are simply interpreting a register in different
+          modes and we can use the inner value.  */
+
+       for (; elt; elt = elt->next_same_value)
+         {
+           enum rtx_code eltcode = GET_CODE (elt->exp);
+
+           /* Just check for unary and binary operations.  */
+           if (UNARY_P (elt->exp)
+               && eltcode != SIGN_EXTEND
+               && eltcode != ZERO_EXTEND
+               && GET_CODE (XEXP (elt->exp, 0)) == SUBREG
+               && GET_MODE (SUBREG_REG (XEXP (elt->exp, 0))) == mode
+               && (GET_MODE_CLASS (mode)
+                   == GET_MODE_CLASS (GET_MODE (XEXP (elt->exp, 0)))))
+             {
+               rtx op0 = SUBREG_REG (XEXP (elt->exp, 0));
+
+               if (!REG_P (op0) && ! CONSTANT_P (op0))
+                 op0 = fold_rtx (op0, NULL_RTX);
+
+               op0 = equiv_constant (op0);
+               if (op0)
+                 new = simplify_unary_operation (GET_CODE (elt->exp), mode,
+                                                 op0, mode);
+             }
+           else if (ARITHMETIC_P (elt->exp)
+                    && eltcode != DIV && eltcode != MOD
+                    && eltcode != UDIV && eltcode != UMOD
+                    && eltcode != ASHIFTRT && eltcode != LSHIFTRT
+                    && eltcode != ROTATE && eltcode != ROTATERT
+                    && ((GET_CODE (XEXP (elt->exp, 0)) == SUBREG
+                         && (GET_MODE (SUBREG_REG (XEXP (elt->exp, 0)))
+                             == mode))
+                        || CONSTANT_P (XEXP (elt->exp, 0)))
+                    && ((GET_CODE (XEXP (elt->exp, 1)) == SUBREG
+                         && (GET_MODE (SUBREG_REG (XEXP (elt->exp, 1)))
+                             == mode))
+                        || CONSTANT_P (XEXP (elt->exp, 1))))
+             {
+               rtx op0 = gen_lowpart_common (mode, XEXP (elt->exp, 0));
+               rtx op1 = gen_lowpart_common (mode, XEXP (elt->exp, 1));
+
+               if (op0 && !REG_P (op0) && ! CONSTANT_P (op0))
+                 op0 = fold_rtx (op0, NULL_RTX);
+
+               if (op0)
+                 op0 = equiv_constant (op0);
+
+               if (op1 && !REG_P (op1) && ! CONSTANT_P (op1))
+                 op1 = fold_rtx (op1, NULL_RTX);
+
+               if (op1)
+                 op1 = equiv_constant (op1);
+
+               /* If we are looking for the low SImode part of
+                  (ashift:DI c (const_int 32)), it doesn't work to
+                  compute that in SImode, because a 32-bit shift in
+                  SImode is unpredictable.  We know the value is
+                  0.  */
+               if (op0 && op1
+                   && GET_CODE (elt->exp) == ASHIFT
+                   && GET_CODE (op1) == CONST_INT
+                   && INTVAL (op1) >= GET_MODE_BITSIZE (mode))
+                 {
+                   if (INTVAL (op1)
+                       < GET_MODE_BITSIZE (GET_MODE (elt->exp)))
+                     /* If the count fits in the inner mode's width,
+                        but exceeds the outer mode's width, the value
+                        will get truncated to 0 by the subreg.  */
+                     new = CONST0_RTX (mode);
+                   else
+                     /* If the count exceeds even the inner mode's width,
+                        don't fold this expression.  */
+                     new = 0;
+                 }
+               else if (op0 && op1)
+                 new = simplify_binary_operation (GET_CODE (elt->exp),
+                                                  mode, op0, op1);
+             }
+
+           else if (GET_CODE (elt->exp) == SUBREG
+                    && GET_MODE (SUBREG_REG (elt->exp)) == mode
+                    && (GET_MODE_SIZE (GET_MODE (folded_arg0))
+                        <= UNITS_PER_WORD)
+                    && exp_equiv_p (elt->exp, elt->exp, 1, false))
+             new = copy_rtx (SUBREG_REG (elt->exp));
+
+           if (new)
+             return new;
+         }
+      else
+       /* A SUBREG resulting from a zero extension may fold to zero
+          if it extracts higher bits than the ZERO_EXTEND's source
+          bits.  FIXME: if combine tried to, er, combine these
+          instructions, this transformation may be moved to
+          simplify_subreg.  */
+       for (; elt; elt = elt->next_same_value)
+         {
+           if (GET_CODE (elt->exp) == ZERO_EXTEND
+               && subreg_lsb (x)
+               >= GET_MODE_BITSIZE (GET_MODE (XEXP (elt->exp, 0))))
+             return CONST0_RTX (mode);
+         }
+    }
+
+  return x;
+}
+
+/* Fold MEM.  */
+
+static rtx
+fold_rtx_mem (rtx x, rtx insn)
+{
+  enum machine_mode mode = GET_MODE (x);
+  rtx new;
+
+  /* If we are not actually processing an insn, don't try to find the
+     best address.  Not only don't we care, but we could modify the
+     MEM in an invalid way since we have no insn to validate
+     against.  */
+  if (insn != 0)
+    find_best_addr (insn, &XEXP (x, 0), mode);
+
+  {
+    /* Even if we don't fold in the insn itself, we can safely do so
+       here, in hopes of getting a constant.  */
+    rtx addr = fold_rtx (XEXP (x, 0), NULL_RTX);
+    rtx base = 0;
+    HOST_WIDE_INT offset = 0;
+
+    if (REG_P (addr)
+       && REGNO_QTY_VALID_P (REGNO (addr)))
+      {
+       int addr_q = REG_QTY (REGNO (addr));
+       struct qty_table_elem *addr_ent = &qty_table[addr_q];
+
+       if (GET_MODE (addr) == addr_ent->mode
+           && addr_ent->const_rtx != NULL_RTX)
+         addr = addr_ent->const_rtx;
+      }
+
+    /* Call target hook to avoid the effects of -fpic etc....  */
+    addr = targetm.delegitimize_address (addr);
+
+    /* If address is constant, split it into a base and integer
+       offset.  */
+    if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
+      base = addr;
+    else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
+            && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+      {
+       base = XEXP (XEXP (addr, 0), 0);
+       offset = INTVAL (XEXP (XEXP (addr, 0), 1));
+      }
+    else if (GET_CODE (addr) == LO_SUM
+            && GET_CODE (XEXP (addr, 1)) == SYMBOL_REF)
+      base = XEXP (addr, 1);
+
+    /* If this is a constant pool reference, we can fold it into its
+       constant to allow better value tracking.  */
+    if (base && GET_CODE (base) == SYMBOL_REF
+       && CONSTANT_POOL_ADDRESS_P (base))
+      {
+       rtx constant = get_pool_constant (base);
+       enum machine_mode const_mode = get_pool_mode (base);
+       rtx new;
+
+       if (CONSTANT_P (constant) && GET_CODE (constant) != CONST_INT)
+         {
+           constant_pool_entries_cost = COST (constant);
+           constant_pool_entries_regcost = approx_reg_cost (constant);
+         }
+
+       /* If we are loading the full constant, we have an
+          equivalence.  */
+       if (offset == 0 && mode == const_mode)
+         return constant;
+
+       /* If this actually isn't a constant (weird!), we can't do
+          anything.  Otherwise, handle the two most common cases:
+          extracting a word from a multi-word constant, and
+          extracting the low-order bits.  Other cases don't seem
+          common enough to worry about.  */
+       if (! CONSTANT_P (constant))
+         return x;
+
+       if (GET_MODE_CLASS (mode) == MODE_INT
+           && GET_MODE_SIZE (mode) == UNITS_PER_WORD
+           && offset % UNITS_PER_WORD == 0
+           && (new = operand_subword (constant,
+                                      offset / UNITS_PER_WORD,
+                                      0, const_mode)) != 0)
+         return new;
+
+       if (((BYTES_BIG_ENDIAN
+             && offset == GET_MODE_SIZE (GET_MODE (constant)) - 1)
+            || (! BYTES_BIG_ENDIAN && offset == 0))
+           && (new = gen_lowpart (mode, constant)) != 0)
+         return new;
+      }
+
+    /* If this is a reference to a label at a known position in a jump
+       table, we also know its value.  */
+    if (base && GET_CODE (base) == LABEL_REF)
+      {
+       rtx label = XEXP (base, 0);
+       rtx table_insn = NEXT_INSN (label);
+
+       if (table_insn && JUMP_P (table_insn)
+           && GET_CODE (PATTERN (table_insn)) == ADDR_VEC)
+         {
+           rtx table = PATTERN (table_insn);
+
+           if (offset >= 0
+               && (offset / GET_MODE_SIZE (GET_MODE (table))
+                   < XVECLEN (table, 0)))
+             {
+               rtx label = XVECEXP
+                 (table, 0, offset / GET_MODE_SIZE (GET_MODE (table)));
+               rtx set;
+
+               /* If we have an insn that loads the label from the
+                  jumptable into a reg, we don't want to set the reg
+                  to the label, because this may cause a reference to
+                  the label to remain after the label is removed in
+                  some very obscure cases (PR middle-end/18628).  */
+               if (!insn)
+                 return label;
+
+               set = single_set (insn);
+
+               if (! set || SET_SRC (set) != x)
+                 return x;
+
+               /* If it's a jump, it's safe to reference the label.  */
+               if (SET_DEST (set) == pc_rtx)
+                 return label;
+
+               return x;
+             }
+         }
+       if (table_insn && JUMP_P (table_insn)
+           && GET_CODE (PATTERN (table_insn)) == ADDR_DIFF_VEC)
+         {
+           rtx table = PATTERN (table_insn);
+
+           if (offset >= 0
+               && (offset / GET_MODE_SIZE (GET_MODE (table))
+                   < XVECLEN (table, 1)))
+             {
+               offset /= GET_MODE_SIZE (GET_MODE (table));
+               new = gen_rtx_MINUS (Pmode, XVECEXP (table, 1, offset),
+                                    XEXP (table, 0));
+
+               if (GET_MODE (table) != Pmode)
+                 new = gen_rtx_TRUNCATE (GET_MODE (table), new);
+
+               /* Indicate this is a constant.  This isn't a valid
+                  form of CONST, but it will only be used to fold the
+                  next insns and then discarded, so it should be
+                  safe.
+
+                  Note this expression must be explicitly discarded,
+                  by cse_insn, else it may end up in a REG_EQUAL note
+                  and "escape" to cause problems elsewhere.  */
+               return gen_rtx_CONST (GET_MODE (new), new);
+             }
+         }
+      }
+
+    return x;
+  }
+}
+
  /* If X is a nontrivial arithmetic operation on an argument
     for which a constant value can be determined, return
     the result of operating on that value, as a constant.
@@ -3206,6 +3658,7 @@ fold_rtx (rtx x, rtx insn)
      case SYMBOL_REF:
      case LABEL_REF:
      case REG:
+    case PC:
        /* No use simplifying an EXPR_LIST
          since they are used only for lists of args
          in a function call's REG_EQUAL note.  */
@@ -3217,202 +3670,8 @@ fold_rtx (rtx x, rtx insn)
        return prev_insn_cc0;
  #endif
  
-    case PC:
-      /* If the next insn is a CODE_LABEL followed by a jump table,
-        PC's value is a LABEL_REF pointing to that label.  That
-        lets us fold switch statements on the VAX.  */
-      {
-       rtx next;
-       if (insn && tablejump_p (insn, &next, NULL))
-         return gen_rtx_LABEL_REF (Pmode, next);
-      }
-      break;
-
      case SUBREG:
-      /* See if we previously assigned a constant value to this SUBREG.  */
-      if ((new = lookup_as_function (x, CONST_INT)) != 0
-         || (new = lookup_as_function (x, CONST_DOUBLE)) != 0)
-       return new;
-
-      /* If this is a paradoxical SUBREG, we have no idea what value the
-        extra bits would have.  However, if the operand is equivalent
-        to a SUBREG whose operand is the same as our mode, and all the
-        modes are within a word, we can just use the inner operand
-        because these SUBREGs just say how to treat the register.
-
-        Similarly if we find an integer constant.  */
-
-      if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))
-       {
-         enum machine_mode imode = GET_MODE (SUBREG_REG (x));
-         struct table_elt *elt;
-
-         if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
-             && GET_MODE_SIZE (imode) <= UNITS_PER_WORD
-             && (elt = lookup (SUBREG_REG (x), HASH (SUBREG_REG (x), imode),
-                               imode)) != 0)
-           for (elt = elt->first_same_value; elt; elt = elt->next_same_value)
-             {
-               if (CONSTANT_P (elt->exp)
-                   && GET_MODE (elt->exp) == VOIDmode)
-                 return elt->exp;
-
-               if (GET_CODE (elt->exp) == SUBREG
-                   && GET_MODE (SUBREG_REG (elt->exp)) == mode
-                   && exp_equiv_p (elt->exp, elt->exp, 1, false))
-                 return copy_rtx (SUBREG_REG (elt->exp));
-             }
-
-         return x;
-       }
-
-      /* Fold SUBREG_REG.  If it changed, see if we can simplify the SUBREG.
-        We might be able to if the SUBREG is extracting a single word in an
-        integral mode or extracting the low part.  */
-
-      folded_arg0 = fold_rtx (SUBREG_REG (x), insn);
-      const_arg0 = equiv_constant (folded_arg0);
-      if (const_arg0)
-       folded_arg0 = const_arg0;
-
-      if (folded_arg0 != SUBREG_REG (x))
-       {
-         new = simplify_subreg (mode, folded_arg0,
-                                GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x));
-         if (new)
-           return new;
-       }
-
-      if (REG_P (folded_arg0)
-         && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (folded_arg0)))
-       {
-         struct table_elt *elt;
-
-         elt = lookup (folded_arg0,
-                       HASH (folded_arg0, GET_MODE (folded_arg0)),
-                       GET_MODE (folded_arg0));
-
-         if (elt)
-           elt = elt->first_same_value;
-
-         if (subreg_lowpart_p (x))
-           /* If this is a narrowing SUBREG and our operand is a REG, see
-              if we can find an equivalence for REG that is an arithmetic
-              operation in a wider mode where both operands are paradoxical
-              SUBREGs from objects of our result mode.  In that case, we
-              couldn-t report an equivalent value for that operation, since we
-              don't know what the extra bits will be.  But we can find an
-              equivalence for this SUBREG by folding that operation in the
-              narrow mode.  This allows us to fold arithmetic in narrow modes
-              when the machine only supports word-sized arithmetic.
-
-              Also look for a case where we have a SUBREG whose operand
-              is the same as our result.  If both modes are smaller
-              than a word, we are simply interpreting a register in
-              different modes and we can use the inner value.  */
-
-           for (; elt; elt = elt->next_same_value)
-             {
-               enum rtx_code eltcode = GET_CODE (elt->exp);
-
-               /* Just check for unary and binary operations.  */
-               if (UNARY_P (elt->exp)
-                   && eltcode != SIGN_EXTEND
-                   && eltcode != ZERO_EXTEND
-                   && GET_CODE (XEXP (elt->exp, 0)) == SUBREG
-                   && GET_MODE (SUBREG_REG (XEXP (elt->exp, 0))) == mode
-                   && (GET_MODE_CLASS (mode)
-                       == GET_MODE_CLASS (GET_MODE (XEXP (elt->exp, 0)))))
-                 {
-                   rtx op0 = SUBREG_REG (XEXP (elt->exp, 0));
-
-                   if (!REG_P (op0) && ! CONSTANT_P (op0))
-                     op0 = fold_rtx (op0, NULL_RTX);
-
-                   op0 = equiv_constant (op0);
-                   if (op0)
-                     new = simplify_unary_operation (GET_CODE (elt->exp), mode,
-                                                     op0, mode);
-                 }
-               else if (ARITHMETIC_P (elt->exp)
-                        && eltcode != DIV && eltcode != MOD
-                        && eltcode != UDIV && eltcode != UMOD
-                        && eltcode != ASHIFTRT && eltcode != LSHIFTRT
-                        && eltcode != ROTATE && eltcode != ROTATERT
-                        && ((GET_CODE (XEXP (elt->exp, 0)) == SUBREG
-                             && (GET_MODE (SUBREG_REG (XEXP (elt->exp, 0)))
-                                 == mode))
-                            || CONSTANT_P (XEXP (elt->exp, 0)))
-                        && ((GET_CODE (XEXP (elt->exp, 1)) == SUBREG
-                             && (GET_MODE (SUBREG_REG (XEXP (elt->exp, 1)))
-                                 == mode))
-                            || CONSTANT_P (XEXP (elt->exp, 1))))
-                 {
-                   rtx op0 = gen_lowpart_common (mode, XEXP (elt->exp, 0));
-                   rtx op1 = gen_lowpart_common (mode, XEXP (elt->exp, 1));
-
-                   if (op0 && !REG_P (op0) && ! CONSTANT_P (op0))
-                     op0 = fold_rtx (op0, NULL_RTX);
-
-                   if (op0)
-                     op0 = equiv_constant (op0);
-
-                   if (op1 && !REG_P (op1) && ! CONSTANT_P (op1))
-                     op1 = fold_rtx (op1, NULL_RTX);
-
-                   if (op1)
-                     op1 = equiv_constant (op1);
-
-                   /* If we are looking for the low SImode part of
-                      (ashift:DI c (const_int 32)), it doesn't work
-                      to compute that in SImode, because a 32-bit shift
-                      in SImode is unpredictable.  We know the value is 0.  */
-                   if (op0 && op1
-                       && GET_CODE (elt->exp) == ASHIFT
-                       && GET_CODE (op1) == CONST_INT
-                       && INTVAL (op1) >= GET_MODE_BITSIZE (mode))
-                     {
-                       if (INTVAL (op1)
-                           < GET_MODE_BITSIZE (GET_MODE (elt->exp)))
-                         /* If the count fits in the inner mode's width,
-                            but exceeds the outer mode's width,
-                            the value will get truncated to 0
-                            by the subreg.  */
-                         new = CONST0_RTX (mode);
-                       else
-                         /* If the count exceeds even the inner mode's width,
-                          don't fold this expression.  */
-                         new = 0;
-                     }
-                   else if (op0 && op1)
-                     new = simplify_binary_operation (GET_CODE (elt->exp),                                                            mode, op0, op1);
-                 }
-
-               else if (GET_CODE (elt->exp) == SUBREG
-                        && GET_MODE (SUBREG_REG (elt->exp)) == mode
-                        && (GET_MODE_SIZE (GET_MODE (folded_arg0))
-                            <= UNITS_PER_WORD)
-                        && exp_equiv_p (elt->exp, elt->exp, 1, false))
-                 new = copy_rtx (SUBREG_REG (elt->exp));
-
-               if (new)
-                 return new;
-             }
-         else
-           /* A SUBREG resulting from a zero extension may fold to zero if
-              it extracts higher bits than the ZERO_EXTEND's source bits.
-              FIXME: if combine tried to, er, combine these instructions,
-              this transformation may be moved to simplify_subreg.  */
-           for (; elt; elt = elt->next_same_value)
-             {
-               if (GET_CODE (elt->exp) == ZERO_EXTEND
-                   && subreg_lsb (x)
-                      >= GET_MODE_BITSIZE (GET_MODE (XEXP (elt->exp, 0))))
-                 return CONST0_RTX (mode);
-             }
-       }
-
-      return x;
+      return fold_rtx_subreg (x, insn);
  
      case NOT:
      case NEG:
@@ -3424,134 +3683,7 @@ fold_rtx (rtx x, rtx insn)
        break;
  
      case MEM:
-      /* If we are not actually processing an insn, don't try to find the
-        best address.  Not only don't we care, but we could modify the
-        MEM in an invalid way since we have no insn to validate against.  */
-      if (insn != 0)
-       find_best_addr (insn, &XEXP (x, 0), GET_MODE (x));
-
-      {
-       /* Even if we don't fold in the insn itself,
-          we can safely do so here, in hopes of getting a constant.  */
-       rtx addr = fold_rtx (XEXP (x, 0), NULL_RTX);
-       rtx base = 0;
-       HOST_WIDE_INT offset = 0;
-
-       if (REG_P (addr)
-           && REGNO_QTY_VALID_P (REGNO (addr)))
-         {
-           int addr_q = REG_QTY (REGNO (addr));
-           struct qty_table_elem *addr_ent = &qty_table[addr_q];
-
-           if (GET_MODE (addr) == addr_ent->mode
-               && addr_ent->const_rtx != NULL_RTX)
-             addr = addr_ent->const_rtx;
-         }
-
-       /* If address is constant, split it into a base and integer offset.  */
-       if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
-         base = addr;
-       else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
-                && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
-         {
-           base = XEXP (XEXP (addr, 0), 0);
-           offset = INTVAL (XEXP (XEXP (addr, 0), 1));
-         }
-       else if (GET_CODE (addr) == LO_SUM
-                && GET_CODE (XEXP (addr, 1)) == SYMBOL_REF)
-         base = XEXP (addr, 1);
-
-       /* If this is a constant pool reference, we can fold it into its
-          constant to allow better value tracking.  */
-       if (base && GET_CODE (base) == SYMBOL_REF
-           && CONSTANT_POOL_ADDRESS_P (base))
-         {
-           rtx constant = get_pool_constant (base);
-           enum machine_mode const_mode = get_pool_mode (base);
-           rtx new;
-
-           if (CONSTANT_P (constant) && GET_CODE (constant) != CONST_INT)
-             {
-               constant_pool_entries_cost = COST (constant);
-               constant_pool_entries_regcost = approx_reg_cost (constant);
-             }
-
-           /* If we are loading the full constant, we have an equivalence.  */
-           if (offset == 0 && mode == const_mode)
-             return constant;
-
-           /* If this actually isn't a constant (weird!), we can't do
-              anything.  Otherwise, handle the two most common cases:
-              extracting a word from a multi-word constant, and extracting
-              the low-order bits.  Other cases don't seem common enough to
-              worry about.  */
-           if (! CONSTANT_P (constant))
-             return x;
-
-           if (GET_MODE_CLASS (mode) == MODE_INT
-               && GET_MODE_SIZE (mode) == UNITS_PER_WORD
-               && offset % UNITS_PER_WORD == 0
-               && (new = operand_subword (constant,
-                                          offset / UNITS_PER_WORD,
-                                          0, const_mode)) != 0)
-             return new;
-
-           if (((BYTES_BIG_ENDIAN
-                 && offset == GET_MODE_SIZE (GET_MODE (constant)) - 1)
-                || (! BYTES_BIG_ENDIAN && offset == 0))
-               && (new = gen_lowpart (mode, constant)) != 0)
-             return new;
-         }
-
-       /* If this is a reference to a label at a known position in a jump
-          table, we also know its value.  */
-       if (base && GET_CODE (base) == LABEL_REF)
-         {
-           rtx label = XEXP (base, 0);
-           rtx table_insn = NEXT_INSN (label);
-
-           if (table_insn && JUMP_P (table_insn)
-               && GET_CODE (PATTERN (table_insn)) == ADDR_VEC)
-             {
-               rtx table = PATTERN (table_insn);
-
-               if (offset >= 0
-                   && (offset / GET_MODE_SIZE (GET_MODE (table))
-                       < XVECLEN (table, 0)))
-                 return XVECEXP (table, 0,
-                                 offset / GET_MODE_SIZE (GET_MODE (table)));
-             }
-           if (table_insn && JUMP_P (table_insn)
-               && GET_CODE (PATTERN (table_insn)) == ADDR_DIFF_VEC)
-             {
-               rtx table = PATTERN (table_insn);
-
-               if (offset >= 0
-                   && (offset / GET_MODE_SIZE (GET_MODE (table))
-                       < XVECLEN (table, 1)))
-                 {
-                   offset /= GET_MODE_SIZE (GET_MODE (table));
-                   new = gen_rtx_MINUS (Pmode, XVECEXP (table, 1, offset),
-                                        XEXP (table, 0));
-
-                   if (GET_MODE (table) != Pmode)
-                     new = gen_rtx_TRUNCATE (GET_MODE (table), new);
-
-                   /* Indicate this is a constant.  This isn't a
-                      valid form of CONST, but it will only be used
-                      to fold the next insns and then discarded, so
-                      it should be safe.
-
-                      Note this expression must be explicitly discarded,
-                      by cse_insn, else it may end up in a REG_EQUAL note
-                      and "escape" to cause problems elsewhere.  */
-                   return gen_rtx_CONST (GET_MODE (new), new);
-                 }
-             }
-         }
-
-       return x;
-      }
+      return fold_rtx_mem (x, insn);
  
  #ifdef NO_FUNCTION_CSE
      case CALL:
@@ -3561,9 +3693,12 @@ fold_rtx (rtx x, rtx insn)
  #endif
  
      case ASM_OPERANDS:
-      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
-       validate_change (insn, &ASM_OPERANDS_INPUT (x, i),
-                        fold_rtx (ASM_OPERANDS_INPUT (x, i), insn), 0);
+      if (insn)
+       {
+         for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
+           validate_change (insn, &ASM_OPERANDS_INPUT (x, i),
+                            fold_rtx (ASM_OPERANDS_INPUT (x, i), insn), 0);
+       }
        break;
  
      default:
@@ -3689,7 +3824,7 @@ fold_rtx (rtx x, rtx insn)
  
             /* It's not safe to substitute the operand of a conversion
                operator with a constant, as the conversion's identity
-              depends upon the mode of it's operand.  This optimization
+              depends upon the mode of its operand.  This optimization
                is handled by the call to simplify_unary_operation.  */
             if (GET_RTX_CLASS (code) == RTX_UNARY
                 && GET_MODE (replacements[j]) != mode_arg0
@@ -3809,6 +3944,10 @@ fold_rtx (rtx x, rtx insn)
          constant, set CONST_ARG0 and CONST_ARG1 appropriately.  We needn't
          do anything if both operands are already known to be constant.  */
  
+      /* ??? Vector mode comparisons are not supported yet.  */
+      if (VECTOR_MODE_P (mode))
+       break;
+
        if (const_arg0 == 0 || const_arg1 == 0)
         {
           struct table_elt *p0, *p1;
@@ -3816,7 +3955,7 @@ fold_rtx (rtx x, rtx insn)
           enum machine_mode mode_arg1;
  
  #ifdef FLOAT_STORE_FLAG_VALUE
-         if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+         if (SCALAR_FLOAT_MODE_P (mode))
             {
               true_rtx = (CONST_DOUBLE_FROM_REAL_VALUE
                           (FLOAT_STORE_FLAG_VALUE (mode), mode));
@@ -3826,8 +3965,6 @@ fold_rtx (rtx x, rtx insn)
  
           code = find_comparison_args (code, &folded_arg0, &folded_arg1,
                                        &mode_arg0, &mode_arg1);
-         const_arg0 = equiv_constant (folded_arg0);
-         const_arg1 = equiv_constant (folded_arg1);
  
           /* If the mode is VOIDmode or a MODE_CC mode, we don't know
              what kinds of things are being compared, so we can't do
@@ -3836,11 +3973,65 @@ fold_rtx (rtx x, rtx insn)
           if (mode_arg0 == VOIDmode || GET_MODE_CLASS (mode_arg0) == MODE_CC)
             break;
  
+         const_arg0 = equiv_constant (folded_arg0);
+         const_arg1 = equiv_constant (folded_arg1);
+
           /* If we do not now have two constants being compared, see
              if we can nevertheless deduce some things about the
              comparison.  */
           if (const_arg0 == 0 || const_arg1 == 0)
             {
+             if (const_arg1 != NULL)
+               {
+                 rtx cheapest_simplification;
+                 int cheapest_cost;
+                 rtx simp_result;
+                 struct table_elt *p;
+
+                 /* See if we can find an equivalent of folded_arg0
+                    that gets us a cheaper expression, possibly a
+                    constant through simplifications.  */
+                 p = lookup (folded_arg0, SAFE_HASH (folded_arg0, mode_arg0),
+                             mode_arg0);
+                 
+                 if (p != NULL)
+                   {
+                     cheapest_simplification = x;
+                     cheapest_cost = COST (x);
+
+                     for (p = p->first_same_value; p != NULL; p = p->next_same_value)
+                       {
+                         int cost;
+
+                         /* If the entry isn't valid, skip it.  */
+                         if (! exp_equiv_p (p->exp, p->exp, 1, false))
+                           continue;
+
+                         /* Try to simplify using this equivalence.  */
+                         simp_result
+                           = simplify_relational_operation (code, mode,
+                                                            mode_arg0,
+                                                            p->exp,
+                                                            const_arg1);
+
+                         if (simp_result == NULL)
+                           continue;
+
+                         cost = COST (simp_result);
+                         if (cost < cheapest_cost)
+                           {
+                             cheapest_cost = cost;
+                             cheapest_simplification = simp_result;
+                           }
+                       }
+
+                     /* If we have a cheaper expression now, use that
+                        and try folding it further, from the top.  */
+                     if (cheapest_simplification != x)
+                       return fold_rtx (cheapest_simplification, insn);
+                   }
+               }
+
               /* Some addresses are known to be nonzero.  We don't know
                  their sign, but equality comparisons are known.  */
               if (const_arg1 == const0_rtx
@@ -3930,7 +4121,7 @@ fold_rtx (rtx x, rtx insn)
               rtx true_rtx = const_true_rtx, false_rtx = const0_rtx;
  
  #ifdef FLOAT_STORE_FLAG_VALUE
-             if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+             if (SCALAR_FLOAT_MODE_P (mode))
                 {
                   true_rtx = (CONST_DOUBLE_FROM_REAL_VALUE
                           (FLOAT_STORE_FLAG_VALUE (mode), mode));
@@ -4237,47 +4428,6 @@ equiv_constant (rtx x)
    return 0;
  }
  \f
-/* Assuming that X is an rtx (e.g., MEM, REG or SUBREG) for a fixed-point
-   number, return an rtx (MEM, SUBREG, or CONST_INT) that refers to the
-   least-significant part of X.
-   MODE specifies how big a part of X to return.
-
-   If the requested operation cannot be done, 0 is returned.
-
-   This is similar to gen_lowpart_general in emit-rtl.c.  */
-
-rtx
-gen_lowpart_if_possible (enum machine_mode mode, rtx x)
-{
-  rtx result = gen_lowpart_common (mode, x);
-
-  if (result)
-    return result;
-  else if (MEM_P (x))
-    {
-      /* This is the only other case we handle.  */
-      int offset = 0;
-      rtx new;
-
-      if (WORDS_BIG_ENDIAN)
-       offset = (MAX (GET_MODE_SIZE (GET_MODE (x)), UNITS_PER_WORD)
-                 - MAX (GET_MODE_SIZE (mode), UNITS_PER_WORD));
-      if (BYTES_BIG_ENDIAN)
-       /* Adjust the address so that the address-after-the-data is
-          unchanged.  */
-       offset -= (MIN (UNITS_PER_WORD, GET_MODE_SIZE (mode))
-                  - MIN (UNITS_PER_WORD, GET_MODE_SIZE (GET_MODE (x))));
-
-      new = adjust_address_nv (x, mode, offset);
-      if (! memory_address_p (mode, XEXP (new, 0)))
-       return 0;
-
-      return new;
-    }
-  else
-    return 0;
-}
-\f
  /* Given INSN, a jump insn, PATH_TAKEN indicates if we are following the "taken"
     branch.  It will be zero if not.
  
@@ -4335,6 +4485,18 @@ record_jump_equiv (rtx insn, int taken)
    record_jump_cond (code, mode, op0, op1, reversed_nonequality);
  }
  
+/* Yet another form of subreg creation.  In this case, we want something in
+   MODE, and we should assume OP has MODE iff it is naturally modeless.  */
+
+static rtx
+record_jump_cond_subreg (enum machine_mode mode, rtx op)
+{
+  enum machine_mode op_mode = GET_MODE (op);
+  if (op_mode == mode || op_mode == VOIDmode)
+    return op;
+  return lowpart_subreg (mode, op, op_mode);
+}
+
  /* We know that comparison CODE applied to OP0 and OP1 in MODE is true.
     REVERSED_NONEQUALITY is nonzero if CODE had to be swapped.
     Make any useful entries we can with that information.  Called from
@@ -4359,11 +4521,10 @@ record_jump_cond (enum rtx_code code, enum machine_mode mode, rtx op0,
           > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
      {
        enum machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
-      rtx tem = gen_lowpart (inner_mode, op1);
-
-      record_jump_cond (code, mode, SUBREG_REG (op0),
-                       tem ? tem : gen_rtx_SUBREG (inner_mode, op1, 0),
-                       reversed_nonequality);
+      rtx tem = record_jump_cond_subreg (inner_mode, op1);
+      if (tem)
+       record_jump_cond (code, mode, SUBREG_REG (op0), tem,
+                         reversed_nonequality);
      }
  
    if (code == EQ && GET_CODE (op1) == SUBREG
@@ -4371,11 +4532,10 @@ record_jump_cond (enum rtx_code code, enum machine_mode mode, rtx op0,
           > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1)))))
      {
        enum machine_mode inner_mode = GET_MODE (SUBREG_REG (op1));
-      rtx tem = gen_lowpart (inner_mode, op0);
-
-      record_jump_cond (code, mode, SUBREG_REG (op1),
-                       tem ? tem : gen_rtx_SUBREG (inner_mode, op0, 0),
-                       reversed_nonequality);
+      rtx tem = record_jump_cond_subreg (inner_mode, op0);
+      if (tem)
+       record_jump_cond (code, mode, SUBREG_REG (op1), tem,
+                         reversed_nonequality);
      }
  
    /* Similarly, if this is an NE comparison, and either is a SUBREG
@@ -4391,11 +4551,10 @@ record_jump_cond (enum rtx_code code, enum machine_mode mode, rtx op0,
           < GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
      {
        enum machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
-      rtx tem = gen_lowpart (inner_mode, op1);
-
-      record_jump_cond (code, mode, SUBREG_REG (op0),
-                       tem ? tem : gen_rtx_SUBREG (inner_mode, op1, 0),
-                       reversed_nonequality);
+      rtx tem = record_jump_cond_subreg (inner_mode, op1);
+      if (tem)
+       record_jump_cond (code, mode, SUBREG_REG (op0), tem,
+                         reversed_nonequality);
      }
  
    if (code == NE && GET_CODE (op1) == SUBREG
@@ -4404,11 +4563,10 @@ record_jump_cond (enum rtx_code code, enum machine_mode mode, rtx op0,
           < GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1)))))
      {
        enum machine_mode inner_mode = GET_MODE (SUBREG_REG (op1));
-      rtx tem = gen_lowpart (inner_mode, op0);
-
-      record_jump_cond (code, mode, SUBREG_REG (op1),
-                       tem ? tem : gen_rtx_SUBREG (inner_mode, op0, 0),
-                       reversed_nonequality);
+      rtx tem = record_jump_cond_subreg (inner_mode, op0);
+      if (tem)
+       record_jump_cond (code, mode, SUBREG_REG (op1), tem,
+                         reversed_nonequality);
      }
  
    /* Hash both operands.  */
@@ -4797,7 +4955,7 @@ cse_insn (rtx insn, rtx libcall_insn)
        else
         SET_SRC (sets[i].rtl) = new;
  
-      if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
+      if (GET_CODE (dest) == ZERO_EXTRACT)
         {
           validate_change (insn, &XEXP (dest, 1),
                            canon_reg (XEXP (dest, 1), insn), 1);
@@ -4805,9 +4963,9 @@ cse_insn (rtx insn, rtx libcall_insn)
                            canon_reg (XEXP (dest, 2), insn), 1);
         }
  
-      while (GET_CODE (dest) == SUBREG || GET_CODE (dest) == STRICT_LOW_PART
+      while (GET_CODE (dest) == SUBREG
              || GET_CODE (dest) == ZERO_EXTRACT
-            || GET_CODE (dest) == SIGN_EXTRACT)
+            || GET_CODE (dest) == STRICT_LOW_PART)
         dest = XEXP (dest, 0);
  
        if (MEM_P (dest))
@@ -4904,8 +5062,7 @@ cse_insn (rtx insn, rtx libcall_insn)
          causes later instructions to be mis-optimized.  */
        /* If storing a constant in a bitfield, pre-truncate the constant
          so we will be able to record it later.  */
-      if (GET_CODE (SET_DEST (sets[i].rtl)) == ZERO_EXTRACT
-         || GET_CODE (SET_DEST (sets[i].rtl)) == SIGN_EXTRACT)
+      if (GET_CODE (SET_DEST (sets[i].rtl)) == ZERO_EXTRACT)
         {
           rtx width = XEXP (SET_DEST (sets[i].rtl), 1);
  
@@ -5154,10 +5311,13 @@ cse_insn (rtx insn, rtx libcall_insn)
           && MEM_P (src) && ! do_not_record
           && LOAD_EXTEND_OP (mode) != UNKNOWN)
         {
+         struct rtx_def memory_extend_buf;
+         rtx memory_extend_rtx = &memory_extend_buf;
           enum machine_mode tmode;
  
           /* Set what we are trying to extend and the operation it might
              have been extended with.  */
+         memset (memory_extend_rtx, 0, sizeof(*memory_extend_rtx));
           PUT_CODE (memory_extend_rtx, LOAD_EXTEND_OP (mode));
           XEXP (memory_extend_rtx, 0) = src;
  
@@ -5406,6 +5566,22 @@ cse_insn (rtx insn, rtx libcall_insn)
               break;
             }
  
+         /* Reject certain invalid forms of CONST that we create.  */
+         else if (CONSTANT_P (trial)
+                  && GET_CODE (trial) == CONST
+                  /* Reject cases that will cause decode_rtx_const to
+                     die.  On the alpha when simplifying a switch, we
+                     get (const (truncate (minus (label_ref)
+                     (label_ref)))).  */
+                  && (GET_CODE (XEXP (trial, 0)) == TRUNCATE
+                      /* Likewise on IA-64, except without the
+                         truncate.  */
+                      || (GET_CODE (XEXP (trial, 0)) == MINUS
+                          && GET_CODE (XEXP (XEXP (trial, 0), 0)) == LABEL_REF
+                          && GET_CODE (XEXP (XEXP (trial, 0), 1)) == LABEL_REF)))
+           /* Do nothing for this case.  */
+           ;
+
           /* Look for a substitution that makes a valid insn.  */
           else if (validate_change (insn, &SET_SRC (sets[i].rtl), trial, 0))
             {
@@ -5441,16 +5617,6 @@ cse_insn (rtx insn, rtx libcall_insn)
  
           else if (constant_pool_entries_cost
                    && CONSTANT_P (trial)
-                  /* Reject cases that will abort in decode_rtx_const.
-                     On the alpha when simplifying a switch, we get
-                     (const (truncate (minus (label_ref) (label_ref)))).  */
-                  && ! (GET_CODE (trial) == CONST
-                        && GET_CODE (XEXP (trial, 0)) == TRUNCATE)
-                  /* Likewise on IA-64, except without the truncate.  */
-                  && ! (GET_CODE (trial) == CONST
-                        && GET_CODE (XEXP (trial, 0)) == MINUS
-                        && GET_CODE (XEXP (XEXP (trial, 0), 0)) == LABEL_REF
-                        && GET_CODE (XEXP (XEXP (trial, 0), 1)) == LABEL_REF)
                    && (src_folded == 0
                        || (!MEM_P (src_folded)
                            && ! src_folded_force_flag))
@@ -5558,11 +5724,9 @@ cse_insn (rtx insn, rtx libcall_insn)
        /* Now deal with the destination.  */
        do_not_record = 0;
  
-      /* Look within any SIGN_EXTRACT or ZERO_EXTRACT
-        to the MEM or REG within it.  */
-      while (GET_CODE (dest) == SIGN_EXTRACT
+      /* Look within any ZERO_EXTRACT to the MEM or REG within it.  */
+      while (GET_CODE (dest) == SUBREG
              || GET_CODE (dest) == ZERO_EXTRACT
-            || GET_CODE (dest) == SUBREG
              || GET_CODE (dest) == STRICT_LOW_PART)
         dest = XEXP (dest, 0);
  
@@ -5590,8 +5754,7 @@ cse_insn (rtx insn, rtx libcall_insn)
          because the value in it after the store
          may not equal what was stored, due to truncation.  */
  
-      if (GET_CODE (SET_DEST (sets[i].rtl)) == ZERO_EXTRACT
-         || GET_CODE (SET_DEST (sets[i].rtl)) == SIGN_EXTRACT)
+      if (GET_CODE (SET_DEST (sets[i].rtl)) == ZERO_EXTRACT)
         {
           rtx width = XEXP (SET_DEST (sets[i].rtl), 1);
  
@@ -5686,12 +5849,7 @@ cse_insn (rtx insn, rtx libcall_insn)
           if (REG_P (dest) || GET_CODE (dest) == SUBREG)
             invalidate (dest, VOIDmode);
           else if (MEM_P (dest))
-           {
-             /* Outgoing arguments for a libcall don't
-                affect any recorded expressions.  */
-             if (! libcall_insn || insn == libcall_insn)
-               invalidate (dest, VOIDmode);
-           }
+           invalidate (dest, VOIDmode);
           else if (GET_CODE (dest) == STRICT_LOW_PART
                    || GET_CODE (dest) == ZERO_EXTRACT)
             invalidate (XEXP (dest, 0), GET_MODE (dest));
@@ -5859,12 +6017,7 @@ cse_insn (rtx insn, rtx libcall_insn)
         if (REG_P (dest) || GET_CODE (dest) == SUBREG)
           invalidate (dest, VOIDmode);
         else if (MEM_P (dest))
-         {
-           /* Outgoing arguments for a libcall don't
-              affect any recorded expressions.  */
-           if (! libcall_insn || insn == libcall_insn)
-             invalidate (dest, VOIDmode);
-         }
+         invalidate (dest, VOIDmode);
         else if (GET_CODE (dest) == STRICT_LOW_PART
                  || GET_CODE (dest) == ZERO_EXTRACT)
           invalidate (XEXP (dest, 0), GET_MODE (dest));
@@ -6650,14 +6803,15 @@ cse_end_of_basic_block (rtx insn, struct cse_basic_block_data *data,
     in conditional jump instructions.  */
  
  int
-cse_main (rtx f, int nregs, FILE *file)
+cse_main (rtx f, int nregs)
  {
    struct cse_basic_block_data val;
    rtx insn = f;
    int i;
  
-  val.path = xmalloc (sizeof (struct branch_path)
-                     * PARAM_VALUE (PARAM_MAX_CSE_PATH_LENGTH));
+  init_cse_reg_info (nregs);
+
+  val.path = XNEWVEC (struct branch_path, PARAM_VALUE (PARAM_MAX_CSE_PATH_LENGTH));
  
    cse_jumps_altered = 0;
    recorded_label_ref = 0;
@@ -6669,27 +6823,12 @@ cse_main (rtx f, int nregs, FILE *file)
    init_recog ();
    init_alias_analysis ();
  
-  max_reg = nregs;
-
-  max_insn_uid = get_max_uid ();
-
-  reg_eqv_table = xmalloc (nregs * sizeof (struct reg_eqv_elem));
-
-#ifdef LOAD_EXTEND_OP
-
-  /* Allocate scratch rtl here.  cse_insn will fill in the memory reference
-     and change the code and mode as appropriate.  */
-  memory_extend_rtx = gen_rtx_ZERO_EXTEND (VOIDmode, NULL_RTX);
-#endif
-
-  /* Reset the counter indicating how many elements have been made
-     thus far.  */
-  n_elements_made = 0;
+  reg_eqv_table = XNEWVEC (struct reg_eqv_elem, nregs);
  
    /* Find the largest uid.  */
  
    max_uid = get_max_uid ();
-  uid_cuid = xcalloc (max_uid + 1, sizeof (int));
+  uid_cuid = XCNEWVEC (int, max_uid + 1);
  
    /* Compute the mapping from uids to cuids.
       CUIDs are numbers assigned to insns, like uids,
@@ -6730,8 +6869,8 @@ cse_main (rtx f, int nregs, FILE *file)
        cse_basic_block_end = val.high_cuid;
        max_qty = val.nsets * 2;
  
-      if (file)
-       fnotice (file, ";; Processing block from %d to %d, %d sets.\n",
+      if (dump_file)
+       fprintf (dump_file, ";; Processing block from %d to %d, %d sets.\n",
                  INSN_UID (insn), val.last ? INSN_UID (val.last) : 0,
                  val.nsets);
  
@@ -6770,9 +6909,6 @@ cse_main (rtx f, int nregs, FILE *file)
  #endif
      }
  
-  if (max_elements_made < n_elements_made)
-    max_elements_made = n_elements_made;
-
    /* Clean up.  */
    end_alias_analysis ();
    free (uid_cuid);
@@ -6785,11 +6921,7 @@ cse_main (rtx f, int nregs, FILE *file)
  
  /* Process a single basic block.  FROM and TO and the limits of the basic
     block.  NEXT_BRANCH points to the branch path when following jumps or
-   a null path when not following jumps.
-
-   AROUND_LOOP is nonzero if we are to try to cse around to the start of a
-   loop.  This is true when we are being called for the last time on a
-   block and this CSE pass is before loop.c.  */
+   a null path when not following jumps.  */
  
  static rtx
  cse_basic_block (rtx from, rtx to, struct branch_path *next_branch)
@@ -6801,7 +6933,7 @@ cse_basic_block (rtx from, rtx to, struct branch_path *next_branch)
    int no_conflict = 0;
  
    /* Allocate the space needed by qty_table.  */
-  qty_table = xmalloc (max_qty * sizeof (struct qty_table_elem));
+  qty_table = XNEWVEC (struct qty_table_elem, max_qty);
  
    new_basic_block ();
  
@@ -6822,7 +6954,7 @@ cse_basic_block (rtx from, rtx to, struct branch_path *next_branch)
  
          ??? This is a real kludge and needs to be done some other way.
          Perhaps for 2.9.  */
-      if (code != NOTE && num_insns++ > 1000)
+      if (code != NOTE && num_insns++ > PARAM_VALUE (PARAM_MAX_CSE_INSNS))
         {
           flush_hash_table ();
           num_insns = 0;
@@ -6964,8 +7096,7 @@ cse_basic_block (rtx from, rtx to, struct branch_path *next_branch)
              following branches in this case.  */
           to_usage = 0;
           val.path_size = 0;
-         val.path = xmalloc (sizeof (struct branch_path)
-                             * PARAM_VALUE (PARAM_MAX_CSE_PATH_LENGTH));
+         val.path = XNEWVEC (struct branch_path, PARAM_VALUE (PARAM_MAX_CSE_PATH_LENGTH));
           cse_end_of_basic_block (insn, &val, 0, 0);
           free (val.path);
  
@@ -7017,10 +7148,16 @@ check_for_label_ref (rtx *rtl, void *data)
  \f
  /* Count the number of times registers are used (not set) in X.
     COUNTS is an array in which we accumulate the count, INCR is how much
-   we count each register usage.  */
+   we count each register usage.
+
+   Don't count a usage of DEST, which is the SET_DEST of a SET which
+   contains X in its SET_SRC.  This is because such a SET does not
+   modify the liveness of DEST.
+   DEST is set to pc_rtx for a trapping insn, which means that we must count
+   uses of a SET_DEST regardless because the insn can't be deleted here.  */
  
  static void
-count_reg_usage (rtx x, int *counts, int incr)
+count_reg_usage (rtx x, int *counts, rtx dest, int incr)
  {
    enum rtx_code code;
    rtx note;
@@ -7033,7 +7170,8 @@ count_reg_usage (rtx x, int *counts, int incr)
    switch (code = GET_CODE (x))
      {
      case REG:
-      counts[REGNO (x)] += incr;
+      if (x != dest)
+       counts[REGNO (x)] += incr;
        return;
  
      case PC:
@@ -7050,23 +7188,28 @@ count_reg_usage (rtx x, int *counts, int incr)
        /* If we are clobbering a MEM, mark any registers inside the address
           as being used.  */
        if (MEM_P (XEXP (x, 0)))
-       count_reg_usage (XEXP (XEXP (x, 0), 0), counts, incr);
+       count_reg_usage (XEXP (XEXP (x, 0), 0), counts, NULL_RTX, incr);
        return;
  
      case SET:
        /* Unless we are setting a REG, count everything in SET_DEST.  */
        if (!REG_P (SET_DEST (x)))
-       count_reg_usage (SET_DEST (x), counts, incr);
-      count_reg_usage (SET_SRC (x), counts, incr);
+       count_reg_usage (SET_DEST (x), counts, NULL_RTX, incr);
+      count_reg_usage (SET_SRC (x), counts,
+                      dest ? dest : SET_DEST (x),
+                      incr);
        return;
  
      case CALL_INSN:
-      count_reg_usage (CALL_INSN_FUNCTION_USAGE (x), counts, incr);
-      /* Fall through.  */
-
      case INSN:
      case JUMP_INSN:
-      count_reg_usage (PATTERN (x), counts, incr);
+    /* We expect dest to be NULL_RTX here.  If the insn may trap, mark
+       this fact by setting DEST to pc_rtx.  */
+      if (flag_non_call_exceptions && may_trap_p (PATTERN (x)))
+       dest = pc_rtx;
+      if (code == CALL_INSN)
+       count_reg_usage (CALL_INSN_FUNCTION_USAGE (x), counts, dest, incr);
+      count_reg_usage (PATTERN (x), counts, dest, incr);
  
        /* Things used in a REG_EQUAL note aren't dead since loop may try to
          use them.  */
@@ -7081,12 +7224,12 @@ count_reg_usage (rtx x, int *counts, int incr)
              Process all the arguments.  */
             do
               {
-               count_reg_usage (XEXP (eqv, 0), counts, incr);
+               count_reg_usage (XEXP (eqv, 0), counts, dest, incr);
                 eqv = XEXP (eqv, 1);
               }
             while (eqv && GET_CODE (eqv) == EXPR_LIST);
           else
-           count_reg_usage (eqv, counts, incr);
+           count_reg_usage (eqv, counts, dest, incr);
         }
        return;
  
@@ -7096,15 +7239,19 @@ count_reg_usage (rtx x, int *counts, int incr)
           /* FUNCTION_USAGE expression lists may include (CLOBBER (mem /u)),
              involving registers in the address.  */
           || GET_CODE (XEXP (x, 0)) == CLOBBER)
-       count_reg_usage (XEXP (x, 0), counts, incr);
+       count_reg_usage (XEXP (x, 0), counts, NULL_RTX, incr);
  
-      count_reg_usage (XEXP (x, 1), counts, incr);
+      count_reg_usage (XEXP (x, 1), counts, NULL_RTX, incr);
        return;
  
      case ASM_OPERANDS:
+      /* If the asm is volatile, then this insn cannot be deleted,
+        and so the inputs *must* be live.  */
+      if (MEM_VOLATILE_P (x))
+       dest = NULL_RTX;
        /* Iterate over just the inputs, not the constraints as well.  */
        for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
-       count_reg_usage (ASM_OPERANDS_INPUT (x, i), counts, incr);
+       count_reg_usage (ASM_OPERANDS_INPUT (x, i), counts, dest, incr);
        return;
  
      case INSN_LIST:
@@ -7118,10 +7265,10 @@ count_reg_usage (rtx x, int *counts, int incr)
    for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
      {
        if (fmt[i] == 'e')
-       count_reg_usage (XEXP (x, i), counts, incr);
+       count_reg_usage (XEXP (x, i), counts, dest, incr);
        else if (fmt[i] == 'E')
         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
-         count_reg_usage (XVECEXP (x, i, j), counts, incr);
+         count_reg_usage (XVECEXP (x, i, j), counts, dest, incr);
      }
  }
  \f
@@ -7208,11 +7355,11 @@ dead_libcall_p (rtx insn, int *counts)
      new = XEXP (note, 0);
  
    /* While changing insn, we must update the counts accordingly.  */
-  count_reg_usage (insn, counts, -1);
+  count_reg_usage (insn, counts, NULL_RTX, -1);
  
    if (validate_change (insn, &SET_SRC (set), new, 0))
      {
-      count_reg_usage (insn, counts, 1);
+      count_reg_usage (insn, counts, NULL_RTX, 1);
        remove_note (insn, find_reg_note (insn, REG_RETVAL, NULL_RTX));
        remove_note (insn, note);
        return true;
@@ -7223,14 +7370,14 @@ dead_libcall_p (rtx insn, int *counts)
        new = force_const_mem (GET_MODE (SET_DEST (set)), new);
        if (new && validate_change (insn, &SET_SRC (set), new, 0))
         {
-         count_reg_usage (insn, counts, 1);
+         count_reg_usage (insn, counts, NULL_RTX, 1);
           remove_note (insn, find_reg_note (insn, REG_RETVAL, NULL_RTX));
           remove_note (insn, note);
           return true;
         }
      }
  
-  count_reg_usage (insn, counts, 1);
+  count_reg_usage (insn, counts, NULL_RTX, 1);
    return false;
  }
  
@@ -7248,73 +7395,66 @@ delete_trivially_dead_insns (rtx insns, int nreg)
    int *counts;
    rtx insn, prev;
    int in_libcall = 0, dead_libcall = 0;
-  int ndead = 0, nlastdead, niterations = 0;
+  int ndead = 0;
  
    timevar_push (TV_DELETE_TRIVIALLY_DEAD);
    /* First count the number of times each register is used.  */
-  counts = xcalloc (nreg, sizeof (int));
-  for (insn = next_real_insn (insns); insn; insn = next_real_insn (insn))
-    count_reg_usage (insn, counts, 1);
-
-  do
+  counts = XCNEWVEC (int, nreg);
+  for (insn = insns; insn; insn = NEXT_INSN (insn))
+    if (INSN_P (insn))
+      count_reg_usage (insn, counts, NULL_RTX, 1);
+
+  /* Go from the last insn to the first and delete insns that only set unused
+     registers or copy a register to itself.  As we delete an insn, remove
+     usage counts for registers it uses.
+
+     The first jump optimization pass may leave a real insn as the last
+     insn in the function.   We must not skip that insn or we may end
+     up deleting code that is not really dead.  */
+  for (insn = get_last_insn (); insn; insn = prev)
      {
-      nlastdead = ndead;
-      niterations++;
-      /* Go from the last insn to the first and delete insns that only set unused
-        registers or copy a register to itself.  As we delete an insn, remove
-        usage counts for registers it uses.
-
-        The first jump optimization pass may leave a real insn as the last
-        insn in the function.   We must not skip that insn or we may end
-        up deleting code that is not really dead.  */
-      insn = get_last_insn ();
-      if (! INSN_P (insn))
-       insn = prev_real_insn (insn);
+      int live_insn = 0;
  
-      for (; insn; insn = prev)
-       {
-         int live_insn = 0;
-
-         prev = prev_real_insn (insn);
+      prev = PREV_INSN (insn);
+      if (!INSN_P (insn))
+       continue;
  
-         /* Don't delete any insns that are part of a libcall block unless
-            we can delete the whole libcall block.
+      /* Don't delete any insns that are part of a libcall block unless
+        we can delete the whole libcall block.
  
-            Flow or loop might get confused if we did that.  Remember
-            that we are scanning backwards.  */
-         if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
-           {
-             in_libcall = 1;
-             live_insn = 1;
-             dead_libcall = dead_libcall_p (insn, counts);
-           }
-         else if (in_libcall)
-           live_insn = ! dead_libcall;
-         else
-           live_insn = insn_live_p (insn, counts);
+        Flow or loop might get confused if we did that.  Remember
+        that we are scanning backwards.  */
+      if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
+       {
+         in_libcall = 1;
+         live_insn = 1;
+         dead_libcall = dead_libcall_p (insn, counts);
+       }
+      else if (in_libcall)
+       live_insn = ! dead_libcall;
+      else
+       live_insn = insn_live_p (insn, counts);
  
-         /* If this is a dead insn, delete it and show registers in it aren't
-            being used.  */
+      /* If this is a dead insn, delete it and show registers in it aren't
+        being used.  */
  
-         if (! live_insn)
-           {
-             count_reg_usage (insn, counts, -1);
-             delete_insn_and_edges (insn);
-             ndead++;
-           }
+      if (! live_insn)
+       {
+         count_reg_usage (insn, counts, NULL_RTX, -1);
+         delete_insn_and_edges (insn);
+         ndead++;
+       }
  
-         if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
-           {
-             in_libcall = 0;
-             dead_libcall = 0;
-           }
+      if (in_libcall && find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+       {
+         in_libcall = 0;
+         dead_libcall = 0;
         }
      }
-  while (ndead != nlastdead);
  
    if (dump_file && ndead)
-    fprintf (dump_file, "Deleted %i trivially dead insns; %i iterations\n",
-            ndead, niterations);
+    fprintf (dump_file, "Deleted %i trivially dead insns\n",
+            ndead);
    /* Clean up.  */
    free (counts);
    timevar_pop (TV_DELETE_TRIVIALLY_DEAD);
@@ -7329,20 +7469,47 @@ delete_trivially_dead_insns (rtx insns, int nreg)
  static int
  cse_change_cc_mode (rtx *loc, void *data)
  {
-  rtx newreg = (rtx) data;
+  struct change_cc_mode_args* args = (struct change_cc_mode_args*)data;
  
    if (*loc
        && REG_P (*loc)
-      && REGNO (*loc) == REGNO (newreg)
-      && GET_MODE (*loc) != GET_MODE (newreg))
+      && REGNO (*loc) == REGNO (args->newreg)
+      && GET_MODE (*loc) != GET_MODE (args->newreg))
      {
-      *loc = newreg;
+      validate_change (args->insn, loc, args->newreg, 1);
+      
        return -1;
      }
    return 0;
  }
  
  /* Change the mode of any reference to the register REGNO (NEWREG) to
+   GET_MODE (NEWREG) in INSN.  */
+
+static void
+cse_change_cc_mode_insn (rtx insn, rtx newreg)
+{
+  struct change_cc_mode_args args;
+  int success;
+
+  if (!INSN_P (insn))
+    return;
+
+  args.insn = insn;
+  args.newreg = newreg;
+  
+  for_each_rtx (&PATTERN (insn), cse_change_cc_mode, &args);
+  for_each_rtx (&REG_NOTES (insn), cse_change_cc_mode, &args);
+  
+  /* If the following assertion was triggered, there is most probably
+     something wrong with the cc_modes_compatible back end function.
+     CC modes only can be considered compatible if the insn - with the mode
+     replaced by any of the compatible modes - can still be recognized.  */
+  success = apply_change_group ();
+  gcc_assert (success);
+}
+
+/* Change the mode of any reference to the register REGNO (NEWREG) to
     GET_MODE (NEWREG), starting at START.  Stop before END.  Stop at
     any instruction which modifies NEWREG.  */
  
@@ -7359,8 +7526,7 @@ cse_change_cc_mode_insns (rtx start, rtx end, rtx newreg)
        if (reg_set_p (newreg, insn))
         return;
  
-      for_each_rtx (&PATTERN (insn), cse_change_cc_mode, newreg);
-      for_each_rtx (&REG_NOTES (insn), cse_change_cc_mode, newreg);
+      cse_change_cc_mode_insn (insn, newreg);
      }
  }
  
@@ -7469,6 +7635,8 @@ cse_cc_succs (basic_block bb, rtx cc_reg, rtx cc_src, bool can_change_mode)
                         {
                           gcc_assert (can_change_mode);
                           mode = comp_mode;
+
+                         /* The modified insn will be re-recognized later.  */
                           PUT_MODE (cc_src, mode);
                         }
                     }
@@ -7556,7 +7724,7 @@ cse_cc_succs (basic_block bb, rtx cc_reg, rtx cc_src, bool can_change_mode)
  /* If we have a fixed condition code register (or two), walk through
     the instructions and try to eliminate duplicate assignments.  */
  
-void
+static void
  cse_condition_code_reg (void)
  {
    unsigned int cc_regno_1;
@@ -7648,12 +7816,7 @@ cse_condition_code_reg (void)
             {
               rtx newreg = gen_rtx_REG (mode, REGNO (cc_reg));
  
-             /* Change the mode of CC_REG in CC_SRC_INSN to
-                GET_MODE (NEWREG).  */
-             for_each_rtx (&PATTERN (cc_src_insn), cse_change_cc_mode,
-                           newreg);
-             for_each_rtx (&REG_NOTES (cc_src_insn), cse_change_cc_mode,
-                           newreg);
+             cse_change_cc_mode_insn (cc_src_insn, newreg);
  
               /* Do the same in the following insns that use the
                  current value of CC_REG within BB.  */
@@ -7664,3 +7827,119 @@ cse_condition_code_reg (void)
         }
      }
  }
+\f
+
+/* Perform common subexpression elimination.  Nonzero value from
+   `cse_main' means that jumps were simplified and some code may now
+   be unreachable, so do jump optimization again.  */
+static bool
+gate_handle_cse (void)
+{
+  return optimize > 0;
+}
+
+static void
+rest_of_handle_cse (void)
+{
+  int tem;
+
+  if (dump_file)
+    dump_flow_info (dump_file, dump_flags);
+
+  reg_scan (get_insns (), max_reg_num ());
+
+  tem = cse_main (get_insns (), max_reg_num ());
+  if (tem)
+    rebuild_jump_labels (get_insns ());
+  if (purge_all_dead_edges ())
+    delete_unreachable_blocks ();
+
+  delete_trivially_dead_insns (get_insns (), max_reg_num ());
+
+  /* If we are not running more CSE passes, then we are no longer
+     expecting CSE to be run.  But always rerun it in a cheap mode.  */
+  cse_not_expected = !flag_rerun_cse_after_loop && !flag_gcse;
+
+  if (tem)
+    delete_dead_jumptables ();
+
+  if (tem || optimize > 1)
+    cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
+}
+
+struct tree_opt_pass pass_cse =
+{
+  "cse1",                               /* name */
+  gate_handle_cse,                      /* gate */   
+  rest_of_handle_cse,                  /* execute */       
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_CSE,                               /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func |
+  TODO_ggc_collect,                     /* todo_flags_finish */
+  's'                                   /* letter */
+};
+
+
+static bool
+gate_handle_cse2 (void)
+{
+  return optimize > 0 && flag_rerun_cse_after_loop;
+}
+
+/* Run second CSE pass after loop optimizations.  */
+static void
+rest_of_handle_cse2 (void)
+{
+  int tem;
+
+  if (dump_file)
+    dump_flow_info (dump_file, dump_flags);
+
+  tem = cse_main (get_insns (), max_reg_num ());
+
+  /* Run a pass to eliminate duplicated assignments to condition code
+     registers.  We have to run this after bypass_jumps, because it
+     makes it harder for that pass to determine whether a jump can be
+     bypassed safely.  */
+  cse_condition_code_reg ();
+
+  purge_all_dead_edges ();
+  delete_trivially_dead_insns (get_insns (), max_reg_num ());
+
+  if (tem)
+    {
+      timevar_push (TV_JUMP);
+      rebuild_jump_labels (get_insns ());
+      delete_dead_jumptables ();
+      cleanup_cfg (CLEANUP_EXPENSIVE);
+      timevar_pop (TV_JUMP);
+    }
+  reg_scan (get_insns (), max_reg_num ());
+  cse_not_expected = 1;
+}
+
+
+struct tree_opt_pass pass_cse2 =
+{
+  "cse2",                               /* name */
+  gate_handle_cse2,                     /* gate */   
+  rest_of_handle_cse2,                 /* execute */       
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_CSE2,                              /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func |
+  TODO_ggc_collect,                     /* todo_flags_finish */
+  't'                                   /* letter */
+};
+