2004-04-05 Caroline Tice <ctice@apple.com>

[pf3gnuchains/gcc-fork.git] / gcc / gcse.c
diff --git a/gcc/gcse.c b/gcc/gcse.c

index 3b57aa1..88f94f6 100644 (file)
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -1,6 +1,6 @@
  /* Global common subexpression elimination/Partial redundancy elimination
     and global constant/copy propagation for GNU compiler.
-   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
+   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
     Free Software Foundation, Inc.
  
  This file is part of GCC.
@@ -150,6 +150,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "toplev.h"
  
  #include "rtl.h"
+#include "tree.h"
  #include "tm_p.h"
  #include "regs.h"
  #include "hard-reg-set.h"
@@ -165,7 +166,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "ggc.h"
  #include "params.h"
  #include "cselib.h"
-
+#include "intl.h"
  #include "obstack.h"
  
  /* Propagate flow information through back edges and thus enable PRE's
@@ -468,7 +469,7 @@ struct ls_expr
    struct ls_expr * next;       /* Next in the list.  */
    int invalid;                 /* Invalid for some reason.  */
    int index;                   /* If it maps to a bitmap index.  */
-  int hash_index;              /* Index when in a hash table.  */
+  unsigned int hash_index;     /* Index when in a hash table.  */
    rtx reaching_reg;            /* Register to use when re-writing.  */
  };
  
@@ -549,8 +550,9 @@ struct null_pointer_info
  };
  \f
  static void compute_can_copy (void);
-static void *gmalloc (unsigned int);
-static void *grealloc (void *, unsigned int);
+static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
+static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
+static void *grealloc (void *, size_t);
  static void *gcse_alloc (unsigned long);
  static void alloc_gcse_mem (rtx);
  static void free_gcse_mem (void);
@@ -558,6 +560,7 @@ static void alloc_reg_set_mem (int);
  static void free_reg_set_mem (void);
  static int get_bitmap_width (int, int, int);
  static void record_one_set (int, rtx);
+static void replace_one_set (int, rtx, rtx);
  static void record_set_info (rtx, rtx, void *);
  static void compute_sets (rtx);
  static void hash_scan_insn (rtx, struct hash_table *, int);
@@ -565,6 +568,7 @@ static void hash_scan_set (rtx, rtx, struct hash_table *);
  static void hash_scan_clobber (rtx, rtx, struct hash_table *);
  static void hash_scan_call (rtx, rtx, struct hash_table *);
  static int want_to_gcse_p (rtx);
+static bool can_assign_to_reg_p (rtx);
  static bool gcse_constant_p (rtx);
  static int oprs_unchanged_p (rtx, rtx, int);
  static int oprs_anticipatable_p (rtx, rtx);
@@ -677,6 +681,7 @@ static void compute_ld_motion_mems (void);
  static void trim_ld_motion_mems (void);
  static void update_ld_motion_stores (struct expr *);
  static void reg_set_info (rtx, rtx, void *);
+static void reg_clear_last_set (rtx, rtx, void *);
  static bool store_ops_ok (rtx, int *);
  static rtx extract_mentioned_regs (rtx);
  static rtx extract_mentioned_regs_helper (rtx, rtx);
@@ -690,7 +695,8 @@ static bool store_killed_before (rtx, rtx, rtx, basic_block, int *);
  static void build_store_vectors (void);
  static void insert_insn_start_bb (rtx, basic_block);
  static int insert_store (struct ls_expr *, edge);
-static void replace_store_insn (rtx, rtx, basic_block);
+static void remove_reachable_equiv_notes (basic_block, struct ls_expr *);
+static void replace_store_insn (rtx, rtx, basic_block, struct ls_expr *);
  static void delete_store (struct ls_expr *, basic_block);
  static void free_store_memory (void);
  static void store_motion (void);
@@ -702,7 +708,9 @@ static void local_cprop_find_used_regs (rtx *, void *);
  static bool do_local_cprop (rtx, rtx, int, rtx*);
  static bool adjust_libcall_notes (rtx, rtx, rtx, rtx*);
  static void local_cprop_pass (int);
+static bool is_too_expensive (const char *);
  \f
+
  /* Entry point for global common subexpression elimination.
     F is the first instruction in the function.  */
  
@@ -736,39 +744,10 @@ gcse_main (rtx f, FILE *file)
    if (file)
      dump_flow_info (file);
  
-  /* Return if there's nothing to do.  */
-  if (n_basic_blocks <= 1)
+  /* Return if there's nothing to do, or it is too expensive.  */
+  if (n_basic_blocks <= 1 || is_too_expensive (_("GCSE disabled")))
      return 0;
-
-  /* Trying to perform global optimizations on flow graphs which have
-     a high connectivity will take a long time and is unlikely to be
-     particularly useful.
-
-     In normal circumstances a cfg should have about twice as many edges
-     as blocks.  But we do not want to punish small functions which have
-     a couple switch statements.  So we require a relatively large number
-     of basic blocks and the ratio of edges to blocks to be high.  */
-  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
-    {
-      if (warn_disabled_optimization)
-       warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
-                n_basic_blocks, n_edges / n_basic_blocks);
-      return 0;
-    }
-
-  /* If allocating memory for the cprop bitmap would take up too much
-     storage it's better just to disable the optimization.  */
-  if ((n_basic_blocks
-       * SBITMAP_SET_SIZE (max_gcse_regno)
-       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
-    {
-      if (warn_disabled_optimization)
-       warning ("GCSE disabled: %d basic blocks and %d registers",
-                n_basic_blocks, max_gcse_regno);
-
-      return 0;
-    }
-
+  
    gcc_obstack_init (&gcse_obstack);
    bytes_used = 0;
  
@@ -821,11 +800,8 @@ gcse_main (rtx f, FILE *file)
           if (changed)
             {
               free_modify_mem_tables ();
-             modify_mem_list = gmalloc (last_basic_block * sizeof (rtx));
-             canon_modify_mem_list
-               = gmalloc (last_basic_block * sizeof (rtx));
-             memset (modify_mem_list, 0, last_basic_block * sizeof (rtx));
-             memset (canon_modify_mem_list, 0, last_basic_block * sizeof (rtx));
+             modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
+             canon_modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
             }
           free_reg_set_mem ();
           alloc_reg_set_mem (max_reg_num ());
@@ -842,7 +818,7 @@ gcse_main (rtx f, FILE *file)
          partial redundancy elimination.  */
        free_gcse_mem ();
  
-      /* It does not make sense to run code hoisting unless we optimizing
+      /* It does not make sense to run code hoisting unless we are optimizing
          for code size -- it rarely makes programs faster, and can make
          them bigger if we did partial redundancy elimination (when optimizing
          for space, we use a classic gcse algorithm instead of partial
@@ -880,7 +856,7 @@ gcse_main (rtx f, FILE *file)
    if (file)
      {
        fprintf (file, "GCSE of %s: %d basic blocks, ",
-              current_function_name, n_basic_blocks);
+              current_function_name (), n_basic_blocks);
        fprintf (file, "%d pass%s, %d bytes\n\n",
                pass, pass > 1 ? "es" : "", max_pass_bytes);
      }
@@ -954,18 +930,27 @@ can_copy_p (enum machine_mode mode)
  /* Cover function to xmalloc to record bytes allocated.  */
  
  static void *
-gmalloc (unsigned int size)
+gmalloc (size_t size)
  {
    bytes_used += size;
    return xmalloc (size);
  }
  
+/* Cover function to xcalloc to record bytes allocated.  */
+
+static void *
+gcalloc (size_t nelem, size_t elsize)
+{
+  bytes_used += nelem * elsize;
+  return xcalloc (nelem, elsize);
+}
+
  /* Cover function to xrealloc.
     We don't record the additional size since we don't know it.
     It won't affect memory usage stats much anyway.  */
  
  static void *
-grealloc (void *ptr, unsigned int size)
+grealloc (void *ptr, size_t size)
  {
    return xrealloc (ptr, size);
  }
@@ -987,7 +972,7 @@ gcse_alloc (unsigned long size)
  static void
  alloc_gcse_mem (rtx f)
  {
-  int i, n;
+  int i;
    rtx insn;
  
    /* Find the largest UID and create a mapping from UIDs to CUIDs.
@@ -995,9 +980,7 @@ alloc_gcse_mem (rtx f)
       and only apply to real insns.  */
  
    max_uid = get_max_uid ();
-  n = (max_uid + 1) * sizeof (int);
-  uid_cuid = gmalloc (n);
-  memset (uid_cuid, 0, n);
+  uid_cuid = gcalloc (max_uid + 1, sizeof (int));
    for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
      {
        if (INSN_P (insn))
@@ -1009,9 +992,7 @@ alloc_gcse_mem (rtx f)
    /* Create a table mapping cuids to insns.  */
  
    max_cuid = i;
-  n = (max_cuid + 1) * sizeof (rtx);
-  cuid_insn = gmalloc (n);
-  memset (cuid_insn, 0, n);
+  cuid_insn = gcalloc (max_cuid + 1, sizeof (rtx));
    for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
      if (INSN_P (insn))
        CUID_INSN (i++) = insn;
@@ -1023,10 +1004,8 @@ alloc_gcse_mem (rtx f)
    reg_set_in_block = sbitmap_vector_alloc (last_basic_block, max_gcse_regno);
    /* Allocate array to keep a list of insns which modify memory in each
       basic block.  */
-  modify_mem_list = gmalloc (last_basic_block * sizeof (rtx));
-  canon_modify_mem_list = gmalloc (last_basic_block * sizeof (rtx));
-  memset (modify_mem_list, 0, last_basic_block * sizeof (rtx));
-  memset (canon_modify_mem_list, 0, last_basic_block * sizeof (rtx));
+  modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
+  canon_modify_mem_list = gcalloc (last_basic_block, sizeof (rtx));
    modify_mem_list_set = BITMAP_XMALLOC ();
    canon_modify_mem_list_set = BITMAP_XMALLOC ();
  }
@@ -1189,12 +1168,8 @@ static struct obstack reg_set_obstack;
  static void
  alloc_reg_set_mem (int n_regs)
  {
-  unsigned int n;
-
    reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
-  n = reg_set_table_size * sizeof (struct reg_set *);
-  reg_set_table = gmalloc (n);
-  memset (reg_set_table, 0, n);
+  reg_set_table = gcalloc (reg_set_table_size, sizeof (struct reg_set *));
  
    gcc_obstack_init (&reg_set_obstack);
  }
@@ -1206,6 +1181,24 @@ free_reg_set_mem (void)
    obstack_free (&reg_set_obstack, NULL);
  }
  
+/* An OLD_INSN that used to set REGNO was replaced by NEW_INSN.
+   Update the corresponding `reg_set_table' entry accordingly.
+   We assume that NEW_INSN is not already recorded in reg_set_table[regno].  */
+
+static void
+replace_one_set (int regno, rtx old_insn, rtx new_insn)
+{
+  struct reg_set *reg_info;
+  if (regno >= reg_set_table_size)
+    return;
+  for (reg_info = reg_set_table[regno]; reg_info; reg_info = reg_info->next)
+    if (reg_info->insn == old_insn)
+      {
+        reg_info->insn = new_insn;
+        break;
+      }
+}
+
  /* Record REGNO in the reg_set table.  */
  
  static void
@@ -1277,13 +1270,9 @@ static basic_block current_bb;
  /* See whether X, the source of a set, is something we want to consider for
     GCSE.  */
  
-static GTY(()) rtx test_insn;
  static int
  want_to_gcse_p (rtx x)
  {
-  int num_clobbers = 0;
-  int icode;
-
    switch (GET_CODE (x))
      {
      case REG:
@@ -1296,8 +1285,21 @@ want_to_gcse_p (rtx x)
        return 0;
  
      default:
-      break;
+      return can_assign_to_reg_p (x);
      }
+}
+
+/* Used internally by can_assign_to_reg_p.  */
+
+static GTY(()) rtx test_insn;
+
+/* Return true if we can assign X to a pseudo register.  */
+
+static bool
+can_assign_to_reg_p (rtx x)
+{
+  int num_clobbers = 0;
+  int icode;
  
    /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
    if (general_operand (x, GET_MODE (x)))
@@ -1523,12 +1525,14 @@ oprs_available_p (rtx x, rtx insn)
  
     MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
     indicating if a volatile operand is found or if the expression contains
-   something we don't want to insert in the table.
+   something we don't want to insert in the table.  HASH_TABLE_SIZE is
+   the current size of the hash table to be probed.
  
     ??? One might want to merge this with canon_hash.  Later.  */
  
  static unsigned int
-hash_expr (rtx x, enum machine_mode mode, int *do_not_record_p, int hash_table_size)
+hash_expr (rtx x, enum machine_mode mode, int *do_not_record_p,
+          int hash_table_size)
  {
    unsigned int hash;
  
@@ -1798,6 +1802,10 @@ expr_equiv_p (rtx x, rtx y)
          due to it being set with the different alias set.  */
        if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
         return 0;
+
+      /* A volatile mem should not be considered equivalent to any other.  */
+      if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
+       return 0;
        break;
  
      /*  For commutative operations, check both orders.  */
@@ -1982,6 +1990,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
  
           antic_occr->insn = insn;
           antic_occr->next = NULL;
+         antic_occr->deleted_p = 0;
         }
      }
  
@@ -2018,6 +2027,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
  
           avail_occr->insn = insn;
           avail_occr->next = NULL;
+         avail_occr->deleted_p = 0;
         }
      }
  }
@@ -2104,6 +2114,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table *table)
  
        cur_occr->insn = insn;
        cur_occr->next = NULL;
+      cur_occr->deleted_p = 0;
      }
  }
  
@@ -2121,7 +2132,7 @@ gcse_constant_p (rtx x)
  
  
    /* Consider a COMPARE of the same registers is a constant
-    if they are not floating point registers. */
+    if they are not floating point registers.  */
    if (GET_CODE(x) == COMPARE
        && GET_CODE (XEXP (x, 0)) == REG
        && GET_CODE (XEXP (x, 1)) == REG
@@ -2206,11 +2217,54 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table *table)
                /* A copy is not available if its src or dest is subsequently
                   modified.  Here we want to search from INSN+1 on, but
                   oprs_available_p searches from INSN on.  */
-              && (insn == BLOCK_END (BLOCK_NUM (insn))
+              && (insn == BB_END (BLOCK_FOR_INSN (insn))
                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
                        && oprs_available_p (pat, tmp))))
         insert_set_in_table (pat, insn, table);
      }
+  /* In case of store we want to consider the memory value as available in
+     the REG stored in that memory. This makes it possible to remove
+     redundant loads from due to stores to the same location.  */
+  else if (flag_gcse_las && GET_CODE (src) == REG && GET_CODE (dest) == MEM)
+      {
+        unsigned int regno = REGNO (src);
+
+        /* Do not do this for constant/copy propagation.  */
+        if (! table->set_p
+            /* Only record sets of pseudo-regs in the hash table.  */
+           && regno >= FIRST_PSEUDO_REGISTER
+          /* Don't GCSE something if we can't do a reg/reg copy.  */
+          && can_copy_p (GET_MODE (src))
+          /* GCSE commonly inserts instruction after the insn.  We can't
+             do that easily for EH_REGION notes so disable GCSE on these
+             for now.  */
+          && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+          /* Is SET_DEST something we want to gcse?  */
+          && want_to_gcse_p (dest)
+          /* Don't CSE a nop.  */
+          && ! set_noop_p (pat)
+          /* Don't GCSE if it has attached REG_EQUIV note.
+             At this point this only function parameters should have
+             REG_EQUIV notes and if the argument slot is used somewhere
+             explicitly, it means address of parameter has been taken,
+             so we should not extend the lifetime of the pseudo.  */
+          && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
+              || GET_CODE (XEXP (note, 0)) != MEM))
+             {
+               /* Stores are never anticipatable.  */
+               int antic_p = 0;
+              /* An expression is not available if its operands are
+                 subsequently modified, including this insn.  It's also not
+                 available if this is a branch, because we can't insert
+                 a set after the branch.  */
+               int avail_p = oprs_available_p (dest, insn)
+                            && ! JUMP_P (insn);
+
+              /* Record the memory expression (DEST) in the hash table.  */
+              insert_expr_in_table (dest, GET_MODE (dest), insn,
+                                    antic_p, avail_p, table);
+             }
+      }
  }
  
  static void
@@ -2470,8 +2524,8 @@ compute_hash_table_work (struct hash_table *table)
          ??? hard-reg reg_set_in_block computation
          could be moved to compute_sets since they currently don't change.  */
  
-      for (insn = current_bb->head;
-          insn && insn != NEXT_INSN (current_bb->end);
+      for (insn = BB_HEAD (current_bb);
+          insn && insn != NEXT_INSN (BB_END (current_bb));
            insn = NEXT_INSN (insn))
         {
           if (! INSN_P (insn))
@@ -2501,12 +2555,12 @@ compute_hash_table_work (struct hash_table *table)
        if (table->set_p
           && implicit_sets[current_bb->index] != NULL_RTX)
         hash_scan_set (implicit_sets[current_bb->index],
-                      current_bb->head, table);
+                      BB_HEAD (current_bb), table);
  
        /* The next pass builds the hash table.  */
  
-      for (insn = current_bb->head, in_libcall_block = 0;
-          insn && insn != NEXT_INSN (current_bb->end);
+      for (insn = BB_HEAD (current_bb), in_libcall_block = 0;
+          insn && insn != NEXT_INSN (BB_END (current_bb));
            insn = NEXT_INSN (insn))
         if (INSN_P (insn))
           {
@@ -3340,8 +3394,11 @@ handle_avail_expr (rtx insn, struct expr *expr)
    if (insn_computes_expr == NULL)
      return 0;
    expr_set = single_set (insn_computes_expr);
+  /* The set might be in a parallel with multiple sets; we could
+     probably handle that, but there's currently no easy way to find
+     the relevant sub-expression.  */
    if (!expr_set)
-    abort ();
+    return 0;
  
    found_setting = 0;
    use_src = 0;
@@ -3500,8 +3557,8 @@ classic_gcse (void)
          start of the block].  */
        reset_opr_set_tables ();
  
-      for (insn = bb->head;
-          insn != NULL && insn != NEXT_INSN (bb->end);
+      for (insn = BB_HEAD (bb);
+          insn != NULL && insn != NEXT_INSN (BB_END (bb));
            insn = NEXT_INSN (insn))
         {
           /* Is insn of form (set (pseudo-reg) ...)?  */
@@ -3573,7 +3630,7 @@ one_classic_gcse_pass (int pass)
      {
        fprintf (gcse_file, "\n");
        fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
-              current_function_name, pass, bytes_used, gcse_subst_count);
+              current_function_name (), pass, bytes_used, gcse_subst_count);
        fprintf (gcse_file, "%d insns created\n", gcse_create_count);
      }
  
@@ -3849,6 +3906,11 @@ try_replace_reg (rtx from, rtx to, rtx insn)
         validate_change (insn, &SET_SRC (set), src, 0);
      }
  
+  /* If there is already a NOTE, update the expression in it with our
+     replacement.  */
+  if (note != 0)
+    XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);
+
    if (!success && set && reg_mentioned_p (from, SET_SRC (set)))
      {
        /* If above failed and this is a single set, try to simplify the source of
@@ -3869,11 +3931,6 @@ try_replace_reg (rtx from, rtx to, rtx insn)
         note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
      }
  
-  /* If there is already a NOTE, update the expression in it with our
-     replacement.  */
-  else if (note != 0)
-    XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);
-
    /* REG_EQUAL may get simplified into register.
       We don't allow that. Remove that note. This code ought
       not to happen, because previous code ought to synthesize
@@ -4360,7 +4417,7 @@ local_cprop_pass (int alter_jumps)
    rtx libcall_stack[MAX_NESTED_LIBCALLS + 1], *libcall_sp;
    bool changed = false;
  
-  cselib_init ();
+  cselib_init (false);
    libcall_sp = &libcall_stack[MAX_NESTED_LIBCALLS];
    *libcall_sp = 0;
    for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
@@ -4437,8 +4494,8 @@ cprop (int alter_jumps)
          start of the block].  */
        reset_opr_set_tables ();
  
-      for (insn = bb->head;
-          insn != NULL && insn != NEXT_INSN (bb->end);
+      for (insn = BB_HEAD (bb);
+          insn != NULL && insn != NEXT_INSN (BB_END (bb));
            insn = NEXT_INSN (insn))
         if (INSN_P (insn))
           {
@@ -4487,7 +4544,8 @@ fis_get_condition (rtx jump)
  
    /* Use canonicalize_condition to do the dirty work of manipulating
       MODE_CC values and COMPARE rtx codes.  */
-  tmp = canonicalize_condition (jump, cond, reverse, &earliest, NULL_RTX);
+  tmp = canonicalize_condition (jump, cond, reverse, &earliest, NULL_RTX,
+                               false);
    if (!tmp)
      return NULL_RTX;
  
@@ -4505,7 +4563,8 @@ fis_get_condition (rtx jump)
    tmp = XEXP (tmp, 0);
    if (!REG_P (tmp) || GET_MODE_CLASS (GET_MODE (tmp)) != MODE_INT)
      return NULL_RTX;
-  tmp = canonicalize_condition (jump, cond, reverse, &earliest, tmp);
+  tmp = canonicalize_condition (jump, cond, reverse, &earliest, tmp,
+                               false);
    if (!tmp)
      return NULL_RTX;
  
@@ -4517,6 +4576,38 @@ fis_get_condition (rtx jump)
    return tmp;
  }
  
+/* Check the comparison COND to see if we can safely form an implicit set from
+   it.  COND is either an EQ or NE comparison.  */
+
+static bool
+implicit_set_cond_p (rtx cond)
+{
+  enum machine_mode mode = GET_MODE (XEXP (cond, 0));
+  rtx cst = XEXP (cond, 1);
+
+  /* We can't perform this optimization if either operand might be or might
+     contain a signed zero.  */
+  if (HONOR_SIGNED_ZEROS (mode))
+    {
+      /* It is sufficient to check if CST is or contains a zero.  We must
+        handle float, complex, and vector.  If any subpart is a zero, then
+        the optimization can't be performed.  */
+      /* ??? The complex and vector checks are not implemented yet.  We just
+        always return zero for them.  */
+      if (GET_CODE (cst) == CONST_DOUBLE)
+       {
+         REAL_VALUE_TYPE d;
+         REAL_VALUE_FROM_CONST_DOUBLE (d, cst);
+         if (REAL_VALUES_EQUAL (d, dconst0))
+           return 0;
+       }
+      else
+       return 0;
+    }
+
+  return gcse_constant_p (cst);
+}
+
  /* Find the implicit sets of a function.  An "implicit set" is a constraint
     on the value of a variable, implied by a conditional jump.  For example,
     following "if (x == 2)", the then branch may be optimized as though the
@@ -4533,16 +4624,16 @@ find_implicit_sets (void)
  
    count = 0;
    FOR_EACH_BB (bb)
-    /* Check for more than one sucessor.  */
+    /* Check for more than one successor.  */
      if (bb->succ && bb->succ->succ_next)
        {
-       cond = fis_get_condition (bb->end);
+       cond = fis_get_condition (BB_END (bb));
  
         if (cond
             && (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
             && GET_CODE (XEXP (cond, 0)) == REG
             && REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
-           && gcse_constant_p (XEXP (cond, 1)))
+           && implicit_set_cond_p (cond))
           {
             dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
                                          : FALLTHRU_EDGE (bb)->dest;
@@ -4611,7 +4702,7 @@ one_cprop_pass (int pass, int cprop_jumps, int bypass_jumps)
    if (gcse_file)
      {
        fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
-              current_function_name, pass, bytes_used);
+              current_function_name (), pass, bytes_used);
        fprintf (gcse_file, "%d const props, %d copy props\n\n",
                const_prop_count, copy_prop_count);
      }
@@ -4796,6 +4887,21 @@ bypass_block (basic_block bb, rtx setcc, rtx jump)
           else
             dest = NULL;
  
+         /* Avoid unification of the edge with other edges from original
+            branch.  We would end up emitting the instruction on "both"
+            edges.  */
+           
+         if (dest && setcc && !CC0_P (SET_DEST (PATTERN (setcc))))
+           {
+             edge e2;
+             for (e2 = e->src->succ; e2; e2 = e2->succ_next)
+               if (e2->dest == dest)
+                 {
+                   dest = NULL;
+                   break;
+                 }
+           }
+
           old_dest = e->dest;
           if (dest != NULL
               && dest != old_dest
@@ -4859,8 +4965,8 @@ bypass_conditional_jumps (void)
        if (bb->pred && bb->pred->pred_next)
         {
           setcc = NULL_RTX;
-         for (insn = bb->head;
-              insn != NULL && insn != NEXT_INSN (bb->end);
+         for (insn = BB_HEAD (bb);
+              insn != NULL && insn != NEXT_INSN (BB_END (bb));
                insn = NEXT_INSN (insn))
             if (GET_CODE (insn) == INSN)
               {
@@ -5151,7 +5257,7 @@ process_insert_insn (struct expr *expr)
  static void
  insert_insn_end_bb (struct expr *expr, basic_block bb, int pre)
  {
-  rtx insn = bb->end;
+  rtx insn = BB_END (bb);
    rtx new_insn;
    rtx reg = expr->reaching_reg;
    int regno = REGNO (reg);
@@ -5232,7 +5338,7 @@ insert_insn_end_bb (struct expr *expr, basic_block bb, int pre)
        /* Since different machines initialize their parameter registers
          in different orders, assume nothing.  Collect the set of all
          parameter registers.  */
-      insn = find_first_parameter_load (insn, bb->head);
+      insn = find_first_parameter_load (insn, BB_HEAD (bb));
  
        /* If we found all the parameter loads, then we want to insert
          before the first parameter load.
@@ -5357,7 +5463,20 @@ pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
    return did_insert;
  }
  
-/* Copy the result of INSN to REG.  INDX is the expression number.  */
+/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
+   Given "old_reg <- expr" (INSN), instead of adding after it
+     reaching_reg <- old_reg
+   it's better to do the following:
+     reaching_reg <- expr
+     old_reg      <- reaching_reg
+   because this way copy propagation can discover additional PRE
+   opportunities.  But if this fails, we try the old way.
+   When "expr" is a store, i.e.
+   given "MEM <- old_reg", instead of adding after it
+     reaching_reg <- old_reg
+   it's better to add it before as follows:
+     reaching_reg <- old_reg
+     MEM          <- reaching_reg.  */
  
  static void
  pre_insert_copy_insn (struct expr *expr, rtx insn)
@@ -5365,16 +5484,69 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
    rtx reg = expr->reaching_reg;
    int regno = REGNO (reg);
    int indx = expr->bitmap_index;
-  rtx set = single_set (insn);
-  rtx new_insn;
+  rtx pat = PATTERN (insn);
+  rtx set, new_insn;
+  rtx old_reg;
+  int i;
  
-  if (!set)
+  /* This block matches the logic in hash_scan_insn.  */
+  if (GET_CODE (pat) == SET)
+    set = pat;
+  else if (GET_CODE (pat) == PARALLEL)
+    {
+      /* Search through the parallel looking for the set whose
+        source was the expression that we're interested in.  */
+      set = NULL_RTX;
+      for (i = 0; i < XVECLEN (pat, 0); i++)
+       {
+         rtx x = XVECEXP (pat, 0, i);
+         if (GET_CODE (x) == SET
+             && expr_equiv_p (SET_SRC (x), expr->expr))
+           {
+             set = x;
+             break;
+           }
+       }
+    }
+  else
      abort ();
  
-  new_insn = emit_insn_after (gen_move_insn (reg, copy_rtx (SET_DEST (set))), insn);
+  if (GET_CODE (SET_DEST (set)) == REG)
+    {
+      old_reg = SET_DEST (set);
+      /* Check if we can modify the set destination in the original insn.  */
+      if (validate_change (insn, &SET_DEST (set), reg, 0))
+        {
+          new_insn = gen_move_insn (old_reg, reg);
+          new_insn = emit_insn_after (new_insn, insn);
+
+          /* Keep register set table up to date.  */
+          replace_one_set (REGNO (old_reg), insn, new_insn);
+          record_one_set (regno, insn);
+        }
+      else
+        {
+          new_insn = gen_move_insn (reg, old_reg);
+          new_insn = emit_insn_after (new_insn, insn);
+
+          /* Keep register set table up to date.  */
+          record_one_set (regno, new_insn);
+        }
+    }
+  else /* This is possible only in case of a store to memory.  */
+    {
+      old_reg = SET_SRC (set);
+      new_insn = gen_move_insn (reg, old_reg);
+
+      /* Check if we can modify the set source in the original insn.  */
+      if (validate_change (insn, &SET_SRC (set), reg, 0))
+        new_insn = emit_insn_before (new_insn, insn);
+      else
+        new_insn = emit_insn_after (new_insn, insn);
  
-  /* Keep register set table up to date.  */
-  record_one_set (regno, new_insn);
+      /* Keep register set table up to date.  */
+      record_one_set (regno, new_insn);
+    }
  
    gcse_create_count++;
  
@@ -5383,7 +5555,6 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
               INSN_UID (insn), regno);
-  update_ld_motion_stores (expr);
  }
  
  /* Copy available expressions that reach the redundant expression
@@ -5392,7 +5563,7 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
  static void
  pre_insert_copies (void)
  {
-  unsigned int i;
+  unsigned int i, added_copy;
    struct expr *expr;
    struct occr *occr;
    struct occr *avail;
@@ -5413,6 +5584,9 @@ pre_insert_copies (void)
            expression wasn't deleted anywhere.  */
         if (expr->reaching_reg == NULL)
           continue;
+       
+       /* Set when we add a copy for that expression.  */
+       added_copy = 0; 
  
         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
           {
@@ -5437,11 +5611,16 @@ pre_insert_copies (void)
                                                BLOCK_FOR_INSN (occr->insn)))
                   continue;
  
+                added_copy = 1;
+
                 /* Copy the result of avail to reaching_reg.  */
                 pre_insert_copy_insn (expr, insn);
                 avail->copied_p = 1;
               }
           }
+
+         if (added_copy)
+            update_ld_motion_stores (expr);
        }
  }
  
@@ -5491,7 +5670,9 @@ pre_delete (void)
  
    changed = 0;
    for (i = 0; i < expr_hash_table.size; i++)
-    for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
+    for (expr = expr_hash_table.table[i];
+        expr != NULL;
+        expr = expr->next_same_hash)
        {
         int indx = expr->bitmap_index;
  
@@ -5504,12 +5685,10 @@ pre_delete (void)
             rtx set;
             basic_block bb = BLOCK_FOR_INSN (insn);
  
-           if (TEST_BIT (pre_delete_map[bb->index], indx))
+           /* We only delete insns that have a single_set.  */
+           if (TEST_BIT (pre_delete_map[bb->index], indx)
+               && (set = single_set (insn)) != 0)
               {
-               set = single_set (insn);
-               if (! set)
-                 abort ();
-
                 /* Create a pseudo-reg to store the result of reaching
                    expressions into.  Get the mode for the new pseudo from
                    the mode of the original destination pseudo.  */
@@ -5640,7 +5819,7 @@ one_pre_gcse_pass (int pass)
    if (gcse_file)
      {
        fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
-              current_function_name, pass, bytes_used);
+              current_function_name (), pass, bytes_used);
        fprintf (gcse_file, "%d substs, %d insns created\n",
                gcse_subst_count, gcse_create_count);
      }
@@ -5719,7 +5898,7 @@ compute_transpout (void)
        /* Note that flow inserted a nop a the end of basic blocks that
          end in call instructions for reasons other than abnormal
          control flow.  */
-      if (GET_CODE (bb->end) != CALL_INSN)
+      if (GET_CODE (BB_END (bb)) != CALL_INSN)
         continue;
  
        for (i = 0; i < expr_hash_table.size; i++)
@@ -5801,8 +5980,8 @@ delete_null_pointer_checks_1 (unsigned int *block_reg, sbitmap *nonnull_avin,
  
        /* Scan each insn in the basic block looking for memory references and
          register sets.  */
-      stop_insn = NEXT_INSN (current_block->end);
-      for (insn = current_block->head;
+      stop_insn = NEXT_INSN (BB_END (current_block));
+      for (insn = BB_HEAD (current_block);
            insn != stop_insn;
            insn = NEXT_INSN (insn))
         {
@@ -5857,7 +6036,7 @@ delete_null_pointer_checks_1 (unsigned int *block_reg, sbitmap *nonnull_avin,
       against zero.  */
    FOR_EACH_BB (bb)
      {
-      rtx last_insn = bb->end;
+      rtx last_insn = BB_END (bb);
        rtx condition, earliest;
        int compare_and_branch;
  
@@ -5869,7 +6048,7 @@ delete_null_pointer_checks_1 (unsigned int *block_reg, sbitmap *nonnull_avin,
         continue;
  
        /* LAST_INSN is a conditional jump.  Get its condition.  */
-      condition = get_condition (last_insn, &earliest);
+      condition = get_condition (last_insn, &earliest, false);
  
        /* If we can't determine the condition then skip.  */
        if (! condition)
@@ -5903,11 +6082,13 @@ delete_null_pointer_checks_1 (unsigned int *block_reg, sbitmap *nonnull_avin,
  
        something_changed = 1;
        delete_insn (last_insn);
+#ifdef HAVE_cc0
        if (compare_and_branch == 2)
         delete_insn (earliest);
+#endif
        purge_dead_edges (bb);
  
-      /* Don't check this block again.  (Note that BLOCK_END is
+      /* Don't check this block again.  (Note that BB_END is
          invalid here; we deleted the last instruction in the
          block.)  */
        block_reg[bb->index] = 0;
@@ -5948,28 +6129,17 @@ delete_null_pointer_checks (rtx f ATTRIBUTE_UNUSED)
    basic_block bb;
    int reg;
    int regs_per_pass;
-  int max_reg;
+  int max_reg = max_reg_num ();
    struct null_pointer_info npi;
    int something_changed = 0;
  
-  /* If we have only a single block, then there's nothing to do.  */
-  if (n_basic_blocks <= 1)
-    return 0;
-
-  /* Trying to perform global optimizations on flow graphs which have
-     a high connectivity will take a long time and is unlikely to be
-     particularly useful.
-
-     In normal circumstances a cfg should have about twice as many edges
-     as blocks.  But we do not want to punish small functions which have
-     a couple switch statements.  So we require a relatively large number
-     of basic blocks and the ratio of edges to blocks to be high.  */
-  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
+  /* If we have only a single block, or it is too expensive, give up.  */
+  if (n_basic_blocks <= 1
+      || is_too_expensive (_ ("NULL pointer checks disabled")))
      return 0;
  
    /* We need four bitmaps, each with a bit for each register in each
       basic block.  */
-  max_reg = max_reg_num ();
    regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
  
    /* Allocate bitmaps to hold local and global properties.  */
@@ -5984,7 +6154,7 @@ delete_null_pointer_checks (rtx f ATTRIBUTE_UNUSED)
    block_reg = xcalloc (last_basic_block, sizeof (int));
    FOR_EACH_BB (bb)
      {
-      rtx last_insn = bb->end;
+      rtx last_insn = BB_END (bb);
        rtx condition, earliest, reg;
  
        /* We only want conditional branches.  */
@@ -5994,7 +6164,7 @@ delete_null_pointer_checks (rtx f ATTRIBUTE_UNUSED)
         continue;
  
        /* LAST_INSN is a conditional jump.  Get its condition.  */
-      condition = get_condition (last_insn, &earliest);
+      condition = get_condition (last_insn, &earliest, false);
  
        /* If we were unable to get the condition, or it is not an equality
          comparison against zero then there's nothing we can do.  */
@@ -6045,9 +6215,6 @@ static sbitmap *hoist_vbeout;
  /* Hoistable expressions.  */
  static sbitmap *hoist_exprs;
  
-/* Dominator bitmaps.  */
-dominance_info dominators;
-
  /* ??? We could compute post dominators and run this algorithm in
     reverse to perform tail merging, doing so would probably be
     more effective than the tail merging code in jump.c.
@@ -6084,7 +6251,7 @@ free_code_hoist_mem (void)
    sbitmap_vector_free (hoist_exprs);
    sbitmap_vector_free (transpout);
  
-  free_dominance_info (dominators);
+  free_dominance_info (CDI_DOMINATORS);
  }
  
  /* Compute the very busy expressions at entry/exit from each block.
@@ -6133,7 +6300,7 @@ compute_code_hoist_data (void)
    compute_local_properties (transp, comp, antloc, &expr_hash_table);
    compute_transpout ();
    compute_code_hoist_vbeinout ();
-  dominators = calculate_dominance_info (CDI_DOMINATORS);
+  calculate_dominance_info (CDI_DOMINATORS);
    if (gcse_file)
      fprintf (gcse_file, "\n");
  }
@@ -6225,7 +6392,7 @@ hoist_code (void)
        int found = 0;
        int insn_inserted_p;
  
-      domby_len = get_dominated_by (dominators, bb, &domby);
+      domby_len = get_dominated_by (CDI_DOMINATORS, bb, &domby);
        /* Examine each expression that is very busy at the exit of this
          block.  These are the potentially hoistable expressions.  */
        for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
@@ -6418,28 +6585,29 @@ one_code_hoisting_pass (void)
  static struct ls_expr *
  ldst_entry (rtx x)
  {
+  int do_not_record_p = 0;
    struct ls_expr * ptr;
+  unsigned int hash;
  
-  for (ptr = first_ls_expr(); ptr != NULL; ptr = next_ls_expr (ptr))
-    if (expr_equiv_p (ptr->pattern, x))
-      break;
+  hash = hash_expr_1 (x, GET_MODE (x), & do_not_record_p);
  
-  if (!ptr)
-    {
-      ptr = xmalloc (sizeof (struct ls_expr));
+  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
+    if (ptr->hash_index == hash && expr_equiv_p (ptr->pattern, x))
+      return ptr;
  
-      ptr->next         = pre_ldst_mems;
-      ptr->expr         = NULL;
-      ptr->pattern      = x;
-      ptr->pattern_regs        = NULL_RTX;
-      ptr->loads        = NULL_RTX;
-      ptr->stores       = NULL_RTX;
-      ptr->reaching_reg = NULL_RTX;
-      ptr->invalid      = 0;
-      ptr->index        = 0;
-      ptr->hash_index   = 0;
-      pre_ldst_mems     = ptr;
-    }
+  ptr = xmalloc (sizeof (struct ls_expr));
+
+  ptr->next         = pre_ldst_mems;
+  ptr->expr         = NULL;
+  ptr->pattern      = x;
+  ptr->pattern_regs = NULL_RTX;
+  ptr->loads        = NULL_RTX;
+  ptr->stores       = NULL_RTX;
+  ptr->reaching_reg = NULL_RTX;
+  ptr->invalid      = 0;
+  ptr->index        = 0;
+  ptr->hash_index   = hash;
+  pre_ldst_mems     = ptr;
  
    return ptr;
  }
@@ -6642,8 +6810,8 @@ compute_ld_motion_mems (void)
  
    FOR_EACH_BB (bb)
      {
-      for (insn = bb->head;
-          insn && insn != NEXT_INSN (bb->end);
+      for (insn = BB_HEAD (bb);
+          insn && insn != NEXT_INSN (BB_END (bb));
            insn = NEXT_INSN (insn))
         {
           if (INSN_P (insn))
@@ -6680,7 +6848,7 @@ compute_ld_motion_mems (void)
                           && GET_CODE (src) != ASM_OPERANDS
                           /* Check for REG manually since want_to_gcse_p
                              returns 0 for all REGs.  */
-                         && (REG_P (src) || want_to_gcse_p (src)))
+                         && can_assign_to_reg_p (src))
                         ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
                       else
                         ptr->invalid = 1;
@@ -6699,56 +6867,41 @@ compute_ld_motion_mems (void)
  static void
  trim_ld_motion_mems (void)
  {
-  struct ls_expr * last = NULL;
-  struct ls_expr * ptr = first_ls_expr ();
+  struct ls_expr * * last = & pre_ldst_mems;
+  struct ls_expr * ptr = pre_ldst_mems;
  
    while (ptr != NULL)
      {
-      int del = ptr->invalid;
-      struct expr * expr = NULL;
+      struct expr * expr;
  
        /* Delete if entry has been made invalid.  */
-      if (!del)
+      if (! ptr->invalid)
         {
-         unsigned int i;
-
-         del = 1;
           /* Delete if we cannot find this mem in the expression list.  */
-         for (i = 0; i < expr_hash_table.size && del; i++)
-           {
-             for (expr = expr_hash_table.table[i];
-                  expr != NULL;
-                  expr = expr->next_same_hash)
-               if (expr_equiv_p (expr->expr, ptr->pattern))
-                 {
-                   del = 0;
-                   break;
-                 }
-           }
-       }
+         unsigned int hash = ptr->hash_index % expr_hash_table.size;
  
-      if (del)
-       {
-         if (last != NULL)
-           {
-             last->next = ptr->next;
-             free_ldst_entry (ptr);
-             ptr = last->next;
-           }
-         else
-           {
-             pre_ldst_mems = pre_ldst_mems->next;
-             free_ldst_entry (ptr);
-             ptr = pre_ldst_mems;
-           }
+         for (expr = expr_hash_table.table[hash];
+              expr != NULL;
+              expr = expr->next_same_hash)
+           if (expr_equiv_p (expr->expr, ptr->pattern))
+             break;
         }
        else
+       expr = (struct expr *) 0;
+
+      if (expr)
         {
           /* Set the expression field if we are keeping it.  */
-         last = ptr;
           ptr->expr = expr;
+         last = & ptr->next;
           ptr = ptr->next;
         }
+      else
+       {
+         *last = ptr->next;
+         free_ldst_entry (ptr);
+         ptr = * last;
+       }
      }
  
    /* Show the world what we've found.  */
@@ -6759,7 +6912,7 @@ trim_ld_motion_mems (void)
  /* This routine will take an expression which we are replacing with
     a reaching register, and update any stores that are needed if
     that expression is in the ld_motion list.  Stores are updated by
-   copying their SRC to the reaching register, and then storeing
+   copying their SRC to the reaching register, and then storing
     the reaching register into the store location. These keeps the
     correct value in the reaching register for the loads.  */
  
@@ -6832,17 +6985,41 @@ static sbitmap * st_antloc;
  /* Global holding the number of store expressions we are dealing with.  */
  static int num_stores;
  
-/* Checks to set if we need to mark a register set. Called from note_stores.  */
+/* Checks to set if we need to mark a register set.  Called from
+   note_stores.  */
  
  static void
  reg_set_info (rtx dest, rtx setter ATTRIBUTE_UNUSED,
-             void *data ATTRIBUTE_UNUSED)
+             void *data)
  {
+  sbitmap bb_reg = data;
+
    if (GET_CODE (dest) == SUBREG)
      dest = SUBREG_REG (dest);
  
    if (GET_CODE (dest) == REG)
-    regvec[REGNO (dest)] = INSN_UID (compute_store_table_current_insn);
+    {
+      regvec[REGNO (dest)] = INSN_UID (compute_store_table_current_insn);
+      if (bb_reg)
+       SET_BIT (bb_reg, REGNO (dest));
+    }
+}
+
+/* Clear any mark that says that this insn sets dest.  Called from
+   note_stores.  */
+
+static void
+reg_clear_last_set (rtx dest, rtx setter ATTRIBUTE_UNUSED,
+             void *data)
+{
+  int *dead_vec = data;
+
+  if (GET_CODE (dest) == SUBREG)
+    dest = SUBREG_REG (dest);
+
+  if (GET_CODE (dest) == REG &&
+      dead_vec[REGNO (dest)] == INSN_UID (compute_store_table_current_insn))
+    dead_vec[REGNO (dest)] = 0;
  }
  
  /* Return zero if some of the registers in list X are killed
@@ -7042,7 +7219,7 @@ find_moveable_store (rtx insn, int *regs_set_before, int *regs_set_after)
          failed last time.  */
        if (LAST_AVAIL_CHECK_FAILURE (ptr))
         {
-         for (tmp = bb->end;
+         for (tmp = BB_END (bb);
                tmp != insn && tmp != LAST_AVAIL_CHECK_FAILURE (ptr);
                tmp = PREV_INSN (tmp))
             continue;
@@ -7076,18 +7253,17 @@ compute_store_table (void)
                                                        max_gcse_regno);
    sbitmap_vector_zero (reg_set_in_block, last_basic_block);
    pre_ldst_mems = 0;
-  last_set_in = xmalloc (sizeof (int) * max_gcse_regno);
+  last_set_in = xcalloc (max_gcse_regno, sizeof (int));
    already_set = xmalloc (sizeof (int) * max_gcse_regno);
  
    /* Find all the stores we care about.  */
    FOR_EACH_BB (bb)
      {
        /* First compute the registers set in this block.  */
-      memset (last_set_in, 0, sizeof (int) * max_gcse_regno);
        regvec = last_set_in;
  
-      for (insn = bb->head;
-          insn != NEXT_INSN (bb->end);
+      for (insn = BB_HEAD (bb);
+          insn != NEXT_INSN (BB_END (bb));
            insn = NEXT_INSN (insn))
         {
           if (! INSN_P (insn))
@@ -7105,24 +7281,22 @@ compute_store_table (void)
               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
                 if (clobbers_all
                     || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
-                 last_set_in[regno] = INSN_UID (insn);
+                 {
+                   last_set_in[regno] = INSN_UID (insn);
+                   SET_BIT (reg_set_in_block[bb->index], regno);
+                 }
             }
  
           pat = PATTERN (insn);
           compute_store_table_current_insn = insn;
-         note_stores (pat, reg_set_info, NULL);
+         note_stores (pat, reg_set_info, reg_set_in_block[bb->index]);
         }
  
-      /* Record the set registers.  */
-      for (regno = 0; regno < max_gcse_regno; regno++)
-       if (last_set_in[regno])
-         SET_BIT (reg_set_in_block[bb->index], regno);
-
        /* Now find the stores.  */
        memset (already_set, 0, sizeof (int) * max_gcse_regno);
        regvec = already_set;
-      for (insn = bb->head;
-          insn != NEXT_INSN (bb->end);
+      for (insn = BB_HEAD (bb);
+          insn != NEXT_INSN (BB_END (bb));
            insn = NEXT_INSN (insn))
         {
           if (! INSN_P (insn))
@@ -7150,11 +7324,32 @@ compute_store_table (void)
           find_moveable_store (insn, already_set, last_set_in);
  
           /* Unmark regs that are no longer set.  */
-         for (regno = 0; regno < max_gcse_regno; regno++)
-           if (last_set_in[regno] == INSN_UID (insn))
-             last_set_in[regno] = 0;
+         compute_store_table_current_insn = insn;
+         note_stores (pat, reg_clear_last_set, last_set_in);
+         if (GET_CODE (insn) == CALL_INSN)
+           {
+             bool clobbers_all = false;
+#ifdef NON_SAVING_SETJMP
+             if (NON_SAVING_SETJMP
+                 && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+               clobbers_all = true;
+#endif
+
+             for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+               if ((clobbers_all
+                    || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+                   && last_set_in[regno] == INSN_UID (insn))
+                 last_set_in[regno] = 0;
+           }
         }
  
+#ifdef ENABLE_CHECKING
+      /* last_set_in should now be all-zero.  */
+      for (regno = 0; regno < max_gcse_regno; regno++)
+       if (last_set_in[regno] != 0)
+         abort ();
+#endif
+
        /* Clear temporary marks.  */
        for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
         {
@@ -7252,7 +7447,7 @@ find_loads (rtx x, rtx store_pattern, int after)
  static bool
  store_killed_in_insn (rtx x, rtx x_regs, rtx insn, int after)
  {
-  rtx reg, base;
+  rtx reg, base, note;
  
    if (!INSN_P (insn))
      return false;
@@ -7303,10 +7498,26 @@ store_killed_in_insn (rtx x, rtx x_regs, rtx insn, int after)
                 return true;
             }
         }
-      return find_loads (SET_SRC (pat), x, after);
+      if (find_loads (SET_SRC (pat), x, after))
+       return true;
      }
-  else
-    return find_loads (PATTERN (insn), x, after);
+  else if (find_loads (PATTERN (insn), x, after))
+    return true;
+
+  /* If this insn has a REG_EQUAL or REG_EQUIV note referencing a memory
+     location aliased with X, then this insn kills X.  */
+  note = find_reg_equal_equiv_note (insn);
+  if (! note)
+    return false;
+  note = XEXP (note, 0);
+
+  /* However, if the note represents a must alias rather than a may
+     alias relationship, then it does not kill X.  */
+  if (expr_equiv_p (note, x))
+    return false;
+
+  /* See if there are any aliased loads in the note.  */
+  return find_loads (note, x, after);
  }
  
  /* Returns true if the expression X is loaded or clobbered on or after INSN
@@ -7318,7 +7529,7 @@ static bool
  store_killed_after (rtx x, rtx x_regs, rtx insn, basic_block bb,
                     int *regs_set_after, rtx *fail_insn)
  {
-  rtx last = bb->end, act;
+  rtx last = BB_END (bb), act;
  
    if (!store_ops_ok (x_regs, regs_set_after))
      {
@@ -7347,7 +7558,7 @@ static bool
  store_killed_before (rtx x, rtx x_regs, rtx insn, basic_block bb,
                      int *regs_set_before)
  {
-  rtx first = bb->head;
+  rtx first = BB_HEAD (bb);
  
    if (!store_ops_ok (x_regs, regs_set_before))
      return true;
@@ -7394,7 +7605,7 @@ build_store_vectors (void)
               rtx r = gen_reg_rtx (GET_MODE (ptr->pattern));
               if (gcse_file)
                 fprintf (gcse_file, "Removing redundant store:\n");
-             replace_store_insn (r, XEXP (st, 0), bb);
+             replace_store_insn (r, XEXP (st, 0), bb, ptr);
               continue;
             }
           SET_BIT (ae_gen[bb->index], ptr->index);
@@ -7422,7 +7633,7 @@ build_store_vectors (void)
  
        for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
         {
-         if (store_killed_after (ptr->pattern, ptr->pattern_regs, bb->head,
+         if (store_killed_after (ptr->pattern, ptr->pattern_regs, BB_HEAD (bb),
                                   bb, regs_set_in_block, NULL))
             {
               /* It should not be necessary to consider the expression
@@ -7448,14 +7659,14 @@ build_store_vectors (void)
  }
  
  /* Insert an instruction at the beginning of a basic block, and update
-   the BLOCK_HEAD if needed.  */
+   the BB_HEAD if needed.  */
  
  static void
  insert_insn_start_bb (rtx insn, basic_block bb)
  {
    /* Insert at start of successor block.  */
-  rtx prev = PREV_INSN (bb->head);
-  rtx before = bb->head;
+  rtx prev = PREV_INSN (BB_HEAD (bb));
+  rtx before = BB_HEAD (bb);
    while (before != 0)
      {
        if (GET_CODE (before) != CODE_LABEL
@@ -7463,7 +7674,7 @@ insert_insn_start_bb (rtx insn, basic_block bb)
               || NOTE_LINE_NUMBER (before) != NOTE_INSN_BASIC_BLOCK))
         break;
        prev = before;
-      if (prev == bb->end)
+      if (prev == BB_END (bb))
         break;
        before = NEXT_INSN (before);
      }
@@ -7495,6 +7706,9 @@ insert_store (struct ls_expr * expr, edge e)
    if (expr->reaching_reg == NULL_RTX)
      return 0;
  
+  if (e->flags & EDGE_FAKE)
+    return 0;
+
    reg = expr->reaching_reg;
    insn = gen_move_insn (copy_rtx (expr->pattern), reg);
  
@@ -7503,13 +7717,14 @@ insert_store (struct ls_expr * expr, edge e)
       edges so we don't try to insert it on the other edges.  */
    bb = e->dest;
    for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
-    {
-      int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
-      if (index == EDGE_INDEX_NO_EDGE)
-       abort ();
-      if (! TEST_BIT (pre_insert_map[index], expr->index))
-       break;
-    }
+    if (!(tmp->flags & EDGE_FAKE))
+      {
+       int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
+       if (index == EDGE_INDEX_NO_EDGE)
+         abort ();
+       if (! TEST_BIT (pre_insert_map[index], expr->index))
+         break;
+      }
  
    /* If tmp is NULL, we found an insertion on every edge, blank the
       insertion vector for these edges, and insert at the start of the BB.  */
@@ -7545,13 +7760,87 @@ insert_store (struct ls_expr * expr, edge e)
    return 1;
  }
  
+/* Remove any REG_EQUAL or REG_EQUIV notes containing a reference to the
+   memory location in SMEXPR set in basic block BB.
+
+   This could be rather expensive.  */
+
+static void
+remove_reachable_equiv_notes (basic_block bb, struct ls_expr *smexpr)
+{
+  edge *stack = xmalloc (sizeof (edge) * n_basic_blocks), act;
+  sbitmap visited = sbitmap_alloc (last_basic_block);
+  int stack_top = 0;
+  rtx last, insn, note;
+  rtx mem = smexpr->pattern;
+
+  sbitmap_zero (visited);
+  act = bb->succ;
+
+  while (1)
+    {
+      if (!act)
+       {
+         if (!stack_top)
+           {
+             free (stack);
+             sbitmap_free (visited);
+             return;
+           }
+         act = stack[--stack_top];
+       }
+      bb = act->dest;
+      
+      if (bb == EXIT_BLOCK_PTR
+         || TEST_BIT (visited, bb->index)
+         || TEST_BIT (ae_kill[bb->index], smexpr->index))
+       {
+         act = act->succ_next;
+         continue;
+       }
+      SET_BIT (visited, bb->index);
+
+      if (TEST_BIT (st_antloc[bb->index], smexpr->index))
+       {
+         for (last = ANTIC_STORE_LIST (smexpr);
+              BLOCK_FOR_INSN (XEXP (last, 0)) != bb;
+              last = XEXP (last, 1))
+           continue;
+         last = XEXP (last, 0);
+       }
+      else
+       last = NEXT_INSN (BB_END (bb));
+  
+      for (insn = BB_HEAD (bb); insn != last; insn = NEXT_INSN (insn))
+       if (INSN_P (insn))
+         {
+           note = find_reg_equal_equiv_note (insn);
+           if (!note || !expr_equiv_p (XEXP (note, 0), mem))
+             continue;
+
+           if (gcse_file)
+             fprintf (gcse_file, "STORE_MOTION  drop REG_EQUAL note at insn %d:\n",
+                      INSN_UID (insn));
+           remove_note (insn, note);
+         }
+      act = act->succ_next;
+      if (bb->succ)
+       {
+         if (act)
+           stack[stack_top++] = act;
+         act = bb->succ;
+       }
+    }
+}
+
  /* This routine will replace a store with a SET to a specified register.  */
  
  static void
-replace_store_insn (rtx reg, rtx del, basic_block bb)
+replace_store_insn (rtx reg, rtx del, basic_block bb, struct ls_expr *smexpr)
  {
-  rtx insn;
+  rtx insn, mem, note, set, ptr;
  
+  mem = smexpr->pattern;
    insn = gen_move_insn (reg, SET_SRC (single_set (del)));
    insn = emit_insn_after (insn, del);
  
@@ -7565,7 +7854,35 @@ replace_store_insn (rtx reg, rtx del, basic_block bb)
        fprintf (gcse_file, "\n");
      }
  
+  for (ptr = ANTIC_STORE_LIST (smexpr); ptr; ptr = XEXP (ptr, 1))
+    if (XEXP (ptr, 0) == del)
+      {
+       XEXP (ptr, 0) = insn;
+       break;
+      }
    delete_insn (del);
+
+  /* Now we must handle REG_EQUAL notes whose contents is equal to the mem;
+     they are no longer accurate provided that they are reached by this
+     definition, so drop them.  */
+  for (; insn != NEXT_INSN (BB_END (bb)); insn = NEXT_INSN (insn))
+    if (INSN_P (insn))
+      {
+       set = single_set (insn);
+       if (!set)
+         continue;
+       if (expr_equiv_p (SET_DEST (set), mem))
+         return;
+       note = find_reg_equal_equiv_note (insn);
+       if (!note || !expr_equiv_p (XEXP (note, 0), mem))
+         continue;
+
+       if (gcse_file)
+         fprintf (gcse_file, "STORE_MOTION  drop REG_EQUAL note at insn %d:\n",
+                  INSN_UID (insn));
+       remove_note (insn, note);
+      }
+  remove_reachable_equiv_notes (bb, smexpr);
  }
  
  
@@ -7589,7 +7906,7 @@ delete_store (struct ls_expr * expr, basic_block bb)
         {
           /* We know there is only one since we deleted redundant
              ones during the available computation.  */
-         replace_store_insn (reg, del, bb);
+         replace_store_insn (reg, del, bb, expr);
           break;
         }
      }
@@ -7652,6 +7969,7 @@ store_motion (void)
    /* Now compute kill & transp vectors.  */
    build_store_vectors ();
    add_noreturn_fake_exit_edges ();
+  connect_infinite_loops_to_exit ();
  
    edge_list = pre_edge_rev_lcm (gcse_file, num_stores, transp, ae_gen,
                                 st_antloc, ae_kill, &pre_insert_map,
@@ -7702,39 +8020,10 @@ bypass_jumps (FILE *file)
    if (file)
      dump_flow_info (file);
  
-  /* Return if there's nothing to do.  */
-  if (n_basic_blocks <= 1)
+  /* Return if there's nothing to do, or it is too expensive.  */
+  if (n_basic_blocks <= 1 || is_too_expensive (_ ("jump bypassing disabled")))
      return 0;
  
-  /* Trying to perform global optimizations on flow graphs which have
-     a high connectivity will take a long time and is unlikely to be
-     particularly useful.
-
-     In normal circumstances a cfg should have about twice as many edges
-     as blocks.  But we do not want to punish small functions which have
-     a couple switch statements.  So we require a relatively large number
-     of basic blocks and the ratio of edges to blocks to be high.  */
-  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
-    {
-      if (warn_disabled_optimization)
-        warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
-                 n_basic_blocks, n_edges / n_basic_blocks);
-      return 0;
-    }
-
-  /* If allocating memory for the cprop bitmap would take up too much
-     storage it's better just to disable the optimization.  */
-  if ((n_basic_blocks
-       * SBITMAP_SET_SIZE (max_gcse_regno)
-       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
-    {
-      if (warn_disabled_optimization)
-        warning ("GCSE disabled: %d basic blocks and %d registers",
-                 n_basic_blocks, max_gcse_regno);
-
-      return 0;
-    }
-
    gcc_obstack_init (&gcse_obstack);
    bytes_used = 0;
  
@@ -7761,7 +8050,7 @@ bypass_jumps (FILE *file)
    if (file)
      {
        fprintf (file, "BYPASS of %s: %d basic blocks, ",
-              current_function_name, n_basic_blocks);
+              current_function_name (), n_basic_blocks);
        fprintf (file, "%d bytes\n\n", bytes_used);
      }
  
@@ -7775,4 +8064,680 @@ bypass_jumps (FILE *file)
    return changed;
  }
  
+/* Return true if the graph is too expensive to optimize. PASS is the
+   optimization about to be performed.  */
+
+static bool
+is_too_expensive (const char *pass)
+{
+  /* Trying to perform global optimizations on flow graphs which have
+     a high connectivity will take a long time and is unlikely to be
+     particularly useful.
+     
+     In normal circumstances a cfg should have about twice as many
+     edges as blocks.  But we do not want to punish small functions
+     which have a couple switch statements.  Rather than simply
+     threshold the number of blocks, uses something with a more
+     graceful degradation.  */
+  if (n_edges > 20000 + n_basic_blocks * 4)
+    {
+      if (warn_disabled_optimization)
+       warning ("%s: %d basic blocks and %d edges/basic block",
+                pass, n_basic_blocks, n_edges / n_basic_blocks);
+      
+      return true;
+    }
+
+  /* If allocating memory for the cprop bitmap would take up too much
+     storage it's better just to disable the optimization.  */
+  if ((n_basic_blocks
+       * SBITMAP_SET_SIZE (max_reg_num ())
+       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
+    {
+      if (warn_disabled_optimization)
+       warning ("%s: %d basic blocks and %d registers",
+                pass, n_basic_blocks, max_reg_num ());
+
+      return true;
+    }
+
+  return false;
+}
+
+/* The following code implements gcse after reload, the purpose of this
+   pass is to cleanup redundant loads generated by reload and other
+   optimizations that come after gcse. It searches for simple inter-block
+   redundancies and tries to eliminate them by adding moves and loads
+   in cold places.  */
+
+/* The following structure holds the information about the occurrences of
+   the redundant instructions.  */
+struct unoccr
+{
+  struct unoccr *next;
+  edge pred;
+  rtx insn;
+};
+
+static bool reg_used_on_edge (rtx, edge);
+static rtx reg_set_between_after_reload_p (rtx, rtx, rtx);
+static rtx reg_used_between_after_reload_p (rtx, rtx, rtx);
+static rtx get_avail_load_store_reg (rtx);
+static bool is_jump_table_basic_block (basic_block);
+static bool bb_has_well_behaved_predecessors (basic_block);
+static struct occr* get_bb_avail_insn (basic_block, struct occr *);
+static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *);
+static void compute_hash_table_after_reload (struct hash_table *);
+static void eliminate_partially_redundant_loads (basic_block,
+                                               rtx,
+                                               struct expr *);
+static void gcse_after_reload (void);
+static struct occr* get_bb_avail_insn (basic_block, struct occr *);
+void gcse_after_reload_main (rtx, FILE *);
+
+
+/* Check if register REG is used in any insn waiting to be inserted on E.
+   Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
+   with PREV(insn),NEXT(insn) instead of calling
+   reg_overlap_mentioned_p.  */
+
+static bool
+reg_used_on_edge (rtx reg, edge e)
+{
+  rtx insn;
+
+  for (insn = e->insns; insn; insn = NEXT_INSN (insn))
+    if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+      return true;
+
+  return false;
+}
+
+/* Return the insn that sets register REG or clobbers it in between
+   FROM_INSN and TO_INSN (exclusive of those two).
+   Just like reg_set_between but for hard registers and not pseudos.  */
+
+static rtx
+reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
+{
+  rtx insn;
+  int regno;
+
+  if (GET_CODE (reg) != REG)
+    abort ();
+  regno = REGNO (reg);
+
+  /* We are called after register allocation.  */
+  if (regno >= FIRST_PSEUDO_REGISTER)
+    abort ();
+
+  if (from_insn == to_insn)
+    return NULL_RTX;
+
+  for (insn = NEXT_INSN (from_insn);
+       insn != to_insn;
+       insn = NEXT_INSN (insn))
+    {
+      if (INSN_P (insn))
+       {
+         if (FIND_REG_INC_NOTE (insn, reg)
+             || (GET_CODE (insn) == CALL_INSN
+                 && call_used_regs[regno])
+             || find_reg_fusage (insn, CLOBBER, reg))
+           return insn;
+       }
+      if (set_of (reg, insn) != NULL_RTX)
+       return insn;
+    }
+  return NULL_RTX;
+}
+
+/* Return the insn that uses register REG in between FROM_INSN and TO_INSN
+   (exclusive of those two). Similar to reg_used_between but for hard
+   registers and not pseudos.  */
+
+static rtx
+reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
+{
+  rtx insn;
+  int regno;
+
+  if (GET_CODE (reg) != REG)
+    return to_insn;
+  regno = REGNO (reg);
+
+  /* We are called after register allocation.  */
+  if (regno >= FIRST_PSEUDO_REGISTER)
+    abort ();
+  if (from_insn == to_insn)
+    return NULL_RTX;
+
+  for (insn = NEXT_INSN (from_insn);
+       insn != to_insn;
+       insn = NEXT_INSN (insn))
+    if (INSN_P (insn)
+       && (reg_overlap_mentioned_p (reg, PATTERN (insn))
+           || (GET_CODE (insn) == CALL_INSN
+               && call_used_regs[regno])
+           || find_reg_fusage (insn, USE, reg)
+           || find_reg_fusage (insn, CLOBBER, reg)))
+      return insn;
+  return NULL_RTX;
+}
+
+/* Return the loaded/stored register of a load/store instruction.  */
+
+static rtx
+get_avail_load_store_reg (rtx insn)
+{
+  if (GET_CODE (SET_DEST (PATTERN (insn))) == REG)  /* A load.  */
+    return SET_DEST(PATTERN(insn));
+  if (GET_CODE (SET_SRC (PATTERN (insn))) == REG)  /* A store.  */
+    return SET_SRC (PATTERN (insn));
+  abort ();
+}
+
+/* Don't handle ABNORMAL edges or jump tables.  */
+
+static bool
+is_jump_table_basic_block (basic_block bb)
+{
+  rtx insn = BB_END (bb);
+
+  if (GET_CODE (insn) == JUMP_INSN &&
+      (GET_CODE (PATTERN (insn)) == ADDR_VEC
+       || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+    return true;
+  return false;
+}
+
+/* Return nonzero if the predecessors of BB are "well behaved".  */
+
+static bool
+bb_has_well_behaved_predecessors (basic_block bb)
+{
+  edge pred;
+
+  if (! bb->pred)
+    return false;
+  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
+    if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
+       || is_jump_table_basic_block (pred->src))
+      return false;
+  return true;
+}
+
+
+/* Search for the occurrences of expression in BB.  */
+
+static struct occr*
+get_bb_avail_insn (basic_block bb, struct occr *occr)
+{
+  for (; occr != NULL; occr = occr->next)
+    if (BLOCK_FOR_INSN (occr->insn)->index == bb->index)
+      return occr;
+  return NULL;
+}
+
+/* Perform partial GCSE pass after reload, try to eliminate redundant loads
+   created by the reload pass. We try to look for a full or partial
+   redundant loads fed by one or more loads/stores in predecessor BBs,
+   and try adding loads to make them fully redundant. We also check if
+   it's worth adding loads to be able to delete the redundant load.
+
+   Algorithm:
+   1. Build available expressions hash table:
+       For each load/store instruction, if the loaded/stored memory didn't
+       change until the end of the basic block add this memory expression to
+       the hash table.
+   2. Perform Redundancy elimination:
+      For each load instruction do the following:
+        perform partial redundancy elimination, check if it's worth adding
+        loads to make the load fully redundant. If so add loads and
+        register copies and delete the load.
+
+   Future enhancement:
+     if loaded register is used/defined between load and some store,
+     look for some other free register between load and all its stores,
+     and replace load with a copy from this register to the loaded
+     register.  */
+
+
+/* This handles the case where several stores feed a partially redundant
+   load. It checks if the redundancy elimination is possible and if it's
+   worth it.  */
+
+static void
+eliminate_partially_redundant_loads (basic_block bb, rtx insn,
+                                    struct expr *expr)
+{
+  edge pred;
+  rtx avail_insn = NULL_RTX;
+  rtx avail_reg;
+  rtx dest, pat;
+  struct occr *a_occr;
+  struct unoccr *occr, *avail_occrs = NULL;
+  struct unoccr *unoccr, *unavail_occrs = NULL;
+  int npred_ok = 0;
+  gcov_type ok_count = 0; /* Redundant load execution count.  */
+  gcov_type critical_count = 0; /* Execution count of critical edges.  */
+
+  /* The execution count of the loads to be added to make the
+     load fully redundant.  */
+  gcov_type not_ok_count = 0;
+  basic_block pred_bb;
+
+  pat = PATTERN (insn);
+  dest = SET_DEST (pat);
+  /* Check that the loaded register is not used, set, or killed from the
+     beginning of the block.  */
+  if (reg_used_between_after_reload_p (dest,
+                                       PREV_INSN (BB_HEAD (bb)), insn)
+      || reg_set_between_after_reload_p (dest,
+                                         PREV_INSN (BB_HEAD (bb)), insn))
+    return;
+
+  /* Check potential for replacing load with copy for predecessors.  */
+  for (pred = bb->pred; pred; pred = pred->pred_next)
+    {
+      rtx next_pred_bb_end;
+
+      avail_insn = NULL_RTX;
+      pred_bb = pred->src;
+      next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
+      for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
+          a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
+       {
+         /* Check if the loaded register is not used.  */
+         avail_insn = a_occr->insn;
+         if (! (avail_reg = get_avail_load_store_reg (avail_insn)))
+           abort ();
+         /* Make sure we can generate a move from register avail_reg to
+            dest.  */
+         extract_insn (gen_move_insn (copy_rtx (dest),
+                                      copy_rtx (avail_reg)));
+         if (! constrain_operands (1)
+             || reg_killed_on_edge (avail_reg, pred)
+             || reg_used_on_edge (dest, pred))
+           {
+             avail_insn = NULL;
+             continue;
+           }
+         if (! reg_set_between_after_reload_p (avail_reg, avail_insn,
+                                               next_pred_bb_end))
+           /* AVAIL_INSN remains non-null.  */
+           break;
+         else
+           avail_insn = NULL;
+       }
+      if (avail_insn != NULL_RTX)
+       {
+         npred_ok++;
+         ok_count += pred->count;
+          if (EDGE_CRITICAL_P (pred))
+            critical_count += pred->count;
+         occr = gmalloc (sizeof (struct unoccr));
+         occr->insn = avail_insn;
+         occr->pred = pred;
+         occr->next = avail_occrs;
+         avail_occrs = occr;
+       }
+      else
+       {
+         not_ok_count += pred->count;
+          if (EDGE_CRITICAL_P (pred))
+            critical_count += pred->count;
+         unoccr = gmalloc (sizeof (struct unoccr));
+         unoccr->insn = NULL_RTX;
+         unoccr->pred = pred;
+         unoccr->next = unavail_occrs;
+         unavail_occrs = unoccr;
+       }
+    }
+
+  if (npred_ok == 0    /* No load can be replaced by copy.  */
+      || (optimize_size && npred_ok > 1)) /* Prevent exploding the code.  */
+    return;
+
+  /* Check if it's worth applying the partial redundancy elimination.  */
+  if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
+    return;
+
+  if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
+    return;
+
+  /* Generate moves to the loaded register from where
+     the memory is available.  */
+  for (occr = avail_occrs; occr; occr = occr->next)
+    {
+      avail_insn = occr->insn;
+      pred = occr->pred;
+      /* Set avail_reg to be the register having the value of the
+        memory.  */
+      avail_reg = get_avail_load_store_reg (avail_insn);
+      if (! avail_reg)
+       abort ();
+
+      insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
+                                         copy_rtx (avail_reg)),
+                          pred);
+
+      if (gcse_file)
+       fprintf (gcse_file,
+                "GCSE AFTER reload generating move from %d to %d on \
+                edge from %d to %d\n",
+                REGNO (avail_reg),
+                REGNO (dest),
+                pred->src->index,
+                pred->dest->index);
+    }
+
+  /* Regenerate loads where the memory is unavailable.  */
+  for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
+    {
+      pred = unoccr->pred;
+      insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
+
+      if (gcse_file)
+       fprintf (gcse_file,
+                "GCSE AFTER reload: generating on edge from %d to %d\
+                 a copy of load:\n",
+                pred->src->index,
+                pred->dest->index);
+    }
+
+  /* Delete the insn if it is not available in this block and mark it
+     for deletion if it is available. If insn is available it may help
+     discover additional redundancies, so mark it for later deletion.*/
+  for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
+       a_occr && (a_occr->insn != insn);
+       a_occr = get_bb_avail_insn (bb, a_occr->next));
+
+  if (!a_occr)
+    delete_insn (insn);
+  else
+    a_occr->deleted_p = 1;
+}
+
+/* Performing the redundancy elimination as described before.  */
+
+static void
+gcse_after_reload (void)
+{
+  unsigned int i;
+  rtx insn;
+  basic_block bb;
+  struct expr *expr;
+  struct occr *occr;
+
+  /* Note we start at block 1.  */
+
+  if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
+    return;
+
+  FOR_BB_BETWEEN (bb,
+                 ENTRY_BLOCK_PTR->next_bb->next_bb,
+                 EXIT_BLOCK_PTR,
+                 next_bb)
+    {
+      if (! bb_has_well_behaved_predecessors (bb))
+       continue;
+
+      /* Do not try this optimization on cold basic blocks.  */
+      if (probably_cold_bb_p (bb))
+       continue;
+
+      reset_opr_set_tables ();
+
+      for (insn = BB_HEAD (bb);
+          insn != NULL
+          && insn != NEXT_INSN (BB_END (bb));
+          insn = NEXT_INSN (insn))
+       {
+         /* Is it a load - of the form (set (reg) (mem))?  */
+         if (GET_CODE (insn) == INSN
+              && GET_CODE (PATTERN (insn)) == SET
+             && GET_CODE (SET_DEST (PATTERN (insn))) == REG
+             && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
+           {
+             rtx pat = PATTERN (insn);
+             rtx src = SET_SRC (pat);
+             struct expr *expr;
+
+             if (general_operand (src, GET_MODE (src))
+                 /* Is the expression recorded?  */
+                 && (expr = lookup_expr (src, &expr_hash_table)) != NULL
+                 /* Are the operands unchanged since the start of the
+                    block?  */
+                 && oprs_not_set_p (src, insn)
+                 && ! MEM_VOLATILE_P (src)
+                 && GET_MODE (src) != BLKmode
+                 && !(flag_non_call_exceptions && may_trap_p (src))
+                 && !side_effects_p (src))
+               {
+                 /* We now have a load (insn) and an available memory at
+                    its BB start (expr). Try to remove the loads if it is
+                    redundant.  */
+                 eliminate_partially_redundant_loads (bb, insn, expr);
+               }
+           }
+
+           /* Keep track of everything modified by this insn.  */
+           if (INSN_P (insn))
+             mark_oprs_set (insn);
+       }
+    }
+
+  commit_edge_insertions ();
+
+  /* Go over the expression hash table and delete insns that were
+     marked for later deletion.  */
+  for (i = 0; i < expr_hash_table.size; i++)
+    {
+      for (expr = expr_hash_table.table[i];
+          expr != NULL;
+          expr = expr->next_same_hash)
+       for (occr = expr->avail_occr; occr; occr = occr->next)
+         if (occr->deleted_p)
+           delete_insn (occr->insn);
+    }
+}
+
+/* Scan pattern PAT of INSN and add an entry to the hash TABLE.
+   After reload we are interested in loads/stores only.  */
+
+static void
+hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table)
+{
+  rtx src = SET_SRC (pat);
+  rtx dest = SET_DEST (pat);
+
+  if (GET_CODE (src) != MEM && GET_CODE (dest) != MEM)
+    return;
+
+  if (GET_CODE (dest) == REG)
+    {
+      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
+         can_copy_p (GET_MODE (dest))
+         /* GCSE commonly inserts instruction after the insn.  We can't
+            do that easily for EH_REGION notes so disable GCSE on these
+            for now.  */
+         && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+         /* Is SET_SRC something we want to gcse?  */
+         && general_operand (src, GET_MODE (src))
+         /* Don't CSE a nop.  */
+         && ! set_noop_p (pat)
+         && ! JUMP_P (insn))
+       {
+         /* An expression is not available if its operands are
+            subsequently modified, including this insn.  */
+         if (oprs_available_p (src, insn))
+           insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table);
+       }
+    }
+  else if ((GET_CODE (src) == REG))
+    {
+      /* Only record sets of pseudo-regs in the hash table.  */
+      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
+         can_copy_p (GET_MODE (src))
+         /* GCSE commonly inserts instruction after the insn.  We can't
+            do that easily for EH_REGION notes so disable GCSE on these
+            for now.  */
+         && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+         /* Is SET_DEST something we want to gcse?  */
+         && general_operand (dest, GET_MODE (dest))
+         /* Don't CSE a nop.  */
+         && ! set_noop_p (pat)
+         &&! JUMP_P (insn)
+         && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
+         /* Check if the memory expression is killed after insn.  */
+         && ! load_killed_in_block_p (BLOCK_FOR_INSN (insn),
+                                      INSN_CUID (insn) + 1,
+                                      dest,
+                                      1)
+         && oprs_unchanged_p (XEXP (dest, 0), insn, 1))
+       {
+         insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table);
+       }
+    }
+}
+
+
+/* Create hash table of memory expressions available at end of basic
+   blocks.  */
+
+static void
+compute_hash_table_after_reload (struct hash_table *table)
+{
+  unsigned int i;
+
+  table->set_p = 0;
+
+  /* Initialize count of number of entries in hash table.  */
+  table->n_elems = 0;
+  memset ((char *) table->table, 0,
+         table->size * sizeof (struct expr *));
+
+  /* While we compute the hash table we also compute a bit array of which
+     registers are set in which blocks.  */
+  sbitmap_vector_zero (reg_set_in_block, last_basic_block);
+
+  /* Re-cache any INSN_LIST nodes we have allocated.  */
+  clear_modify_mem_tables ();
+
+  /* Some working arrays used to track first and last set in each block.  */
+  reg_avail_info = gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
+
+  for (i = 0; i < max_gcse_regno; ++i)
+    reg_avail_info[i].last_bb = NULL;
+
+  FOR_EACH_BB (current_bb)
+    {
+      rtx insn;
+      unsigned int regno;
+
+      /* First pass over the instructions records information used to
+        determine when registers and memory are first and last set.  */
+      for (insn = BB_HEAD (current_bb);
+          insn && insn != NEXT_INSN (BB_END (current_bb));
+          insn = NEXT_INSN (insn))
+       {
+         if (! INSN_P (insn))
+           continue;
+
+         if (GET_CODE (insn) == CALL_INSN)
+           {
+             bool clobbers_all = false;
+
+#ifdef NON_SAVING_SETJMP
+             if (NON_SAVING_SETJMP
+                 && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+               clobbers_all = true;
+#endif
+
+             for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+               if (clobbers_all
+                   || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+                                         regno))
+                 record_last_reg_set_info (insn, regno);
+
+             mark_call (insn);
+           }
+
+           note_stores (PATTERN (insn), record_last_set_info, insn);
+
+           if (GET_CODE (PATTERN (insn)) == SET)
+             {
+               rtx src, dest;
+
+               src = SET_SRC (PATTERN (insn));
+               dest = SET_DEST (PATTERN (insn));
+               if (GET_CODE (src) == MEM && auto_inc_p (XEXP (src, 0)))
+                 {
+                   regno = REGNO (XEXP (XEXP (src, 0), 0));
+                   record_last_reg_set_info (insn, regno);
+                 }
+               if (GET_CODE (dest) == MEM && auto_inc_p (XEXP (dest, 0)))
+                 {
+                   regno = REGNO (XEXP (XEXP (dest, 0), 0));
+                   record_last_reg_set_info (insn, regno);
+                 }
+               }
+         }
+
+       /* The next pass builds the hash table.  */
+       for (insn = BB_HEAD (current_bb);
+            insn && insn != NEXT_INSN (BB_END (current_bb));
+            insn = NEXT_INSN (insn))
+         if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
+           if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+             hash_scan_set_after_reload (PATTERN (insn), insn, table);
+    }
+
+  free (reg_avail_info);
+  reg_avail_info = NULL;
+}
+
+
+/* Main entry point of the GCSE after reload - clean some redundant loads
+   due to spilling.  */
+
+void
+gcse_after_reload_main (rtx f, FILE* file)
+{
+  gcse_subst_count = 0;
+  gcse_create_count = 0;
+
+  gcse_file = file;
+
+  gcc_obstack_init (&gcse_obstack);
+  bytes_used = 0;
+
+  /* We need alias.  */
+  init_alias_analysis ();
+
+  max_gcse_regno = max_reg_num ();
+
+  alloc_reg_set_mem (max_gcse_regno);
+  alloc_gcse_mem (f);
+  alloc_hash_table (max_cuid, &expr_hash_table, 0);
+  compute_hash_table_after_reload (&expr_hash_table);
+
+  if (gcse_file)
+    dump_hash_table (gcse_file, "Expression", &expr_hash_table);
+
+  if (expr_hash_table.n_elems > 0)
+    gcse_after_reload ();
+
+  free_hash_table (&expr_hash_table);
+
+  free_gcse_mem ();
+  free_reg_set_mem ();
+
+  /* We are finished with alias.  */
+  end_alias_analysis ();
+
+  obstack_free (&gcse_obstack, NULL);
+}
+
  #include "gt-gcse.h"