* tree-inline.c (find_builtin_longjmp_call): Save and restore

[pf3gnuchains/gcc-fork.git] / gcc / loop.c
diff --git a/gcc/loop.c b/gcc/loop.c

index 42f52ff..222d492 100644 (file)
--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -1,6 +1,6 @@
  /* Perform various loop optimizations, including strength reduction.
     Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
-   1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -36,9 +36,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  
  #include "config.h"
  #include "system.h"
+#include "coretypes.h"
+#include "tm.h"
  #include "rtl.h"
  #include "tm_p.h"
-#include "obstack.h"
  #include "function.h"
  #include "expr.h"
  #include "hard-reg-set.h"
@@ -55,6 +56,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "predict.h"
  #include "insn-flags.h"
  #include "optabs.h"
+#include "cfgloop.h"
  
  /* Not really meaningful values, but at least something.  */
  #ifndef SIMULTANEOUS_PREFETCHES
@@ -79,12 +81,9 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  /* For very tiny loops it is not worthwhile to prefetch even before the loop,
     since it is likely that the data are already in the cache.  */
  #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN  2
-/* The minimal number of prefetch blocks that a loop must consume to make
-   the emitting of prefetch instruction in the body of loop worthwhile.  */
-#define PREFETCH_BLOCKS_IN_LOOP_MIN  6
  
  /* Parameterize some prefetch heuristics so they can be turned on and off
-   easily for performance testing on new architecures.  These can be
+   easily for performance testing on new architectures.  These can be
     defined in target-dependent files.  */
  
  /* Prefetch is worthwhile only when loads/stores are dense.  */
@@ -143,13 +142,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  
  /* Prefetch even if the GIV is in conditional code.  */
  #ifndef PREFETCH_CONDITIONAL
-#define PREFETCH_CONDITIONAL 0
-#endif
-
-/* If the loop requires more prefetches than the target can process in
-   parallel then don't prefetch anything in that loop.  */
-#ifndef PREFETCH_LIMIT_TO_SIMULTANEOUS
-#define PREFETCH_LIMIT_TO_SIMULTANEOUS 1
+#define PREFETCH_CONDITIONAL 1
  #endif
  
  #define LOOP_REG_LIFETIME(LOOP, REGNO) \
@@ -161,7 +154,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  
  #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
  ((REGNO) < FIRST_PSEUDO_REGISTER \
- ? HARD_REGNO_NREGS ((REGNO), GET_MODE (SET_DEST)) : 1)
+ ? (int) HARD_REGNO_NREGS ((REGNO), GET_MODE (SET_DEST)) : 1)
  
  
  /* Vector mapping INSN_UIDs to luids.
@@ -179,10 +172,6 @@ struct loop **uid_loop;
  
  int max_uid_for_loop;
  
-/* 1 + luid of last insn.  */
-
-static int max_luid;
-
  /* Number of loops detected in current function.  Used as index to the
     next few tables.  */
  
@@ -194,9 +183,6 @@ unsigned int max_reg_before_loop;
  
  /* The value to pass to the next call of reg_scan_update.  */
  static int loop_max_reg;
-
-#define obstack_chunk_alloc xmalloc
-#define obstack_chunk_free free
  \f
  /* During the analysis of a loop, a chain of `struct movable's
     is made to record all the movable insns found.
@@ -218,6 +204,9 @@ struct movable
    short savings;               /* Number of insns we can move for this reg,
                                    including other movables that force this
                                    or match this one.  */
+  ENUM_BITFIELD(machine_mode) savemode : 8;   /* Nonzero means it is a mode for 
+                                  a low part that we should avoid changing when
+                                  clearing the rest of the reg.  */
    unsigned int cond : 1;       /* 1 if only conditionally movable */
    unsigned int force : 1;      /* 1 means MUST move this insn */
    unsigned int global : 1;     /* 1 means reg is live outside this loop */
@@ -234,9 +223,9 @@ struct movable
    unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
                                     first insn of a consecutive sets group.  */
    unsigned int is_equiv : 1;   /* 1 means a REG_EQUIV is present on INSN.  */
-  enum machine_mode savemode;   /* Nonzero means it is a mode for a low part
-                                  that we should avoid changing when clearing
-                                  the rest of the reg.  */
+  unsigned int insert_temp : 1;  /* 1 means we copy to a new pseudo and replace
+                                   the original insn with a copy from that
+                                   pseudo, rather than deleting it. */
    struct movable *match;       /* First entry for same value */
    struct movable *forces;      /* An insn that must be moved if this is */
    struct movable *next;
@@ -510,7 +499,7 @@ loop_optimize (f, dumpfile, flags)
  
    /* Allocate and initialize auxiliary loop information.  */
    loops_info = xcalloc (loops->num, sizeof (struct loop_info));
-  for (i = 0; i < loops->num; i++)
+  for (i = 0; i < (int) loops->num; i++)
      loops->array[i].aux = loops_info + i;
  
    /* Now find all register lifetimes.  This must be done after
@@ -535,7 +524,7 @@ loop_optimize (f, dumpfile, flags)
    /* find_and_verify_loops has already called compute_luids, but it
       might have rearranged code afterwards, so we need to recompute
       the luids now.  */
-  max_luid = compute_luids (f, NULL_RTX, 0);
+  compute_luids (f, NULL_RTX, 0);
  
    /* Don't leave gaps in uid_luid for insns that have been
       deleted.  It is possible that the first or last insn
@@ -566,13 +555,6 @@ loop_optimize (f, dumpfile, flags)
         scan_loop (loop, flags);
      }
  
-  /* If there were lexical blocks inside the loop, they have been
-     replicated.  We will now have more than one NOTE_INSN_BLOCK_BEG
-     and NOTE_INSN_BLOCK_END for each such block.  We must duplicate
-     the BLOCKs as well.  */
-  if (write_symbols != NO_DEBUG)
-    reorder_blocks ();
-
    end_alias_analysis ();
  
    /* Clean up.  */
@@ -636,8 +618,6 @@ scan_loop (loop, flags)
    /* 1 if we are scanning insns that might never be executed
       due to a subroutine call which might exit before they are reached.  */
    int call_passed = 0;
-  /* Jump insn that enters the loop, or 0 if control drops in.  */
-  rtx loop_entry_jump = 0;
    /* Number of insns in the loop.  */
    int insn_count;
    int tem;
@@ -653,6 +633,7 @@ scan_loop (loop, flags)
    int threshold;
    /* Nonzero if we are scanning instructions in a sub-loop.  */
    int loop_depth = 0;
+  int in_libcall;
  
    loop->top = 0;
  
@@ -704,24 +685,20 @@ scan_loop (loop, flags)
       Start scan from there.
       But record in LOOP->TOP the place where the end-test jumps
       back to so we can scan that after the end of the loop.  */
-  if (GET_CODE (p) == JUMP_INSN)
-    {
-      loop_entry_jump = p;
-
+  if (GET_CODE (p) == JUMP_INSN
        /* Loop entry must be unconditional jump (and not a RETURN)  */
-      if (any_uncondjump_p (p)
-         && JUMP_LABEL (p) != 0
-         /* Check to see whether the jump actually
-            jumps out of the loop (meaning it's no loop).
-            This case can happen for things like
-            do {..} while (0).  If this label was generated previously
-            by loop, we can't tell anything about it and have to reject
-            the loop.  */
-         && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
-       {
-         loop->top = next_label (loop->scan_start);
-         loop->scan_start = JUMP_LABEL (p);
-       }
+      && any_uncondjump_p (p)
+      && JUMP_LABEL (p) != 0
+      /* Check to see whether the jump actually
+        jumps out of the loop (meaning it's no loop).
+        This case can happen for things like
+        do {..} while (0).  If this label was generated previously
+        by loop, we can't tell anything about it and have to reject
+        the loop.  */
+      && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
+    {
+      loop->top = next_label (loop->scan_start);
+      loop->scan_start = JUMP_LABEL (p);
      }
  
    /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
@@ -769,68 +746,73 @@ scan_loop (loop, flags)
       When MAYBE_NEVER is 0, all insns will be executed at least once
       so that is not a problem.  */
  
-  for (p = next_insn_in_loop (loop, loop->scan_start);
+  for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
         p != NULL_RTX;
         p = next_insn_in_loop (loop, p))
      {
-      if (GET_CODE (p) == INSN
-         && (set = single_set (p))
-         && GET_CODE (SET_DEST (set)) == REG
-#ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
-         && SET_DEST (set) != pic_offset_table_rtx
-#endif
-         && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
+      if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
+       in_libcall--;
+      if (GET_CODE (p) == INSN)
         {
-         int tem1 = 0;
-         int tem2 = 0;
-         int move_insn = 0;
-         rtx src = SET_SRC (set);
-         rtx dependencies = 0;
-
-         /* Figure out what to use as a source of this insn.  If a REG_EQUIV
-            note is given or if a REG_EQUAL note with a constant operand is
-            specified, use it as the source and mark that we should move
-            this insn by calling emit_move_insn rather that duplicating the
-            insn.
-
-            Otherwise, only use the REG_EQUAL contents if a REG_RETVAL note
-            is present.  */
-         temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
+         temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
           if (temp)
-           src = XEXP (temp, 0), move_insn = 1;
-         else
+           in_libcall++;
+         if (! in_libcall
+             && (set = single_set (p))
+             && GET_CODE (SET_DEST (set)) == REG
+#ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
+             && SET_DEST (set) != pic_offset_table_rtx
+#endif
+             && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
             {
-             temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
-             if (temp && CONSTANT_P (XEXP (temp, 0)))
+             int tem1 = 0;
+             int tem2 = 0;
+             int move_insn = 0;
+             int insert_temp = 0;
+             rtx src = SET_SRC (set);
+             rtx dependencies = 0;
+
+             /* Figure out what to use as a source of this insn.  If a
+                REG_EQUIV note is given or if a REG_EQUAL note with a
+                constant operand is specified, use it as the source and
+                mark that we should move this insn by calling
+                emit_move_insn rather than duplicating the insn.
+
+                Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
+                note is present.  */
+             temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
+             if (temp)
                 src = XEXP (temp, 0), move_insn = 1;
-             if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
+             else
                 {
-                 src = XEXP (temp, 0);
-                 /* A libcall block can use regs that don't appear in
-                    the equivalent expression.  To move the libcall,
-                    we must move those regs too.  */
-                 dependencies = libcall_other_reg (p, src);
+                 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
+                 if (temp && CONSTANT_P (XEXP (temp, 0)))
+                   src = XEXP (temp, 0), move_insn = 1;
+                 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
+                   {
+                     src = XEXP (temp, 0);
+                     /* A libcall block can use regs that don't appear in
+                        the equivalent expression.  To move the libcall,
+                        we must move those regs too.  */
+                     dependencies = libcall_other_reg (p, src);
+                   }
                 }
-           }
  
-         /* For parallels, add any possible uses to the depencies, as we can't move
-            the insn without resolving them first.  */
-         if (GET_CODE (PATTERN (p)) == PARALLEL)
-           {
-             for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
+             /* For parallels, add any possible uses to the dependencies, as
+                we can't move the insn without resolving them first.  */
+             if (GET_CODE (PATTERN (p)) == PARALLEL)
                 {
-                 rtx x = XVECEXP (PATTERN (p), 0, i);
-                 if (GET_CODE (x) == USE)
-                   dependencies = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0), dependencies);
+                 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
+                   {
+                     rtx x = XVECEXP (PATTERN (p), 0, i);
+                     if (GET_CODE (x) == USE)
+                       dependencies
+                         = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
+                                              dependencies);
+                   }
                 }
-           }
  
-         /* Don't try to optimize a register that was made
-            by loop-optimization for an inner loop.
-            We don't know its life-span, so we can't compute the benefit.  */
-         if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
-           ;
-         else if (/* The register is used in basic blocks other
+             if (/* The register is used in basic blocks other
                       than the one where it is set (meaning that
                       something after this point in the loop might
                       depend on its value before the set).  */
@@ -839,220 +821,251 @@ scan_loop (loop, flags)
                       the loop starts, or the value before the set is
                       needed before the set occurs...
  
-                     ??? Note we have quadratic behaviour here, mitigated
+                     ??? Note we have quadratic behavior here, mitigated
                       by the fact that the previous test will often fail for
                       large loops.  Rather than re-scanning the entire loop
                       each time for register usage, we should build tables
                       of the register usage and use them here instead.  */
                    && (maybe_never
                        || loop_reg_used_before_p (loop, set, p)))
-           /* It is unsafe to move the set.
-
-              This code used to consider it OK to move a set of a variable
-              which was not created by the user and not used in an exit test.
-              That behavior is incorrect and was removed.  */
-           ;
-         else if ((tem = loop_invariant_p (loop, src))
-                  && (dependencies == 0
-                      || (tem2 = loop_invariant_p (loop, dependencies)) != 0)
-                  && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
-                      || (tem1
-                          = consec_sets_invariant_p
-                          (loop, SET_DEST (set),
-                           regs->array[REGNO (SET_DEST (set))].set_in_loop,
-                           p)))
-                  /* If the insn can cause a trap (such as divide by zero),
-                     can't move it unless it's guaranteed to be executed
-                     once loop is entered.  Even a function call might
-                     prevent the trap insn from being reached
-                     (since it might exit!)  */
-                  && ! ((maybe_never || call_passed)
-                        && may_trap_p (src)))
-           {
-             struct movable *m;
-             int regno = REGNO (SET_DEST (set));
-
-             /* A potential lossage is where we have a case where two insns
-                can be combined as long as they are both in the loop, but
-                we move one of them outside the loop.  For large loops,
-                this can lose.  The most common case of this is the address
-                of a function being called.
-
-                Therefore, if this register is marked as being used exactly
-                once if we are in a loop with calls (a "large loop"), see if
-                we can replace the usage of this register with the source
-                of this SET.  If we can, delete this insn.
-
-                Don't do this if P has a REG_RETVAL note or if we have
-                SMALL_REGISTER_CLASSES and SET_SRC is a hard register.  */
-
-             if (loop_info->has_call
-                 && regs->array[regno].single_usage != 0
-                 && regs->array[regno].single_usage != const0_rtx
-                 && REGNO_FIRST_UID (regno) == INSN_UID (p)
-                 && (REGNO_LAST_UID (regno)
-                     == INSN_UID (regs->array[regno].single_usage))
-                 && regs->array[regno].set_in_loop == 1
-                 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
-                 && ! side_effects_p (SET_SRC (set))
-                 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
-                 && (! SMALL_REGISTER_CLASSES
-                     || (! (GET_CODE (SET_SRC (set)) == REG
-                            && REGNO (SET_SRC (set)) < FIRST_PSEUDO_REGISTER)))
-                 /* This test is not redundant; SET_SRC (set) might be
-                    a call-clobbered register and the life of REGNO
-                    might span a call.  */
-                 && ! modified_between_p (SET_SRC (set), p,
-                                          regs->array[regno].single_usage)
-                 && no_labels_between_p (p, regs->array[regno].single_usage)
-                 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
-                                          regs->array[regno].single_usage))
-               {
-                 /* Replace any usage in a REG_EQUAL note.  Must copy the
-                    new source, so that we don't get rtx sharing between the
-                    SET_SOURCE and REG_NOTES of insn p.  */
-                 REG_NOTES (regs->array[regno].single_usage)
-                   = replace_rtx (REG_NOTES (regs->array[regno].single_usage),
-                                  SET_DEST (set), copy_rtx (SET_SRC (set)));
-
-                 delete_insn (p);
-                 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
-                   regs->array[regno+i].set_in_loop = 0;
-                 continue;
-               }
-
-             m = (struct movable *) xmalloc (sizeof (struct movable));
-             m->next = 0;
-             m->insn = p;
-             m->set_src = src;
-             m->dependencies = dependencies;
-             m->set_dest = SET_DEST (set);
-             m->force = 0;
-             m->consec = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
-             m->done = 0;
-             m->forces = 0;
-             m->partial = 0;
-             m->move_insn = move_insn;
-             m->move_insn_first = 0;
-             m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
-             m->savemode = VOIDmode;
-             m->regno = regno;
-             /* Set M->cond if either loop_invariant_p
-                or consec_sets_invariant_p returned 2
-                (only conditionally invariant).  */
-             m->cond = ((tem | tem1 | tem2) > 1);
-             m->global =  LOOP_REG_GLOBAL_P (loop, regno);
-             m->match = 0;
-             m->lifetime = LOOP_REG_LIFETIME (loop, regno);
-             m->savings = regs->array[regno].n_times_set;
-             if (find_reg_note (p, REG_RETVAL, NULL_RTX))
-               m->savings += libcall_benefit (p);
-             for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
-               regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
-             /* Add M to the end of the chain MOVABLES.  */
-             loop_movables_add (movables, m);
-
-             if (m->consec > 0)
+               /* It is unsafe to move the set.  However, it may be OK to
+                  move the source into a new pseudo, and substitute a
+                  reg-to-reg copy for the original insn.
+
+                  This code used to consider it OK to move a set of a variable
+                  which was not created by the user and not used in an exit
+                  test.
+                  That behavior is incorrect and was removed.  */
+               insert_temp = 1;
+
+             /* Don't try to optimize a register that was made
+                by loop-optimization for an inner loop.
+                We don't know its life-span, so we can't compute
+                the benefit.  */
+             if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
+               ;
+             /* Don't move the source and add a reg-to-reg copy with -Os
+                (this certainly increases size) or if the source is
+                already a reg (the motion will gain nothing). */
+             else if (insert_temp 
+                      && (optimize_size || GET_CODE (SET_SRC (set)) == REG
+                          || (CONSTANT_P (SET_SRC (set))
+                              && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
+               ;
+             else if ((tem = loop_invariant_p (loop, src))
+                      && (dependencies == 0
+                          || (tem2
+                              = loop_invariant_p (loop, dependencies)) != 0)
+                      && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
+                          || (tem1
+                              = consec_sets_invariant_p
+                              (loop, SET_DEST (set),
+                               regs->array[REGNO (SET_DEST (set))].set_in_loop,
+                               p)))
+                      /* If the insn can cause a trap (such as divide by zero),
+                         can't move it unless it's guaranteed to be executed
+                         once loop is entered.  Even a function call might
+                         prevent the trap insn from being reached
+                         (since it might exit!)  */
+                      && ! ((maybe_never || call_passed)
+                            && may_trap_p (src)))
                 {
-                 /* It is possible for the first instruction to have a
-                    REG_EQUAL note but a non-invariant SET_SRC, so we must
-                    remember the status of the first instruction in case
-                    the last instruction doesn't have a REG_EQUAL note.  */
-                 m->move_insn_first = m->move_insn;
-
-                 /* Skip this insn, not checking REG_LIBCALL notes.  */
-                 p = next_nonnote_insn (p);
-                 /* Skip the consecutive insns, if there are any.  */
-                 p = skip_consec_insns (p, m->consec);
-                 /* Back up to the last insn of the consecutive group.  */
-                 p = prev_nonnote_insn (p);
-
-                 /* We must now reset m->move_insn, m->is_equiv, and possibly
-                    m->set_src to correspond to the effects of all the
-                    insns.  */
-                 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
-                 if (temp)
-                   m->set_src = XEXP (temp, 0), m->move_insn = 1;
-                 else
+                 struct movable *m;
+                 int regno = REGNO (SET_DEST (set));
+
+                 /* A potential lossage is where we have a case where two insns
+                    can be combined as long as they are both in the loop, but
+                    we move one of them outside the loop.  For large loops,
+                    this can lose.  The most common case of this is the address
+                    of a function being called.
+
+                    Therefore, if this register is marked as being used
+                    exactly once if we are in a loop with calls
+                    (a "large loop"), see if we can replace the usage of
+                    this register with the source of this SET.  If we can,
+                    delete this insn.
+
+                    Don't do this if P has a REG_RETVAL note or if we have
+                    SMALL_REGISTER_CLASSES and SET_SRC is a hard register.  */
+
+                 if (loop_info->has_call
+                     && regs->array[regno].single_usage != 0
+                     && regs->array[regno].single_usage != const0_rtx
+                     && REGNO_FIRST_UID (regno) == INSN_UID (p)
+                     && (REGNO_LAST_UID (regno)
+                         == INSN_UID (regs->array[regno].single_usage))
+                     && regs->array[regno].set_in_loop == 1
+                     && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
+                     && ! side_effects_p (SET_SRC (set))
+                     && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
+                     && (! SMALL_REGISTER_CLASSES
+                         || (! (GET_CODE (SET_SRC (set)) == REG
+                                && (REGNO (SET_SRC (set))
+                                    < FIRST_PSEUDO_REGISTER))))
+                     /* This test is not redundant; SET_SRC (set) might be
+                        a call-clobbered register and the life of REGNO
+                        might span a call.  */
+                     && ! modified_between_p (SET_SRC (set), p,
+                                              regs->array[regno].single_usage)
+                     && no_labels_between_p (p,
+                                             regs->array[regno].single_usage)
+                     && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
+                                              regs->array[regno].single_usage))
                     {
-                     temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
-                     if (temp && CONSTANT_P (XEXP (temp, 0)))
-                       m->set_src = XEXP (temp, 0), m->move_insn = 1;
-                     else
-                       m->move_insn = 0;
+                     /* Replace any usage in a REG_EQUAL note.  Must copy
+                        the new source, so that we don't get rtx sharing
+                        between the SET_SOURCE and REG_NOTES of insn p.  */
+                     REG_NOTES (regs->array[regno].single_usage)
+                       = (replace_rtx
+                          (REG_NOTES (regs->array[regno].single_usage),
+                           SET_DEST (set), copy_rtx (SET_SRC (set))));
  
+                     delete_insn (p);
+                     for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
+                          i++)
+                       regs->array[regno+i].set_in_loop = 0;
+                     continue;
                     }
-                 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
-               }
-           }
-         /* If this register is always set within a STRICT_LOW_PART
-            or set to zero, then its high bytes are constant.
-            So clear them outside the loop and within the loop
-            just load the low bytes.
-            We must check that the machine has an instruction to do so.
-            Also, if the value loaded into the register
-            depends on the same register, this cannot be done.  */
-         else if (SET_SRC (set) == const0_rtx
-                  && GET_CODE (NEXT_INSN (p)) == INSN
-                  && (set1 = single_set (NEXT_INSN (p)))
-                  && GET_CODE (set1) == SET
-                  && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
-                  && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
-                  && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
-                      == SET_DEST (set))
-                  && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
-           {
-             int regno = REGNO (SET_DEST (set));
-             if (regs->array[regno].set_in_loop == 2)
-               {
-                 struct movable *m;
+
                   m = (struct movable *) xmalloc (sizeof (struct movable));
                   m->next = 0;
                   m->insn = p;
+                 m->set_src = src;
+                 m->dependencies = dependencies;
                   m->set_dest = SET_DEST (set);
-                 m->dependencies = 0;
                   m->force = 0;
-                 m->consec = 0;
+                 m->consec
+                   = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
                   m->done = 0;
                   m->forces = 0;
-                 m->move_insn = 0;
+                 m->partial = 0;
+                 m->move_insn = move_insn;
                   m->move_insn_first = 0;
-                 m->partial = 1;
-                 /* If the insn may not be executed on some cycles,
-                    we can't clear the whole reg; clear just high part.
-                    Not even if the reg is used only within this loop.
-                    Consider this:
-                    while (1)
-                      while (s != t) {
-                        if (foo ()) x = *s;
-                        use (x);
-                      }
-                    Clearing x before the inner loop could clobber a value
-                    being saved from the last time around the outer loop.
-                    However, if the reg is not used outside this loop
-                    and all uses of the register are in the same
-                    basic block as the store, there is no problem.
-
-                    If this insn was made by loop, we don't know its
-                    INSN_LUID and hence must make a conservative
-                    assumption.  */
-                 m->global = (INSN_UID (p) >= max_uid_for_loop
-                              || LOOP_REG_GLOBAL_P (loop, regno)
-                              || (labels_in_range_p
-                                  (p, REGNO_FIRST_LUID (regno))));
-                 if (maybe_never && m->global)
-                   m->savemode = GET_MODE (SET_SRC (set1));
-                 else
-                   m->savemode = VOIDmode;
+                 m->insert_temp = insert_temp;
+                 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
+                 m->savemode = VOIDmode;
                   m->regno = regno;
-                 m->cond = 0;
+                 /* Set M->cond if either loop_invariant_p
+                    or consec_sets_invariant_p returned 2
+                    (only conditionally invariant).  */
+                 m->cond = ((tem | tem1 | tem2) > 1);
+                 m->global =  LOOP_REG_GLOBAL_P (loop, regno);
                   m->match = 0;
                   m->lifetime = LOOP_REG_LIFETIME (loop, regno);
-                 m->savings = 1;
+                 m->savings = regs->array[regno].n_times_set;
+                 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
+                   m->savings += libcall_benefit (p);
                   for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
-                   regs->array[regno+i].set_in_loop = -1;
+                   regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
                   /* Add M to the end of the chain MOVABLES.  */
                   loop_movables_add (movables, m);
+
+                 if (m->consec > 0)
+                   {
+                     /* It is possible for the first instruction to have a
+                        REG_EQUAL note but a non-invariant SET_SRC, so we must
+                        remember the status of the first instruction in case
+                        the last instruction doesn't have a REG_EQUAL note.  */
+                     m->move_insn_first = m->move_insn;
+
+                     /* Skip this insn, not checking REG_LIBCALL notes.  */
+                     p = next_nonnote_insn (p);
+                     /* Skip the consecutive insns, if there are any.  */
+                     p = skip_consec_insns (p, m->consec);
+                     /* Back up to the last insn of the consecutive group.  */
+                     p = prev_nonnote_insn (p);
+
+                     /* We must now reset m->move_insn, m->is_equiv, and
+                        possibly m->set_src to correspond to the effects of
+                        all the insns.  */
+                     temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
+                     if (temp)
+                       m->set_src = XEXP (temp, 0), m->move_insn = 1;
+                     else
+                       {
+                         temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
+                         if (temp && CONSTANT_P (XEXP (temp, 0)))
+                           m->set_src = XEXP (temp, 0), m->move_insn = 1;
+                         else
+                           m->move_insn = 0;
+
+                       }
+                     m->is_equiv
+                       = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
+                   }
+               }
+             /* If this register is always set within a STRICT_LOW_PART
+                or set to zero, then its high bytes are constant.
+                So clear them outside the loop and within the loop
+                just load the low bytes.
+                We must check that the machine has an instruction to do so.
+                Also, if the value loaded into the register
+                depends on the same register, this cannot be done.  */
+             else if (SET_SRC (set) == const0_rtx
+                      && GET_CODE (NEXT_INSN (p)) == INSN
+                      && (set1 = single_set (NEXT_INSN (p)))
+                      && GET_CODE (set1) == SET
+                      && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
+                      && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
+                      && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
+                          == SET_DEST (set))
+                      && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
+               {
+                 int regno = REGNO (SET_DEST (set));
+                 if (regs->array[regno].set_in_loop == 2)
+                   {
+                     struct movable *m;
+                     m = (struct movable *) xmalloc (sizeof (struct movable));
+                     m->next = 0;
+                     m->insn = p;
+                     m->set_dest = SET_DEST (set);
+                     m->dependencies = 0;
+                     m->force = 0;
+                     m->consec = 0;
+                     m->done = 0;
+                     m->forces = 0;
+                     m->move_insn = 0;
+                     m->move_insn_first = 0;
+                     m->insert_temp = insert_temp;
+                     m->partial = 1;
+                     /* If the insn may not be executed on some cycles,
+                        we can't clear the whole reg; clear just high part.
+                        Not even if the reg is used only within this loop.
+                        Consider this:
+                        while (1)
+                          while (s != t) {
+                            if (foo ()) x = *s;
+                            use (x);
+                          }
+                        Clearing x before the inner loop could clobber a value
+                        being saved from the last time around the outer loop.
+                        However, if the reg is not used outside this loop
+                        and all uses of the register are in the same
+                        basic block as the store, there is no problem.
+
+                        If this insn was made by loop, we don't know its
+                        INSN_LUID and hence must make a conservative
+                        assumption.  */
+                     m->global = (INSN_UID (p) >= max_uid_for_loop
+                                  || LOOP_REG_GLOBAL_P (loop, regno)
+                                  || (labels_in_range_p
+                                      (p, REGNO_FIRST_LUID (regno))));
+                     if (maybe_never && m->global)
+                       m->savemode = GET_MODE (SET_SRC (set1));
+                     else
+                       m->savemode = VOIDmode;
+                     m->regno = regno;
+                     m->cond = 0;
+                     m->match = 0;
+                     m->lifetime = LOOP_REG_LIFETIME (loop, regno);
+                     m->savings = 1;
+                     for (i = 0;
+                          i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
+                          i++)
+                       regs->array[regno+i].set_in_loop = -1;
+                     /* Add M to the end of the chain MOVABLES.  */
+                     loop_movables_add (movables, m);
+                   }
                 }
             }
         }
@@ -1073,7 +1086,7 @@ scan_loop (loop, flags)
                   unconditional jump, otherwise the code at the top of the
                   loop might never be executed.  Unconditional jumps are
                   followed by a barrier then the loop_end.  */
-               && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
+              && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
                      && NEXT_INSN (NEXT_INSN (p)) == loop_end
                      && any_uncondjump_p (p)))
         maybe_never = 1;
@@ -1112,10 +1125,12 @@ scan_loop (loop, flags)
    /* Now consider each movable insn to decide whether it is worth moving.
       Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
  
-     Generally this increases code size, so do not move moveables when
-     optimizing for code size.  */
+     For machines with few registers this increases code size, so do not
+     move movables when optimizing for code size on such machines.
+     (The 18 below is the value for i386.)  */
  
-  if (! optimize_size)
+  if (!optimize_size 
+      || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
      {
        move_movables (loop, movables, threshold, insn_count);
  
@@ -1467,6 +1482,7 @@ combine_movables (movables, regs)
    for (m = movables->head; m; m = m->next)
      if (m->match == 0 && regs->array[m->regno].n_times_set == 1
         && m->regno >= FIRST_PSEUDO_REGISTER
+       && !m->insert_temp
         && !m->partial)
        {
         struct movable *m1;
@@ -1479,6 +1495,7 @@ combine_movables (movables, regs)
            one match any later ones.  So start this loop at m->next.  */
         for (m1 = m->next; m1; m1 = m1->next)
           if (m != m1 && m1->match == 0
+             && !m1->insert_temp
               && regs->array[m1->regno].n_times_set == 1
               && m1->regno >= FIRST_PSEUDO_REGISTER
               /* A reg used outside the loop mustn't be eliminated.  */
@@ -1882,6 +1899,10 @@ move_movables (loop, movables, threshold, insn_count)
               int count;
               struct movable *m1;
               rtx first = NULL_RTX;
+             rtx newreg = NULL_RTX;
+
+             if (m->insert_temp)
+               newreg = gen_reg_rtx (GET_MODE (m->set_dest));
  
               /* Now move the insns that set the reg.  */
  
@@ -1925,10 +1946,10 @@ move_movables (loop, movables, threshold, insn_count)
                   for (count = m->consec; count >= 0; count--)
                     {
                       /* If this is the first insn of a library call sequence,
-                        skip to the end.  */
+                        something is very wrong.  */
                       if (GET_CODE (p) != NOTE
                           && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
-                       p = XEXP (temp, 0);
+                       abort ();
  
                       /* If this is the last insn of a libcall sequence, then
                          delete every insn in the sequence except the last.
@@ -1952,15 +1973,26 @@ move_movables (loop, movables, threshold, insn_count)
                          insn stream.  */
                       while (p && GET_CODE (p) == NOTE)
                         p = NEXT_INSN (temp) = NEXT_INSN (p);
+
+                     if (m->insert_temp)
+                       {
+                         /* Replace the original insn with a move from
+                            our newly created temp. */
+                         start_sequence ();
+                         emit_move_insn (m->set_dest, newreg);
+                         seq = get_insns ();
+                         end_sequence ();
+                         emit_insn_before (seq, p);
+                       }
                     }
  
                   start_sequence ();
-                 emit_move_insn (m->set_dest, m->set_src);
-                 temp = get_insns ();
-                 seq = gen_sequence ();
+                 emit_move_insn (m->insert_temp ? newreg : m->set_dest, 
+                                 m->set_src);
+                 seq = get_insns ();
                   end_sequence ();
  
-                 add_label_notes (m->set_src, temp);
+                 add_label_notes (m->set_src, seq);
  
                   i1 = loop_insn_hoist (loop, seq);
                   if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
@@ -2095,7 +2127,7 @@ move_movables (loop, movables, threshold, insn_count)
                             abort ();
                           if (tem != reg)
                             emit_move_insn (reg, tem);
-                         sequence = gen_sequence ();
+                         sequence = get_insns ();
                           end_sequence ();
                           i1 = loop_insn_hoist (loop, sequence);
                         }
@@ -2116,17 +2148,26 @@ move_movables (loop, movables, threshold, insn_count)
                              use the REG_EQUAL note.  */
                           start_sequence ();
                           emit_move_insn (m->set_dest, m->set_src);
-                         temp = get_insns ();
-                         seq = gen_sequence ();
+                         seq = get_insns ();
                           end_sequence ();
  
-                         add_label_notes (m->set_src, temp);
+                         add_label_notes (m->set_src, seq);
  
                           i1 = loop_insn_hoist (loop, seq);
                           if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
                             set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
                                                      : REG_EQUAL, m->set_src);
                         }
+                     else if (m->insert_temp)
+                       {
+                         rtx *reg_map2 = (rtx *) xcalloc (REGNO (newreg), 
+                               sizeof(rtx));
+                         reg_map2 [m->regno] = newreg;
+
+                         i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
+                         replace_regs (i1, reg_map2, REGNO (newreg), 1);
+                         free (reg_map2);
+                       }
                       else
                         i1 = loop_insn_hoist (loop, PATTERN (p));
  
@@ -2175,40 +2216,55 @@ move_movables (loop, movables, threshold, insn_count)
                          insn stream.  */
                       while (p && GET_CODE (p) == NOTE)
                         p = NEXT_INSN (temp) = NEXT_INSN (p);
+
+                     if (m->insert_temp)
+                       {
+                         rtx seq;
+                         /* Replace the original insn with a move from
+                            our newly created temp. */
+                         start_sequence ();
+                         emit_move_insn (m->set_dest, newreg);
+                         seq = get_insns ();
+                         end_sequence ();
+                         emit_insn_before (seq, p);
+                       }
                     }
  
                   /* The more regs we move, the less we like moving them.  */
                   threshold -= 3;
                 }
  
-             /* Any other movable that loads the same register
-                MUST be moved.  */
-             already_moved[regno] = 1;
-
-             /* This reg has been moved out of one loop.  */
-             regs->array[regno].moved_once = 1;
+             m->done = 1;
  
-             /* The reg set here is now invariant.  */
-             if (! m->partial)
+             if (!m->insert_temp)
                 {
-                 int i;
-                 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
-                   regs->array[regno+i].set_in_loop = 0;
-               }
+                 /* Any other movable that loads the same register
+                    MUST be moved.  */
+                 already_moved[regno] = 1;
  
-             m->done = 1;
+                 /* This reg has been moved out of one loop.  */
+                 regs->array[regno].moved_once = 1;
  
-             /* Change the length-of-life info for the register
-                to say it lives at least the full length of this loop.
-                This will help guide optimizations in outer loops.  */
+                 /* The reg set here is now invariant.  */
+                 if (! m->partial)
+                   {
+                     int i;
+                     for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
+                       regs->array[regno+i].set_in_loop = 0;
+                   }
  
-             if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
-               /* This is the old insn before all the moved insns.
-                  We can't use the moved insn because it is out of range
-                  in uid_luid.  Only the old insns have luids.  */
-               REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
-             if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
-               REGNO_LAST_UID (regno) = INSN_UID (loop_end);
+                 /* Change the length-of-life info for the register
+                    to say it lives at least the full length of this loop.
+                    This will help guide optimizations in outer loops.  */
+
+                 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
+                   /* This is the old insn before all the moved insns.
+                      We can't use the moved insn because it is out of range
+                      in uid_luid.  Only the old insns have luids.  */
+                   REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
+                 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
+                   REGNO_LAST_UID (regno) = INSN_UID (loop_end);
+               }
  
               /* Combine with this moved insn any other matching movables.  */
  
@@ -2462,6 +2518,7 @@ prescan_loop (loop)
    loop_info->pre_header_has_call = 0;
    loop_info->has_call = 0;
    loop_info->has_nonconst_call = 0;
+  loop_info->has_prefetch = 0;
    loop_info->has_volatile = 0;
    loop_info->has_tablejump = 0;
    loop_info->has_multiple_exit_targets = 0;
@@ -2473,7 +2530,8 @@ prescan_loop (loop)
    loop_info->first_loop_store_insn = NULL_RTX;
    loop_info->mems_idx = 0;
    loop_info->num_mem_sets = 0;
-
+  /* If loop opts run twice, this was set on 1st pass for 2nd.  */
+  loop_info->preconditioned = NOTE_PRECONDITIONED (end);
  
    for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
         insn = PREV_INSN (insn))
@@ -2881,7 +2939,7 @@ find_and_verify_loops (f, loops)
  
                         /* If no suitable BARRIER was found, create a suitable
                            one before TARGET.  Since TARGET is a fall through
-                          path, we'll need to insert an jump around our block
+                          path, we'll need to insert a jump around our block
                            and add a BARRIER before TARGET.
  
                            This creates an extra unconditional jump outside
@@ -3247,7 +3305,7 @@ loop_invariant_p (loop, x)
  
          We don't know the loop bounds here though, so just fail for all
          labels.  */
-      if (flag_unroll_loops)
+      if (flag_old_unroll_loops)
         return 0;
        else
         return 1;
@@ -3270,6 +3328,13 @@ loop_invariant_p (loop, x)
           && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
         return 0;
  
+      /* Out-of-range regs can occur when we are called from unrolling.
+        These have always been created by the unroller and are set in
+        the loop, hence are never invariant.  */
+
+      if (REGNO (x) >= (unsigned) regs->num)
+       return 0;
+
        if (regs->array[REGNO (x)].set_in_loop < 0)
         return 2;
  
@@ -3581,17 +3646,15 @@ struct prefetch_info
    HOST_WIDE_INT index;
    HOST_WIDE_INT stride;                /* Prefetch stride in bytes in each
                                    iteration.  */
-  unsigned int bytes_accessed; /* Sum of sizes of all acceses to this
+  unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
                                    prefetch area in one iteration.  */
    unsigned int total_bytes;    /* Total bytes loop will access in this block.
                                    This is set only for loops with known
                                    iteration counts and is 0xffffffff
                                    otherwise.  */
+  int prefetch_in_loop;                /* Number of prefetch insns in loop.  */
+  int prefetch_before_loop;    /* Number of prefetch insns before loop.  */
    unsigned int write : 1;      /* 1 for read/write prefetches.  */
-  unsigned int prefetch_in_loop : 1;
-                               /* 1 for those chosen for prefetching.  */
-  unsigned int prefetch_before_loop : 1;
-                               /* 1 for those chosen for prefetching.  */
  };
  
  /* Data used by check_store function.  */
@@ -3620,7 +3683,7 @@ check_store (x, pat, data)
  \f
  /* Like rtx_equal_p, but attempts to swap commutative operands.  This is
     important to get some addresses combined.  Later more sophisticated
-   transformations can be added when necesary.
+   transformations can be added when necessary.
  
     ??? Same trick with swapping operand is done at several other places.
     It can be nice to develop some common way to handle this.  */
@@ -3743,9 +3806,9 @@ remove_constant_addition (x)
        /* In case our parameter was constant, remove extra zero from the
          expression.  */
        if (XEXP (exp, 0) == const0_rtx)
-        *x = XEXP (exp, 1);
+       *x = XEXP (exp, 1);
        else if (XEXP (exp, 1) == const0_rtx)
-        *x = XEXP (exp, 0);
+       *x = XEXP (exp, 0);
      }
  
    return addval;
@@ -3778,7 +3841,9 @@ emit_prefetch_instructions (loop)
    int num_prefetches = 0;
    int num_real_prefetches = 0;
    int num_real_write_prefetches = 0;
-  int ahead;
+  int num_prefetches_before = 0;
+  int num_write_prefetches_before = 0;
+  int ahead = 0;
    int i;
    struct iv_class *bl;
    struct induction *iv;
@@ -3886,29 +3951,29 @@ emit_prefetch_instructions (loop)
             {
               stride = INTVAL (iv->mult_val) * basestride;
               if (stride < 0)
-               {
+               {
                   stride = -stride;
                   stride_sign = -1;
-               }
+               }
  
               /* On some targets, reversed order prefetches are not
-                worthwhile.  */
+                worthwhile.  */
               if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
                 ignore_reason = "reversed order stride";
  
               /* Prefetch of accesses with an extreme stride might not be
-                worthwhile, either.  */
+                worthwhile, either.  */
               else if (PREFETCH_NO_EXTREME_STRIDE
                        && stride > PREFETCH_EXTREME_STRIDE)
                 ignore_reason = "extreme stride";
  
               /* Ignore GIVs with varying add values; we can't predict the
-                value for the next iteration.  */
+                value for the next iteration.  */
               else if (!loop_invariant_p (loop, iv->add_val))
                 ignore_reason = "giv has varying add value";
  
               /* Ignore GIVs in the nested loops; they ought to have been
-                handled already.  */
+                handled already.  */
               else if (iv->maybe_multiple)
                 ignore_reason = "giv is in nested loop";
             }
@@ -3930,7 +3995,6 @@ emit_prefetch_instructions (loop)
           address = simplify_gen_binary (PLUS, Pmode, temp, address);
           index = remove_constant_addition (&address);
  
-         index += size;
           d.mem_write = 0;
           d.mem_address = *iv->location;
  
@@ -3938,6 +4002,13 @@ emit_prefetch_instructions (loop)
              not dirtying the cache pages.  */
           if (PREFETCH_CONDITIONAL || iv->always_executed)
             note_stores (PATTERN (iv->insn), check_store, &d);
+         else
+           {
+             if (loop_dump_stream)
+               fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
+                        INSN_UID (iv->insn), "in conditional code.");
+             continue;
+           }
  
           /* Attempt to find another prefetch to the same array and see if we
              can merge this one.  */
@@ -4004,7 +4075,7 @@ emit_prefetch_instructions (loop)
        /* Attempt to calculate the total number of bytes fetched by all
          iterations of the loop.  Avoid overflow.  */
        if (LOOP_INFO (loop)->n_iterations
-          && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
+         && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
               >= LOOP_INFO (loop)->n_iterations))
         info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
        else
@@ -4016,14 +4087,14 @@ emit_prefetch_instructions (loop)
        if (PREFETCH_ONLY_DENSE_MEM)
         if (density * 256 > PREFETCH_DENSE_MEM * 100
             && (info[i].total_bytes / PREFETCH_BLOCK
-               >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
+               >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
           {
             info[i].prefetch_before_loop = 1;
             info[i].prefetch_in_loop
               = (info[i].total_bytes / PREFETCH_BLOCK
-                > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
+                > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
           }
-        else
+       else
           {
             info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
             if (loop_dump_stream)
@@ -4034,19 +4105,54 @@ emit_prefetch_instructions (loop)
        else
         info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
  
-      if (info[i].prefetch_in_loop)
+      /* Find how many prefetch instructions we'll use within the loop.  */
+      if (info[i].prefetch_in_loop != 0)
         {
-         num_real_prefetches += ((info[i].stride + PREFETCH_BLOCK - 1)
+         info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
                                   / PREFETCH_BLOCK);
+         num_real_prefetches += info[i].prefetch_in_loop;
           if (info[i].write)
-           num_real_write_prefetches
-             += (info[i].stride + PREFETCH_BLOCK - 1) / PREFETCH_BLOCK;
+           num_real_write_prefetches += info[i].prefetch_in_loop;
         }
      }
  
-  if (loop_dump_stream)
+  /* Determine how many iterations ahead to prefetch within the loop, based
+     on how many prefetches we currently expect to do within the loop.  */
+  if (num_real_prefetches != 0)
      {
-      for (i = 0; i < num_prefetches; i++)
+      if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
+       {
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
+                    SIMULTANEOUS_PREFETCHES, num_real_prefetches);
+         num_real_prefetches = 0, num_real_write_prefetches = 0;
+       }
+    }
+  /* We'll also use AHEAD to determine how many prefetch instructions to
+     emit before a loop, so don't leave it zero.  */
+  if (ahead == 0)
+    ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
+
+  for (i = 0; i < num_prefetches; i++)
+    {
+      /* Update if we've decided not to prefetch anything within the loop.  */
+      if (num_real_prefetches == 0)
+       info[i].prefetch_in_loop = 0;
+
+      /* Find how many prefetch instructions we'll use before the loop.  */
+      if (info[i].prefetch_before_loop != 0)
+       {
+         int n = info[i].total_bytes / PREFETCH_BLOCK;
+         if (n > ahead)
+           n = ahead;
+         info[i].prefetch_before_loop = n;
+         num_prefetches_before += n;
+         if (info[i].write)
+           num_write_prefetches_before += n;
+       }
+
+      if (loop_dump_stream)
         {
           if (info[i].prefetch_in_loop == 0
               && info[i].prefetch_before_loop == 0)
@@ -4054,9 +4160,9 @@ emit_prefetch_instructions (loop)
           fprintf (loop_dump_stream, "Prefetch insn: %d",
                    INSN_UID (info[i].giv->insn));
           fprintf (loop_dump_stream,
-                  "; in loop: %s; before: %s; %s\n",
-                  info[i].prefetch_in_loop ? "yes" : "no",
-                  info[i].prefetch_before_loop ? "yes" : "no",
+                  "; in loop: %d; before: %d; %s\n",
+                  info[i].prefetch_in_loop,
+                  info[i].prefetch_before_loop,
                    info[i].write ? "read/write" : "read only");
           fprintf (loop_dump_stream,
                    " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
@@ -4070,93 +4176,101 @@ emit_prefetch_instructions (loop)
           print_rtl (loop_dump_stream, info[i].base_address);
           fprintf (loop_dump_stream, "\n");
         }
-
-      fprintf (loop_dump_stream, "Real prefetches needed: %d (write: %d)\n",
-              num_real_prefetches, num_real_write_prefetches);
      }
  
-  if (!num_real_prefetches)
-    return;
-
-  ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches;
-
-  if (ahead == 0)
+  if (num_real_prefetches + num_prefetches_before > 0)
      {
+      /* Record that this loop uses prefetch instructions.  */
+      LOOP_INFO (loop)->has_prefetch = 1;
+
        if (loop_dump_stream)
-       fprintf (loop_dump_stream,
-                "Prefetch: ignoring loop: ahead is zero; %d < %d\n",
-                SIMULTANEOUS_PREFETCHES, num_real_prefetches);
-      return;
+       {
+         fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
+                  num_real_prefetches, num_real_write_prefetches);
+         fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
+                  num_prefetches_before, num_write_prefetches_before);
+       }
      }
  
    for (i = 0; i < num_prefetches; i++)
      {
-      if (info[i].prefetch_in_loop)
-       {
-         int y;
+      int y;
  
-         for (y = 0; y < ((info[i].stride + PREFETCH_BLOCK - 1)
-                          / PREFETCH_BLOCK); y++)
+      for (y = 0; y < info[i].prefetch_in_loop; y++)
+       {
+         rtx loc = copy_rtx (*info[i].giv->location);
+         rtx insn;
+         int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
+         rtx before_insn = info[i].giv->insn;
+         rtx prev_insn = PREV_INSN (info[i].giv->insn);
+         rtx seq;
+
+         /* We can save some effort by offsetting the address on
+            architectures with offsettable memory references.  */
+         if (offsettable_address_p (0, VOIDmode, loc))
+           loc = plus_constant (loc, bytes_ahead);
+         else
             {
-             rtx loc = copy_rtx (*info[i].giv->location);
-             rtx insn;
-             int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
-             rtx before_insn = info[i].giv->insn;
-             rtx prev_insn = PREV_INSN (info[i].giv->insn);
-
-             /* We can save some effort by offsetting the address on
-                architectures with offsettable memory references.  */
-             if (offsettable_address_p (0, VOIDmode, loc))
-               loc = plus_constant (loc, bytes_ahead);
-             else
-               {
-                 rtx reg = gen_reg_rtx (Pmode);
-                 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
-                                               GEN_INT (bytes_ahead), reg,
-                                               0, before_insn);
-                 loc = reg;
-               }
+             rtx reg = gen_reg_rtx (Pmode);
+             loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
+                                           GEN_INT (bytes_ahead), reg,
+                                           0, before_insn);
+             loc = reg;
+           }
  
-             /* Make sure the address operand is valid for prefetch.  */
-             if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
-                   (loc,
-                    insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
-               loc = force_reg (Pmode, loc);
-             emit_insn_before (gen_prefetch (loc, GEN_INT (info[i].write),
-                                             GEN_INT (3)),
-                               before_insn);
-
-             /* Check all insns emitted and record the new GIV
-                information.  */
-             insn = NEXT_INSN (prev_insn);
-             while (insn != before_insn)
-               {
-                 insn = check_insn_for_givs (loop, insn,
-                                             info[i].giv->always_executed,
-                                             info[i].giv->maybe_multiple);
-                 insn = NEXT_INSN (insn);
-               }
+         start_sequence ();
+         /* Make sure the address operand is valid for prefetch.  */
+         if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
+                 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
+           loc = force_reg (Pmode, loc);
+         emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
+                                  GEN_INT (3)));
+         seq = get_insns ();
+         end_sequence ();
+         emit_insn_before (seq, before_insn);
+
+         /* Check all insns emitted and record the new GIV
+            information.  */
+         insn = NEXT_INSN (prev_insn);
+         while (insn != before_insn)
+           {
+             insn = check_insn_for_givs (loop, insn,
+                                         info[i].giv->always_executed,
+                                         info[i].giv->maybe_multiple);
+             insn = NEXT_INSN (insn);
             }
         }
  
-      if (PREFETCH_BEFORE_LOOP && info[i].prefetch_before_loop)
+      if (PREFETCH_BEFORE_LOOP)
         {
-         int y;
-
-         /* Emit INSNs before the loop to fetch the first cache lines.  */
-         for (y = 0;
-              (!info[i].prefetch_in_loop || y < ahead)
-              && y * PREFETCH_BLOCK < (int) info[i].total_bytes; y ++)
+         /* Emit insns before the loop to fetch the first cache lines or,
+            if we're not prefetching within the loop, everything we expect
+            to need.  */
+         for (y = 0; y < info[i].prefetch_before_loop; y++)
             {
               rtx reg = gen_reg_rtx (Pmode);
               rtx loop_start = loop->start;
+             rtx init_val = info[i].class->initial_value;
               rtx add_val = simplify_gen_binary (PLUS, Pmode,
                                                  info[i].giv->add_val,
                                                  GEN_INT (y * PREFETCH_BLOCK));
  
-             loop_iv_add_mult_emit_before (loop, info[i].class->initial_value,
+             /* Functions called by LOOP_IV_ADD_MULT_EMIT_BEFORE expect a
+                non-constant INIT_VAL to have the same mode as REG, which
+                in this case we know to be Pmode.  */
+             if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
+               {
+                 rtx seq;
+
+                 start_sequence ();
+                 init_val = convert_to_mode (Pmode, init_val, 0);
+                 seq = get_insns ();
+                 end_sequence ();
+                 loop_insn_emit_before (loop, 0, loop_start, seq);
+               }
+             loop_iv_add_mult_emit_before (loop, init_val,
                                             info[i].giv->mult_val,
-                                           add_val, reg, 0, loop_start);
+                                           add_val, reg, 0, loop_start);
               emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
                                               GEN_INT (3)),
                                 loop_start);
@@ -4179,7 +4293,7 @@ emit_prefetch_instructions (loop)
  
  static rtx note_insn;
  
-/* Dummy register to have non-zero DEST_REG for DEST_ADDR type givs.  */
+/* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs.  */
  
  static rtx addr_placeholder;
  
@@ -4214,8 +4328,8 @@ static rtx addr_placeholder;
     LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
     callback.
  
-   NOT_EVERY_ITERATION if current insn is not executed at least once for every
-   loop iteration except for the last one.
+   NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
+   least once for every loop iteration except for the last one.
  
     MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
     loop iteration.
@@ -4225,8 +4339,6 @@ for_each_insn_in_loop (loop, fncall)
       struct loop *loop;
       loop_insn_callback fncall;
  {
-  /* This is 1 if current insn is not executed at least once for every loop
-     iteration.  */
    int not_every_iteration = 0;
    int maybe_multiple = 0;
    int past_loop_latch = 0;
@@ -4238,8 +4350,7 @@ for_each_insn_in_loop (loop, fncall)
    if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
      maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
  
-  /* Scan through loop to find all possible bivs.  */
-
+  /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE.  */
    for (p = next_insn_in_loop (loop, loop->scan_start);
         p != NULL_RTX;
         p = next_insn_in_loop (loop, p))
@@ -4296,9 +4407,9 @@ for_each_insn_in_loop (loop, fncall)
           This can be any kind of jump, since we want to know if insns
           will be executed if the loop is executed.  */
           && !(JUMP_LABEL (p) == loop->top
-            && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
-                 && any_uncondjump_p (p))
-                || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
+              && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
+                   && any_uncondjump_p (p))
+                  || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
         {
           rtx label = 0;
  
@@ -4410,7 +4521,7 @@ loop_bivs_find (loop)
  }
  
  
-/* Determine how BIVS are initialised by looking through pre-header
+/* Determine how BIVS are initialized by looking through pre-header
     extended basic block.  */
  static void
  loop_bivs_init_find (loop)
@@ -4554,7 +4665,7 @@ loop_givs_check (loop)
  }
  
  
-/* Return non-zero if it is possible to eliminate the biv BL provided
+/* Return nonzero if it is possible to eliminate the biv BL provided
     all givs are reduced.  This is possible if either the reg is not
     used outside the loop, or we can compute what its final value will
     be.  */
@@ -4714,7 +4825,7 @@ loop_givs_reduce (loop, bl)
               rtx insert_before;
  
               if (! auto_inc_opt)
-               insert_before = tv->insn;
+               insert_before = NEXT_INSN (tv->insn);
               else if (auto_inc_opt == 1)
                 insert_before = NEXT_INSN (v->insn);
               else
@@ -4844,10 +4955,22 @@ loop_givs_rescan (loop, bl, reg_map)
         }
        else
         {
+         rtx original_insn = v->insn;
+         rtx note;
+
           /* Not replaceable; emit an insn to set the original giv reg from
              the reduced giv, same as above.  */
-         loop_insn_emit_after (loop, 0, v->insn,
-                               gen_move_insn (v->dest_reg, v->new_reg));
+         v->insn = loop_insn_emit_after (loop, 0, original_insn,
+                                         gen_move_insn (v->dest_reg,
+                                                        v->new_reg));
+
+         /* The original insn may have a REG_EQUAL note.  This note is
+            now incorrect and may result in invalid substitutions later.
+            The original insn is dead, but may be part of a libcall
+            sequence, which doesn't seem worth the bother of handling.  */
+         note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
+         if (note)
+           remove_note (original_insn, note);
         }
  
        /* When a loop is reversed, givs which depend on the reversed
@@ -5037,7 +5160,7 @@ strength_reduce (loop, flags)
        return;
      }
  
-  /* Determine how BIVS are initialised by looking through pre-header
+  /* Determine how BIVS are initialized by looking through pre-header
       extended basic block.  */
    loop_bivs_init_find (loop);
  
@@ -5278,7 +5401,7 @@ strength_reduce (loop, flags)
       collected.  Always unroll loops that would be as small or smaller
       unrolled than when rolled.  */
    if ((flags & LOOP_UNROLL)
-      || (!(flags & LOOP_FIRST_PASS)
+      || ((flags & LOOP_AUTO_UNROLL)
           && loop_info->n_iterations > 0
           && unrolled_insn_copies <= insn_count))
      unroll_loop (loop, insn_count, 1);
@@ -5296,7 +5419,7 @@ strength_reduce (loop, flags)
        unsigned HOST_WIDE_INT n
         = loop_info->n_iterations / loop_info->unroll_number;
        if (n > 1)
-       predict_insn (PREV_INSN (loop->end), PRED_LOOP_ITERATIONS,
+       predict_insn (prev_nonnote_insn (loop->end), PRED_LOOP_ITERATIONS,
                       REG_BR_PROB_BASE - REG_BR_PROB_BASE / n);
      }
  
@@ -5485,7 +5608,7 @@ valid_initial_value_p (x, insn, call_seen, loop_start)
     as a possible giv.  INSN is the insn whose pattern X comes from.
     NOT_EVERY_ITERATION is 1 if the insn might not be executed during
     every loop iteration.  MAYBE_MULTIPLE is 1 if the insn might be executed
-   more thanonce in each loop iteration.  */
+   more than once in each loop iteration.  */
  
  static void
  find_mem_givs (loop, x, insn, not_every_iteration, maybe_multiple)
@@ -5690,7 +5813,7 @@ record_giv (loop, v, insn, src_reg, dest_reg, mult_val, add_val, ext_val,
    rtx set = single_set (insn);
    rtx temp;
  
-  /* Attempt to prove constantness of the values.  Don't let simplity_rtx
+  /* Attempt to prove constantness of the values.  Don't let simplify_rtx
       undo the MULT canonicalization that we performed earlier.  */
    temp = simplify_rtx (add_val);
    if (temp
@@ -5775,7 +5898,10 @@ record_giv (loop, v, insn, src_reg, dest_reg, mult_val, add_val, ext_val,
      abort ();
  
    if (type == DEST_ADDR)
-    v->replaceable = 1;
+    {
+      v->replaceable = 1;
+      v->not_replaceable = 0;
+    }
    else
      {
        /* The giv can be replaced outright by the reduced register only if all
@@ -5810,6 +5936,7 @@ record_giv (loop, v, insn, src_reg, dest_reg, mult_val, add_val, ext_val,
              using this biv anyways.  */
  
           v->replaceable = 1;
+         v->not_replaceable = 0;
           for (b = bl->biv; b; b = b->next_iv)
             {
               if (INSN_UID (b->insn) >= max_uid_for_loop
@@ -5885,12 +6012,8 @@ check_final_value (loop, v)
       const struct loop *loop;
       struct induction *v;
  {
-  struct loop_ivs *ivs = LOOP_IVS (loop);
-  struct iv_class *bl;
    rtx final_value = 0;
  
-  bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
-
    /* DEST_ADDR givs will never reach here, because they are always marked
       replaceable above in record_giv.  */
  
@@ -5913,13 +6036,15 @@ check_final_value (loop, v)
  #endif
  
    if ((final_value = final_giv_value (loop, v))
-      && (v->always_computable || last_use_this_basic_block (v->dest_reg, v->insn)))
+      && (v->always_executed
+         || last_use_this_basic_block (v->dest_reg, v->insn)))
      {
        int biv_increment_seen = 0, before_giv_insn = 0;
        rtx p = v->insn;
        rtx last_giv_use;
  
        v->replaceable = 1;
+      v->not_replaceable = 0;
  
        /* When trying to determine whether or not a biv increment occurs
          during the lifetime of the giv, we can ignore uses of the variable
@@ -6445,7 +6570,7 @@ general_induction_var (loop, x, src_reg, add_val, mult_val, ext_val,
     expression that is neither invariant nor a biv or giv), this routine
     returns 0.
  
-   For a non-zero return, the result will have a code of CONST_INT, USE,
+   For a nonzero return, the result will have a code of CONST_INT, USE,
     REG (for a BIV), PLUS, or MULT.  No other codes will occur.
  
     *BENEFIT will be incremented by the benefit of any sub-giv encountered.  */
@@ -6632,7 +6757,7 @@ simplify_giv_expr (loop, x, ext_val, benefit)
                                                                     arg1)),
                                         ext_val, benefit);
             }
-         /* Porpagate the MULT expressions to the intermost nodes.  */
+         /* Propagate the MULT expressions to the innermost nodes.  */
           else if (GET_CODE (arg0) == PLUS)
             {
               /* (invar_0 + invar_1) * invar_2.  Distribute.  */
@@ -6798,7 +6923,7 @@ simplify_giv_expr (loop, x, ext_val, benefit)
                       return simplify_giv_expr (loop, m->match->set_dest,
                                                 ext_val, benefit);
  
-                   /* If consec is non-zero, this is a member of a group of
+                   /* If consec is nonzero, this is a member of a group of
                        instructions that were moved together.  We handle this
                        case only to the point of seeking to the last insn and
                        looking for a REG_EQUAL.  Fail if we don't find one.  */
@@ -7278,21 +7403,8 @@ combine_givs_p (g1, g2)
       the expression of G2 in terms of G1 can be used.  */
    if (ret != NULL_RTX
        && g2->giv_type == DEST_ADDR
-      && memory_address_p (GET_MODE (g2->mem), ret)
-      /* ??? Looses, especially with -fforce-addr, where *g2->location
-        will always be a register, and so anything more complicated
-        gets discarded.  */
-#if 0
-#ifdef ADDRESS_COST
-      && ADDRESS_COST (tem) <= ADDRESS_COST (*g2->location)
-#else
-      && rtx_cost (tem, MEM) <= rtx_cost (*g2->location, MEM)
-#endif
-#endif
-      )
-    {
-      return ret;
-    }
+      && memory_address_p (GET_MODE (g2->mem), ret))
+    return ret;
  
    return NULL_RTX;
  }
@@ -7318,7 +7430,7 @@ check_ext_dependent_givs (bl, loop_info)
       constants in order to be certain of no overflow.  */
    /* ??? An unknown iteration count with an increment of +-1
       combined with friendly exit tests of against an invariant
-     value is also ameanable to optimization.  Not implemented.  */
+     value is also amenable to optimization.  Not implemented.  */
    if (loop_info->n_iterations > 0
        && bl->initial_value
        && GET_CODE (bl->initial_value) == CONST_INT
@@ -7340,7 +7452,7 @@ check_ext_dependent_givs (bl, loop_info)
         neg_incr = 1, abs_incr = -abs_incr;
        total_incr = abs_incr * loop_info->n_iterations;
  
-      /* Check for host arithmatic overflow.  */
+      /* Check for host arithmetic overflow.  */
        if (total_incr / loop_info->n_iterations == abs_incr)
         {
           unsigned HOST_WIDE_INT u_max;
@@ -7353,7 +7465,7 @@ check_ext_dependent_givs (bl, loop_info)
  
           /* Check zero extension of biv ok.  */
           if (start_val >= 0
-             /* Check for host arithmatic overflow.  */
+             /* Check for host arithmetic overflow.  */
               && (neg_incr
                   ? u_end_val < u_start_val
                   : u_end_val > u_start_val)
@@ -7371,7 +7483,7 @@ check_ext_dependent_givs (bl, loop_info)
              keep this fact in mind -- myself included on occasion.
              So leave alone with the signed overflow optimizations.  */
           if (start_val >= -s_max - 1
-             /* Check for host arithmatic overflow.  */
+             /* Check for host arithmetic overflow.  */
               && (neg_incr
                   ? s_end_val < start_val
                   : s_end_val > start_val)
@@ -7700,7 +7812,7 @@ gen_add_mult (b, m, a, reg)
    result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
    if (reg != result)
      emit_move_insn (reg, result);
-  seq = gen_sequence ();
+  seq = get_insns ();
    end_sequence ();
  
    return seq;
@@ -7714,24 +7826,29 @@ loop_regs_update (loop, seq)
       const struct loop *loop ATTRIBUTE_UNUSED;
       rtx seq;
  {
+  rtx insn;
+
    /* Update register info for alias analysis.  */
  
-  if (GET_CODE (seq) == SEQUENCE)
+  if (seq == NULL_RTX)
+    return;
+
+  if (INSN_P (seq))
      {
-      int i;
-      for (i = 0; i < XVECLEN (seq, 0); ++i)
+      insn = seq;
+      while (insn != NULL_RTX)
         {
-         rtx set = single_set (XVECEXP (seq, 0, i));
+         rtx set = single_set (insn);
+
           if (set && GET_CODE (SET_DEST (set)) == REG)
             record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
+
+         insn = NEXT_INSN (insn);
         }
      }
-  else
-    {
-      if (GET_CODE (seq) == SET
-         && GET_CODE (SET_DEST (seq)) == REG)
-       record_base_value (REGNO (SET_DEST (seq)), SET_SRC (seq), 0);
-    }
+  else if (GET_CODE (seq) == SET
+          && GET_CODE (SET_DEST (seq)) == REG)
+    record_base_value (REGNO (SET_DEST (seq)), SET_SRC (seq), 0);
  }
  
  
@@ -7854,16 +7971,20 @@ iv_add_mult_cost (b, m, a, reg)
  }
  \f
  /* Test whether A * B can be computed without
-   an actual multiply insn.  Value is 1 if so.  */
+   an actual multiply insn.  Value is 1 if so.
+
+  ??? This function stinks because it generates a ton of wasted RTL
+  ??? and as a result fragments GC memory to no end.  There are other
+  ??? places in the compiler which are invoked a lot and do the same
+  ??? thing, generate wasted RTL just to see if something is possible.  */
  
  static int
  product_cheap_p (a, b)
       rtx a;
       rtx b;
  {
-  int i;
    rtx tmp;
-  int win = 1;
+  int win, n_insns;
  
    /* If only one is constant, make it B.  */
    if (GET_CODE (a) == CONST_INT)
@@ -7883,31 +8004,31 @@ product_cheap_p (a, b)
  
    start_sequence ();
    expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
-  tmp = gen_sequence ();
+  tmp = get_insns ();
    end_sequence ();
  
-  if (GET_CODE (tmp) == SEQUENCE)
+  win = 1;
+  if (INSN_P (tmp))
      {
-      if (XVEC (tmp, 0) == 0)
-       win = 1;
-      else if (XVECLEN (tmp, 0) > 3)
-       win = 0;
-      else
-       for (i = 0; i < XVECLEN (tmp, 0); i++)
-         {
-           rtx insn = XVECEXP (tmp, 0, i);
-
-           if (GET_CODE (insn) != INSN
-               || (GET_CODE (PATTERN (insn)) == SET
-                   && GET_CODE (SET_SRC (PATTERN (insn))) == MULT)
-               || (GET_CODE (PATTERN (insn)) == PARALLEL
-                   && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == SET
-                   && GET_CODE (SET_SRC (XVECEXP (PATTERN (insn), 0, 0))) == MULT))
-             {
-               win = 0;
-               break;
-             }
-         }
+      n_insns = 0;
+      while (tmp != NULL_RTX)
+       {
+         rtx next = NEXT_INSN (tmp);
+
+         if (++n_insns > 3
+             || GET_CODE (tmp) != INSN
+             || (GET_CODE (PATTERN (tmp)) == SET
+                 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
+             || (GET_CODE (PATTERN (tmp)) == PARALLEL
+                 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
+                 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
+           {
+             win = 0;
+             break;
+           }
+
+         tmp = next;
+       }
      }
    else if (GET_CODE (tmp) == SET
            && GET_CODE (SET_SRC (tmp)) == MULT)
@@ -8195,12 +8316,13 @@ check_dbra_loop (loop, insn_count)
  
        if ((num_nonfixed_reads <= 1
            && ! loop_info->has_nonconst_call
+          && ! loop_info->has_prefetch
            && ! loop_info->has_volatile
            && reversible_mem_store
            && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
                + num_unmoved_movables (loop) + compare_and_branch == insn_count)
            && (bl == ivs->list && bl->next == 0))
-         || no_use_except_counting)
+         || (no_use_except_counting && ! loop_info->has_prefetch))
         {
           rtx tem;
  
@@ -8397,7 +8519,7 @@ check_dbra_loop (loop, insn_count)
                  create a sequence to hold all the insns from expand_inc.  */
               start_sequence ();
               expand_inc (reg, new_add_val);
-             tem = gen_sequence ();
+             tem = get_insns ();
               end_sequence ();
  
               p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
@@ -8438,7 +8560,7 @@ check_dbra_loop (loop, insn_count)
               emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
                                        GET_MODE (reg), 0,
                                        XEXP (jump_label, 0));
-             tem = gen_sequence ();
+             tem = get_insns ();
               end_sequence ();
               emit_jump_insn_before (tem, loop_end);
  
@@ -8523,7 +8645,7 @@ check_dbra_loop (loop, insn_count)
  /* Verify whether the biv BL appears to be eliminable,
     based on the insns in the loop that refer to it.
  
-   If ELIMINATE_P is non-zero, actually do the elimination.
+   If ELIMINATE_P is nonzero, actually do the elimination.
  
     THRESHOLD and INSN_COUNT are from loop_optimize and are used to
     determine whether invariant insns should be placed inside or at the
@@ -8548,11 +8670,12 @@ maybe_eliminate_biv (loop, bl, eliminate_p, threshold, insn_count)
        enum rtx_code code = GET_CODE (p);
        basic_block where_bb = 0;
        rtx where_insn = threshold >= insn_count ? 0 : p;
+      rtx note;
  
        /* If this is a libcall that sets a giv, skip ahead to its end.  */
        if (GET_RTX_CLASS (code) == 'i')
         {
-         rtx note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
+         note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
  
           if (note)
             {
@@ -8570,6 +8693,8 @@ maybe_eliminate_biv (loop, bl, eliminate_p, threshold, insn_count)
                 }
             }
         }
+
+      /* Closely examine the insn if the biv is mentioned.  */
        if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
           && reg_mentioned_p (reg, PATTERN (p))
           && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
@@ -8581,6 +8706,12 @@ maybe_eliminate_biv (loop, bl, eliminate_p, threshold, insn_count)
                      bl->regno, INSN_UID (p));
           break;
         }
+
+      /* If we are eliminating, kill REG_EQUAL notes mentioning the biv.  */
+      if (eliminate_p
+         && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
+         && reg_mentioned_p (reg, XEXP (note, 0)))
+       remove_note (p, note);
      }
  
    if (p == loop->end)
@@ -8595,7 +8726,7 @@ maybe_eliminate_biv (loop, bl, eliminate_p, threshold, insn_count)
  }
  \f
  /* INSN and REFERENCE are instructions in the same insn chain.
-   Return non-zero if INSN is first.  */
+   Return nonzero if INSN is first.  */
  
  int
  loop_insn_first_p (insn, reference)
@@ -8608,9 +8739,9 @@ loop_insn_first_p (insn, reference)
        /* Start with test for not first so that INSN == REFERENCE yields not
           first.  */
        if (q == insn || ! p)
-        return 0;
+       return 0;
        if (p == reference || ! q)
-        return 1;
+       return 1;
  
        /* Either of P or Q might be a NOTE.  Notes have the same LUID as the
           previous insn, hence the <= comparison below does not work if
@@ -8628,7 +8759,7 @@ loop_insn_first_p (insn, reference)
      }
  }
  
-/* We are trying to eliminate BIV in INSN using GIV.  Return non-zero if
+/* We are trying to eliminate BIV in INSN using GIV.  Return nonzero if
     the offset that we have to take into account due to auto-increment /
     div derivation is zero.  */
  static int
@@ -8655,10 +8786,10 @@ biv_elimination_giv_has_0_offset (biv, giv, insn)
  
     If BIV does not appear in X, return 1.
  
-   If ELIMINATE_P is non-zero, actually do the elimination.
+   If ELIMINATE_P is nonzero, actually do the elimination.
     WHERE_INSN/WHERE_BB indicate where extra insns should be added.
     Depending on how many items have been moved out of the loop, it
-   will either be before INSN (when WHERE_INSN is non-zero) or at the
+   will either be before INSN (when WHERE_INSN is nonzero) or at the
     start of the loop (when WHERE_INSN is zero).  */
  
  static int
@@ -8824,6 +8955,22 @@ maybe_eliminate_biv_1 (loop, x, insn, bl, eliminate_p, where_bb, where_insn)
                 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
                   continue;
  
+               /* Don't eliminate if the linear combination that makes up
+                  the giv overflows when it is applied to ARG.  */
+               if (GET_CODE (arg) == CONST_INT)
+                 {
+                   rtx add_val;
+
+                   if (GET_CODE (v->add_val) == CONST_INT)
+                     add_val = v->add_val;
+                   else
+                     add_val = const0_rtx;
+
+                   if (const_mult_add_overflow_p (arg, v->mult_val,
+                                                  add_val, mode, 1))
+                     continue;
+                 }
+
                 if (! eliminate_p)
                   return 1;
  
@@ -8834,13 +8981,10 @@ maybe_eliminate_biv_1 (loop, x, insn, bl, eliminate_p, where_bb, where_insn)
                    the derived constant can be directly placed in the COMPARE,
                    do so.  */
                 if (GET_CODE (arg) == CONST_INT
-                   && GET_CODE (v->mult_val) == CONST_INT
                     && GET_CODE (v->add_val) == CONST_INT)
                   {
-                   validate_change (insn, &XEXP (x, arg_operand),
-                                    GEN_INT (INTVAL (arg)
-                                             * INTVAL (v->mult_val)
-                                             + INTVAL (v->add_val)), 1);
+                   tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
+                                          v->add_val, mode, 1);
                   }
                 else
                   {
@@ -8849,8 +8993,10 @@ maybe_eliminate_biv_1 (loop, x, insn, bl, eliminate_p, where_bb, where_insn)
                     loop_iv_add_mult_emit_before (loop, arg,
                                                   v->mult_val, v->add_val,
                                                   tem, where_bb, where_insn);
-                   validate_change (insn, &XEXP (x, arg_operand), tem, 1);
                   }
+
+               validate_change (insn, &XEXP (x, arg_operand), tem, 1);
+
                 if (apply_change_group ())
                   return 1;
               }
@@ -9113,14 +9259,14 @@ update_reg_last_use (x, insn)
     If the condition cannot be understood, or is an inequality floating-point
     comparison which needs to be reversed, 0 will be returned.
  
-   If REVERSE is non-zero, then reverse the condition prior to canonizing it.
+   If REVERSE is nonzero, then reverse the condition prior to canonizing it.
  
-   If EARLIEST is non-zero, it is a pointer to a place where the earliest
+   If EARLIEST is nonzero, it is a pointer to a place where the earliest
     insn used in locating the condition was found.  If a replacement test
     of the condition is desired, it should be placed in front of that
     insn and we will be sure that the inputs are still valid.
  
-   If WANT_REG is non-zero, we wish the condition to be relative to that
+   If WANT_REG is nonzero, we wish the condition to be relative to that
     register, if possible.  Therefore, do not canonicalize the condition
     further.  */
  
@@ -9162,7 +9308,7 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
          && op1 == CONST0_RTX (GET_MODE (op0))
          && op0 != want_reg)
      {
-      /* Set non-zero when we find something of interest.  */
+      /* Set nonzero when we find something of interest.  */
        rtx x = 0;
  
  #ifdef HAVE_cc0
@@ -9214,6 +9360,9 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
        if (set)
         {
           enum machine_mode inner_mode = GET_MODE (SET_DEST (set));
+#ifdef FLOAT_STORE_FLAG_VALUE
+         REAL_VALUE_TYPE fsfv;
+#endif
  
           /* ??? We may not combine comparisons done in a CCmode with
              comparisons not done in a CCmode.  This is to aid targets
@@ -9241,8 +9390,8 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
  #ifdef FLOAT_STORE_FLAG_VALUE
                      || (code == LT
                          && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
-                        && (REAL_VALUE_NEGATIVE
-                            (FLOAT_STORE_FLAG_VALUE (inner_mode))))
+                        && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
+                            REAL_VALUE_NEGATIVE (fsfv)))
  #endif
                      ))
                    && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'))
@@ -9261,8 +9410,8 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
  #ifdef FLOAT_STORE_FLAG_VALUE
                      || (code == GE
                          && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
-                        && (REAL_VALUE_NEGATIVE
-                            (FLOAT_STORE_FLAG_VALUE (inner_mode))))
+                        && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
+                            REAL_VALUE_NEGATIVE (fsfv)))
  #endif
                      ))
                    && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'
@@ -9353,11 +9502,9 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
         }
      }
  
-#ifdef HAVE_cc0
    /* Never return CC0; return zero instead.  */
-  if (op0 == cc0_rtx)
+  if (CC0_P (op0))
      return 0;
-#endif
  
    return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  }
@@ -9367,7 +9514,7 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
     inequality floating-point comparison which needs to be reversed, 0 will
     be returned.
  
-   If EARLIEST is non-zero, it is a pointer to a place where the earliest
+   If EARLIEST is nonzero, it is a pointer to a place where the earliest
     insn used in locating the condition was found.  If a replacement test
     of the condition is desired, it should be placed in front of that
     insn and we will be sure that the inputs are still valid.  */
@@ -9594,6 +9741,25 @@ loop_regs_scan (loop, extra_size)
  
        if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
         memset (last_set, 0, regs->num * sizeof (rtx));
+
+      /* Invalidate all registers used for function argument passing.
+        We check rtx_varies_p for the same reason as below, to allow
+        optimizing PIC calculations.  */
+      if (GET_CODE (insn) == CALL_INSN)
+       {
+         rtx link;
+         for (link = CALL_INSN_FUNCTION_USAGE (insn); 
+              link; 
+              link = XEXP (link, 1))
+           {
+             rtx op, reg;
+
+             if (GET_CODE (op = XEXP (link, 0)) == USE
+                 && GET_CODE (reg = XEXP (op, 0)) == REG
+                 && rtx_varies_p (reg, 1))
+               regs->array[REGNO (reg)].may_not_optimize = 1;
+           }
+       }
      }
  
    /* Invalidate all hard registers clobbered by calls.  With one exception:
@@ -9603,11 +9769,11 @@ loop_regs_scan (loop, extra_size)
    if (LOOP_INFO (loop)->has_call)
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
-          && rtx_varies_p (gen_rtx_REG (Pmode, i), /*for_alias=*/1))
-        {
-          regs->array[i].may_not_optimize = 1;
-          regs->array[i].set_in_loop = 1;
-        }
+         && rtx_varies_p (regno_reg_rtx[i], 1))
+       {
+         regs->array[i].may_not_optimize = 1;
+         regs->array[i].set_in_loop = 1;
+       }
  
  #ifdef AVOID_CCMODE_COPIES
    /* Don't try to move insns which set CC registers if we should not
@@ -10367,7 +10533,7 @@ loop_insn_emit_after (loop, where_bb, where_insn, pattern)
  }
  
  
-/* If WHERE_INSN is non-zero emit insn for PATTERN before WHERE_INSN
+/* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
     in basic block WHERE_BB (ignored in the interim) within the loop
     otherwise hoist PATTERN into the loop pre-header.  */
  
@@ -10431,7 +10597,7 @@ loop_insn_sink (loop, pattern)
  }
  
  /* bl->final_value can be eighter general_operand or PLUS of general_operand
-   and constant.  Emit sequence of intructions to load it into REG  */
+   and constant.  Emit sequence of instructions to load it into REG.  */
  static rtx
  gen_load_of_final_value (reg, final_value)
       rtx reg, final_value;
@@ -10441,7 +10607,7 @@ gen_load_of_final_value (reg, final_value)
    final_value = force_operand (final_value, reg);
    if (final_value != reg)
      emit_move_insn (reg, final_value);
-  seq = gen_sequence ();
+  seq = get_insns ();
    end_sequence ();
    return seq;
  }
@@ -10544,9 +10710,9 @@ loop_iv_class_dump (bl, file, verbose)
        fprintf (file, " Giv%d: insn %d, benefit %d, ",
                i, INSN_UID (v->insn), v->benefit);
        if (v->giv_type == DEST_ADDR)
-         print_simple_rtl (file, v->mem);
+       print_simple_rtl (file, v->mem);
        else
-         print_simple_rtl (file, single_set (v->insn));
+       print_simple_rtl (file, single_set (v->insn));
        fputc ('\n', file);
      }
  }
@@ -10589,7 +10755,7 @@ loop_giv_dump (v, file, verbose)
  
    if (v->giv_type == DEST_REG)
      fprintf (file, "Giv %d: insn %d",
-            REGNO (v->dest_reg),  INSN_UID (v->insn));
+            REGNO (v->dest_reg), INSN_UID (v->insn));
    else
      fprintf (file, "Dest address: insn %d",
              INSN_UID (v->insn));
@@ -10746,7 +10912,7 @@ loop_dump_aux (loop, file, verbose)
        /* This can happen when a marked loop appears as two nested loops,
          say from while (a || b) {}.  The inner loop won't match
          the loop markers but the outer one will.  */
-      if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->sindex)
+      if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->index)
         fprintf (file, ";;  NOTE_INSN_LOOP_CONT not in loop latch\n");
      }
  }