X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fhaifa-sched.c;h=09c6af22d436d838b9f33720d8db1a86162a936e;hb=6c1e5981257d54606c60959f63a626fb2be64d60;hp=8fb988e2a628eda32be040436e53eb6f2674a4c2;hpb=7db79429fa92fc74d8316729a296cd9d16d0ece0;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c index 8fb988e2a62..09c6af22d43 100644 --- a/gcc/haifa-sched.c +++ b/gcc/haifa-sched.c @@ -1,6 +1,6 @@ /* Instruction scheduling pass. Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, - 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by, and currently maintained by, Jim Wilson (wilson@cygnus.com) @@ -129,10 +129,9 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "diagnostic-core.h" -#include "toplev.h" +#include "hard-reg-set.h" #include "rtl.h" #include "tm_p.h" -#include "hard-reg-set.h" #include "regs.h" #include "function.h" #include "flags.h" @@ -142,6 +141,7 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "sched-int.h" #include "target.h" +#include "common/common-target.h" #include "output.h" #include "params.h" #include "vecprim.h" @@ -149,6 +149,7 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "ira.h" #include "emit-rtl.h" /* FIXME: Can go away once crtl is moved to rtl.h. */ +#include "hashtab.h" #ifdef INSN_SCHEDULING @@ -158,6 +159,35 @@ along with GCC; see the file COPYING3. If not see int issue_rate; +/* This can be set to true by a backend if the scheduler should not + enable a DCE pass. */ +bool sched_no_dce; + +/* The current initiation interval used when modulo scheduling. */ +static int modulo_ii; + +/* The maximum number of stages we are prepared to handle. */ +static int modulo_max_stages; + +/* The number of insns that exist in each iteration of the loop. We use this + to detect when we've scheduled all insns from the first iteration. */ +static int modulo_n_insns; + +/* The current count of insns in the first iteration of the loop that have + already been scheduled. */ +static int modulo_insns_scheduled; + +/* The maximum uid of insns from the first iteration of the loop. */ +static int modulo_iter0_max_uid; + +/* The number of times we should attempt to backtrack when modulo scheduling. + Decreased each time we have to backtrack. */ +static int modulo_backtracks_left; + +/* The stage in which the last insn from the original loop was + scheduled. */ +static int modulo_last_stage; + /* sched-verbose controls the amount of debugging output the scheduler prints. It is controlled by -fsched-verbose=N: N>0 and no -DSR : the output is directed to stderr. @@ -167,31 +197,23 @@ int issue_rate; N=3: rtl at abort point, control-flow, regions info. N=5: dependences info. */ -static int sched_verbose_param = 0; int sched_verbose = 0; /* Debugging file. All printouts are sent to dump, which is always set, either to stderr, or to the dump listing file (-dRS). */ FILE *sched_dump = 0; -/* fix_sched_param() is called from toplev.c upon detection - of the -fsched-verbose=N option. 
*/ - -void -fix_sched_param (const char *param, const char *val) -{ - if (!strcmp (param, "verbose")) - sched_verbose_param = atoi (val); - else - warning (0, "fix_sched_param: unknown param: %s", param); -} - /* This is a placeholder for the scheduler parameters common to all schedulers. */ struct common_sched_info_def *common_sched_info; #define INSN_TICK(INSN) (HID (INSN)->tick) +#define INSN_EXACT_TICK(INSN) (HID (INSN)->exact_tick) +#define INSN_TICK_ESTIMATE(INSN) (HID (INSN)->tick_estimate) #define INTER_TICK(INSN) (HID (INSN)->inter_tick) +#define FEEDS_BACKTRACK_INSN(INSN) (HID (INSN)->feeds_backtrack_insn) +#define SHADOW_P(INSN) (HID (INSN)->shadow_p) +#define MUST_RECOMPUTE_SPEC_P(INSN) (HID (INSN)->must_recompute_spec) /* If INSN_TICK of an instruction is equal to INVALID_TICK, then it should be recalculated from scratch. */ @@ -316,6 +338,22 @@ static struct ready_list *readyp = &ready; /* Scheduling clock. */ static int clock_var; +/* Clock at which the previous instruction was issued. */ +static int last_clock_var; + +/* Set to true if, when queuing a shadow insn, we discover that it would be + scheduled too late. */ +static bool must_backtrack; + +/* The following variable value is number of essential insns issued on + the current cycle. An insn is essential one if it changes the + processors state. */ +int cycle_issued_insns; + +/* This records the actual schedule. It is built up during the main phase + of schedule_block, and afterwards used to reorder the insns in the RTL. */ +static VEC(rtx, heap) *scheduled_insns; + static int may_trap_exp (const_rtx, int); /* Nonzero iff the address is comprised from at most 1 register. */ @@ -342,8 +380,6 @@ const struct common_sched_info_def haifa_common_sched_info = SCHED_PASS_UNKNOWN /* sched_pass_id */ }; -const struct sched_scan_info_def *sched_scan_info; - /* Mapping from instruction UID to its Logical UID. */ VEC (int, heap) *sched_luids = NULL; @@ -497,13 +533,267 @@ haifa_classify_insn (const_rtx insn) { return haifa_classify_rtx (PATTERN (insn)); } + +/* After the scheduler initialization function has been called, this function + can be called to enable modulo scheduling. II is the initiation interval + we should use, it affects the delays for delay_pairs that were recorded as + separated by a given number of stages. + + MAX_STAGES provides us with a limit + after which we give up scheduling; the caller must have unrolled at least + as many copies of the loop body and recorded delay_pairs for them. + + INSNS is the number of real (non-debug) insns in one iteration of + the loop. MAX_UID can be used to test whether an insn belongs to + the first iteration of the loop; all of them have a uid lower than + MAX_UID. */ +void +set_modulo_params (int ii, int max_stages, int insns, int max_uid) +{ + modulo_ii = ii; + modulo_max_stages = max_stages; + modulo_n_insns = insns; + modulo_iter0_max_uid = max_uid; + modulo_backtracks_left = PARAM_VALUE (PARAM_MAX_MODULO_BACKTRACK_ATTEMPTS); +} + +/* A structure to record a pair of insns where the first one is a real + insn that has delay slots, and the second is its delayed shadow. + I1 is scheduled normally and will emit an assembly instruction, + while I2 describes the side effect that takes place at the + transition between cycles CYCLES and (CYCLES + 1) after I1. */ +struct delay_pair +{ + struct delay_pair *next_same_i1; + rtx i1, i2; + int cycles; + /* When doing modulo scheduling, we a delay_pair can also be used to + show that I1 and I2 are the same insn in a different stage. 
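+     (For example, with modulo_ii == 4, two copies of an insn that are
+     2 stages apart issue 2 * 4 = 8 cycles apart; see pair_delay below.)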
If that + is the case, STAGES will be nonzero. */ + int stages; +}; + +/* Two hash tables to record delay_pairs, one indexed by I1 and the other + indexed by I2. */ +static htab_t delay_htab; +static htab_t delay_htab_i2; + +/* Called through htab_traverse. Walk the hashtable using I2 as + index, and delete all elements involving an UID higher than + that pointed to by *DATA. */ +static int +htab_i2_traverse (void **slot, void *data) +{ + int maxuid = *(int *)data; + struct delay_pair *p = *(struct delay_pair **)slot; + if (INSN_UID (p->i2) >= maxuid || INSN_UID (p->i1) >= maxuid) + { + htab_clear_slot (delay_htab_i2, slot); + } + return 1; +} + +/* Called through htab_traverse. Walk the hashtable using I2 as + index, and delete all elements involving an UID higher than + that pointed to by *DATA. */ +static int +htab_i1_traverse (void **slot, void *data) +{ + int maxuid = *(int *)data; + struct delay_pair **pslot = (struct delay_pair **)slot; + struct delay_pair *p, *first, **pprev; + + if (INSN_UID ((*pslot)->i1) >= maxuid) + { + htab_clear_slot (delay_htab, slot); + return 1; + } + pprev = &first; + for (p = *pslot; p; p = p->next_same_i1) + { + if (INSN_UID (p->i2) < maxuid) + { + *pprev = p; + pprev = &p->next_same_i1; + } + } + *pprev = NULL; + if (first == NULL) + htab_clear_slot (delay_htab, slot); + else + *pslot = first; + return 1; +} + +/* Discard all delay pairs which involve an insn with an UID higher + than MAX_UID. */ +void +discard_delay_pairs_above (int max_uid) +{ + htab_traverse (delay_htab, htab_i1_traverse, &max_uid); + htab_traverse (delay_htab_i2, htab_i2_traverse, &max_uid); +} + +/* Returns a hash value for X (which really is a delay_pair), based on + hashing just I1. */ +static hashval_t +delay_hash_i1 (const void *x) +{ + return htab_hash_pointer (((const struct delay_pair *) x)->i1); +} + +/* Returns a hash value for X (which really is a delay_pair), based on + hashing just I2. */ +static hashval_t +delay_hash_i2 (const void *x) +{ + return htab_hash_pointer (((const struct delay_pair *) x)->i2); +} + +/* Return nonzero if I1 of pair X is the same as that of pair Y. */ +static int +delay_i1_eq (const void *x, const void *y) +{ + return ((const struct delay_pair *) x)->i1 == y; +} + +/* Return nonzero if I2 of pair X is the same as that of pair Y. */ +static int +delay_i2_eq (const void *x, const void *y) +{ + return ((const struct delay_pair *) x)->i2 == y; +} + +/* This function can be called by a port just before it starts the final + scheduling pass. It records the fact that an instruction with delay + slots has been split into two insns, I1 and I2. The first one will be + scheduled normally and initiates the operation. The second one is a + shadow which must follow a specific number of cycles after I1; its only + purpose is to show the side effect that occurs at that cycle in the RTL. + If a JUMP_INSN or a CALL_INSN has been split, I1 should be a normal INSN, + while I2 retains the original insn type. + + There are two ways in which the number of cycles can be specified, + involving the CYCLES and STAGES arguments to this function. If STAGES + is zero, we just use the value of CYCLES. Otherwise, STAGES is a factor + which is multiplied by MODULO_II to give the number of cycles. This is + only useful if the caller also calls set_modulo_params to enable modulo + scheduling. 
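+
+   For illustration only (split_delayed_insn is a hypothetical port
+   helper, not something defined here), a port whose insns have a single
+   delay slot might record each split insn as:
+
+     rtx i1, i2;
+     split_delayed_insn (insn, &i1, &i2);
+     record_delay_slot_pair (i1, i2, 1, 0);
+
+   A modulo-scheduling caller would instead pass a nonzero STAGES for
+   copies of an insn that belong to different loop iterations.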
*/ + +void +record_delay_slot_pair (rtx i1, rtx i2, int cycles, int stages) +{ + struct delay_pair *p = XNEW (struct delay_pair); + struct delay_pair **slot; + + p->i1 = i1; + p->i2 = i2; + p->cycles = cycles; + p->stages = stages; + + if (!delay_htab) + { + delay_htab = htab_create (10, delay_hash_i1, delay_i1_eq, NULL); + delay_htab_i2 = htab_create (10, delay_hash_i2, delay_i2_eq, free); + } + slot = ((struct delay_pair **) + htab_find_slot_with_hash (delay_htab, i1, htab_hash_pointer (i1), + INSERT)); + p->next_same_i1 = *slot; + *slot = p; + slot = ((struct delay_pair **) + htab_find_slot_with_hash (delay_htab_i2, i2, htab_hash_pointer (i2), + INSERT)); + *slot = p; +} + +/* Examine the delay pair hashtable to see if INSN is a shadow for another, + and return the other insn if so. Return NULL otherwise. */ +rtx +real_insn_for_shadow (rtx insn) +{ + struct delay_pair *pair; + + if (delay_htab == NULL) + return NULL_RTX; + + pair + = (struct delay_pair *)htab_find_with_hash (delay_htab_i2, insn, + htab_hash_pointer (insn)); + if (!pair || pair->stages > 0) + return NULL_RTX; + return pair->i1; +} + +/* For a pair P of insns, return the fixed distance in cycles from the first + insn after which the second must be scheduled. */ +static int +pair_delay (struct delay_pair *p) +{ + if (p->stages == 0) + return p->cycles; + else + return p->stages * modulo_ii; +} + +/* Given an insn INSN, add a dependence on its delayed shadow if it + has one. Also try to find situations where shadows depend on each other + and add dependencies to the real insns to limit the amount of backtracking + needed. */ +void +add_delay_dependencies (rtx insn) +{ + struct delay_pair *pair; + sd_iterator_def sd_it; + dep_t dep; + + if (!delay_htab) + return; + + pair + = (struct delay_pair *)htab_find_with_hash (delay_htab_i2, insn, + htab_hash_pointer (insn)); + if (!pair) + return; + add_dependence (insn, pair->i1, REG_DEP_ANTI); + if (pair->stages) + return; + FOR_EACH_DEP (pair->i2, SD_LIST_BACK, sd_it, dep) + { + rtx pro = DEP_PRO (dep); + struct delay_pair *other_pair + = (struct delay_pair *)htab_find_with_hash (delay_htab_i2, pro, + htab_hash_pointer (pro)); + if (!other_pair || other_pair->stages) + continue; + if (pair_delay (other_pair) >= pair_delay (pair)) + { + if (sched_verbose >= 4) + { + fprintf (sched_dump, ";;\tadding dependence %d <- %d\n", + INSN_UID (other_pair->i1), + INSN_UID (pair->i1)); + fprintf (sched_dump, ";;\tpair1 %d <- %d, cost %d\n", + INSN_UID (pair->i1), + INSN_UID (pair->i2), + pair_delay (pair)); + fprintf (sched_dump, ";;\tpair2 %d <- %d, cost %d\n", + INSN_UID (other_pair->i1), + INSN_UID (other_pair->i2), + pair_delay (other_pair)); + } + add_dependence (pair->i1, other_pair->i1, REG_DEP_ANTI); + } + } +} + /* Forward declarations. 
*/ static int priority (rtx); static int rank_for_schedule (const void *, const void *); static void swap_sort (rtx *, int); -static void queue_insn (rtx, int); +static void queue_insn (rtx, int, const char *); static int schedule_insn (rtx); static void adjust_priority (rtx); static void advance_one_cycle (void); @@ -540,8 +830,6 @@ static void debug_ready_list (struct ready_list *); static rtx ready_remove (struct ready_list *, int); static void ready_remove_insn (rtx); -static int choose_ready (struct ready_list *, rtx *); - static void fix_inter_tick (rtx, rtx); static int fix_tick_ready (rtx); static void change_queue_index (rtx, int); @@ -551,6 +839,7 @@ static void change_queue_index (rtx, int); static void extend_h_i_d (void); static void init_h_i_d (rtx); +static int haifa_speculate_insn (rtx, ds_t, rtx *); static void generate_recovery_code (rtx); static void process_insn_forw_deps_be_in_spec (rtx, rtx, ds_t); static void begin_speculative_block (rtx); @@ -558,7 +847,7 @@ static void add_to_speculative_block (rtx); static void init_before_recovery (basic_block *); static void create_check_block_twin (rtx, bool); static void fix_recovery_deps (basic_block); -static void haifa_change_pattern (rtx, rtx); +static bool haifa_change_pattern (rtx, rtx); static void dump_new_block_header (int, basic_block, rtx, rtx); static void restore_bb_notes (basic_block); static void fix_jump_move (rtx); @@ -568,10 +857,6 @@ static void sched_remove_insn (rtx); static void clear_priorities (rtx, rtx_vec_t *); static void calc_priorities (rtx_vec_t); static void add_jump_dependencies (rtx, rtx); -#ifdef ENABLE_CHECKING -static int has_edge_p (VEC(edge,gc) *, int); -static void check_cfg (rtx, rtx); -#endif #endif /* INSN_SCHEDULING */ @@ -589,11 +874,11 @@ schedule_insns (void) up. */ bool sched_pressure_p; -/* Map regno -> its cover class. The map defined only when +/* Map regno -> its pressure class. The map defined only when SCHED_PRESSURE_P is true. */ -enum reg_class *sched_regno_cover_class; +enum reg_class *sched_regno_pressure_class; -/* The current register pressure. Only elements corresponding cover +/* The current register pressure. Only elements corresponding pressure classes are defined. */ static int curr_reg_pressure[N_REG_CLASSES]; @@ -623,39 +908,41 @@ sched_init_region_reg_pressure_info (void) static void mark_regno_birth_or_death (int regno, bool birth_p) { - enum reg_class cover_class; + enum reg_class pressure_class; - cover_class = sched_regno_cover_class[regno]; + pressure_class = sched_regno_pressure_class[regno]; if (regno >= FIRST_PSEUDO_REGISTER) { - if (cover_class != NO_REGS) + if (pressure_class != NO_REGS) { if (birth_p) { bitmap_set_bit (curr_reg_live, regno); - curr_reg_pressure[cover_class] - += ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; + curr_reg_pressure[pressure_class] + += (ira_reg_class_max_nregs + [pressure_class][PSEUDO_REGNO_MODE (regno)]); } else { bitmap_clear_bit (curr_reg_live, regno); - curr_reg_pressure[cover_class] - -= ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; + curr_reg_pressure[pressure_class] + -= (ira_reg_class_max_nregs + [pressure_class][PSEUDO_REGNO_MODE (regno)]); } } } - else if (cover_class != NO_REGS + else if (pressure_class != NO_REGS && ! 
TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)) { if (birth_p) { bitmap_set_bit (curr_reg_live, regno); - curr_reg_pressure[cover_class]++; + curr_reg_pressure[pressure_class]++; } else { bitmap_clear_bit (curr_reg_live, regno); - curr_reg_pressure[cover_class]--; + curr_reg_pressure[pressure_class]--; } } } @@ -669,8 +956,8 @@ initiate_reg_pressure_info (bitmap live) unsigned int j; bitmap_iterator bi; - for (i = 0; i < ira_reg_class_cover_size; i++) - curr_reg_pressure[ira_reg_class_cover[i]] = 0; + for (i = 0; i < ira_pressure_classes_num; i++) + curr_reg_pressure[ira_pressure_classes[i]] = 0; bitmap_clear (curr_reg_live); EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi) if (current_nr_blocks == 1 || bitmap_bit_p (region_ref_regs, j)) @@ -688,11 +975,11 @@ setup_ref_regs (rtx x) if (REG_P (x)) { regno = REGNO (x); - if (regno >= FIRST_PSEUDO_REGISTER) - bitmap_set_bit (region_ref_regs, REGNO (x)); + if (HARD_REGISTER_NUM_P (regno)) + bitmap_set_range (region_ref_regs, regno, + hard_regno_nregs[regno][GET_MODE (x)]); else - for (i = hard_regno_nregs[regno][GET_MODE (x)] - 1; i >= 0; i--) - bitmap_set_bit (region_ref_regs, regno + i); + bitmap_set_bit (region_ref_regs, REGNO (x)); return; } fmt = GET_RTX_FORMAT (code); @@ -711,7 +998,7 @@ setup_ref_regs (rtx x) static void initiate_bb_reg_pressure_info (basic_block bb) { - unsigned int i; + unsigned int i ATTRIBUTE_UNUSED; rtx insn; if (current_nr_blocks > 1) @@ -739,9 +1026,9 @@ save_reg_pressure (void) { int i; - for (i = 0; i < ira_reg_class_cover_size; i++) - saved_reg_pressure[ira_reg_class_cover[i]] - = curr_reg_pressure[ira_reg_class_cover[i]]; + for (i = 0; i < ira_pressure_classes_num; i++) + saved_reg_pressure[ira_pressure_classes[i]] + = curr_reg_pressure[ira_pressure_classes[i]]; bitmap_copy (saved_reg_live, curr_reg_live); } @@ -751,9 +1038,9 @@ restore_reg_pressure (void) { int i; - for (i = 0; i < ira_reg_class_cover_size; i++) - curr_reg_pressure[ira_reg_class_cover[i]] - = saved_reg_pressure[ira_reg_class_cover[i]]; + for (i = 0; i < ira_pressure_classes_num; i++) + curr_reg_pressure[ira_pressure_classes[i]] + = saved_reg_pressure[ira_pressure_classes[i]]; bitmap_copy (curr_reg_live, saved_reg_live); } @@ -771,7 +1058,7 @@ dying_use_p (struct reg_use_data *use) } /* Print info about the current register pressure and its excess for - each cover class. */ + each pressure class. */ static void print_curr_reg_pressure (void) { @@ -779,9 +1066,9 @@ print_curr_reg_pressure (void) enum reg_class cl; fprintf (sched_dump, ";;\t"); - for (i = 0; i < ira_reg_class_cover_size; i++) + for (i = 0; i < ira_pressure_classes_num; i++) { - cl = ira_reg_class_cover[i]; + cl = ira_pressure_classes[i]; gcc_assert (curr_reg_pressure[cl] >= 0); fprintf (sched_dump, " %s:%d(%d)", reg_class_names[cl], curr_reg_pressure[cl], @@ -789,13 +1076,179 @@ print_curr_reg_pressure (void) } fprintf (sched_dump, "\n"); } + +/* Determine if INSN has a condition that is clobbered if a register + in SET_REGS is modified. 
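+   If it is, the predication of INSN is no longer valid, so we revert
+   INSN to its original pattern, un-cancel its back dependencies, flag
+   it with HARD_DEP again, and return true.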
*/ +static bool +cond_clobbered_p (rtx insn, HARD_REG_SET set_regs) +{ + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == COND_EXEC); + if (TEST_HARD_REG_BIT (set_regs, REGNO (XEXP (COND_EXEC_TEST (pat), 0)))) + { + sd_iterator_def sd_it; + dep_t dep; + haifa_change_pattern (insn, ORIG_PAT (insn)); + FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) + DEP_STATUS (dep) &= ~DEP_CANCELLED; + TODO_SPEC (insn) = HARD_DEP; + if (sched_verbose >= 2) + fprintf (sched_dump, + ";;\t\tdequeue insn %s because of clobbered condition\n", + (*current_sched_info->print_insn) (insn, 0)); + return true; + } + + return false; +} + +/* Look at the remaining dependencies for insn NEXT, and compute and return + the TODO_SPEC value we should use for it. This is called after one of + NEXT's dependencies has been resolved. */ + +static ds_t +recompute_todo_spec (rtx next) +{ + ds_t new_ds; + sd_iterator_def sd_it; + dep_t dep, control_dep = NULL; + int n_spec = 0; + int n_control = 0; + bool first_p = true; + + if (sd_lists_empty_p (next, SD_LIST_BACK)) + /* NEXT has all its dependencies resolved. */ + return 0; + + if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK)) + return HARD_DEP; + + /* Now we've got NEXT with speculative deps only. + 1. Look at the deps to see what we have to do. + 2. Check if we can do 'todo'. */ + new_ds = 0; + + FOR_EACH_DEP (next, SD_LIST_BACK, sd_it, dep) + { + ds_t ds = DEP_STATUS (dep) & SPECULATIVE; + + if (DEBUG_INSN_P (DEP_PRO (dep)) && !DEBUG_INSN_P (next)) + continue; + + if (ds) + { + n_spec++; + if (first_p) + { + first_p = false; + + new_ds = ds; + } + else + new_ds = ds_merge (new_ds, ds); + } + if (DEP_TYPE (dep) == REG_DEP_CONTROL) + { + n_control++; + control_dep = dep; + DEP_STATUS (dep) &= ~DEP_CANCELLED; + } + } + + if (n_control == 1 && n_spec == 0) + { + rtx pro, other, new_pat; + rtx cond = NULL_RTX; + bool success; + rtx prev = NULL_RTX; + int i; + unsigned regno; + + if ((current_sched_info->flags & DO_PREDICATION) == 0 + || (ORIG_PAT (next) != NULL_RTX + && PREDICATED_PAT (next) == NULL_RTX)) + return HARD_DEP; + + pro = DEP_PRO (control_dep); + other = real_insn_for_shadow (pro); + if (other != NULL_RTX) + pro = other; + + cond = sched_get_reverse_condition_uncached (pro); + regno = REGNO (XEXP (cond, 0)); + + /* Find the last scheduled insn that modifies the condition register. + We can stop looking once we find the insn we depend on through the + REG_DEP_CONTROL; if the condition register isn't modified after it, + we know that it still has the right value. */ + if (QUEUE_INDEX (pro) == QUEUE_SCHEDULED) + FOR_EACH_VEC_ELT_REVERSE (rtx, scheduled_insns, i, prev) + { + HARD_REG_SET t; + + find_all_hard_reg_sets (prev, &t); + if (TEST_HARD_REG_BIT (t, regno)) + return HARD_DEP; + if (prev == pro) + break; + } + if (ORIG_PAT (next) == NULL_RTX) + { + ORIG_PAT (next) = PATTERN (next); + + new_pat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (next)); + success = haifa_change_pattern (next, new_pat); + if (!success) + return HARD_DEP; + PREDICATED_PAT (next) = new_pat; + } + else if (PATTERN (next) != PREDICATED_PAT (next)) + { + bool success = haifa_change_pattern (next, + PREDICATED_PAT (next)); + gcc_assert (success); + } + DEP_STATUS (control_dep) |= DEP_CANCELLED; + return DEP_CONTROL; + } -/* Pointer to the last instruction scheduled. Used by rank_for_schedule, - so that insns independent of the last scheduled insn will be preferred - over dependent instructions. 
*/ + if (PREDICATED_PAT (next) != NULL_RTX) + { + int tick = INSN_TICK (next); + bool success = haifa_change_pattern (next, + ORIG_PAT (next)); + INSN_TICK (next) = tick; + gcc_assert (success); + } + /* We can't handle the case where there are both speculative and control + dependencies, so we return HARD_DEP in such a case. Also fail if + we have speculative dependencies with not enough points, or more than + one control dependency. */ + if ((n_spec > 0 && n_control > 0) + || (n_spec > 0 + /* Too few points? */ + && ds_weak (new_ds) < spec_info->data_weakness_cutoff) + || (n_control > 1)) + return HARD_DEP; + + return new_ds; +} + +/* Pointer to the last instruction scheduled. */ static rtx last_scheduled_insn; +/* Pointer to the last nondebug instruction scheduled within the + block, or the prev_head of the scheduling block. Used by + rank_for_schedule, so that insns independent of the last scheduled + insn will be preferred over dependent instructions. */ +static rtx last_nondebug_scheduled_insn; + +/* Pointer that iterates through the list of unscheduled insns if we + have a dbg_cnt enabled. It always points at an insn prior to the + first unscheduled one. */ +static rtx nonscheduled_insns_begin; + /* Cached cost of the instruction. Use below function to get cost of the insn. -1 here means that the field is not initialized. */ #define INSN_COST(INSN) (HID (INSN)->cost) @@ -857,10 +1310,29 @@ dep_cost_1 (dep_t link, dw_t dw) rtx used = DEP_CON (link); int cost; + if (DEP_COST (link) != UNKNOWN_DEP_COST) + return DEP_COST (link); + + if (delay_htab) + { + struct delay_pair *delay_entry; + delay_entry + = (struct delay_pair *)htab_find_with_hash (delay_htab_i2, used, + htab_hash_pointer (used)); + if (delay_entry) + { + if (delay_entry->i1 == insn) + { + DEP_COST (link) = pair_delay (delay_entry); + return DEP_COST (link); + } + } + } + /* A USE insn should never require the value used to be computed. This allows the computation of a function's result and parameter values to overlap the return and call. We don't care about the - the dependence cost when only decreasing register pressure. */ + dependence cost when only decreasing register pressure. 
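+     Such fake insns are not recognizable, so the recog_memoized test
+     below treats them as having zero cost.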
*/ if (recog_memoized (used) < 0) { cost = 0; @@ -914,6 +1386,7 @@ dep_cost_1 (dep_t link, dw_t dw) cost = 0; } + DEP_COST (link) = cost; return cost; } @@ -1124,23 +1597,24 @@ setup_insn_reg_pressure_info (rtx insn) gcc_checking_assert (!DEBUG_INSN_P (insn)); excess_cost_change = 0; - for (i = 0; i < ira_reg_class_cover_size; i++) - death[ira_reg_class_cover[i]] = 0; + for (i = 0; i < ira_pressure_classes_num; i++) + death[ira_pressure_classes[i]] = 0; for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) if (dying_use_p (use)) { - cl = sched_regno_cover_class[use->regno]; + cl = sched_regno_pressure_class[use->regno]; if (use->regno < FIRST_PSEUDO_REGISTER) death[cl]++; else - death[cl] += ira_reg_class_nregs[cl][PSEUDO_REGNO_MODE (use->regno)]; + death[cl] + += ira_reg_class_max_nregs[cl][PSEUDO_REGNO_MODE (use->regno)]; } pressure_info = INSN_REG_PRESSURE (insn); max_reg_pressure = INSN_MAX_REG_PRESSURE (insn); gcc_assert (pressure_info != NULL && max_reg_pressure != NULL); - for (i = 0; i < ira_reg_class_cover_size; i++) + for (i = 0; i < ira_pressure_classes_num; i++) { - cl = ira_reg_class_cover[i]; + cl = ira_pressure_classes[i]; gcc_assert (curr_reg_pressure[cl] >= 0); change = (int) pressure_info[i].set_increase - death[cl]; before = MAX (0, max_reg_pressure[i] - ira_available_class_regs[cl]); @@ -1165,7 +1639,6 @@ rank_for_schedule (const void *x, const void *y) { rtx tmp = *(const rtx *) y; rtx tmp2 = *(const rtx *) x; - rtx last; int tmp_class, tmp2_class; int val, priority_val, info_val; @@ -1174,8 +1647,10 @@ rank_for_schedule (const void *x, const void *y) /* Schedule debug insns as early as possible. */ if (DEBUG_INSN_P (tmp) && !DEBUG_INSN_P (tmp2)) return -1; - else if (DEBUG_INSN_P (tmp2)) + else if (!DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2)) return 1; + else if (DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2)) + return INSN_LUID (tmp) - INSN_LUID (tmp2); } /* The insn in a schedule group should be issued the first. */ @@ -1212,6 +1687,17 @@ rank_for_schedule (const void *x, const void *y) else return INSN_TICK (tmp) - INSN_TICK (tmp2); } + + /* If we are doing backtracking in this schedule, prefer insns that + have forward dependencies with negative cost against an insn that + was already scheduled. */ + if (current_sched_info->flags & DO_BACKTRACKING) + { + priority_val = FEEDS_BACKTRACK_INSN (tmp2) - FEEDS_BACKTRACK_INSN (tmp); + if (priority_val) + return priority_val; + } + /* Prefer insn with higher priority. */ priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp); @@ -1246,23 +1732,13 @@ rank_for_schedule (const void *x, const void *y) if(flag_sched_rank_heuristic && info_val) return info_val; - if (flag_sched_last_insn_heuristic) - { - last = last_scheduled_insn; - - if (DEBUG_INSN_P (last) && last != current_sched_info->prev_head) - do - last = PREV_INSN (last); - while (!NONDEBUG_INSN_P (last) - && last != current_sched_info->prev_head); - } - /* Compare insns based on their relation to the last scheduled non-debug insn. */ - if (flag_sched_last_insn_heuristic && NONDEBUG_INSN_P (last)) + if (flag_sched_last_insn_heuristic && last_nondebug_scheduled_insn) { dep_t dep1; dep_t dep2; + rtx last = last_nondebug_scheduled_insn; /* Classify the instructions into three classes: 1) Data dependent on last schedule insn. @@ -1326,13 +1802,15 @@ swap_sort (rtx *a, int n) /* Add INSN to the insn queue so that it can be executed at least N_CYCLES after the currently executing insn. Preserve insns - chain for debugging purposes. 
*/ + chain for debugging purposes. REASON will be printed in debugging + output. */ HAIFA_INLINE static void -queue_insn (rtx insn, int n_cycles) +queue_insn (rtx insn, int n_cycles, const char *reason) { int next_q = NEXT_Q_AFTER (q_ptr, n_cycles); rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]); + int new_tick; gcc_assert (n_cycles <= max_insn_queue_index); gcc_assert (!DEBUG_INSN_P (insn)); @@ -1345,10 +1823,25 @@ queue_insn (rtx insn, int n_cycles) fprintf (sched_dump, ";;\t\tReady-->Q: insn %s: ", (*current_sched_info->print_insn) (insn, 0)); - fprintf (sched_dump, "queued for %d cycles.\n", n_cycles); + fprintf (sched_dump, "queued for %d cycles (%s).\n", n_cycles, reason); } QUEUE_INDEX (insn) = next_q; + + if (current_sched_info->flags & DO_BACKTRACKING) + { + new_tick = clock_var + n_cycles; + if (INSN_TICK (insn) == INVALID_TICK || INSN_TICK (insn) < new_tick) + INSN_TICK (insn) = new_tick; + + if (INSN_EXACT_TICK (insn) != INVALID_TICK + && INSN_EXACT_TICK (insn) < clock_var + n_cycles) + { + must_backtrack = true; + if (sched_verbose >= 2) + fprintf (sched_dump, ";;\t\tcausing a backtrack.\n"); + } + } } /* Remove INSN from queue. */ @@ -1408,6 +1901,12 @@ ready_add (struct ready_list *ready, rtx insn, bool first_p) gcc_assert (QUEUE_INDEX (insn) != QUEUE_READY); QUEUE_INDEX (insn) = QUEUE_READY; + + if (INSN_EXACT_TICK (insn) != INVALID_TICK + && INSN_EXACT_TICK (insn) < clock_var) + { + must_backtrack = true; + } } /* Remove the element with the highest priority from the ready list and @@ -1554,9 +2053,6 @@ advance_one_cycle (void) fprintf (sched_dump, ";;\tAdvanced a state.\n"); } -/* Clock at which the previous instruction was issued. */ -static int last_clock_var; - /* Update register pressure after scheduling INSN. */ static void update_register_pressure (rtx insn) @@ -1585,9 +2081,9 @@ setup_insn_max_reg_pressure (rtx after, bool update_p) static int max_reg_pressure[N_REG_CLASSES]; save_reg_pressure (); - for (i = 0; i < ira_reg_class_cover_size; i++) - max_reg_pressure[ira_reg_class_cover[i]] - = curr_reg_pressure[ira_reg_class_cover[i]]; + for (i = 0; i < ira_pressure_classes_num; i++) + max_reg_pressure[ira_pressure_classes[i]] + = curr_reg_pressure[ira_pressure_classes[i]]; for (insn = NEXT_INSN (after); insn != NULL_RTX && ! 
BARRIER_P (insn) && BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (after); @@ -1595,24 +2091,24 @@ setup_insn_max_reg_pressure (rtx after, bool update_p) if (NONDEBUG_INSN_P (insn)) { eq_p = true; - for (i = 0; i < ira_reg_class_cover_size; i++) + for (i = 0; i < ira_pressure_classes_num; i++) { - p = max_reg_pressure[ira_reg_class_cover[i]]; + p = max_reg_pressure[ira_pressure_classes[i]]; if (INSN_MAX_REG_PRESSURE (insn)[i] != p) { eq_p = false; INSN_MAX_REG_PRESSURE (insn)[i] - = max_reg_pressure[ira_reg_class_cover[i]]; + = max_reg_pressure[ira_pressure_classes[i]]; } } if (update_p && eq_p) break; update_register_pressure (insn); - for (i = 0; i < ira_reg_class_cover_size; i++) - if (max_reg_pressure[ira_reg_class_cover[i]] - < curr_reg_pressure[ira_reg_class_cover[i]]) - max_reg_pressure[ira_reg_class_cover[i]] - = curr_reg_pressure[ira_reg_class_cover[i]]; + for (i = 0; i < ira_pressure_classes_num; i++) + if (max_reg_pressure[ira_pressure_classes[i]] + < curr_reg_pressure[ira_pressure_classes[i]]) + max_reg_pressure[ira_pressure_classes[i]] + = curr_reg_pressure[ira_pressure_classes[i]]; } restore_reg_pressure (); } @@ -1626,13 +2122,13 @@ update_reg_and_insn_max_reg_pressure (rtx insn) int i; int before[N_REG_CLASSES]; - for (i = 0; i < ira_reg_class_cover_size; i++) - before[i] = curr_reg_pressure[ira_reg_class_cover[i]]; + for (i = 0; i < ira_pressure_classes_num; i++) + before[i] = curr_reg_pressure[ira_pressure_classes[i]]; update_register_pressure (insn); - for (i = 0; i < ira_reg_class_cover_size; i++) - if (curr_reg_pressure[ira_reg_class_cover[i]] != before[i]) + for (i = 0; i < ira_pressure_classes_num; i++) + if (curr_reg_pressure[ira_pressure_classes[i]] != before[i]) break; - if (i < ira_reg_class_cover_size) + if (i < ira_pressure_classes_num) setup_insn_max_reg_pressure (insn, true); } @@ -1646,6 +2142,67 @@ sched_setup_bb_reg_pressure_info (basic_block bb, rtx after) initiate_bb_reg_pressure_info (bb); setup_insn_max_reg_pressure (after, false); } + +/* If doing predication while scheduling, verify whether INSN, which + has just been scheduled, clobbers the conditions of any + instructions that must be predicated in order to break their + dependencies. If so, remove them from the queues so that they will + only be scheduled once their control dependency is resolved. */ + +static void +check_clobbered_conditions (rtx insn) +{ + HARD_REG_SET t; + int i; + + if ((current_sched_info->flags & DO_PREDICATION) == 0) + return; + + find_all_hard_reg_sets (insn, &t); + + restart: + for (i = 0; i < ready.n_ready; i++) + { + rtx x = ready_element (&ready, i); + if (TODO_SPEC (x) == DEP_CONTROL && cond_clobbered_p (x, t)) + { + ready_remove_insn (x); + goto restart; + } + } + for (i = 0; i <= max_insn_queue_index; i++) + { + rtx link; + int q = NEXT_Q_AFTER (q_ptr, i); + + restart_queue: + for (link = insn_queue[q]; link; link = XEXP (link, 1)) + { + rtx x = XEXP (link, 0); + if (TODO_SPEC (x) == DEP_CONTROL && cond_clobbered_p (x, t)) + { + queue_remove (x); + goto restart_queue; + } + } + } +} + +/* A structure that holds local state for the loop in schedule_block. */ +struct sched_block_state +{ + /* True if no real insns have been scheduled in the current cycle. */ + bool first_cycle_insn_p; + /* True if a shadow insn has been scheduled in the current cycle, which + means that no more normal insns can be issued. */ + bool shadows_only_p; + /* True if we're winding down a modulo schedule, which means that we only + issue insns with INSN_EXACT_TICK set. 
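+     (See the modulo_epilogue_p handling in prune_ready_list.)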
*/ + bool modulo_epilogue; + /* Initialized with the machine's issue rate every cycle, and updated + by calls to the variable_issue hook. */ + int can_issue_more; +}; /* INSN is the "currently executing insn". Launch each insn which was waiting on INSN. READY is the ready list which contains the insns @@ -1678,9 +2235,9 @@ schedule_insn (rtx insn) if (pressure_info != NULL) { fputc (':', sched_dump); - for (i = 0; i < ira_reg_class_cover_size; i++) + for (i = 0; i < ira_pressure_classes_num; i++) fprintf (sched_dump, "%s%+d(%d)", - reg_class_names[ira_reg_class_cover[i]], + reg_class_names[ira_pressure_classes[i]], pressure_info[i].set_increase, pressure_info[i].change); } fputc ('\n', sched_dump); @@ -1691,7 +2248,7 @@ schedule_insn (rtx insn) /* Scheduling instruction should have all its dependencies resolved and should have been removed from the ready list. */ - gcc_assert (sd_lists_empty_p (insn, SD_LIST_BACK)); + gcc_assert (sd_lists_empty_p (insn, SD_LIST_HARD_BACK)); /* Reset debug insns invalidated by moving this insn. */ if (MAY_HAVE_DEBUG_INSNS && !DEBUG_INSN_P (insn)) @@ -1701,6 +2258,12 @@ schedule_insn (rtx insn) rtx dbg = DEP_PRO (dep); struct reg_use_data *use, *next; + if (DEP_STATUS (dep) & DEP_CANCELLED) + { + sd_iterator_next (&sd_it); + continue; + } + gcc_assert (DEBUG_INSN_P (dbg)); if (sched_verbose >= 6) @@ -1754,17 +2317,36 @@ schedule_insn (rtx insn) INSN_TICK untouched. This is a machine-dependent issue, actually. */ INSN_TICK (insn) = clock_var; + check_clobbered_conditions (insn); + /* Update dependent instructions. */ for (sd_it = sd_iterator_start (insn, SD_LIST_FORW); sd_iterator_cond (&sd_it, &dep);) { rtx next = DEP_CON (dep); + bool cancelled = (DEP_STATUS (dep) & DEP_CANCELLED) != 0; /* Resolve the dependence between INSN and NEXT. sd_resolve_dep () moves current dep to another list thus advancing the iterator. */ sd_resolve_dep (sd_it); + if (cancelled) + { + if (QUEUE_INDEX (next) != QUEUE_SCHEDULED) + { + int tick = INSN_TICK (next); + gcc_assert (ORIG_PAT (next) != NULL_RTX); + haifa_change_pattern (next, ORIG_PAT (next)); + INSN_TICK (next) = tick; + if (sd_lists_empty_p (next, SD_LIST_BACK)) + TODO_SPEC (next) = 0; + else if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK)) + TODO_SPEC (next) = HARD_DEP; + } + continue; + } + /* Don't bother trying to mark next as ready if insn is a debug insn. If insn is the last hard dependency, it will have already been discounted. */ @@ -1791,18 +2373,6 @@ schedule_insn (rtx insn) } } - /* This is the place where scheduler doesn't *basically* need backward and - forward dependencies for INSN anymore. Nevertheless they are used in - heuristics in rank_for_schedule (), early_queue_to_ready () and in - some targets (e.g. rs6000). Thus the earliest place where we *can* - remove dependencies is after targetm.sched.finish () call in - schedule_block (). But, on the other side, the safest place to remove - dependencies is when we are finishing scheduling entire region. As we - don't generate [many] dependencies during scheduling itself, we won't - need memory until beginning of next region. - Bottom line: Dependencies are removed for all insns in the end of - scheduling the region. */ - /* Annotate the instruction with issue information -- TImode indicates that the instruction is expected not to be able to issue on the same cycle as the previous insn. A machine @@ -1898,26 +2468,540 @@ remove_notes (rtx head, rtx tail) } } - -/* Return the head and tail pointers of ebb starting at BEG and ending - at END. 
*/ -void -get_ebb_head_tail (basic_block beg, basic_block end, rtx *headp, rtx *tailp) +/* A structure to record enough data to allow us to backtrack the scheduler to + a previous state. */ +struct haifa_saved_data { - rtx beg_head = BB_HEAD (beg); - rtx beg_tail = BB_END (beg); - rtx end_head = BB_HEAD (end); - rtx end_tail = BB_END (end); + /* Next entry on the list. */ + struct haifa_saved_data *next; + + /* Backtracking is associated with scheduling insns that have delay slots. + DELAY_PAIR points to the structure that contains the insns involved, and + the number of cycles between them. */ + struct delay_pair *delay_pair; + + /* Data used by the frontend (e.g. sched-ebb or sched-rgn). */ + void *fe_saved_data; + /* Data used by the backend. */ + void *be_saved_data; + + /* Copies of global state. */ + int clock_var, last_clock_var; + struct ready_list ready; + state_t curr_state; + + rtx last_scheduled_insn; + rtx last_nondebug_scheduled_insn; + int cycle_issued_insns; + + /* Copies of state used in the inner loop of schedule_block. */ + struct sched_block_state sched_block; + + /* We don't need to save q_ptr, as its value is arbitrary and we can set it + to 0 when restoring. */ + int q_size; + rtx *insn_queue; +}; - /* Don't include any notes or labels at the beginning of the BEG - basic block, or notes at the end of the END basic blocks. */ +/* A record, in reverse order, of all scheduled insns which have delay slots + and may require backtracking. */ +static struct haifa_saved_data *backtrack_queue; - if (LABEL_P (beg_head)) +/* For every dependency of INSN, set the FEEDS_BACKTRACK_INSN bit according + to SET_P. */ +static void +mark_backtrack_feeds (rtx insn, int set_p) +{ + sd_iterator_def sd_it; + dep_t dep; + FOR_EACH_DEP (insn, SD_LIST_HARD_BACK, sd_it, dep) + { + FEEDS_BACKTRACK_INSN (DEP_PRO (dep)) = set_p; + } +} + +/* Save the current scheduler state so that we can backtrack to it + later if necessary. PAIR gives the insns that make it necessary to + save this point. SCHED_BLOCK is the local state of schedule_block + that need to be saved. 
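+
+   The new haifa_saved_data entry mirrors the globals restored by
+   restore_last_backtrack_point: the DFA state, the ready list and
+   queues, the clock, and any frontend or backend private state.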
*/ +static void +save_backtrack_point (struct delay_pair *pair, + struct sched_block_state sched_block) +{ + int i; + struct haifa_saved_data *save = XNEW (struct haifa_saved_data); + + save->curr_state = xmalloc (dfa_state_size); + memcpy (save->curr_state, curr_state, dfa_state_size); + + save->ready.first = ready.first; + save->ready.n_ready = ready.n_ready; + save->ready.n_debug = ready.n_debug; + save->ready.veclen = ready.veclen; + save->ready.vec = XNEWVEC (rtx, ready.veclen); + memcpy (save->ready.vec, ready.vec, ready.veclen * sizeof (rtx)); + + save->insn_queue = XNEWVEC (rtx, max_insn_queue_index + 1); + save->q_size = q_size; + for (i = 0; i <= max_insn_queue_index; i++) + { + int q = NEXT_Q_AFTER (q_ptr, i); + save->insn_queue[i] = copy_INSN_LIST (insn_queue[q]); + } + + save->clock_var = clock_var; + save->last_clock_var = last_clock_var; + save->cycle_issued_insns = cycle_issued_insns; + save->last_scheduled_insn = last_scheduled_insn; + save->last_nondebug_scheduled_insn = last_nondebug_scheduled_insn; + + save->sched_block = sched_block; + + if (current_sched_info->save_state) + save->fe_saved_data = (*current_sched_info->save_state) (); + + if (targetm.sched.alloc_sched_context) + { + save->be_saved_data = targetm.sched.alloc_sched_context (); + targetm.sched.init_sched_context (save->be_saved_data, false); + } + else + save->be_saved_data = NULL; + + save->delay_pair = pair; + + save->next = backtrack_queue; + backtrack_queue = save; + + while (pair) + { + mark_backtrack_feeds (pair->i2, 1); + INSN_TICK (pair->i2) = INVALID_TICK; + INSN_EXACT_TICK (pair->i2) = clock_var + pair_delay (pair); + SHADOW_P (pair->i2) = pair->stages == 0; + pair = pair->next_same_i1; + } +} + +/* Walk the ready list and all queues. If any insns have unresolved backwards + dependencies, these must be cancelled deps, broken by predication. Set or + clear (depending on SET) the DEP_CANCELLED bit in DEP_STATUS. */ + +static void +toggle_cancelled_flags (bool set) +{ + int i; + sd_iterator_def sd_it; + dep_t dep; + + if (ready.n_ready > 0) + { + rtx *first = ready_lastpos (&ready); + for (i = 0; i < ready.n_ready; i++) + FOR_EACH_DEP (first[i], SD_LIST_BACK, sd_it, dep) + if (!DEBUG_INSN_P (DEP_PRO (dep))) + { + if (set) + DEP_STATUS (dep) |= DEP_CANCELLED; + else + DEP_STATUS (dep) &= ~DEP_CANCELLED; + } + } + for (i = 0; i <= max_insn_queue_index; i++) + { + int q = NEXT_Q_AFTER (q_ptr, i); + rtx link; + for (link = insn_queue[q]; link; link = XEXP (link, 1)) + { + rtx insn = XEXP (link, 0); + FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) + if (!DEBUG_INSN_P (DEP_PRO (dep))) + { + if (set) + DEP_STATUS (dep) |= DEP_CANCELLED; + else + DEP_STATUS (dep) &= ~DEP_CANCELLED; + } + } + } +} + +/* Pop entries from the SCHEDULED_INSNS vector up to and including INSN. + Restore their dependencies to an unresolved state, and mark them as + queued nowhere. */ + +static void +unschedule_insns_until (rtx insn) +{ + VEC (rtx, heap) *recompute_vec; + + recompute_vec = VEC_alloc (rtx, heap, 0); + + /* Make two passes over the insns to be unscheduled. First, we clear out + dependencies and other trivial bookkeeping. */ + for (;;) + { + rtx last; + sd_iterator_def sd_it; + dep_t dep; + + last = VEC_pop (rtx, scheduled_insns); + + /* This will be changed by restore_backtrack_point if the insn is in + any queue. 
*/ + QUEUE_INDEX (last) = QUEUE_NOWHERE; + if (last != insn) + INSN_TICK (last) = INVALID_TICK; + + if (modulo_ii > 0 && INSN_UID (last) < modulo_iter0_max_uid) + modulo_insns_scheduled--; + + for (sd_it = sd_iterator_start (last, SD_LIST_RES_FORW); + sd_iterator_cond (&sd_it, &dep);) + { + rtx con = DEP_CON (dep); + sd_unresolve_dep (sd_it); + if (!MUST_RECOMPUTE_SPEC_P (con)) + { + MUST_RECOMPUTE_SPEC_P (con) = 1; + VEC_safe_push (rtx, heap, recompute_vec, con); + } + } + + if (last == insn) + break; + } + + /* A second pass, to update ready and speculation status for insns + depending on the unscheduled ones. The first pass must have + popped the scheduled_insns vector up to the point where we + restart scheduling, as recompute_todo_spec requires it to be + up-to-date. */ + while (!VEC_empty (rtx, recompute_vec)) + { + rtx con; + + con = VEC_pop (rtx, recompute_vec); + MUST_RECOMPUTE_SPEC_P (con) = 0; + if (!sd_lists_empty_p (con, SD_LIST_HARD_BACK)) + { + TODO_SPEC (con) = HARD_DEP; + INSN_TICK (con) = INVALID_TICK; + if (PREDICATED_PAT (con) != NULL_RTX) + haifa_change_pattern (con, ORIG_PAT (con)); + } + else if (QUEUE_INDEX (con) != QUEUE_SCHEDULED) + TODO_SPEC (con) = recompute_todo_spec (con); + } + VEC_free (rtx, heap, recompute_vec); +} + +/* Restore scheduler state from the topmost entry on the backtracking queue. + PSCHED_BLOCK_P points to the local data of schedule_block that we must + overwrite with the saved data. + The caller must already have called unschedule_insns_until. */ + +static void +restore_last_backtrack_point (struct sched_block_state *psched_block) +{ + rtx link; + int i; + struct haifa_saved_data *save = backtrack_queue; + + backtrack_queue = save->next; + + if (current_sched_info->restore_state) + (*current_sched_info->restore_state) (save->fe_saved_data); + + if (targetm.sched.alloc_sched_context) + { + targetm.sched.set_sched_context (save->be_saved_data); + targetm.sched.free_sched_context (save->be_saved_data); + } + + /* Clear the QUEUE_INDEX of everything in the ready list or one + of the queues. 
*/ + if (ready.n_ready > 0) + { + rtx *first = ready_lastpos (&ready); + for (i = 0; i < ready.n_ready; i++) + { + rtx insn = first[i]; + QUEUE_INDEX (insn) = QUEUE_NOWHERE; + INSN_TICK (insn) = INVALID_TICK; + } + } + for (i = 0; i <= max_insn_queue_index; i++) + { + int q = NEXT_Q_AFTER (q_ptr, i); + + for (link = insn_queue[q]; link; link = XEXP (link, 1)) + { + rtx x = XEXP (link, 0); + QUEUE_INDEX (x) = QUEUE_NOWHERE; + INSN_TICK (x) = INVALID_TICK; + } + free_INSN_LIST_list (&insn_queue[q]); + } + + free (ready.vec); + ready = save->ready; + + if (ready.n_ready > 0) + { + rtx *first = ready_lastpos (&ready); + for (i = 0; i < ready.n_ready; i++) + { + rtx insn = first[i]; + QUEUE_INDEX (insn) = QUEUE_READY; + TODO_SPEC (insn) = recompute_todo_spec (insn); + INSN_TICK (insn) = save->clock_var; + } + } + + q_ptr = 0; + q_size = save->q_size; + for (i = 0; i <= max_insn_queue_index; i++) + { + int q = NEXT_Q_AFTER (q_ptr, i); + + insn_queue[q] = save->insn_queue[q]; + + for (link = insn_queue[q]; link; link = XEXP (link, 1)) + { + rtx x = XEXP (link, 0); + QUEUE_INDEX (x) = i; + TODO_SPEC (x) = recompute_todo_spec (x); + INSN_TICK (x) = save->clock_var + i; + } + } + free (save->insn_queue); + + toggle_cancelled_flags (true); + + clock_var = save->clock_var; + last_clock_var = save->last_clock_var; + cycle_issued_insns = save->cycle_issued_insns; + last_scheduled_insn = save->last_scheduled_insn; + last_nondebug_scheduled_insn = save->last_nondebug_scheduled_insn; + + *psched_block = save->sched_block; + + memcpy (curr_state, save->curr_state, dfa_state_size); + free (save->curr_state); + + mark_backtrack_feeds (save->delay_pair->i2, 0); + + free (save); + + for (save = backtrack_queue; save; save = save->next) + { + mark_backtrack_feeds (save->delay_pair->i2, 1); + } +} + +/* Discard all data associated with the topmost entry in the backtrack + queue. If RESET_TICK is false, we just want to free the data. If true, + we are doing this because we discovered a reason to backtrack. In the + latter case, also reset the INSN_TICK for the shadow insn. */ +static void +free_topmost_backtrack_point (bool reset_tick) +{ + struct haifa_saved_data *save = backtrack_queue; + int i; + + backtrack_queue = save->next; + + if (reset_tick) + { + struct delay_pair *pair = save->delay_pair; + while (pair) + { + INSN_TICK (pair->i2) = INVALID_TICK; + INSN_EXACT_TICK (pair->i2) = INVALID_TICK; + pair = pair->next_same_i1; + } + } + if (targetm.sched.free_sched_context) + targetm.sched.free_sched_context (save->be_saved_data); + if (current_sched_info->restore_state) + free (save->fe_saved_data); + for (i = 0; i <= max_insn_queue_index; i++) + free_INSN_LIST_list (&save->insn_queue[i]); + free (save->insn_queue); + free (save->curr_state); + free (save->ready.vec); + free (save); +} + +/* Free the entire backtrack queue. */ +static void +free_backtrack_queue (void) +{ + while (backtrack_queue) + free_topmost_backtrack_point (false); +} + +/* Compute INSN_TICK_ESTIMATE for INSN. PROCESSED is a bitmap of + instructions we've previously encountered, a set bit prevents + recursion. BUDGET is a limit on how far ahead we look, it is + reduced on recursive calls. Return true if we produced a good + estimate, or false if we exceeded the budget. 
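+
+   Roughly, on success the estimate satisfies
+
+     INSN_TICK_ESTIMATE (insn)
+       = max (INSN_TICK (insn),
+              max over each unscheduled producer PRO of:
+                INSN_TICK_ESTIMATE (PRO) + cost of the PRO dependence)
+
+   with already-scheduled producers accounted for through INSN_TICK.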
*/ +static bool +estimate_insn_tick (bitmap processed, rtx insn, int budget) +{ + sd_iterator_def sd_it; + dep_t dep; + int earliest = INSN_TICK (insn); + + FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) + { + rtx pro = DEP_PRO (dep); + int t; + + if (DEP_STATUS (dep) & DEP_CANCELLED) + continue; + + if (QUEUE_INDEX (pro) == QUEUE_SCHEDULED) + gcc_assert (INSN_TICK (pro) + dep_cost (dep) <= INSN_TICK (insn)); + else + { + int cost = dep_cost (dep); + if (cost >= budget) + return false; + if (!bitmap_bit_p (processed, INSN_LUID (pro))) + { + if (!estimate_insn_tick (processed, pro, budget - cost)) + return false; + } + gcc_assert (INSN_TICK_ESTIMATE (pro) != INVALID_TICK); + t = INSN_TICK_ESTIMATE (pro) + cost; + if (earliest == INVALID_TICK || t > earliest) + earliest = t; + } + } + bitmap_set_bit (processed, INSN_LUID (insn)); + INSN_TICK_ESTIMATE (insn) = earliest; + return true; +} + +/* Examine the pair of insns in P, and estimate (optimistically, assuming + infinite resources) the cycle in which the delayed shadow can be issued. + Return the number of cycles that must pass before the real insn can be + issued in order to meet this constraint. */ +static int +estimate_shadow_tick (struct delay_pair *p) +{ + bitmap_head processed; + int t; + bool cutoff; + bitmap_initialize (&processed, 0); + + cutoff = !estimate_insn_tick (&processed, p->i2, + max_insn_queue_index + pair_delay (p)); + bitmap_clear (&processed); + if (cutoff) + return max_insn_queue_index; + t = INSN_TICK_ESTIMATE (p->i2) - (clock_var + pair_delay (p) + 1); + if (t > 0) + return t; + return 0; +} + +/* If INSN has no unresolved backwards dependencies, add it to the schedule and + recursively resolve all its forward dependencies. */ +static void +resolve_dependencies (rtx insn) +{ + sd_iterator_def sd_it; + dep_t dep; + + /* Don't use sd_lists_empty_p; it ignores debug insns. */ + if (DEPS_LIST_FIRST (INSN_HARD_BACK_DEPS (insn)) != NULL + || DEPS_LIST_FIRST (INSN_SPEC_BACK_DEPS (insn)) != NULL) + return; + + if (sched_verbose >= 4) + fprintf (sched_dump, ";;\tquickly resolving %d\n", INSN_UID (insn)); + + if (QUEUE_INDEX (insn) >= 0) + queue_remove (insn); + + VEC_safe_push (rtx, heap, scheduled_insns, insn); + + /* Update dependent instructions. */ + for (sd_it = sd_iterator_start (insn, SD_LIST_FORW); + sd_iterator_cond (&sd_it, &dep);) + { + rtx next = DEP_CON (dep); + + if (sched_verbose >= 4) + fprintf (sched_dump, ";;\t\tdep %d against %d\n", INSN_UID (insn), + INSN_UID (next)); + + /* Resolve the dependence between INSN and NEXT. + sd_resolve_dep () moves current dep to another list thus + advancing the iterator. */ + sd_resolve_dep (sd_it); + + if (!IS_SPECULATION_BRANCHY_CHECK_P (insn)) + { + resolve_dependencies (next); + } + else + /* Check always has only one forward dependence (to the first insn in + the recovery block), therefore, this will be executed only once. */ + { + gcc_assert (sd_lists_empty_p (insn, SD_LIST_FORW)); + } + } +} + + +/* Return the head and tail pointers of ebb starting at BEG and ending + at END. */ +void +get_ebb_head_tail (basic_block beg, basic_block end, rtx *headp, rtx *tailp) +{ + rtx beg_head = BB_HEAD (beg); + rtx beg_tail = BB_END (beg); + rtx end_head = BB_HEAD (end); + rtx end_tail = BB_END (end); + + /* Don't include any notes or labels at the beginning of the BEG + basic block, or notes at the end of the END basic blocks. 
*/ + + if (LABEL_P (beg_head)) beg_head = NEXT_INSN (beg_head); while (beg_head != beg_tail) - if (NOTE_P (beg_head) || BOUNDARY_DEBUG_INSN_P (beg_head)) + if (NOTE_P (beg_head)) beg_head = NEXT_INSN (beg_head); + else if (DEBUG_INSN_P (beg_head)) + { + rtx note, next; + + for (note = NEXT_INSN (beg_head); + note != beg_tail; + note = next) + { + next = NEXT_INSN (note); + if (NOTE_P (note)) + { + if (sched_verbose >= 9) + fprintf (sched_dump, "reorder %i\n", INSN_UID (note)); + + reorder_insns_nobb (note, note, PREV_INSN (beg_head)); + + if (BLOCK_FOR_INSN (note) != beg) + df_insn_change_bb (note, beg); + } + else if (!DEBUG_INSN_P (note)) + break; + } + + break; + } else break; @@ -1929,8 +3013,36 @@ get_ebb_head_tail (basic_block beg, basic_block end, rtx *headp, rtx *tailp) end_head = NEXT_INSN (end_head); while (end_head != end_tail) - if (NOTE_P (end_tail) || BOUNDARY_DEBUG_INSN_P (end_tail)) + if (NOTE_P (end_tail)) end_tail = PREV_INSN (end_tail); + else if (DEBUG_INSN_P (end_tail)) + { + rtx note, prev; + + for (note = PREV_INSN (end_tail); + note != end_head; + note = prev) + { + prev = PREV_INSN (note); + if (NOTE_P (note)) + { + if (sched_verbose >= 9) + fprintf (sched_dump, "reorder %i\n", INSN_UID (note)); + + reorder_insns_nobb (note, note, end_tail); + + if (end_tail == BB_END (end)) + BB_END (end) = note; + + if (BLOCK_FOR_INSN (note) != end) + df_insn_change_bb (note, end); + } + else if (!DEBUG_INSN_P (note)) + break; + } + + break; + } else break; @@ -1944,8 +3056,7 @@ no_real_insns_p (const_rtx head, const_rtx tail) { while (head != NEXT_INSN (tail)) { - if (!NOTE_P (head) && !LABEL_P (head) - && !BOUNDARY_DEBUG_INSN_P (head)) + if (!NOTE_P (head) && !LABEL_P (head)) return 0; head = NEXT_INSN (head); } @@ -2000,9 +3111,16 @@ queue_to_ready (struct ready_list *ready) q_ptr = NEXT_Q (q_ptr); if (dbg_cnt (sched_insn) == false) - /* If debug counter is activated do not requeue insn next after - last_scheduled_insn. */ - skip_insn = next_nonnote_nondebug_insn (last_scheduled_insn); + { + /* If debug counter is activated do not requeue the first + nonscheduled insn. 
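+	 NONSCHEDULED_INSNS_BEGIN always points just before the first
+	 unscheduled insn, so we step forward from it, skipping anything
+	 already scheduled, to find that insn.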
*/ + skip_insn = nonscheduled_insns_begin; + do + { + skip_insn = next_nonnote_nondebug_insn (skip_insn); + } + while (QUEUE_INDEX (skip_insn) == QUEUE_SCHEDULED); + } else skip_insn = NULL_RTX; @@ -2023,11 +3141,7 @@ queue_to_ready (struct ready_list *ready) && ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS && !SCHED_GROUP_P (insn) && insn != skip_insn) - { - if (sched_verbose >= 2) - fprintf (sched_dump, "requeued because ready full\n"); - queue_insn (insn, 1); - } + queue_insn (insn, 1, "ready full"); else { ready_add (ready, insn, false); @@ -2089,22 +3203,18 @@ queue_to_ready (struct ready_list *ready) static bool ok_for_early_queue_removal (rtx insn) { - int n_cycles; - rtx prev_insn = last_scheduled_insn; - if (targetm.sched.is_costly_dependence) { + rtx prev_insn; + int n_cycles; + int i = VEC_length (rtx, scheduled_insns); for (n_cycles = flag_sched_stalled_insns_dep; n_cycles; n_cycles--) { - for ( ; prev_insn; prev_insn = PREV_INSN (prev_insn)) + while (i-- > 0) { int cost; - if (prev_insn == current_sched_info->prev_head) - { - prev_insn = NULL; - break; - } + prev_insn = VEC_index (rtx, scheduled_insns, i); if (!NOTE_P (prev_insn)) { @@ -2126,9 +3236,8 @@ ok_for_early_queue_removal (rtx insn) break; } - if (!prev_insn) + if (i == 0) break; - prev_insn = PREV_INSN (prev_insn); } } @@ -2390,6 +3499,12 @@ insn_finishes_cycle_p (rtx insn) return false; } +/* Define type for target data used in multipass scheduling. */ +#ifndef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T +# define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T int +#endif +typedef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T first_cycle_multipass_data_t; + /* The following structure describe an entry of the stack of choices. */ struct choice_entry { @@ -2401,17 +3516,14 @@ struct choice_entry int n; /* State after issuing the insn. */ state_t state; + /* Target-specific data. */ + first_cycle_multipass_data_t target_data; }; /* The following array is used to implement a stack of choices used in function max_issue. */ static struct choice_entry *choice_stack; -/* The following variable value is number of essential insns issued on - the current cycle. An insn is essential one if it changes the - processors state. */ -int cycle_issued_insns; - /* This holds the value of the target dfa_lookahead hook. */ int dfa_lookahead; @@ -2451,7 +3563,7 @@ static int cached_issue_rate = 0; CLOBBERs, etc must be filtered elsewhere. */ int max_issue (struct ready_list *ready, int privileged_n, state_t state, - int *index) + bool first_cycle_insn_p, int *index) { int n, i, all, n_ready, best, delay, tries_num; int more_issue; @@ -2484,6 +3596,10 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, memcpy (top->state, state, dfa_state_size); top->rest = dfa_lookahead; top->n = 0; + if (targetm.sched.first_cycle_multipass_begin) + targetm.sched.first_cycle_multipass_begin (&top->target_data, + ready_try, n_ready, + first_cycle_insn_p); /* Count the number of the insns to search among. */ for (all = i = 0; i < n_ready; i++) @@ -2518,7 +3634,8 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, { n = privileged_n; /* Try to find issued privileged insn. */ - while (n && !ready_try[--n]); + while (n && !ready_try[--n]) + ; } if (/* If all insns are equally good... */ @@ -2542,6 +3659,11 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, /* Backtrack. 
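 	     Undo the last tentative issue: make the insn available to
 	     ready_try again, let the target unwind its multipass state,
 	     and pop the choice stack to restore the saved DFA state.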
*/ ready_try [i] = 0; + + if (targetm.sched.first_cycle_multipass_backtrack) + targetm.sched.first_cycle_multipass_backtrack (&top->target_data, + ready_try, n_ready); + top--; memcpy (state, top->state, dfa_state_size); } @@ -2556,8 +3678,8 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, { if (state_dead_lock_p (state) || insn_finishes_cycle_p (insn)) - /* We won't issue any more instructions in the next - choice_state. */ + /* We won't issue any more instructions in the next + choice_state. */ top->rest = 0; else top->rest--; @@ -2573,8 +3695,15 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, top->index = i; top->n = n; memcpy (top->state, state, dfa_state_size); - ready_try [i] = 1; + + if (targetm.sched.first_cycle_multipass_issue) + targetm.sched.first_cycle_multipass_issue (&top->target_data, + ready_try, n_ready, + insn, + &((top - 1) + ->target_data)); + i = -1; } } @@ -2583,6 +3712,11 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, i++; } + if (targetm.sched.first_cycle_multipass_end) + targetm.sched.first_cycle_multipass_end (best != 0 + ? &choice_stack[1].target_data + : NULL); + /* Restore the original state of the DFA. */ memcpy (state, choice_stack->state, dfa_state_size); @@ -2597,19 +3731,24 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state, 0 if INSN_PTR is set to point to the desirable insn, 1 if choose_ready () should be restarted without advancing the cycle. */ static int -choose_ready (struct ready_list *ready, rtx *insn_ptr) +choose_ready (struct ready_list *ready, bool first_cycle_insn_p, + rtx *insn_ptr) { int lookahead; if (dbg_cnt (sched_insn) == false) { - rtx insn; - - insn = next_nonnote_insn (last_scheduled_insn); + rtx insn = nonscheduled_insns_begin; + do + { + insn = next_nonnote_insn (insn); + } + while (QUEUE_INDEX (insn) == QUEUE_SCHEDULED); if (QUEUE_INDEX (insn) == QUEUE_READY) /* INSN is in the ready_list. */ { + nonscheduled_insns_begin = insn; ready_remove_insn (insn); *insn_ptr = insn; return 0; @@ -2726,7 +3865,7 @@ choose_ready (struct ready_list *ready, rtx *insn_ptr) (insn))); } - if (max_issue (ready, 1, curr_state, &index) == 0) + if (max_issue (ready, 1, curr_state, first_cycle_insn_p, &index) == 0) { *insn_ptr = ready_remove_first (ready); if (sched_verbose >= 4) @@ -2747,15 +3886,215 @@ choose_ready (struct ready_list *ready, rtx *insn_ptr) } } +/* This function is called when we have successfully scheduled a + block. It uses the schedule stored in the scheduled_insns vector + to rearrange the RTL. PREV_HEAD is used as the anchor to which we + append the scheduled insns; TAIL is the insn after the scheduled + block. TARGET_BB is the argument passed to schedule_block. 
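   Recording the schedule first and committing it afterwards is the key
   structural change in this patch: until commit_schedule runs, no RTL
   has moved, so a failed modulo schedule can simply be thrown away.
   The two phases in miniature, with invented toy_* names:

     /* Phase 1: scheduling decisions only append to a vector.  */
     static void
     toy_record (int *vec, int *n, int uid)
     {
       vec[(*n)++] = uid;
     }

     /* Phase 2: replay the recorded order onto the real insn stream;
        bailing out before this point costs nothing.  */
     static void
     toy_commit (const int *vec, int n, void (*move_insn_fn) (int))
     {
       int i;
       for (i = 0; i < n; i++)
         move_insn_fn (vec[i]);
     }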
*/ + +static void +commit_schedule (rtx prev_head, rtx tail, basic_block *target_bb) +{ + unsigned int i; + rtx insn; + + last_scheduled_insn = prev_head; + for (i = 0; + VEC_iterate (rtx, scheduled_insns, i, insn); + i++) + { + if (control_flow_insn_p (last_scheduled_insn) + || current_sched_info->advance_target_bb (*target_bb, insn)) + { + *target_bb = current_sched_info->advance_target_bb (*target_bb, 0); + + if (sched_verbose) + { + rtx x; + + x = next_real_insn (last_scheduled_insn); + gcc_assert (x); + dump_new_block_header (1, *target_bb, x, tail); + } + + last_scheduled_insn = bb_note (*target_bb); + } + + if (current_sched_info->begin_move_insn) + (*current_sched_info->begin_move_insn) (insn, last_scheduled_insn); + move_insn (insn, last_scheduled_insn, + current_sched_info->next_tail); + if (!DEBUG_INSN_P (insn)) + reemit_notes (insn); + last_scheduled_insn = insn; + } + + VEC_truncate (rtx, scheduled_insns, 0); +} + +/* Examine all insns on the ready list and queue those which can't be + issued in this cycle. TEMP_STATE is temporary scheduler state we + can use as scratch space. If FIRST_CYCLE_INSN_P is true, no insns + have been issued for the current cycle, which means it is valid to + issue an asm statement. + + If SHADOWS_ONLY_P is true, we eliminate all real insns and only + leave those for which SHADOW_P is true. If MODULO_EPILOGUE is true, + we only leave insns which have an INSN_EXACT_TICK. */ + +static void +prune_ready_list (state_t temp_state, bool first_cycle_insn_p, + bool shadows_only_p, bool modulo_epilogue_p) +{ + int i; + bool sched_group_found = false; + + restart: + for (i = 0; i < ready.n_ready; i++) + { + rtx insn = ready_element (&ready, i); + int cost = 0; + const char *reason = "resource conflict"; + + if (DEBUG_INSN_P (insn)) + continue; + + if (SCHED_GROUP_P (insn) && !sched_group_found) + { + sched_group_found = true; + if (i > 0) + goto restart; + } + + if (sched_group_found && !SCHED_GROUP_P (insn)) + { + cost = 1; + reason = "not in sched group"; + } + else if (modulo_epilogue_p && INSN_EXACT_TICK (insn) == INVALID_TICK) + { + cost = max_insn_queue_index; + reason = "not an epilogue insn"; + } + else if (shadows_only_p && !SHADOW_P (insn)) + { + cost = 1; + reason = "not a shadow"; + } + else if (recog_memoized (insn) < 0) + { + if (!first_cycle_insn_p + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)) + cost = 1; + reason = "asm"; + } + else if (sched_pressure_p) + cost = 0; + else + { + int delay_cost = 0; + + if (delay_htab) + { + struct delay_pair *delay_entry; + delay_entry + = (struct delay_pair *)htab_find_with_hash (delay_htab, insn, + htab_hash_pointer (insn)); + while (delay_entry && delay_cost == 0) + { + delay_cost = estimate_shadow_tick (delay_entry); + if (delay_cost > max_insn_queue_index) + delay_cost = max_insn_queue_index; + delay_entry = delay_entry->next_same_i1; + } + } + + memcpy (temp_state, curr_state, dfa_state_size); + cost = state_transition (temp_state, insn); + if (cost < 0) + cost = 0; + else if (cost == 0) + cost = 1; + if (cost < delay_cost) + { + cost = delay_cost; + reason = "shadow tick"; + } + } + if (cost >= 1) + { + ready_remove (&ready, i); + queue_insn (insn, cost, reason); + goto restart; + } + } +} + +/* Called when we detect that the schedule is impossible. We examine the + backtrack queue to find the earliest insn that caused this condition. 
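   Each backtrack point remembers the delay pair <i1, i2> that was
   active when it was taken, so the check amounts to asking whether i2
   can still be placed the required distance after i1.  Reduced to the
   arithmetic, with invented toy names -- I1_TICK is the cycle i1 issued
   at, DELAY the required distance to the shadow, CLOCK the current
   cycle, and I2_TICK the cycle the shadow is queued for, or -1 if it is
   not queued yet:

     static int
     toy_shadow_broken (int i1_tick, int delay, int clock, int i2_tick)
     {
       int deadline = i1_tick + delay;
       if (deadline < clock)
         return 1;   /* Too late to issue the shadow at all.  */
       if (i2_tick >= 0 && deadline < i2_tick)
         return 1;   /* Queued past its deadline.  */
       return 0;
     }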
*/ + +static struct haifa_saved_data * +verify_shadows (void) +{ + struct haifa_saved_data *save, *earliest_fail = NULL; + for (save = backtrack_queue; save; save = save->next) + { + int t; + struct delay_pair *pair = save->delay_pair; + rtx i1 = pair->i1; + + for (; pair; pair = pair->next_same_i1) + { + rtx i2 = pair->i2; + + if (QUEUE_INDEX (i2) == QUEUE_SCHEDULED) + continue; + + t = INSN_TICK (i1) + pair_delay (pair); + if (t < clock_var) + { + if (sched_verbose >= 2) + fprintf (sched_dump, + ";;\t\tfailed delay requirements for %d/%d (%d->%d)" + ", not ready\n", + INSN_UID (pair->i1), INSN_UID (pair->i2), + INSN_TICK (pair->i1), INSN_EXACT_TICK (pair->i2)); + earliest_fail = save; + break; + } + if (QUEUE_INDEX (i2) >= 0) + { + int queued_for = INSN_TICK (i2); + + if (t < queued_for) + { + if (sched_verbose >= 2) + fprintf (sched_dump, + ";;\t\tfailed delay requirements for %d/%d" + " (%d->%d), queued too late\n", + INSN_UID (pair->i1), INSN_UID (pair->i2), + INSN_TICK (pair->i1), INSN_EXACT_TICK (pair->i2)); + earliest_fail = save; + break; + } + } + } + } + + return earliest_fail; +} + /* Use forward list scheduling to rearrange insns of block pointed to by TARGET_BB, possibly bringing insns from subsequent blocks in the same region. */ -void +bool schedule_block (basic_block *target_bb) { - int i, first_cycle_insn_p; - int can_issue_more; + int i; + bool success = modulo_ii == 0; + struct sched_block_state ls; state_t temp_state = NULL; /* It is used for multipass scheduling. */ int sort_p, advance, start_clock_var; @@ -2776,6 +4115,8 @@ schedule_block (basic_block *target_bb) haifa_recovery_bb_recently_added_p = false; + backtrack_queue = NULL; + /* Debug info. */ if (sched_verbose) dump_new_block_header (0, *target_bb, head, tail); @@ -2794,10 +4135,11 @@ schedule_block (basic_block *target_bb) targetm.sched.init (sched_dump, sched_verbose, ready.veclen); /* We start inserting insns after PREV_HEAD. */ - last_scheduled_insn = prev_head; + last_scheduled_insn = nonscheduled_insns_begin = prev_head; + last_nondebug_scheduled_insn = NULL_RTX; gcc_assert ((NOTE_P (last_scheduled_insn) - || BOUNDARY_DEBUG_INSN_P (last_scheduled_insn)) + || DEBUG_INSN_P (last_scheduled_insn)) && BLOCK_FOR_INSN (last_scheduled_insn) == *target_bb); /* Initialize INSN_QUEUE. Q_SIZE is the total number of insns in the @@ -2839,12 +4181,12 @@ schedule_block (basic_block *target_bb) /* Delay all insns past it for 1 cycle. If debug counter is activated make an exception for the insn right after - last_scheduled_insn. */ + nonscheduled_insns_begin. */ { rtx skip_insn; if (dbg_cnt (sched_insn) == false) - skip_insn = next_nonnote_insn (last_scheduled_insn); + skip_insn = next_nonnote_insn (nonscheduled_insns_begin); else skip_insn = NULL_RTX; @@ -2855,8 +4197,10 @@ schedule_block (basic_block *target_bb) insn = ready_remove (&ready, i); if (insn != skip_insn) - queue_insn (insn, 1); + queue_insn (insn, 1, "list truncated"); } + if (skip_insn) + ready_add (&ready, skip_insn, true); } } @@ -2867,7 +4211,13 @@ schedule_block (basic_block *target_bb) advance = 0; + gcc_assert (VEC_length (rtx, scheduled_insns) == 0); sort_p = TRUE; + must_backtrack = false; + modulo_insns_scheduled = 0; + + ls.modulo_epilogue = false; + /* Loop until all the insns in BB are scheduled. */ while ((*current_sched_info->schedule_more_p) ()) { @@ -2896,78 +4246,114 @@ schedule_block (basic_block *target_bb) } while (advance > 0); - if (sort_p) + if (ls.modulo_epilogue) { - /* Sort the ready list based on priority. 
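   The modulo-scheduling tests that now open the loop body derive
   everything from the clock.  Reduced to its core, with invented toy
   names:

     /* With initiation interval II, cycle CLOCK falls in this stage.  */
     static int
     toy_stage (int clock, int ii)
     {
       return clock / ii;
     }

     /* The kernel is complete once every insn of iteration 0 has been
        scheduled and the clock has moved past the stage that received
        the last of them.  */
     static int
     toy_kernel_found (int scheduled, int total, int stage, int last_stage)
     {
       return scheduled == total && stage > last_stage;
     }

   Exceeding modulo_max_stages, checked just below, is the converse
   signal to give up on the current initiation interval.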
*/ - ready_sort (&ready); - - if (sched_verbose >= 2) + int stage = clock_var / modulo_ii; + if (stage > modulo_last_stage * 2 + 2) { - fprintf (sched_dump, ";;\t\tReady list after ready_sort: "); - debug_ready_list (&ready); + if (sched_verbose >= 2) + fprintf (sched_dump, + ";;\t\tmodulo scheduled succeeded at II %d\n", + modulo_ii); + success = true; + goto end_schedule; } } - - /* We don't want md sched reorder to even see debug isns, so put - them out right away. */ - if (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0))) + else if (modulo_ii > 0) { - if (control_flow_insn_p (last_scheduled_insn)) + int stage = clock_var / modulo_ii; + if (stage > modulo_max_stages) { - *target_bb = current_sched_info->advance_target_bb - (*target_bb, 0); - - if (sched_verbose) - { - rtx x; - - x = next_real_insn (last_scheduled_insn); - gcc_assert (x); - dump_new_block_header (1, *target_bb, x, tail); - } - - last_scheduled_insn = bb_note (*target_bb); + if (sched_verbose >= 2) + fprintf (sched_dump, + ";;\t\tfailing schedule due to excessive stages\n"); + goto end_schedule; } - - while (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0))) + if (modulo_n_insns == modulo_insns_scheduled + && stage > modulo_last_stage) { - rtx insn = ready_remove_first (&ready); - gcc_assert (DEBUG_INSN_P (insn)); - (*current_sched_info->begin_schedule_ready) (insn, - last_scheduled_insn); - move_insn (insn, last_scheduled_insn, - current_sched_info->next_tail); - last_scheduled_insn = insn; - advance = schedule_insn (insn); - gcc_assert (advance == 0); - if (ready.n_ready > 0) - ready_sort (&ready); + if (sched_verbose >= 2) + fprintf (sched_dump, + ";;\t\tfound kernel after %d stages, II %d\n", + stage, modulo_ii); + ls.modulo_epilogue = true; } - - if (!ready.n_ready) - continue; } - /* Allow the target to reorder the list, typically for - better instruction bundling. */ - if (sort_p && targetm.sched.reorder - && (ready.n_ready == 0 - || !SCHED_GROUP_P (ready_element (&ready, 0)))) - can_issue_more = - targetm.sched.reorder (sched_dump, sched_verbose, - ready_lastpos (&ready), - &ready.n_ready, clock_var); - else - can_issue_more = issue_rate; + prune_ready_list (temp_state, true, false, ls.modulo_epilogue); + if (ready.n_ready == 0) + continue; + if (must_backtrack) + goto do_backtrack; - first_cycle_insn_p = 1; + ls.first_cycle_insn_p = true; + ls.shadows_only_p = false; cycle_issued_insns = 0; + ls.can_issue_more = issue_rate; for (;;) { rtx insn; int cost; - bool asm_p = false; + bool asm_p; + if (sort_p && ready.n_ready > 0) + { + /* Sort the ready list based on priority. This must be + done every iteration through the loop, as schedule_insn + may have readied additional insns that will not be + sorted correctly. */ + ready_sort (&ready); + + if (sched_verbose >= 2) + { + fprintf (sched_dump, ";;\t\tReady list after ready_sort: "); + debug_ready_list (&ready); + } + } + + /* We don't want md sched reorder to even see debug isns, so put + them out right away. 
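   Emitting the debug insns immediately means the target reorder hooks
   only ever see real insns.  The loop that follows boils down to this,
   with invented toy names:

     static int
     toy_flush_debug (int *ready, int n_ready, int (*is_debug_p) (int),
                      void (*emit) (int))
     {
       while (n_ready > 0 && is_debug_p (ready[0]))
         {
           emit (ready[0]);
           /* Toy removal by swapping in the last element; the real
              ready list re-sorts by priority after each emission.  */
           ready[0] = ready[--n_ready];
         }
       return n_ready;
     }

   Debug insns consume no machine resources, so the clock does not
   advance while they are flushed.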
*/ + if (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0)) + && (*current_sched_info->schedule_more_p) ()) + { + while (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0))) + { + rtx insn = ready_remove_first (&ready); + gcc_assert (DEBUG_INSN_P (insn)); + (*current_sched_info->begin_schedule_ready) (insn); + VEC_safe_push (rtx, heap, scheduled_insns, insn); + last_scheduled_insn = insn; + advance = schedule_insn (insn); + gcc_assert (advance == 0); + if (ready.n_ready > 0) + ready_sort (&ready); + } + } + + if (ls.first_cycle_insn_p && !ready.n_ready) + break; + + resume_after_backtrack: + /* Allow the target to reorder the list, typically for + better instruction bundling. */ + if (sort_p + && (ready.n_ready == 0 + || !SCHED_GROUP_P (ready_element (&ready, 0)))) + { + if (ls.first_cycle_insn_p && targetm.sched.reorder) + ls.can_issue_more + = targetm.sched.reorder (sched_dump, sched_verbose, + ready_lastpos (&ready), + &ready.n_ready, clock_var); + else if (!ls.first_cycle_insn_p && targetm.sched.reorder2) + ls.can_issue_more + = targetm.sched.reorder2 (sched_dump, sched_verbose, + ready.n_ready + ? ready_lastpos (&ready) : NULL, + &ready.n_ready, clock_var); + } + + restart_choose_ready: if (sched_verbose >= 2) { fprintf (sched_dump, ";;\tReady list (t = %3d): ", @@ -2978,7 +4364,7 @@ schedule_block (basic_block *target_bb) } if (ready.n_ready == 0 - && can_issue_more + && ls.can_issue_more && reload_completed) { /* Allow scheduling insns directly from the queue in case @@ -2992,7 +4378,7 @@ schedule_block (basic_block *target_bb) } if (ready.n_ready == 0 - || !can_issue_more + || !ls.can_issue_more || state_dead_lock_p (curr_state) || !(*current_sched_info->schedule_more_p) ()) break; @@ -3003,14 +4389,13 @@ schedule_block (basic_block *target_bb) int res; insn = NULL_RTX; - res = choose_ready (&ready, &insn); + res = choose_ready (&ready, ls.first_cycle_insn_p, &insn); if (res < 0) /* Finish cycle. */ break; if (res > 0) - /* Restart choose_ready (). */ - continue; + goto restart_choose_ready; gcc_assert (insn != NULL_RTX); } @@ -3045,172 +4430,185 @@ schedule_block (basic_block *target_bb) } sort_p = TRUE; - memcpy (temp_state, curr_state, dfa_state_size); - if (recog_memoized (insn) < 0) - { - asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0); - if (!first_cycle_insn_p && asm_p) - /* This is asm insn which is tried to be issued on the - cycle not first. Issue it on the next cycle. */ - cost = 1; - else - /* A USE insn, or something else we don't need to - understand. We can't pass these directly to - state_transition because it will trigger a - fatal error for unrecognizable insns. */ - cost = 0; - } - else if (sched_pressure_p) - cost = 0; - else - { - cost = state_transition (temp_state, insn); - if (cost < 0) - cost = 0; - else if (cost == 0) - cost = 1; - } - - if (cost >= 1) - { - queue_insn (insn, cost); - if (SCHED_GROUP_P (insn)) - { - advance = cost; - break; - } - - continue; - } if (current_sched_info->can_schedule_ready_p && ! (*current_sched_info->can_schedule_ready_p) (insn)) /* We normally get here only if we don't want to move insn from the split block. */ { - TODO_SPEC (insn) = (TODO_SPEC (insn) & ~SPECULATIVE) | HARD_DEP; - continue; + TODO_SPEC (insn) = HARD_DEP; + goto restart_choose_ready; } - /* DECISION is made. 
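   Before the decision is final, the insn is looked up in delay_htab to
   see whether it starts a delay pair and therefore needs a backtrack
   point.  The lookup is pointer-keyed, and all pairs sharing an i1 hang
   off one next_same_i1 chain.  In miniature, with the hash table
   reduced to a linear scan and invented toy names:

     struct toy_pair
     {
       void *i1, *i2;
       int cycles;
       struct toy_pair *next_same_i1;
     };

     /* Map I1 to the head of its chain of pairs, or null.  */
     static struct toy_pair *
     toy_lookup (struct toy_pair **heads, int n, void *i1)
     {
       int i;
       for (i = 0; i < n; i++)
         if (heads[i] && heads[i]->i1 == i1)
           return heads[i];
       return 0;
     }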
*/ - - if (TODO_SPEC (insn) & SPECULATIVE) - generate_recovery_code (insn); - - if (control_flow_insn_p (last_scheduled_insn) - /* This is used to switch basic blocks by request - from scheduler front-end (actually, sched-ebb.c only). - This is used to process blocks with single fallthru - edge. If succeeding block has jump, it [jump] will try - move at the end of current bb, thus corrupting CFG. */ - || current_sched_info->advance_target_bb (*target_bb, insn)) + if (delay_htab) { - *target_bb = current_sched_info->advance_target_bb - (*target_bb, 0); - - if (sched_verbose) + /* If this insn is the first part of a delay-slot pair, record a + backtrack point. */ + struct delay_pair *delay_entry; + delay_entry + = (struct delay_pair *)htab_find_with_hash (delay_htab, insn, + htab_hash_pointer (insn)); + if (delay_entry) { - rtx x; - - x = next_real_insn (last_scheduled_insn); - gcc_assert (x); - dump_new_block_header (1, *target_bb, x, tail); + save_backtrack_point (delay_entry, ls); + if (sched_verbose >= 2) + fprintf (sched_dump, ";;\t\tsaving backtrack point\n"); } - - last_scheduled_insn = bb_note (*target_bb); } - /* Update counters, etc in the scheduler's front end. */ - (*current_sched_info->begin_schedule_ready) (insn, - last_scheduled_insn); + /* DECISION is made. */ - move_insn (insn, last_scheduled_insn, current_sched_info->next_tail); + if (modulo_ii > 0 && INSN_UID (insn) < modulo_iter0_max_uid) + { + modulo_insns_scheduled++; + modulo_last_stage = clock_var / modulo_ii; + } + if (TODO_SPEC (insn) & SPECULATIVE) + generate_recovery_code (insn); if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON)) targetm.sched.dispatch_do (insn, ADD_TO_DISPATCH_WINDOW); - reemit_notes (insn); - last_scheduled_insn = insn; + /* Update counters, etc in the scheduler's front end. */ + (*current_sched_info->begin_schedule_ready) (insn); + VEC_safe_push (rtx, heap, scheduled_insns, insn); + gcc_assert (NONDEBUG_INSN_P (insn)); + last_nondebug_scheduled_insn = last_scheduled_insn = insn; - if (memcmp (curr_state, temp_state, dfa_state_size) != 0) - { - cycle_issued_insns++; - memcpy (curr_state, temp_state, dfa_state_size); - } + if (recog_memoized (insn) >= 0) + { + memcpy (temp_state, curr_state, dfa_state_size); + cost = state_transition (curr_state, insn); + if (!sched_pressure_p) + gcc_assert (cost < 0); + if (memcmp (temp_state, curr_state, dfa_state_size) != 0) + cycle_issued_insns++; + asm_p = false; + } + else + asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0); if (targetm.sched.variable_issue) - can_issue_more = + ls.can_issue_more = targetm.sched.variable_issue (sched_dump, sched_verbose, - insn, can_issue_more); + insn, ls.can_issue_more); /* A naked CLOBBER or USE generates no instruction, so do not count them against the issue rate. */ else if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER) - can_issue_more--; + ls.can_issue_more--; advance = schedule_insn (insn); + if (SHADOW_P (insn)) + ls.shadows_only_p = true; + /* After issuing an asm insn we should start a new cycle. */ if (advance == 0 && asm_p) advance = 1; - if (advance != 0) + + if (must_backtrack) break; - first_cycle_insn_p = 0; + if (advance != 0) + break; - /* Sort the ready list based on priority. This must be - redone here, as schedule_insn may have readied additional - insns that will not be sorted correctly. 
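   Note how the issue bookkeeping above decides whether an insn counts
   against the cycle: it snapshots the DFA state, applies the
   transition, and compares the bytes, so USEs, CLOBBERs and other
   no-op patterns never inflate cycle_issued_insns.  Schematically, with
   invented toy names and an assumed bound on the state size:

     #include <string.h>

     static int
     toy_issue_changes_state (char *state, size_t size,
                              void (*transition_fn) (char *))
     {
       char before[256];   /* Toy bound; assumes size <= 256.  */
       memcpy (before, state, size);
       transition_fn (state);
       return memcmp (before, state, size) != 0;
     }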
*/ + ls.first_cycle_insn_p = false; if (ready.n_ready > 0) - ready_sort (&ready); + prune_ready_list (temp_state, false, ls.shadows_only_p, + ls.modulo_epilogue); + } - /* Quickly go through debug insns such that md sched - reorder2 doesn't have to deal with debug insns. */ - if (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0)) - && (*current_sched_info->schedule_more_p) ()) + do_backtrack: + if (!must_backtrack) + for (i = 0; i < ready.n_ready; i++) + { + rtx insn = ready_element (&ready, i); + if (INSN_EXACT_TICK (insn) == clock_var) + { + must_backtrack = true; + clock_var++; + break; + } + } + if (must_backtrack && modulo_ii > 0) + { + if (modulo_backtracks_left == 0) + goto end_schedule; + modulo_backtracks_left--; + } + while (must_backtrack) + { + struct haifa_saved_data *failed; + rtx failed_insn; + + must_backtrack = false; + failed = verify_shadows (); + gcc_assert (failed); + + failed_insn = failed->delay_pair->i1; + toggle_cancelled_flags (false); + unschedule_insns_until (failed_insn); + while (failed != backtrack_queue) + free_topmost_backtrack_point (true); + restore_last_backtrack_point (&ls); + if (sched_verbose >= 2) + fprintf (sched_dump, ";;\t\trewind to cycle %d\n", clock_var); + /* Delay by at least a cycle. This could cause additional + backtracking. */ + queue_insn (failed_insn, 1, "backtracked"); + advance = 0; + if (must_backtrack) + continue; + if (ready.n_ready > 0) + goto resume_after_backtrack; + else { - if (control_flow_insn_p (last_scheduled_insn)) - { - *target_bb = current_sched_info->advance_target_bb - (*target_bb, 0); - - if (sched_verbose) - { - rtx x; - - x = next_real_insn (last_scheduled_insn); - gcc_assert (x); - dump_new_block_header (1, *target_bb, x, tail); - } - - last_scheduled_insn = bb_note (*target_bb); - } + if (clock_var == 0 && ls.first_cycle_insn_p) + goto end_schedule; + advance = 1; + break; + } + } + } + if (ls.modulo_epilogue) + success = true; + end_schedule: + if (modulo_ii > 0) + { + /* Once again, debug insn suckiness: they can be on the ready list + even if they have unresolved dependencies. To make our view + of the world consistent, remove such "ready" insns. */ + restart_debug_insn_loop: + for (i = ready.n_ready - 1; i >= 0; i--) + { + rtx x; - while (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0))) - { - insn = ready_remove_first (&ready); - gcc_assert (DEBUG_INSN_P (insn)); - (*current_sched_info->begin_schedule_ready) - (insn, last_scheduled_insn); - move_insn (insn, last_scheduled_insn, - current_sched_info->next_tail); - advance = schedule_insn (insn); - last_scheduled_insn = insn; - gcc_assert (advance == 0); - if (ready.n_ready > 0) - ready_sort (&ready); - } + x = ready_element (&ready, i); + if (DEPS_LIST_FIRST (INSN_HARD_BACK_DEPS (x)) != NULL + || DEPS_LIST_FIRST (INSN_SPEC_BACK_DEPS (x)) != NULL) + { + ready_remove (&ready, i); + goto restart_debug_insn_loop; } + } + for (i = ready.n_ready - 1; i >= 0; i--) + { + rtx x; - if (targetm.sched.reorder2 - && (ready.n_ready == 0 - || !SCHED_GROUP_P (ready_element (&ready, 0)))) + x = ready_element (&ready, i); + resolve_dependencies (x); + } + for (i = 0; i <= max_insn_queue_index; i++) + { + rtx link; + while ((link = insn_queue[i]) != NULL) { - can_issue_more = - targetm.sched.reorder2 (sched_dump, sched_verbose, - ready.n_ready - ? 
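/* The backtracking driver a few lines up follows a strict save/restore
   discipline: snapshots pile up as delay pairs are scheduled, and on
   failure everything newer than the failing snapshot is discarded
   before the state is restored.  A standalone sketch, with invented
   toy names:

     struct toy_save
     {
       int clock;
       struct toy_save *next;
     };

     static struct toy_save *toy_stack;

     static void
     toy_push (struct toy_save *s)
     {
       s->next = toy_stack;
       toy_stack = s;
     }

     static int
     toy_backtrack_to (int fail_clock)
     {
       while (toy_stack && toy_stack->clock > fail_clock)
         toy_stack = toy_stack->next;
       return toy_stack ? toy_stack->clock : 0;
     }

   The failing insn itself is then re-queued with a delay of one cycle,
   which may legitimately trigger further backtracking.  */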
ready_lastpos (&ready) : NULL, - &ready.n_ready, clock_var); + rtx x = XEXP (link, 0); + insn_queue[i] = XEXP (link, 1); + QUEUE_INDEX (x) = QUEUE_NOWHERE; + free_INSN_LIST_node (link); + resolve_dependencies (x); } } } @@ -3222,11 +4620,11 @@ schedule_block (basic_block *target_bb) debug_ready_list (&ready); } - if (current_sched_info->queue_must_finish_empty) + if (modulo_ii == 0 && current_sched_info->queue_must_finish_empty) /* Sanity check -- queue must be empty now. Meaningless if region has multiple bbs. */ gcc_assert (!q_size && !ready.n_ready && !ready.n_debug); - else + else if (modulo_ii == 0) { /* We must maintain QUEUE_INDEX between blocks in region. */ for (i = ready.n_ready - 1; i >= 0; i--) @@ -3235,7 +4633,7 @@ schedule_block (basic_block *target_bb) x = ready_element (&ready, i); QUEUE_INDEX (x) = QUEUE_NOWHERE; - TODO_SPEC (x) = (TODO_SPEC (x) & ~SPECULATIVE) | HARD_DEP; + TODO_SPEC (x) = HARD_DEP; } if (q_size) @@ -3248,14 +4646,22 @@ schedule_block (basic_block *target_bb) x = XEXP (link, 0); QUEUE_INDEX (x) = QUEUE_NOWHERE; - TODO_SPEC (x) = (TODO_SPEC (x) & ~SPECULATIVE) | HARD_DEP; + TODO_SPEC (x) = HARD_DEP; } free_INSN_LIST_list (&insn_queue[i]); } } - if (sched_verbose) - fprintf (sched_dump, ";; total time = %d\n", clock_var); + if (success) + { + commit_schedule (prev_head, tail, target_bb); + if (sched_verbose) + fprintf (sched_dump, ";; total time = %d\n", clock_var); + } + else + last_scheduled_insn = tail; + + VEC_truncate (rtx, scheduled_insns, 0); if (!current_sched_info->queue_must_finish_empty || haifa_recovery_bb_recently_added_p) @@ -3276,7 +4682,7 @@ schedule_block (basic_block *target_bb) in its md_finish () hook. These new insns don't have any data initialized and to identify them we extend h_i_d so that they'll get zero luids. */ - sched_init_luids (NULL, NULL, NULL, NULL); + sched_extend_luids (); } if (sched_verbose) @@ -3291,6 +4697,10 @@ schedule_block (basic_block *target_bb) current_sched_info->head = head; current_sched_info->tail = tail; + + free_backtrack_queue (); + + return success; } /* Set_priorities: compute priority of each insn in the block. */ @@ -3304,7 +4714,7 @@ set_priorities (rtx head, rtx tail) current_sched_info->sched_max_insns_priority; rtx prev_head; - if (head == tail && (! INSN_P (head) || BOUNDARY_DEBUG_INSN_P (head))) + if (head == tail && ! INSN_P (head)) gcc_unreachable (); n_insn = 0; @@ -3415,7 +4825,8 @@ sched_init (void) init_alias_analysis (); - df_set_flags (DF_LR_RUN_DCE); + if (!sched_no_dce) + df_set_flags (DF_LR_RUN_DCE); df_note_add_problem (); /* More problems needed for interloop dep calculation in SMS. */ @@ -3441,14 +4852,18 @@ sched_init (void) { int i, max_regno = max_reg_num (); + if (sched_dump != NULL) + /* We need info about pseudos for rtl dumps about pseudo + classes and costs. */ + regstat_init_n_sets_and_refs (); ira_set_pseudo_classes (sched_verbose ? sched_dump : NULL); - sched_regno_cover_class + sched_regno_pressure_class = (enum reg_class *) xmalloc (max_regno * sizeof (enum reg_class)); for (i = 0; i < max_regno; i++) - sched_regno_cover_class[i] + sched_regno_pressure_class[i] = (i < FIRST_PSEUDO_REGISTER - ? ira_class_translate[REGNO_REG_CLASS (i)] - : reg_cover_class (i)); + ? 
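/* The table being filled in here maps every register number to its
   register-pressure class: hard registers through their fixed class,
   pseudos through the class the allocator picked for them.  In
   outline, with invented toy names:

     enum toy_class { TOY_GENERAL_REGS, TOY_FLOAT_REGS, TOY_NO_REGS };
     #define TOY_FIRST_PSEUDO 16

     static enum toy_class
     toy_pressure_class (int regno,
                         enum toy_class (*hard_class_fn) (int),
                         enum toy_class (*pseudo_class_fn) (int))
     {
       return regno < TOY_FIRST_PSEUDO
              ? hard_class_fn (regno)
              : pseudo_class_fn (regno);
     }

   Building the table once per pass keeps the per-insn register
   pressure updates cheap.  */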
ira_pressure_class_translate[REGNO_REG_CLASS (i)] + : ira_pressure_class_translate[reg_allocno_class (i)]); curr_reg_live = BITMAP_ALLOC (NULL); saved_reg_live = BITMAP_ALLOC (NULL); region_ref_regs = BITMAP_ALLOC (NULL); @@ -3466,6 +4881,8 @@ haifa_sched_init (void) setup_sched_dump (); sched_init (); + scheduled_insns = VEC_alloc (rtx, heap, 0); + if (spec_info != NULL) { sched_deps_info->use_deps_list = 1; @@ -3482,10 +4899,10 @@ haifa_sched_init (void) FOR_EACH_BB (bb) VEC_quick_push (basic_block, bbs, bb); - sched_init_luids (bbs, NULL, NULL, NULL); + sched_init_luids (bbs); sched_deps_init (true); sched_extend_target (); - haifa_init_h_i_d (bbs, NULL, NULL, NULL); + haifa_init_h_i_d (bbs); VEC_free (basic_block, heap, bbs); } @@ -3495,16 +4912,11 @@ haifa_sched_init (void) sched_create_empty_bb = sched_create_empty_bb_1; haifa_recovery_bb_ever_added_p = false; -#ifdef ENABLE_CHECKING - /* This is used preferably for finding bugs in check_cfg () itself. - We must call sched_bbs_init () before check_cfg () because check_cfg () - assumes that the last insn in the last bb has a non-null successor. */ - check_cfg (0, 0); -#endif - nr_begin_data = nr_begin_control = nr_be_in_data = nr_be_in_control = 0; before_recovery = 0; after_recovery = 0; + + modulo_ii = 0; } /* Finish work with the data specific to the Haifa scheduler. */ @@ -3536,6 +4948,8 @@ haifa_sched_finish (void) c, nr_be_in_control); } + VEC_free (rtx, heap, scheduled_insns); + /* Finalize h_i_d, dependency caches, and luids for the whole function. Target will be finalized in md_global_finish (). */ sched_deps_finish (); @@ -3553,7 +4967,9 @@ sched_finish (void) haifa_finish_h_i_d (); if (sched_pressure_p) { - free (sched_regno_cover_class); + if (regstat_n_sets_and_refs != NULL) + regstat_free_n_sets_and_refs (); + free (sched_regno_pressure_class); BITMAP_FREE (region_ref_regs); BITMAP_FREE (saved_reg_live); BITMAP_FREE (curr_reg_live); @@ -3568,12 +4984,17 @@ sched_finish (void) regstat_free_calls_crossed (); dfa_finish (); +} -#ifdef ENABLE_CHECKING - /* After reload ia64 backend clobbers CFG, so can't check anything. */ - if (!reload_completed) - check_cfg (0, 0); -#endif +/* Free all delay_pair structures that were recorded. */ +void +free_delay_pairs (void) +{ + if (delay_htab) + { + htab_empty (delay_htab); + htab_empty (delay_htab_i2); + } } /* Fix INSN_TICKs of the instructions in the current block as well as @@ -3648,8 +5069,6 @@ fix_inter_tick (rtx head, rtx tail) bitmap_clear (&processed); } -static int haifa_speculate_insn (rtx, ds_t, rtx *); - /* Check if NEXT is ready to be added to the ready or queue list. If "yes", add it to the proper list. Returns: @@ -3659,72 +5078,22 @@ static int haifa_speculate_insn (rtx, ds_t, rtx *); int try_ready (rtx next) { - ds_t old_ts, *ts; + ds_t old_ts, new_ts; - ts = &TODO_SPEC (next); - old_ts = *ts; + old_ts = TODO_SPEC (next); - gcc_assert (!(old_ts & ~(SPECULATIVE | HARD_DEP)) + gcc_assert (!(old_ts & ~(SPECULATIVE | HARD_DEP | DEP_CONTROL)) && ((old_ts & HARD_DEP) - || (old_ts & SPECULATIVE))); - - if (sd_lists_empty_p (next, SD_LIST_BACK)) - /* NEXT has all its dependencies resolved. */ - { - /* Remove HARD_DEP bit from NEXT's status. */ - *ts &= ~HARD_DEP; - - if (current_sched_info->flags & DO_SPECULATION) - /* Remove all speculative bits from NEXT's status. */ - *ts &= ~SPECULATIVE; - } - else - { - /* One of the NEXT's dependencies has been resolved. - Recalculate NEXT's status. 
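   The open-coded recalculation that used to live here (removed just
   below) is now centralized in recompute_todo_spec, but the shape of
   the computation is unchanged: fold the status of every remaining
   back dependence into one word, then demote the insn to a hard
   dependence if the combined speculation is too weak.  Schematically,
   with invented toy names standing in for the ds_t machinery:

     #define TOY_HARD_DEP    1
     #define TOY_SPECULATIVE 2

     static int
     toy_recompute (const int *dep_status, int n_deps,
                    int weakness, int cutoff)
     {
       int merged = 0, i;
       for (i = 0; i < n_deps; i++)
         merged |= dep_status[i];
       if ((merged & TOY_SPECULATIVE) && weakness < cutoff)
         return TOY_HARD_DEP;   /* Too few points; do not speculate.  */
       return merged;
     }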
*/ - - *ts &= ~SPECULATIVE & ~HARD_DEP; - - if (sd_lists_empty_p (next, SD_LIST_HARD_BACK)) - /* Now we've got NEXT with speculative deps only. - 1. Look at the deps to see what we have to do. - 2. Check if we can do 'todo'. */ - { - sd_iterator_def sd_it; - dep_t dep; - bool first_p = true; - - FOR_EACH_DEP (next, SD_LIST_BACK, sd_it, dep) - { - ds_t ds = DEP_STATUS (dep) & SPECULATIVE; + || (old_ts & SPECULATIVE) + || (old_ts & DEP_CONTROL))); - if (DEBUG_INSN_P (DEP_PRO (dep)) - && !DEBUG_INSN_P (next)) - continue; + new_ts = recompute_todo_spec (next); - if (first_p) - { - first_p = false; - - *ts = ds; - } - else - *ts = ds_merge (*ts, ds); - } - - if (ds_weak (*ts) < spec_info->data_weakness_cutoff) - /* Too few points. */ - *ts = (*ts & ~SPECULATIVE) | HARD_DEP; - } - else - *ts |= HARD_DEP; - } - - if (*ts & HARD_DEP) - gcc_assert (*ts == old_ts + if (new_ts & HARD_DEP) + gcc_assert (new_ts == old_ts && QUEUE_INDEX (next) == QUEUE_NOWHERE); else if (current_sched_info->new_ready) - *ts = current_sched_info->new_ready (next, *ts); + new_ts = current_sched_info->new_ready (next, new_ts); /* * if !(old_ts & SPECULATIVE) (e.g. HARD_DEP or 0), then insn might have its original pattern or changed (speculative) one. This is due @@ -3732,29 +5101,29 @@ try_ready (rtx next) * But if (old_ts & SPECULATIVE), then we are pretty sure that insn has speculative pattern. - We can't assert (!(*ts & HARD_DEP) || *ts == old_ts) here because + We can't assert (!(new_ts & HARD_DEP) || new_ts == old_ts) here because control-speculative NEXT could have been discarded by sched-rgn.c (the same case as when discarded by can_schedule_ready_p ()). */ - if ((*ts & SPECULATIVE) - /* If (old_ts == *ts), then (old_ts & SPECULATIVE) and we don't + if ((new_ts & SPECULATIVE) + /* If (old_ts == new_ts), then (old_ts & SPECULATIVE) and we don't need to change anything. */ - && *ts != old_ts) + && new_ts != old_ts) { int res; rtx new_pat; - gcc_assert ((*ts & SPECULATIVE) && !(*ts & ~SPECULATIVE)); + gcc_assert ((new_ts & SPECULATIVE) && !(new_ts & ~SPECULATIVE)); - res = haifa_speculate_insn (next, *ts, &new_pat); + res = haifa_speculate_insn (next, new_ts, &new_pat); switch (res) { case -1: /* It would be nice to change DEP_STATUS of all dependences, - which have ((DEP_STATUS & SPECULATIVE) == *ts) to HARD_DEP, + which have ((DEP_STATUS & SPECULATIVE) == new_ts) to HARD_DEP, so we won't reanalyze anything. */ - *ts = (*ts & ~SPECULATIVE) | HARD_DEP; + new_ts = HARD_DEP; break; case 0: @@ -3770,7 +5139,8 @@ try_ready (rtx next) save it. */ ORIG_PAT (next) = PATTERN (next); - haifa_change_pattern (next, new_pat); + res = haifa_change_pattern (next, new_pat); + gcc_assert (res); break; default: @@ -3778,14 +5148,16 @@ try_ready (rtx next) } } - /* We need to restore pattern only if (*ts == 0), because otherwise it is - either correct (*ts & SPECULATIVE), - or we simply don't care (*ts & HARD_DEP). */ + /* We need to restore pattern only if (new_ts == 0), because otherwise it is + either correct (new_ts & SPECULATIVE), + or we simply don't care (new_ts & HARD_DEP). 
*/ gcc_assert (!ORIG_PAT (next) || !IS_SPECULATION_BRANCHY_CHECK_P (next)); - if (*ts & HARD_DEP) + TODO_SPEC (next) = new_ts; + + if (new_ts & HARD_DEP) { /* We can't assert (QUEUE_INDEX (next) == QUEUE_NOWHERE) here because control-speculative NEXT could have been discarded by sched-rgn.c @@ -3793,35 +5165,38 @@ try_ready (rtx next) /*gcc_assert (QUEUE_INDEX (next) == QUEUE_NOWHERE);*/ change_queue_index (next, QUEUE_NOWHERE); + return -1; } - else if (!(*ts & BEGIN_SPEC) && ORIG_PAT (next) && !IS_SPECULATION_CHECK_P (next)) + else if (!(new_ts & BEGIN_SPEC) + && ORIG_PAT (next) && PREDICATED_PAT (next) == NULL_RTX + && !IS_SPECULATION_CHECK_P (next)) /* We should change pattern of every previously speculative instruction - and we determine if NEXT was speculative by using ORIG_PAT field. Except one case - speculation checks have ORIG_PAT pat too, so skip them. */ { - haifa_change_pattern (next, ORIG_PAT (next)); + bool success = haifa_change_pattern (next, ORIG_PAT (next)); + gcc_assert (success); ORIG_PAT (next) = 0; } if (sched_verbose >= 2) { - int s = TODO_SPEC (next); - fprintf (sched_dump, ";;\t\tdependencies resolved: insn %s", (*current_sched_info->print_insn) (next, 0)); if (spec_info && spec_info->dump) { - if (s & BEGIN_DATA) + if (new_ts & BEGIN_DATA) fprintf (spec_info->dump, "; data-spec;"); - if (s & BEGIN_CONTROL) + if (new_ts & BEGIN_CONTROL) fprintf (spec_info->dump, "; control-spec;"); - if (s & BE_IN_CONTROL) + if (new_ts & BE_IN_CONTROL) fprintf (spec_info->dump, "; in-control-spec;"); } - + if (TODO_SPEC (next) & DEP_CONTROL) + fprintf (sched_dump, " predicated"); fprintf (sched_dump, "\n"); } @@ -3836,7 +5211,7 @@ fix_tick_ready (rtx next) { int tick, delay; - if (!sd_lists_empty_p (next, SD_LIST_RES_BACK)) + if (!DEBUG_INSN_P (next) && !sd_lists_empty_p (next, SD_LIST_RES_BACK)) { int full_p; sd_iterator_def sd_it; @@ -3904,7 +5279,7 @@ change_queue_index (rtx next, int delay) if (delay == QUEUE_READY) ready_add (readyp, next, false); else if (delay >= 1) - queue_insn (next, delay); + queue_insn (next, delay, "change queue index"); if (sched_verbose >= 2) { @@ -3934,6 +5309,7 @@ sched_extend_ready_list (int new_sched_ready_n_insns) { i = 0; sched_ready_n_insns = 0; + VEC_reserve (rtx, heap, scheduled_insns, new_sched_ready_n_insns); } else i = sched_ready_n_insns + 1; @@ -3952,7 +5328,13 @@ sched_extend_ready_list (int new_sched_ready_n_insns) new_sched_ready_n_insns + 1); for (; i <= new_sched_ready_n_insns; i++) - choice_stack[i].state = xmalloc (dfa_state_size); + { + choice_stack[i].state = xmalloc (dfa_state_size); + + if (targetm.sched.first_cycle_multipass_init) + targetm.sched.first_cycle_multipass_init (&(choice_stack[i] + .target_data)); + } sched_ready_n_insns = new_sched_ready_n_insns; } @@ -3971,7 +5353,13 @@ sched_finish_ready_list (void) ready_try = NULL; for (i = 0; i <= sched_ready_n_insns; i++) - free (choice_stack [i].state); + { + if (targetm.sched.first_cycle_multipass_fini) + targetm.sched.first_cycle_multipass_fini (&(choice_stack[i] + .target_data)); + + free (choice_stack [i].state); + } free (choice_stack); choice_stack = NULL; @@ -4428,7 +5816,7 @@ sched_create_recovery_edges (basic_block first_bb, basic_block rec, { /* Rewritten from cfgrtl.c. */ if (flag_reorder_blocks_and_partition - && targetm.have_named_sections) + && targetm_common.have_named_sections) { /* We don't need the same note for the check because any_condjump_p (check) == true. 
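   A pattern worth noting from sched_extend_ready_list above: grow the
   choice_stack array, then run the per-entry setup (state allocation
   plus the new first_cycle_multipass_init hook) only over the fresh
   tail, so existing entries keep their state.  In isolation, with
   invented toy names and error handling omitted:

     #include <stdlib.h>

     struct toy_entry
     {
       void *state;
     };

     static struct toy_entry *
     toy_extend (struct toy_entry *vec, int old_n, int new_n,
                 size_t state_size,
                 void (*init_hook) (struct toy_entry *))
     {
       int i;
       vec = (struct toy_entry *) realloc (vec, new_n * sizeof *vec);
       for (i = old_n; i < new_n; i++)
         {
           vec[i].state = malloc (state_size);
           if (init_hook)
             init_hook (&vec[i]);
         }
       return vec;
     }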
*/ @@ -4440,6 +5828,8 @@ sched_create_recovery_edges (basic_block first_bb, basic_block rec, edge_flags = 0; make_single_succ_edge (rec, second_bb, edge_flags); + if (dom_info_available_p (CDI_DOMINATORS)) + set_immediate_dominator (CDI_DOMINATORS, rec, first_bb); } /* This function creates recovery code for INSN. If MUTATE_P is nonzero, @@ -4782,28 +6172,33 @@ fix_recovery_deps (basic_block rec) add_jump_dependencies (insn, jump); } -/* Change pattern of INSN to NEW_PAT. */ -void -sched_change_pattern (rtx insn, rtx new_pat) +/* Change pattern of INSN to NEW_PAT. Invalidate cached haifa + instruction data. */ +static bool +haifa_change_pattern (rtx insn, rtx new_pat) { + sd_iterator_def sd_it; + dep_t dep; int t; t = validate_change (insn, &PATTERN (insn), new_pat, 0); - gcc_assert (t); + if (!t) + return false; dfa_clear_single_insn_cache (insn); -} -/* Change pattern of INSN to NEW_PAT. Invalidate cached haifa - instruction data. */ -static void -haifa_change_pattern (rtx insn, rtx new_pat) -{ - sched_change_pattern (insn, new_pat); + sd_it = sd_iterator_start (insn, + SD_LIST_FORW | SD_LIST_BACK | SD_LIST_RES_BACK); + while (sd_iterator_cond (&sd_it, &dep)) + { + DEP_COST (dep) = UNKNOWN_DEP_COST; + sd_iterator_next (&sd_it); + } /* Invalidate INSN_COST, so it'll be recalculated. */ INSN_COST (insn) = -1; /* Invalidate INSN_TICK, so it'll be recalculated. */ INSN_TICK (insn) = INVALID_TICK; + return true; } /* -1 - can't speculate, @@ -5117,244 +6512,9 @@ add_jump_dependencies (rtx insn, rtx jump) gcc_assert (!sd_lists_empty_p (jump, SD_LIST_BACK)); } -/* Return the NOTE_INSN_BASIC_BLOCK of BB. */ -rtx -bb_note (basic_block bb) -{ - rtx note; - - note = BB_HEAD (bb); - if (LABEL_P (note)) - note = NEXT_INSN (note); - - gcc_assert (NOTE_INSN_BASIC_BLOCK_P (note)); - return note; -} - -#ifdef ENABLE_CHECKING -/* Helper function for check_cfg. - Return nonzero, if edge vector pointed to by EL has edge with TYPE in - its flags. */ -static int -has_edge_p (VEC(edge,gc) *el, int type) -{ - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, el) - if (e->flags & type) - return 1; - return 0; -} - -/* Search back, starting at INSN, for an insn that is not a - NOTE_INSN_VAR_LOCATION. Don't search beyond HEAD, and return it if - no such insn can be found. */ -static inline rtx -prev_non_location_insn (rtx insn, rtx head) -{ - while (insn != head && NOTE_P (insn) - && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) - insn = PREV_INSN (insn); - - return insn; -} - -/* Check few properties of CFG between HEAD and TAIL. - If HEAD (TAIL) is NULL check from the beginning (till the end) of the - instruction stream. */ -static void -check_cfg (rtx head, rtx tail) -{ - rtx next_tail; - basic_block bb = 0; - int not_first = 0, not_last; - - if (head == NULL) - head = get_insns (); - if (tail == NULL) - tail = get_last_insn (); - next_tail = NEXT_INSN (tail); - - do - { - not_last = head != tail; - - if (not_first) - gcc_assert (NEXT_INSN (PREV_INSN (head)) == head); - if (not_last) - gcc_assert (PREV_INSN (NEXT_INSN (head)) == head); - - if (LABEL_P (head) - || (NOTE_INSN_BASIC_BLOCK_P (head) - && (!not_first - || (not_first && !LABEL_P (PREV_INSN (head)))))) - { - gcc_assert (bb == 0); - bb = BLOCK_FOR_INSN (head); - if (bb != 0) - gcc_assert (BB_HEAD (bb) == head); - else - /* This is the case of jump table. See inside_basic_block_p (). 
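   The reworked haifa_change_pattern earlier in this hunk is a textbook
   cache invalidation: after the pattern is replaced, every derived
   quantity -- the cost cached on each dependence, INSN_COST, INSN_TICK
   -- is reset to its "unknown" sentinel and recomputed lazily on the
   next query.  In miniature, with invented toy names:

     #define TOY_UNKNOWN (-1)

     struct toy_insn
     {
       int cost, tick;
       int *dep_cost;
       int n_deps;
     };

     static void
     toy_invalidate (struct toy_insn *insn)
     {
       int i;
       insn->cost = TOY_UNKNOWN;
       insn->tick = TOY_UNKNOWN;
       for (i = 0; i < insn->n_deps; i++)
         insn->dep_cost[i] = TOY_UNKNOWN;
     }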
*/ - gcc_assert (LABEL_P (head) && !inside_basic_block_p (head)); - } - - if (bb == 0) - { - gcc_assert (!inside_basic_block_p (head)); - head = NEXT_INSN (head); - } - else - { - gcc_assert (inside_basic_block_p (head) - || NOTE_P (head)); - gcc_assert (BLOCK_FOR_INSN (head) == bb); - - if (LABEL_P (head)) - { - head = NEXT_INSN (head); - gcc_assert (NOTE_INSN_BASIC_BLOCK_P (head)); - } - else - { - if (control_flow_insn_p (head)) - { - gcc_assert (prev_non_location_insn (BB_END (bb), head) - == head); - - if (any_uncondjump_p (head)) - gcc_assert (EDGE_COUNT (bb->succs) == 1 - && BARRIER_P (NEXT_INSN (head))); - else if (any_condjump_p (head)) - gcc_assert (/* Usual case. */ - (EDGE_COUNT (bb->succs) > 1 - && !BARRIER_P (NEXT_INSN (head))) - /* Or jump to the next instruction. */ - || (EDGE_COUNT (bb->succs) == 1 - && (BB_HEAD (EDGE_I (bb->succs, 0)->dest) - == JUMP_LABEL (head)))); - } - if (BB_END (bb) == head) - { - if (EDGE_COUNT (bb->succs) > 1) - gcc_assert (control_flow_insn_p (prev_non_location_insn - (head, BB_HEAD (bb))) - || has_edge_p (bb->succs, EDGE_COMPLEX)); - bb = 0; - } - - head = NEXT_INSN (head); - } - } - - not_first = 1; - } - while (head != next_tail); - - gcc_assert (bb == 0); -} - -#endif /* ENABLE_CHECKING */ - -/* Extend per basic block data structures. */ -static void -extend_bb (void) -{ - if (sched_scan_info->extend_bb) - sched_scan_info->extend_bb (); -} - -/* Init data for BB. */ -static void -init_bb (basic_block bb) -{ - if (sched_scan_info->init_bb) - sched_scan_info->init_bb (bb); -} - -/* Extend per insn data structures. */ -static void -extend_insn (void) -{ - if (sched_scan_info->extend_insn) - sched_scan_info->extend_insn (); -} - -/* Init data structures for INSN. */ -static void -init_insn (rtx insn) -{ - if (sched_scan_info->init_insn) - sched_scan_info->init_insn (insn); -} - -/* Init all insns in BB. */ -static void -init_insns_in_bb (basic_block bb) -{ - rtx insn; - - FOR_BB_INSNS (bb, insn) - init_insn (insn); -} - -/* A driver function to add a set of basic blocks (BBS), - a single basic block (BB), a set of insns (INSNS) or a single insn (INSN) - to the scheduling region. */ -void -sched_scan (const struct sched_scan_info_def *ssi, - bb_vec_t bbs, basic_block bb, insn_vec_t insns, rtx insn) -{ - sched_scan_info = ssi; - - if (bbs != NULL || bb != NULL) - { - extend_bb (); - - if (bbs != NULL) - { - unsigned i; - basic_block x; - - FOR_EACH_VEC_ELT (basic_block, bbs, i, x) - init_bb (x); - } - - if (bb != NULL) - init_bb (bb); - } - - extend_insn (); - - if (bbs != NULL) - { - unsigned i; - basic_block x; - - FOR_EACH_VEC_ELT (basic_block, bbs, i, x) - init_insns_in_bb (x); - } - - if (bb != NULL) - init_insns_in_bb (bb); - - if (insns != NULL) - { - unsigned i; - rtx x; - - FOR_EACH_VEC_ELT (rtx, insns, i, x) - init_insn (x); - } - - if (insn != NULL) - init_insn (insn); -} - - /* Extend data structures for logical insn UID. */ -static void -luids_extend_insn (void) +void +sched_extend_luids (void) { int new_luids_max_uid = get_max_uid () + 1; @@ -5362,8 +6522,8 @@ luids_extend_insn (void) } /* Initialize LUID for INSN. */ -static void -luids_init_insn (rtx insn) +void +sched_init_insn_luid (rtx insn) { int i = INSN_P (insn) ? 1 : common_sched_info->luid_for_non_insn (insn); int luid; @@ -5379,21 +6539,23 @@ luids_init_insn (rtx insn) SET_INSN_LUID (insn, luid); } -/* Initialize luids for BBS, BB, INSNS and INSN. +/* Initialize luids for BBS. The hook common_sched_info->luid_for_non_insn () is used to determine if notes, labels, etc. 
need luids. */ void -sched_init_luids (bb_vec_t bbs, basic_block bb, insn_vec_t insns, rtx insn) +sched_init_luids (bb_vec_t bbs) { - const struct sched_scan_info_def ssi = + int i; + basic_block bb; + + sched_extend_luids (); + FOR_EACH_VEC_ELT (basic_block, bbs, i, bb) { - NULL, /* extend_bb */ - NULL, /* init_bb */ - luids_extend_insn, /* extend_insn */ - luids_init_insn /* init_insn */ - }; + rtx insn; - sched_scan (&ssi, bbs, bb, insns, insn); + FOR_BB_INSNS (bb, insn) + sched_init_insn_luid (insn); + } } /* Free LUIDs. */ @@ -5445,24 +6607,27 @@ init_h_i_d (rtx insn) INSN_COST (insn) = -1; QUEUE_INDEX (insn) = QUEUE_NOWHERE; INSN_TICK (insn) = INVALID_TICK; + INSN_EXACT_TICK (insn) = INVALID_TICK; INTER_TICK (insn) = INVALID_TICK; TODO_SPEC (insn) = HARD_DEP; } } -/* Initialize haifa_insn_data for BBS, BB, INSNS and INSN. */ +/* Initialize haifa_insn_data for BBS. */ void -haifa_init_h_i_d (bb_vec_t bbs, basic_block bb, insn_vec_t insns, rtx insn) +haifa_init_h_i_d (bb_vec_t bbs) { - const struct sched_scan_info_def ssi = + int i; + basic_block bb; + + extend_h_i_d (); + FOR_EACH_VEC_ELT (basic_block, bbs, i, bb) { - NULL, /* extend_bb */ - NULL, /* init_bb */ - extend_h_i_d, /* extend_insn */ - init_h_i_d /* init_insn */ - }; + rtx insn; - sched_scan (&ssi, bbs, bb, insns, insn); + FOR_BB_INSNS (bb, insn) + init_h_i_d (insn); + } } /* Finalize haifa_insn_data. */ @@ -5475,8 +6640,7 @@ haifa_finish_h_i_d (void) FOR_EACH_VEC_ELT (haifa_insn_data_def, h_i_d, i, data) { - if (data->reg_pressure != NULL) - free (data->reg_pressure); + free (data->reg_pressure); for (use = data->reg_use_list; use != NULL; use = next) { next = use->next_insn_use; @@ -5492,10 +6656,12 @@ haifa_init_insn (rtx insn) { gcc_assert (insn != NULL); - sched_init_luids (NULL, NULL, NULL, insn); + sched_extend_luids (); + sched_init_insn_luid (insn); sched_extend_target (); sched_deps_init (false); - haifa_init_h_i_d (NULL, NULL, NULL, insn); + extend_h_i_d (); + init_h_i_d (insn); if (adding_bb_to_current_region_p) { @@ -5504,6 +6670,8 @@ haifa_init_insn (rtx insn) /* Extend dependency caches by one element. */ extend_dependency_caches (1, false); } + if (sched_pressure_p) + init_insn_reg_pressure_info (insn); } /* Init data for the new basic block BB which comes after AFTER. */ @@ -5546,9 +6714,16 @@ sched_create_empty_bb_1 (basic_block after) rtx sched_emit_insn (rtx pat) { - rtx insn = emit_insn_after (pat, last_scheduled_insn); - last_scheduled_insn = insn; + rtx insn = emit_insn_before (pat, nonscheduled_insns_begin); haifa_init_insn (insn); + + if (current_sched_info->add_remove_insn) + current_sched_info->add_remove_insn (insn, 0); + + (*current_sched_info->begin_schedule_ready) (insn); + VEC_safe_push (rtx, heap, scheduled_insns, insn); + + last_scheduled_insn = insn; return insn; }
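/* A closing observation on the luid code above: with the sched_scan
   driver gone, luid assignment is a plain linear walk in which only
   real insns consume a fresh number, while notes and labels can share
   a neighbour's luid when the luid_for_non_insn hook contributes 0.
   A standalone sketch, with invented toy names -- IS_REAL[i] is
   nonzero for real insns:

     static void
     toy_assign_luids (const int *is_real, int n, int *luid_out)
     {
       int i, luid = 1;
       for (i = 0; i < n; i++)
         {
           luid_out[i] = luid;
           if (is_real[i])
             luid++;
         }
     }

   This roughly mirrors sched_init_insn_luid, where the amount each
   insn adds to the running luid is 1 for real insns and whatever the
   common_sched_info->luid_for_non_insn hook returns otherwise.  */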