2010-04-06 Kai Tietz <kai.tietz@onevision.com>
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 0711f18..5f7230f 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -1,11 +1,12 @@
 /* Basic block reordering routines for the GNU compiler.
-   Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+   Free Software Foundation, Inc.
 
    This file is part of GCC.
 
    GCC is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
+   the Free Software Foundation; either version 3, or (at your option)
    any later version.
 
    GCC is distributed in the hope that it will be useful, but WITHOUT
@@ -14,9 +15,8 @@
    License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING.  If not, write to the Free
-   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.  */
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
 
 /* This (greedy) algorithm constructs traces in several rounds.
    The construction starts from "seeds".  The seed for the first round
 #include "coretypes.h"
 #include "tm.h"
 #include "rtl.h"
-#include "basic-block.h"
+#include "regs.h"
 #include "flags.h"
+#include "timevar.h"
 #include "output.h"
 #include "cfglayout.h"
 #include "fibheap.h"
 #include "target.h"
+#include "function.h"
+#include "tm_p.h"
+#include "obstack.h"
+#include "expr.h"
+#include "params.h"
+#include "toplev.h"
+#include "tree-pass.h"
+#include "df.h"
+
+/* The number of rounds.  In most cases there will only be 4 rounds, but
+   when partitioning hot and cold basic blocks into separate sections of
+   the .o file there will be an extra round.*/
+#define N_ROUNDS 5
+
+/* Stubs in case we don't have a return insn.
+   We have to check at runtime too, not only compiletime.  */
+
+#ifndef HAVE_return
+#define HAVE_return 0
+#define gen_return() NULL_RTX
+#endif
 
-/* The number of rounds.  */
-#define N_ROUNDS 4
 
 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
-static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0};
+static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
 
 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
-static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0};
+static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
 
 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
    block the edge destination is not duplicated while connecting traces.  */
@@ -102,6 +122,9 @@ typedef struct bbro_basic_block_data_def
   /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
   int end_of_trace;
 
+  /* Which trace is the bb in?  */
+  int in_trace;
+
   /* Which heap is BB in (if any)?  */
   fibheap_t heap;
 
@@ -120,8 +143,7 @@ static bbro_basic_block_data *bbd;
 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
 
 /* Free the memory and set the pointer to NULL.  */
-#define FREE(P) \
-  do { if (P) { free (P); P = 0; } else { abort (); } } while (0)
+#define FREE(P) (gcc_assert (P), free (P), P = 0)
 
 /* Structure for holding information about a trace.  */
 struct trace
@@ -137,43 +159,84 @@ struct trace
 };
 
 /* Maximum frequency and count of one of the entry blocks.  */
-int max_entry_frequency;
-gcov_type max_entry_count;
+static int max_entry_frequency;
+static gcov_type max_entry_count;
 
 /* Local function prototypes.  */
-static void find_traces                        PARAMS ((int *, struct trace *));
-static basic_block rotate_loop         PARAMS ((edge, struct trace *, int));
-static void mark_bb_visited            PARAMS ((basic_block, int));
-static void find_traces_1_round                PARAMS ((int, int, gcov_type,
-                                                struct trace *, int *, int,
-                                                fibheap_t *));
-static basic_block copy_bb             PARAMS ((basic_block, edge,
-                                                basic_block, int));
-static fibheapkey_t bb_to_key          PARAMS ((basic_block));
-static bool better_edge_p              PARAMS ((basic_block, edge, int, int,
-                                                int, int));
-static void connect_traces             PARAMS ((int, struct trace *));
-static bool copy_bb_p                  PARAMS ((basic_block, int));
-static int get_uncond_jump_length      PARAMS ((void));
+static void find_traces (int *, struct trace *);
+static basic_block rotate_loop (edge, struct trace *, int);
+static void mark_bb_visited (basic_block, int);
+static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
+                                int, fibheap_t *, int);
+static basic_block copy_bb (basic_block, edge, basic_block, int);
+static fibheapkey_t bb_to_key (basic_block);
+static bool better_edge_p (const_basic_block, const_edge, int, int, int, int, const_edge);
+static void connect_traces (int, struct trace *);
+static bool copy_bb_p (const_basic_block, int);
+static int get_uncond_jump_length (void);
+static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
+static void find_rarely_executed_basic_blocks_and_crossing_edges (edge **,
+                                                                 int *,
+                                                                 int *);
+static void add_labels_and_missing_jumps (edge *, int);
+static void add_reg_crossing_jump_notes (void);
+static void fix_up_fall_thru_edges (void);
+static void fix_edges_for_rarely_executed_code (edge *, int);
+static void fix_crossing_conditional_branches (void);
+static void fix_crossing_unconditional_branches (void);
 \f
+/* Check to see if bb should be pushed into the next round of trace
+   collections or not.  Reasons for pushing the block forward are 1).
+   If the block is cold, we are doing partitioning, and there will be
+   another round (cold partition blocks are not supposed to be
+   collected into traces until the very last round); or 2). There will
+   be another round, and the basic block is not "hot enough" for the
+   current round of trace collection.  */
+
+static bool
+push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
+                     int exec_th, gcov_type count_th)
+{
+  bool there_exists_another_round;
+  bool block_not_hot_enough;
+
+  there_exists_another_round = round < number_of_rounds - 1;
+
+  block_not_hot_enough = (bb->frequency < exec_th
+                         || bb->count < count_th
+                         || probably_never_executed_bb_p (bb));
+
+  if (there_exists_another_round
+      && block_not_hot_enough)
+    return true;
+  else
+    return false;
+}
+
 /* Find the traces for Software Trace Cache.  Chain each trace through
    RBI()->next.  Store the number of traces to N_TRACES and description of
    traces to TRACES.  */
 
 static void
-find_traces (n_traces, traces)
-     int *n_traces;
-     struct trace *traces;
+find_traces (int *n_traces, struct trace *traces)
 {
   int i;
+  int number_of_rounds;
   edge e;
+  edge_iterator ei;
   fibheap_t heap;
 
+  /* Add one extra round of trace collection when partitioning hot/cold
+     basic blocks into separate sections.  The last round is for all the
+     cold blocks (and ONLY the cold blocks).  */
+
+  number_of_rounds = N_ROUNDS - 1;
+
   /* Insert entry points of function into heap.  */
   heap = fibheap_new ();
   max_entry_frequency = 0;
   max_entry_count = 0;
-  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
+  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
     {
       bbd[e->dest->index].heap = heap;
       bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
@@ -185,12 +248,12 @@ find_traces (n_traces, traces)
     }
 
   /* Find the traces.  */
-  for (i = 0; i < N_ROUNDS; i++)
+  for (i = 0; i < number_of_rounds; i++)
     {
       gcov_type count_threshold;
 
-      if (rtl_dump_file)
-       fprintf (rtl_dump_file, "STC - round %d\n", i + 1);
+      if (dump_file)
+       fprintf (dump_file, "STC - round %d\n", i + 1);
 
       if (max_entry_count < INT_MAX / 1000)
        count_threshold = max_entry_count * exec_threshold[i] / 1000;
@@ -199,22 +262,23 @@ find_traces (n_traces, traces)
 
       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
                           max_entry_frequency * exec_threshold[i] / 1000,
-                          count_threshold, traces, n_traces, i, &heap);
+                          count_threshold, traces, n_traces, i, &heap,
+                          number_of_rounds);
     }
   fibheap_delete (heap);
 
-  if (rtl_dump_file)
+  if (dump_file)
     {
       for (i = 0; i < *n_traces; i++)
        {
          basic_block bb;
-         fprintf (rtl_dump_file, "Trace %d (round %d):  ", i + 1,
+         fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
                   traces[i].round + 1);
-         for (bb = traces[i].first; bb != traces[i].last; bb = RBI (bb)->next)
-           fprintf (rtl_dump_file, "%d [%d] ", bb->index, bb->frequency);
-         fprintf (rtl_dump_file, "%d [%d]\n", bb->index, bb->frequency);
+         for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux)
+           fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
+         fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
        }
-      fflush (rtl_dump_file);
+      fflush (dump_file);
     }
 }
 
@@ -222,10 +286,7 @@ find_traces (n_traces, traces)
    (with sequential number TRACE_N).  */
 
 static basic_block
-rotate_loop (back_edge, trace, trace_n)
-     edge back_edge;
-     struct trace *trace;
-     int trace_n;
+rotate_loop (edge back_edge, struct trace *trace, int trace_n)
 {
   basic_block bb;
 
@@ -243,16 +304,18 @@ rotate_loop (back_edge, trace, trace_n)
   do
     {
       edge e;
-      for (e = bb->succ; e; e = e->succ_next)
+      edge_iterator ei;
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
-           && RBI (e->dest)->visited != trace_n
+           && e->dest->il.rtl->visited != trace_n
            && (e->flags & EDGE_CAN_FALLTHRU)
            && !(e->flags & EDGE_COMPLEX))
        {
          if (is_preferred)
            {
              /* The best edge is preferred.  */
-             if (!RBI (e->dest)->visited
+             if (!e->dest->il.rtl->visited
                  || bbd[e->dest->index].start_of_trace >= 0)
                {
                  /* The current edge E is also preferred.  */
@@ -268,7 +331,7 @@ rotate_loop (back_edge, trace, trace_n)
            }
          else
            {
-             if (!RBI (e->dest)->visited
+             if (!e->dest->il.rtl->visited
                  || bbd[e->dest->index].start_of_trace >= 0)
                {
                  /* The current edge E is preferred.  */
@@ -291,7 +354,7 @@ rotate_loop (back_edge, trace, trace_n)
                }
            }
        }
-      bb = RBI (bb)->next;
+      bb = (basic_block) bb->aux;
     }
   while (bb != back_edge->dest);
 
@@ -301,29 +364,29 @@ rotate_loop (back_edge, trace, trace_n)
         the trace.  */
       if (back_edge->dest == trace->first)
        {
-         trace->first = RBI (best_bb)->next;
+         trace->first = (basic_block) best_bb->aux;
        }
       else
        {
          basic_block prev_bb;
 
          for (prev_bb = trace->first;
-              RBI (prev_bb)->next != back_edge->dest;
-              prev_bb = RBI (prev_bb)->next)
+              prev_bb->aux != back_edge->dest;
+              prev_bb = (basic_block) prev_bb->aux)
            ;
-         RBI (prev_bb)->next = RBI (best_bb)->next;
+         prev_bb->aux = best_bb->aux;
 
          /* Try to get rid of uncond jump to cond jump.  */
-         if (prev_bb->succ && !prev_bb->succ->succ_next)
+         if (single_succ_p (prev_bb))
            {
-             basic_block header = prev_bb->succ->dest;
+             basic_block header = single_succ (prev_bb);
 
              /* Duplicate HEADER if it is a small block containing cond jump
                 in the end.  */
-             if (any_condjump_p (header->end) && copy_bb_p (header, 0))
-               {
-                 copy_bb (header, prev_bb->succ, prev_bb, trace_n);
-               }
+             if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
+                 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
+                                    NULL_RTX))
+               copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
            }
        }
     }
@@ -332,18 +395,16 @@ rotate_loop (back_edge, trace, trace_n)
       /* We have not found suitable loop tail so do no rotation.  */
       best_bb = back_edge->src;
     }
-  RBI (best_bb)->next = NULL;
+  best_bb->aux = NULL;
   return best_bb;
 }
 
 /* This function marks BB that it was visited in trace number TRACE.  */
 
 static void
-mark_bb_visited (bb, trace)
-     basic_block bb;
-     int trace;
+mark_bb_visited (basic_block bb, int trace)
 {
-  RBI (bb)->visited = trace;
+  bb->il.rtl->visited = trace;
   if (bbd[bb->index].heap)
     {
       fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
@@ -361,15 +422,9 @@ mark_bb_visited (bb, trace)
    *HEAP and stores starting points for the next round into new *HEAP.  */
 
 static void
-find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
-                    heap)
-     int branch_th;
-     int exec_th;
-     gcov_type count_th;
-     struct trace *traces;
-     int *n_traces;
-     int round;
-     fibheap_t *heap;
+find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
+                    struct trace *traces, int *n_traces, int round,
+                    fibheap_t *heap, int number_of_rounds)
 {
   /* Heap for discarded basic blocks which are possible starting points for
      the next round.  */
@@ -381,24 +436,29 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
       struct trace *trace;
       edge best_edge, e;
       fibheapkey_t key;
+      edge_iterator ei;
 
-      bb = fibheap_extract_min (*heap);
+      bb = (basic_block) fibheap_extract_min (*heap);
       bbd[bb->index].heap = NULL;
       bbd[bb->index].node = NULL;
 
-      if (rtl_dump_file)
-       fprintf (rtl_dump_file, "Getting bb %d\n", bb->index);
+      if (dump_file)
+       fprintf (dump_file, "Getting bb %d\n", bb->index);
+
+      /* If the BB's frequency is too low send BB to the next round.  When
+        partitioning hot/cold blocks into separate sections, make sure all
+        the cold blocks (and ONLY the cold blocks) go into the (extra) final
+        round.  */
 
-      /* If the BB's frequency is too low send BB to the next round.  */
-      if (bb->frequency < exec_th || bb->count < count_th
-         || ((round < N_ROUNDS - 1) && probably_never_executed_bb_p (bb)))
+      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
+                               count_th))
        {
          int key = bb_to_key (bb);
          bbd[bb->index].heap = new_heap;
          bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
 
-         if (rtl_dump_file)
-           fprintf (rtl_dump_file,
+         if (dump_file)
+           fprintf (dump_file,
                     "  Possible start point of next round: %d (key: %d)\n",
                     bb->index, key);
          continue;
@@ -408,11 +468,13 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
       trace->first = bb;
       trace->round = round;
       trace->length = 0;
+      bbd[bb->index].in_trace = *n_traces;
       (*n_traces)++;
 
       do
        {
          int prob, freq;
+         bool ends_in_call;
 
          /* The probability and frequency of the best edge.  */
          int best_prob = INT_MIN / 2;
@@ -422,33 +484,55 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
          mark_bb_visited (bb, *n_traces);
          trace->length++;
 
-         if (rtl_dump_file)
-           fprintf (rtl_dump_file, "Basic block %d was visited in trace %d\n",
+         if (dump_file)
+           fprintf (dump_file, "Basic block %d was visited in trace %d\n",
                     bb->index, *n_traces - 1);
 
+         ends_in_call = block_ends_with_call_p (bb);
+
          /* Select the successor that will be placed after BB.  */
-         for (e = bb->succ; e; e = e->succ_next)
+         FOR_EACH_EDGE (e, ei, bb->succs)
            {
-             if (e->flags & EDGE_FAKE)
-               abort ();
+             gcc_assert (!(e->flags & EDGE_FAKE));
 
              if (e->dest == EXIT_BLOCK_PTR)
                continue;
 
-             if (RBI (e->dest)->visited
-                 && RBI (e->dest)->visited != *n_traces)
+             if (e->dest->il.rtl->visited
+                 && e->dest->il.rtl->visited != *n_traces)
+               continue;
+
+             if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
                continue;
 
              prob = e->probability;
-             freq = EDGE_FREQUENCY (e);
+             freq = e->dest->frequency;
+
+             /* The only sensible preference for a call instruction is the
+                fallthru edge.  Don't bother selecting anything else.  */
+             if (ends_in_call)
+               {
+                 if (e->flags & EDGE_CAN_FALLTHRU)
+                   {
+                     best_edge = e;
+                     best_prob = prob;
+                     best_freq = freq;
+                   }
+                 continue;
+               }
 
              /* Edge that cannot be fallthru or improbable or infrequent
-                successor (ie. it is unsuitable successor).  */
+                successor (i.e. it is unsuitable successor).  */
              if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
-                 || prob < branch_th || freq < exec_th || e->count < count_th)
+                 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
+                 || e->count < count_th)
                continue;
 
-             if (better_edge_p (bb, e, prob, freq, best_prob, best_freq))
+             /* If partitioning hot/cold basic blocks, don't consider edges
+                that cross section boundaries.  */
+
+             if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
+                                best_edge))
                {
                  best_edge = e;
                  best_prob = prob;
@@ -456,12 +540,19 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
                }
            }
 
+         /* If the best destination has multiple predecessors, and can be
+            duplicated cheaper than a jump, don't allow it to be added
+            to a trace.  We'll duplicate it when connecting traces.  */
+         if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
+             && copy_bb_p (best_edge->dest, 0))
+           best_edge = NULL;
+
          /* Add all non-selected successors to the heaps.  */
-         for (e = bb->succ; e; e = e->succ_next)
+         FOR_EACH_EDGE (e, ei, bb->succs)
            {
              if (e == best_edge
                  || e->dest == EXIT_BLOCK_PTR
-                 || RBI (e->dest)->visited)
+                 || e->dest->il.rtl->visited)
                continue;
 
              key = bb_to_key (e->dest);
@@ -471,9 +562,9 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
                  /* E->DEST is already in some heap.  */
                  if (key != bbd[e->dest->index].node->key)
                    {
-                     if (rtl_dump_file)
+                     if (dump_file)
                        {
-                         fprintf (rtl_dump_file,
+                         fprintf (dump_file,
                                   "Changing key for bb %d from %ld to %ld.\n",
                                   e->dest->index,
                                   (long) bbd[e->dest->index].node->key,
@@ -495,7 +586,13 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
                      || prob < branch_th || freq < exec_th
                      || e->count < count_th)
                    {
-                     if (round < N_ROUNDS - 1)
+                     /* When partitioning hot/cold basic blocks, make sure
+                        the cold blocks (and only the cold blocks) all get
+                        pushed to the last round of trace collection.  */
+
+                     if (push_to_next_round_p (e->dest, round,
+                                               number_of_rounds,
+                                               exec_th, count_th))
                        which_heap = new_heap;
                    }
 
@@ -503,9 +600,9 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
                  bbd[e->dest->index].node = fibheap_insert (which_heap,
                                                                key, e->dest);
 
-                 if (rtl_dump_file)
+                 if (dump_file)
                    {
-                     fprintf (rtl_dump_file,
+                     fprintf (dump_file,
                               "  Possible start of %s round: %d (key: %ld)\n",
                               (which_heap == new_heap) ? "next" : "this",
                               e->dest->index, (long) key);
@@ -516,7 +613,7 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
 
          if (best_edge) /* Suitable successor was found.  */
            {
-             if (RBI (best_edge->dest)->visited == *n_traces)
+             if (best_edge->dest->il.rtl->visited == *n_traces)
                {
                  /* We do nothing with one basic block loops.  */
                  if (best_edge->dest != bb)
@@ -530,13 +627,15 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
 
                          if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
                            {
-                             if (rtl_dump_file)
+                             if (dump_file)
                                {
-                                 fprintf (rtl_dump_file,
+                                 fprintf (dump_file,
                                           "Rotating loop %d - %d\n",
                                           best_edge->dest->index, bb->index);
                                }
-                             RBI (bb)->next = best_edge->dest;
+                             bb->aux = best_edge->dest;
+                             bbd[best_edge->dest->index].in_trace =
+                                                            (*n_traces) - 1;
                              bb = rotate_loop (best_edge, trace, *n_traces);
                            }
                        }
@@ -544,19 +643,13 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
                        {
                          /* The loop has less than 4 iterations.  */
 
-                         /* Check whether there is another edge from BB.  */
-                         edge another_edge;
-                         for (another_edge = bb->succ;
-                              another_edge;
-                              another_edge = another_edge->succ_next)
-                           if (another_edge != best_edge)
-                             break;
-
-                         if (!another_edge && copy_bb_p (best_edge->dest,
-                                                         !optimize_size))
+                         if (single_succ_p (bb)
+                             && copy_bb_p (best_edge->dest,
+                                           optimize_edge_for_speed_p (best_edge)))
                            {
                              bb = copy_bb (best_edge->dest, best_edge, bb,
                                            *n_traces);
+                             trace->length++;
                            }
                        }
                    }
@@ -587,27 +680,29 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
 
                  */
 
-                 for (e = bb->succ; e; e = e->succ_next)
+                 FOR_EACH_EDGE (e, ei, bb->succs)
                    if (e != best_edge
                        && (e->flags & EDGE_CAN_FALLTHRU)
                        && !(e->flags & EDGE_COMPLEX)
-                       && !RBI (e->dest)->visited
-                       && !e->dest->pred->pred_next
-                       && e->dest->succ
-                       && (e->dest->succ->flags & EDGE_CAN_FALLTHRU)
-                       && !(e->dest->succ->flags & EDGE_COMPLEX)
-                       && !e->dest->succ->succ_next
-                       && e->dest->succ->dest == best_edge->dest
+                       && !e->dest->il.rtl->visited
+                       && single_pred_p (e->dest)
+                       && !(e->flags & EDGE_CROSSING)
+                       && single_succ_p (e->dest)
+                       && (single_succ_edge (e->dest)->flags
+                           & EDGE_CAN_FALLTHRU)
+                       && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
+                       && single_succ (e->dest) == best_edge->dest
                        && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
                      {
                        best_edge = e;
-                       if (rtl_dump_file)
-                         fprintf (rtl_dump_file, "Selecting BB %d\n",
+                       if (dump_file)
+                         fprintf (dump_file, "Selecting BB %d\n",
                                   best_edge->dest->index);
                        break;
                      }
 
-                 RBI (bb)->next = best_edge->dest;
+                 bb->aux = best_edge->dest;
+                 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
                  bb = best_edge->dest;
                }
            }
@@ -620,10 +715,10 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
       /* The trace is terminated so we have to recount the keys in heap
         (some block can have a lower key because now one of its predecessors
         is an end of the trace).  */
-      for (e = bb->succ; e; e = e->succ_next)
+      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->dest == EXIT_BLOCK_PTR
-             || RBI (e->dest)->visited)
+             || e->dest->il.rtl->visited)
            continue;
 
          if (bbd[e->dest->index].heap)
@@ -631,9 +726,9 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
              key = bb_to_key (e->dest);
              if (key != bbd[e->dest->index].node->key)
                {
-                 if (rtl_dump_file)
+                 if (dump_file)
                    {
-                     fprintf (rtl_dump_file,
+                     fprintf (dump_file,
                               "Changing key for bb %d from %ld to %ld.\n",
                               e->dest->index,
                               (long) bbd[e->dest->index].node->key, key);
@@ -657,26 +752,23 @@ find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
    (TRACE is a number of trace which OLD_BB is duplicated to).  */
 
 static basic_block
-copy_bb (old_bb, e, bb, trace)
-     basic_block old_bb;
-     edge e;
-     basic_block bb;
-     int trace;
+copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
 {
   basic_block new_bb;
 
-  new_bb = cfg_layout_duplicate_bb (old_bb, e);
-  if (e->dest != new_bb)
-    abort ();
-  if (RBI (e->dest)->visited)
-    abort ();
-  if (rtl_dump_file)
-    fprintf (rtl_dump_file,
+  new_bb = duplicate_block (old_bb, e, bb);
+  BB_COPY_PARTITION (new_bb, old_bb);
+
+  gcc_assert (e->dest == new_bb);
+  gcc_assert (!e->dest->il.rtl->visited);
+
+  if (dump_file)
+    fprintf (dump_file,
             "Duplicated bb %d (created bb %d)\n",
             old_bb->index, new_bb->index);
-  RBI (new_bb)->visited = trace;
-  RBI (new_bb)->next = RBI (bb)->next;
-  RBI (bb)->next = new_bb;
+  new_bb->il.rtl->visited = trace;
+  new_bb->aux = bb->aux;
+  bb->aux = new_bb;
 
   if (new_bb->index >= array_size || last_basic_block > array_size)
     {
@@ -685,44 +777,48 @@ copy_bb (old_bb, e, bb, trace)
 
       new_size = MAX (last_basic_block, new_bb->index + 1);
       new_size = GET_ARRAY_SIZE (new_size);
-      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
+      bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
       for (i = array_size; i < new_size; i++)
        {
          bbd[i].start_of_trace = -1;
+         bbd[i].in_trace = -1;
          bbd[i].end_of_trace = -1;
          bbd[i].heap = NULL;
          bbd[i].node = NULL;
        }
       array_size = new_size;
 
-      if (rtl_dump_file)
+      if (dump_file)
        {
-         fprintf (rtl_dump_file,
+         fprintf (dump_file,
                   "Growing the dynamic array to %d elements.\n",
                   array_size);
        }
     }
 
+  bbd[new_bb->index].in_trace = trace;
+
   return new_bb;
 }
 
 /* Compute and return the key (for the heap) of the basic block BB.  */
 
 static fibheapkey_t
-bb_to_key (bb)
-     basic_block bb;
+bb_to_key (basic_block bb)
 {
   edge e;
-
+  edge_iterator ei;
   int priority = 0;
 
   /* Do not start in probably never executed blocks.  */
-  if (probably_never_executed_bb_p (bb))
+
+  if (BB_PARTITION (bb) == BB_COLD_PARTITION
+      || probably_never_executed_bb_p (bb))
     return BB_FREQ_MAX;
 
   /* Prefer blocks whose predecessor is an end of some trace
      or whose predecessor edge is EDGE_DFS_BACK.  */
-  for (e = bb->pred; e; e = e->pred_next)
+  FOR_EACH_EDGE (e, ei, bb->preds)
     {
       if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
          || (e->flags & EDGE_DFS_BACK))
@@ -748,13 +844,8 @@ bb_to_key (bb)
    BEST_PROB; similarly for frequency.  */
 
 static bool
-better_edge_p (bb, e, prob, freq, best_prob, best_freq)
-     basic_block bb;
-     edge e;
-     int prob;
-     int freq;
-     int best_prob;
-     int best_freq;
+better_edge_p (const_basic_block bb, const_edge e, int prob, int freq, int best_prob,
+              int best_freq, const_edge cur_best_edge)
 {
   bool is_better_edge;
 
@@ -785,19 +876,30 @@ better_edge_p (bb, e, prob, freq, best_prob, best_freq)
   else
     is_better_edge = false;
 
+  /* If we are doing hot/cold partitioning, make sure that we always favor
+     non-crossing edges over crossing edges.  */
+
+  if (!is_better_edge
+      && flag_reorder_blocks_and_partition
+      && cur_best_edge
+      && (cur_best_edge->flags & EDGE_CROSSING)
+      && !(e->flags & EDGE_CROSSING))
+    is_better_edge = true;
+
   return is_better_edge;
 }
 
 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
 
 static void
-connect_traces (n_traces, traces)
-     int n_traces;
-     struct trace *traces;
+connect_traces (int n_traces, struct trace *traces)
 {
   int i;
   bool *connected;
+  bool two_passes;
   int last_trace;
+  int current_pass;
+  int current_partition;
   int freq_threshold;
   gcov_type count_threshold;
 
@@ -807,26 +909,53 @@ connect_traces (n_traces, traces)
   else
     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
 
-  connected = xcalloc (n_traces, sizeof (bool));
+  connected = XCNEWVEC (bool, n_traces);
   last_trace = -1;
-  for (i = 0; i < n_traces; i++)
+  current_pass = 1;
+  current_partition = BB_PARTITION (traces[0].first);
+  two_passes = false;
+
+  if (flag_reorder_blocks_and_partition)
+    for (i = 0; i < n_traces && !two_passes; i++)
+      if (BB_PARTITION (traces[0].first)
+         != BB_PARTITION (traces[i].first))
+       two_passes = true;
+
+  for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
     {
       int t = i;
       int t2;
       edge e, best;
       int best_len;
 
+      if (i >= n_traces)
+       {
+         gcc_assert (two_passes && current_pass == 1);
+         i = 0;
+         t = i;
+         current_pass = 2;
+         if (current_partition == BB_HOT_PARTITION)
+           current_partition = BB_COLD_PARTITION;
+         else
+           current_partition = BB_HOT_PARTITION;
+       }
+
       if (connected[t])
        continue;
 
+      if (two_passes
+         && BB_PARTITION (traces[t].first) != current_partition)
+       continue;
+
       connected[t] = true;
 
       /* Find the predecessor traces.  */
       for (t2 = t; t2 > 0;)
        {
+         edge_iterator ei;
          best = NULL;
          best_len = 0;
-         for (e = traces[t2].first->pred; e; e = e->pred_next)
+         FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
            {
              int si = e->src->index;
 
@@ -835,6 +964,7 @@ connect_traces (n_traces, traces)
                  && !(e->flags & EDGE_COMPLEX)
                  && bbd[si].end_of_trace >= 0
                  && !connected[bbd[si].end_of_trace]
+                 && (BB_PARTITION (e->src) == current_partition)
                  && (!best
                      || e->probability > best->probability
                      || (e->probability == best->probability
@@ -846,12 +976,13 @@ connect_traces (n_traces, traces)
            }
          if (best)
            {
-             RBI (best->src)->next = best->dest;
+             best->src->aux = best->dest;
              t2 = bbd[best->src->index].end_of_trace;
              connected[t2] = true;
-             if (rtl_dump_file)
+
+             if (dump_file)
                {
-                 fprintf (rtl_dump_file, "Connection: %d %d\n",
+                 fprintf (dump_file, "Connection: %d %d\n",
                           best->src->index, best->dest->index);
                }
            }
@@ -860,16 +991,17 @@ connect_traces (n_traces, traces)
        }
 
       if (last_trace >= 0)
-       RBI (traces[last_trace].last)->next = traces[t2].first;
+       traces[last_trace].last->aux = traces[t2].first;
       last_trace = t;
 
       /* Find the successor traces.  */
       while (1)
        {
          /* Find the continuation of the chain.  */
+         edge_iterator ei;
          best = NULL;
          best_len = 0;
-         for (e = traces[t].last->succ; e; e = e->succ_next)
+         FOR_EACH_EDGE (e, ei, traces[t].last->succs)
            {
              int di = e->dest->index;
 
@@ -878,6 +1010,7 @@ connect_traces (n_traces, traces)
                  && !(e->flags & EDGE_COMPLEX)
                  && bbd[di].start_of_trace >= 0
                  && !connected[bbd[di].start_of_trace]
+                 && (BB_PARTITION (e->dest) == current_partition)
                  && (!best
                      || e->probability > best->probability
                      || (e->probability == best->probability
@@ -890,13 +1023,13 @@ connect_traces (n_traces, traces)
 
          if (best)
            {
-             if (rtl_dump_file)
+             if (dump_file)
                {
                {
-                 fprintf (rtl_dump_file, "Connection: %d %d\n",
+                 fprintf (dump_file, "Connection: %d %d\n",
                           best->src->index, best->dest->index);
                }
              t = bbd[best->dest->index].start_of_trace;
                           best->src->index, best->dest->index);
                }
              t = bbd[best->dest->index].start_of_trace;
-             RBI (traces[last_trace].last)->next = traces[t].first;
+             traces[last_trace].last->aux = traces[t].first;
              connected[t] = true;
              last_trace = t;
            }
              connected[t] = true;
              last_trace = t;
            }
@@ -905,20 +1038,31 @@ connect_traces (n_traces, traces)
              /* Try to connect the traces by duplication of 1 block.  */
              edge e2;
              basic_block next_bb = NULL;
              /* Try to connect the traces by duplication of 1 block.  */
              edge e2;
              basic_block next_bb = NULL;
+             bool try_copy = false;
 
 
+             FOR_EACH_EDGE (e, ei, traces[t].last->succs)
                if (e->dest != EXIT_BLOCK_PTR
                    && (e->flags & EDGE_CAN_FALLTHRU)
                    && !(e->flags & EDGE_COMPLEX)
                if (e->dest != EXIT_BLOCK_PTR
                    && (e->flags & EDGE_CAN_FALLTHRU)
                    && !(e->flags & EDGE_COMPLEX)
-                   && (EDGE_FREQUENCY (e) >= freq_threshold)
-                   && (e->count >= count_threshold)
-                   && (!best
-                       || e->probability > best->probability))
+                   && (!best || e->probability > best->probability))
                  {
                  {
+                   edge_iterator ei;
                    edge best2 = NULL;
                    int best2_len = 0;
 
                    edge best2 = NULL;
                    int best2_len = 0;
 
-                   for (e2 = e->dest->succ; e2; e2 = e2->succ_next)
+                   /* If the destination is a start of a trace which is only
+                      one block long, then no need to search the successor
+                      blocks of the trace.  Accept it.  */
+                   if (bbd[e->dest->index].start_of_trace >= 0
+                       && traces[bbd[e->dest->index].start_of_trace].length
+                          == 1)
+                     {
+                       best = e;
+                       try_copy = true;
+                       continue;
+                     }
+
+                   FOR_EACH_EDGE (e2, ei, e->dest->succs)
                      {
                        int di = e2->dest->index;
 
@@ -927,6 +1071,7 @@ connect_traces (n_traces, traces)
                                && !(e2->flags & EDGE_COMPLEX)
                                && bbd[di].start_of_trace >= 0
                                && !connected[bbd[di].start_of_trace]
+                               && (BB_PARTITION (e2->dest) == current_partition)
                                && (EDGE_FREQUENCY (e2) >= freq_threshold)
                                && (e2->count >= count_threshold)
                                && (!best2
@@ -942,29 +1087,42 @@ connect_traces (n_traces, traces)
                            else
                              best2_len = INT_MAX;
                            next_bb = e2->dest;
+                           try_copy = true;
                          }
                      }
                  }
-             if (best && next_bb && copy_bb_p (best->dest, !optimize_size))
+
+             if (flag_reorder_blocks_and_partition)
+               try_copy = false;
+
+             /* Copy tiny blocks always; copy larger blocks only when the
+                edge is traversed frequently enough.  */
+             if (try_copy
+                 && copy_bb_p (best->dest,
+                               optimize_edge_for_speed_p (best)
+                               && EDGE_FREQUENCY (best) >= freq_threshold
+                               && best->count >= count_threshold))
                {
                  basic_block new_bb;
 
-                 if (rtl_dump_file)
+                 if (dump_file)
                    {
-                     fprintf (rtl_dump_file, "Connection: %d %d ",
+                     fprintf (dump_file, "Connection: %d %d ",
                               traces[t].last->index, best->dest->index);
-                     if (next_bb == EXIT_BLOCK_PTR)
-                       fprintf (rtl_dump_file, "exit\n");
+                     if (!next_bb)
+                       fputc ('\n', dump_file);
+                     else if (next_bb == EXIT_BLOCK_PTR)
+                       fprintf (dump_file, "exit\n");
                      else
-                       fprintf (rtl_dump_file, "%d\n", next_bb->index);
+                       fprintf (dump_file, "%d\n", next_bb->index);
                    }
 
                  new_bb = copy_bb (best->dest, best, traces[t].last, t);
                  traces[t].last = new_bb;
-                 if (next_bb != EXIT_BLOCK_PTR)
+                 if (next_bb && next_bb != EXIT_BLOCK_PTR)
                    {
                      t = bbd[next_bb->index].start_of_trace;
-                     RBI (traces[last_trace].last)->next = traces[t].first;
+                     traces[last_trace].last->aux = traces[t].first;
                      connected[t] = true;
                      last_trace = t;
                    }
@@ -977,15 +1135,15 @@ connect_traces (n_traces, traces)
        }
     }
 
-  if (rtl_dump_file)
+  if (dump_file)
     {
       basic_block bb;
 
-      fprintf (rtl_dump_file, "Final order:\n");
-      for (bb = traces[0].first; bb; bb = RBI (bb)->next)
-       fprintf (rtl_dump_file, "%d ", bb->index);
-      fprintf (rtl_dump_file, "\n");
-      fflush (rtl_dump_file);
+      fprintf (dump_file, "Final order:\n");
+      for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
+       fprintf (dump_file, "%d ", bb->index);
+      fprintf (dump_file, "\n");
+      fflush (dump_file);
     }
 
   FREE (connected);
@@ -995,9 +1153,7 @@ connect_traces (n_traces, traces)
    when code size is allowed to grow by duplication.  */
 
 static bool
-copy_bb_p (bb, code_may_grow)
-     basic_block bb;
-     int code_may_grow;
+copy_bb_p (const_basic_block bb, int code_may_grow)
 {
   int size = 0;
   int max_size = uncond_jump_length;
@@ -1005,27 +1161,30 @@ copy_bb_p (bb, code_may_grow)
 
   if (!bb->frequency)
     return false;
-  if (!bb->pred || !bb->pred->pred_next)
+  if (EDGE_COUNT (bb->preds) < 2)
+    return false;
+  if (!can_duplicate_block_p (bb))
     return false;
-  if (!cfg_layout_can_duplicate_bb_p (bb))
+
+  /* Avoid duplicating blocks which have many successors (PR/13430).  */
+  if (EDGE_COUNT (bb->succs) > 8)
     return false;
 
-  if (code_may_grow && maybe_hot_bb_p (bb))
-    max_size *= 8;
+  if (code_may_grow && optimize_bb_for_speed_p (bb))
+    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
 
-  for (insn = bb->head; insn != NEXT_INSN (bb->end);
-       insn = NEXT_INSN (insn))
+  FOR_BB_INSNS (bb, insn)
     {
       if (INSN_P (insn))
-       size += get_attr_length (insn);
+       size += get_attr_min_length (insn);
     }
 
   if (size <= max_size)
     return true;
 
-  if (rtl_dump_file)
+  if (dump_file)
     {
-      fprintf (rtl_dump_file,
+      fprintf (dump_file,
               "Block %d can't be copied because its size = %d.\n",
               bb->index, size);
     }
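
The duplication test in copy_bb_p is pure arithmetic: sum the minimum lengths of the block's insns and compare against a budget of one unconditional jump, scaled by PARAM_MAX_GROW_COPY_BB_INSNS when growth is allowed (the parameter generalizes the old hard-coded factor of 8 visible in the removed line above). A worked standalone sketch with made-up lengths:

#include <stdio.h>

int main (void)
{
  int uncond_jump_length = 4;       /* hypothetical length of "jmp L" */
  int max_grow_copy_bb_insns = 8;   /* mirrors the old hard-coded factor */
  int insn_lengths[] = { 3, 2, 4 }; /* made-up per-insn minimum lengths */
  int size = 0, i;
  int max_size = uncond_jump_length;
  int code_may_grow = 1;

  if (code_may_grow)                /* block is optimized for speed */
    max_size *= max_grow_copy_bb_insns;   /* 4 * 8 = 32 */

  for (i = 0; i < 3; i++)
    size += insn_lengths[i];        /* 3 + 2 + 4 = 9 */

  printf ("size=%d max=%d copy=%s\n", size, max_size,
          size <= max_size ? "yes" : "no");
  return 0;
}
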
@@ -1036,7 +1195,7 @@ copy_bb_p (bb, code_may_grow)
 /* Return the length of unconditional jump instruction.  */
 
 static int
-get_uncond_jump_length ()
+get_uncond_jump_length (void)
 {
   rtx label, jump;
   int length;
@@ -1044,58 +1203,1092 @@ get_uncond_jump_length ()
   label = emit_label_before (gen_label_rtx (), get_insns ());
   jump = emit_jump_insn (gen_jump (label));
 
-  length = get_attr_length (jump);
+  length = get_attr_min_length (jump);
 
   delete_insn (jump);
   delete_insn (label);
   return length;
 }
 
-/* Reorder basic blocks.  The main entry point to this file.  */
+/* Find the basic blocks that are rarely executed and need to be moved to
+   a separate section of the .o file (to cut down on paging and improve
+   cache locality).  */
+
+static void
+find_rarely_executed_basic_blocks_and_crossing_edges (edge **crossing_edges,
+                                                     int *n_crossing_edges,
+                                                     int *max_idx)
+{
+  basic_block bb;
+  edge e;
+  int i;
+  edge_iterator ei;
+
+  /* Mark which partition (hot/cold) each basic block belongs in.  */
+
+  FOR_EACH_BB (bb)
+    {
+      if (probably_never_executed_bb_p (bb))
+       BB_SET_PARTITION (bb, BB_COLD_PARTITION);
+      else
+       BB_SET_PARTITION (bb, BB_HOT_PARTITION);
+    }
+
+  /* Mark every edge that crosses between sections.  */
+
+  i = 0;
+  FOR_EACH_BB (bb)
+    FOR_EACH_EDGE (e, ei, bb->succs)
+    {
+      if (e->src != ENTRY_BLOCK_PTR
+         && e->dest != EXIT_BLOCK_PTR
+         && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
+       {
+         e->flags |= EDGE_CROSSING;
+         if (i == *max_idx)
+           {
+             *max_idx *= 2;
+             *crossing_edges = XRESIZEVEC (edge, *crossing_edges, *max_idx);
+           }
+         (*crossing_edges)[i++] = e;
+       }
+      else
+       e->flags &= ~EDGE_CROSSING;
+    }
+  *n_crossing_edges = i;
+}
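
The collection loop above grows the crossing-edge array geometrically with XRESIZEVEC whenever the write index reaches max_idx. The same grow-on-demand pattern in self-contained form, using plain realloc instead of GCC's macro (error checks omitted for brevity):

#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  int max_idx = 2, n = 0, v;
  int *edges = malloc (max_idx * sizeof (int)); /* stand-in for the edge array */

  for (v = 0; v < 10; v++)         /* pretend each v is a crossing edge */
    {
      if (n == max_idx)            /* full: double, as XRESIZEVEC is used here */
        {
          max_idx *= 2;
          edges = realloc (edges, max_idx * sizeof (int));
        }
      edges[n++] = v;
    }

  printf ("collected %d edges, capacity %d\n", n, max_idx);
  free (edges);
  return 0;
}
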
+
+/* If any destination of a crossing edge does not have a label, add a
+   label; convert any fall-through crossing edges (for blocks that do not
+   contain a jump) to unconditional jumps.  */
+
+static void
+add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
+{
+  int i;
+  basic_block src;
+  basic_block dest;
+  rtx label;
+  rtx barrier;
+  rtx new_jump;
+
+  for (i = 0; i < n_crossing_edges; i++)
+    {
+      if (crossing_edges[i])
+       {
+         src = crossing_edges[i]->src;
+         dest = crossing_edges[i]->dest;
+
+         /* Make sure dest has a label.  */
+
+         if (dest && (dest != EXIT_BLOCK_PTR))
+           {
+             label = block_label (dest);
+
+             /* Make sure source block ends with a jump.  If the
+                source block does not end with a jump it might end
+                with a call_insn;  this case will be handled in
+                fix_up_fall_thru_edges function.  */
+
+             if (src && (src != ENTRY_BLOCK_PTR))
+               {
+                 if (!JUMP_P (BB_END (src)) && !block_ends_with_call_p (src))
+                   /* bb just falls through.  */
+                   {
+                     /* Make sure there's only one successor.  */
+                     gcc_assert (single_succ_p (src));
+
+                     /* Find label in dest block.  */
+                     label = block_label (dest);
+
+                     new_jump = emit_jump_insn_after (gen_jump (label),
+                                                      BB_END (src));
+                     barrier = emit_barrier_after (new_jump);
+                     JUMP_LABEL (new_jump) = label;
+                     LABEL_NUSES (label) += 1;
+                     src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
+                     /* Mark edge as non-fallthru.  */
+                     crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
+                   } /* end: 'if (!JUMP_P ... '  */
+               } /* end: 'if (src && src->index...'  */
+           } /* end: 'if (dest && dest->index...'  */
+       } /* end: 'if (crossing_edges[i]...'  */
+    } /* end for loop  */
+}
+
+/* Find any bb's where the fall-through edge is a crossing edge (note that
+   these bb's must also contain a conditional jump or end with a call
+   instruction; we've already dealt with fall-through edges for blocks
+   that didn't have a conditional jump or didn't end with a call
+   instruction in the call to add_labels_and_missing_jumps).  Convert the
+   fall-through edge to a non-crossing edge by inserting a new bb to fall
+   through into.  The new bb will contain an unconditional jump (crossing
+   edge) to the original fall-through destination.  */
+
+static void
+fix_up_fall_thru_edges (void)
+{
+  basic_block cur_bb;
+  basic_block new_bb;
+  edge succ1;
+  edge succ2;
+  edge fall_thru;
+  edge cond_jump = NULL;
+  edge e;
+  bool cond_jump_crosses;
+  int invert_worked;
+  rtx old_jump;
+  rtx fall_thru_label;
+  rtx barrier;
+
+  FOR_EACH_BB (cur_bb)
+    {
+      fall_thru = NULL;
+      if (EDGE_COUNT (cur_bb->succs) > 0)
+       succ1 = EDGE_SUCC (cur_bb, 0);
+      else
+       succ1 = NULL;
+
+      if (EDGE_COUNT (cur_bb->succs) > 1)
+       succ2 = EDGE_SUCC (cur_bb, 1);
+      else
+       succ2 = NULL;
+
+      /* Find the fall-through edge.  */
+
+      if (succ1
+         && (succ1->flags & EDGE_FALLTHRU))
+       {
+         fall_thru = succ1;
+         cond_jump = succ2;
+       }
+      else if (succ2
+              && (succ2->flags & EDGE_FALLTHRU))
+       {
+         fall_thru = succ2;
+         cond_jump = succ1;
+       }
+      else if (!fall_thru && succ1 && block_ends_with_call_p (cur_bb))
+      {
+        edge e;
+        edge_iterator ei;
+
+        /* Find EDGE_CAN_FALLTHRU edge.  */
+        FOR_EACH_EDGE (e, ei, cur_bb->succs)
+          if (e->flags & EDGE_CAN_FALLTHRU)
+          {
+            fall_thru = e;
+            break;
+          }
+      }
+
+      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
+       {
+         /* Check to see if the fall-thru edge is a crossing edge.  */
+
+         if (fall_thru->flags & EDGE_CROSSING)
+           {
+             /* The fall_thru edge crosses; now check the cond jump edge, if
+                it exists.  */
+
+             cond_jump_crosses = true;
+             invert_worked  = 0;
+             old_jump = BB_END (cur_bb);
+
+             /* Find the jump instruction, if there is one.  */
+
+             if (cond_jump)
+               {
+                 if (!(cond_jump->flags & EDGE_CROSSING))
+                   cond_jump_crosses = false;
+
+                 /* We know the fall-thru edge crosses; if the cond
+                    jump edge does NOT cross, and its destination is the
+                    next block in the bb order, invert the jump
+                    (i.e. fix it so the fall thru does not cross and
+                    the cond jump does).  */
+
+                 if (!cond_jump_crosses
+                     && cur_bb->aux == cond_jump->dest)
+                   {
+                     /* Find label in fall_thru block. We've already added
+                        any missing labels, so there must be one.  */
+
+                     fall_thru_label = block_label (fall_thru->dest);
+
+                     if (old_jump && JUMP_P (old_jump) && fall_thru_label)
+                       invert_worked = invert_jump (old_jump,
+                                                    fall_thru_label, 0);
+                     if (invert_worked)
+                       {
+                         fall_thru->flags &= ~EDGE_FALLTHRU;
+                         cond_jump->flags |= EDGE_FALLTHRU;
+                         update_br_prob_note (cur_bb);
+                         e = fall_thru;
+                         fall_thru = cond_jump;
+                         cond_jump = e;
+                         cond_jump->flags |= EDGE_CROSSING;
+                         fall_thru->flags &= ~EDGE_CROSSING;
+                       }
+                   }
+               }
+
+             if (cond_jump_crosses || !invert_worked)
+               {
+                 /* This is the case where both edges out of the basic
+                    block are crossing edges. Here we will fix up the
+                    fall through edge. The jump edge will be taken care
+                    of later.  The EDGE_CROSSING flag of fall_thru edge
+                     is unset before the call to force_nonfallthru
+                     function because if a new basic-block is created
+                     this edge remains in the current section boundary
+                     while the edge between new_bb and the fall_thru->dest
+                     becomes EDGE_CROSSING.  */
+
+                  fall_thru->flags &= ~EDGE_CROSSING;
+                 new_bb = force_nonfallthru (fall_thru);
+
+                 if (new_bb)
+                   {
+                     new_bb->aux = cur_bb->aux;
+                     cur_bb->aux = new_bb;
+
+                     /* Make sure new fall-through bb is in same
+                        partition as bb it's falling through from.  */
+
+                     BB_COPY_PARTITION (new_bb, cur_bb);
+                     single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
+                   }
+                  else
+                    {
+                      /* If a new basic-block was not created; restore
+                         the EDGE_CROSSING flag.  */
+                      fall_thru->flags |= EDGE_CROSSING;
+                    }
+
+                 /* Add barrier after new jump.  */
+
+                 if (new_bb)
+                   {
+                     barrier = emit_barrier_after (BB_END (new_bb));
+                     new_bb->il.rtl->footer = unlink_insn_chain (barrier,
+                                                              barrier);
+                   }
+                 else
+                   {
+                     barrier = emit_barrier_after (BB_END (cur_bb));
+                     cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
+                                                              barrier);
+                   }
+               }
+           }
+       }
+    }
+}
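
The inversion case is easiest to picture at the source level: when the fall-through crosses but the conditional edge does not, and the conditional target is the next block in layout order, flipping the condition swaps the two roles. A hand-written analogy; the labels and condition are invented for illustration only:

#include <stdio.h>

/* Before inversion: "if (!cond) goto next_block;" followed by a
   fall-through into the cold section (illegal across a section
   boundary).  After inversion the branch itself carries the
   crossing edge and the fall-through stays local.  */
static int f (int cond)
{
  if (cond)
    goto cold_code;   /* inverted jump now carries the crossing edge */
  /* fall through stays within the hot section */
  return 1;

 cold_code:
  return 0;
}

int main (void)
{
  printf ("%d %d\n", f (0), f (1));
  return 0;
}
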
+
+/* This function checks the destination block of a "crossing jump" to
+   see if it has any crossing predecessors that begin with a code label
+   and end with an unconditional jump.  If so, it returns that predecessor
+   block.  (This is to avoid creating lots of new basic blocks that all
+   contain unconditional jumps to the same destination).  */
+
+static basic_block
+find_jump_block (basic_block jump_dest)
+{
+  basic_block source_bb = NULL;
+  edge e;
+  rtx insn;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, jump_dest->preds)
+    if (e->flags & EDGE_CROSSING)
+      {
+       basic_block src = e->src;
+
+       /* Check each predecessor to see if it has a label, and contains
+          only one executable instruction, which is an unconditional jump.
+          If so, we can use it.  */
+
+       if (LABEL_P (BB_HEAD (src)))
+         for (insn = BB_HEAD (src);
+              !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
+              insn = NEXT_INSN (insn))
+           {
+             if (INSN_P (insn)
+                 && insn == BB_END (src)
+                 && JUMP_P (insn)
+                 && !any_condjump_p (insn))
+               {
+                 source_bb = src;
+                 break;
+               }
+           }
+
+       if (source_bb)
+         break;
+      }
+
+  return source_bb;
+}
+
+/* Find all BB's with conditional jumps that are crossing edges;
+   insert a new bb and make the conditional jump branch to the new
+   bb instead (put the new bb in the same partition so the conditional
+   branch won't be a 'crossing' edge).  Insert an unconditional jump
+   from the new bb to the original destination of the conditional
+   jump.  */
+
+static void
+fix_crossing_conditional_branches (void)
+{
+  basic_block cur_bb;
+  basic_block new_bb;
+  basic_block last_bb;
+  basic_block dest;
+  edge succ1;
+  edge succ2;
+  edge crossing_edge;
+  edge new_edge;
+  rtx old_jump;
+  rtx set_src;
+  rtx old_label = NULL_RTX;
+  rtx new_label;
+  rtx new_jump;
+  rtx barrier;
+
+  last_bb = EXIT_BLOCK_PTR->prev_bb;
+
+  FOR_EACH_BB (cur_bb)
+    {
+      crossing_edge = NULL;
+      if (EDGE_COUNT (cur_bb->succs) > 0)
+       succ1 = EDGE_SUCC (cur_bb, 0);
+      else
+       succ1 = NULL;
+
+      if (EDGE_COUNT (cur_bb->succs) > 1)
+       succ2 = EDGE_SUCC (cur_bb, 1);
+      else
+       succ2 = NULL;
+
+      /* We already took care of fall-through edges, so only one successor
+        can be a crossing edge.  */
+
+      if (succ1 && (succ1->flags & EDGE_CROSSING))
+       crossing_edge = succ1;
+      else if (succ2 && (succ2->flags & EDGE_CROSSING))
+       crossing_edge = succ2;
+
+      if (crossing_edge)
+       {
+         old_jump = BB_END (cur_bb);
+
+         /* Check to make sure the jump instruction is a
+            conditional jump.  */
+
+         set_src = NULL_RTX;
+
+         if (any_condjump_p (old_jump))
+           {
+             if (GET_CODE (PATTERN (old_jump)) == SET)
+               set_src = SET_SRC (PATTERN (old_jump));
+             else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
+               {
+                 set_src = XVECEXP (PATTERN (old_jump), 0, 0);
+                 if (GET_CODE (set_src) == SET)
+                   set_src = SET_SRC (set_src);
+                 else
+                   set_src = NULL_RTX;
+               }
+           }
+
+         if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
+           {
+             if (GET_CODE (XEXP (set_src, 1)) == PC)
+               old_label = XEXP (set_src, 2);
+             else if (GET_CODE (XEXP (set_src, 2)) == PC)
+               old_label = XEXP (set_src, 1);
+
+             /* Check to see if new bb for jumping to that dest has
+                already been created; if so, use it; if not, create
+                a new one.  */
+
+             new_bb = find_jump_block (crossing_edge->dest);
+
+             if (new_bb)
+               new_label = block_label (new_bb);
+             else
+               {
+                 /* Create new basic block to be dest for
+                    conditional jump.  */
+
+                 new_bb = create_basic_block (NULL, NULL, last_bb);
+                 new_bb->aux = last_bb->aux;
+                 last_bb->aux = new_bb;
+                 last_bb = new_bb;
+                 /* Put appropriate instructions in new bb.  */
+
+                 new_label = gen_label_rtx ();
+                 emit_label_before (new_label, BB_HEAD (new_bb));
+                 BB_HEAD (new_bb) = new_label;
+
+                 if (GET_CODE (old_label) == LABEL_REF)
+                   {
+                     old_label = JUMP_LABEL (old_jump);
+                     new_jump = emit_jump_insn_after (gen_jump
+                                                      (old_label),
+                                                      BB_END (new_bb));
+                   }
+                 else
+                   {
+                     gcc_assert (HAVE_return
+                                 && GET_CODE (old_label) == RETURN);
+                     new_jump = emit_jump_insn_after (gen_return (),
+                                                      BB_END (new_bb));
+                   }
+
+                 barrier = emit_barrier_after (new_jump);
+                 JUMP_LABEL (new_jump) = old_label;
+                 new_bb->il.rtl->footer = unlink_insn_chain (barrier,
+                                                          barrier);
+
+                 /* Make sure new bb is in same partition as source
+                    of conditional branch.  */
+                 BB_COPY_PARTITION (new_bb, cur_bb);
+               }
+
+             /* Make old jump branch to new bb.  */
+
+             redirect_jump (old_jump, new_label, 0);
+
+             /* Remove crossing_edge as predecessor of 'dest'.  */
+
+             dest = crossing_edge->dest;
+
+             redirect_edge_succ (crossing_edge, new_bb);
+
+             /* Make a new edge from new_bb to old dest; new edge
+                will be a successor for new_bb and a predecessor
+                for 'dest'.  */
+
+             if (EDGE_COUNT (new_bb->succs) == 0)
+               new_edge = make_edge (new_bb, dest, 0);
+             else
+               new_edge = EDGE_SUCC (new_bb, 0);
+
+             crossing_edge->flags &= ~EDGE_CROSSING;
+             new_edge->flags |= EDGE_CROSSING;
+           }
+       }
+    }
+}
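
The added level of indirection can be pictured at the source level: the conditional branch is retargeted at a nearby stub block in its own section, and the stub carries the long unconditional jump. Invented labels, illustration only:

#include <stdio.h>

static int g (int cond)
{
  if (cond)
    goto stub;          /* short conditional branch to a same-section stub */
  return 1;

 stub:
  goto cold_target;     /* the stub's unconditional jump crosses sections */

 cold_target:
  return 0;
}

int main (void)
{
  printf ("%d %d\n", g (0), g (1));
  return 0;
}
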
+
+/* Find any unconditional branches that cross between hot and cold
+   sections.  Convert them into indirect jumps instead.  */
+
+static void
+fix_crossing_unconditional_branches (void)
+{
+  basic_block cur_bb;
+  rtx last_insn;
+  rtx label;
+  rtx label_addr;
+  rtx indirect_jump_sequence;
+  rtx jump_insn = NULL_RTX;
+  rtx new_reg;
+  rtx cur_insn;
+  edge succ;
+
+  FOR_EACH_BB (cur_bb)
+    {
+      last_insn = BB_END (cur_bb);
+
+      if (EDGE_COUNT (cur_bb->succs) < 1)
+       continue;
+
+      succ = EDGE_SUCC (cur_bb, 0);
+
+      /* Check to see if bb ends in a crossing (unconditional) jump.  At
+        this point, no crossing jumps should be conditional.  */
+
+      if (JUMP_P (last_insn)
+         && (succ->flags & EDGE_CROSSING))
+       {
+         rtx label2, table;
+
+         gcc_assert (!any_condjump_p (last_insn));
+
+         /* Make sure the jump is not already an indirect or table jump.  */
+
+         if (!computed_jump_p (last_insn)
+             && !tablejump_p (last_insn, &label2, &table))
+           {
+             /* We have found a "crossing" unconditional branch.  Now
+                we must convert it to an indirect jump.  First create a
+                reference to the label, as the target for the jump.  */
+
+             label = JUMP_LABEL (last_insn);
+             label_addr = gen_rtx_LABEL_REF (Pmode, label);
+             LABEL_NUSES (label) += 1;
+
+             /* Get a register to use for the indirect jump.  */
+
+             new_reg = gen_reg_rtx (Pmode);
+
+             /* Generate the indirect jump sequence.  */
+
+             start_sequence ();
+             emit_move_insn (new_reg, label_addr);
+             emit_indirect_jump (new_reg);
+             indirect_jump_sequence = get_insns ();
+             end_sequence ();
+
+             /* Make sure every instruction in the new jump sequence has
+                its basic block set to be cur_bb.  */
+
+             for (cur_insn = indirect_jump_sequence; cur_insn;
+                  cur_insn = NEXT_INSN (cur_insn))
+               {
+                 if (!BARRIER_P (cur_insn))
+                   BLOCK_FOR_INSN (cur_insn) = cur_bb;
+                 if (JUMP_P (cur_insn))
+                   jump_insn = cur_insn;
+               }
+
+             /* Insert the new (indirect) jump sequence immediately before
+                the unconditional jump, then delete the unconditional jump.  */
+
+             emit_insn_before (indirect_jump_sequence, last_insn);
+             delete_insn (last_insn);
+
+             /* Make BB_END for cur_bb be the jump instruction (NOT the
+                barrier instruction at the end of the sequence...).  */
+
+             BB_END (cur_bb) = jump_insn;
+           }
+       }
+    }
+}
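
At the source level the conversion corresponds to loading a label's address into a variable and jumping through it, which GNU C's computed-goto extension expresses directly. A sketch that requires GCC's &&label / goto * extension; the label name is invented:

#include <stdio.h>

int main (void)
{
  void *target = &&far_away;   /* like loading the label address into a reg */

  goto *target;                /* indirect jump: can reach any address */

 far_away:
  puts ("arrived via indirect jump");
  return 0;
}
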
+
+/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */
+
+static void
+add_reg_crossing_jump_notes (void)
+{
+  basic_block bb;
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_BB (bb)
+    FOR_EACH_EDGE (e, ei, bb->succs)
+      if ((e->flags & EDGE_CROSSING)
+         && JUMP_P (BB_END (e->src)))
+       add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
+}
+
+/* Hot and cold basic blocks are partitioned and put in separate
+   sections of the .o file, to reduce paging and improve cache
+   performance (hopefully).  This can result in bits of code from the
+   same function being widely separated in the .o file.  However, this
+   separation is not visible to the bb structure.  Therefore we must take
+   care to ensure that: 1). There are no fall_thru edges that cross
+   between sections; 2). For those architectures which have "short"
+   conditional branches, all conditional branches that attempt to
+   cross between sections are converted to unconditional branches;
+   and, 3). For those architectures which have "short" unconditional
+   branches, all unconditional branches that attempt to cross between
+   sections are converted to indirect jumps.
+
+   The code for fixing up fall_thru edges that cross between hot and
+   cold basic blocks does so by creating new basic blocks containing
+   unconditional branches to the appropriate label in the "other"
+   section.  The new basic block is then put in the same (hot or cold)
+   section as the source of the fall_thru edge, and that edge is
+   modified to fall into the new basic block instead.  By adding
+   this level of indirection we end up with only unconditional branches
+   crossing between hot and cold sections.
+
+   Conditional branches are dealt with by adding a level of indirection.
+   A new basic block is added in the same (hot/cold) section as the
+   conditional branch, and the conditional branch is retargeted to the
+   new basic block.  The new basic block contains an unconditional branch
+   to the original target of the conditional branch (in the other section).
+
+   Unconditional branches are dealt with by converting them into
+   indirect jumps.  */
+
+static void
+fix_edges_for_rarely_executed_code (edge *crossing_edges,
+                                   int n_crossing_edges)
+{
+  /* Make sure the source of any crossing edge ends in a jump and the
+     destination of any crossing edge has a label.  */
+
+  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);
+
+  /* Convert all crossing fall_thru edges to non-crossing fall
+     thrus by inserting unconditional jumps (that jump to the original
+     fall thru dest).  */
+
+  fix_up_fall_thru_edges ();
+
+  /* If the architecture does not have conditional branches that can
+     span all of memory, convert crossing conditional branches into
+     crossing unconditional branches.  */
+
+  if (!HAS_LONG_COND_BRANCH)
+    fix_crossing_conditional_branches ();
+
+  /* If the architecture does not have unconditional branches that
+     can span all of memory, convert crossing unconditional branches
+     into indirect jumps.  Since adding an indirect jump also adds
+     a new register usage, update the register usage information as
+     well.  */
+
+  if (!HAS_LONG_UNCOND_BRANCH)
+    fix_crossing_unconditional_branches ();
+
+  add_reg_crossing_jump_notes ();
+}
+
+/* Verify, in the basic block chain, that there is at most one switch
+   between hot/cold partitions. This is modelled on
+   rtl_verify_flow_info_1, but it cannot go inside that function
+   because this condition will not be true until after
+   reorder_basic_blocks is called.  */
+
+static void
+verify_hot_cold_block_grouping (void)
+{
+  basic_block bb;
+  int err = 0;
+  bool switched_sections = false;
+  int current_partition = 0;
+
+  FOR_EACH_BB (bb)
+    {
+      if (!current_partition)
+       current_partition = BB_PARTITION (bb);
+      if (BB_PARTITION (bb) != current_partition)
+       {
+         if (switched_sections)
+           {
+             error ("multiple hot/cold transitions found (bb %i)",
+                    bb->index);
+             err = 1;
+           }
+         else
+           {
+             switched_sections = true;
+             current_partition = BB_PARTITION (bb);
+           }
+       }
+    }
+
+  gcc_assert (!err);
+}
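
The invariant checked above is simply "at most one hot/cold transition along the block chain". The same check over a plain array of partition marks (toy encoding: 0 = hot, 1 = cold):

#include <stdio.h>

/* Return 1 iff the sequence switches value at most once.  */
static int at_most_one_transition (const int *part, int n)
{
  int i, switched = 0;
  for (i = 1; i < n; i++)
    if (part[i] != part[i - 1])
      {
        if (switched)
          return 0;            /* second transition: grouping violated */
        switched = 1;
      }
  return 1;
}

int main (void)
{
  int good[] = { 0, 0, 0, 1, 1 };   /* hot block run, then cold run */
  int bad[]  = { 0, 1, 0 };         /* cold block sandwiched between hot */
  printf ("%d %d\n",
          at_most_one_transition (good, 5),
          at_most_one_transition (bad, 3));
  return 0;
}
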
+
+/* Reorder basic blocks.  The main entry point to this file.  */
 
 void
-reorder_basic_blocks ()
+reorder_basic_blocks (void)
 {
   int n_traces;
   int i;
   struct trace *traces;
 
-  if (n_basic_blocks <= 1)
-    return;
+  gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
 
-  if ((* targetm.cannot_modify_jumps_p) ())
+  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
     return;
 
-  cfg_layout_initialize (NULL);
-
   set_edge_can_fallthru_flag ();
   mark_dfs_back_edges ();
 
-  /* We are estimating the lenght of uncond jump insn only once since the code
-     for getting the insn lenght always returns the minimal length now.  */
-  if (uncond_jump_length == 0) 
+  /* We are estimating the length of uncond jump insn only once since the code
+     for getting the insn length always returns the minimal length now.  */
+  if (uncond_jump_length == 0)
     uncond_jump_length = get_uncond_jump_length ();
 
   /* We need to know some information for each basic block.  */
   array_size = GET_ARRAY_SIZE (last_basic_block);
-  bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
+  bbd = XNEWVEC (bbro_basic_block_data, array_size);
   for (i = 0; i < array_size; i++)
     {
       bbd[i].start_of_trace = -1;
+      bbd[i].in_trace = -1;
       bbd[i].end_of_trace = -1;
       bbd[i].heap = NULL;
       bbd[i].node = NULL;
     }
 
-  traces = xmalloc (n_basic_blocks * sizeof (struct trace));
+  traces = XNEWVEC (struct trace, n_basic_blocks);
   n_traces = 0;
   find_traces (&n_traces, traces);
   connect_traces (n_traces, traces);
   FREE (traces);
   FREE (bbd);
 
-  if (rtl_dump_file)
-    dump_flow_info (rtl_dump_file);
+  relink_block_chain (/*stay_in_cfglayout_mode=*/true);
+
+  if (dump_file)
+    dump_flow_info (dump_file, dump_flags);
+
+  if (flag_reorder_blocks_and_partition)
+    verify_hot_cold_block_grouping ();
+}
+
+/* Determine which partition the first basic block in the function
+   belongs to, then find the first basic block in the current function
+   that belongs to a different section, and insert a
+   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
+   instruction stream.  When writing out the assembly code,
+   encountering this note will make the compiler switch between the
+   hot and cold text sections.  */
+
+static void
+insert_section_boundary_note (void)
+{
+  basic_block bb;
+  rtx new_note;
+  int first_partition = 0;
+
+  if (flag_reorder_blocks_and_partition)
+    FOR_EACH_BB (bb)
+    {
+      if (!first_partition)
+       first_partition = BB_PARTITION (bb);
+      if (BB_PARTITION (bb) != first_partition)
+       {
+         new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
+                                      BB_HEAD (bb));
+         /* ??? This kind of note always lives between basic blocks,
+            but add_insn_before will set BLOCK_FOR_INSN anyway.  */
+         BLOCK_FOR_INSN (new_note) = NULL;
+         break;
+       }
+    }
+}
+
+/* Duplicate the blocks containing computed gotos.  This basically unfactors
+   computed gotos that were factored early on in the compilation process to
+   speed up edge based data flow.  Formerly we did not unfactor them again,
+   which can seriously pessimize code with many computed jumps in the source
+   code, such as interpreters.  See e.g. PR15242.  */
+
+static bool
+gate_duplicate_computed_gotos (void)
+{
+  if (targetm.cannot_modify_jumps_p ())
+    return false;
+  return (optimize > 0
+         && flag_expensive_optimizations
+         && ! optimize_function_for_size_p (cfun));
+}
+
+
+static unsigned int
+duplicate_computed_gotos (void)
+{
+  basic_block bb, new_bb;
+  bitmap candidates;
+  int max_size;
+
+  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
+    return 0;
+
+  cfg_layout_initialize (0);
+
+  /* We are estimating the length of uncond jump insn only once
+     since the code for getting the insn length always returns
+     the minimal length now.  */
+  if (uncond_jump_length == 0)
+    uncond_jump_length = get_uncond_jump_length ();
+
+  max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
+  candidates = BITMAP_ALLOC (NULL);
 
+  /* Look for blocks that end in a computed jump, and see if such blocks
+     are suitable for unfactoring.  If a block is a candidate for unfactoring,
+     mark it in the candidates bitmap.  */
+  FOR_EACH_BB (bb)
+    {
+      rtx insn;
+      edge e;
+      edge_iterator ei;
+      int size, all_flags;
+
+      /* Build the reorder chain for the original order of blocks.  */
+      if (bb->next_bb != EXIT_BLOCK_PTR)
+       bb->aux = bb->next_bb;
+
+      /* Obviously the block has to end in a computed jump.  */
+      if (!computed_jump_p (BB_END (bb)))
+       continue;
+
+      /* Only consider blocks that can be duplicated.  */
+      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
+         || !can_duplicate_block_p (bb))
+       continue;
+
+      /* Make sure that the block is small enough.  */
+      size = 0;
+      FOR_BB_INSNS (bb, insn)
+       if (INSN_P (insn))
+         {
+           size += get_attr_min_length (insn);
+           if (size > max_size)
+              break;
+         }
+      if (size > max_size)
+       continue;
+
+      /* Final check: there must not be any incoming abnormal edges.  */
+      all_flags = 0;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+       all_flags |= e->flags;
+      if (all_flags & EDGE_COMPLEX)
+       continue;
+
+      bitmap_set_bit (candidates, bb->index);
+    }
+
+  /* Nothing to do if there is no computed jump here.  */
+  if (bitmap_empty_p (candidates))
+    goto done;
+
+  /* Duplicate computed gotos.  */
+  FOR_EACH_BB (bb)
+    {
+      if (bb->il.rtl->visited)
+       continue;
+
+      bb->il.rtl->visited = 1;
+
+      /* BB must have one outgoing edge.  That edge must not lead to
+        the exit block or the next block.
+        The destination must have more than one predecessor.  */
+      if (!single_succ_p (bb)
+         || single_succ (bb) == EXIT_BLOCK_PTR
+         || single_succ (bb) == bb->next_bb
+         || single_pred_p (single_succ (bb)))
+       continue;
+
+      /* The successor block has to be a duplication candidate.  */
+      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
+       continue;
+
+      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
+      new_bb->aux = bb->aux;
+      bb->aux = new_bb;
+      new_bb->il.rtl->visited = 1;
+    }
+
+done:
+  cfg_layout_finalize ();
+
+  BITMAP_FREE (candidates);
+  return 0;
+}
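
The pessimization this pass addresses is most visible in a threaded interpreter: factoring funnels every dispatch through one shared computed goto, while unfactoring gives each handler its own copy of the dispatch, which branch predictors handle far better. A minimal GNU C dispatch loop of the unfactored shape; the opcodes and handlers are invented:

#include <stdio.h>

int main (void)
{
  /* Tiny "program": 0 = increment, 1 = print, 2 = halt.  */
  static const int code[] = { 0, 0, 1, 2 };
  static const void *dispatch[] = { &&op_inc, &&op_print, &&op_halt };
  int pc = 0, acc = 0;

  goto *dispatch[code[pc++]];      /* initial dispatch */

 op_inc:
  acc++;
  goto *dispatch[code[pc++]];      /* per-handler copy of the dispatch */

 op_print:
  printf ("%d\n", acc);
  goto *dispatch[code[pc++]];

 op_halt:
  return 0;
}
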
+
+struct rtl_opt_pass pass_duplicate_computed_gotos =
+{
+ {
+  RTL_PASS,
+  "compgotos",                          /* name */
+  gate_duplicate_computed_gotos,        /* gate */
+  duplicate_computed_gotos,             /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_REORDER_BLOCKS,                    /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func | TODO_verify_rtl_sharing,/* todo_flags_finish */
+ }
+};
+
+
+/* This function is the main entry point for the optimization that
+   partitions hot and cold basic blocks into separate sections of the
+   .o file (to improve performance and cache locality).  Ideally it
+   would be called after all optimizations that rearrange the CFG have
+   been called.  However part of this optimization may introduce new
+   register usage, so it must be called before register allocation has
+   occurred.  This means that this optimization is actually called
+   well before the optimization that reorders basic blocks (see
+   function above).
+
+   This optimization checks the feedback information to determine
+   which basic blocks are hot/cold, and updates flags on the basic blocks
+   to indicate which section they belong in.  This information is
+   later used for writing out sections in the .o file.  Because hot
+   and cold sections can be arbitrarily large (within the bounds of
+   memory), far beyond the size of a single function, it is necessary
+   to fix up all edges that cross section boundaries, to make sure the
+   instructions used can actually span the required distance.  The
+   fixes are described below.
+
+   Fall-through edges must be changed into jumps; it is not safe or
+   legal to fall through across a section boundary.  Whenever a
+   fall-through edge crossing a section boundary is encountered, a new
+   basic block is inserted (in the same section as the fall-through
+   source), and the fall through edge is redirected to the new basic
+   block.  The new basic block contains an unconditional jump to the
+   original fall-through target.  (If the unconditional jump is
+   insufficient to cross section boundaries, that is dealt with a
+   little later, see below).
+
+   In order to deal with architectures that have short conditional
+   branches (which cannot span all of memory) we take any conditional
+   jump that attempts to cross a section boundary and add a level of
+   indirection: it becomes a conditional jump to a new basic block, in
+   the same section.  The new basic block contains an unconditional
+   jump to the original target, in the other section.
+
+   For those architectures whose unconditional branch is also
+   incapable of reaching all of memory, those unconditional jumps are
+   converted into indirect jumps, through a register.
+
+   IMPORTANT NOTE: This optimization causes some messy interactions
+   with the cfg cleanup optimizations; those optimizations want to
+   merge blocks wherever possible, and to collapse indirect jump
+   sequences (change "A jumps to B jumps to C" directly into "A jumps
+   to C").  Those optimizations can undo the jump fixes that
+   partitioning is required to make (see above), in order to ensure
+   that jumps attempting to cross section boundaries are really able
+   to cover whatever distance the jump requires (on many architectures
+   conditional or unconditional jumps are not able to reach all of
+   memory).  Therefore tests have to be inserted into each such
+   optimization to make sure that it does not undo stuff necessary to
+   cross partition boundaries.  This would be much less of a problem
+   if we could perform this optimization later in the compilation, but
+   unfortunately the fact that we may need to create indirect jumps
+   (through registers) requires that this optimization be performed
+   before register allocation.  */
+
+static void
+partition_hot_cold_basic_blocks (void)
+{
+  edge *crossing_edges;
+  int n_crossing_edges;
+  int max_edges = 2 * last_basic_block;
+
+  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
+    return;
+
+  crossing_edges = XCNEWVEC (edge, max_edges);
+
+  find_rarely_executed_basic_blocks_and_crossing_edges (&crossing_edges,
+                                                       &n_crossing_edges,
+                                                       &max_edges);
+
+  if (n_crossing_edges > 0)
+    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);
+
+  free (crossing_edges);
+}
+\f
+static bool
+gate_handle_reorder_blocks (void)
+{
+  if (targetm.cannot_modify_jumps_p ())
+    return false;
+  return (optimize > 0);
+}
+
+
+/* Reorder basic blocks.  */
+static unsigned int
+rest_of_handle_reorder_blocks (void)
+{
+  basic_block bb;
+
+  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
+     splitting possibly introduced more crossjumping opportunities.  */
+  cfg_layout_initialize (CLEANUP_EXPENSIVE);
+
+  if ((flag_reorder_blocks || flag_reorder_blocks_and_partition)
+      /* Don't reorder blocks when optimizing for size because extra jump insns may
+        be created; also a barrier may create extra padding.
+
+        More correctly we should have a block reordering mode that tried to
+        minimize the combined size of all the jumps.  This would more or less
+        automatically remove extra jumps, but would also try to use more short
+        jumps instead of long jumps.  */
+      && optimize_function_for_speed_p (cfun))
+    {
+      reorder_basic_blocks ();
+      cleanup_cfg (CLEANUP_EXPENSIVE);
+    }
+
+  FOR_EACH_BB (bb)
+    if (bb->next_bb != EXIT_BLOCK_PTR)
+      bb->aux = bb->next_bb;
   cfg_layout_finalize ();
+
+  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
+  insert_section_boundary_note ();
+  return 0;
+}
+
+struct rtl_opt_pass pass_reorder_blocks =
+{
+ {
+  RTL_PASS,
+  "bbro",                               /* name */
+  gate_handle_reorder_blocks,           /* gate */
+  rest_of_handle_reorder_blocks,        /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_REORDER_BLOCKS,                    /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func | TODO_verify_rtl_sharing,/* todo_flags_finish */
+ }
+};
+
+static bool
+gate_handle_partition_blocks (void)
+{
+  /* The optimization to partition hot/cold basic blocks into separate
+     sections of the .o file does not work well with linkonce or with
+     user defined section attributes.  Don't call it if either case
+     arises.  */
+
+  return (flag_reorder_blocks_and_partition
+         && !DECL_ONE_ONLY (current_function_decl)
+         && !user_defined_section_attribute);
 }
+
+/* Partition hot and cold basic blocks.  */
+static unsigned int
+rest_of_handle_partition_blocks (void)
+{
+  partition_hot_cold_basic_blocks ();
+  return 0;
+}
+
+struct rtl_opt_pass pass_partition_blocks =
+{
+ {
+  RTL_PASS,
+  "bbpart",                             /* name */
+  gate_handle_partition_blocks,         /* gate */
+  rest_of_handle_partition_blocks,      /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_REORDER_BLOCKS,                    /* tv_id */
+  PROP_cfglayout,                       /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func | TODO_verify_rtl_sharing/* todo_flags_finish */
+ }
+};