/* Instruction scheduling pass.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
and currently maintained by, Jim Wilson (wilson@cygnus.com)
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
-Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA. */
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
/* This pass implements list scheduling within basic blocks. It is
run twice: (1) after flow analysis, but before register allocation,
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
-#include "basic-block.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "params.h"
#include "sched-int.h"
#include "target.h"
+#include "timevar.h"
+#include "tree-pass.h"
/* Define when we want to do count REG_DEAD notes before and after scheduling
for sanity checking. We can't do that when conditional execution is used,
/* The number of the region containing a block. */
static int *containing_rgn;
+/* The minimum probability of reaching a source block so that it will be
+ considered for speculative scheduling. */
+static int min_spec_prob;
+
#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
#define BLOCK_TO_BB(block) (block_to_bb[block])
#define IS_DOMINATED(bb_src, bb_trg) \
( TEST_BIT (dom[bb_src], bb_trg) )
-/* Probability: Prob[i] is a float in [0, 1] which is the probability
- of bb i relative to the region entry. */
-static float *prob;
-
-/* The probability of bb_src, relative to bb_trg. Note, that while the
- 'prob[bb]' is a float in [0, 1], this macro returns an integer
- in [0, 100]. */
-#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
- prob[bb_trg])))
+/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
+ the probability of bb i relative to the region entry. */
+static int *prob;
/* Bit-set of edges, where bit i stands for edge i. */
typedef sbitmap edgeset;
#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
-/* Parameters affecting the decision of rank_for_schedule().
- ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
-#define MIN_PROBABILITY 40
-
/* Speculative scheduling functions. */
static int check_live_1 (int, rtx);
static void update_live_1 (int, rtx);
{
basic_block b;
rtx insn;
- RTX_CODE code;
/* If we have a label that could be the target of a nonlocal goto, then
the cfg is not well structured. */
if (forced_labels)
return 1;
- /* If this function has a computed jump, then we consider the cfg
- not well structured. */
- if (current_function_has_computed_jump)
- return 1;
-
/* If we have exception handlers, then we consider the cfg not well
structured. ?!? We should be able to handle this now that flow.c
computes an accurate cfg for EH. */
/* If we have non-jumping insns which refer to labels, then we consider
the cfg not well structured. */
- /* Check for labels referred to other thn by jumps. */
FOR_EACH_BB (b)
- for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
+ FOR_BB_INSNS (b, insn)
{
- code = GET_CODE (insn);
- if (INSN_P (insn) && code != JUMP_INSN)
+ /* Check for labels referred by non-jump insns. */
+ if (NONJUMP_INSN_P (insn) || CALL_P (insn))
{
rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
-
if (note
&& ! (JUMP_P (NEXT_INSN (insn))
&& find_reg_note (NEXT_INSN (insn), REG_LABEL,
XEXP (note, 0))))
return 1;
}
-
- if (insn == BB_END (b))
- break;
+ /* If this function has a computed jump, then we consider the cfg
+ not well structured. */
+ else if (JUMP_P (insn) && computed_jump_p (insn))
+ return 1;
}
/* Unreachable loops with more than one basic block are detected
FOR_EACH_BB (b)
{
if (EDGE_COUNT (b->preds) == 0
- || (EDGE_PRED (b, 0)->src == b
- && EDGE_COUNT (b->preds) == 1))
+ || (single_pred_p (b)
+ && single_pred (b) == b))
return 1;
}
static void
extract_edgelst (sbitmap set, edgelst *el)
{
- int i;
+ unsigned int i = 0;
+ sbitmap_iterator sbi;
/* edgelst table space is reused in each call to extract_edgelst. */
edgelst_last = 0;
el->nr_members = 0;
/* Iterate over each word in the bitset. */
- EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
- {
- edgelst_table[edgelst_last++] = rgn_edges[i];
- el->nr_members++;
- });
+ EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
+ {
+ edgelst_table[edgelst_last++] = rgn_edges[i];
+ el->nr_members++;
+ }
}
/* Functions for the construction of regions. */
STACK, SP and DFS_NR are only used during the first traversal. */
/* Allocate and initialize variables for the first traversal. */
- max_hdr = xmalloc (last_basic_block * sizeof (int));
- dfs_nr = xcalloc (last_basic_block, sizeof (int));
- stack = xmalloc (n_edges * sizeof (edge_iterator));
+ max_hdr = XNEWVEC (int, last_basic_block);
+ dfs_nr = XCNEWVEC (int, last_basic_block);
+ stack = XNEWVEC (edge_iterator, n_edges);
inner = sbitmap_alloc (last_basic_block);
sbitmap_ones (inner);
/* DFS traversal to find inner loops in the cfg. */
- current_edge = ei_start (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest->succs);
+ current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
sp = -1;
while (1)
/* Second traversal:find reducible inner loops and topologically sort
block of each region. */
- queue = xmalloc (n_basic_blocks * sizeof (int));
+ queue = XNEWVEC (int, n_basic_blocks);
/* Find blocks which are inner loop headers. We still have non-reducible
loops to consider at this point. */
FOR_EACH_BB (jbb)
/* Leaf nodes have only a single successor which must
be EXIT_BLOCK. */
- if (EDGE_COUNT (jbb->succs) == 1
- && EDGE_SUCC (jbb, 0)->dest == EXIT_BLOCK_PTR)
+ if (single_succ_p (jbb)
+ && single_succ (jbb) == EXIT_BLOCK_PTR)
{
queue[++tail] = jbb->index;
SET_BIT (in_queue, jbb->index);
static void
compute_dom_prob_ps (int bb)
{
- int pred_bb;
- int nr_out_edges, nr_rgn_out_edges;
- edge_iterator in_ei, out_ei;
- edge in_edge, out_edge;
+ edge_iterator in_ei;
+ edge in_edge;
- prob[bb] = 0.0;
if (IS_RGN_ENTRY (bb))
{
SET_BIT (dom[bb], 0);
- prob[bb] = 1.0;
+ prob[bb] = REG_BR_PROB_BASE;
return;
}
+ prob[bb] = 0;
+
/* Initialize dom[bb] to '111..1'. */
sbitmap_ones (dom[bb]);
FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
{
+ int pred_bb;
+ edge out_edge;
+ edge_iterator out_ei;
+
if (in_edge->src == ENTRY_BLOCK_PTR)
continue;
sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
- nr_out_edges = 0;
- nr_rgn_out_edges = 0;
-
FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
- {
- ++nr_out_edges;
-
- /* The successor doesn't belong in the region? */
- if (out_edge->dest != EXIT_BLOCK_PTR
- && CONTAINING_RGN (out_edge->dest->index)
- != CONTAINING_RGN (BB_TO_BLOCK (bb)))
- ++nr_rgn_out_edges;
-
- SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
- }
+ SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
- /* Now nr_rgn_out_edges is the number of region-exit edges from
- pred, and nr_out_edges will be the number of pred out edges
- not leaving the region. */
- nr_out_edges -= nr_rgn_out_edges;
- if (nr_rgn_out_edges > 0)
- prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
- else
- prob[bb] += prob[pred_bb] / nr_out_edges;
+ prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
}
SET_BIT (dom[bb], bb);
if (sched_verbose >= 2)
fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
- (int) (100.0 * prob[bb]));
+ (100 * prob[bb]) / REG_BR_PROB_BASE);
}
/* Functions for target info. */
edgelst el;
int i, j, k, update_idx;
basic_block block;
+ sbitmap visited;
edge_iterator ei;
edge e;
sp = candidate_table + trg;
sp->is_valid = 1;
sp->is_speculative = 0;
- sp->src_prob = 100;
+ sp->src_prob = REG_BR_PROB_BASE;
+
+ visited = sbitmap_alloc (last_basic_block);
for (i = trg + 1; i < current_nr_blocks; i++)
{
sp->is_valid = IS_DOMINATED (i, trg);
if (sp->is_valid)
{
- sp->src_prob = GET_SRC_PROB (i, trg);
- sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
+ int tf = prob[trg], cf = prob[i];
+
+ /* In CFGs with low probability edges TF can possibly be zero. */
+ sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
+ sp->is_valid = (sp->src_prob >= min_spec_prob);
}
if (sp->is_valid)
overrunning the end of the bblst_table. */
update_idx = 0;
+ sbitmap_zero (visited);
for (j = 0; j < el.nr_members; j++)
{
block = el.first_member[j]->src;
FOR_EACH_EDGE (e, ei, block->succs)
{
- if (!(e->dest->flags & BB_VISITED))
+ if (!TEST_BIT (visited, e->dest->index))
{
for (k = 0; k < el.nr_members; k++)
if (e == el.first_member[k])
if (k >= el.nr_members)
{
bblst_table[bblst_last++] = e->dest;
- e->dest->flags |= BB_VISITED;
+ SET_BIT (visited, e->dest->index);
update_idx++;
}
}
}
sp->update_bbs.nr_members = update_idx;
- FOR_ALL_BB (block)
- block->flags &= ~BB_VISITED;
-
/* Make sure we didn't overrun the end of bblst_table. */
gcc_assert (bblst_last <= bblst_size);
}
sp->src_prob = 0;
}
}
+
+ sbitmap_free (visited);
}
/* Print candidates info, for debugging purposes. Callable from debugger. */
if (reg == 0)
return 1;
- while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
- || GET_CODE (reg) == SIGN_EXTRACT
+ while (GET_CODE (reg) == SUBREG
+ || GET_CODE (reg) == ZERO_EXTRACT
|| GET_CODE (reg) == STRICT_LOW_PART)
reg = XEXP (reg, 0);
{
basic_block b = candidate_table[src].split_bbs.first_member[i];
- if (REGNO_REG_SET_P (b->global_live_at_start, regno + j))
+ if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start,
+ regno + j))
{
return 0;
}
{
basic_block b = candidate_table[src].split_bbs.first_member[i];
- if (REGNO_REG_SET_P (b->global_live_at_start, regno))
+ if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start, regno))
{
return 0;
}
if (reg == 0)
return;
- while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
- || GET_CODE (reg) == SIGN_EXTRACT
+ while (GET_CODE (reg) == SUBREG
+ || GET_CODE (reg) == ZERO_EXTRACT
|| GET_CODE (reg) == STRICT_LOW_PART)
reg = XEXP (reg, 0);
{
basic_block b = candidate_table[src].update_bbs.first_member[i];
- SET_REGNO_REG_SET (b->global_live_at_start, regno + j);
+ SET_REGNO_REG_SET (b->il.rtl->global_live_at_start,
+ regno + j);
}
}
}
{
basic_block b = candidate_table[src].update_bbs.first_member[i];
- SET_REGNO_REG_SET (b->global_live_at_start, regno);
+ SET_REGNO_REG_SET (b->il.rtl->global_live_at_start, regno);
}
}
}
(bb_from == bb_to \
|| IS_RGN_ENTRY (bb_from) \
|| (TEST_BIT (ancestor_edges[bb_to], \
- EDGE_TO_BIT (EDGE_PRED (BASIC_BLOCK (BB_TO_BLOCK (bb_from)), 0)))))
+ EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
/* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
/* Prepare current target block info. */
if (current_nr_blocks > 1)
{
- candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
+ candidate_table = XNEWVEC (candidate, current_nr_blocks);
bblst_last = 0;
/* bblst_table holds split blocks and update blocks for each block after
the TO blocks of region edges, so there can be at most rgn_nr_edges
of them. */
bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
- bblst_table = xmalloc (bblst_size * sizeof (basic_block));
+ bblst_table = XNEWVEC (basic_block, bblst_size);
edgelst_last = 0;
- edgelst_table = xmalloc (rgn_nr_edges * sizeof (edge));
+ edgelst_table = XNEWVEC (edge, rgn_nr_edges);
compute_trg_info (target_bb);
}
cc0 setters remain at the end because they can't be moved away from
their cc0 user.
+ COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
+
Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
are not moved before reload because we can wind up with register
allocation failures. */
{
if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
{
- add_dependence (last, insn, REG_DEP_ANTI);
+ if (! sched_insns_conditions_mutex_p (last, insn))
+ add_dependence (last, insn, REG_DEP_ANTI);
INSN_REF_COUNT (insn)++;
}
if (INSN_REF_COUNT (insn) != 0)
continue;
- add_dependence (last, insn, REG_DEP_ANTI);
+ if (! sched_insns_conditions_mutex_p (last, insn))
+ add_dependence (last, insn, REG_DEP_ANTI);
INSN_REF_COUNT (insn) = 1;
}
+
+#ifdef HAVE_conditional_execution
+ /* Finally, if the block ends in a jump, and we are doing intra-block
+ scheduling, make sure that the branch depends on any COND_EXEC insns
+ inside the block to avoid moving the COND_EXECs past the branch insn.
+
+ We only have to do this after reload, because (1) before reload there
+ are no COND_EXEC insns, and (2) the region scheduler is an intra-block
+ scheduler after reload.
+
+ FIXME: We could in some cases move COND_EXEC insns past the branch if
+ this scheduler would be a little smarter. Consider this code:
+
+ T = [addr]
+ C ? addr += 4
+ !C ? X += 12
+ C ? T += 1
+ C ? jump foo
+
+ On a target with a one cycle stall on a memory access the optimal
+ sequence would be:
+
+ T = [addr]
+ C ? addr += 4
+ C ? T += 1
+ C ? jump foo
+ !C ? X += 12
+
+ We don't want to put the 'X += 12' before the branch because it just
+ wastes a cycle of execution time when the branch is taken.
+
+ Note that in the example "!C" will always be true. That is another
+ possible improvement for handling COND_EXECs in this scheduler: it
+ could remove always-true predicates. */
+
+ if (!reload_completed || ! JUMP_P (tail))
+ return;
+
+ insn = tail;
+ while (insn != head)
+ {
+ insn = PREV_INSN (insn);
+
+ /* Note that we want to add this dependency even when
+ sched_insns_conditions_mutex_p returns true. The whole point
+ is that we _want_ this dependency, even if these insns really
+ are independent. */
+ if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
+ add_dependence (tail, insn, REG_DEP_ANTI);
+ }
+#endif
}
/* Data structures for the computation of data dependences in a regions. We
init_deps_global ();
/* Initializations for region data dependence analysis. */
- bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
+ bb_deps = XNEWVEC (struct deps, current_nr_blocks);
for (bb = 0; bb < current_nr_blocks; bb++)
init_deps (bb_deps + bb);
/* Compute interblock info: probabilities, split-edges, dominators, etc. */
if (current_nr_blocks > 1)
{
- prob = xmalloc ((current_nr_blocks) * sizeof (float));
+ prob = XNEWVEC (int, current_nr_blocks);
dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
sbitmap_vector_zero (dom, current_nr_blocks);
SET_EDGE_TO_BIT (e, rgn_nr_edges++);
}
- rgn_edges = xmalloc (rgn_nr_edges * sizeof (edge));
+ rgn_edges = XNEWVEC (edge, rgn_nr_edges);
rgn_nr_edges = 0;
FOR_EACH_BB (block)
{
for (note = REG_NOTES (head); note; note = XEXP (note, 1))
if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
- {
- remove_note (head, note);
- note = XEXP (note, 1);
- remove_note (head, note);
- }
+ remove_note (head, note);
}
/* Remove remaining note insns from the block, save them in
int rgn;
nr_regions = 0;
- rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
- rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
- block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
- containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
+ rgn_table = XNEWVEC (region, n_basic_blocks);
+ rgn_bb_table = XNEWVEC (int, n_basic_blocks);
+ block_to_bb = XNEWVEC (int, last_basic_block);
+ containing_rgn = XNEWVEC (int, last_basic_block);
/* Compute regions for scheduling. */
if (reload_completed
- || n_basic_blocks == 1
+ || n_basic_blocks == NUM_FIXED_BLOCKS + 1
|| !flag_schedule_interblock
|| is_cfg_nonregular ())
{
if (CHECK_DEAD_NOTES)
{
blocks = sbitmap_alloc (last_basic_block);
- deaths_in_region = xmalloc (sizeof (int) * nr_regions);
+ deaths_in_region = XNEWVEC (int, nr_regions);
/* Remove all death notes from the subroutine. */
for (rgn = 0; rgn < nr_regions; rgn++)
{
count_or_remove_death_notes (NULL, 1);
}
-/* The one entry point in this file. DUMP_FILE is the dump file for
- this pass. */
+/* The one entry point in this file. */
void
-schedule_insns (FILE *dump_file)
+schedule_insns (void)
{
sbitmap large_region_blocks, blocks;
int rgn;
/* Taking care of this degenerate case makes the rest of
this code simpler. */
- if (n_basic_blocks == 0)
+ if (n_basic_blocks == NUM_FIXED_BLOCKS)
return;
nr_inter = 0;
nr_spec = 0;
+ sched_init ();
- sched_init (dump_file);
+ min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
+ / 100);
init_regions ();
current_sched_info = ®ion_sched_info;
-
/* Schedule every region in the subroutine. */
for (rgn = 0; rgn < nr_regions; rgn++)
schedule_region (rgn);
sbitmap_free (large_region_blocks);
}
#endif
+\f
+static bool
+gate_handle_sched (void)
+{
+#ifdef INSN_SCHEDULING
+ return flag_schedule_insns;
+#else
+ return 0;
+#endif
+}
+
+/* Run instruction scheduler. */
+static unsigned int
+rest_of_handle_sched (void)
+{
+#ifdef INSN_SCHEDULING
+ /* Do control and data sched analysis,
+ and write some of the results to dump file. */
+
+ schedule_insns ();
+#endif
+ return 0;
+}
+
+static bool
+gate_handle_sched2 (void)
+{
+#ifdef INSN_SCHEDULING
+ return optimize > 0 && flag_schedule_insns_after_reload;
+#else
+ return 0;
+#endif
+}
+
+/* Run second scheduling pass after reload. */
+static unsigned int
+rest_of_handle_sched2 (void)
+{
+#ifdef INSN_SCHEDULING
+ /* Do control and data sched analysis again,
+ and write some more of the results to dump file. */
+
+ split_all_insns (1);
+
+ if (flag_sched2_use_superblocks || flag_sched2_use_traces)
+ {
+ schedule_ebbs ();
+ /* No liveness updating code yet, but it should be easy to do.
+ reg-stack recomputes the liveness when needed for now. */
+ count_or_remove_death_notes (NULL, 1);
+ cleanup_cfg (CLEANUP_EXPENSIVE);
+ }
+ else
+ schedule_insns ();
+#endif
+ return 0;
+}
+
+struct tree_opt_pass pass_sched =
+{
+ "sched1", /* name */
+ gate_handle_sched, /* gate */
+ rest_of_handle_sched, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_SCHED, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func |
+ TODO_ggc_collect, /* todo_flags_finish */
+ 'S' /* letter */
+};
+
+struct tree_opt_pass pass_sched2 =
+{
+ "sched2", /* name */
+ gate_handle_sched2, /* gate */
+ rest_of_handle_sched2, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_SCHED2, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func |
+ TODO_ggc_collect, /* todo_flags_finish */
+ 'R' /* letter */
+};
+