X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fsched-rgn.c;h=3109786576f03298612861e8f9cca0e8dd26dfa7;hb=2dde0cc6a6036301a92f5dfd1698d15e8bbcc821;hp=edaf79627db1219e762b323374d5fe70072a0a4f;hpb=e0dde8f8f8975b893fb6604a17de35ca881e85e9;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c index edaf79627db..3109786576f 100644 --- a/gcc/sched-rgn.c +++ b/gcc/sched-rgn.c @@ -18,8 +18,8 @@ for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ /* This pass implements list scheduling within basic blocks. It is run twice: (1) after flow analysis, but before register allocation, @@ -65,6 +65,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "params.h" #include "sched-int.h" #include "target.h" +#include "timevar.h" +#include "tree-pass.h" /* Define when we want to do count REG_DEAD notes before and after scheduling for sanity checking. We can't do that when conditional execution is used, @@ -117,6 +119,10 @@ static int *block_to_bb; /* The number of the region containing a block. */ static int *containing_rgn; +/* The minimum probability of reaching a source block so that it will be + considered for speculative scheduling. */ +static int min_spec_prob; + #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks) #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks) #define BLOCK_TO_BB(block) (block_to_bb[block]) @@ -209,15 +215,9 @@ static sbitmap *dom; #define IS_DOMINATED(bb_src, bb_trg) \ ( TEST_BIT (dom[bb_src], bb_trg) ) -/* Probability: Prob[i] is a float in [0, 1] which is the probability - of bb i relative to the region entry. */ -static float *prob; - -/* The probability of bb_src, relative to bb_trg. Note, that while the - 'prob[bb]' is a float in [0, 1], this macro returns an integer - in [0, 100]. */ -#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \ - prob[bb_trg]))) +/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is + the probability of bb i relative to the region entry. */ +static int *prob; /* Bit-set of edges, where bit i stands for edge i. */ typedef sbitmap edgeset; @@ -249,10 +249,6 @@ static void compute_dom_prob_ps (int); #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN)))) #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN))) -/* Parameters affecting the decision of rank_for_schedule(). - ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */ -#define MIN_PROBABILITY 40 - /* Speculative scheduling functions. */ static int check_live_1 (int, rtx); static void update_live_1 (int, rtx); @@ -351,7 +347,7 @@ is_cfg_nonregular (void) static void extract_edgelst (sbitmap set, edgelst *el) { - unsigned int i; + unsigned int i = 0; sbitmap_iterator sbi; /* edgelst table space is reused in each call to extract_edgelst. */ @@ -514,9 +510,9 @@ find_rgns (void) STACK, SP and DFS_NR are only used during the first traversal. */ /* Allocate and initialize variables for the first traversal. 
*/ - max_hdr = xmalloc (last_basic_block * sizeof (int)); - dfs_nr = xcalloc (last_basic_block, sizeof (int)); - stack = xmalloc (n_edges * sizeof (edge_iterator)); + max_hdr = XNEWVEC (int, last_basic_block); + dfs_nr = XCNEWVEC (int, last_basic_block); + stack = XNEWVEC (edge_iterator, n_edges); inner = sbitmap_alloc (last_basic_block); sbitmap_ones (inner); @@ -660,7 +656,7 @@ find_rgns (void) /* Second traversal:find reducible inner loops and topologically sort block of each region. */ - queue = xmalloc (n_basic_blocks * sizeof (int)); + queue = XNEWVEC (int, n_basic_blocks); /* Find blocks which are inner loop headers. We still have non-reducible loops to consider at this point. */ @@ -900,24 +896,27 @@ find_rgns (void) static void compute_dom_prob_ps (int bb) { - int pred_bb; - int nr_out_edges, nr_rgn_out_edges; - edge_iterator in_ei, out_ei; - edge in_edge, out_edge; + edge_iterator in_ei; + edge in_edge; - prob[bb] = 0.0; if (IS_RGN_ENTRY (bb)) { SET_BIT (dom[bb], 0); - prob[bb] = 1.0; + prob[bb] = REG_BR_PROB_BASE; return; } + prob[bb] = 0; + /* Initialize dom[bb] to '111..1'. */ sbitmap_ones (dom[bb]); FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds) { + int pred_bb; + edge out_edge; + edge_iterator out_ei; + if (in_edge->src == ENTRY_BLOCK_PTR) continue; @@ -930,30 +929,10 @@ compute_dom_prob_ps (int bb) sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]); - nr_out_edges = 0; - nr_rgn_out_edges = 0; - FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs) - { - ++nr_out_edges; - - /* The successor doesn't belong in the region? */ - if (out_edge->dest != EXIT_BLOCK_PTR - && CONTAINING_RGN (out_edge->dest->index) - != CONTAINING_RGN (BB_TO_BLOCK (bb))) - ++nr_rgn_out_edges; - - SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge)); - } + SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge)); - /* Now nr_rgn_out_edges is the number of region-exit edges from - pred, and nr_out_edges will be the number of pred out edges - not leaving the region. */ - nr_out_edges -= nr_rgn_out_edges; - if (nr_rgn_out_edges > 0) - prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges; - else - prob[bb] += prob[pred_bb] / nr_out_edges; + prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE); } SET_BIT (dom[bb], bb); @@ -961,7 +940,7 @@ compute_dom_prob_ps (int bb) if (sched_verbose >= 2) fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), - (int) (100.0 * prob[bb])); + (100 * prob[bb]) / REG_BR_PROB_BASE); } /* Functions for target info. */ @@ -999,9 +978,9 @@ compute_trg_info (int trg) sp = candidate_table + trg; sp->is_valid = 1; sp->is_speculative = 0; - sp->src_prob = 100; + sp->src_prob = REG_BR_PROB_BASE; - visited = sbitmap_alloc (last_basic_block - (INVALID_BLOCK + 1)); + visited = sbitmap_alloc (last_basic_block); for (i = trg + 1; i < current_nr_blocks; i++) { @@ -1010,8 +989,11 @@ compute_trg_info (int trg) sp->is_valid = IS_DOMINATED (i, trg); if (sp->is_valid) { - sp->src_prob = GET_SRC_PROB (i, trg); - sp->is_valid = (sp->src_prob >= MIN_PROBABILITY); + int tf = prob[trg], cf = prob[i]; + + /* In CFGs with low probability edges TF can possibly be zero. */ + sp->src_prob = (tf ? 
((cf * REG_BR_PROB_BASE) / tf) : 0); + sp->is_valid = (sp->src_prob >= min_spec_prob); } if (sp->is_valid) @@ -1046,8 +1028,7 @@ compute_trg_info (int trg) block = el.first_member[j]->src; FOR_EACH_EDGE (e, ei, block->succs) { - if (!TEST_BIT (visited, - e->dest->index - (INVALID_BLOCK + 1))) + if (!TEST_BIT (visited, e->dest->index)) { for (k = 0; k < el.nr_members; k++) if (e == el.first_member[k]) @@ -1056,8 +1037,7 @@ compute_trg_info (int trg) if (k >= el.nr_members) { bblst_table[bblst_last++] = e->dest; - SET_BIT (visited, - e->dest->index - (INVALID_BLOCK + 1)); + SET_BIT (visited, e->dest->index); update_idx++; } } @@ -1589,7 +1569,7 @@ init_ready_list (struct ready_list *ready) /* Prepare current target block info. */ if (current_nr_blocks > 1) { - candidate_table = xmalloc (current_nr_blocks * sizeof (candidate)); + candidate_table = XNEWVEC (candidate, current_nr_blocks); bblst_last = 0; /* bblst_table holds split blocks and update blocks for each block after @@ -1597,10 +1577,10 @@ init_ready_list (struct ready_list *ready) the TO blocks of region edges, so there can be at most rgn_nr_edges of them. */ bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges; - bblst_table = xmalloc (bblst_size * sizeof (basic_block)); + bblst_table = XNEWVEC (basic_block, bblst_size); edgelst_last = 0; - edgelst_table = xmalloc (rgn_nr_edges * sizeof (edge)); + edgelst_table = XNEWVEC (edge, rgn_nr_edges); compute_trg_info (target_bb); } @@ -1881,6 +1861,8 @@ add_branch_dependences (rtx head, rtx tail) cc0 setters remain at the end because they can't be moved away from their cc0 user. + COND_EXEC insns cannot be moved past a branch (see e.g. PR17808). + Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values) are not moved before reload because we can wind up with register allocation failures. */ @@ -1904,7 +1886,8 @@ add_branch_dependences (rtx head, rtx tail) { if (last != 0 && !find_insn_list (insn, LOG_LINKS (last))) { - add_dependence (last, insn, REG_DEP_ANTI); + if (! sched_insns_conditions_mutex_p (last, insn)) + add_dependence (last, insn, REG_DEP_ANTI); INSN_REF_COUNT (insn)++; } @@ -1930,9 +1913,61 @@ add_branch_dependences (rtx head, rtx tail) if (INSN_REF_COUNT (insn) != 0) continue; - add_dependence (last, insn, REG_DEP_ANTI); + if (! sched_insns_conditions_mutex_p (last, insn)) + add_dependence (last, insn, REG_DEP_ANTI); INSN_REF_COUNT (insn) = 1; } + +#ifdef HAVE_conditional_execution + /* Finally, if the block ends in a jump, and we are doing intra-block + scheduling, make sure that the branch depends on any COND_EXEC insns + inside the block to avoid moving the COND_EXECs past the branch insn. + + We only have to do this after reload, because (1) before reload there + are no COND_EXEC insns, and (2) the region scheduler is an intra-block + scheduler after reload. + + FIXME: We could in some cases move COND_EXEC insns past the branch if + this scheduler would be a little smarter. Consider this code: + + T = [addr] + C ? addr += 4 + !C ? X += 12 + C ? T += 1 + C ? jump foo + + On a target with a one cycle stall on a memory access the optimal + sequence would be: + + T = [addr] + C ? addr += 4 + C ? T += 1 + C ? jump foo + !C ? X += 12 + + We don't want to put the 'X += 12' before the branch because it just + wastes a cycle of execution time when the branch is taken. + + Note that in the example "!C" will always be true. That is another + possible improvement for handling COND_EXECs in this scheduler: it + could remove always-true predicates. 
*/ + + if (!reload_completed || ! JUMP_P (tail)) + return; + + insn = tail; + while (insn != head) + { + insn = PREV_INSN (insn); + + /* Note that we want to add this dependency even when + sched_insns_conditions_mutex_p returns true. The whole point + is that we _want_ this dependency, even if these insns really + are independent. */ + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC) + add_dependence (tail, insn, REG_DEP_ANTI); + } +#endif } /* Data structures for the computation of data dependences in a regions. We @@ -2224,7 +2259,7 @@ schedule_region (int rgn) init_deps_global (); /* Initializations for region data dependence analysis. */ - bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks); + bb_deps = XNEWVEC (struct deps, current_nr_blocks); for (bb = 0; bb < current_nr_blocks; bb++) init_deps (bb_deps + bb); @@ -2257,7 +2292,7 @@ schedule_region (int rgn) /* Compute interblock info: probabilities, split-edges, dominators, etc. */ if (current_nr_blocks > 1) { - prob = xmalloc ((current_nr_blocks) * sizeof (float)); + prob = XNEWVEC (int, current_nr_blocks); dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks); sbitmap_vector_zero (dom, current_nr_blocks); @@ -2272,7 +2307,7 @@ schedule_region (int rgn) SET_EDGE_TO_BIT (e, rgn_nr_edges++); } - rgn_edges = xmalloc (rgn_nr_edges * sizeof (edge)); + rgn_edges = XNEWVEC (edge, rgn_nr_edges); rgn_nr_edges = 0; FOR_EACH_BB (block) { @@ -2409,14 +2444,14 @@ init_regions (void) int rgn; nr_regions = 0; - rgn_table = xmalloc ((n_basic_blocks) * sizeof (region)); - rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int)); - block_to_bb = xmalloc ((last_basic_block) * sizeof (int)); - containing_rgn = xmalloc ((last_basic_block) * sizeof (int)); + rgn_table = XNEWVEC (region, n_basic_blocks); + rgn_bb_table = XNEWVEC (int, n_basic_blocks); + block_to_bb = XNEWVEC (int, last_basic_block); + containing_rgn = XNEWVEC (int, last_basic_block); /* Compute regions for scheduling. */ if (reload_completed - || n_basic_blocks == 1 + || n_basic_blocks == NUM_FIXED_BLOCKS + 1 || !flag_schedule_interblock || is_cfg_nonregular ()) { @@ -2442,7 +2477,7 @@ init_regions (void) if (CHECK_DEAD_NOTES) { blocks = sbitmap_alloc (last_basic_block); - deaths_in_region = xmalloc (sizeof (int) * nr_regions); + deaths_in_region = XNEWVEC (int, nr_regions); /* Remove all death notes from the subroutine. */ for (rgn = 0; rgn < nr_regions; rgn++) { @@ -2460,11 +2495,10 @@ init_regions (void) count_or_remove_death_notes (NULL, 1); } -/* The one entry point in this file. DUMP_FILE is the dump file for - this pass. */ +/* The one entry point in this file. */ void -schedule_insns (FILE *dump_file) +schedule_insns (void) { sbitmap large_region_blocks, blocks; int rgn; @@ -2473,18 +2507,19 @@ schedule_insns (FILE *dump_file) /* Taking care of this degenerate case makes the rest of this code simpler. */ - if (n_basic_blocks == 0) + if (n_basic_blocks == NUM_FIXED_BLOCKS) return; nr_inter = 0; nr_spec = 0; + sched_init (); - sched_init (dump_file); + min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE) + / 100); init_regions (); current_sched_info = ®ion_sched_info; - /* Schedule every region in the subroutine. 
*/ for (rgn = 0; rgn < nr_regions; rgn++) schedule_region (rgn); @@ -2588,3 +2623,95 @@ schedule_insns (FILE *dump_file) sbitmap_free (large_region_blocks); } #endif + +static bool +gate_handle_sched (void) +{ +#ifdef INSN_SCHEDULING + return flag_schedule_insns; +#else + return 0; +#endif +} + +/* Run instruction scheduler. */ +static void +rest_of_handle_sched (void) +{ +#ifdef INSN_SCHEDULING + /* Do control and data sched analysis, + and write some of the results to dump file. */ + + schedule_insns (); +#endif +} + +static bool +gate_handle_sched2 (void) +{ +#ifdef INSN_SCHEDULING + return optimize > 0 && flag_schedule_insns_after_reload; +#else + return 0; +#endif +} + +/* Run second scheduling pass after reload. */ +static void +rest_of_handle_sched2 (void) +{ +#ifdef INSN_SCHEDULING + /* Do control and data sched analysis again, + and write some more of the results to dump file. */ + + split_all_insns (1); + + if (flag_sched2_use_superblocks || flag_sched2_use_traces) + { + schedule_ebbs (); + /* No liveness updating code yet, but it should be easy to do. + reg-stack recomputes the liveness when needed for now. */ + count_or_remove_death_notes (NULL, 1); + cleanup_cfg (CLEANUP_EXPENSIVE); + } + else + schedule_insns (); +#endif +} + +struct tree_opt_pass pass_sched = +{ + "sched1", /* name */ + gate_handle_sched, /* gate */ + rest_of_handle_sched, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_SCHED, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | + TODO_ggc_collect, /* todo_flags_finish */ + 'S' /* letter */ +}; + +struct tree_opt_pass pass_sched2 = +{ + "sched2", /* name */ + gate_handle_sched2, /* gate */ + rest_of_handle_sched2, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_SCHED2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | + TODO_ggc_collect, /* todo_flags_finish */ + 'R' /* letter */ +}; +
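
The heart of this patch is the switch from floating-point to fixed-point
branch probabilities: prob[bb] becomes an int scaled by REG_BR_PROB_BASE
(10000 in GCC), compute_dom_prob_ps accumulates it edge by edge from the
predecessors, and compute_trg_info compares the source/target ratio
against the new min_spec_prob threshold instead of the old hard-coded
MIN_PROBABILITY of 40.  The standalone sketch below walks through that
arithmetic on a hypothetical region whose entry has two successors; it is
not GCC code, and the 40% value assumed for PARAM_MIN_SPEC_PROB matches
the old constant but is an assumption here, not something this diff shows.

  #include <stdio.h>

  #define REG_BR_PROB_BASE 10000	/* GCC's fixed-point scale.  */

  int
  main (void)
  {
    int prob[3], tf, cf, src_prob, min_spec_prob;

    /* prob[] is now an int in [0, REG_BR_PROB_BASE] rather than a
       float in [0, 1]; the region entry gets the full scale.  */
    prob[0] = REG_BR_PROB_BASE;

    /* compute_dom_prob_ps: for each incoming edge, accumulate
       prob[pred] * edge->probability / REG_BR_PROB_BASE.  Assume the
       entry branches to bb 1 with probability 30% and to bb 2 with
       probability 70%, both expressed on the same scale.  */
    prob[1] = (prob[0] * 3000) / REG_BR_PROB_BASE;	/* 3000 */
    prob[2] = (prob[0] * 7000) / REG_BR_PROB_BASE;	/* 7000 */

    /* compute_trg_info's replacement for GET_SRC_PROB: probability of
       the candidate source block relative to the target block, guarding
       against a zero target probability on very unlikely paths.  */
    tf = prob[0];
    cf = prob[1];
    src_prob = tf ? (cf * REG_BR_PROB_BASE) / tf : 0;	/* 3000 */

    /* schedule_insns rescales the percentage parameter once per run.  */
    min_spec_prob = (40 * REG_BR_PROB_BASE) / 100;	/* 4000 */

    printf ("src_prob %d vs. min_spec_prob %d: %s\n",
	    src_prob, min_spec_prob,
	    src_prob >= min_spec_prob ? "speculative" : "rejected");
    return 0;
  }

With these numbers bb 1 is rejected as a speculative source for the entry
block (30% < 40%), the same decision the removed float code reached via
GET_SRC_PROB and MIN_PROBABILITY, but with no float-to-int conversion at
the comparison point.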
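
The many xmalloc/xcalloc-to-XNEWVEC/XCNEWVEC changes in this diff are
purely mechanical.  The macros come from include/libiberty.h and expand
to the same calls, with the cast and the sizeof derived from the element
type; simplified definitions look like this (a sketch, not the verbatim
libiberty source):

  /* Typed allocators: same xmalloc/xcalloc underneath, but the element
     type appears exactly once, so the size and the cast cannot
     disagree.  */
  #define XNEWVEC(T, N)  ((T *) xmalloc (sizeof (T) * (N)))
  #define XCNEWVEC(T, N) ((T *) xcalloc ((N), sizeof (T)))

So, for example, "max_hdr = XNEWVEC (int, last_basic_block);" allocates
exactly what "max_hdr = xmalloc (last_basic_block * sizeof (int));" did
before, and XCNEWVEC preserves xcalloc's zero-initialization for dfs_nr.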