X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fcfgloopanal.c;h=02f026576991ded3cf4e9396bd1abfacee2bd784;hp=2d2ece2cb2d6536ad68c151df58630c5ff520c40;hb=12d9baf99f5dc0a2bf75047322487521fa6684be;hpb=c088dce620b6ca4e163bceeb44903b67251380f1 diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index 2d2ece2cb2d..02f02657699 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -1,11 +1,12 @@ /* Natural loop analysis code for GNU compiler. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -14,9 +15,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" #include "system.h" @@ -29,11 +29,18 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfgloop.h" #include "expr.h" #include "output.h" +#include "graphds.h" +#include "params.h" + +struct target_cfgloop default_target_cfgloop; +#if SWITCHABLE_TARGET +struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop; +#endif /* Checks whether BB is executed exactly once in each LOOP iteration. */ bool -just_once_each_iteration_p (struct loop *loop, basic_block bb) +just_once_each_iteration_p (const struct loop *loop, const_basic_block bb) { /* It must be executed at least once each iteration. */ if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) @@ -50,231 +57,34 @@ just_once_each_iteration_p (struct loop *loop, basic_block bb) return true; } -/* Structure representing edge of a graph. */ - -struct edge -{ - int src, dest; /* Source and destination. */ - struct edge *pred_next, *succ_next; - /* Next edge in predecessor and successor lists. */ - void *data; /* Data attached to the edge. */ -}; - -/* Structure representing vertex of a graph. */ - -struct vertex -{ - struct edge *pred, *succ; - /* Lists of predecessors and successors. */ - int component; /* Number of dfs restarts before reaching the - vertex. */ - int post; /* Postorder number. */ -}; - -/* Structure representing a graph. */ - -struct graph -{ - int n_vertices; /* Number of vertices. */ - struct vertex *vertices; - /* The vertices. */ -}; - -/* Dumps graph G into F. */ - -extern void dump_graph (FILE *, struct graph *); -void dump_graph (FILE *f, struct graph *g) -{ - int i; - struct edge *e; - - for (i = 0; i < g->n_vertices; i++) - { - if (!g->vertices[i].pred - && !g->vertices[i].succ) - continue; - - fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component); - for (e = g->vertices[i].pred; e; e = e->pred_next) - fprintf (f, " %d", e->src); - fprintf (f, "\n"); - - fprintf (f, "\t->"); - for (e = g->vertices[i].succ; e; e = e->succ_next) - fprintf (f, " %d", e->dest); - fprintf (f, "\n"); - } -} - -/* Creates a new graph with N_VERTICES vertices. */ - -static struct graph * -new_graph (int n_vertices) -{ - struct graph *g = xmalloc (sizeof (struct graph)); - - g->n_vertices = n_vertices; - g->vertices = xcalloc (n_vertices, sizeof (struct vertex)); - - return g; -} - -/* Adds an edge from F to T to graph G, with DATA attached. */ - -static void -add_edge (struct graph *g, int f, int t, void *data) -{ - struct edge *e = xmalloc (sizeof (struct edge)); - - e->src = f; - e->dest = t; - e->data = data; - - e->pred_next = g->vertices[t].pred; - g->vertices[t].pred = e; - - e->succ_next = g->vertices[f].succ; - g->vertices[f].succ = e; -} - -/* Runs dfs search over vertices of G, from NQ vertices in queue QS. - The vertices in postorder are stored into QT. If FORWARD is false, - backward dfs is run. */ - -static void -dfs (struct graph *g, int *qs, int nq, int *qt, bool forward) -{ - int i, tick = 0, v, comp = 0, top; - struct edge *e; - struct edge **stack = xmalloc (sizeof (struct edge *) * g->n_vertices); - - for (i = 0; i < g->n_vertices; i++) - { - g->vertices[i].component = -1; - g->vertices[i].post = -1; - } - -#define FST_EDGE(V) (forward ? g->vertices[(V)].succ : g->vertices[(V)].pred) -#define NEXT_EDGE(E) (forward ? (E)->succ_next : (E)->pred_next) -#define EDGE_SRC(E) (forward ? (E)->src : (E)->dest) -#define EDGE_DEST(E) (forward ? (E)->dest : (E)->src) - - for (i = 0; i < nq; i++) - { - v = qs[i]; - if (g->vertices[v].post != -1) - continue; - - g->vertices[v].component = comp++; - e = FST_EDGE (v); - top = 0; - - while (1) - { - while (e && g->vertices[EDGE_DEST (e)].component != -1) - e = NEXT_EDGE (e); - - if (!e) - { - if (qt) - qt[tick] = v; - g->vertices[v].post = tick++; - - if (!top) - break; - - e = stack[--top]; - v = EDGE_SRC (e); - e = NEXT_EDGE (e); - continue; - } - - stack[top++] = e; - v = EDGE_DEST (e); - e = FST_EDGE (v); - g->vertices[v].component = comp - 1; - } - } - - free (stack); -} - -/* Marks the edge E in graph G irreducible if it connects two vertices in the - same scc. */ - -static void -check_irred (struct graph *g, struct edge *e) -{ - edge real = e->data; - - /* All edges should lead from a component with higher number to the - one with lower one. */ - gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component); - - if (g->vertices[e->src].component != g->vertices[e->dest].component) - return; - - real->flags |= EDGE_IRREDUCIBLE_LOOP; - if (flow_bb_inside_loop_p (real->src->loop_father, real->dest)) - real->src->flags |= BB_IRREDUCIBLE_LOOP; -} - -/* Runs CALLBACK for all edges in G. */ - -static void -for_each_edge (struct graph *g, - void (callback) (struct graph *, struct edge *)) -{ - struct edge *e; - int i; - - for (i = 0; i < g->n_vertices; i++) - for (e = g->vertices[i].succ; e; e = e->succ_next) - callback (g, e); -} - -/* Releases the memory occupied by G. */ - -static void -free_graph (struct graph *g) -{ - struct edge *e, *n; - int i; - - for (i = 0; i < g->n_vertices; i++) - for (e = g->vertices[i].succ; e; e = n) - { - n = e->succ_next; - free (e); - } - free (g->vertices); - free (g); -} - /* Marks blocks and edges that are part of non-recognized loops; i.e. we throw away all latch edges and mark blocks inside any remaining cycle. Everything is a bit complicated due to fact we do not want to do this for parts of cycles that only "pass" through some loop -- i.e. for each cycle, we want to mark blocks that belong directly to innermost loop containing the whole cycle. - + LOOPS is the loop tree. */ #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block) #define BB_REPR(BB) ((BB)->index + 1) -void -mark_irreducible_loops (struct loops *loops) +bool +mark_irreducible_loops (void) { basic_block act; + struct graph_edge *ge; edge e; edge_iterator ei; - int i, src, dest; + int src, dest; + unsigned depth; struct graph *g; - int *queue1 = xmalloc ((last_basic_block + loops->num) * sizeof (int)); - int *queue2 = xmalloc ((last_basic_block + loops->num) * sizeof (int)); - int nq, depth; + int num = number_of_loops (); struct loop *cloop; + bool irred_loop_found = false; + int i; + + gcc_assert (current_loops != NULL); /* Reset the flags. */ FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) @@ -285,16 +95,19 @@ mark_irreducible_loops (struct loops *loops) } /* Create the edge lists. */ - g = new_graph (last_basic_block + loops->num); + g = new_graph (last_basic_block + num); FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) FOR_EACH_EDGE (e, ei, act->succs) { - /* Ignore edges to exit. */ - if (e->dest == EXIT_BLOCK_PTR) + /* Ignore edges to exit. */ + if (e->dest == EXIT_BLOCK_PTR) continue; - /* And latch edges. */ + src = BB_REPR (act); + dest = BB_REPR (e->dest); + + /* Ignore latch edges. */ if (e->dest->loop_father->header == e->dest && e->dest->loop_father->latch == act) continue; @@ -307,57 +120,57 @@ mark_irreducible_loops (struct loops *loops) of the son of nearest common ancestor of the loops in that act lays. */ - src = BB_REPR (act); - dest = BB_REPR (e->dest); - if (e->dest->loop_father->header == e->dest) dest = LOOP_REPR (e->dest->loop_father); if (!flow_bb_inside_loop_p (act->loop_father, e->dest)) { - depth = find_common_loop (act->loop_father, - e->dest->loop_father)->depth + 1; - if (depth == act->loop_father->depth) + depth = 1 + loop_depth (find_common_loop (act->loop_father, + e->dest->loop_father)); + if (depth == loop_depth (act->loop_father)) cloop = act->loop_father; else - cloop = act->loop_father->pred[depth]; + cloop = VEC_index (loop_p, act->loop_father->superloops, depth); src = LOOP_REPR (cloop); } - add_edge (g, src, dest, e); + add_edge (g, src, dest)->data = e; } - /* Find the strongly connected components. Use the algorithm of Tarjan -- - first determine the postorder dfs numbering in reversed graph, then - run the dfs on the original graph in the order given by decreasing - numbers assigned by the previous pass. */ - nq = 0; - FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) - { - queue1[nq++] = BB_REPR (act); - } - for (i = 1; i < (int) loops->num; i++) - if (loops->parray[i]) - queue1[nq++] = LOOP_REPR (loops->parray[i]); - dfs (g, queue1, nq, queue2, false); - for (i = 0; i < nq; i++) - queue1[i] = queue2[nq - i - 1]; - dfs (g, queue1, nq, NULL, true); + /* Find the strongly connected components. */ + graphds_scc (g, NULL); /* Mark the irreducible loops. */ - for_each_edge (g, check_irred); + for (i = 0; i < g->n_vertices; i++) + for (ge = g->vertices[i].succ; ge; ge = ge->succ_next) + { + edge real = (edge) ge->data; + /* edge E in graph G is irreducible if it connects two vertices in the + same scc. */ + + /* All edges should lead from a component with higher number to the + one with lower one. */ + gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component); + + if (g->vertices[ge->src].component != g->vertices[ge->dest].component) + continue; + + real->flags |= EDGE_IRREDUCIBLE_LOOP; + irred_loop_found = true; + if (flow_bb_inside_loop_p (real->src->loop_father, real->dest)) + real->src->flags |= BB_IRREDUCIBLE_LOOP; + } free_graph (g); - free (queue1); - free (queue2); - loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS; + loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS); + return irred_loop_found; } /* Counts number of insns inside LOOP. */ int -num_loop_insns (struct loop *loop) +num_loop_insns (const struct loop *loop) { basic_block *bbs, bb; unsigned i, ninsns = 0; @@ -367,19 +180,21 @@ num_loop_insns (struct loop *loop) for (i = 0; i < loop->num_nodes; i++) { bb = bbs[i]; - ninsns++; - for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn)) - if (INSN_P (insn)) + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn)) ninsns++; } - free(bbs); + free (bbs); + + if (!ninsns) + ninsns = 1; /* To avoid division by zero. */ return ninsns; } /* Counts number of insns executed on average per iteration LOOP. */ int -average_num_loop_insns (struct loop *loop) +average_num_loop_insns (const struct loop *loop) { basic_block *bbs, bb; unsigned i, binsns, ninsns, ratio; @@ -391,9 +206,9 @@ average_num_loop_insns (struct loop *loop) { bb = bbs[i]; - binsns = 1; - for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn)) - if (INSN_P (insn)) + binsns = 0; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn)) binsns++; ratio = loop->header->frequency == 0 @@ -401,7 +216,7 @@ average_num_loop_insns (struct loop *loop) : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; ninsns += binsns * ratio; } - free(bbs); + free (bbs); ninsns /= BB_FREQ_MAX; if (!ninsns) @@ -410,16 +225,17 @@ average_num_loop_insns (struct loop *loop) return ninsns; } -/* Returns expected number of LOOP iterations. - Compute upper bound on number of iterations in case they do not fit integer - to help loop peeling heuristics. Use exact counts if at all possible. */ -unsigned -expected_loop_iterations (const struct loop *loop) +/* Returns expected number of iterations of LOOP, according to + measured or guessed profile. No bounding is done on the + value. */ + +gcov_type +expected_loop_iterations_unbounded (const struct loop *loop) { edge e; edge_iterator ei; - if (loop->header->count) + if (loop->latch->count || loop->header->count) { gcov_type count_in, count_latch, expected; @@ -433,12 +249,11 @@ expected_loop_iterations (const struct loop *loop) count_in += e->count; if (count_in == 0) - expected = count_latch * 2; + expected = count_latch * 2; else - expected = (count_latch + count_in - 1) / count_in; + expected = (count_latch + count_in - 1) / count_in; - /* Avoid overflows. */ - return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); + return expected; } else { @@ -460,6 +275,16 @@ expected_loop_iterations (const struct loop *loop) } } +/* Returns expected number of LOOP iterations. The returned value is bounded + by REG_BR_PROB_BASE. */ + +unsigned +expected_loop_iterations (const struct loop *loop) +{ + gcov_type expected = expected_loop_iterations_unbounded (loop); + return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); +} + /* Returns the maximum level of nesting of subloops of LOOP. */ unsigned @@ -480,7 +305,7 @@ get_loop_level (const struct loop *loop) /* Returns estimate on cost of computing SEQ. */ static unsigned -seq_cost (rtx seq) +seq_cost (const_rtx seq, bool speed) { unsigned cost = 0; rtx set; @@ -489,7 +314,7 @@ seq_cost (rtx seq) { set = single_set (seq); if (set) - cost += rtx_cost (set, SET); + cost += rtx_cost (set, SET, speed); else cost++; } @@ -497,20 +322,12 @@ seq_cost (rtx seq) return cost; } -/* The properties of the target. */ - -unsigned target_avail_regs; /* Number of available registers. */ -unsigned target_res_regs; /* Number of reserved registers. */ -unsigned target_small_cost; /* The cost for register when there is a free one. */ -unsigned target_pres_cost; /* The cost for register when there are not too many - free ones. */ -unsigned target_spill_cost; /* The cost for register when we need to spill. */ - /* Initialize the constants for computing set costs. */ void init_set_costs (void) { + int speed; rtx seq; rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER); rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1); @@ -518,62 +335,101 @@ init_set_costs (void) rtx mem = validize_mem (gen_rtx_MEM (SImode, addr)); unsigned i; + target_avail_regs = 0; + target_clobbered_regs = 0; for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) && !fixed_regs[i]) - target_avail_regs++; + { + target_avail_regs++; + if (call_used_regs[i]) + target_clobbered_regs++; + } target_res_regs = 3; - /* These are really just heuristic values. */ - - start_sequence (); - emit_move_insn (reg1, reg2); - seq = get_insns (); - end_sequence (); - target_small_cost = seq_cost (seq); - target_pres_cost = 2 * target_small_cost; - - start_sequence (); - emit_move_insn (mem, reg1); - emit_move_insn (reg2, mem); - seq = get_insns (); - end_sequence (); - target_spill_cost = seq_cost (seq); + for (speed = 0; speed < 2; speed++) + { + crtl->maybe_hot_insn_p = speed; + /* Set up the costs for using extra registers: + + 1) If not many free registers remain, we should prefer having an + additional move to decreasing the number of available registers. + (TARGET_REG_COST). + 2) If no registers are available, we need to spill, which may require + storing the old value to memory and loading it back + (TARGET_SPILL_COST). */ + + start_sequence (); + emit_move_insn (reg1, reg2); + seq = get_insns (); + end_sequence (); + target_reg_cost [speed] = seq_cost (seq, speed); + + start_sequence (); + emit_move_insn (mem, reg1); + emit_move_insn (reg2, mem); + seq = get_insns (); + end_sequence (); + target_spill_cost [speed] = seq_cost (seq, speed); + } + default_rtl_profile (); } -/* Calculates cost for having SIZE new loop global variables. REGS_USED is the - number of global registers used in loop. N_USES is the number of relevant - variable uses. */ +/* Estimates cost of increased register pressure caused by making N_NEW new + registers live around the loop. N_OLD is the number of registers live + around the loop. If CALL_P is true, also take into account that + call-used registers may be clobbered in the loop body, reducing the + number of available registers before we spill. */ unsigned -global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses) +estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, + bool call_p) { - unsigned regs_needed = regs_used + size; - unsigned cost = 0; - - if (regs_needed + target_res_regs <= target_avail_regs) - cost += target_small_cost * size; - else if (regs_needed <= target_avail_regs) - cost += target_pres_cost * size; + unsigned cost; + unsigned regs_needed = n_new + n_old; + unsigned available_regs = target_avail_regs; + + /* If there is a call in the loop body, the call-clobbered registers + are not available for loop invariants. */ + if (call_p) + available_regs = available_regs - target_clobbered_regs; + + /* If we have enough registers, we should use them and not restrict + the transformations unnecessarily. */ + if (regs_needed + target_res_regs <= available_regs) + return 0; + + if (regs_needed <= available_regs) + /* If we are close to running out of registers, try to preserve + them. */ + cost = target_reg_cost [speed] * n_new; else - { - cost += target_pres_cost * size; - cost += target_spill_cost * n_uses * (regs_needed - target_avail_regs) / regs_needed; - } + /* If we run out of registers, it is very expensive to add another + one. */ + cost = target_spill_cost [speed] * n_new; + + if (optimize && (flag_ira_region == IRA_REGION_ALL + || flag_ira_region == IRA_REGION_MIXED) + && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM) + /* IRA regional allocation deals with high register pressure + better. So decrease the cost (to do more accurate the cost + calculation for IRA, we need to know how many registers lives + through the loop transparently). */ + cost /= 2; return cost; } -/* Sets EDGE_LOOP_EXIT flag for all exits of LOOPS. */ +/* Sets EDGE_LOOP_EXIT flag for all loop exits. */ void -mark_loop_exit_edges (struct loops *loops) +mark_loop_exit_edges (void) { basic_block bb; edge e; - - if (loops->num <= 1) + + if (number_of_loops () <= 1) return; FOR_EACH_BB (bb) @@ -582,7 +438,7 @@ mark_loop_exit_edges (struct loops *loops) FOR_EACH_EDGE (e, ei, bb->succs) { - if (bb->loop_father->outer + if (loop_outer (bb->loop_father) && loop_exit_edge_p (bb->loop_father, e)) e->flags |= EDGE_LOOP_EXIT; else