X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Floop-doloop.c;h=1f5856f581b24da02f4ffb281e19393c3ddcdecb;hb=d3832055fda2ca4c70c534b5638eb8f018ea3225;hp=737761b962c2a509f51b53d2e7fb041fed53dcce;hpb=ea091dfdfe023dacb0c7c2af37bdd75e23c530f4;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c index 737761b962c..1f5856f581b 100644 --- a/gcc/loop-doloop.c +++ b/gcc/loop-doloop.c @@ -1,12 +1,13 @@ /* Perform doloop optimizations - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation, + Inc. Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -15,9 +16,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" #include "system.h" @@ -33,6 +33,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfgloop.h" #include "output.h" #include "params.h" +#include "target.h" /* This module is used to modify loops with a determinable number of iterations to use special low-overhead looping instructions. @@ -68,41 +69,77 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA /* Return the loop termination condition for PATTERN or zero if it is not a decrement and branch jump insn. */ -static rtx -doloop_condition_get (rtx pattern) +rtx +doloop_condition_get (rtx doloop_pat) { rtx cmp; rtx inc; rtx reg; + rtx inc_src; rtx condition; + rtx pattern; - /* The canonical doloop pattern we expect is: + /* The canonical doloop pattern we expect has one of the following + forms: - (parallel [(set (pc) (if_then_else (condition) - (label_ref (label)) - (pc))) - (set (reg) (plus (reg) (const_int -1))) - (additional clobbers and uses)]) + 1) (parallel [(set (pc) (if_then_else (condition) + (label_ref (label)) + (pc))) + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) - Some machines (IA-64) make the decrement conditional on - the condition as well, so we don't bother verifying the - actual decrement. In summary, the branch must be the - first entry of the parallel (also required by jump.c), - and the second entry of the parallel must be a set of - the loop counter register. */ + The branch must be the first entry of the parallel (also required + by jump.c), and the second entry of the parallel must be a set of + the loop counter register. Some targets (IA-64) wrap the set of + the loop counter in an if_then_else too. + + 2) (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) + (pc))). */ + + pattern = PATTERN (doloop_pat); if (GET_CODE (pattern) != PARALLEL) - return 0; + { + rtx cond; - cmp = XVECEXP (pattern, 0, 0); - inc = XVECEXP (pattern, 0, 1); + /* We expect the decrement to immediately precede the branch. 
*/ + + if ((PREV_INSN (doloop_pat) == NULL_RTX) + || !INSN_P (PREV_INSN (doloop_pat))) + return 0; + + cmp = pattern; + inc = PATTERN (PREV_INSN (doloop_pat)); + /* We expect the condition to be of the form (reg != 0) */ + cond = XEXP (SET_SRC (cmp), 0); + if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) + return 0; + + } + else + { + cmp = XVECEXP (pattern, 0, 0); + inc = XVECEXP (pattern, 0, 1); + } /* Check for (set (reg) (something)). */ - if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) + if (GET_CODE (inc) != SET) return 0; - - /* Extract loop counter register. */ reg = SET_DEST (inc); + if (! REG_P (reg)) + return 0; + + /* Check if something = (plus (reg) (const_int -1)). + On IA-64, this decrement is wrapped in an if_then_else. */ + inc_src = SET_SRC (inc); + if (GET_CODE (inc_src) == IF_THEN_ELSE) + inc_src = XEXP (inc_src, 1); + if (GET_CODE (inc_src) != PLUS + || XEXP (inc_src, 0) != reg + || XEXP (inc_src, 1) != constm1_rtx) + return 0; /* Check for (set (pc) (if_then_else (condition) (label_ref (label)) @@ -117,16 +154,39 @@ doloop_condition_get (rtx pattern) /* Extract loop termination condition. */ condition = XEXP (SET_SRC (cmp), 0); - if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) - || GET_CODE (XEXP (condition, 1)) != CONST_INT) + /* We expect a GE or NE comparison with 0 or 1. */ + if ((GET_CODE (condition) != GE + && GET_CODE (condition) != NE) + || (XEXP (condition, 1) != const0_rtx + && XEXP (condition, 1) != const1_rtx)) return 0; - if (XEXP (condition, 0) == reg) - return condition; + if ((XEXP (condition, 0) == reg) + || (GET_CODE (XEXP (condition, 0)) == PLUS + && XEXP (XEXP (condition, 0), 0) == reg)) + { + if (GET_CODE (pattern) != PARALLEL) + /* The second form we expect: + + (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) + (pc))). + + is equivalent to the following: + + (parallel [(set (pc) (if_then_else (reg != 1) + (label_ref (label)) + (pc))) + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + + So we return that form instead. + */ + condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); - if (GET_CODE (XEXP (condition, 0)) == PLUS - && XEXP (XEXP (condition, 0), 0) == reg) return condition; + } /* ??? If a machine uses a funny comparison, we could return a canonicalized form here. */ @@ -187,24 +247,15 @@ doloop_valid_p (struct loop *loop, struct niter_desc *desc) insn != NEXT_INSN (BB_END (bb)); insn = NEXT_INSN (insn)) { - /* A called function may clobber any special registers required for - low-overhead looping. */ - if (CALL_P (insn)) - { - if (dump_file) - fprintf (dump_file, "Doloop: Function call in loop.\n"); - result = false; - goto cleanup; - } - - /* Some targets (eg, PPC) use the count register for branch on table - instructions. ??? This should be a target specific check. */ - if (JUMP_P (insn) - && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC - || GET_CODE (PATTERN (insn)) == ADDR_VEC)) + /* Different targets have different necessities for low-overhead + looping. Call the back end for each instruction within the loop + to let it decide whether the insn prohibits a low-overhead loop. + It will then return the cause for it to emit to the dump file. 
*/ + const char * invalid = targetm.invalid_within_doloop (insn); + if (invalid) { if (dump_file) - fprintf (dump_file, "Doloop: Computed branch in the loop.\n"); + fprintf (dump_file, "Doloop: %s\n", invalid); result = false; goto cleanup; } @@ -218,15 +269,19 @@ cleanup: return result; } -/* Adds test of COND jumping to DEST to the end of BB. */ +/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru + edge. If the condition is always false, do not do anything. If it is always + true, redirect E to DEST and return false. In all other cases, true is + returned. */ -static void -add_test (rtx cond, basic_block bb, basic_block dest) +static bool +add_test (rtx cond, edge *e, basic_block dest) { rtx seq, jump, label; enum machine_mode mode; rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); enum rtx_code code = GET_CODE (cond); + basic_block bb; mode = GET_MODE (XEXP (cond, 0)); if (mode == VOIDmode) @@ -239,39 +294,64 @@ add_test (rtx cond, basic_block bb, basic_block dest) do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); jump = get_last_insn (); + if (!jump || !JUMP_P (jump)) + { + /* The condition is always false and the jump was optimized out. */ + end_sequence (); + return true; + } + + seq = get_insns (); + end_sequence (); + + /* There always is at least the jump insn in the sequence. */ + gcc_assert (seq != NULL_RTX); + + bb = split_edge_and_insert (*e, seq); + *e = single_succ_edge (bb); + + if (any_uncondjump_p (jump)) + { + /* The condition is always true. */ + delete_insn (jump); + redirect_edge_and_branch_force (*e, dest); + return false; + } + JUMP_LABEL (jump) = label; /* The jump is supposed to handle an unlikely special case. */ - REG_NOTES (jump) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - const0_rtx, REG_NOTES (jump)); + add_reg_note (jump, REG_BR_PROB, const0_rtx); LABEL_NUSES (label)++; - seq = get_insns (); - end_sequence (); - emit_insn_after (seq, BB_END (bb)); + make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); + return true; } /* Modify the loop to use the low-overhead looping insn where LOOP describes the loop, DESC describes the number of iterations of the loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the end of the loop. CONDITION is the condition separated from the - DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ + DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. + ZERO_EXTEND_P says to zero extend COUNT after the increment of it to + word_mode from FROM_MODE. */ static void doloop_modify (struct loop *loop, struct niter_desc *desc, - rtx doloop_seq, rtx condition, rtx count) + rtx doloop_seq, rtx condition, rtx count, + bool zero_extend_p, enum machine_mode from_mode) { rtx counter_reg; rtx tmp, noloop = NULL_RTX; rtx sequence; rtx jump_insn; rtx jump_label; - int nonneg = 0, irr; + int nonneg = 0; bool increment_count; basic_block loop_end = desc->out_edge->src; enum machine_mode mode; + rtx true_prob_val; jump_insn = BB_END (loop_end); @@ -285,6 +365,10 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, fputs (" iterations).\n", dump_file); } + /* Get the probability of the original branch. If it exists we would + need to update REG_BR_PROB of the new jump_insn. */ + true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX); + /* Discard original jump to continue loop. The original compare result may still be live, so it cannot be discarded explicitly. 
*/ delete_insn (jump_insn); @@ -299,21 +383,17 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, { case NE: /* Currently only NE tests against zero and one are supported. */ - if (XEXP (condition, 1) == const1_rtx) + noloop = XEXP (condition, 1); + if (noloop != const0_rtx) { + gcc_assert (noloop == const1_rtx); increment_count = true; - noloop = const1_rtx; } - else if (XEXP (condition, 1) == const0_rtx) - noloop = const0_rtx; - else - abort (); break; case GE: /* Currently only GE tests against zero are supported. */ - if (XEXP (condition, 1) != const0_rtx) - abort (); + gcc_assert (XEXP (condition, 1) == const0_rtx); noloop = constm1_rtx; @@ -330,11 +410,15 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, /* Abort if an invalid doloop pattern has been generated. */ default: - abort (); + gcc_unreachable (); } if (increment_count) - count = simplify_gen_binary (PLUS, mode, count, const1_rtx); + count = simplify_gen_binary (PLUS, from_mode, count, const1_rtx); + + if (zero_extend_p) + count = simplify_gen_unary (ZERO_EXTEND, word_mode, + count, from_mode); /* Insert initialization of the count register into the loop header. */ start_sequence (); @@ -349,42 +433,59 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, rtx ass = copy_rtx (desc->noloop_assumptions); basic_block preheader = loop_preheader_edge (loop)->src; basic_block set_zero - = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); + = split_edge (loop_preheader_edge (loop)); basic_block new_preheader - = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); - basic_block bb; + = split_edge (loop_preheader_edge (loop)); edge te; - gcov_type cnt; /* Expand the condition testing the assumptions and if it does not pass, reset the count register to 0. */ - add_test (XEXP (ass, 0), preheader, set_zero); - single_succ_edge (preheader)->flags &= ~EDGE_FALLTHRU; - cnt = single_succ_edge (preheader)->count; - single_succ_edge (preheader)->probability = 0; - single_succ_edge (preheader)->count = 0; - irr = single_succ_edge (preheader)->flags & EDGE_IRREDUCIBLE_LOOP; - te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr); - te->probability = REG_BR_PROB_BASE; - te->count = cnt; + redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); set_zero->count = 0; set_zero->frequency = 0; - for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1)) + te = single_succ_edge (preheader); + for (; ass; ass = XEXP (ass, 1)) + if (!add_test (XEXP (ass, 0), &te, set_zero)) + break; + + if (ass) { - bb = loop_split_edge_with (te, NULL_RTX); - te = single_succ_edge (bb); - add_test (XEXP (ass, 0), bb, set_zero); - make_edge (bb, set_zero, irr); + /* We reached a condition that is always true. This is very hard to + reproduce (such a loop does not roll, and thus it would most + likely get optimized out by some of the preceding optimizations). + In fact, I do not have any testcase for it. However, it would + also be very hard to show that it is impossible, so we must + handle this case. */ + set_zero->count = preheader->count; + set_zero->frequency = preheader->frequency; } - - start_sequence (); - convert_move (counter_reg, noloop, 0); - sequence = get_insns (); - end_sequence (); - emit_insn_after (sequence, BB_END (set_zero)); + + if (EDGE_COUNT (set_zero->preds) == 0) + { + /* All the conditions were simplified to false, remove the + unreachable set_zero block. 
*/ + delete_basic_block (set_zero); + } + else + { + /* Reset the counter to zero in the set_zero block. */ + start_sequence (); + convert_move (counter_reg, noloop, 0); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (set_zero)); + + set_immediate_dominator (CDI_DOMINATORS, set_zero, + recompute_dominator (CDI_DOMINATORS, + set_zero)); + } + + set_immediate_dominator (CDI_DOMINATORS, new_preheader, + recompute_dominator (CDI_DOMINATORS, + new_preheader)); } /* Some targets (eg, C4x) need to initialize special looping @@ -395,7 +496,7 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, unsigned level = get_loop_level (loop) + 1; init = gen_doloop_begin (counter_reg, desc->const_iter ? desc->niter_expr : const0_rtx, - desc->niter_max, + GEN_INT (desc->niter_max), GEN_INT (level)); if (init) { @@ -423,9 +524,14 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, /* Add a REG_NONNEG note if the actual or estimated maximum number of iterations is non-negative. */ if (nonneg) + add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); + + /* Update the REG_BR_PROB note. */ + if (true_prob_val) { - REG_NOTES (jump_insn) - = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); + /* Seems safer to use the branch probability. */ + add_reg_note (jump_insn, REG_BR_PROB, + GEN_INT (desc->in_edge->probability)); } } @@ -444,9 +550,11 @@ doloop_optimize (struct loop *loop) rtx start_label; rtx condition; unsigned level, est_niter; + int max_cost; struct niter_desc *desc; unsigned word_mode_size; unsigned HOST_WIDE_INT word_mode_max; + bool zero_extend_p = false; if (dump_file) fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); @@ -484,6 +592,17 @@ doloop_optimize (struct loop *loop) return false; } + max_cost + = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); + if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop)) + > max_cost) + { + if (dump_file) + fprintf (dump_file, + "Doloop: number of iterations too costly to compute.\n"); + return false; + } + count = copy_rtx (desc->niter_expr); iterations = desc->const_iter ? desc->niter_expr : const0_rtx; iterations_max = GEN_INT (desc->niter_max); @@ -510,8 +629,7 @@ doloop_optimize (struct loop *loop) { if (word_mode_size > GET_MODE_BITSIZE (mode)) { - count = simplify_gen_unary (ZERO_EXTEND, word_mode, - count, mode); + zero_extend_p = true; iterations = simplify_gen_unary (ZERO_EXTEND, word_mode, iterations, mode); iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode, @@ -543,9 +661,7 @@ doloop_optimize (struct loop *loop) { while (NEXT_INSN (doloop_pat) != NULL_RTX) doloop_pat = NEXT_INSN (doloop_pat); - if (JUMP_P (doloop_pat)) - doloop_pat = PATTERN (doloop_pat); - else + if (!JUMP_P (doloop_pat)) doloop_pat = NULL_RTX; } @@ -557,24 +673,21 @@ doloop_optimize (struct loop *loop) return false; } - doloop_modify (loop, desc, doloop_seq, condition, count); + doloop_modify (loop, desc, doloop_seq, condition, count, + zero_extend_p, mode); return true; } -/* This is the main entry point. Process all LOOPS using doloop_optimize. */ +/* This is the main entry point. Process all loops using doloop_optimize. 
*/ void -doloop_optimize_loops (struct loops *loops) +doloop_optimize_loops (void) { - unsigned i; + loop_iterator li; struct loop *loop; - for (i = 1; i < loops->num; i++) + FOR_EACH_LOOP (li, loop, 0) { - loop = loops->parray[i]; - if (!loop) - continue; - doloop_optimize (loop); } @@ -582,7 +695,7 @@ doloop_optimize_loops (struct loops *loops) #ifdef ENABLE_CHECKING verify_dominators (CDI_DOMINATORS); - verify_loop_structure (loops); + verify_loop_structure (); #endif } #endif /* HAVE_doloop_end */
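
For reference, the checks this patch deletes from doloop_valid_p (rejecting
function calls and branch-on-table jumps) are exactly the kind of per-insn
screening the new targetm.invalid_within_doloop hook is meant to host.  The
sketch below is illustrative only: it reuses the calls already visible in the
patch (CALL_P, JUMP_P, GET_CODE, PATTERN) and the removed messages, but the
function name is a hypothetical stand-in, not part of this change.

/* Illustrative hook body: reject the same insns that doloop_valid_p
   used to reject directly.  Return a string naming the problem (it is
   printed after "Doloop: " in the dump file), or NULL if INSN is
   acceptable inside a low-overhead loop.  */

static const char *
example_invalid_within_doloop (rtx insn)	/* hypothetical name */
{
  /* A called function may clobber any special registers required for
     low-overhead looping.  */
  if (CALL_P (insn))
    return "Function call in loop.";

  /* Some targets (eg, PPC) use the count register for branch on table
     instructions.  */
  if (JUMP_P (insn)
      && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_VEC))
    return "Computed branch in the loop.";

  return NULL;
}

A back end with no extra restrictions can simply return NULL for every insn;
targets with additional constraints can extend the same pattern with their own
checks and dump messages.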