X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;ds=sidebyside;f=gcc%2Floop-doloop.c;h=5f645694107582d5c1ba533090e4dbff60dbe844;hb=7d2188e888651f20602de3f7b08f6c101957b89b;hp=fa6b55b3cfa59c22d354bf9ff5dc4fad8f1b1dee;hpb=7bd28bbae4ff4dd2612545c506c6c9d33bc28cfc;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c index fa6b55b3cfa..5f645694107 100644 --- a/gcc/loop-doloop.c +++ b/gcc/loop-doloop.c @@ -1,12 +1,13 @@ /* Perform doloop optimizations - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 + Free Software Foundation, Inc. Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -15,9 +16,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" #include "system.h" @@ -28,11 +28,12 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "expr.h" #include "hard-reg-set.h" #include "basic-block.h" -#include "toplev.h" +#include "diagnostic-core.h" #include "tm_p.h" #include "cfgloop.h" #include "output.h" #include "params.h" +#include "target.h" /* This module is used to modify loops with a determinable number of iterations to use special low-overhead looping instructions. @@ -68,41 +69,116 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA /* Return the loop termination condition for PATTERN or zero if it is not a decrement and branch jump insn. */ -static rtx -doloop_condition_get (rtx pattern) +rtx +doloop_condition_get (rtx doloop_pat) { rtx cmp; rtx inc; rtx reg; + rtx inc_src; rtx condition; + rtx pattern; + rtx cc_reg = NULL_RTX; + rtx reg_orig = NULL_RTX; - /* The canonical doloop pattern we expect is: + /* The canonical doloop pattern we expect has one of the following + forms: - (parallel [(set (pc) (if_then_else (condition) - (label_ref (label)) - (pc))) - (set (reg) (plus (reg) (const_int -1))) - (additional clobbers and uses)]) + 1) (parallel [(set (pc) (if_then_else (condition) + (label_ref (label)) + (pc))) + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) - Some machines (IA-64) make the decrement conditional on - the condition as well, so we don't bother verifying the - actual decrement. In summary, the branch must be the - first entry of the parallel (also required by jump.c), - and the second entry of the parallel must be a set of - the loop counter register. */ + The branch must be the first entry of the parallel (also required + by jump.c), and the second entry of the parallel must be a set of + the loop counter register. Some targets (IA-64) wrap the set of + the loop counter in an if_then_else too. - if (GET_CODE (pattern) != PARALLEL) - return 0; + 2) (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) + (pc))). + + Some targets (ARM) do the comparison before the branch, as in the + following form: + + 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) + (set (reg) (plus (reg) (const_int -1)))]) + (set (pc) (if_then_else (cc == NE) + (label_ref (label)) + (pc))) */ + + pattern = PATTERN (doloop_pat); - cmp = XVECEXP (pattern, 0, 0); - inc = XVECEXP (pattern, 0, 1); + if (GET_CODE (pattern) != PARALLEL) + { + rtx cond; + rtx prev_insn = prev_nondebug_insn (doloop_pat); + rtx cmp_arg1, cmp_arg2; + rtx cmp_orig; + + /* In case the pattern is not PARALLEL we expect two forms + of doloop which are cases 2) and 3) above: in case 2) the + decrement immediately precedes the branch, while in case 3) + the compare and decrement instructions immediately precede + the branch. */ + + if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) + return 0; + + cmp = pattern; + if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) + { + /* The third case: the compare and decrement instructions + immediately precede the branch. */ + cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); + if (GET_CODE (cmp_orig) != SET) + return 0; + if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) + return 0; + cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); + cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); + if (cmp_arg2 != const0_rtx + || GET_CODE (cmp_arg1) != PLUS) + return 0; + reg_orig = XEXP (cmp_arg1, 0); + if (XEXP (cmp_arg1, 1) != GEN_INT (-1) + || !REG_P (reg_orig)) + return 0; + cc_reg = SET_DEST (cmp_orig); + + inc = XVECEXP (PATTERN (prev_insn), 0, 1); + } + else + inc = PATTERN (prev_insn); + /* We expect the condition to be of the form (reg != 0) */ + cond = XEXP (SET_SRC (cmp), 0); + if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) + return 0; + } + else + { + cmp = XVECEXP (pattern, 0, 0); + inc = XVECEXP (pattern, 0, 1); + } /* Check for (set (reg) (something)). */ - if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc))) + if (GET_CODE (inc) != SET) return 0; - - /* Extract loop counter register. */ reg = SET_DEST (inc); + if (! REG_P (reg)) + return 0; + + /* Check if something = (plus (reg) (const_int -1)). + On IA-64, this decrement is wrapped in an if_then_else. */ + inc_src = SET_SRC (inc); + if (GET_CODE (inc_src) == IF_THEN_ELSE) + inc_src = XEXP (inc_src, 1); + if (GET_CODE (inc_src) != PLUS + || XEXP (inc_src, 0) != reg + || XEXP (inc_src, 1) != constm1_rtx) + return 0; /* Check for (set (pc) (if_then_else (condition) (label_ref (label)) @@ -117,16 +193,60 @@ doloop_condition_get (rtx pattern) /* Extract loop termination condition. */ condition = XEXP (SET_SRC (cmp), 0); - if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE) - || GET_CODE (XEXP (condition, 1)) != CONST_INT) + /* We expect a GE or NE comparison with 0 or 1. */ + if ((GET_CODE (condition) != GE + && GET_CODE (condition) != NE) + || (XEXP (condition, 1) != const0_rtx + && XEXP (condition, 1) != const1_rtx)) return 0; - if (XEXP (condition, 0) == reg) - return condition; + if ((XEXP (condition, 0) == reg) + /* For the third case: */ + || ((cc_reg != NULL_RTX) + && (XEXP (condition, 0) == cc_reg) + && (reg_orig == reg)) + || (GET_CODE (XEXP (condition, 0)) == PLUS + && XEXP (XEXP (condition, 0), 0) == reg)) + { + if (GET_CODE (pattern) != PARALLEL) + /* For the second form we expect: + + (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) + (pc))). + + is equivalent to the following: + + (parallel [(set (pc) (if_then_else (reg != 1) + (label_ref (label)) + (pc))) + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + + For the third form we expect: + + (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) + (set (reg) (plus (reg) (const_int -1)))]) + (set (pc) (if_then_else (cc == NE) + (label_ref (label)) + (pc))) + + which is equivalent to the following: + + (parallel [(set (cc) (compare (reg, 1)) + (set (reg) (plus (reg) (const_int -1))) + (set (pc) (if_then_else (NE == cc) + (label_ref (label)) + (pc))))]) + + So we return the second form instead for the two cases. + + */ + condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); - if (GET_CODE (XEXP (condition, 0)) == PLUS - && XEXP (XEXP (condition, 0), 0) == reg) return condition; + } /* ??? If a machine uses a funny comparison, we could return a canonicalized form here. */ @@ -144,6 +264,7 @@ doloop_valid_p (struct loop *loop, struct niter_desc *desc) basic_block *body = get_loop_body (loop), bb; rtx insn; unsigned i; + bool result = true; /* Check for loops that may not terminate under special conditions. */ if (!desc->simple_p @@ -174,7 +295,8 @@ doloop_valid_p (struct loop *loop, struct niter_desc *desc) enable count-register loops in this case. */ if (dump_file) fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); - return false; + result = false; + goto cleanup; } for (i = 0; i < loop->num_nodes; i++) @@ -185,41 +307,41 @@ doloop_valid_p (struct loop *loop, struct niter_desc *desc) insn != NEXT_INSN (BB_END (bb)); insn = NEXT_INSN (insn)) { - /* A called function may clobber any special registers required for - low-overhead looping. */ - if (GET_CODE (insn) == CALL_INSN) - { - if (dump_file) - fprintf (dump_file, "Doloop: Function call in loop.\n"); - return false; - } - - /* Some targets (eg, PPC) use the count register for branch on table - instructions. ??? This should be a target specific check. */ - if (GET_CODE (insn) == JUMP_INSN - && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC - || GET_CODE (PATTERN (insn)) == ADDR_VEC)) + /* Different targets have different necessities for low-overhead + looping. Call the back end for each instruction within the loop + to let it decide whether the insn prohibits a low-overhead loop. + It will then return the cause for it to emit to the dump file. */ + const char * invalid = targetm.invalid_within_doloop (insn); + if (invalid) { if (dump_file) - fprintf (dump_file, "Doloop: Computed branch in the loop.\n"); - return false; + fprintf (dump_file, "Doloop: %s\n", invalid); + result = false; + goto cleanup; } } } + result = true; + +cleanup: free (body); - return true; + return result; } -/* Adds test of COND jumping to DEST to the end of BB. */ +/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru + edge. If the condition is always false, do not do anything. If it is always + true, redirect E to DEST and return false. In all other cases, true is + returned. */ -static void -add_test (rtx cond, basic_block bb, basic_block dest) +static bool +add_test (rtx cond, edge *e, basic_block dest) { rtx seq, jump, label; enum machine_mode mode; rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); enum rtx_code code = GET_CODE (cond); + basic_block bb; mode = GET_MODE (XEXP (cond, 0)); if (mode == VOIDmode) @@ -229,41 +351,65 @@ add_test (rtx cond, basic_block bb, basic_block dest) op0 = force_operand (op0, NULL_RTX); op1 = force_operand (op1, NULL_RTX); label = block_label (dest); - do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); + do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, + NULL_RTX, label, -1); jump = get_last_insn (); + if (!jump || !JUMP_P (jump)) + { + /* The condition is always false and the jump was optimized out. */ + end_sequence (); + return true; + } + + seq = get_insns (); + end_sequence (); + + /* There always is at least the jump insn in the sequence. */ + gcc_assert (seq != NULL_RTX); + + bb = split_edge_and_insert (*e, seq); + *e = single_succ_edge (bb); + + if (any_uncondjump_p (jump)) + { + /* The condition is always true. */ + delete_insn (jump); + redirect_edge_and_branch_force (*e, dest); + return false; + } + JUMP_LABEL (jump) = label; /* The jump is supposed to handle an unlikely special case. */ - REG_NOTES (jump) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - const0_rtx, REG_NOTES (jump)); + add_reg_note (jump, REG_BR_PROB, const0_rtx); LABEL_NUSES (label)++; - seq = get_insns (); - end_sequence (); - emit_insn_after (seq, BB_END (bb)); + make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); + return true; } /* Modify the loop to use the low-overhead looping insn where LOOP describes the loop, DESC describes the number of iterations of the loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the end of the loop. CONDITION is the condition separated from the - DOLOOP_SEQ. */ + DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ static void doloop_modify (struct loop *loop, struct niter_desc *desc, - rtx doloop_seq, rtx condition) + rtx doloop_seq, rtx condition, rtx count) { rtx counter_reg; - rtx count, tmp, noloop = NULL_RTX; + rtx tmp, noloop = NULL_RTX; rtx sequence; rtx jump_insn; rtx jump_label; - int nonneg = 0, irr; + int nonneg = 0; bool increment_count; basic_block loop_end = desc->out_edge->src; + enum machine_mode mode; + rtx true_prob_val; jump_insn = BB_END (loop_end); @@ -277,6 +423,10 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, fputs (" iterations).\n", dump_file); } + /* Get the probability of the original branch. If it exists we would + need to update REG_BR_PROB of the new jump_insn. */ + true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX); + /* Discard original jump to continue loop. The original compare result may still be live, so it cannot be discarded explicitly. */ delete_insn (jump_insn); @@ -284,28 +434,24 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, counter_reg = XEXP (condition, 0); if (GET_CODE (counter_reg) == PLUS) counter_reg = XEXP (counter_reg, 0); + mode = GET_MODE (counter_reg); - count = desc->niter_expr; increment_count = false; switch (GET_CODE (condition)) { case NE: /* Currently only NE tests against zero and one are supported. */ - if (XEXP (condition, 1) == const1_rtx) + noloop = XEXP (condition, 1); + if (noloop != const0_rtx) { + gcc_assert (noloop == const1_rtx); increment_count = true; - noloop = const1_rtx; } - else if (XEXP (condition, 1) == const0_rtx) - noloop = const0_rtx; - else - abort (); break; case GE: /* Currently only GE tests against zero are supported. */ - if (XEXP (condition, 1) != const0_rtx) - abort (); + gcc_assert (XEXP (condition, 1) == const0_rtx); noloop = constm1_rtx; @@ -316,17 +462,17 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, Note that the maximum value loaded is iterations_max - 1. */ if (desc->niter_max <= ((unsigned HOST_WIDEST_INT) 1 - << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1))) + << (GET_MODE_PRECISION (mode) - 1))) nonneg = 1; break; /* Abort if an invalid doloop pattern has been generated. */ default: - abort (); + gcc_unreachable (); } if (increment_count) - count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx); + count = simplify_gen_binary (PLUS, mode, count, const1_rtx); /* Insert initialization of the count register into the loop header. */ start_sequence (); @@ -338,45 +484,62 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, if (desc->noloop_assumptions) { - rtx ass = desc->noloop_assumptions; + rtx ass = copy_rtx (desc->noloop_assumptions); basic_block preheader = loop_preheader_edge (loop)->src; basic_block set_zero - = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); + = split_edge (loop_preheader_edge (loop)); basic_block new_preheader - = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); - basic_block bb; + = split_edge (loop_preheader_edge (loop)); edge te; - gcov_type cnt; /* Expand the condition testing the assumptions and if it does not pass, reset the count register to 0. */ - add_test (XEXP (ass, 0), preheader, set_zero); - preheader->succ->flags &= ~EDGE_FALLTHRU; - cnt = preheader->succ->count; - preheader->succ->probability = 0; - preheader->succ->count = 0; - irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP; - te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr); - te->probability = REG_BR_PROB_BASE; - te->count = cnt; + redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); set_zero->count = 0; set_zero->frequency = 0; - for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1)) + te = single_succ_edge (preheader); + for (; ass; ass = XEXP (ass, 1)) + if (!add_test (XEXP (ass, 0), &te, set_zero)) + break; + + if (ass) { - bb = loop_split_edge_with (te, NULL_RTX); - te = bb->succ; - add_test (XEXP (ass, 0), bb, set_zero); - make_edge (bb, set_zero, irr); + /* We reached a condition that is always true. This is very hard to + reproduce (such a loop does not roll, and thus it would most + likely get optimized out by some of the preceding optimizations). + In fact, I do not have any testcase for it. However, it would + also be very hard to show that it is impossible, so we must + handle this case. */ + set_zero->count = preheader->count; + set_zero->frequency = preheader->frequency; } - - start_sequence (); - convert_move (counter_reg, noloop, 0); - sequence = get_insns (); - end_sequence (); - emit_insn_after (sequence, BB_END (set_zero)); + + if (EDGE_COUNT (set_zero->preds) == 0) + { + /* All the conditions were simplified to false, remove the + unreachable set_zero block. */ + delete_basic_block (set_zero); + } + else + { + /* Reset the counter to zero in the set_zero block. */ + start_sequence (); + convert_move (counter_reg, noloop, 0); + sequence = get_insns (); + end_sequence (); + emit_insn_after (sequence, BB_END (set_zero)); + + set_immediate_dominator (CDI_DOMINATORS, set_zero, + recompute_dominator (CDI_DOMINATORS, + set_zero)); + } + + set_immediate_dominator (CDI_DOMINATORS, new_preheader, + recompute_dominator (CDI_DOMINATORS, + new_preheader)); } /* Some targets (eg, C4x) need to initialize special looping @@ -387,7 +550,7 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, unsigned level = get_loop_level (loop) + 1; init = gen_doloop_begin (counter_reg, desc->const_iter ? desc->niter_expr : const0_rtx, - desc->niter_max, + GEN_INT (desc->niter_max), GEN_INT (level)); if (init) { @@ -415,9 +578,14 @@ doloop_modify (struct loop *loop, struct niter_desc *desc, /* Add a REG_NONNEG note if the actual or estimated maximum number of iterations is non-negative. */ if (nonneg) + add_reg_note (jump_insn, REG_NONNEG, NULL_RTX); + + /* Update the REG_BR_PROB note. */ + if (true_prob_val) { - REG_NOTES (jump_insn) - = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); + /* Seems safer to use the branch probability. */ + add_reg_note (jump_insn, REG_BR_PROB, + GEN_INT (desc->in_edge->probability)); } } @@ -431,12 +599,15 @@ doloop_optimize (struct loop *loop) { enum machine_mode mode; rtx doloop_seq, doloop_pat, doloop_reg; - rtx iterations; + rtx iterations, count; rtx iterations_max; rtx start_label; rtx condition; unsigned level, est_niter; + int max_cost; struct niter_desc *desc; + unsigned word_mode_size; + unsigned HOST_WIDE_INT word_mode_max; if (dump_file) fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); @@ -474,6 +645,18 @@ doloop_optimize (struct loop *loop) return false; } + max_cost + = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); + if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop)) + > max_cost) + { + if (dump_file) + fprintf (dump_file, + "Doloop: number of iterations too costly to compute.\n"); + return false; + } + + count = copy_rtx (desc->niter_expr); iterations = desc->const_iter ? desc->niter_expr : const0_rtx; iterations_max = GEN_INT (desc->niter_max); level = get_loop_level (loop) + 1; @@ -485,8 +668,33 @@ doloop_optimize (struct loop *loop) doloop_reg = gen_reg_rtx (mode); doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, GEN_INT (level), start_label); - if (! doloop_seq && mode != word_mode) + + word_mode_size = GET_MODE_PRECISION (word_mode); + word_mode_max + = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1; + if (! doloop_seq + && mode != word_mode + /* Before trying mode different from the one in that # of iterations is + computed, we must be sure that the number of iterations fits into + the new mode. */ + && (word_mode_size >= GET_MODE_PRECISION (mode) + || desc->niter_max <= word_mode_max)) { + if (word_mode_size > GET_MODE_PRECISION (mode)) + { + count = simplify_gen_unary (ZERO_EXTEND, word_mode, + count, mode); + iterations = simplify_gen_unary (ZERO_EXTEND, word_mode, + iterations, mode); + iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode, + iterations_max, mode); + } + else + { + count = lowpart_subreg (word_mode, count, mode); + iterations = lowpart_subreg (word_mode, iterations, mode); + iterations_max = lowpart_subreg (word_mode, iterations_max, mode); + } PUT_MODE (doloop_reg, word_mode); doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, GEN_INT (level), start_label); @@ -507,9 +715,7 @@ doloop_optimize (struct loop *loop) { while (NEXT_INSN (doloop_pat) != NULL_RTX) doloop_pat = NEXT_INSN (doloop_pat); - if (GET_CODE (doloop_pat) == JUMP_INSN) - doloop_pat = PATTERN (doloop_pat); - else + if (!JUMP_P (doloop_pat)) doloop_pat = NULL_RTX; } @@ -521,24 +727,20 @@ doloop_optimize (struct loop *loop) return false; } - doloop_modify (loop, desc, doloop_seq, condition); + doloop_modify (loop, desc, doloop_seq, condition, count); return true; } -/* This is the main entry point. Process all LOOPS using doloop_optimize. */ +/* This is the main entry point. Process all loops using doloop_optimize. */ void -doloop_optimize_loops (struct loops *loops) +doloop_optimize_loops (void) { - unsigned i; + loop_iterator li; struct loop *loop; - for (i = 1; i < loops->num; i++) + FOR_EACH_LOOP (li, loop, 0) { - loop = loops->parray[i]; - if (!loop) - continue; - doloop_optimize (loop); } @@ -546,7 +748,7 @@ doloop_optimize_loops (struct loops *loops) #ifdef ENABLE_CHECKING verify_dominators (CDI_DOMINATORS); - verify_loop_structure (loops); + verify_loop_structure (); #endif } #endif /* HAVE_doloop_end */