/* Perform doloop optimizations
- Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation,
- Inc.
+ Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010
+ Free Software Foundation, Inc.
Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
This file is part of GCC.
#include "expr.h"
#include "hard-reg-set.h"
#include "basic-block.h"
-#include "toplev.h"
+#include "diagnostic-core.h"
#include "tm_p.h"
#include "cfgloop.h"
#include "output.h"
rtx inc_src;
rtx condition;
rtx pattern;
+ rtx cc_reg = NULL_RTX;
+ rtx reg_orig = NULL_RTX;
/* The canonical doloop pattern we expect has one of the following
forms:
2) (set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(label_ref (label))
- (pc))). */
+ (pc))).
+
+ Some targets (ARM) do the comparison before the branch, as in the
+ following form:
+
+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc))) */
pattern = PATTERN (doloop_pat);
if (GET_CODE (pattern) != PARALLEL)
{
rtx cond;
+ rtx prev_insn = prev_nondebug_insn (doloop_pat);
+ rtx cmp_arg1, cmp_arg2;
+ rtx cmp_orig;
- /* We expect the decrement to immediately precede the branch. */
+ /* In case the pattern is not PARALLEL we expect two forms
+ of doloop which are cases 2) and 3) above: in case 2) the
+ decrement immediately precedes the branch, while in case 3)
+ the compare and decrement instructions immediately precede
+ the branch. */
- if ((PREV_INSN (doloop_pat) == NULL_RTX)
- || !INSN_P (PREV_INSN (doloop_pat)))
+ if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
return 0;
cmp = pattern;
- inc = PATTERN (PREV_INSN (doloop_pat));
+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
+ {
+ /* The third case: the compare and decrement instructions
+ immediately precede the branch. */
+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
+ if (GET_CODE (cmp_orig) != SET)
+ return 0;
+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
+ return 0;
+ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
+ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
+ if (cmp_arg2 != const0_rtx
+ || GET_CODE (cmp_arg1) != PLUS)
+ return 0;
+ reg_orig = XEXP (cmp_arg1, 0);
+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
+ || !REG_P (reg_orig))
+ return 0;
+ cc_reg = SET_DEST (cmp_orig);
+
+ inc = XVECEXP (PATTERN (prev_insn), 0, 1);
+ }
+ else
+ inc = PATTERN (prev_insn);
/* We expect the condition to be of the form (reg != 0) */
cond = XEXP (SET_SRC (cmp), 0);
if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
return 0;
-
}
else
{
return 0;
if ((XEXP (condition, 0) == reg)
+ /* For the third case: */
+ || ((cc_reg != NULL_RTX)
+ && (XEXP (condition, 0) == cc_reg)
+ && (reg_orig == reg))
|| (GET_CODE (XEXP (condition, 0)) == PLUS
- && XEXP (XEXP (condition, 0), 0) == reg))
+ && XEXP (XEXP (condition, 0), 0) == reg))
{
if (GET_CODE (pattern) != PARALLEL)
- /* The second form we expect:
+ /* For the second form we expect:
(set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
- So we return that form instead.
+ For the third form we expect:
+
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
+
+ which is equivalent to the following:
+
+ (parallel [(set (cc) (compare (reg, 1)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc)))])
+
+ So we return the second form instead for the two cases.
+
*/
condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
op0 = force_operand (op0, NULL_RTX);
op1 = force_operand (op1, NULL_RTX);
label = block_label (dest);
- do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
+ do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
+ NULL_RTX, label, -1);
jump = get_last_insn ();
if (!jump || !JUMP_P (jump))
redirect_edge_and_branch_force (*e, dest);
return false;
}
-
+
JUMP_LABEL (jump) = label;
/* The jump is supposed to handle an unlikely special case. */
describes the loop, DESC describes the number of iterations of the
loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
end of the loop. CONDITION is the condition separated from the
- DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
+ DOLOOP_SEQ. COUNT is the number of iterations of the LOOP.
+ ZERO_EXTEND_P says whether COUNT must be zero-extended from FROM_MODE
+ to word_mode once any increment has been applied to it. */
static void
doloop_modify (struct loop *loop, struct niter_desc *desc,
- rtx doloop_seq, rtx condition, rtx count)
+ rtx doloop_seq, rtx condition, rtx count,
+ bool zero_extend_p, enum machine_mode from_mode)
{
rtx counter_reg;
rtx tmp, noloop = NULL_RTX;
}
if (increment_count)
- count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
+ count = simplify_gen_binary (PLUS, from_mode, count, const1_rtx);
+
+ if (zero_extend_p)
+ count = simplify_gen_unary (ZERO_EXTEND, word_mode,
+ count, from_mode);
/* Insert initialization of the count register into the loop header. */
start_sequence ();
set_zero->count = preheader->count;
set_zero->frequency = preheader->frequency;
}
-
+
if (EDGE_COUNT (set_zero->preds) == 0)
{
/* All the conditions were simplified to false, remove the
sequence = get_insns ();
end_sequence ();
emit_insn_after (sequence, BB_END (set_zero));
-
+
set_immediate_dominator (CDI_DOMINATORS, set_zero,
recompute_dominator (CDI_DOMINATORS,
set_zero));
if (true_prob_val)
{
/* Seems safer to use the branch probability. */
- add_reg_note (jump_insn, REG_BR_PROB,
+ add_reg_note (jump_insn, REG_BR_PROB,
GEN_INT (desc->in_edge->probability));
}
}
struct niter_desc *desc;
unsigned word_mode_size;
unsigned HOST_WIDE_INT word_mode_max;
+ bool zero_extend_p = false;
if (dump_file)
fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
max_cost
= COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
- if (rtx_cost (desc->niter_expr, SET) > max_cost)
+ if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop))
+ > max_cost)
{
if (dump_file)
fprintf (dump_file,
{
if (word_mode_size > GET_MODE_BITSIZE (mode))
{
- count = simplify_gen_unary (ZERO_EXTEND, word_mode,
- count, mode);
+ zero_extend_p = true;
iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
iterations, mode);
iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
return false;
}
- doloop_modify (loop, desc, doloop_seq, condition, count);
+ doloop_modify (loop, desc, doloop_seq, condition, count,
+ zero_extend_p, mode);
return true;
}