/* Perform doloop optimizations
- Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
+ Free Software Foundation, Inc.
Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
This file is part of GCC.
#include "config.h"
#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
#include "rtl.h"
#include "flags.h"
#include "expr.h"
#include "basic-block.h"
#include "toplev.h"
#include "tm_p.h"
+#include "cfgloop.h"
/* This module is used to modify loops with a determinable number of
#ifdef HAVE_doloop_end
-static rtx doloop_condition_get
- PARAMS ((rtx));
-static unsigned HOST_WIDE_INT doloop_iterations_max
- PARAMS ((const struct loop_info *, enum machine_mode, int));
-static int doloop_valid_p
- PARAMS ((const struct loop *, rtx));
-static int doloop_modify
- PARAMS ((const struct loop *, rtx, rtx, rtx, rtx, rtx));
-static int doloop_modify_runtime
- PARAMS ((const struct loop *, rtx, rtx, rtx, enum machine_mode, rtx));
+static rtx doloop_condition_get (rtx);
+static unsigned HOST_WIDE_INT doloop_iterations_max (const struct loop_info *,
+ enum machine_mode, int);
+static int doloop_valid_p (const struct loop *, rtx);
+static int doloop_modify (const struct loop *, rtx, rtx, rtx, rtx, rtx);
+static int doloop_modify_runtime (const struct loop *, rtx, rtx, rtx,
+ enum machine_mode, rtx);
/* Return the loop termination condition for PATTERN or zero
if it is not a decrement and branch jump insn. */
static rtx
-doloop_condition_get (pattern)
- rtx pattern;
+doloop_condition_get (rtx pattern)
{
rtx cmp;
rtx inc;
/* Return an estimate of the maximum number of loop iterations for the
loop specified by LOOP or zero if the loop is not normal.
- MODE is the mode of the iteration count and NONNEG is non-zero if
- the the iteration count has been proved to be non-negative. */
+ MODE is the mode of the iteration count and NONNEG is nonzero if
+ the iteration count has been proved to be non-negative. */
static unsigned HOST_WIDE_INT
-doloop_iterations_max (loop_info, mode, nonneg)
- const struct loop_info *loop_info;
- enum machine_mode mode;
- int nonneg;
+doloop_iterations_max (const struct loop_info *loop_info,
+ enum machine_mode mode, int nonneg)
{
unsigned HOST_WIDE_INT n_iterations_max;
enum rtx_code code;
if (GET_CODE (max_value) == CONST_INT)
umax = INTVAL (max_value);
else
- umax = ((unsigned)2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+ umax = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
n_iterations_max = umax - umin;
break;
if (GET_CODE (min_value) == CONST_INT)
smin = INTVAL (min_value);
else
- smin = -((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1));
+ smin = -((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1));
if (GET_CODE (max_value) == CONST_INT)
smax = INTVAL (max_value);
else
- smax = ((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+ smax = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
n_iterations_max = smax - smin;
break;
else
/* We need to conservatively assume that we might have the maximum
number of iterations without any additional knowledge. */
- n_iterations_max = ((unsigned)2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+ n_iterations_max = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
break;
default:
/* If we know that the iteration count is non-negative then adjust
n_iterations_max if it is so large that it appears negative. */
if (nonneg
- && n_iterations_max > ((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1)))
- n_iterations_max = ((unsigned)1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+ && n_iterations_max > ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)))
+ n_iterations_max = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
return n_iterations_max;
}
-/* Return non-zero if the loop specified by LOOP is suitable for
+/* Return nonzero if the loop specified by LOOP is suitable for
the use of special low-overhead looping instructions. */
static int
-doloop_valid_p (loop, jump_insn)
- const struct loop *loop;
- rtx jump_insn;
+doloop_valid_p (const struct loop *loop, rtx jump_insn)
{
const struct loop_info *loop_info = LOOP_INFO (loop);
|| ! onlyjump_p (jump_insn))
{
if (loop_dump_stream)
- fprintf (loop_dump_stream,
+ fprintf (loop_dump_stream,
"Doloop: Invalid jump at loop end.\n");
return 0;
}
return 0;
}
- /* Some targets (eg, PPC) use the count register for branch on table
- instructions. ??? This should be a target specific check. */
+ /* Some targets (eg, PPC) use the count register for branch on table
+ instructions. ??? This should be a target specific check. */
if (loop_info->has_tablejump)
{
if (loop_dump_stream)
/* There is no guarantee that a NE loop will terminate if the
absolute increment is not unity. ??? We could compute this
- condition at run-time and have a additional jump around the loop
+ condition at run-time and have an additional jump around the loop
to ensure an infinite loop. */
if (loop_info->comparison_code == NE
+ && !loop_info->preconditioned
&& INTVAL (loop_info->increment) != -1
&& INTVAL (loop_info->increment) != 1)
{
&& ((loop_info->comparison_code == LEU
&& INTVAL (loop_info->increment) > 0)
|| (loop_info->comparison_code == GEU
- && INTVAL (loop_info->increment) < 0)))
+ && INTVAL (loop_info->increment) < 0)
+ || (loop_info->comparison_code == LTU
+ && INTVAL (loop_info->increment) > 1)
+ || (loop_info->comparison_code == GTU
+ && INTVAL (loop_info->increment) < -1)))
{
/* If the comparison is LEU and the comparison value is UINT_MAX
then the loop will not terminate. Similarly, if the
- comparison code is GEU and the initial value is 0, the loop
- will not terminate.
+ comparison code is GEU and the comparison value is 0, the
+ loop will not terminate.
+
+ If the absolute increment is not 1, the loop can be infinite
+ even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
- Note that with LE and GE, the loop behaviour can be
- implementation dependent if an overflow occurs, say between
- INT_MAX and INT_MAX + 1. We thus don't have to worry about
- these two cases.
+ Note that with LE and GE, the loop behavior is undefined
+ (C++ standard section 5 clause 5) if an overflow occurs, say
+ between INT_MAX and INT_MAX + 1. We thus don't have to worry
+ about these two cases.
??? We could compute these conditions at run-time and have a
additional jump around the loop to ensure an infinite loop.
However, it is very unlikely that this is the intended
- behaviour of the loop and checking for these rare boundary
- conditions would pessimize all other code. */
+ behavior of the loop and checking for these rare boundary
+ conditions would pessimize all other code.
+
+ If the loop is executed only a few times an extra check to
+ restart the loop could use up most of the benefits of using a
+ count register loop. Note however, that normally, this
+ restart branch would never execute, so it could be predicted
+ well by the CPU. We should generate the pessimistic code by
+ default, and have an option, e.g. -funsafe-loops that would
+ enable count-register loops in this case. */
if (loop_dump_stream)
fprintf (loop_dump_stream,
"Doloop: Possible infinite iteration case ignored.\n");
number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying
the maximum number of loop iterations, and DOLOOP_INSN is the
low-overhead looping insn to emit at the end of the loop. This
- returns non-zero if it was successful. */
+ returns nonzero if it was successful. */
static int
-doloop_modify (loop, iterations, iterations_max,
- doloop_seq, start_label, condition)
- const struct loop *loop;
- rtx iterations;
- rtx iterations_max;
- rtx doloop_seq;
- rtx start_label;
- rtx condition;
+doloop_modify (const struct loop *loop, rtx iterations, rtx iterations_max,
+ rtx doloop_seq, rtx start_label, rtx condition)
{
rtx counter_reg;
rtx count;
/* Determine if the iteration counter will be non-negative.
Note that the maximum value loaded is iterations_max - 1. */
if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max)
- <= ((unsigned)1 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
+ <= ((unsigned) 1 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
nonneg = 1;
break;
/* Abort if an invalid doloop pattern has been generated. */
default:
- abort();
+ abort ();
}
if (decrement_count)
count = GEN_INT (INTVAL (count) - 1);
else
count = expand_simple_binop (GET_MODE (counter_reg), MINUS,
- count, GEN_INT (1),
+ count, const1_rtx,
0, 0, OPTAB_LIB_WIDEN);
}
/* Insert initialization of the count register into the loop header. */
convert_move (counter_reg, count, 1);
- sequence = gen_sequence ();
+ sequence = get_insns ();
end_sequence ();
emit_insn_before (sequence, loop->start);
{
start_sequence ();
emit_insn (init);
- sequence = gen_sequence ();
+ sequence = get_insns ();
end_sequence ();
emit_insn_after (sequence, loop->start);
}
not present, we emit one. The loop to modify is described by LOOP.
ITERATIONS_MAX is a CONST_INT specifying the estimated maximum
number of loop iterations. DOLOOP_INSN is the low-overhead looping
- insn to insert. Returns non-zero if loop successfully modified. */
+ insn to insert. Returns nonzero if loop successfully modified. */
static int
-doloop_modify_runtime (loop, iterations_max,
- doloop_seq, start_label, mode, condition)
- const struct loop *loop;
- rtx iterations_max;
- rtx doloop_seq;
- rtx start_label;
- enum machine_mode mode;
- rtx condition;
+doloop_modify_runtime (const struct loop *loop, rtx iterations_max,
+ rtx doloop_seq, rtx start_label,
+ enum machine_mode mode, rtx condition)
{
const struct loop_info *loop_info = LOOP_INFO (loop);
HOST_WIDE_INT abs_inc;
+ HOST_WIDE_INT abs_loop_inc;
int neg_inc;
rtx diff;
rtx sequence;
n = abs (final - initial) / abs_inc;
n += (abs (final - initial) % abs_inc) != 0;
- If the loop has been unrolled, then the loop body has been
- preconditioned to iterate a multiple of unroll_number times. If
- abs_inc is != 1, the full calculation is
+ But when abs_inc is a power of two, the summation won't overflow
+ except in cases where the loop never terminates. So we don't
+ need to use this more costly calculation.
+
+ If the loop has been unrolled, the full calculation is
+
+ t1 = abs_inc * unroll_number; increment per loop
+ n = (abs (final - initial) + abs_inc - 1) / t1; full loops
+ n += (abs (final - initial) + abs_inc - 1) % t1) >= abs_inc;
+ partial loop
+ which works out to be equivalent to
- t1 = abs_inc * unroll_number;
- n = abs (final - initial) / t1;
- n += (abs (final - initial) % t1) > t1 - abs_inc;
+ n = (abs (final - initial) + t1 - 1) / t1;
+
+ In the case where the loop was preconditioned, a few iterations
+ may have been executed earlier; but 'initial' was adjusted as they
+ were executed, so we don't need anything special for that case here.
+ As above, when t1 is a power of two we don't need to worry about
+ overflow.
The division and modulo operations can be avoided by requiring
that the increment is a power of 2 (precondition_loop_p enforces
copy_rtx (neg_inc ? final_value : initial_value),
NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN);
- if (abs_inc * loop_info->unroll_number != 1)
+ /* Some code transformations can result in code akin to
+
+ tmp = i + 1;
+ ...
+ goto scan_start;
+ top:
+ tmp = tmp + 1;
+ scan_start:
+ i = tmp;
+ if (i < n) goto top;
+
+ We'll have already detected this form of loop in scan_loop,
+ and set loop->top and loop->scan_start appropriately.
+
+ In this situation, we skip the increment the first time through
+ the loop, which results in an incorrect estimate of the number
+ of iterations. Adjust the difference to compensate. */
+ /* ??? Logically, it would seem this belongs in loop_iterations.
+ However, this causes regressions e.g. on x86 execute/20011008-3.c,
+ so I do not believe we've properly characterized the exact nature
+ of the problem. In the meantime, this fixes execute/20011126-2.c
+ on ia64 and some Ada front end miscompilation on ppc. */
+
+ if (loop->scan_start)
{
- int shift_count;
- rtx extra;
- rtx label;
- unsigned HOST_WIDE_INT limit;
+ rtx iteration_var = loop_info->iteration_var;
+ struct loop_ivs *ivs = LOOP_IVS (loop);
+ struct iv_class *bl;
- shift_count = exact_log2 (abs_inc * loop_info->unroll_number);
- if (shift_count < 0)
+ if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
+ bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
+ else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
+ {
+ struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
+ bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
+ }
+ else
+ /* Iteration var must be an induction variable to get here. */
abort ();
- /* abs (final - initial) / (abs_inc * unroll_number) */
- iterations = expand_simple_binop (GET_MODE (diff), LSHIFTRT,
- diff, GEN_INT (shift_count),
- NULL_RTX, 1,
- OPTAB_LIB_WIDEN);
-
- if (abs_inc != 1)
+ if (INSN_UID (bl->biv->insn) < max_uid_for_loop
+ && INSN_LUID (bl->biv->insn) < INSN_LUID (loop->scan_start))
{
- /* abs (final - initial) % (abs_inc * unroll_number) */
- rtx count = GEN_INT (abs_inc * loop_info->unroll_number - 1);
- extra = expand_simple_binop (GET_MODE (iterations), AND,
- diff, count, NULL_RTX, 1,
- OPTAB_LIB_WIDEN);
-
- /* If (abs (final - initial) % (abs_inc * unroll_number)
- <= abs_inc * (unroll - 1)),
- jump past following increment instruction. */
- label = gen_label_rtx();
- limit = abs_inc * (loop_info->unroll_number - 1);
- emit_cmp_and_jump_insns (extra, GEN_INT (limit),
- limit == 0 ? EQ : LEU, NULL_RTX,
- GET_MODE (extra), 0, 0, label);
- JUMP_LABEL (get_last_insn ()) = label;
- LABEL_NUSES (label)++;
-
- /* Increment the iteration count by one. */
- iterations = expand_simple_binop (GET_MODE (iterations), PLUS,
- iterations, GEN_INT (1),
- iterations, 1,
- OPTAB_LIB_WIDEN);
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Basic induction var skips initial incr.\n");
- emit_label (label);
+ diff = expand_simple_binop (mode, PLUS, diff, GEN_INT (abs_inc),
+ diff, unsigned_p, OPTAB_LIB_WIDEN);
}
}
- else
- iterations = diff;
+
+ abs_loop_inc = abs_inc * loop_info->unroll_number;
+ if (abs_loop_inc != 1)
+ {
+ int shift_count;
+
+ shift_count = exact_log2 (abs_loop_inc);
+ if (shift_count < 0)
+ abort ();
+
+ /* (abs (final - initial) + abs_inc * unroll_number - 1) */
+ diff = expand_simple_binop (GET_MODE (diff), PLUS,
+ diff, GEN_INT (abs_loop_inc - 1),
+ diff, 1, OPTAB_LIB_WIDEN);
+
+ /* (abs (final - initial) + abs_inc * unroll_number - 1)
+ / (abs_inc * unroll_number) */
+ diff = expand_simple_binop (GET_MODE (diff), LSHIFTRT,
+ diff, GEN_INT (shift_count),
+ diff, 1, OPTAB_LIB_WIDEN);
+ }
+ iterations = diff;
/* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while'
style loop, with a loop exit test at the start. Thus, we can
iteration count to one if necessary. */
if (! loop->vtop)
{
- rtx label;
-
if (loop_dump_stream)
fprintf (loop_dump_stream, "Doloop: Do-while loop.\n");
- /* A `do-while' loop must iterate at least once. If the
- iteration count is bogus, we set the iteration count to 1.
+ /* A `do-while' loop must iterate at least once. For code like
+ i = initial; do { ... } while (++i < final);
+ we will calculate a bogus iteration count if initial > final.
+ So detect this and set the iteration count to 1.
Note that if the loop has been unrolled, then the loop body
- is guaranteed to execute at least once. */
- if (loop_info->unroll_number == 1)
+ is guaranteed to execute at least once. Also, when the
+ comparison is NE, our calculated count will be OK. */
+ if (loop_info->unroll_number == 1 && comparison_code != NE)
{
+ rtx label;
+
/* Emit insns to test if the loop will immediately
terminate and to set the iteration count to 1 if true. */
label = gen_label_rtx();
emit_cmp_and_jump_insns (copy_rtx (initial_value),
copy_rtx (loop_info->comparison_value),
- comparison_code, NULL_RTX, mode, 0, 0,
+ comparison_code, NULL_RTX, mode, 0,
label);
JUMP_LABEL (get_last_insn ()) = label;
LABEL_NUSES (label)++;
}
}
- sequence = gen_sequence ();
+ sequence = get_insns ();
end_sequence ();
emit_insn_before (sequence, loop->start);
suitable. We distinguish between loops with compile-time bounds
and those with run-time bounds. Information from LOOP is used to
compute the number of iterations and to determine whether the loop
- is a candidate for this optimization. Returns non-zero if loop
+ is a candidate for this optimization. Returns nonzero if loop
successfully modified. */
int
-doloop_optimize (loop)
- const struct loop *loop;
+doloop_optimize (const struct loop *loop)
{
struct loop_info *loop_info = LOOP_INFO (loop);
rtx initial_value;
&increment, &mode))
{
if (loop_dump_stream)
- fprintf (loop_dump_stream,
+ fprintf (loop_dump_stream,
"Doloop: Cannot precondition loop.\n");
return 0;
}
return 0;
}
- /* A raw define_insn may yield a plain pattern. If a sequence
- was involved, the last must be the jump instruction. */
- if (GET_CODE (doloop_seq) == SEQUENCE)
+ /* If multiple instructions were created, the last must be the
+ jump instruction. Also, a raw define_insn may yield a plain
+ pattern. */
+ doloop_pat = doloop_seq;
+ if (INSN_P (doloop_pat))
{
- doloop_pat = XVECEXP (doloop_seq, 0, XVECLEN (doloop_seq, 0) - 1);
+ while (NEXT_INSN (doloop_pat) != NULL_RTX)
+ doloop_pat = NEXT_INSN (doloop_pat);
if (GET_CODE (doloop_pat) == JUMP_INSN)
doloop_pat = PATTERN (doloop_pat);
else
doloop_pat = NULL_RTX;
}
- else
- doloop_pat = doloop_seq;
if (! doloop_pat
|| ! (condition = doloop_condition_get (doloop_pat)))