+2003-02-26 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
+
+ * cse.c (count_reg_usage): Fix handling of REG_EQUAL notes.
+
+ * Makefile.in (loop-unroll.o): New.
+ * cfgloop.h (UAP_PEEL, UAP_UNROLL, UAP_UNROLL_ALL): New.
+ (unroll_and_peel_loops): Declare.
+ * alias.c (init_alias_analysis): Flag_unroll_loops renamed to
+ flag_old_unroll_loops.
+ * loop.c (loop_invariant_p): Ditto.
+ * unroll.c (unroll_loop): Flag_unroll_all_loops renamed to
+ flag_old_unroll_all_loops.
+ * flags.h (flag_unroll_loops): Renamed to flag_old_unroll_loops.
+ (flag_unroll_all_loops): Renamed to flag_old_unroll_all_loops.
+ * params.def (PARAM_MAX_UNROLLED_INSNS): Default value changed.
+ (PARAM_MAX_AVERAGE_UNROLLED_INSNS, PARAM_MAX_UNROLL_TIMES,
+ PARAM_MAX_PEELED_INSNS, PARAM_MAX_PEEL_TIMES,
+ PARAM_MAX_COMPLETELY_PEELED_INSNS, PARAM_MAX_COMPLETELY_PEEL_TIMES,
+ PARAM_MAX_ONCE_PEELED_INSNS): New.
+ * toplev.c (flag_old_unroll_loops, flag_old_unroll_all_loops): New.
+ (flag_unroll_loops, flag_unroll_all_loops): Used for new unroller
+ instead of old one.
+ (flag_peel_loops): New.
+ (lang_independent_options): The new flags added.
+ (rest_of_compilation): Call new unroller.
+ (process_options): Setup flags for coexistence of old and new unroller.
+ * doc/invoke.texi: Document new options.
+ * doc/passes.texi: Document new unroller pass.
+
2003-02-26 David Billinghurst <David.Billinghurst@riotinto.com>
* fixinc/fixincl.x: Regenerate
TCFLAGS =
CFLAGS = -g
STAGE1_CFLAGS = -g @stage1_cflags@
-BOOT_CFLAGS = -g -O2
+BOOT_CFLAGS = -g -O2 -funroll-loops -fpeel-loops
# Flags to determine code coverage. When coverage is disabled, this will
# contain the optimization flags, as you normally want code coverage
OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \
- cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o \
+ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
cfgrtl.o combine.o conflict.o convert.o cse.o cselib.o dbxout.o \
debug.o df.o diagnostic.o doloop.o dominance.o \
dwarf2asm.o dwarf2out.o dwarfout.o emit-rtl.o except.o explow.o \
loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h params.h \
output.h $(EXPR_H) coretypes.h $(TM_H)
+loop-unroll.o: loop-unroll.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
+ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h params.h \
+ output.h $(EXPR_H) coretypes.h $(TM_H)
dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
new_reg_base_value = (rtx *) xmalloc (reg_base_value_size * sizeof (rtx));
reg_seen = (char *) xmalloc (reg_base_value_size);
- if (! reload_completed && flag_unroll_loops)
+ if (! reload_completed && flag_old_unroll_loops)
{
/* ??? Why are we realloc'ing if we're just going to zero it? */
alias_invariant = (rtx *)xrealloc (alias_invariant,
/* Optimization passes. */
extern void unswitch_loops PARAMS ((struct loops *));
+enum
+{
+ UAP_PEEL = 1, /* Enables loop peeling. */
+ UAP_UNROLL = 2, /* Enables unrolling of loops if it seems profitable. */
+ UAP_UNROLL_ALL = 4 /* Enables unrolling of all loops. */
+};
+
+extern void unroll_and_peel_loops PARAMS ((struct loops *, int));
int incr;
{
enum rtx_code code;
+ rtx note;
const char *fmt;
int i, j;
/* Things used in a REG_EQUAL note aren't dead since loop may try to
use them. */
- count_reg_usage (REG_NOTES (x), counts, NULL_RTX, incr);
+ note = find_reg_equal_equiv_note (x);
+ if (note)
+ count_reg_usage (XEXP (note, 0), counts, NULL_RTX, incr);
return;
- case EXPR_LIST:
case INSN_LIST:
- if (REG_NOTE_KIND (x) == REG_EQUAL
- || (REG_NOTE_KIND (x) != REG_NONNEG && GET_CODE (XEXP (x,0)) == USE))
- count_reg_usage (XEXP (x, 0), counts, NULL_RTX, incr);
- count_reg_usage (XEXP (x, 1), counts, NULL_RTX, incr);
- return;
+ abort ();
default:
break;
-fsched2-use-traces -fsignaling-nans @gol
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
--funit-at-a-time -funroll-all-loops -funroll-loops -funswitch-loops @gol
+-funroll-all-loops -funroll-loops -fpeel-loops -funswitch-loops @gol
+-fold-unroll-loops -fold-unroll-all-loops @gol
--param @var{name}=@var{value}
-O -O0 -O1 -O2 -O3 -Os}
@item -funroll-loops
@opindex funroll-loops
-Unroll loops whose number of iterations can be determined at compile
-time or upon entry to the loop. @option{-funroll-loops} implies both
-@option{-fstrength-reduce} and @option{-frerun-cse-after-loop}. This
-option makes code larger, and may or may not make it run faster.
+Unroll loops whose number of iterations can be determined at compile time or
+upon entry to the loop. @option{-funroll-loops} implies
+@option{-frerun-cse-after-loop}. It also turns on complete loop peeling
+(i.e. complete removal of loops with small constant number of iterations).
+This option makes code larger, and may or may not make it run faster.
@item -funroll-all-loops
@opindex funroll-all-loops
Unroll all loops, even if their number of iterations is uncertain when
the loop is entered. This usually makes programs run more slowly.
@option{-funroll-all-loops} implies the same options as
-@option{-funroll-loops},
+@option{-funroll-loops}.
+
+@item -fpeel-loops
+@opindex fpeel-loops
+Peels loops for which there is enough information that they do not
+roll much (from profile feedback). It also turns on complete loop peeling
+(i.e., complete removal of loops with a small constant number of iterations).
+
+@item -funswitch-loops
+@opindex funswitch-loops
+Move branches with loop invariant conditions out of the loop, with duplicates
+of the loop on both branches (modified according to result of the condition).
+
+@item -fold-unroll-loops
+@opindex fold-unroll-loops
+Unroll loops whose number of iterations can be determined at compile
+time or upon entry to the loop, using the old loop unroller whose loop
+recognition is based on notes from frontend. @option{-fold-unroll-loops} implies
+both @option{-fstrength-reduce} and @option{-frerun-cse-after-loop}. This
+option makes code larger, and may or may not make it run faster.
+
+@item -fold-unroll-all-loops
+@opindex fold-unroll-all-loops
+Unroll all loops, even if their number of iterations is uncertain when
+the loop is entered. This is done using the old loop unroller whose loop
+recognition is based on notes from frontend. This usually makes programs run more slowly.
+@option{-fold-unroll-all-loops} implies the same options as
+@option{-fold-unroll-loops}.
+
+@item -funswitch-loops
+@opindex funswitch-loops
+Move branches with loop invariant conditions out of the loop, with duplicates
+of the loop on both branches (modified according to result of the condition).
@item -funswitch-loops
@opindex funswitch-loops
is unrolled, and if the loop is unrolled, it determines how many times
the loop code is unrolled.
+@item max-average-unrolled-insns
+The maximum number of instructions biased by probabilities of their execution
+that a loop should have if that loop is unrolled, and if the loop is unrolled,
+it determines how many times the loop code is unrolled.
+
+@item max-unroll-times
+The maximum number of unrollings of a single loop.
+
+@item max-peeled-insns
+The maximum number of instructions that a loop should have if that loop
+is peeled, and if the loop is peeled, it determines how many times
+the loop code is peeled.
+
+@item max-peel-times
+The maximum number of peelings of a single loop.
+
+@item max-completely-peeled-insns
+The maximum number of insns of a completely peeled loop.
+
+@item max-completely-peel-times
+The maximum number of iterations of a loop to be suitable for complete peeling.
+
@item max-unswitch-insns
The maximum number of insns of an unswitched loop.
Loop dependency analysis routines are contained in @file{dependence.c}.
Second loop optimization pass takes care of basic block level optimalizations --
-unswitching loops. The source files are
+unrolling, peeling and unswitching loops. The source files are
@file{cfgloopanal.c} and @file{cfgloopmanip.c} containing generic loop
analysis and manipulation code, @file{loop-init.c} with initialization and
-finalization code, @file{loop-unswitch.c} for loop unswitching.
+finalization code, @file{loop-unswitch.c} for loop unswitching and
+@file{loop-unroll.c} for loop unrolling and peeling.
@opindex dL
The option @option{-dL} causes a debugging dump of the RTL code after
UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are
unrolled. */
-extern int flag_unroll_loops;
+extern int flag_old_unroll_loops;
/* Nonzero enables loop unrolling in unroll.c. All loops are unrolled.
This is generally not a win. */
-extern int flag_unroll_all_loops;
+extern int flag_old_unroll_all_loops;
/* Nonzero forces all invariant computations in loops to be moved
outside the loop. */
We don't know the loop bounds here though, so just fail for all
labels. */
- if (flag_unroll_loops)
+ if (flag_old_unroll_loops)
return 0;
else
return 1;
DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
"max-unrolled-insns",
"The maximum number of instructions to consider to unroll in a loop",
- 100)
+ 200)
+/* This parameter limits how many times the loop is unrolled depending
+ on number of insns really executed in each iteration. */
+DEFPARAM(PARAM_MAX_AVERAGE_UNROLLED_INSNS,
+ "max-average-unrolled-insns",
+ "The maximum number of instructions to consider to unroll in a loop on average",
+ 80)
+/* The maximum number of unrollings of a single loop. */
+DEFPARAM(PARAM_MAX_UNROLL_TIMES,
+ "max-unroll-times",
+ "The maximum number of unrollings of a single loop",
+ 8)
+/* The maximum number of insns of a peeled loop. */
+DEFPARAM(PARAM_MAX_PEELED_INSNS,
+ "max-peeled-insns",
+ "The maximum number of insns of a peeled loop",
+ 120)
+/* The maximum number of peelings of a single loop. */
+DEFPARAM(PARAM_MAX_PEEL_TIMES,
+ "max-peel-times",
+ "The maximum number of peelings of a single loop",
+ 16)
+/* The maximum number of insns of a completely peeled loop. */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
+ "max-completely-peeled-insns",
+ "The maximum number of insns of a completely peeled loop",
+ 120)
+/* The maximum number of peelings of a single loop that is peeled completely. */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
+ "max-completely-peel-times",
+ "The maximum number of peelings of a single loop that is peeled completely",
+ 16)
+/* The maximum number of insns of a peeled loop that rolls only once. */
+DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS,
+ "max-once-peeled-insns",
+ "The maximum number of insns of a peeled loop that rolls only once",
+ 200)
/* The maximum number of insns of an unswitched loop. */
DEFPARAM(PARAM_MAX_UNSWITCH_INSNS,
+2003-02-26 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
+
+ * lib/scanasm.exp: Add support for counting numbers of
+ occurrences.
+ * gcc.dg/unswitch-1.c, gcc.dg/peel-1.c, gcc.dg/unroll-1.c,
+ gcc.dg/unroll-2.c, gcc.dg/unroll-3.c: New tests.
+
2003-02-25 Mark Mitchell <mark@codesourcery.com>
PR c++/9683
}
}
+# Call pass if pattern is present given number of times, otherwise fail.
+#
+# Arguments, taken positionally from ARGS:
+#   0 - regular expression searched for in the assembler output
+#   1 - number of occurrences that must be found for the test to pass
+#   2 - optional target selector, handed to dg-process-target; "xfail"
+#       selectors are rejected
+proc scan-assembler-times { args } {
+    if { [llength $args] < 2 } {
+        error "scan-assembler-times: too few arguments"
+        return
+    }
+    if { [llength $args] > 3 } {
+        error "scan-assembler-times: too many arguments"
+        return
+    }
+    if { [llength $args] >= 3 } {
+        switch [dg-process-target [lindex $args 2]] {
+            "S" { }
+            "N" { return }
+            "F" { error "scan-assembler-times: `xfail' not allowed here" }
+            "P" { error "scan-assembler-times: `xfail' not allowed here" }
+        }
+    }
+
+    # This assumes that we are two frames down from dg-test, and that
+    # it still stores the filename of the testcase in a local variable "name".
+    # A cleaner solution would require a new dejagnu release.
+    upvar 2 name testcase
+
+    # This must match the rule in gcc-dg.exp.
+    set output_file "[file rootname [file tail $testcase]].s"
+
+    set fd [open $output_file r]
+    set text [read $fd]
+    close $fd
+
+    # The count is the length of the list returned by "regexp -inline -all".
+    # NOTE(review): that list also holds one element per capturing
+    # subexpression of every match, so patterns with parenthesized groups
+    # are counted more than once per match -- confirm callers avoid them.
+    if { [llength [regexp -inline -all -- [lindex $args 0] $text]] == [lindex $args 1]} {
+        pass "$testcase scan-assembler-times [lindex $args 0] [lindex $args 1]"
+    } else {
+        fail "$testcase scan-assembler-times [lindex $args 0] [lindex $args 1]"
+    }
+}
+
# Call pass if pattern is not present, otherwise fail.
proc scan-assembler-not { args } {
if { [llength $args] < 1 } {
UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are
unrolled. */
-int flag_unroll_loops;
+int flag_old_unroll_loops;
/* Nonzero enables loop unrolling in unroll.c. All loops are unrolled.
This is generally not a win. */
+int flag_old_unroll_all_loops;
+
+/* Enables unrolling of simple loops in loop-unroll.c. */
+int flag_unroll_loops;
+
+/* Enables unrolling of all loops in loop-unroll.c. */
int flag_unroll_all_loops;
+/* Nonzero enables loop peeling. */
+int flag_peel_loops;
+
/* Nonzero enables loop unswitching. */
int flag_unswitch_loops;
N_("Perform loop unrolling when iteration count is known") },
{"unroll-all-loops", &flag_unroll_all_loops, 1,
N_("Perform loop unrolling for all loops") },
+ {"old-unroll-loops", &flag_old_unroll_loops, 1,
+ N_("Perform loop unrolling when iteration count is known") },
+ {"old-unroll-all-loops", &flag_old_unroll_all_loops, 1,
+ N_("Perform loop unrolling for all loops") },
+ {"peel-loops", &flag_peel_loops, 1,
+ N_("Perform loop peeling") },
{"unswitch-loops", &flag_unswitch_loops, 1,
N_("Perform loop unswitching") },
{"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1,
/* CFG is no longer maintained up-to-date. */
free_bb_for_insn ();
- do_unroll = flag_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL;
+ if (flag_unroll_loops)
+ do_unroll = 0; /* Having two unrollers is useless. */
+ else
+ do_unroll = flag_old_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL;
do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
if (flag_rerun_loop_opt)
{
/* Perform loop optimalizations. It might be better to do them a bit
sooner, but we want the profile feedback to work more efficiently. */
if (optimize > 0
- && flag_unswitch_loops)
+ && (flag_unswitch_loops
+ || flag_peel_loops
+ || flag_unroll_loops))
{
struct loops *loops;
timevar_push (TV_LOOP);
if (flag_unswitch_loops)
unswitch_loops (loops);
+ if (flag_peel_loops || flag_unroll_loops)
+ unroll_and_peel_loops (loops,
+ (flag_peel_loops ? UAP_PEEL : 0) |
+ (flag_unroll_loops ? UAP_UNROLL : 0) |
+ (flag_unroll_all_loops ? UAP_UNROLL_ALL : 0));
+
loop_optimizer_finalize (loops, rtl_dump_file);
}
be done. */
if (flag_unroll_all_loops)
flag_unroll_loops = 1;
- /* Loop unrolling requires that strength_reduction be on also. Silently
+
+ if (flag_unroll_loops)
+ {
+ flag_old_unroll_loops = 0;
+ flag_old_unroll_all_loops = 0;
+ }
+
+ if (flag_old_unroll_all_loops)
+ flag_old_unroll_loops = 1;
+
+ /* Old loop unrolling requires that strength_reduction be on also. Silently
turn on strength reduction here if it isn't already on. Also, the loop
unrolling code assumes that cse will be run after loop, so that must
be turned on also. */
- if (flag_unroll_loops)
+ if (flag_old_unroll_loops)
{
flag_strength_reduce = 1;
flag_rerun_cse_after_loop = 1;
}
+ if (flag_unroll_loops || flag_peel_loops)
+ flag_rerun_cse_after_loop = 1;
if (flag_non_call_exceptions)
flag_asynchronous_unwind_tables = 1;
/* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
the loop unless all loops are being unrolled. */
- if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
+ if (unroll_type == UNROLL_NAIVE && ! flag_old_unroll_all_loops)
{
if (loop_dump_stream)
fprintf (loop_dump_stream,