/* Loop unrolling and peeling.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
+Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to the Free
-Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
the accumulator. If REUSE_EXPANSION is 0 reuse
the original accumulator. Else use
var_expansions[REUSE_EXPANSION - 1]. */
+ unsigned accum_pos; /* The position in which the accumulator is placed in
+ the insn src. For example in x = x + something
+ accum_pos is 0 while in x = something + x accum_pos
+ is 1. */
};
/* Information about optimization applied in
basic_block loop_preheader; /* The loop preheader basic block. */
};
-static void decide_unrolling_and_peeling (struct loops *, int);
-static void peel_loops_completely (struct loops *, int);
+static void decide_unrolling_and_peeling (int);
+static void peel_loops_completely (int);
static void decide_peel_simple (struct loop *, int);
static void decide_peel_once_rolling (struct loop *, int);
static void decide_peel_completely (struct loop *, int);
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
-static void peel_loop_simple (struct loops *, struct loop *);
-static void peel_loop_completely (struct loops *, struct loop *);
-static void unroll_loop_stupid (struct loops *, struct loop *);
-static void unroll_loop_constant_iterations (struct loops *, struct loop *);
-static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
+static void peel_loop_simple (struct loop *);
+static void peel_loop_completely (struct loop *);
+static void unroll_loop_stupid (struct loop *);
+static void unroll_loop_constant_iterations (struct loop *);
+static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
void
-unroll_and_peel_loops (struct loops *loops, int flags)
+unroll_and_peel_loops (int flags)
{
- struct loop *loop, *next;
+ struct loop *loop;
bool check;
+ loop_iterator li;
/* First perform complete loop peeling (it is almost surely a win,
and affects parameters for further decision a lot). */
- peel_loops_completely (loops, flags);
+ peel_loops_completely (flags);
/* Now decide rest of unrolling and peeling. */
- decide_unrolling_and_peeling (loops, flags);
-
- loop = loops->tree_root;
- while (loop->inner)
- loop = loop->inner;
+ decide_unrolling_and_peeling (flags);
/* Scan the loops, inner ones first. */
- while (loop != loops->tree_root)
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
{
- if (loop->next)
- {
- next = loop->next;
- while (next->inner)
- next = next->inner;
- }
- else
- next = loop->outer;
-
check = true;
/* And perform the appropriate transformations. */
switch (loop->lpt_decision.decision)
/* Already done. */
gcc_unreachable ();
case LPT_PEEL_SIMPLE:
- peel_loop_simple (loops, loop);
+ peel_loop_simple (loop);
break;
case LPT_UNROLL_CONSTANT:
- unroll_loop_constant_iterations (loops, loop);
+ unroll_loop_constant_iterations (loop);
break;
case LPT_UNROLL_RUNTIME:
- unroll_loop_runtime_iterations (loops, loop);
+ unroll_loop_runtime_iterations (loop);
break;
case LPT_UNROLL_STUPID:
- unroll_loop_stupid (loops, loop);
+ unroll_loop_stupid (loop);
break;
case LPT_NONE:
check = false;
{
#ifdef ENABLE_CHECKING
verify_dominators (CDI_DOMINATORS);
- verify_loop_structure (loops);
+ verify_loop_structure ();
#endif
}
- loop = next;
}
iv_analysis_done ();
return true;
}
-/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
+/* Depending on FLAGS, check whether to peel loops completely and do so. */
static void
-peel_loops_completely (struct loops *loops, int flags)
+peel_loops_completely (int flags)
{
struct loop *loop;
- unsigned i;
+ loop_iterator li;
/* Scan the loops, the inner ones first. */
- for (i = loops->num - 1; i > 0; i--)
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
{
- loop = loops->parray[i];
- if (!loop)
- continue;
-
loop->lpt_decision.decision = LPT_NONE;
if (dump_file)
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
{
- peel_loop_completely (loops, loop);
+ peel_loop_completely (loop);
#ifdef ENABLE_CHECKING
verify_dominators (CDI_DOMINATORS);
- verify_loop_structure (loops);
+ verify_loop_structure ();
#endif
}
}
}
-/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
+/* Decide whether unroll or peel loops (depending on FLAGS) and how much. */
static void
-decide_unrolling_and_peeling (struct loops *loops, int flags)
+decide_unrolling_and_peeling (int flags)
{
- struct loop *loop = loops->tree_root, *next;
-
- while (loop->inner)
- loop = loop->inner;
+ struct loop *loop;
+ loop_iterator li;
/* Scan the loops, inner ones first. */
- while (loop != loops->tree_root)
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
{
- if (loop->next)
- {
- next = loop->next;
- while (next->inner)
- next = next->inner;
- }
- else
- next = loop->outer;
-
loop->lpt_decision.decision = LPT_NONE;
if (dump_file)
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, cold area\n");
- loop = next;
continue;
}
if (dump_file)
fprintf (dump_file,
";; Not considering loop, cannot duplicate\n");
- loop = next;
continue;
}
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, is not innermost\n");
- loop = next;
continue;
}
decide_unroll_stupid (loop, flags);
if (loop->lpt_decision.decision == LPT_NONE)
decide_peel_simple (loop, flags);
-
- loop = next;
}
}
body; i++;
*/
static void
-peel_loop_completely (struct loops *loops, struct loop *loop)
+peel_loop_completely (struct loop *loop)
{
sbitmap wont_exit;
unsigned HOST_WIDE_INT npeel;
- unsigned n_remove_edges, i;
- edge *remove_edges, ein;
+ unsigned i;
+ VEC (edge, heap) *remove_edges;
+ edge ein;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct opt_info *opt_info = NULL;
if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
- remove_edges = XCNEWVEC (edge, npeel);
- n_remove_edges = 0;
+ remove_edges = NULL;
if (flag_split_ivs_in_unroller)
opt_info = analyze_insns_in_loop (loop);
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, npeel,
+ npeel,
wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| DLTHE_FLAG_COMPLETTE_PEEL
| (opt_info
}
/* Remove the exit edges. */
- for (i = 0; i < n_remove_edges; i++)
- remove_path (loops, remove_edges[i]);
- free (remove_edges);
+ for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
+ remove_path (ein);
+ VEC_free (edge, heap, remove_edges);
}
ein = desc->in_edge;
/* Now remove the unreachable part of the last iteration and cancel
the loop. */
- remove_path (loops, ein);
+ remove_path (ein);
if (dump_file)
fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}
*/
static void
-unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
+unroll_loop_constant_iterations (struct loop *loop)
{
unsigned HOST_WIDE_INT niter;
unsigned exit_mod;
sbitmap wont_exit;
- unsigned n_remove_edges, i;
- edge *remove_edges;
+ unsigned i;
+ VEC (edge, heap) *remove_edges;
+ edge e;
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
wont_exit = sbitmap_alloc (max_unroll + 1);
sbitmap_ones (wont_exit);
- remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
- n_remove_edges = 0;
+ remove_edges = NULL;
if (flag_split_ivs_in_unroller
|| flag_variable_expansion_in_unroller)
opt_info = analyze_insns_in_loop (loop);
{
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, exit_mod,
+ exit_mod,
wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| (opt_info && exit_mod > 1
? DLTHE_RECORD_COPY_NUMBER
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, exit_mod + 1,
+ exit_mod + 1,
wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| (opt_info && exit_mod > 0
? DLTHE_RECORD_COPY_NUMBER
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
- loops, max_unroll,
+ max_unroll,
wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| (opt_info
? DLTHE_RECORD_COPY_NUMBER
desc->niter_expr = GEN_INT (desc->niter);
/* Remove the edges. */
- for (i = 0; i < n_remove_edges; i++)
- remove_path (loops, remove_edges[i]);
- free (remove_edges);
+ for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
+ remove_path (e);
+ VEC_free (edge, heap, remove_edges);
if (dump_file)
fprintf (dump_file,
return NULL;
bb = split_edge (e);
emit_insn_after (insns, BB_END (bb));
- bb->flags |= BB_SUPERBLOCK;
+
+ /* ??? We used to assume that INSNS can contain control flow insns, and
+ that we had to try to find sub basic blocks in BB to maintain a valid
+ CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
+ and call break_superblocks when going out of cfglayout mode. But it
+ turns out that this never happens; and that if it does ever happen,
+ the verify_flow_info call in loop_optimizer_finalize would fail.
+
+ There are two reasons why we expected we could have control flow insns
+ in INSNS. The first is when a comparison has to be done in parts, and
+ the second is when the number of iterations is computed for loops with
+ the number of iterations known at runtime. In both cases, test cases
+ to get control flow in INSNS appear to be impossible to construct:
+
+ * If do_compare_rtx_and_jump needs several branches to do comparison
+ in a mode that needs comparison by parts, we cannot analyze the
+ number of iterations of the loop, and we never get to unrolling it.
+
+ * The code in expand_divmod that was suspected to cause creation of
+ branching code seems to be only accessed for signed division. The
+ divisions used by # of iterations analysis are always unsigned.
+ Problems might arise on architectures that emits branching code
+ for some operations that may appear in the unroller (especially
+ for division), but we have no such architectures.
+
+ Considering all this, it was decided that we should for now assume
+ that INSNS can in theory contain control flow insns, but in practice
+ it never does. So we don't handle the theoretical case, and should
+ a real failure ever show up, we have a pretty good clue for how to
+ fix it. */
+
return bb;
}
}
*/
static void
-unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
+unroll_loop_runtime_iterations (struct loop *loop)
{
rtx old_niter, niter, init_code, branch_code, tmp;
unsigned i, j, p;
- basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
- unsigned n_dom_bbs;
+ basic_block preheader, *body, swtch, ezc_swtch;
+ VEC (basic_block, heap) *dom_bbs;
sbitmap wont_exit;
int may_exit_copy;
- unsigned n_peel, n_remove_edges;
- edge *remove_edges, e;
+ unsigned n_peel;
+ VEC (edge, heap) *remove_edges;
+ edge e;
bool extra_zero_check, last_may_exit;
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
opt_info = analyze_insns_in_loop (loop);
/* Remember blocks whose dominators will have to be updated. */
- dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
- n_dom_bbs = 0;
+ dom_bbs = NULL;
body = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
{
- unsigned nldom;
- basic_block *ldom;
+ VEC (basic_block, heap) *ldom;
+ basic_block bb;
- nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
- for (j = 0; j < nldom; j++)
- if (!flow_bb_inside_loop_p (loop, ldom[j]))
- dom_bbs[n_dom_bbs++] = ldom[j];
+ ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
+ for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
+ if (!flow_bb_inside_loop_p (loop, bb))
+ VEC_safe_push (basic_block, heap, dom_bbs, bb);
- free (ldom);
+ VEC_free (basic_block, heap, ldom);
}
free (body);
init_code = get_insns ();
end_sequence ();
+ unshare_all_rtl_in_chain (init_code);
/* Precondition the loop. */
split_edge_and_insert (loop_preheader_edge (loop), init_code);
- remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
- n_remove_edges = 0;
+ remove_edges = NULL;
wont_exit = sbitmap_alloc (max_unroll + 2);
SET_BIT (wont_exit, 1);
ezc_swtch = loop_preheader_edge (loop)->src;
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, 1,
- wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ 1, wont_exit, desc->out_edge,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ);
gcc_assert (ok);
if (i != n_peel - 1 || !last_may_exit)
SET_BIT (wont_exit, 1);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, 1,
- wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ 1, wont_exit, desc->out_edge,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ);
gcc_assert (ok);
}
/* Recount dominators for outer blocks. */
- iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
+ iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
/* And unroll loop. */
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
- loops, max_unroll,
+ max_unroll,
wont_exit, desc->out_edge,
- remove_edges, &n_remove_edges,
+ &remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| (opt_info
? DLTHE_RECORD_COPY_NUMBER
}
/* Remove the edges. */
- for (i = 0; i < n_remove_edges; i++)
- remove_path (loops, remove_edges[i]);
- free (remove_edges);
+ for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
+ remove_path (e);
+ VEC_free (edge, heap, remove_edges);
/* We must be careful when updating the number of iterations due to
preconditioning and the fact that the value must be valid at entry
"in runtime, %i insns\n",
max_unroll, num_loop_insns (loop));
- if (dom_bbs)
- free (dom_bbs);
+ VEC_free (basic_block, heap, dom_bbs);
}
/* Decide whether to simply peel LOOP and how much. */
end: ;
*/
static void
-peel_loop_simple (struct loops *loops, struct loop *loop)
+peel_loop_simple (struct loop *loop)
{
sbitmap wont_exit;
unsigned npeel = loop->lpt_decision.times;
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, npeel, wont_exit,
- NULL, NULL,
+ npeel, wont_exit, NULL,
NULL, DLTHE_FLAG_UPDATE_FREQ
| (opt_info
? DLTHE_RECORD_COPY_NUMBER
}
*/
static void
-unroll_loop_stupid (struct loops *loops, struct loop *loop)
+unroll_loop_stupid (struct loop *loop)
{
sbitmap wont_exit;
unsigned nunroll = loop->lpt_decision.times;
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
- loops, nunroll, wont_exit,
- NULL, NULL, NULL,
+ nunroll, wont_exit,
+ NULL, NULL,
DLTHE_FLAG_UPDATE_FREQ
| (opt_info
? DLTHE_RECORD_COPY_NUMBER
static hashval_t
si_info_hash (const void *ivts)
{
- return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
+ return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
}
/* An equality functions for information about insns to split. */
static hashval_t
ve_info_hash (const void *ves)
{
- return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
+ return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
}
/* Return true if IVTS1 and IVTS2 (which are really both of type
static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
- rtx set, dest, src, op1;
+ rtx set, dest, src, op1, op2, something;
struct var_to_expand *ves;
enum machine_mode mode1, mode2;
-
+ unsigned accum_pos;
+
set = single_set (insn);
if (!set)
return NULL;
if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
return NULL;
- if (!XEXP (src, 0))
- return NULL;
-
op1 = XEXP (src, 0);
+ op2 = XEXP (src, 1);
if (!REG_P (dest)
&& !(GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest))))
return NULL;
- if (!rtx_equal_p (dest, op1))
- return NULL;
-
+ if (rtx_equal_p (dest, op1))
+ accum_pos = 0;
+ else if (rtx_equal_p (dest, op2))
+ accum_pos = 1;
+ else
+ return NULL;
+
+ /* The method of expansion that we are using; which includes
+ the initialization of the expansions with zero and the summation of
+ the expansions at the end of the computation will yield wrong results
+ for (x = something - x) thus avoid using it in that case. */
+ if (accum_pos == 1
+ && GET_CODE (src) == MINUS)
+ return NULL;
+
+ something = (accum_pos == 0)? op2 : op1;
+
if (!referenced_in_one_insn_in_loop_p (loop, dest))
return NULL;
- if (rtx_referenced_p (dest, XEXP (src, 1)))
+ if (rtx_referenced_p (dest, something))
return NULL;
mode1 = GET_MODE (dest);
- mode2 = GET_MODE (XEXP (src, 1));
+ mode2 = GET_MODE (something);
if ((FLOAT_MODE_P (mode1)
|| FLOAT_MODE_P (mode2))
- && !flag_unsafe_math_optimizations)
+ && !flag_associative_math)
return NULL;
-
+
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "\n;; Expanding Accumulator ");
+ print_rtl (dump_file, dest);
+ fprintf (dump_file, "\n");
+ }
+
/* Record the accumulator to expand. */
ves = XNEW (struct var_to_expand);
ves->insn = insn;
ves->op = GET_CODE (src);
ves->expansion_count = 0;
ves->reuse_expansion = 0;
+ ves->accum_pos = accum_pos;
return ves;
}
new_reg = get_expansion (ve);
validate_change (insn, &SET_DEST (set), new_reg, 1);
- validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
+ validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
if (apply_change_group ())
if (really_new_expansion)
/* Initialize the variable expansions in loop preheader.
Callbacks for htab_traverse. PLACE_P is the loop-preheader
basic block where the initialization of the expansions
- should take place. */
+ should take place. The expansions are initialized with (-0)
+ when the operation is plus or minus to honor sign zero.
+ This way we can prevent cases where the sign of the final result is
+ effected by the sign of the expansion.
+ Here is an example to demonstrate this:
+
+ for (i = 0 ; i < n; i++)
+ sum += something;
+
+ ==>
+
+ sum += something
+ ....
+ i = i+1;
+ sum1 += something
+ ....
+ i = i+1
+ sum2 += something;
+ ....
+
+ When SUM is initialized with -zero and SOMETHING is also -zero; the
+ final result of sum should be -zero thus the expansions sum1 and sum2
+ should be initialized with -zero as well (otherwise we will get +zero
+ as the final result). */
static int
insert_var_expansion_initialization (void **slot, void *place_p)
basic_block place = (basic_block)place_p;
rtx seq, var, zero_init, insn;
unsigned i;
-
+ enum machine_mode mode = GET_MODE (ve->reg);
+ bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
+
if (VEC_length (rtx, ve->var_expansions) == 0)
return 1;
if (ve->op == PLUS || ve->op == MINUS)
for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
{
- zero_init = CONST0_RTX (GET_MODE (var));
+ if (honor_signed_zero_p)
+ zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
+ else
+ zero_init = CONST0_RTX (mode);
+
emit_move_insn (var, zero_init);
}
else if (ve->op == MULT)