/* Loop unrolling and peeling.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
+Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to the Free
-Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
the accumulator. If REUSE_EXPANSION is 0 reuse
the original accumulator. Else use
var_expansions[REUSE_EXPANSION - 1]. */
+ unsigned accum_pos; /* The position in which the accumulator is placed in
+ the insn src. For example in x = x + something
+ accum_pos is 0 while in x = something + x accum_pos
+ is 1. */
};
/* Information about optimization applied in
fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
/* Do not peel cold areas. */
- if (!maybe_hot_bb_p (loop->header))
+ if (optimize_loop_for_size_p (loop))
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, cold area\n");
}
/* Do not peel cold areas. */
- if (!maybe_hot_bb_p (loop->header))
+ if (optimize_loop_for_size_p (loop))
{
if (dump_file)
fprintf (dump_file, ";; Not considering loop, cold area\n");
{
rtx old_niter, niter, init_code, branch_code, tmp;
unsigned i, j, p;
- basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
- unsigned n_dom_bbs;
+ basic_block preheader, *body, swtch, ezc_swtch;
+ VEC (basic_block, heap) *dom_bbs;
sbitmap wont_exit;
int may_exit_copy;
unsigned n_peel;
opt_info = analyze_insns_in_loop (loop);
/* Remember blocks whose dominators will have to be updated. */
- dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
- n_dom_bbs = 0;
+ dom_bbs = NULL;
body = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
{
- unsigned nldom;
- basic_block *ldom;
+ VEC (basic_block, heap) *ldom;
+ basic_block bb;
- nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
- for (j = 0; j < nldom; j++)
- if (!flow_bb_inside_loop_p (loop, ldom[j]))
- dom_bbs[n_dom_bbs++] = ldom[j];
+ ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
+ for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
+ if (!flow_bb_inside_loop_p (loop, bb))
+ VEC_safe_push (basic_block, heap, dom_bbs, bb);
- free (ldom);
+ VEC_free (basic_block, heap, ldom);
}
free (body);
init_code = get_insns ();
end_sequence ();
+ unshare_all_rtl_in_chain (init_code);
/* Precondition the loop. */
split_edge_and_insert (loop_preheader_edge (loop), init_code);
}
/* Recount dominators for outer blocks. */
- iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
+ iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
/* And unroll loop. */
"in runtime, %i insns\n",
max_unroll, num_loop_insns (loop));
- if (dom_bbs)
- free (dom_bbs);
+ VEC_free (basic_block, heap, dom_bbs);
}
/* Decide whether to simply peel LOOP and how much. */
static hashval_t
si_info_hash (const void *ivts)
{
- return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
+ return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
}
/* An equality functions for information about insns to split. */
static int
si_info_eq (const void *ivts1, const void *ivts2)
{
- const struct iv_to_split *i1 = ivts1;
- const struct iv_to_split *i2 = ivts2;
+ const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
+ const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;
return i1->insn == i2->insn;
}
static hashval_t
ve_info_hash (const void *ves)
{
- return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
+ return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
}
/* Return true if IVTS1 and IVTS2 (which are really both of type
static int
ve_info_eq (const void *ivts1, const void *ivts2)
{
- const struct var_to_expand *i1 = ivts1;
- const struct var_to_expand *i2 = ivts2;
+ const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
+ const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
return i1->insn == i2->insn;
}
static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
- rtx set, dest, src, op1;
+ rtx set, dest, src, op1, op2, something;
struct var_to_expand *ves;
enum machine_mode mode1, mode2;
-
+ unsigned accum_pos;
+
set = single_set (insn);
if (!set)
return NULL;
if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
return NULL;
- if (!XEXP (src, 0))
- return NULL;
-
op1 = XEXP (src, 0);
+ op2 = XEXP (src, 1);
if (!REG_P (dest)
&& !(GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest))))
return NULL;
- if (!rtx_equal_p (dest, op1))
- return NULL;
-
+ if (rtx_equal_p (dest, op1))
+ accum_pos = 0;
+ else if (rtx_equal_p (dest, op2))
+ accum_pos = 1;
+ else
+ return NULL;
+
+ /* The method of expansion that we are using; which includes
+ the initialization of the expansions with zero and the summation of
+ the expansions at the end of the computation will yield wrong results
+ for (x = something - x) thus avoid using it in that case. */
+ if (accum_pos == 1
+ && GET_CODE (src) == MINUS)
+ return NULL;
+
+ something = (accum_pos == 0)? op2 : op1;
+
if (!referenced_in_one_insn_in_loop_p (loop, dest))
return NULL;
- if (rtx_referenced_p (dest, XEXP (src, 1)))
+ if (rtx_referenced_p (dest, something))
return NULL;
mode1 = GET_MODE (dest);
- mode2 = GET_MODE (XEXP (src, 1));
+ mode2 = GET_MODE (something);
if ((FLOAT_MODE_P (mode1)
|| FLOAT_MODE_P (mode2))
- && !flag_unsafe_math_optimizations)
+ && !flag_associative_math)
return NULL;
if (dump_file)
ves->op = GET_CODE (src);
ves->expansion_count = 0;
ves->reuse_expansion = 0;
+ ves->accum_pos = accum_pos;
return ves;
}
static int
allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
{
- struct iv_to_split *ivts = *slot;
+ struct iv_to_split *ivts = (struct iv_to_split *) *slot;
rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
ivts->base_var = gen_reg_rtx (GET_MODE (expr));
new_reg = get_expansion (ve);
validate_change (insn, &SET_DEST (set), new_reg, 1);
- validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
+ validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
if (apply_change_group ())
if (really_new_expansion)
/* Initialize the variable expansions in loop preheader.
Callbacks for htab_traverse. PLACE_P is the loop-preheader
basic block where the initialization of the expansions
- should take place. */
+ should take place. The expansions are initialized with (-0)
+ when the operation is plus or minus to honor sign zero.
+ This way we can prevent cases where the sign of the final result is
+ effected by the sign of the expansion.
+ Here is an example to demonstrate this:
+
+ for (i = 0 ; i < n; i++)
+ sum += something;
+
+ ==>
+
+ sum += something
+ ....
+ i = i+1;
+ sum1 += something
+ ....
+ i = i+1
+ sum2 += something;
+ ....
+
+ When SUM is initialized with -zero and SOMETHING is also -zero; the
+ final result of sum should be -zero thus the expansions sum1 and sum2
+ should be initialized with -zero as well (otherwise we will get +zero
+ as the final result). */
static int
insert_var_expansion_initialization (void **slot, void *place_p)
{
- struct var_to_expand *ve = *slot;
+ struct var_to_expand *ve = (struct var_to_expand *) *slot;
basic_block place = (basic_block)place_p;
rtx seq, var, zero_init, insn;
unsigned i;
-
+ enum machine_mode mode = GET_MODE (ve->reg);
+ bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
+
if (VEC_length (rtx, ve->var_expansions) == 0)
return 1;
if (ve->op == PLUS || ve->op == MINUS)
for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
{
- zero_init = CONST0_RTX (GET_MODE (var));
+ if (honor_signed_zero_p)
+ zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
+ else
+ zero_init = CONST0_RTX (mode);
+
emit_move_insn (var, zero_init);
}
else if (ve->op == MULT)
static int
combine_var_copies_in_loop_exit (void **slot, void *place_p)
{
- struct var_to_expand *ve = *slot;
+ struct var_to_expand *ve = (struct var_to_expand *) *slot;
basic_block place = (basic_block)place_p;
rtx sum = ve->reg;
rtx expr, seq, var, insn;
/* Apply splitting iv optimization. */
if (opt_info->insns_to_split)
{
- ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
+ ivts = (struct iv_to_split *)
+ htab_find (opt_info->insns_to_split, &ivts_templ);
if (ivts)
{
/* Apply variable expansion optimization. */
if (unrolling && opt_info->insns_with_var_to_expand)
{
- ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
+ ves = (struct var_to_expand *)
+ htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
if (ves)
{
gcc_assert (GET_CODE (PATTERN (insn))
ivts_templ.insn = orig_insn;
if (opt_info->insns_to_split)
{
- ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
+ ivts = (struct iv_to_split *)
+ htab_find (opt_info->insns_to_split, &ivts_templ);
if (ivts)
{
if (!delta)
static int
release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
{
- struct var_to_expand *ve = *slot;
+ struct var_to_expand *ve = (struct var_to_expand *) *slot;
VEC_free (rtx, heap, ve->var_expansions);