/* Induction variable optimizations.
Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
-
+
This file is part of GCC.
-
+
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
-
+
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
4) The trees are transformed to use the new variables, the dead code is
removed.
-
+
All of this is done loop by loop. Doing it globally is theoretically
possible, it might give a better performance and it might enable us
to decide costs more precisely, but getting all the interactions right
print_generic_expr (dump_file, niter, TDF_SLIM);
fprintf (dump_file, "\n\n");
};
-
+
fprintf (dump_file, "Induction variables:\n\n");
EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
iv = get_iv (data, op);
if (!iv)
return NULL;
-
+
if (iv->have_use_for)
{
use = iv_use (data, iv->use_id);
|| bitpos % GET_MODE_ALIGNMENT (mode) != 0
|| bitpos % BITS_PER_UNIT != 0)
return true;
-
+
if (!constant_multiple_of (step, al, &mul))
return true;
}
unsigned i;
struct iv_cand *cand = NULL;
tree type, orig_type;
-
+
if (base)
{
orig_type = TREE_TYPE (base);
it. The candidate computation is scheduled on all available positions. */
static void
-add_candidate (struct ivopts_data *data,
+add_candidate (struct ivopts_data *data,
tree base, tree step, bool important, struct iv_use *use)
{
if (ip_normal_pos (data->current_loop))
return ret;
}
-
+
/* n_map_members is a power of two, so this computes modulo. */
s = cand->id & (use->n_map_members - 1);
for (i = s; i < use->n_map_members; i++)
produce_memory_decl_rtl (tree obj, int *regno)
{
addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
+ enum machine_mode address_mode = targetm.addr_space.address_mode (as);
rtx x;
-
+
gcc_assert (obj);
if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
{
const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
- x = gen_rtx_SYMBOL_REF (Pmode, name);
+ x = gen_rtx_SYMBOL_REF (address_mode, name);
SET_SYMBOL_REF_DECL (x, obj);
x = gen_rtx_MEM (DECL_MODE (obj), x);
set_mem_addr_space (x, as);
}
else
{
- x = gen_raw_REG (Pmode, (*regno)++);
+ x = gen_raw_REG (address_mode, (*regno)++);
x = gen_rtx_MEM (DECL_MODE (obj), x);
set_mem_addr_space (x, as);
}
if (stmt_after_increment (loop, cand, at))
{
aff_tree cstep_aff;
-
+
if (common_type != uutype)
cstep_common = fold_convert (common_type, cstep);
else
cost = 1;
costs[mode] = cost;
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Addition in %s costs %d\n",
GET_MODE_NAME (mode), cost);
gen_int_mode (cst, mode), NULL_RTX, 0);
seq = get_insns ();
end_sequence ();
-
+
cost = seq_cost (seq, speed);
if (dump_file && (dump_flags & TDF_DETAILS))
valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
if (!valid_mult)
{
- rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
+ enum machine_mode address_mode = targetm.addr_space.address_mode (as);
+ rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
rtx addr;
HOST_WIDE_INT i;
valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
sbitmap_zero (valid_mult);
- addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
+ addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
{
- XEXP (addr, 1) = gen_int_mode (i, Pmode);
+ XEXP (addr, 1) = gen_int_mode (i, address_mode);
if (memory_address_addr_space_p (mode, addr, as))
SET_BIT (valid_mult, i + MAX_RATIO);
}
addr_space_t as, bool speed,
bool stmt_after_inc, bool *may_autoinc)
{
+ enum machine_mode address_mode = targetm.addr_space.address_mode (as);
static VEC(address_cost_data, heap) *address_cost_data_list;
unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
address_cost_data data;
data = (address_cost_data) xcalloc (1, sizeof (*data));
- reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
+ reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
- addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
+ addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
for (i = start; i <= 1 << 20; i <<= 1)
{
- XEXP (addr, 1) = gen_int_mode (i, Pmode);
+ XEXP (addr, 1) = gen_int_mode (i, address_mode);
if (!memory_address_addr_space_p (mem_mode, addr, as))
break;
}
for (i = start; i <= 1 << 20; i <<= 1)
{
- XEXP (addr, 1) = gen_int_mode (-i, Pmode);
+ XEXP (addr, 1) = gen_int_mode (-i, address_mode);
if (!memory_address_addr_space_p (mem_mode, addr, as))
break;
}
/* Compute the cost of various addressing modes. */
acost = 0;
- reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
- reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
+ reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
+ reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
if (HAVE_PRE_DECREMENT)
{
- addr = gen_rtx_PRE_DEC (Pmode, reg0);
+ addr = gen_rtx_PRE_DEC (address_mode, reg0);
has_predec[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
if (HAVE_POST_DECREMENT)
{
- addr = gen_rtx_POST_DEC (Pmode, reg0);
+ addr = gen_rtx_POST_DEC (address_mode, reg0);
has_postdec[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
if (HAVE_PRE_INCREMENT)
{
- addr = gen_rtx_PRE_INC (Pmode, reg0);
+ addr = gen_rtx_PRE_INC (address_mode, reg0);
has_preinc[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
if (HAVE_POST_INCREMENT)
{
- addr = gen_rtx_POST_INC (Pmode, reg0);
+ addr = gen_rtx_POST_INC (address_mode, reg0);
has_postinc[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
addr = reg0;
if (rat_p)
- addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
- gen_int_mode (rat, Pmode));
+ addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
+ gen_int_mode (rat, address_mode));
if (var_p)
- addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
+ addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
if (sym_p)
{
- base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
+ base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
/* ??? We can run into trouble with some backends by presenting
it with symbols which haven't been properly passed through
targetm.encode_section_info. By setting the local bit, we
SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
if (off_p)
- base = gen_rtx_fmt_e (CONST, Pmode,
+ base = gen_rtx_fmt_e (CONST, address_mode,
gen_rtx_fmt_ee
- (PLUS, Pmode, base,
- gen_int_mode (off, Pmode)));
+ (PLUS, address_mode, base,
+ gen_int_mode (off, address_mode)));
}
else if (off_p)
- base = gen_int_mode (off, Pmode);
+ base = gen_int_mode (off, address_mode);
else
base = NULL_RTX;
-
+
if (base)
- addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
+ addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
start_sequence ();
/* To avoid splitting addressing modes, pretend that no cse will
If VAR_PRESENT is true, try whether the mode with
SYMBOL_PRESENT = false is cheaper even with cost of addition, and
if this is the case, use it. */
- add_c = add_cost (Pmode, speed);
+ add_c = add_cost (address_mode, speed);
for (i = 0; i < 8; i++)
{
var_p = i & 1;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Address costs:\n");
-
+
for (i = 0; i < 16; i++)
{
sym_p = i & 1;
data_index, data);
}
- bits = GET_MODE_BITSIZE (Pmode);
+ bits = GET_MODE_BITSIZE (address_mode);
mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
offset &= mask;
if ((offset >> (bits - 1) & 1))
&& multiplier_allowed_in_address_p (ratio, mem_mode, as));
if (ratio != 1 && !ratio_p)
- cost += multiply_by_cost (ratio, Pmode, speed);
+ cost += multiply_by_cost (ratio, address_mode, speed);
if (s_offset && !offset_p && !symbol_present)
- cost += add_cost (Pmode, speed);
+ cost += add_cost (address_mode, speed);
if (may_autoinc)
*may_autoinc = autoinc;
case MULT_EXPR:
if (cst_and_fits_in_hwi (op0))
cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
- else if (cst_and_fits_in_hwi (op1))
+ else if (cst_and_fits_in_hwi (op1))
cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
else
return new_cost (target_spill_cost [speed], 0);
tree toffset;
enum machine_mode mode;
int unsignedp, volatilep;
-
+
core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
&unsignedp, &volatilep, false);
*var_present = false;
return zero_cost;
}
-
+
*symbol_present = false;
*var_present = true;
return zero_cost;
if (!constant_multiple_of (ustep, cstep, &rat))
return infinite_cost;
-
+
if (double_int_fits_in_shwi_p (rat))
ratio = double_int_to_shwi (rat);
else
/* use = ubase + ratio * (var - cbase). If either cbase is a constant
or ratio == 1, it is better to handle this like
-
+
ubase - ratio * cbase + ratio * var
-
+
(also holds in the case ratio == -1, TODO. */
if (cst_and_fits_in_hwi (cbase))
{
- offset = - ratio * int_cst_value (cbase);
+ offset = - ratio * int_cst_value (cbase);
cost = difference_cost (data,
ubase, build_int_cst (utype, 0),
&symbol_present, &var_present, &offset,
if (cand->pos != IP_ORIGINAL
|| DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
cost++;
-
+
/* Prefer not to insert statements into latch unless there are some
already (so that we do not create unnecessary jumps). */
if (cand->pos == IP_END
etc.). For now the reserve is a constant 3.
Let I be the number of induction variables.
-
+
-- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
make a lot of ivs without a reason).
-- if A - R < U + I <= A, the cost is I * PRES_COST
if (!iv_ca_has_deps (ivs, new_cp))
continue;
-
+
if (!cheaper_cost_pair (new_cp, old_cp))
continue;
continue;
if (!iv_ca_has_deps (ivs, cp))
continue;
-
+
if (!cheaper_cost_pair (cp, new_cp))
continue;
continue;
if (!iv_ca_has_deps (ivs, cp))
continue;
-
+
if (!cheaper_cost_pair (cp, new_cp))
continue;
/* Already tried this. */
if (cand->important && cand->iv->base_object == NULL_TREE)
continue;
-
+
if (iv_ca_cand_used_p (ivs, cand))
continue;
for (i = 0; i < n_iv_cands (data); i++)
{
cand = iv_cand (data, i);
-
+
if (iv_ca_cand_used_p (ivs, cand))
continue;
/* Rewrite the increment so that it uses var_before directly. */
find_interesting_uses_op (data, cand->var_after)->selected = cand;
-
+
return;
}
-
+
gimple_add_tmp_var (cand->var_before);
add_referenced_var (cand->var_before);
{
aff_tree aff;
gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
+ tree base_hint = NULL_TREE;
tree ref;
bool ok;
gcc_assert (ok);
unshare_aff_combination (&aff);
- ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
+ /* To avoid undefined overflow problems, all IV candidates use unsigned
+ integer types. The drawback is that this makes it impossible for
+ create_mem_ref to distinguish an IV that is based on a memory object
+ from one that represents simply an offset.
+
+ To work around this problem, we pass a hint to create_mem_ref that
+ indicates which variable (if any) in aff is an IV based on a memory
+ object. Note that we only consider the candidate. If this is not
+ based on an object, the base of the reference is in some subexpression
+ of the use -- but these will use pointer types, so they are recognized
+ by the create_mem_ref heuristics anyway. */
+ if (cand->iv->base_object)
+ base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
+
+ ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
+ data->speed);
copy_ref_info (ref, *use->op_p);
*use->op_p = ref;
}
default:
gcc_unreachable ();
}
-
+
update_stmt (use->stmt);
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Processing loop %d\n", loop->num);
-
+
exit = single_dom_exit (loop);
if (exit)
{
/* Create the new induction variables (item 4, part 1). */
create_new_ivs (data, iv_ca);
iv_ca_free (&iv_ca);
-
+
/* Rewrite the uses (item 4, part 2). */
rewrite_uses (data);