/* Induction variable optimizations.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
-
+
This file is part of GCC.
-
+
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
-
+
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
4) The trees are transformed to use the new variables, the dead code is
removed.
-
+
All of this is done loop by loop. Doing it globally is theoretically
possible, it might give a better performance and it might enable us
to decide costs more precisely, but getting all the interactions right
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
-#include "rtl.h"
#include "tm_p.h"
-#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
-#include "varray.h"
-#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
+#include "tree-ssa-propagate.h"
+
+/* FIXME: Expressions are expanded to RTL in this pass to determine the
+ cost of different addressing modes. This should be moved to a TBD
+ interface between the GIMPLE and RTL worlds. */
+#include "expr.h"
/* The infinite cost. */
#define INFTY 10000000
struct iv *iv; /* Induction variable description. */
bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
an expression that is not an induction variable. */
- unsigned inv_id; /* Id of an invariant. */
bool preserve_biv; /* For the original biv, whether to preserve it. */
+ unsigned inv_id; /* Id of an invariant. */
};
/* Types of uses. */
unsigned id; /* The number of the candidate. */
bool important; /* Whether this is an "important" candidate, i.e. such
that it should be considered by all uses. */
- enum iv_position pos; /* Where it is computed. */
+ ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
gimple incremented_at;/* For original biv, the statement where it is
incremented. */
tree var_before; /* The variable used for it before increment. */
/* Are we optimizing for speed? */
bool speed;
+
+ /* Whether the loop body includes any function calls. */
+ bool body_includes_call;
};
/* An assignment of iv candidates to uses. */
idx_contains_abnormal_ssa_name_p,
NULL);
+ if (code == COND_EXPR)
+ return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
+ || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
+ || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
+
switch (codeclass)
{
case tcc_binary:
if (!base)
return expr;
- if (TREE_CODE (base) == INDIRECT_REF)
+ if (TREE_CODE (base) == MEM_REF)
return determine_base_object (TREE_OPERAND (base, 0));
return fold_convert (ptr_type_node,
print_generic_expr (dump_file, niter, TDF_SLIM);
fprintf (dump_file, "\n\n");
};
-
+
fprintf (dump_file, "Induction variables:\n\n");
EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
iv = get_iv (data, op);
if (!iv)
return NULL;
-
+
if (iv->have_use_for)
{
use = iv_use (data, iv->use_id);
tree step, iv_base, iv_step, lbound, off;
struct loop *loop = dta->ivopts_data->current_loop;
- if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
- || TREE_CODE (base) == ALIGN_INDIRECT_REF)
+ if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
return false;
/* If base is a component ref, require that the offset of the reference
if (mode != BLKmode)
{
- double_int mul;
- tree al = build_int_cst (TREE_TYPE (step),
- GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
+ unsigned mode_align = GET_MODE_ALIGNMENT (mode);
+
+ if (base_align < mode_align
+ || (bitpos % mode_align) != 0
+ || (bitpos % BITS_PER_UNIT) != 0)
+ return true;
- if (base_align < GET_MODE_ALIGNMENT (mode)
- || bitpos % GET_MODE_ALIGNMENT (mode) != 0
- || bitpos % BITS_PER_UNIT != 0)
+ if (toffset
+ && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
return true;
-
- if (!constant_multiple_of (step, al, &mul))
+
+ if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
return true;
}
goto fail;
step = ifs_ivopts_data.step;
- gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
/* Check that the base expression is addressable. This needs
tree *ref = &TREE_OPERAND (base, 0);
while (handled_component_p (*ref))
ref = &TREE_OPERAND (*ref, 0);
- if (TREE_CODE (*ref) == INDIRECT_REF)
- *ref = fold_indirect_ref (*ref);
+ if (TREE_CODE (*ref) == MEM_REF)
+ {
+ tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
+ TREE_OPERAND (*ref, 0),
+ TREE_OPERAND (*ref, 1));
+ if (tem)
+ *ref = tem;
+ }
}
}
expr = build_fold_addr_expr (op0);
return fold_convert (orig_type, expr);
- case INDIRECT_REF:
+ case MEM_REF:
+ /* ??? Offset operand? */
inside_addr = false;
break;
unsigned i;
struct iv_cand *cand = NULL;
tree type, orig_type;
-
+
if (base)
{
orig_type = TREE_TYPE (base);
it. The candidate computation is scheduled on all available positions. */
static void
-add_candidate (struct ivopts_data *data,
+add_candidate (struct ivopts_data *data,
tree base, tree step, bool important, struct iv_use *use)
{
if (ip_normal_pos (data->current_loop))
return ret;
}
-
+
/* n_map_members is a power of two, so this computes modulo. */
s = cand->id & (use->n_map_members - 1);
for (i = s; i < use->n_map_members; i++)
addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
enum machine_mode address_mode = targetm.addr_space.address_mode (as);
rtx x;
-
+
gcc_assert (obj);
if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
{
unsigned cost;
/* Avoid using hard regs in ways which may be unsupported. */
int regno = LAST_VIRTUAL_REGISTER + 1;
- enum function_frequency real_frequency = cfun->function_frequency;
+ struct cgraph_node *node = cgraph_node (current_function_decl);
+ enum node_frequency real_frequency = node->frequency;
- cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+ node->frequency = NODE_FREQUENCY_NORMAL;
crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
start_sequence ();
seq = get_insns ();
end_sequence ();
default_rtl_profile ();
- cfun->function_frequency = real_frequency;
+ node->frequency = real_frequency;
cost = seq_cost (seq, speed);
if (MEM_P (rslt))
if (stmt_after_increment (loop, cand, at))
{
aff_tree cstep_aff;
-
+
if (common_type != uutype)
cstep_common = fold_convert (common_type, cstep);
else
return get_computation_at (loop, use, cand, use->stmt);
}
+/* Adjust the cost COST for being in loop setup rather than loop body.
+   If we're optimizing for space, the loop setup overhead is constant;
+   if we're optimizing for speed, amortize it over the per-iteration cost.  */
+static unsigned
+adjust_setup_cost (struct ivopts_data *data, unsigned cost)
+{
+  /* An infinite cost must stay infinite; never scale it down.  */
+  if (cost == INFTY)
+    return cost;
+  /* NOTE(review): integer division can round the amortized cost down to
+     zero when the setup cost is small relative to the estimated iteration
+     count -- presumably intended, since setup then really is negligible;
+     confirm against how callers accumulate these costs.  */
+  else if (optimize_loop_for_speed_p (data->current_loop))
+    return cost / AVG_LOOP_NITER (data->current_loop);
+  else
+    return cost;
+}
+
/* Returns cost of addition in MODE. */
static unsigned
cost = 1;
costs[mode] = cost;
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Addition in %s costs %d\n",
GET_MODE_NAME (mode), cost);
gen_int_mode (cst, mode), NULL_RTX, 0);
seq = get_insns ();
end_sequence ();
-
+
cost = seq_cost (seq, speed);
if (dump_file && (dump_flags & TDF_DETAILS))
base = gen_int_mode (off, address_mode);
else
base = NULL_RTX;
-
+
if (base)
addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Address costs:\n");
-
+
for (i = 0; i < 16; i++)
{
sym_p = i & 1;
case MULT_EXPR:
if (cst_and_fits_in_hwi (op0))
cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
- else if (cst_and_fits_in_hwi (op1))
+ else if (cst_and_fits_in_hwi (op1))
cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
else
return new_cost (target_spill_cost [speed], 0);
tree toffset;
enum machine_mode mode;
int unsignedp, volatilep;
-
+
core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
&unsignedp, &volatilep, false);
*var_present = false;
return zero_cost;
}
-
+
*symbol_present = false;
*var_present = true;
return zero_cost;
if (!constant_multiple_of (ustep, cstep, &rat))
return infinite_cost;
-
+
if (double_int_fits_in_shwi_p (rat))
ratio = double_int_to_shwi (rat);
else
/* use = ubase + ratio * (var - cbase). If either cbase is a constant
or ratio == 1, it is better to handle this like
-
+
ubase - ratio * cbase + ratio * var
-
+
(also holds in the case ratio == -1, TODO. */
if (cst_and_fits_in_hwi (cbase))
{
- offset = - ratio * int_cst_value (cbase);
+ offset = - ratio * int_cst_value (cbase);
cost = difference_cost (data,
ubase, build_int_cst (utype, 0),
&symbol_present, &var_present, &offset,
/* Symbol + offset should be compile-time computable so consider that they
are added once to the variable, if present. */
if (var_present && (symbol_present || offset))
- cost.cost += add_cost (TYPE_MODE (ctype), speed)
- / AVG_LOOP_NITER (data->current_loop);
+ cost.cost += adjust_setup_cost (data,
+ add_cost (TYPE_MODE (ctype), speed));
/* Having offset does not affect runtime cost in case it is added to
symbol, but it increases complexity. */
aratio = ratio > 0 ? ratio : -ratio;
if (aratio != 1)
cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
+ return cost;
fallback:
if (can_autoinc)
return infinite_cost;
if (address_p)
- comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
+ comp = build_simple_mem_ref (comp);
return new_cost (computation_cost (comp, speed), 0);
}
bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
comp_cost elim_cost, express_cost, cost;
bool ok;
+ tree *control_var, *bound_cst;
/* Only consider real candidates. */
if (!cand->iv)
elim_cost = force_var_cost (data, bound, &depends_on_elim);
/* The bound is a loop invariant, so it will be only computed
once. */
- elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
+ elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
}
else
elim_cost = infinite_cost;
/* Try expressing the original giv. If it is compared with an invariant,
note that we cannot get rid of it. */
- ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
+ ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
+ NULL, &cmp_iv);
gcc_assert (ok);
+ /* When the condition is a comparison of the candidate IV against
+ zero, prefer this IV.
+
+     TODO: The constant that we're subtracting from the cost should
+ be target-dependent. This information should be added to the
+ target costs for each backend. */
+ if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
+ && integer_zerop (*bound_cst)
+ && (operand_equal_p (*control_var, cand->var_after, 0)
+ || operand_equal_p (*control_var, cand->var_before, 0)))
+ elim_cost.cost -= 1;
+
express_cost = get_computation_cost (data, use, cand, false,
&depends_on_express, NULL);
fd_ivopts_data = data;
cost_base = force_var_cost (data, base, NULL);
cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
- cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
+ cost = cost_step + adjust_setup_cost (data, cost_base.cost);
/* Prefer the original ivs unless we may gain something by replacing it.
The reason is to make debugging simpler; so this is not relevant for
if (cand->pos != IP_ORIGINAL
|| DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
cost++;
-
+
/* Prefer not to insert statements into latch unless there are some
already (so that we do not create unnecessary jumps). */
if (cand->pos == IP_END
{
/* We add size to the cost, so that we prefer eliminating ivs
if possible. */
- return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
+ return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
+ data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty. */
struct loop *loop = data->current_loop;
bitmap_iterator bi;
- /* We use the following model (definitely improvable, especially the
- cost function -- TODO):
-
- We estimate the number of registers available (using MD data), name it A.
-
- We estimate the number of registers used by the loop, name it U. This
- number is obtained as the number of loop phi nodes (not counting virtual
- registers and bivs) + the number of variables from outside of the loop.
-
- We set a reserve R (free regs that are used for temporary computations,
- etc.). For now the reserve is a constant 3.
-
- Let I be the number of induction variables.
-
- -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
- make a lot of ivs without a reason).
- -- if A - R < U + I <= A, the cost is I * PRES_COST
- -- if U + I > A, the cost is I * PRES_COST and
- number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
-
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Global costs:\n");
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
+ fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
}
if (!iv_ca_has_deps (ivs, new_cp))
continue;
-
+
if (!cheaper_cost_pair (new_cp, old_cp))
continue;
continue;
if (!iv_ca_has_deps (ivs, cp))
continue;
-
+
if (!cheaper_cost_pair (cp, new_cp))
continue;
continue;
if (!iv_ca_has_deps (ivs, cp))
continue;
-
+
if (!cheaper_cost_pair (cp, new_cp))
continue;
}
/* Tries to extend the sets IVS in the best possible way in order
- to express the USE. */
+ to express the USE. If ORIGINALP is true, prefer candidates from
+ the original set of IVs, otherwise favor important candidates not
+ based on any memory object. */
static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
- struct iv_use *use)
+ struct iv_use *use, bool originalp)
{
comp_cost best_cost, act_cost;
unsigned i;
iv_ca_set_no_cp (data, ivs, use);
}
- /* First try important candidates not based on any memory object. Only if
+ /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
+ first try important candidates not based on any memory object. Only if
this fails, try the specific ones. Rationale -- in loops with many
variables the best choice often is to use just one generic biv. If we
added here many ivs specific to the uses, the optimization algorithm later
{
cand = iv_cand (data, i);
- if (cand->iv->base_object != NULL_TREE)
+ if (originalp && cand->pos !=IP_ORIGINAL)
+ continue;
+
+ if (!originalp && cand->iv->base_object != NULL_TREE)
continue;
if (iv_ca_cand_used_p (ivs, cand))
continue;
/* Already tried this. */
- if (cand->important && cand->iv->base_object == NULL_TREE)
- continue;
-
+ if (cand->important)
+ {
+ if (originalp && cand->pos == IP_ORIGINAL)
+ continue;
+ if (!originalp && cand->iv->base_object == NULL_TREE)
+ continue;
+ }
+
if (iv_ca_cand_used_p (ivs, cand))
continue;
/* Finds an initial assignment of candidates to uses. */
static struct iv_ca *
-get_initial_solution (struct ivopts_data *data)
+get_initial_solution (struct ivopts_data *data, bool originalp)
{
struct iv_ca *ivs = iv_ca_new (data);
unsigned i;
for (i = 0; i < n_iv_uses (data); i++)
- if (!try_add_cand_for (data, ivs, iv_use (data, i)))
+ if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
{
iv_ca_free (&ivs);
return NULL;
for (i = 0; i < n_iv_cands (data); i++)
{
cand = iv_cand (data, i);
-
+
if (iv_ca_cand_used_p (ivs, cand))
continue;
solution and remove the unused ivs while this improves the cost. */
static struct iv_ca *
-find_optimal_iv_set (struct ivopts_data *data)
+find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
- unsigned i;
struct iv_ca *set;
- struct iv_use *use;
/* Get the initial solution. */
- set = get_initial_solution (data);
+ set = get_initial_solution (data, originalp);
if (!set)
{
if (dump_file && (dump_flags & TDF_DETAILS))
}
}
+ return set;
+}
+
+static struct iv_ca *
+find_optimal_iv_set (struct ivopts_data *data)
+{
+ unsigned i;
+ struct iv_ca *set, *origset;
+ struct iv_use *use;
+ comp_cost cost, origcost;
+
+ /* Determine the cost based on a strategy that starts with original IVs,
+ and try again using a strategy that prefers candidates not based
+ on any IVs. */
+ origset = find_optimal_iv_set_1 (data, true);
+ set = find_optimal_iv_set_1 (data, false);
+
+ if (!origset && !set)
+ return NULL;
+
+ origcost = origset ? iv_ca_cost (origset) : infinite_cost;
+ cost = set ? iv_ca_cost (set) : infinite_cost;
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
- comp_cost cost = iv_ca_cost (set);
- fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
+ fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
+ origcost.cost, origcost.complexity);
+ fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
+ cost.cost, cost.complexity);
+ }
+
+ /* Choose the one with the best cost. */
+ if (compare_costs (origcost, cost) <= 0)
+ {
+ if (set)
+ iv_ca_free (&set);
+ set = origset;
}
+ else if (origset)
+ iv_ca_free (&origset);
for (i = 0; i < n_iv_uses (data); i++)
{
/* Rewrite the increment so that it uses var_before directly. */
find_interesting_uses_op (data, cand->var_after)->selected = cand;
-
+
return;
}
-
+
gimple_add_tmp_var (cand->var_before);
add_referenced_var (cand->var_before);
gcc_unreachable ();
}
- op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
- true, GSI_SAME_STMT);
+ if (!valid_gimple_rhs_p (comp)
+ || (gimple_code (use->stmt) != GIMPLE_PHI
+ /* We can't allow re-allocating the stmt as it might be pointed
+ to still. */
+ && (get_gimple_rhs_num_ops (TREE_CODE (comp))
+ >= gimple_num_ops (gsi_stmt (bsi)))))
+ {
+ comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ if (POINTER_TYPE_P (TREE_TYPE (tgt)))
+ duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
+ }
if (gimple_code (use->stmt) == GIMPLE_PHI)
{
- ass = gimple_build_assign (tgt, op);
+ ass = gimple_build_assign (tgt, comp);
gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
bsi = gsi_for_stmt (use->stmt);
}
else
{
- gimple_assign_set_rhs_from_tree (&bsi, op);
+ gimple_assign_set_rhs_from_tree (&bsi, comp);
use->stmt = gsi_stmt (bsi);
}
}
static void
copy_ref_info (tree new_ref, tree old_ref)
{
-  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
-    copy_mem_ref_info (new_ref, old_ref);
-  else
+  tree new_ptr_base = NULL_TREE;
+
+  /* NOTE(review): TMR_ORIGINAL keeps the pre-rewriting form of the
+     reference; only TARGET_MEM_REFs carry it, hence the guards on
+     NEW_REF's code below -- confirm MEM_REF has no equivalent slot.  */
+  if (TREE_CODE (old_ref) == TARGET_MEM_REF
+      && TREE_CODE (new_ref) == TARGET_MEM_REF)
+    TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
+  else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
    TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
+
+  /* Side-effect and volatility flags must survive the rewrite.  */
+  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
+  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
+
+  /* Find the SSA pointer (if any) the new reference is based on.  */
+  if (TREE_CODE (new_ref) == TARGET_MEM_REF)
+    new_ptr_base = TMR_BASE (new_ref);
+  else if (TREE_CODE (new_ref) == MEM_REF)
+    new_ptr_base = TREE_OPERAND (new_ref, 0);
+
+  /* We can transfer points-to information from an old pointer
+     or decl base to the new one.  */
+  if (new_ptr_base
+      && TREE_CODE (new_ptr_base) == SSA_NAME
+      && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
+      && !SSA_NAME_PTR_INFO (new_ptr_base))
+    {
+      tree base = get_base_address (old_ref);
+      /* Old base was a dereference of an SSA pointer: copy its
+         points-to info wholesale.  */
+      if ((INDIRECT_REF_P (base)
+	   || TREE_CODE (base) == MEM_REF)
+	  && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
+	duplicate_ssa_name_ptr_info
+	  (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
+      /* Old base was a decl: the new pointer points exactly to it.  */
+      else if (TREE_CODE (base) == VAR_DECL
+	       || TREE_CODE (base) == PARM_DECL
+	       || TREE_CODE (base) == RESULT_DECL)
+	{
+	  struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
+	  pt_solution_set_var (&pi->pt, base);
+	}
+    }
}
/* Rewrites USE (address that is an iv) using candidate CAND. */
{
aff_tree aff;
gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
+ tree base_hint = NULL_TREE;
tree ref;
bool ok;
gcc_assert (ok);
unshare_aff_combination (&aff);
- ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
+ /* To avoid undefined overflow problems, all IV candidates use unsigned
+ integer types. The drawback is that this makes it impossible for
+ create_mem_ref to distinguish an IV that is based on a memory object
+ from one that represents simply an offset.
+
+ To work around this problem, we pass a hint to create_mem_ref that
+ indicates which variable (if any) in aff is an IV based on a memory
+ object. Note that we only consider the candidate. If this is not
+ based on an object, the base of the reference is in some subexpression
+ of the use -- but these will use pointer types, so they are recognized
+ by the create_mem_ref heuristics anyway. */
+ if (cand->iv->base_object)
+ base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
+
+ ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p),
+ reference_alias_ptr_type (*use->op_p),
+ &aff, base_hint, data->speed);
copy_ref_info (ref, *use->op_p);
*use->op_p = ref;
}
default:
gcc_unreachable ();
}
-
+
update_stmt (use->stmt);
}
VEC_free (iv_cand_p, heap, data->iv_candidates);
}
+/* Returns true if the loop body BODY (an array of NUM_NODES basic blocks)
+   includes any function call that is not an inexpensive builtin.  */
+
+static bool
+loop_body_includes_call (basic_block *body, unsigned num_nodes)
+{
+  gimple_stmt_iterator gsi;
+  unsigned i;
+
+  for (i = 0; i < num_nodes; i++)
+    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+	gimple stmt = gsi_stmt (gsi);
+	/* NOTE(review): gimple_call_fndecl returns NULL_TREE for indirect
+	   calls; presumably is_inexpensive_builtin treats NULL as "not
+	   inexpensive", so indirect calls count as calls -- confirm.  */
+	if (is_gimple_call (stmt)
+	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
+	  return true;
+      }
+  return false;
+}
+
/* Optimizes the LOOP. Returns true if anything changed. */
static bool
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Processing loop %d\n", loop->num);
-
+
exit = single_dom_exit (loop);
if (exit)
{
}
body = get_loop_body (loop);
+ data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
free (body);
/* Create the new induction variables (item 4, part 1). */
create_new_ivs (data, iv_ca);
iv_ca_free (&iv_ca);
-
+
/* Rewrite the uses (item 4, part 2). */
rewrite_uses (data);