/* Induction variable optimizations.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
This file is part of GCC.
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
-#include "rtl.h"
#include "tm_p.h"
-#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
-#include "varray.h"
-#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
+#include "tree-inline.h"
+#include "tree-ssa-propagate.h"
+
+/* FIXME: add_cost and zero_cost defined in expmed.h conflict with local uses.
+ */
+#include "expmed.h"
+#undef add_cost
+#undef zero_cost
+
+/* FIXME: Expressions are expanded to RTL in this pass to determine the
+ cost of different addressing modes. This should be moved to a TBD
+ interface between the GIMPLE and RTL worlds. */
+#include "expr.h"
/* The infinite cost. */
#define INFTY 10000000
-/* The expected number of loop iterations. TODO -- use profiling instead of
- this. */
#define AVG_LOOP_NITER(LOOP) 5
+/* Returns the expected number of loop iterations for LOOP.
+   The average trip count is computed from profile data if it
+   exists. */
+
+static inline HOST_WIDE_INT
+avg_loop_niter (struct loop *loop)
+{
+  HOST_WIDE_INT niter = max_stmt_executions_int (loop, false);
+  /* max_stmt_executions_int returns -1 when no estimate is available;
+     fall back to the static per-loop guess in that case.  */
+  if (niter == -1)
+    return AVG_LOOP_NITER (loop);
+
+  return niter;
+}
/* Representation of the induction variable. */
struct iv
struct iv *iv; /* Induction variable description. */
bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
an expression that is not an induction variable. */
- unsigned inv_id; /* Id of an invariant. */
bool preserve_biv; /* For the original biv, whether to preserve it. */
+ unsigned inv_id; /* Id of an invariant. */
};
/* Types of uses. */
tree value; /* For final value elimination, the expression for
the final value of the iv. For iv elimination,
the new bound to compare with. */
+ enum tree_code comp; /* For iv elimination, the comparison. */
+ int inv_expr_id; /* Loop invariant expression id. */
};
/* Use. */
unsigned id; /* The number of the candidate. */
bool important; /* Whether this is an "important" candidate, i.e. such
that it should be considered by all uses. */
- enum iv_position pos; /* Where it is computed. */
+ ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
gimple incremented_at;/* For original biv, the statement where it is
incremented. */
tree var_before; /* The variable used for it before increment. */
biv. */
};
+/* Loop invariant expression hashtable entry. */
+struct iv_inv_expr_ent
+{
+  tree expr;       /* The invariant expression itself.  */
+  int id;          /* Unique id, handed out by get_expr_id.  */
+  hashval_t hash;  /* Cached hash value of EXPR.  */
+};
+
/* The data used by the induction variable optimizations. */
typedef struct iv_use *iv_use_p;
/* The array of information for the ssa names. */
struct version_info *version_info;
+ /* The hashtable of loop invariant expressions created
+ by ivopt. */
+ htab_t inv_expr_tab;
+
+ /* Loop invariant expression id. */
+ int inv_expr_id;
+
/* The bitmap of indices in version_info whose value was changed. */
bitmap relevant;
/* Are we optimizing for speed? */
bool speed;
+
+ /* Whether the loop body includes any function calls. */
+ bool body_includes_call;
+
+ /* Whether the loop body can only be exited via single exit. */
+ bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses. */
/* Number of times each invariant is used. */
unsigned *n_invariant_uses;
+ /* The array holding the number of uses of each loop
+ invariant expressions created by ivopt. */
+ unsigned *used_inv_expr;
+
+ /* The number of created loop invariants. */
+ unsigned num_used_inv_expr;
+
/* Total cost of the assignment. */
comp_cost cost;
};
static VEC(tree,heap) *decl_rtl_to_reset;
+static comp_cost force_expr_to_var_cost (tree, bool);
+
/* Number of uses recorded in DATA. */
static inline unsigned
return;
}
+ if (cand->var_before)
+ {
+ fprintf (file, " var_before ");
+ print_generic_expr (file, cand->var_before, TDF_SLIM);
+ fprintf (file, "\n");
+ }
+ if (cand->var_after)
+ {
+ fprintf (file, " var_after ");
+ print_generic_expr (file, cand->var_after, TDF_SLIM);
+ fprintf (file, "\n");
+ }
+
switch (cand->pos)
{
case IP_NORMAL:
idx_contains_abnormal_ssa_name_p,
NULL);
+ if (code == COND_EXPR)
+ return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
+ || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
+ || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
+
switch (codeclass)
{
case tcc_binary:
return false;
}
-/* Returns tree describing number of iterations determined from
+/* Returns the structure describing number of iterations determined from
EXIT of DATA->current_loop, or NULL if something goes wrong. */
-static tree
+static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
- struct tree_niter_desc desc;
- tree niter;
+ struct tree_niter_desc *desc;
void **slot;
if (!data->niters)
if (!slot)
{
- /* Try to determine number of iterations. We must know it
- unconditionally (i.e., without possibility of # of iterations
- being zero). Also, we cannot safely work with ssa names that
- appear in phi nodes on abnormal edges, so that we do not create
- overlapping life ranges for them (PR 27283). */
- if (number_of_iterations_exit (data->current_loop,
- exit, &desc, true)
- && integer_zerop (desc.may_be_zero)
- && !contains_abnormal_ssa_name_p (desc.niter))
- niter = desc.niter;
- else
- niter = NULL_TREE;
-
- *pointer_map_insert (data->niters, exit) = niter;
+ /* Try to determine number of iterations. We cannot safely work with ssa
+ names that appear in phi nodes on abnormal edges, so that we do not
+ create overlapping life ranges for them (PR 27283). */
+ desc = XNEW (struct tree_niter_desc);
+ if (!number_of_iterations_exit (data->current_loop,
+ exit, desc, true)
+ || contains_abnormal_ssa_name_p (desc->niter))
+ {
+ XDELETE (desc);
+ desc = NULL;
+ }
+ slot = pointer_map_insert (data->niters, exit);
+ *slot = desc;
}
else
- niter = (tree) *slot;
+ desc = (struct tree_niter_desc *) *slot;
- return niter;
+ return desc;
}
-/* Returns tree describing number of iterations determined from
+/* Returns the structure describing number of iterations determined from
single dominating exit of DATA->current_loop, or NULL if something
goes wrong. */
-static tree
+static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
edge exit = single_dom_exit (data->current_loop);
return niter_for_exit (data, exit);
}
+/* Hash table equality function for expressions. */
+
+static int
+htab_inv_expr_eq (const void *ent1, const void *ent2)
+{
+  const struct iv_inv_expr_ent *expr1 =
+      (const struct iv_inv_expr_ent *)ent1;
+  const struct iv_inv_expr_ent *expr2 =
+      (const struct iv_inv_expr_ent *)ent2;
+
+  /* Compare the cached hashes first as a cheap filter, then fall back
+     to a full structural comparison of the trees.  */
+  return expr1->hash == expr2->hash
+         && operand_equal_p (expr1->expr, expr2->expr, 0);
+}
+
+/* Hash function for loop invariant expressions. */
+
+static hashval_t
+htab_inv_expr_hash (const void *ent)
+{
+  const struct iv_inv_expr_ent *expr =
+      (const struct iv_inv_expr_ent *)ent;
+  /* The hash is computed once and cached when the entry is created
+     (see get_expr_id), so just return it here.  */
+  return expr->hash;
+}
+
/* Initializes data structures used by the iv optimization pass, stored
in DATA. */
data->niters = NULL;
data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
+ data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
+ htab_inv_expr_eq, free);
+ data->inv_expr_id = 0;
decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
if (!base)
return expr;
- if (TREE_CODE (base) == INDIRECT_REF)
+ if (TREE_CODE (base) == MEM_REF)
return determine_base_object (TREE_OPERAND (base, 0));
return fold_convert (ptr_type_node,
if (step)
{
if (POINTER_TYPE_P (type))
- step = fold_convert (sizetype, step);
+ step = convert_to_ptrofftype (step);
else
step = fold_convert (type, step);
}
|| contains_abnormal_ssa_name_p (iv->step))
return false;
+ /* If STMT could throw, then do not consider STMT as defining a GIV.
+ While this will suppress optimizations, we can not safely delete this
+ GIV and associated statements, even if it appears it is not used. */
+ if (stmt_could_throw_p (stmt))
+ return false;
+
return true;
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
- tree niter = niter_for_single_dom_exit (data);
+ struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
if (niter)
{
fprintf (dump_file, " number of iterations ");
- print_generic_expr (dump_file, niter, TDF_SLIM);
+ print_generic_expr (dump_file, niter->niter, TDF_SLIM);
+ if (!integer_zerop (niter->may_be_zero))
+ {
+ fprintf (dump_file, "; zero if ");
+ print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
+ }
fprintf (dump_file, "\n\n");
};
tree step, iv_base, iv_step, lbound, off;
struct loop *loop = dta->ivopts_data->current_loop;
- if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
- || TREE_CODE (base) == ALIGN_INDIRECT_REF)
- return false;
-
/* If base is a component ref, require that the offset of the reference
be invariant. */
if (TREE_CODE (base) == COMPONENT_REF)
}
else
/* The step for pointer arithmetics already is 1 byte. */
- step = build_int_cst (sizetype, 1);
+ step = size_one_node;
iv_base = iv->base;
iv_step = iv->step;
base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
&unsignedp, &volatilep, true);
base_type = TREE_TYPE (base);
- base_align = TYPE_ALIGN (base_type);
+ base_align = get_object_alignment (base);
+ base_align = MAX (base_align, TYPE_ALIGN (base_type));
if (mode != BLKmode)
{
- double_int mul;
- tree al = build_int_cst (TREE_TYPE (step),
- GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
+ unsigned mode_align = GET_MODE_ALIGNMENT (mode);
+
+ if (base_align < mode_align
+ || (bitpos % mode_align) != 0
+ || (bitpos % BITS_PER_UNIT) != 0)
+ return true;
- if (base_align < GET_MODE_ALIGNMENT (mode)
- || bitpos % GET_MODE_ALIGNMENT (mode) != 0
- || bitpos % BITS_PER_UNIT != 0)
+ if (toffset
+ && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
return true;
- if (!constant_multiple_of (step, al, &mul))
+ if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
return true;
}
/* Return true if EXPR may be non-addressable. */
-static bool
+bool
may_be_nonaddressable_p (tree expr)
{
switch (TREE_CODE (expr))
static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
- tree base = *op_p, step = build_int_cst (sizetype, 0);
+ tree base = *op_p, step = size_zero_node;
struct iv *civ;
struct ifs_ivopts_data ifs_ivopts_data;
TMR_BASE (base) = civ->base;
step = civ->step;
}
+ if (TMR_INDEX2 (base)
+ && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
+ {
+ civ = get_iv (data, TMR_INDEX2 (base));
+ if (!civ)
+ goto fail;
+
+ TMR_INDEX2 (base) = civ->base;
+ step = civ->step;
+ }
if (TMR_INDEX (base)
&& TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
{
{
ifs_ivopts_data.ivopts_data = data;
ifs_ivopts_data.stmt = stmt;
- ifs_ivopts_data.step = build_int_cst (sizetype, 0);
+ ifs_ivopts_data.step = size_zero_node;
if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
|| integer_zerop (ifs_ivopts_data.step))
goto fail;
step = ifs_ivopts_data.step;
- gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
- gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
-
/* Check that the base expression is addressable. This needs
to be done after substituting bases of IVs into it. */
if (may_be_nonaddressable_p (base))
tree *ref = &TREE_OPERAND (base, 0);
while (handled_component_p (*ref))
ref = &TREE_OPERAND (*ref, 0);
- if (TREE_CODE (*ref) == INDIRECT_REF)
+ if (TREE_CODE (*ref) == MEM_REF)
{
- tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
+ tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
+ TREE_OPERAND (*ref, 0),
+ TREE_OPERAND (*ref, 1));
if (tem)
*ref = tem;
}
phi = gsi_stmt (psi);
def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
if (is_gimple_reg (def))
- find_interesting_uses_op (data, def);
+ find_interesting_uses_op (data, def);
}
}
expr = build_fold_addr_expr (op0);
return fold_convert (orig_type, expr);
- case INDIRECT_REF:
+ case MEM_REF:
+ /* ??? Offset operand? */
inside_addr = false;
break;
struct iv_cand *cand = NULL;
tree type, orig_type;
- if (base)
+ /* For non-original variables, make sure their values are computed in a type
+ that does not invoke undefined behavior on overflows (since in general,
+ we cannot prove that these induction variables are non-wrapping). */
+ if (pos != IP_ORIGINAL)
{
orig_type = TREE_TYPE (base);
type = generic_type_for (orig_type);
continue;
if (operand_equal_p (base, cand->iv->base, 0)
- && operand_equal_p (step, cand->iv->step, 0))
+ && operand_equal_p (step, cand->iv->step, 0)
+ && (TYPE_PRECISION (TREE_TYPE (base))
+ == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
break;
}
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
on invariants DEPENDS_ON and that the value used in expressing it
- is VALUE. */
+ is VALUE, and in case of iv elimination the comparison operator is COMP. */
static void
set_use_iv_cost (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand,
- comp_cost cost, bitmap depends_on, tree value)
+ comp_cost cost, bitmap depends_on, tree value,
+ enum tree_code comp, int inv_expr_id)
{
unsigned i, s;
use->cost_map[cand->id].cost = cost;
use->cost_map[cand->id].depends_on = depends_on;
use->cost_map[cand->id].value = value;
+ use->cost_map[cand->id].comp = comp;
+ use->cost_map[cand->id].inv_expr_id = inv_expr_id;
return;
}
use->cost_map[i].cost = cost;
use->cost_map[i].depends_on = depends_on;
use->cost_map[i].value = value;
+ use->cost_map[i].comp = comp;
+ use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair. */
{
set = single_set (seq);
if (set)
- cost += rtx_cost (set, SET,speed);
+ cost += set_src_cost (SET_SRC (set), speed);
else
cost++;
}
unsigned cost;
/* Avoid using hard regs in ways which may be unsupported. */
int regno = LAST_VIRTUAL_REGISTER + 1;
- enum function_frequency real_frequency = cfun->function_frequency;
+ struct cgraph_node *node = cgraph_get_node (current_function_decl);
+ enum node_frequency real_frequency = node->frequency;
- cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+ node->frequency = NODE_FREQUENCY_NORMAL;
crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
start_sequence ();
seq = get_insns ();
end_sequence ();
default_rtl_profile ();
- cfun->function_frequency = real_frequency;
+ node->frequency = real_frequency;
cost = seq_cost (seq, speed);
if (MEM_P (rslt))
cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
TYPE_ADDR_SPACE (type), speed);
+ else if (!REG_P (rslt))
+ cost += set_src_cost (rslt, speed);
return cost;
}
return cand->var_before;
}
-/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
- but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
-
-int
-tree_int_cst_sign_bit (const_tree t)
-{
- unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
- unsigned HOST_WIDE_INT w;
-
- if (bitno < HOST_BITS_PER_WIDE_INT)
- w = TREE_INT_CST_LOW (t);
- else
- {
- w = TREE_INT_CST_HIGH (t);
- bitno -= HOST_BITS_PER_WIDE_INT;
- }
-
- return (w >> bitno) & 1;
-}
-
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
same precision that is at least as wide as the precision of TYPE, stores
BA to A and BB to B, and returns the type of BA. Otherwise, returns the
return get_computation_at (loop, use, cand, use->stmt);
}
+/* Adjust the cost COST for being in loop setup rather than loop body.
+   If we're optimizing for space, the loop setup overhead is constant;
+   if we're optimizing for speed, amortize it over the per-iteration cost. */
+static unsigned
+adjust_setup_cost (struct ivopts_data *data, unsigned cost)
+{
+  /* Keep the infinite cost infinite -- dividing it would make it finite.  */
+  if (cost == INFTY)
+    return cost;
+  else if (optimize_loop_for_speed_p (data->current_loop))
+    return cost / avg_loop_niter (data->current_loop);
+  else
+    return cost;
+}
+
/* Returns cost of addition in MODE. */
static unsigned
if (!data)
{
HOST_WIDE_INT i;
- HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
- HOST_WIDE_INT rat, off;
- int old_cse_not_expected;
+ HOST_WIDE_INT rat, off = 0;
+ int old_cse_not_expected, width;
unsigned sym_p, var_p, off_p, rat_p, add_c;
rtx seq, addr, base;
rtx reg0, reg1;
reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
+ width = GET_MODE_BITSIZE (address_mode) - 1;
+ if (width > (HOST_BITS_PER_WIDE_INT - 1))
+ width = HOST_BITS_PER_WIDE_INT - 1;
addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
- for (i = start; i <= 1 << 20; i <<= 1)
+
+ for (i = width; i >= 0; i--)
{
- XEXP (addr, 1) = gen_int_mode (i, address_mode);
- if (!memory_address_addr_space_p (mem_mode, addr, as))
+ off = -((HOST_WIDE_INT) 1 << i);
+ XEXP (addr, 1) = gen_int_mode (off, address_mode);
+ if (memory_address_addr_space_p (mem_mode, addr, as))
break;
}
- data->max_offset = i == start ? 0 : i >> 1;
- off = data->max_offset;
+ data->min_offset = (i == -1? 0 : off);
- for (i = start; i <= 1 << 20; i <<= 1)
+ for (i = width; i >= 0; i--)
{
- XEXP (addr, 1) = gen_int_mode (-i, address_mode);
- if (!memory_address_addr_space_p (mem_mode, addr, as))
+ off = ((HOST_WIDE_INT) 1 << i) - 1;
+ XEXP (addr, 1) = gen_int_mode (off, address_mode);
+ if (memory_address_addr_space_p (mem_mode, addr, as))
break;
}
- data->min_offset = i == start ? 0 : -(i >> 1);
+ if (i == -1)
+ off = 0;
+ data->max_offset = off;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "get_address_cost:\n");
- fprintf (dump_file, " min offset %s %d\n",
+ fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
GET_MODE_NAME (mem_mode),
- (int) data->min_offset);
- fprintf (dump_file, " max offset %s %d\n",
+ data->min_offset);
+ fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
GET_MODE_NAME (mem_mode),
- (int) data->max_offset);
+ data->max_offset);
}
rat = 1;
return new_cost (cost + acost, complexity);
}
+ /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
+    the EXPR operand holding the shift.  COST0 and COST1 are the costs for
+    calculating the operands of EXPR.  Returns true if successful, and returns
+    the cost in COST.  */
+
+static bool
+get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
+                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
+{
+  comp_cost res;
+  tree op1 = TREE_OPERAND (expr, 1);
+  tree cst = TREE_OPERAND (mult, 1);
+  tree multop = TREE_OPERAND (mult, 0);
+  /* Shift count; exact_log2 yields -1 when CST is not a power of two.  */
+  int m = exact_log2 (int_cst_value (cst));
+  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
+  int sa_cost;
+
+  /* Bail out unless the multiplier is a power of two representable
+     as a shift in this mode.  */
+  if (!(m >= 0 && m < maxm))
+    return false;
+
+  /* PLUS uses the shift-and-add cost table; for MINUS, pick the
+     shift-and-subtract variant matching which operand holds the
+     multiply.  */
+  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
+             ? shiftadd_cost[speed][mode][m]
+             : (mult == op1
+                ? shiftsub1_cost[speed][mode][m]
+                : shiftsub0_cost[speed][mode][m]));
+  res = new_cost (sa_cost, 0);
+  /* Add the cost of computing the operand not folded into the shiftadd.  */
+  res = add_costs (res, mult == op1 ? cost0 : cost1);
+
+  /* If the multiplied operand is not already a gimple value, account
+     for forcing it into a variable as well.  */
+  STRIP_NOPS (multop);
+  if (!is_gimple_val (multop))
+    res = add_costs (res, force_expr_to_var_cost (multop, speed));
+
+  *cost = res;
+  return true;
+}
+
/* Estimates cost of forcing expression EXPR into a variable. */
static comp_cost
symbol_cost[i] = computation_cost (addr, i) + 1;
address_cost[i]
- = computation_cost (build2 (POINTER_PLUS_EXPR, type,
- addr,
- build_int_cst (sizetype, 2000)), i) + 1;
+ = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
case MINUS_EXPR:
case NEGATE_EXPR:
cost = new_cost (add_cost (mode, speed), 0);
+ if (TREE_CODE (expr) != NEGATE_EXPR)
+ {
+ tree mult = NULL_TREE;
+ comp_cost sa_cost;
+ if (TREE_CODE (op1) == MULT_EXPR)
+ mult = op1;
+ else if (TREE_CODE (op0) == MULT_EXPR)
+ mult = op0;
+
+ if (mult != NULL_TREE
+ && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
+ && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
+ &sa_cost))
+ return sa_cost;
+ }
break;
case MULT_EXPR:
return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
+/* Returns true if AFF1 and AFF2 are identical. */
+
+static bool
+compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
+{
+  unsigned i;
+
+  /* Combinations with a different number of elements cannot match.  */
+  if (aff1->n != aff2->n)
+    return false;
+
+  /* Positional, element-wise comparison of coefficients and values.
+     NOTE(review): the constant offset parts of the combinations are not
+     compared here -- the current caller strips constant parts first;
+     confirm before reusing this helper elsewhere.  */
+  for (i = 0; i < aff1->n; i++)
+    {
+      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
+        return false;
+
+      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
+        return false;
+    }
+  return true;
+}
+
+/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
+
+static int
+get_expr_id (struct ivopts_data *data, tree expr)
+{
+  struct iv_inv_expr_ent ent;
+  struct iv_inv_expr_ent **slot;
+
+  ent.expr = expr;
+  ent.hash = iterative_hash_expr (expr, 0);
+  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
+                                                     &ent, INSERT);
+  /* An equal expression was recorded before; reuse its id.  */
+  if (*slot)
+    return (*slot)->id;
+
+  /* First occurrence: allocate a permanent entry (released by the
+     table's free function) and hand out a fresh id.  */
+  *slot = XNEW (struct iv_inv_expr_ent);
+  (*slot)->expr = expr;
+  (*slot)->hash = ent.hash;
+  (*slot)->id = data->inv_expr_id++;
+  return (*slot)->id;
+}
+
+/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
+   requires a new compiler generated temporary. Returns -1 otherwise.
+   ADDRESS_P is a flag indicating if the expression is for address
+   computation. */
+
+static int
+get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
+                            tree cbase, HOST_WIDE_INT ratio,
+                            bool address_p)
+{
+  aff_tree ubase_aff, cbase_aff;
+  tree expr, ub, cb;
+
+  STRIP_NOPS (ubase);
+  STRIP_NOPS (cbase);
+  /* Remember the bases before the constant parts are stripped; the
+     affine combination built at the end uses these.  */
+  ub = ubase;
+  cb = cbase;
+
+  /* Constant - constant folds away; no temporary needed.  */
+  if ((TREE_CODE (ubase) == INTEGER_CST)
+      && (TREE_CODE (cbase) == INTEGER_CST))
+    return -1;
+
+  /* Strips the constant part. */
+  if (TREE_CODE (ubase) == PLUS_EXPR
+      || TREE_CODE (ubase) == MINUS_EXPR
+      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
+    {
+      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
+        ubase = TREE_OPERAND (ubase, 0);
+    }
+
+  /* Strips the constant part. */
+  if (TREE_CODE (cbase) == PLUS_EXPR
+      || TREE_CODE (cbase) == MINUS_EXPR
+      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
+    {
+      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
+        cbase = TREE_OPERAND (cbase, 0);
+    }
+
+  if (address_p)
+    {
+      /* For address computation, an invariant base plus a constant can
+         be folded into the addressing mode -- no temporary needed.  */
+      if (((TREE_CODE (ubase) == SSA_NAME)
+           || (TREE_CODE (ubase) == ADDR_EXPR
+               && is_gimple_min_invariant (ubase)))
+          && (TREE_CODE (cbase) == INTEGER_CST))
+        return -1;
+
+      if (((TREE_CODE (cbase) == SSA_NAME)
+           || (TREE_CODE (cbase) == ADDR_EXPR
+               && is_gimple_min_invariant (cbase)))
+          && (TREE_CODE (ubase) == INTEGER_CST))
+        return -1;
+    }
+
+  if (ratio == 1)
+    {
+      /* With RATIO == 1 identical bases cancel out.  */
+      if(operand_equal_p (ubase, cbase, 0))
+        return -1;
+
+      /* &a and &a[0] denote the same address; strip a zero-index
+         ARRAY_REF from either side before comparing the symbols.  */
+      if (TREE_CODE (ubase) == ADDR_EXPR
+          && TREE_CODE (cbase) == ADDR_EXPR)
+        {
+          tree usym, csym;
+
+          usym = TREE_OPERAND (ubase, 0);
+          csym = TREE_OPERAND (cbase, 0);
+          if (TREE_CODE (usym) == ARRAY_REF)
+            {
+              tree ind = TREE_OPERAND (usym, 1);
+              if (TREE_CODE (ind) == INTEGER_CST
+                  && host_integerp (ind, 0)
+                  && TREE_INT_CST_LOW (ind) == 0)
+                usym = TREE_OPERAND (usym, 0);
+            }
+          if (TREE_CODE (csym) == ARRAY_REF)
+            {
+              tree ind = TREE_OPERAND (csym, 1);
+              if (TREE_CODE (ind) == INTEGER_CST
+                  && host_integerp (ind, 0)
+                  && TREE_INT_CST_LOW (ind) == 0)
+                csym = TREE_OPERAND (csym, 0);
+            }
+          if (operand_equal_p (usym, csym, 0))
+            return -1;
+        }
+      /* Now do more complex comparison.  */
+      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
+      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
+      if (compare_aff_trees (&ubase_aff, &cbase_aff))
+        return -1;
+    }
+
+  /* A temporary is needed: record UB - RATIO * CB as a loop-invariant
+     expression and return its id.  */
+  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
+  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
+
+  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
+  aff_combination_add (&ubase_aff, &cbase_aff);
+  expr = aff_combination_to_tree (&ubase_aff);
+  return get_expr_id (data, expr);
+}
+
+
+
/* Determines the cost of the computation by that USE is expressed
from induction variable CAND. If ADDRESS_P is true, we just need
to create an address from it, otherwise we want to get it into
get_computation_cost_at (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand,
bool address_p, bitmap *depends_on, gimple at,
- bool *can_autoinc)
+ bool *can_autoinc,
+ int *inv_expr_id)
{
tree ubase = use->iv->base, ustep = use->iv->step;
tree cbase, cstep;
STRIP_NOPS (cbase);
ctype = TREE_TYPE (cbase);
+ stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
+
/* use = ubase + ratio * (var - cbase). If either cbase is a constant
or ratio == 1, it is better to handle this like
ubase, build_int_cst (utype, 0),
&symbol_present, &var_present, &offset,
depends_on);
+ cost.cost /= avg_loop_niter (data->current_loop);
}
else if (ratio == 1)
{
+ tree real_cbase = cbase;
+
+ /* Check to see if any adjustment is needed. */
+ if (cstepi == 0 && stmt_is_after_inc)
+ {
+ aff_tree real_cbase_aff;
+ aff_tree cstep_aff;
+
+ tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
+ &real_cbase_aff);
+ tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
+
+ aff_combination_add (&real_cbase_aff, &cstep_aff);
+ real_cbase = aff_combination_to_tree (&real_cbase_aff);
+ }
+
cost = difference_cost (data,
- ubase, cbase,
+ ubase, real_cbase,
&symbol_present, &var_present, &offset,
depends_on);
+ cost.cost /= avg_loop_niter (data->current_loop);
}
else if (address_p
&& !POINTER_TYPE_P (ctype)
ubase, cbase,
&symbol_present, &var_present, &offset,
depends_on);
+ cost.cost /= avg_loop_niter (data->current_loop);
}
else
{
cost = force_var_cost (data, cbase, depends_on);
- cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
cost = add_costs (cost,
difference_cost (data,
ubase, build_int_cst (utype, 0),
&symbol_present, &var_present,
&offset, depends_on));
+ cost.cost /= avg_loop_niter (data->current_loop);
+ cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
+ }
+
+ if (inv_expr_id)
+ {
+ *inv_expr_id =
+ get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
+ /* Clear depends on. */
+ if (*inv_expr_id != -1 && depends_on && *depends_on)
+ bitmap_clear (*depends_on);
}
/* If we are after the increment, the value of the candidate is higher by
one iteration. */
- stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
if (stmt_is_after_inc)
offset -= ratio * cstepi;
/* Symbol + offset should be compile-time computable so consider that they
are added once to the variable, if present. */
if (var_present && (symbol_present || offset))
- cost.cost += add_cost (TYPE_MODE (ctype), speed)
- / AVG_LOOP_NITER (data->current_loop);
+ cost.cost += adjust_setup_cost (data,
+ add_cost (TYPE_MODE (ctype), speed));
/* Having offset does not affect runtime cost in case it is added to
symbol, but it increases complexity. */
aratio = ratio > 0 ? ratio : -ratio;
if (aratio != 1)
cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
+ return cost;
fallback:
if (can_autoinc)
return infinite_cost;
if (address_p)
- comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
+ comp = build_simple_mem_ref (comp);
return new_cost (computation_cost (comp, speed), 0);
}
static comp_cost
get_computation_cost (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand,
- bool address_p, bitmap *depends_on, bool *can_autoinc)
+ bool address_p, bitmap *depends_on,
+ bool *can_autoinc, int *inv_expr_id)
{
return get_computation_cost_at (data,
use, cand, address_p, depends_on, use->stmt,
- can_autoinc);
+ can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
{
bitmap depends_on;
comp_cost cost;
+ int inv_expr_id = -1;
/* The simple case first -- if we need to express value of the preserved
original biv, the cost is 0. This also prevents us from counting the
if (cand->pos == IP_ORIGINAL
&& cand->incremented_at == use->stmt)
{
- set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
+ set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE,
+ ERROR_MARK, -1);
return true;
}
- cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
- set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
+ cost = get_computation_cost (data, use, cand, false, &depends_on,
+ NULL, &inv_expr_id);
+
+ set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
+ inv_expr_id);
return !infinite_cost_p (cost);
}
{
bitmap depends_on;
bool can_autoinc;
+ int inv_expr_id = -1;
comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
- &can_autoinc);
+ &can_autoinc, &inv_expr_id);
if (cand->ainc_use == use)
{
else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
cost = infinite_cost;
}
- set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
+ set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
+ inv_expr_id);
return !infinite_cost_p (cost);
}
gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
- /* Period of the iv is gcd (step, type range). Since type range is power
- of two, it suffices to determine the maximum power of two that divides
- step. */
- pow2div = num_ending_zeros (step);
type = unsigned_type_for (TREE_TYPE (step));
+ /* Period of the iv is lcm (step, type_range)/step -1,
+ i.e., N*type_range/step - 1. Since type range is power
+ of two, N == (step >> num_of_ending_zeros_binary (step)),
+ so the final result is
+
+ (type_range >> num_of_ending_zeros_binary (step)) - 1
+
+ */
+ pow2div = num_ending_zeros (step);
period = build_low_bits_mask (type,
- (TYPE_PRECISION (type)
- - tree_low_cst (pow2div, 1)));
+ (TYPE_PRECISION (type)
+ - tree_low_cst (pow2div, 1)));
return period;
}
return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
+/* Strips from EXP the useless type conversions that do not change the
+   wrapping behavior of the value, i.e., conversions for which the source
+   and destination types agree on nowrap_type_p.  Returns the stripped
+   expression.  */
+
+static tree
+strip_wrap_conserving_type_conversions (tree exp)
+{
+ while (tree_ssa_useless_type_conversion (exp)
+ && (nowrap_type_p (TREE_TYPE (exp))
+ == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
+ exp = TREE_OPERAND (exp, 0);
+ return exp;
+}
+
+/* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
+ check for an exact match. */
+
+static bool
+expr_equal_p (tree e, tree what)
+{
+ gimple stmt;
+ enum tree_code code;
+
+ /* Conversions that preserve the wrapping behavior do not affect
+    the comparison, so look through them on both sides.  */
+ e = strip_wrap_conserving_type_conversions (e);
+ what = strip_wrap_conserving_type_conversions (what);
+
+ code = TREE_CODE (what);
+ if (TREE_TYPE (e) != TREE_TYPE (what))
+ return false;
+
+ if (operand_equal_p (e, what, 0))
+ return true;
+
+ if (TREE_CODE (e) != SSA_NAME)
+ return false;
+
+ /* E is an SSA name; compare its defining assignment operand-by-operand
+    against WHAT, recursing into the operands.  */
+ stmt = SSA_NAME_DEF_STMT (e);
+ if (gimple_code (stmt) != GIMPLE_ASSIGN
+ || gimple_assign_rhs_code (stmt) != code)
+ return false;
+
+ switch (get_gimple_rhs_class (code))
+ {
+ case GIMPLE_BINARY_RHS:
+ if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
+ return false;
+ /* Fallthru.  The first operand is checked the same way as for the
+    unary and single-rhs cases.  */
+
+ case GIMPLE_UNARY_RHS:
+ case GIMPLE_SINGLE_RHS:
+ return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
+ default:
+ return false;
+ }
+}
+
+/* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
+ we only detect the situation that BASE = SOMETHING + OFFSET, where the
+ calculation is performed in non-wrapping type.
+
+ TODO: More generally, we could test for the situation that
+ BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
+ This would require knowing the sign of OFFSET.
+
+ Also, we only look for the first addition in the computation of BASE.
+ More complex analysis would be better, but introducing it just for
+ this optimization seems like an overkill. */
+
+static bool
+difference_cannot_overflow_p (tree base, tree offset)
+{
+ enum tree_code code;
+ tree e1, e2;
+
+ if (!nowrap_type_p (TREE_TYPE (base)))
+ return false;
+
+ /* Expand simple definitions in BASE so that the addition we are
+    looking for becomes visible in the expression itself.  */
+ base = expand_simple_operations (base);
+
+ if (TREE_CODE (base) == SSA_NAME)
+ {
+ gimple stmt = SSA_NAME_DEF_STMT (base);
+
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
+ return false;
+
+ code = gimple_assign_rhs_code (stmt);
+ if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
+ return false;
+
+ e1 = gimple_assign_rhs1 (stmt);
+ e2 = gimple_assign_rhs2 (stmt);
+ }
+ else
+ {
+ code = TREE_CODE (base);
+ if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
+ return false;
+ e1 = TREE_OPERAND (base, 0);
+ e2 = TREE_OPERAND (base, 1);
+ }
+
+ /* TODO: deeper inspection may be necessary to prove the equality. */
+ switch (code)
+ {
+ case PLUS_EXPR:
+ return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
+ case POINTER_PLUS_EXPR:
+ /* For POINTER_PLUS_EXPR only the second operand is the offset.  */
+ return expr_equal_p (e2, offset);
+
+ default:
+ return false;
+ }
+}
+
+/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
+ comparison with CAND. NITER describes the number of iterations of
+ the loops. If successful, the comparison in COMP_P is altered accordingly.
+
+ We aim to handle the following situation:
+
+ sometype *base, *p;
+ int a, b, i;
+
+ i = a;
+ p = p_0 = base + a;
+
+ do
+ {
+ bla (*p);
+ p++;
+ i++;
+ }
+ while (i < b);
+
+ Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
+ We aim to optimize this to
+
+ p = p_0 = base + a;
+ do
+ {
+ bla (*p);
+ p++;
+ }
+ while (p < p_0 - a + b);
+
+ This preserves the correctness, since the pointer arithmetics does not
+ overflow. More precisely:
+
+ 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
+ overflow in computing it or the values of p.
+ 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
+ overflow. To prove this, we use the fact that p_0 = base + a.
+
+ Returns true if the replacement is possible, updating *COMP_P
+ accordingly; NITER itself is not modified.  */
+
+static bool
+iv_elimination_compare_lt (struct ivopts_data *data,
+ struct iv_cand *cand, enum tree_code *comp_p,
+ struct tree_niter_desc *niter)
+{
+ tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
+ struct affine_tree_combination nit, tmpa, tmpb;
+ enum tree_code comp;
+ HOST_WIDE_INT step;
+
+ /* We need to know that the candidate induction variable does not overflow.
+ While more complex analysis may be used to prove this, for now just
+ check that the variable appears in the original program and that it
+ is computed in a type that guarantees no overflows. */
+ cand_type = TREE_TYPE (cand->iv->base);
+ if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
+ return false;
+
+ /* Make sure that the loop iterates till the loop bound is hit, as otherwise
+ the calculation of the BOUND could overflow, making the comparison
+ invalid. */
+ if (!data->loop_single_exit_p)
+ return false;
+
+ /* We need to be able to decide whether candidate is increasing or decreasing
+ in order to choose the right comparison operator. */
+ if (!cst_and_fits_in_hwi (cand->iv->step))
+ return false;
+ step = int_cst_value (cand->iv->step);
+
+ /* Check that the number of iterations matches the expected pattern:
+ a + 1 > b ? 0 : b - a - 1. */
+ mbz = niter->may_be_zero;
+ if (TREE_CODE (mbz) == GT_EXPR)
+ {
+ /* Handle a + 1 > b. */
+ tree op0 = TREE_OPERAND (mbz, 0);
+ if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
+ {
+ a = TREE_OPERAND (op0, 0);
+ b = TREE_OPERAND (mbz, 1);
+ }
+ else
+ return false;
+ }
+ else if (TREE_CODE (mbz) == LT_EXPR)
+ {
+ tree op1 = TREE_OPERAND (mbz, 1);
+
+ /* Handle b < a + 1. */
+ if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
+ {
+ a = TREE_OPERAND (op1, 0);
+ b = TREE_OPERAND (mbz, 0);
+ }
+ else
+ return false;
+ }
+ else
+ return false;
+
+ /* Expected number of iterations is B - A - 1. Check that it matches
+ the actual number, i.e., that B - A - NITER = 1. */
+ tree_to_aff_combination (niter->niter, nit_type, &nit);
+ tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
+ tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
+ /* TMPB := B - A - NITER, computed by negating NIT and TMPA before
+    adding them to TMPB.  */
+ aff_combination_scale (&nit, double_int_minus_one);
+ aff_combination_scale (&tmpa, double_int_minus_one);
+ aff_combination_add (&tmpb, &tmpa);
+ aff_combination_add (&tmpb, &nit);
+ if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
+ return false;
+
+ /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
+ overflow. */
+ offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
+ cand->iv->step,
+ fold_convert (TREE_TYPE (cand->iv->step), a));
+ if (!difference_cannot_overflow_p (cand->iv->base, offset))
+ return false;
+
+ /* Determine the new comparison operator. */
+ comp = step < 0 ? GT_EXPR : LT_EXPR;
+ if (*comp_p == NE_EXPR)
+ *comp_p = comp;
+ else if (*comp_p == EQ_EXPR)
+ *comp_p = invert_tree_comparison (comp, false);
+ else
+ gcc_unreachable ();
+
+ return true;
+}
+
/* Check whether it is possible to express the condition in USE by comparison
- of candidate CAND. If so, store the value compared with to BOUND. */
+ of candidate CAND. If so, store the value compared with to BOUND, and the
+ comparison operator to COMP. */
static bool
may_eliminate_iv (struct ivopts_data *data,
- struct iv_use *use, struct iv_cand *cand, tree *bound)
+ struct iv_use *use, struct iv_cand *cand, tree *bound,
+ enum tree_code *comp)
{
basic_block ex_bb;
edge exit;
- tree nit, period;
+ tree period;
struct loop *loop = data->current_loop;
aff_tree bnd;
+ struct tree_niter_desc *desc = NULL;
if (TREE_CODE (cand->iv->step) != INTEGER_CST)
return false;
if (flow_bb_inside_loop_p (loop, exit->dest))
return false;
- nit = niter_for_exit (data, exit);
- if (!nit)
+ desc = niter_for_exit (data, exit);
+ if (!desc)
return false;
/* Determine whether we can use the variable to test the exit condition.
period = iv_period (cand->iv);
/* If the number of iterations is constant, compare against it directly. */
- if (TREE_CODE (nit) == INTEGER_CST)
- {
- if (!tree_int_cst_lt (nit, period))
- return false;
+ if (TREE_CODE (desc->niter) == INTEGER_CST)
+ {
+ /* See cand_value_at. */
+ if (stmt_after_increment (loop, cand, use->stmt))
+ {
+ if (!tree_int_cst_lt (desc->niter, period))
+ return false;
+ }
+ else
+ {
+ if (tree_int_cst_lt (period, desc->niter))
+ return false;
+ }
}
/* If not, and if this is the only possible exit of the loop, see whether
we can get a conservative estimate on the number of iterations of the
entire loop and compare against that instead. */
- else if (loop_only_exit_p (loop, exit))
+ else
{
double_int period_value, max_niter;
- if (!estimated_loop_iterations (loop, true, &max_niter))
- return false;
- period_value = tree_to_double_int (period);
- if (double_int_ucmp (max_niter, period_value) >= 0)
- return false;
- }
-
- /* Otherwise, punt. */
- else
- return false;
- cand_value_at (loop, cand, use->stmt, nit, &bnd);
+ max_niter = desc->max;
+ if (stmt_after_increment (loop, cand, use->stmt))
+ max_niter = double_int_add (max_niter, double_int_one);
+ period_value = tree_to_double_int (period);
+ if (double_int_ucmp (max_niter, period_value) > 0)
+ {
+ /* See if we can take advantage of inferred loop bound information. */
+ if (data->loop_single_exit_p)
+ {
+ if (!estimated_loop_iterations (loop, true, &max_niter))
+ return false;
+ /* The loop bound is already adjusted by adding 1. */
+ if (double_int_ucmp (max_niter, period_value) > 0)
+ return false;
+ }
+ else
+ return false;
+ }
+ }
+
+ cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
*bound = aff_combination_to_tree (&bnd);
+ *comp = iv_elimination_compare (data, use);
+
/* It is unlikely that computing the number of iterations using division
would be more profitable than keeping the original induction variable. */
if (expression_expensive_p (*bound))
return false;
+
+ /* Sometimes, it is possible to handle the situation that the number of
+ iterations may be zero unless additional assumptions hold, by using <
+ instead of != in the exit condition.
+
+ TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
+ base the exit condition on it. However, that is often too
+ expensive. */
+ if (!integer_zerop (desc->may_be_zero))
+ return iv_elimination_compare_lt (data, cand, comp, desc);
+
return true;
}
+ /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
+ be copied, if it is used in the loop body and DATA->body_includes_call. */
+
+static int
+parm_decl_cost (struct ivopts_data *data, tree bound)
+{
+ tree sbound = bound;
+ STRIP_NOPS (sbound);
+
+ /* NOTE(review): the gimple_nop_p check looks like it is detecting the
+    default definition of the parameter (an SSA name with no real defining
+    statement) -- confirm against SSA default-def conventions.  */
+ if (TREE_CODE (sbound) == SSA_NAME
+ && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
+ && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
+ && data->body_includes_call)
+ return COSTS_N_INSNS (1);
+
+ return 0;
+}
+
/* Determines cost of basing replacement of USE on CAND in a condition. */
static bool
tree bound = NULL_TREE;
struct iv *cmp_iv;
bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
- comp_cost elim_cost, express_cost, cost;
+ comp_cost elim_cost, express_cost, cost, bound_cost;
bool ok;
+ int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
tree *control_var, *bound_cst;
+ enum tree_code comp = ERROR_MARK;
/* Only consider real candidates. */
if (!cand->iv)
{
- set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
+ set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
+ ERROR_MARK, -1);
return false;
}
/* Try iv elimination. */
- if (may_eliminate_iv (data, use, cand, &bound))
+ if (may_eliminate_iv (data, use, cand, &bound, &comp))
{
elim_cost = force_var_cost (data, bound, &depends_on_elim);
+ if (elim_cost.cost == 0)
+ elim_cost.cost = parm_decl_cost (data, bound);
+ else if (TREE_CODE (bound) == INTEGER_CST)
+ elim_cost.cost = 0;
+ /* If we replace a loop condition 'i < n' with 'p < base + n',
+ depends_on_elim will have 'base' and 'n' set, which implies
+ that both 'base' and 'n' will be live during the loop. More likely,
+ 'base + n' will be loop invariant, resulting in only one live value
+ during the loop. So in that case we clear depends_on_elim and set
+ elim_inv_expr_id instead. */
+ if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
+ {
+ elim_inv_expr_id = get_expr_id (data, bound);
+ bitmap_clear (depends_on_elim);
+ }
/* The bound is a loop invariant, so it will be only computed
once. */
- elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
+ elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
}
else
elim_cost = infinite_cost;
 TODO: The constant that we're subtracting from the cost should
be target-dependent. This information should be added to the
target costs for each backend. */
- if (integer_zerop (*bound_cst)
+ if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
+ && integer_zerop (*bound_cst)
&& (operand_equal_p (*control_var, cand->var_after, 0)
|| operand_equal_p (*control_var, cand->var_before, 0)))
elim_cost.cost -= 1;
express_cost = get_computation_cost (data, use, cand, false,
- &depends_on_express, NULL);
+ &depends_on_express, NULL,
+ &express_inv_expr_id);
fd_ivopts_data = data;
walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
+ /* Count the cost of the original bound as well. */
+ bound_cost = force_var_cost (data, *bound_cst, NULL);
+ if (bound_cost.cost == 0)
+ bound_cost.cost = parm_decl_cost (data, *bound_cst);
+ else if (TREE_CODE (*bound_cst) == INTEGER_CST)
+ bound_cost.cost = 0;
+ express_cost.cost += bound_cost.cost;
+
/* Choose the better approach, preferring the eliminated IV. */
if (compare_costs (elim_cost, express_cost) <= 0)
{
cost = elim_cost;
depends_on = depends_on_elim;
depends_on_elim = NULL;
+ inv_expr_id = elim_inv_expr_id;
}
else
{
depends_on = depends_on_express;
depends_on_express = NULL;
bound = NULL_TREE;
+ comp = ERROR_MARK;
+ inv_expr_id = express_inv_expr_id;
}
- set_use_iv_cost (data, use, cand, cost, depends_on, bound);
+ set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
if (depends_on_elim)
BITMAP_FREE (depends_on_elim);
return false;
cost = get_computation_cost (data, use, cand, true, &depends_on,
- &can_autoinc);
+ &can_autoinc, NULL);
BITMAP_FREE (depends_on);
if (use->cost_map[j].depends_on)
bitmap_print (dump_file,
use->cost_map[j].depends_on, "","");
+ if (use->cost_map[j].inv_expr_id != -1)
+ fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
fprintf (dump_file, "\n");
}
base = cand->iv->base;
cost_base = force_var_cost (data, base, NULL);
+ /* It will be exceptional that the iv register happens to be initialized with
+ the proper value at no cost. In general, there will at least be a regcopy
+ or a const set. */
+ if (cost_base.cost == 0)
+ cost_base.cost = COSTS_N_INSNS (1);
cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
- cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
+ cost = cost_step + adjust_setup_cost (data, cost_base.cost);
/* Prefer the original ivs unless we may gain something by replacing it.
The reason is to make debugging simpler; so this is not relevant for
{
/* We add size to the cost, so that we prefer eliminating ivs
if possible. */
- return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
+ return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
+ data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty. */
struct loop *loop = data->current_loop;
bitmap_iterator bi;
- /* We use the following model (definitely improvable, especially the
- cost function -- TODO):
-
- We estimate the number of registers available (using MD data), name it A.
-
- We estimate the number of registers used by the loop, name it U. This
- number is obtained as the number of loop phi nodes (not counting virtual
- registers and bivs) + the number of variables from outside of the loop.
-
- We set a reserve R (free regs that are used for temporary computations,
- etc.). For now the reserve is a constant 3.
-
- Let I be the number of induction variables.
-
- -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
- make a lot of ivs without a reason).
- -- if A - R < U + I <= A, the cost is I * PRES_COST
- -- if U + I > A, the cost is I * PRES_COST and
- number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
-
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Global costs:\n");
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
+ fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
}
return false;
}
+
+/* Returns the cost pair for the candidate by which USE is expressed in
+ IVS, or NULL if USE is not expressed by any candidate in the set. */
+
+static struct cost_pair *
+iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
+{
+ return ivs->cand_for_use[use->id];
+}
+
/* Computes the cost field of IVS structure. */
static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
comp_cost cost = ivs->cand_use_cost;
+
cost.cost += ivs->cand_cost;
- cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
+
+ cost.cost += ivopts_global_cost_for_size (data,
+ ivs->n_regs + ivs->num_used_inv_expr);
ivs->cost = cost;
}
{
ivs->n_invariant_uses[iid]--;
if (ivs->n_invariant_uses[iid] == 0)
- ivs->n_regs--;
+ ivs->n_regs--;
}
}
ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
iv_ca_set_remove_invariants (ivs, cp->depends_on);
+
+ if (cp->inv_expr_id != -1)
+ {
+ ivs->used_inv_expr[cp->inv_expr_id]--;
+ if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
+ ivs->num_used_inv_expr--;
+ }
iv_ca_recount_cost (data, ivs);
}
{
ivs->n_invariant_uses[iid]++;
if (ivs->n_invariant_uses[iid] == 1)
- ivs->n_regs++;
+ ivs->n_regs++;
}
}
ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
iv_ca_set_add_invariants (ivs, cp->depends_on);
+
+ if (cp->inv_expr_id != -1)
+ {
+ ivs->used_inv_expr[cp->inv_expr_id]++;
+ if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
+ ivs->num_used_inv_expr++;
+ }
iv_ca_recount_cost (data, ivs);
}
}
/* Extend set IVS by expressing USE by some of the candidates in it
- if possible. */
+ if possible. All important candidates will be considered
+ if IMPORTANT_CANDIDATES is true. */
static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
- struct iv_use *use)
+ struct iv_use *use, bool important_candidates)
{
struct cost_pair *best_cp = NULL, *cp;
bitmap_iterator bi;
+ bitmap cands;
unsigned i;
gcc_assert (ivs->upto >= use->id);
ivs->bad_uses++;
}
- EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
+ cands = (important_candidates ? data->important_candidates : ivs->cands);
+ EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
{
- cp = get_use_iv_cost (data, use, iv_cand (data, i));
+ struct iv_cand *cand = iv_cand (data, i);
+
+ cp = get_use_iv_cost (data, use, cand);
if (cheaper_cost_pair (cp, best_cp))
best_cp = cp;
return l1;
}
-/* Returns candidate by that USE is expressed in IVS. */
-
-static struct cost_pair *
-iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
-{
- return ivs->cand_for_use[use->id];
-}
-
/* Reverse the list of changes DELTA, forming the inverse to it. */
static struct iv_ca_delta *
nw->cand_cost = 0;
nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
nw->cost = zero_cost;
+ nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
+ nw->num_used_inv_expr = 0;
return nw;
}
free ((*ivs)->n_cand_uses);
BITMAP_FREE ((*ivs)->cands);
free ((*ivs)->n_invariant_uses);
+ free ((*ivs)->used_inv_expr);
free (*ivs);
*ivs = NULL;
}
unsigned i;
comp_cost cost = iv_ca_cost (ivs);
- fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
- bitmap_print (file, ivs->cands, " candidates ","\n");
+ fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
+ fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
+ ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
+ bitmap_print (file, ivs->cands, " candidates: ","\n");
+
+ for (i = 0; i < ivs->upto; i++)
+ {
+ struct iv_use *use = iv_use (data, i);
+ struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
+ if (cp)
+ fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
+ use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
+ else
+ fprintf (file, " use:%d --> ??\n", use->id);
+ }
for (i = 1; i <= data->max_inv_id; i++)
if (ivs->n_invariant_uses[i])
fprintf (file, "%s%d", pref, i);
pref = ", ";
}
- fprintf (file, "\n");
+ fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use. Return cost of the
new set, and store differences in DELTA. Number of induction variables
- in the new set is stored to N_IVS. */
+ in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
+ the function will try to find a solution with minimal iv candidates. */
static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
struct iv_cand *cand, struct iv_ca_delta **delta,
- unsigned *n_ivs)
+ unsigned *n_ivs, bool min_ncand)
{
unsigned i;
comp_cost cost;
if (!new_cp)
continue;
- if (!iv_ca_has_deps (ivs, new_cp))
+ if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
continue;
- if (!cheaper_cost_pair (new_cp, old_cp))
- continue;
+ if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
+ continue;
*delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
}
cp = get_use_iv_cost (data, use, cnd);
if (!cp)
continue;
+
if (!iv_ca_has_deps (ivs, cp))
- continue;
+ continue;
if (!cheaper_cost_pair (cp, new_cp))
continue;
}
/* Tries to extend the sets IVS in the best possible way in order
- to express the USE. */
+ to express the USE. If ORIGINALP is true, prefer candidates from
+ the original set of IVs, otherwise favor important candidates not
+ based on any memory object. */
static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
- struct iv_use *use)
+ struct iv_use *use, bool originalp)
{
comp_cost best_cost, act_cost;
unsigned i;
struct iv_ca_delta *best_delta = NULL, *act_delta;
struct cost_pair *cp;
- iv_ca_add_use (data, ivs, use);
+ iv_ca_add_use (data, ivs, use, false);
best_cost = iv_ca_cost (ivs);
cp = iv_ca_cand_for_use (ivs, use);
+ if (!cp)
+ {
+ ivs->upto--;
+ ivs->bad_uses--;
+ iv_ca_add_use (data, ivs, use, true);
+ best_cost = iv_ca_cost (ivs);
+ cp = iv_ca_cand_for_use (ivs, use);
+ }
if (cp)
{
best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
iv_ca_set_no_cp (data, ivs, use);
}
- /* First try important candidates not based on any memory object. Only if
+ /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
+ first try important candidates not based on any memory object. Only if
this fails, try the specific ones. Rationale -- in loops with many
variables the best choice often is to use just one generic biv. If we
added here many ivs specific to the uses, the optimization algorithm later
{
cand = iv_cand (data, i);
- if (cand->iv->base_object != NULL_TREE)
+ if (originalp && cand->pos !=IP_ORIGINAL)
continue;
- if (iv_ca_cand_used_p (ivs, cand))
+ if (!originalp && cand->iv->base_object != NULL_TREE)
continue;
+ if (iv_ca_cand_used_p (ivs, cand))
+ continue;
+
cp = get_use_iv_cost (data, use, cand);
if (!cp)
continue;
iv_ca_set_cp (data, ivs, use, cp);
- act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
+ act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
+ true);
iv_ca_set_no_cp (data, ivs, use);
act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
continue;
/* Already tried this. */
- if (cand->important && cand->iv->base_object == NULL_TREE)
- continue;
+ if (cand->important)
+ {
+ if (originalp && cand->pos == IP_ORIGINAL)
+ continue;
+ if (!originalp && cand->iv->base_object == NULL_TREE)
+ continue;
+ }
if (iv_ca_cand_used_p (ivs, cand))
continue;
act_delta = NULL;
iv_ca_set_cp (data, ivs, use, cp);
- act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
+ act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
iv_ca_set_no_cp (data, ivs, use);
act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
cp, act_delta);
/* Finds an initial assignment of candidates to uses. */
static struct iv_ca *
-get_initial_solution (struct ivopts_data *data)
+get_initial_solution (struct ivopts_data *data, bool originalp)
{
struct iv_ca *ivs = iv_ca_new (data);
unsigned i;
for (i = 0; i < n_iv_uses (data); i++)
- if (!try_add_cand_for (data, ivs, iv_use (data, i)))
+ if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
{
iv_ca_free (&ivs);
return NULL;
if (iv_ca_cand_used_p (ivs, cand))
continue;
- acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
+ acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
if (!act_delta)
continue;
solution and remove the unused ivs while this improves the cost. */
static struct iv_ca *
-find_optimal_iv_set (struct ivopts_data *data)
+find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
- unsigned i;
struct iv_ca *set;
- struct iv_use *use;
/* Get the initial solution. */
- set = get_initial_solution (data);
+ set = get_initial_solution (data, originalp);
if (!set)
{
if (dump_file && (dump_flags & TDF_DETAILS))
}
}
+ return set;
+}
+
+static struct iv_ca *
+find_optimal_iv_set (struct ivopts_data *data)
+{
+ unsigned i;
+ struct iv_ca *set, *origset;
+ struct iv_use *use;
+ comp_cost cost, origcost;
+
+ /* Determine the cost based on a strategy that starts with original IVs,
+ and try again using a strategy that prefers candidates not based
+ on any IVs. */
+ origset = find_optimal_iv_set_1 (data, true);
+ set = find_optimal_iv_set_1 (data, false);
+
+ if (!origset && !set)
+ return NULL;
+
+ origcost = origset ? iv_ca_cost (origset) : infinite_cost;
+ cost = set ? iv_ca_cost (set) : infinite_cost;
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
- comp_cost cost = iv_ca_cost (set);
- fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
+ fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
+ origcost.cost, origcost.complexity);
+ fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
+ cost.cost, cost.complexity);
+ }
+
+ /* Choose the one with the best cost. */
+ if (compare_costs (origcost, cost) <= 0)
+ {
+ if (set)
+ iv_ca_free (&set);
+ set = origset;
}
+ else if (origset)
+ iv_ca_free (&origset);
for (i = 0; i < n_iv_uses (data); i++)
{
/* Rewrite the increment so that it uses var_before directly. */
find_interesting_uses_op (data, cand->var_after)->selected = cand;
-
return;
}
cand = iv_cand (data, i);
create_new_iv (data, cand);
}
-}
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nSelected IV set: \n");
+ EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
+ {
+ cand = iv_cand (data, i);
+ dump_cand (dump_file, cand);
+ }
+ fprintf (dump_file, "\n");
+ }
+}
/* Rewrites USE (definition of iv used in a nonlinear expression)
using candidate CAND. */
gcc_unreachable ();
}
- op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
- true, GSI_SAME_STMT);
+ if (!valid_gimple_rhs_p (comp)
+ || (gimple_code (use->stmt) != GIMPLE_PHI
+ /* We can't allow re-allocating the stmt as it might be pointed
+ to still. */
+ && (get_gimple_rhs_num_ops (TREE_CODE (comp))
+ >= gimple_num_ops (gsi_stmt (bsi)))))
+ {
+ comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ if (POINTER_TYPE_P (TREE_TYPE (tgt)))
+ {
+ duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
+ /* As this isn't a plain copy we have to reset alignment
+ information. */
+ if (SSA_NAME_PTR_INFO (comp))
+ {
+ SSA_NAME_PTR_INFO (comp)->align = 1;
+ SSA_NAME_PTR_INFO (comp)->misalign = 0;
+ }
+ }
+ }
if (gimple_code (use->stmt) == GIMPLE_PHI)
{
- ass = gimple_build_assign (tgt, op);
+ ass = gimple_build_assign (tgt, comp);
gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
bsi = gsi_for_stmt (use->stmt);
}
else
{
- gimple_assign_set_rhs_from_tree (&bsi, op);
+ gimple_assign_set_rhs_from_tree (&bsi, comp);
use->stmt = gsi_stmt (bsi);
}
}
-/* Replaces ssa name in index IDX by its basic variable. Callback for
- for_each_index. */
+/* Performs a peephole optimization to reorder the iv update statement with
+ a mem ref to enable instruction combining in later phases. The mem ref uses
+ the iv value before the update, so the reordering transformation requires
+ adjustment of the offset. CAND is the selected IV_CAND.
-static bool
-idx_remove_ssa_names (tree base, tree *idx,
- void *data ATTRIBUTE_UNUSED)
-{
- tree *op;
-
- if (TREE_CODE (*idx) == SSA_NAME)
- *idx = SSA_NAME_VAR (*idx);
+ Example:
- if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
- {
- op = &TREE_OPERAND (base, 2);
- if (*op
- && TREE_CODE (*op) == SSA_NAME)
- *op = SSA_NAME_VAR (*op);
- op = &TREE_OPERAND (base, 3);
- if (*op
- && TREE_CODE (*op) == SSA_NAME)
- *op = SSA_NAME_VAR (*op);
- }
+ t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
+ iv2 = iv1 + 1;
- return true;
-}
+ if (t < val) (1)
+ goto L;
+ goto Head;
-/* Unshares REF and replaces ssa names inside it by their basic variables. */
-static tree
-unshare_and_remove_ssa_names (tree ref)
-{
- ref = unshare_expr (ref);
- for_each_index (&ref, idx_remove_ssa_names, NULL);
+ directly propagating t over to (1) will introduce overlapping live range
+ thus increase register pressure. This peephole transform it into:
- return ref;
-}
-/* Copies the reference information from OLD_REF to NEW_REF. */
+ iv2 = iv1 + 1;
+ t = MEM_REF (base, iv2, 8, 8);
+ if (t < val)
+ goto L;
+ goto Head;
+*/
static void
-copy_ref_info (tree new_ref, tree old_ref)
+adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
- if (TREE_CODE (old_ref) == TARGET_MEM_REF)
- copy_mem_ref_info (new_ref, old_ref);
- else
- TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
+ tree var_after;
+ gimple iv_update, stmt;
+ basic_block bb;
+ gimple_stmt_iterator gsi, gsi_iv;
+
+ if (cand->pos != IP_NORMAL)
+ return;
+
+ var_after = cand->var_after;
+ iv_update = SSA_NAME_DEF_STMT (var_after);
+
+ bb = gimple_bb (iv_update);
+ gsi = gsi_last_nondebug_bb (bb);
+ stmt = gsi_stmt (gsi);
+
+ /* Only handle conditional statement for now. */
+ if (gimple_code (stmt) != GIMPLE_COND)
+ return;
+
+ /* The iv update must be the statement immediately preceding the
+    conditional at the end of the block.  */
+ gsi_prev_nondebug (&gsi);
+ stmt = gsi_stmt (gsi);
+ if (stmt != iv_update)
+ return;
+
+ gsi_prev_nondebug (&gsi);
+ if (gsi_end_p (gsi))
+ return;
+
+ /* ... and the statement immediately preceding the iv update must be
+    the mem-ref use itself, assigning to an SSA name.  */
+ stmt = gsi_stmt (gsi);
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
+ return;
+
+ if (stmt != use->stmt)
+ return;
+
+ if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
+ return;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Reordering \n");
+ print_gimple_stmt (dump_file, iv_update, 0, 0);
+ print_gimple_stmt (dump_file, use->stmt, 0, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Move the iv update before the use and record that the candidate is
+    now incremented before this use.  */
+ gsi = gsi_for_stmt (use->stmt);
+ gsi_iv = gsi_for_stmt (iv_update);
+ gsi_move_before (&gsi_iv, &gsi);
+
+ cand->pos = IP_BEFORE_USE;
+ cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND. */
aff_tree aff;
gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
tree base_hint = NULL_TREE;
- tree ref;
+ tree ref, iv;
bool ok;
+ adjust_iv_update_pos (cand, use);
ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
gcc_assert (ok);
unshare_aff_combination (&aff);
if (cand->iv->base_object)
base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
- ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
- data->speed);
+ iv = var_at_stmt (data->current_loop, cand, use->stmt);
+ ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
+ reference_alias_ptr_type (*use->op_p),
+ iv, base_hint, data->speed);
copy_ref_info (ref, *use->op_p);
*use->op_p = ref;
}
tree var_type = TREE_TYPE (var);
gimple_seq stmts;
- compare = iv_elimination_compare (data, use);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Replacing exit test: ");
+ print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
+ }
+ compare = cp->comp;
bound = unshare_expr (fold_convert (var_type, bound));
op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
if (stmts)
BITMAP_FREE (toremove);
}
+/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
+ for pointer_map_traverse. */
+
+static bool
+free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
+ void *data ATTRIBUTE_UNUSED)
+{
+ struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
+
+ free (niter);
+ /* Returning true tells pointer_map_traverse to keep iterating over the
+    remaining entries.  */
+ return true;
+}
+
/* Frees data allocated by the optimization of a single loop. */
static void
if (data->niters)
{
+ pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
pointer_map_destroy (data->niters);
data->niters = NULL;
}
struct version_info *info;
info = ver_info (data, i);
- if (info->iv)
- free (info->iv);
+ free (info->iv);
info->iv = NULL;
info->has_nonlin_use = false;
info->preserve_biv = false;
{
struct iv_cand *cand = iv_cand (data, i);
- if (cand->iv)
- free (cand->iv);
+ free (cand->iv);
if (cand->depends_on)
BITMAP_FREE (cand->depends_on);
free (cand);
data->max_inv_id = 0;
- for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
+ FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
SET_DECL_RTL (obj, NULL_RTX);
VEC_truncate (tree, decl_rtl_to_reset, 0);
+
+ htab_empty (data->inv_expr_tab);
+ data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass. LOOPS is the
VEC_free (tree, heap, decl_rtl_to_reset);
VEC_free (iv_use_p, heap, data->iv_uses);
VEC_free (iv_cand_p, heap, data->iv_candidates);
+ htab_delete (data->inv_expr_tab);
+}
+
+/* Returns true if the loop body BODY (an array of NUM_NODES basic blocks)
+ includes any function calls, not counting inexpensive builtins. */
+
+static bool
+loop_body_includes_call (basic_block *body, unsigned num_nodes)
+{
+ gimple_stmt_iterator gsi;
+ unsigned i;
+
+ /* Scan every statement of every block in the loop body.  */
+ for (i = 0; i < num_nodes; i++)
+ for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ /* Calls to inexpensive builtins are ignored; NOTE(review): presumably
+    because they do not behave like real calls for the cost/register
+    pressure estimates that consume this flag -- confirm against the
+    users of body_includes_call.  */
+ if (is_gimple_call (stmt)
+ && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
+ return true;
+ }
+ return false;
+}
/* Optimizes the LOOP. Returns true if anything changed. */
{
bool changed = false;
struct iv_ca *iv_ca;
- edge exit;
+ edge exit = single_dom_exit (loop);
basic_block *body;
gcc_assert (!data->niters);
{
fprintf (dump_file, "Processing loop %d\n", loop->num);
- exit = single_dom_exit (loop);
if (exit)
{
fprintf (dump_file, " single exit %d -> %d, exit condition ",
}
body = get_loop_body (loop);
+ data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
free (body);
+ data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
+
/* For each ssa name determines whether it behaves as an induction variable
in some loop. */
if (!find_induction_variables (data))