/* The currently optimized loop. */
struct loop *current_loop;
+ /* Are we optimizing for speed? */
+ bool speed;
+
/* Number of registers used in it. */
unsigned regs_used;
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
void *data ATTRIBUTE_UNUSED)
{
- if (TREE_CODE (base) == ARRAY_REF)
+ if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
return false;
reference out of the loop (in order to take its address in strength
reduction). In order for this to work we need both lower bound
and step to be loop invariants. */
- if (TREE_CODE (base) == ARRAY_REF)
+ if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
+ /* Moreover, for a range, the size needs to be invariant as well. */
+ if (TREE_CODE (base) == ARRAY_RANGE_REF
+ && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
+ return false;
+
step = array_ref_element_size (base);
lbound = array_ref_low_bound (base);
if (integer_zerop (iv->step))
return true;
- if (TREE_CODE (base) == ARRAY_REF)
+ if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
step = array_ref_element_size (base);
{
struct ivopts_data *data = (struct ivopts_data *) vdata;
find_interesting_uses_op (data, *idx);
- if (TREE_CODE (base) == ARRAY_REF)
+ if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
find_interesting_uses_op (data, array_ref_element_size (base));
find_interesting_uses_op (data, array_ref_low_bound (base));
non-addressability may be uncovered again, causing ADDR_EXPRs
of inappropriate objects to be built. */
if (is_gimple_reg (TREE_OPERAND (expr, 0))
- || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
+ || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
return true;
/* ... fall through ... */
return fold_convert (orig_type, expr);
case ARRAY_REF:
+ case ARRAY_RANGE_REF:
if (!inside_addr)
return orig_expr;
add_candidate (data, iv->base, iv->step, true, NULL);
/* The same, but with initial value zero. */
- add_candidate (data,
- build_int_cst (TREE_TYPE (iv->base), 0),
- iv->step, true, NULL);
+ if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
+ add_candidate (data, size_int (0), iv->step, true, NULL);
+ else
+ add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
+ iv->step, true, NULL);
phi = SSA_NAME_DEF_STMT (iv->ssa_name);
if (gimple_code (phi) == GIMPLE_PHI)
add_candidate (data, build_int_cst (basetype, 0),
iv->step, true, use);
- /* Third, try removing the constant offset. */
+ /* Third, try removing the constant offset. Make sure to even
+ add a candidate for &a[0] vs. (T *)&a. */
base = strip_offset (iv->base, &offset);
- if (offset)
+ if (offset
+ || base != iv->base)
add_candidate (data, base, iv->step, false, use);
}
/* Returns estimate on cost of computing SEQ. */
static unsigned
-seq_cost (rtx seq)
+seq_cost (rtx seq, bool speed)
{
unsigned cost = 0;
rtx set;
{
set = single_set (seq);
if (set)
- cost += rtx_cost (set, SET);
+ cost += rtx_cost (set, SET,speed);
else
cost++;
}
/* Determines cost of the computation of EXPR. */
static unsigned
-computation_cost (tree expr)
+computation_cost (tree expr, bool speed)
{
rtx seq, rslt;
tree type = TREE_TYPE (expr);
unsigned cost;
/* Avoid using hard regs in ways which may be unsupported. */
int regno = LAST_VIRTUAL_REGISTER + 1;
+ enum function_frequency real_frequency = cfun->function_frequency;
+ cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+ crtl->maybe_hot_insn_p = speed;
walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
start_sequence ();
rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
seq = get_insns ();
end_sequence ();
+ default_rtl_profile ();
+ cfun->function_frequency = real_frequency;
- cost = seq_cost (seq);
+ cost = seq_cost (seq, speed);
if (MEM_P (rslt))
- cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
+ cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
return cost;
}
/* Returns cost of addition in MODE. */
static unsigned
-add_cost (enum machine_mode mode)
+add_cost (enum machine_mode mode, bool speed)
{
static unsigned costs[NUM_MACHINE_MODES];
rtx seq;
seq = get_insns ();
end_sequence ();
- cost = seq_cost (seq);
+ cost = seq_cost (seq, speed);
if (!cost)
cost = 1;
/* Returns cost of multiplication by constant CST in MODE. */
unsigned
-multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
+multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
static htab_t costs;
struct mbc_entry **cached, act;
seq = get_insns ();
end_sequence ();
- cost = seq_cost (seq);
+ cost = seq_cost (seq, speed);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
- enum machine_mode mem_mode)
+ enum machine_mode mem_mode,
+ bool speed)
{
static bool initialized[MAX_MACHINE_MODE];
static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
seq = get_insns ();
end_sequence ();
- acost = seq_cost (seq);
- acost += address_cost (addr, mem_mode);
+ acost = seq_cost (seq, speed);
+ acost += address_cost (addr, mem_mode, speed);
if (!acost)
acost = 1;
If VAR_PRESENT is true, try whether the mode with
SYMBOL_PRESENT = false is cheaper even with cost of addition, and
if this is the case, use it. */
- add_c = add_cost (Pmode);
+ add_c = add_cost (Pmode, speed);
for (i = 0; i < 8; i++)
{
var_p = i & 1;
&& multiplier_allowed_in_address_p (ratio, mem_mode));
if (ratio != 1 && !ratio_p)
- cost += multiply_by_cost (ratio, Pmode);
+ cost += multiply_by_cost (ratio, Pmode, speed);
if (s_offset && !offset_p && !symbol_present)
- cost += add_cost (Pmode);
+ cost += add_cost (Pmode, speed);
acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
/* Estimates cost of forcing expression EXPR into a variable. */
static comp_cost
-force_expr_to_var_cost (tree expr)
+force_expr_to_var_cost (tree expr, bool speed)
{
static bool costs_initialized = false;
- static unsigned integer_cost;
- static unsigned symbol_cost;
- static unsigned address_cost;
+ static unsigned integer_cost [2];
+ static unsigned symbol_cost [2];
+ static unsigned address_cost [2];
tree op0, op1;
comp_cost cost0, cost1, cost;
enum machine_mode mode;
tree type = build_pointer_type (integer_type_node);
tree var, addr;
rtx x;
+ int i;
var = create_tmp_var_raw (integer_type_node, "test_var");
TREE_STATIC (var) = 1;
x = produce_memory_decl_rtl (var, NULL);
SET_DECL_RTL (var, x);
- integer_cost = computation_cost (build_int_cst (integer_type_node,
- 2000));
-
addr = build1 (ADDR_EXPR, type, var);
- symbol_cost = computation_cost (addr) + 1;
- address_cost
- = computation_cost (build2 (POINTER_PLUS_EXPR, type,
- addr,
- build_int_cst (sizetype, 2000))) + 1;
- if (dump_file && (dump_flags & TDF_DETAILS))
+
+ for (i = 0; i < 2; i++)
{
- fprintf (dump_file, "force_expr_to_var_cost:\n");
- fprintf (dump_file, " integer %d\n", (int) integer_cost);
- fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
- fprintf (dump_file, " address %d\n", (int) address_cost);
- fprintf (dump_file, " other %d\n", (int) target_spill_cost);
- fprintf (dump_file, "\n");
+ integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
+ 2000), i);
+
+ symbol_cost[i] = computation_cost (addr, i) + 1;
+
+ address_cost[i]
+ = computation_cost (build2 (POINTER_PLUS_EXPR, type,
+ addr,
+ build_int_cst (sizetype, 2000)), i) + 1;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
+ fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
+ fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
+ fprintf (dump_file, " address %d\n", (int) address_cost[i]);
+ fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
+ fprintf (dump_file, "\n");
+ }
}
costs_initialized = true;
if (is_gimple_min_invariant (expr))
{
if (TREE_CODE (expr) == INTEGER_CST)
- return new_cost (integer_cost, 0);
+ return new_cost (integer_cost [speed], 0);
if (TREE_CODE (expr) == ADDR_EXPR)
{
if (TREE_CODE (obj) == VAR_DECL
|| TREE_CODE (obj) == PARM_DECL
|| TREE_CODE (obj) == RESULT_DECL)
- return new_cost (symbol_cost, 0);
+ return new_cost (symbol_cost [speed], 0);
}
- return new_cost (address_cost, 0);
+ return new_cost (address_cost [speed], 0);
}
switch (TREE_CODE (expr))
if (is_gimple_val (op0))
cost0 = zero_cost;
else
- cost0 = force_expr_to_var_cost (op0);
+ cost0 = force_expr_to_var_cost (op0, speed);
if (is_gimple_val (op1))
cost1 = zero_cost;
else
- cost1 = force_expr_to_var_cost (op1);
+ cost1 = force_expr_to_var_cost (op1, speed);
break;
default:
/* Just an arbitrary value, FIXME. */
- return new_cost (target_spill_cost, 0);
+ return new_cost (target_spill_cost[speed], 0);
}
mode = TYPE_MODE (TREE_TYPE (expr));
case POINTER_PLUS_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
- cost = new_cost (add_cost (mode), 0);
+ cost = new_cost (add_cost (mode, speed), 0);
break;
case MULT_EXPR:
if (cst_and_fits_in_hwi (op0))
- cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
- else if (cst_and_fits_in_hwi (op1))
- cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
+ cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
+ else if (cst_and_fits_in_hwi (op1))
+ cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
else
- return new_cost (target_spill_cost, 0);
+ return new_cost (target_spill_cost [speed], 0);
break;
default:
computations often are either loop invariant or at least can
be shared between several iv uses, so letting this grow without
limits would not give reasonable results. */
- if (cost.cost > target_spill_cost)
- cost.cost = target_spill_cost;
+ if (cost.cost > target_spill_cost [speed])
+ cost.cost = target_spill_cost [speed];
return cost;
}
walk_tree (&expr, find_depends, depends_on, NULL);
}
- return force_expr_to_var_cost (expr);
+ return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset. The
*var_present = true;
fd_ivopts_data = data;
walk_tree (&addr, find_depends, depends_on, NULL);
- return new_cost (target_spill_cost, 0);
+ return new_cost (target_spill_cost[data->speed], 0);
}
*offset += bitpos / BITS_PER_UNIT;
{
HOST_WIDE_INT diff = 0;
comp_cost cost;
+ bool speed = optimize_loop_for_speed_p (data->current_loop);
gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
cost = force_var_cost (data, e1, depends_on);
cost = add_costs (cost, force_var_cost (data, e2, depends_on));
- cost.cost += add_cost (Pmode);
+ cost.cost += add_cost (Pmode, speed);
return cost;
}
if (integer_zerop (e1))
{
cost = force_var_cost (data, e2, depends_on);
- cost.cost += multiply_by_cost (-1, mode);
+ cost.cost += multiply_by_cost (-1, mode, data->speed);
return cost;
}
cost = force_var_cost (data, e1, depends_on);
cost = add_costs (cost, force_var_cost (data, e2, depends_on));
- cost.cost += add_cost (mode);
+ cost.cost += add_cost (mode, data->speed);
return cost;
}
comp_cost cost;
unsigned n_sums;
double_int rat;
+ bool speed = optimize_bb_for_speed_p (gimple_bb (at));
*depends_on = NULL;
else
{
cost = force_var_cost (data, cbase, depends_on);
- cost.cost += add_cost (TYPE_MODE (ctype));
+ cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
cost = add_costs (cost,
difference_cost (data,
ubase, build_int_cst (utype, 0),
if (address_p)
return add_costs (cost, get_address_cost (symbol_present, var_present,
offset, ratio,
- TYPE_MODE (TREE_TYPE (*use->op_p))));
+ TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
/* Otherwise estimate the costs for computing the expression. */
aratio = ratio > 0 ? ratio : -ratio;
if (!symbol_present && !var_present && !offset)
{
if (ratio != 1)
- cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
+ cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
return cost;
}
if (aratio != 1)
- cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
+ cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
n_sums = 1;
if (var_present
if (offset)
cost.complexity++;
- cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
+ cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
return cost;
fallback:
if (address_p)
comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
- return new_cost (computation_cost (comp), 0);
+ return new_cost (computation_cost (comp, speed), 0);
}
}
return false;
cand_value_at (loop, cand, use->stmt, nit, &bnd);
+
*bound = aff_combination_to_tree (&bnd);
+ /* It is unlikely that computing the number of iterations using division
+ would be more profitable than keeping the original induction variable. */
+ if (expression_expensive_p (*bound))
+ return false;
return true;
}
fd_ivopts_data = data;
walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
- /* Choose the better approach. */
- if (compare_costs (elim_cost, express_cost) < 0)
+ /* Choose the better approach, preferring the eliminated IV. */
+ if (compare_costs (elim_cost, express_cost) <= 0)
{
cost = elim_cost;
depends_on = depends_on_elim;
base = cand->iv->base;
cost_base = force_var_cost (data, base, NULL);
- cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
+ cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
{
/* We add size to the cost, so that we prefer eliminating ivs
if possible. */
- return size + estimate_reg_pressure_cost (size, data->regs_used);
+ return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
}
/* For each size of the induction variable set determine the penalty. */
{
fprintf (dump_file, "Global costs:\n");
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
- fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
- fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
+ fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
+ fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
}
n = 0;
static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
- return (ivs->bad_uses ? infinite_cost : ivs->cost);
+ /* This was a conditional expression but it triggered a bug in
+ Sun C 5.5. */
+ if (ivs->bad_uses)
+ return infinite_cost;
+ else
+ return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS. */
}
}
+/* Returns the phi-node in BB with result RESULT. */
+
+static gimple
+get_phi_with_result (basic_block bb, tree result)
+{
+ gimple_stmt_iterator i = gsi_start_phis (bb);
+
+ for (; !gsi_end_p (i); gsi_next (&i))
+ if (gimple_phi_result (gsi_stmt (i)) == result)
+ return gsi_stmt (i);
+
+ gcc_unreachable ();
+ return NULL;
+}
+
+
/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
is true, remove also the ssa name defined by the statement. */
static void
remove_statement (gimple stmt, bool including_defined_name)
{
- gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
-
if (gimple_code (stmt) == GIMPLE_PHI)
- remove_phi_node (&bsi, including_defined_name);
+ {
+ gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
+ gimple_phi_result (stmt));
+ gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
+ remove_phi_node (&bsi, including_defined_name);
+ }
else
{
+ gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
gsi_remove (&bsi, true);
release_defs (stmt);
}
if (TREE_CODE (*idx) == SSA_NAME)
*idx = SSA_NAME_VAR (*idx);
- if (TREE_CODE (base) == ARRAY_REF)
+ if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
op = &TREE_OPERAND (base, 2);
if (*op
gcc_assert (ok);
unshare_aff_combination (&aff);
- ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
+ ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
copy_ref_info (ref, *use->op_p);
*use->op_p = ref;
}
{
tree var = var_at_stmt (data->current_loop, cand, use->stmt);
tree var_type = TREE_TYPE (var);
+ gimple_seq stmts;
compare = iv_elimination_compare (data, use);
bound = unshare_expr (fold_convert (var_type, bound));
- op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
- true, GSI_SAME_STMT);
+ op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
+ if (stmts)
+ gsi_insert_seq_on_edge_immediate (
+ loop_preheader_edge (data->current_loop),
+ stmts);
gimple_cond_set_lhs (use->stmt, var);
gimple_cond_set_code (use->stmt, compare);
gcc_assert (!data->niters);
data->current_loop = loop;
+ data->speed = optimize_loop_for_speed_p (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
{