/* Loop invariant motion.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
- Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ Free Software Foundation, Inc.
This file is part of GCC.
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
-#include "rtl.h"
#include "tm_p.h"
-#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "params.h"
#include "tree-pass.h"
#include "flags.h"
-#include "real.h"
#include "hashtab.h"
#include "tree-affine.h"
#include "pointer-set.h"
/* Minimum cost of an expensive expression. */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
-/* The outermost loop for that execution of the header guarantees that the
+/* The outermost loop for which execution of the header guarantees that the
block will be executed. */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
+#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
static struct lim_aux_data *
init_lim_data (gimple stmt)
case SSA_NAME:
return cbck (*addr_p, addr_p, data);
- case MISALIGNED_INDIRECT_REF:
- case ALIGN_INDIRECT_REF:
- case INDIRECT_REF:
+ case MEM_REF:
nxt = &TREE_OPERAND (*addr_p, 0);
return cbck (*addr_p, nxt, data);
if (*idx
&& !cbck (*addr_p, idx, data))
return false;
+ idx = &TMR_INDEX2 (*addr_p);
+ if (*idx
+ && !cbck (*addr_p, idx, data))
+ return false;
return true;
default:
return MOVE_POSSIBLE;
}
+ if (gimple_code (stmt) == GIMPLE_PHI
+ && gimple_phi_num_args (stmt) <= 2
+ && is_gimple_reg (gimple_phi_result (stmt))
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
+ return MOVE_POSSIBLE;
+
if (gimple_get_lhs (stmt) == NULL_TREE)
return MOVE_IMPOSSIBLE;
unsigned cost = 1;
/* Always try to create possibilities for unswitching. */
- if (gimple_code (stmt) == GIMPLE_COND)
+ if (gimple_code (stmt) == GIMPLE_COND
+ || gimple_code (stmt) == GIMPLE_PHI)
return LIM_EXPENSIVE;
/* Hoisting memory references out should almost surely be a win. */
return ref;
}
+/* From a controlling predicate in DOM determine the arguments from
+ the PHI node PHI that are chosen if the predicate evaluates to
+ true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
+ they are non-NULL. Returns true if the arguments can be determined,
+ else return false. */
+
+static bool
+extract_true_false_args_from_phi (basic_block dom, gimple phi,
+ tree *true_arg_p, tree *false_arg_p)
+{
+ basic_block bb = gimple_bb (phi);
+ edge true_edge, false_edge, tem;
+ tree arg0 = NULL_TREE, arg1 = NULL_TREE;
+
+ /* We have to verify that one edge into the PHI node is dominated
+ by the true edge of the predicate block and the other edge
+ dominated by the false edge. This ensures that the PHI argument
+ we are going to take is completely determined by the path we
+ take from the predicate block.
+ We can only use BB dominance checks below if the destination of
+ the true/false edges are dominated by their edge, thus only
+ have a single predecessor. */
+ extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
+ /* Classify the first predecessor edge of BB: does it correspond to
+ the true path or the false path out of DOM? */
+ tem = EDGE_PRED (bb, 0);
+ if (tem == true_edge
+ || (single_pred_p (true_edge->dest)
+ && (tem->src == true_edge->dest
+ || dominated_by_p (CDI_DOMINATORS,
+ tem->src, true_edge->dest))))
+ arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
+ else if (tem == false_edge
+ || (single_pred_p (false_edge->dest)
+ && (tem->src == false_edge->dest
+ || dominated_by_p (CDI_DOMINATORS,
+ tem->src, false_edge->dest))))
+ arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
+ else
+ return false;
+ /* And likewise classify the second predecessor edge. */
+ tem = EDGE_PRED (bb, 1);
+ if (tem == true_edge
+ || (single_pred_p (true_edge->dest)
+ && (tem->src == true_edge->dest
+ || dominated_by_p (CDI_DOMINATORS,
+ tem->src, true_edge->dest))))
+ arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
+ else if (tem == false_edge
+ || (single_pred_p (false_edge->dest)
+ && (tem->src == false_edge->dest
+ || dominated_by_p (CDI_DOMINATORS,
+ tem->src, false_edge->dest))))
+ arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
+ else
+ return false;
+ /* Fail unless exactly one edge matched the true path and the
+ other matched the false path (both predecessors mapping to the
+ same path leaves one of ARG0/ARG1 unset). */
+ if (!arg0 || !arg1)
+ return false;
+
+ if (true_arg_p)
+ *true_arg_p = arg0;
+ if (false_arg_p)
+ *false_arg_p = arg1;
+
+ return true;
+}
+
/* Determine the outermost loop to which it is possible to hoist a statement
 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
 the outermost loop in which the value computed by STMT is invariant.
level = superloop_at_depth (loop, 1);
lim_data->max_loop = level;
- FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
- if (!add_dependency (val, lim_data, loop, true))
- return false;
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ {
+ use_operand_p use_p;
+ unsigned min_cost = UINT_MAX;
+ unsigned total_cost = 0;
+ struct lim_aux_data *def_data;
+
+ /* We will end up promoting dependencies to be unconditionally
+ evaluated. For this reason the PHI cost (and thus the
+ cost we remove from the loop by doing the invariant motion)
+ is that of the cheapest PHI argument dependency chain. */
+ FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
+ {
+ val = USE_FROM_PTR (use_p);
+ if (TREE_CODE (val) != SSA_NAME)
+ continue;
+ if (!add_dependency (val, lim_data, loop, false))
+ return false;
+ def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
+ if (def_data)
+ {
+ min_cost = MIN (min_cost, def_data->cost);
+ total_cost += def_data->cost;
+ }
+ }
+
+ lim_data->cost += min_cost;
+
+ if (gimple_phi_num_args (stmt) > 1)
+ {
+ basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+ gimple cond;
+ if (gsi_end_p (gsi_last_bb (dom)))
+ return false;
+ cond = gsi_stmt (gsi_last_bb (dom));
+ if (gimple_code (cond) != GIMPLE_COND)
+ return false;
+ /* Verify that this is an extended form of a diamond and
+ the PHI arguments are completely controlled by the
+ predicate in DOM. */
+ if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
+ return false;
+
+ /* Fold in dependencies and cost of the condition. */
+ FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
+ {
+ if (!add_dependency (val, lim_data, loop, false))
+ return false;
+ def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
+ if (def_data)
+ total_cost += def_data->cost;
+ }
+
+ /* We want to avoid unconditionally executing very expensive
+ operations. As costs for our dependencies cannot be
+ negative just claim we are not invariant for this case.
+ We also are not sure whether the control-flow inside the
+ loop will vanish. */
+ if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
+ && !(min_cost != 0
+ && total_cost / min_cost <= 2))
+ return false;
+
+ /* Assume that the control-flow in the loop will vanish.
+ ??? We should verify this and not artificially increase
+ the cost if that is not the case. */
+ lim_data->cost += stmt_cost (stmt);
+ }
+
+ return true;
+ }
+ else
+ FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
+ if (!add_dependency (val, lim_data, loop, true))
+ return false;
if (gimple_vuse (stmt))
{
add_referenced_var (var);
DECL_GIMPLE_REG_P (var) = 1;
- /* For vectors, create a VECTOR_CST full of 1's. */
- if (TREE_CODE (type) == VECTOR_TYPE)
- {
- int i, len;
- tree list = NULL_TREE;
- real_one = build_real (TREE_TYPE (type), dconst1);
- len = TYPE_VECTOR_SUBPARTS (type);
- for (i = 0; i < len; i++)
- list = tree_cons (NULL, real_one, list);
- real_one = build_vector (type, list);
- }
- else
- real_one = build_real (type, dconst1);
+ real_one = build_one_cst (type);
stmt1 = gimple_build_assign_with_ops (RDIV_EXPR,
var, real_one, gimple_assign_rhs2 (stmt));
fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
+ /* Look at PHI nodes, but only if there are at most two.
+ ??? We could relax this further by post-processing the inserted
+ code and transforming adjacent cond-exprs with the same predicate
+ to control flow again. */
+ bsi = gsi_start_phis (bb);
+ if (!gsi_end_p (bsi)
+ && ((gsi_next (&bsi), gsi_end_p (bsi))
+ || (gsi_next (&bsi), gsi_end_p (bsi))))
+ for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ {
+ stmt = gsi_stmt (bsi);
+
+ pos = movement_possibility (stmt);
+ if (pos == MOVE_IMPOSSIBLE)
+ continue;
+
+ lim_data = init_lim_data (stmt);
+ lim_data->always_executed_in = outermost;
+
+ if (!determine_max_movement (stmt, false))
+ {
+ lim_data->max_loop = NULL;
+ continue;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_gimple_stmt (dump_file, stmt, 2, 0);
+ fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
+ loop_depth (lim_data->max_loop),
+ lim_data->cost);
+ }
+
+ if (lim_data->cost >= LIM_EXPENSIVE)
+ set_profitable_level (stmt);
+ }
+
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
stmt = gsi_stmt (bsi);
for walk_dominator_tree. */
static void
-move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
+move_computations_stmt (struct dom_walk_data *dw_data,
basic_block bb)
{
struct loop *level;
if (!loop_outer (bb->loop_father))
return;
+ for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
+ {
+ gimple new_stmt;
+ stmt = gsi_stmt (bsi);
+
+ lim_data = get_lim_data (stmt);
+ if (lim_data == NULL)
+ {
+ gsi_next (&bsi);
+ continue;
+ }
+
+ cost = lim_data->cost;
+ level = lim_data->tgt_loop;
+ clear_lim_data (stmt);
+
+ if (!level)
+ {
+ gsi_next (&bsi);
+ continue;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Moving PHI node\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
+ cost, level->num);
+ }
+
+ if (gimple_phi_num_args (stmt) == 1)
+ {
+ tree arg = PHI_ARG_DEF (stmt, 0);
+ new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
+ gimple_phi_result (stmt),
+ arg, NULL_TREE);
+ SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
+ }
+ else
+ {
+ basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+ gimple cond = gsi_stmt (gsi_last_bb (dom));
+ tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
+ /* Get the PHI arguments corresponding to the true and false
+ edges of COND. */
+ extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
+ gcc_assert (arg0 && arg1);
+ t = build2 (gimple_cond_code (cond), boolean_type_node,
+ gimple_cond_lhs (cond), gimple_cond_rhs (cond));
+ new_stmt = gimple_build_assign_with_ops3 (COND_EXPR,
+ gimple_phi_result (stmt),
+ t, arg0, arg1);
+ SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
+ *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
+ }
+ gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
+ remove_phi_node (&bsi, false);
+ }
+
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
{
stmt = gsi_stmt (bsi);
/* Hoist the statements out of the loops prescribed by data stored in
 LIM_DATA structures associated with each statement. */
-static void
+static unsigned int
move_computations (void)
{
struct dom_walk_data walk_data;
+ unsigned int todo = 0;
memset (&walk_data, 0, sizeof (struct dom_walk_data));
+ walk_data.global_data = &todo;
walk_data.dom_direction = CDI_DOMINATORS;
walk_data.before_dom_children = move_computations_stmt;
gsi_commit_edge_inserts ();
if (need_ssa_update_p (cfun))
rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+
+ return todo;
}
/* Checks whether the statement defining variable *INDEX can be hoisted
if (!accs)
return;
- for (i = 0; VEC_iterate (mem_ref_loc_p, accs->locs, i, loc); i++)
+ FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc)
free (loc);
VEC_free (mem_ref_loc_p, heap, accs->locs);
free (accs);
BITMAP_FREE (mem->indep_ref);
BITMAP_FREE (mem->dep_ref);
- for (i = 0; VEC_iterate (mem_ref_locs_p, mem->accesses_in_loop, i, accs); i++)
+ FOR_EACH_VEC_ELT (mem_ref_locs_p, mem->accesses_in_loop, i, accs)
free_mem_ref_locs (accs);
VEC_free (mem_ref_locs_p, heap, mem->accesses_in_loop);
create_vop_ref_mapping ();
}
-/* Returns true if a region of size SIZE1 at position 0 and a region of
- size SIZE2 at position DIFF cannot overlap. */
-
-static bool
-cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2)
-{
- double_int d, bound;
-
- /* Unless the difference is a constant, we fail. */
- if (diff->n != 0)
- return false;
-
- d = diff->offset;
- if (double_int_negative_p (d))
- {
- /* The second object is before the first one, we succeed if the last
- element of the second object is before the start of the first one. */
- bound = double_int_add (d, double_int_add (size2, double_int_minus_one));
- return double_int_negative_p (bound);
- }
- else
- {
- /* We succeed if the second object starts after the first one ends. */
- return double_int_scmp (size1, d) <= 0;
- }
-}
-
/* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
tree_to_aff_combination_expand. */
aff_combination_scale (&off1, double_int_minus_one);
aff_combination_add (&off2, &off1);
- if (cannot_overlap_p (&off2, size1, size2))
+ if (aff_comb_cannot_overlap_p (&off2, size1, size2))
return false;
return true;
accs = VEC_index (mem_ref_locs_p, ref->accesses_in_loop, loop->num);
if (accs)
{
- for (i = 0; VEC_iterate (mem_ref_loc_p, accs->locs, i, loc); i++)
+ FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc)
VEC_safe_push (mem_ref_loc_p, heap, *locs, loc);
}
}
VEC (mem_ref_loc_p, heap) *locs = NULL;
get_all_locs_in_loop (loop, ref, &locs);
- for (i = 0; VEC_iterate (mem_ref_loc_p, locs, i, loc); i++)
+ FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc)
rewrite_mem_ref_loc (loc, tmp_var);
VEC_free (mem_ref_loc_p, heap, locs);
}
switch (TREE_CODE (ref))
{
- case MISALIGNED_INDIRECT_REF:
- case ALIGN_INDIRECT_REF:
- case INDIRECT_REF:
+ case MEM_REF:
+ case TARGET_MEM_REF:
gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
lsm_tmp_name_add ("_");
break;
+ case ADDR_EXPR:
+ gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
+ break;
+
case BIT_FIELD_REF:
case VIEW_CONVERT_EXPR:
case ARRAY_RANGE_REF:
all dependencies. */
gsi_insert_on_edge (loop_latch_edge (loop), load);
- for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
+ FOR_EACH_VEC_ELT (edge, exits, i, ex)
{
store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
gsi_insert_on_edge (ex, store);
}
}
-/* Returns true if REF is always accessed in LOOP. */
+/* Returns true if REF is always accessed in LOOP. If STORED_P is true
+ make sure REF is always stored to in LOOP. */
static bool
-ref_always_accessed_p (struct loop *loop, mem_ref_p ref)
+ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
{
VEC (mem_ref_loc_p, heap) *locs = NULL;
unsigned i;
mem_ref_loc_p loc;
bool ret = false;
struct loop *must_exec;
+ tree base;
+
+ base = get_base_address (ref->mem);
+ if (INDIRECT_REF_P (base)
+ || TREE_CODE (base) == MEM_REF)
+ base = TREE_OPERAND (base, 0);
get_all_locs_in_loop (loop, ref, &locs);
- for (i = 0; VEC_iterate (mem_ref_loc_p, locs, i, loc); i++)
+ FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc)
{
if (!get_lim_data (loc->stmt))
continue;
+ /* If we require an always executed store make sure the statement
+ stores to the reference. */
+ if (stored_p)
+ {
+ tree lhs;
+ if (!gimple_get_lhs (loc->stmt))
+ continue;
+ lhs = get_base_address (gimple_get_lhs (loc->stmt));
+ if (!lhs)
+ continue;
+ if (INDIRECT_REF_P (lhs)
+ || TREE_CODE (lhs) == MEM_REF)
+ lhs = TREE_OPERAND (lhs, 0);
+ if (lhs != base)
+ continue;
+ }
+
must_exec = get_lim_data (loc->stmt)->always_executed_in;
if (!must_exec)
continue;
static bool
can_sm_ref_p (struct loop *loop, mem_ref_p ref)
{
+ tree base;
+
/* Unless the reference is stored in the loop, there is nothing to do. */
if (!bitmap_bit_p (ref->stored, loop->num))
return false;
|| !for_each_index (&ref->mem, may_move_till, loop))
return false;
- /* If it can trap, it must be always executed in LOOP. */
- if (tree_could_trap_p (ref->mem)
- && !ref_always_accessed_p (loop, ref))
+ /* If it can throw, fail; we do not properly update EH info. */
+ if (tree_could_throw_p (ref->mem))
+ return false;
+
+ /* If it can trap, it must be always executed in LOOP.
+ Readonly memory locations may trap when storing to them, but
+ tree_could_trap_p is a predicate for rvalues, so check that
+ explicitly. */
+ base = get_base_address (ref->mem);
+ if ((tree_could_trap_p (ref->mem)
+ || (DECL_P (base) && TREE_READONLY (base)))
+ && !ref_always_accessed_p (loop, ref, true))
return false;
/* And it must be independent on all other memory references
unsigned i;
edge ex;
- for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
- if (ex->flags & EDGE_ABNORMAL)
+ FOR_EACH_VEC_ELT (edge, exits, i, ex)
+ if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
return false;
return true;
edge e;
struct loop *inn_loop = loop;
- if (!loop->header->aux)
+ if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
{
bbs = get_loop_body_in_dom_order (loop);
while (1)
{
- last->aux = loop;
+ SET_ALWAYS_EXECUTED_IN (last, loop);
if (last == loop->header)
break;
last = get_immediate_dominator (CDI_DOMINATORS, last);
htab_t h;
FOR_EACH_BB (bb)
- {
- bb->aux = NULL;
- }
+ SET_ALWAYS_EXECUTED_IN (bb, NULL);
pointer_map_destroy (lim_aux_data_map);
VEC_free (mem_ref_p, heap, memory_accesses.refs_list);
htab_delete (memory_accesses.refs);
- for (i = 0; VEC_iterate (bitmap, memory_accesses.refs_in_loop, i, b); i++)
+ FOR_EACH_VEC_ELT (bitmap, memory_accesses.refs_in_loop, i, b)
BITMAP_FREE (b);
VEC_free (bitmap, heap, memory_accesses.refs_in_loop);
- for (i = 0; VEC_iterate (bitmap, memory_accesses.all_refs_in_loop, i, b); i++)
+ FOR_EACH_VEC_ELT (bitmap, memory_accesses.all_refs_in_loop, i, b)
BITMAP_FREE (b);
VEC_free (bitmap, heap, memory_accesses.all_refs_in_loop);
- for (i = 0; VEC_iterate (bitmap, memory_accesses.clobbered_vops, i, b); i++)
+ FOR_EACH_VEC_ELT (bitmap, memory_accesses.clobbered_vops, i, b)
BITMAP_FREE (b);
VEC_free (bitmap, heap, memory_accesses.clobbered_vops);
- for (i = 0; VEC_iterate (htab_t, memory_accesses.vop_ref_map, i, h); i++)
+ FOR_EACH_VEC_ELT (htab_t, memory_accesses.vop_ref_map, i, h)
htab_delete (h);
VEC_free (htab_t, heap, memory_accesses.vop_ref_map);
/* Moves invariants from loops. Only "expensive" invariants are moved out --
i.e. those that are likely to be win regardless of the register pressure. */
-void
+unsigned int
tree_ssa_lim (void)
{
+ unsigned int todo;
+
tree_ssa_lim_initialize ();
/* Gathers information about memory accesses in the loops. */
store_motion ();
/* Move the expressions that are expensive enough. */
- move_computations ();
+ todo = move_computations ();
tree_ssa_lim_finalize ();
+
+ return todo;
}