diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 825fb71760a..ce5eb208850 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1,6 +1,6 @@
 /* Loop invariant motion.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
-   Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
+   Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -23,12 +23,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
-#include "rtl.h"
 #include "tm_p.h"
-#include "hard-reg-set.h"
 #include "basic-block.h"
 #include "output.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
 #include "tree-flow.h"
 #include "tree-dump.h"
 #include "timevar.h"
@@ -37,7 +36,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "tree-pass.h"
 #include "flags.h"
-#include "real.h"
 #include "hashtab.h"
 #include "tree-affine.h"
 #include "pointer-set.h"
@@ -137,8 +135,6 @@ typedef struct mem_ref
   VEC (mem_ref_locs_p, heap) *accesses_in_loop;
 				/* The locations of the accesses.  Vector
 				   indexed by the loop number.  */
-  bitmap vops;			/* Vops corresponding to this memory
-				   location.  */
 
   /* The following sets are computed on demand.  We keep both set and
      its complement, so that we know whether the information was
@@ -154,7 +150,7 @@ typedef struct mem_ref
   bitmap indep_ref;		/* The set of memory references on that this
 				   reference is independent.  */
-  bitmap dep_ref;		/* The complement of DEP_REF.  */
+  bitmap dep_ref;		/* The complement of INDEP_REF.  */
 } *mem_ref_p;
 
 DEF_VEC_P(mem_ref_p);
@@ -183,12 +179,9 @@ static struct
      subloops.  */
   VEC (bitmap, heap) *all_refs_in_loop;
 
-  /* The set of virtual operands clobbered in a given loop.  */
-  VEC (bitmap, heap) *clobbered_vops;
-
-  /* Map from the pair (loop, virtual operand) to the set of refs that
-     touch the virtual operand in the loop.  */
-  VEC (htab_t, heap) *vop_ref_map;
+  /* The set of memory references stored in each loop, including
+     subloops.  */
+  VEC (bitmap, heap) *all_refs_stored_in_loop;
 
   /* Cache for expanding memory addresses.  */
   struct pointer_map_t *ttae_cache;
@@ -199,9 +192,13 @@ static bool ref_indep_loop_p (struct loop *, mem_ref_p);
 /* Minimum cost of an expensive expression.  */
 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
 
-/* The outermost loop for that execution of the header guarantees that the
+/* The outermost loop for which execution of the header guarantees that the
    block will be executed.  */
 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
+#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
+
+/* Whether the reference was analyzable. 
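+   A reference is analyzable if a single memory expression could be
+   extracted for it; statements that touch memory in a way we cannot
+   represent get an entry with ->mem set to error_mark_node and are
+   treated conservatively, as conflicting with all other references
+   (see gather_mem_refs_stmt and refs_independent_p). 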
*/ +#define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node) static struct lim_aux_data * init_lim_data (gimple stmt) @@ -274,9 +271,7 @@ for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data) case SSA_NAME: return cbck (*addr_p, addr_p, data); - case MISALIGNED_INDIRECT_REF: - case ALIGN_INDIRECT_REF: - case INDIRECT_REF: + case MEM_REF: nxt = &TREE_OPERAND (*addr_p, 0); return cbck (*addr_p, nxt, data); @@ -330,6 +325,10 @@ for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data) if (*idx && !cbck (*addr_p, idx, data)) return false; + idx = &TMR_INDEX2 (*addr_p); + if (*idx + && !cbck (*addr_p, idx, data)) + return false; return true; default: @@ -359,6 +358,12 @@ movement_possibility (gimple stmt) return MOVE_POSSIBLE; } + if (gimple_code (stmt) == GIMPLE_PHI + && gimple_phi_num_args (stmt) <= 2 + && is_gimple_reg (gimple_phi_result (stmt)) + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt))) + return MOVE_POSSIBLE; + if (gimple_get_lhs (stmt) == NULL_TREE) return MOVE_IMPOSSIBLE; @@ -407,6 +412,26 @@ movement_possibility (gimple stmt) || gimple_could_trap_p (stmt)) return MOVE_PRESERVE_EXECUTION; + /* Non local loads in a transaction cannot be hoisted out. Well, + unless the load happens on every path out of the loop, but we + don't take this into account yet. */ + if (flag_tm + && gimple_in_transaction (stmt) + && gimple_assign_single_p (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); + if (DECL_P (rhs) && is_global_var (rhs)) + { + if (dump_file) + { + fprintf (dump_file, "Cannot hoist conditional load of "); + print_generic_expr (dump_file, rhs, TDF_SLIM); + fprintf (dump_file, " because it is in a transaction.\n"); + } + return MOVE_IMPOSSIBLE; + } + } + return ret; } @@ -502,27 +527,21 @@ add_dependency (tree def, struct lim_aux_data *data, struct loop *loop, return true; } -/* Returns an estimate for a cost of statement STMT. TODO -- the values here - are just ad-hoc constants. The estimates should be based on target-specific - values. */ +/* Returns an estimate for a cost of statement STMT. The values here + are just ad-hoc constants, similar to costs for inlining. */ static unsigned stmt_cost (gimple stmt) { - tree fndecl; - unsigned cost = 1; - /* Always try to create possibilities for unswitching. */ - if (gimple_code (stmt) == GIMPLE_COND) + if (gimple_code (stmt) == GIMPLE_COND + || gimple_code (stmt) == GIMPLE_PHI) return LIM_EXPENSIVE; - /* Hoisting memory references out should almost surely be a win. */ - if (gimple_references_memory_p (stmt)) - cost += 20; - + /* We should be hoisting calls if possible. */ if (is_gimple_call (stmt)) { - /* We should be hoisting calls if possible. */ + tree fndecl; /* Unless the call is a builtin_constant_p; this always folds to a constant, so moving it is useless. */ @@ -532,15 +551,24 @@ stmt_cost (gimple stmt) && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P) return 0; - return cost + 20; + return LIM_EXPENSIVE; } + /* Hoisting memory references out should almost surely be a win. 
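+     For instance, in a loop like
+
+       while (n--)
+         sum += *p;
+
+     the load from *p produces the same value on every iteration, so
+     performing it once in the preheader removes a memory access from
+     each iteration. 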
*/
+  if (gimple_references_memory_p (stmt))
+    return LIM_EXPENSIVE;
+
   if (gimple_code (stmt) != GIMPLE_ASSIGN)
-    return cost;
+    return 1;
 
   switch (gimple_assign_rhs_code (stmt))
     {
     case MULT_EXPR:
+    case WIDEN_MULT_EXPR:
+    case WIDEN_MULT_PLUS_EXPR:
+    case WIDEN_MULT_MINUS_EXPR:
+    case DOT_PROD_EXPR:
+    case FMA_EXPR:
     case TRUNC_DIV_EXPR:
     case CEIL_DIV_EXPR:
     case FLOOR_DIV_EXPR:
@@ -552,19 +580,31 @@ stmt_cost (gimple stmt)
     case TRUNC_MOD_EXPR:
     case RDIV_EXPR:
       /* Division and multiplication are usually expensive.  */
-      cost += 20;
-      break;
+      return LIM_EXPENSIVE;
 
     case LSHIFT_EXPR:
     case RSHIFT_EXPR:
-      cost += 20;
-      break;
+    case WIDEN_LSHIFT_EXPR:
+    case LROTATE_EXPR:
+    case RROTATE_EXPR:
+      /* Shifts and rotates are usually expensive.  */
+      return LIM_EXPENSIVE;
+
+    case CONSTRUCTOR:
+      /* Make vector construction cost proportional to the number
+         of elements.  */
+      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
+
+    case SSA_NAME:
+    case PAREN_EXPR:
+      /* Whether or not something is wrapped inside a PAREN_EXPR
+         should not change move cost.  Nor should an intermediate
+         unpropagated SSA name copy.  */
+      return 0;
 
     default:
-      break;
+      return 1;
     }
-
-  return cost;
 }
 
 /* Finds the outermost loop between OUTER and LOOP in that the memory reference
@@ -651,6 +691,70 @@ mem_ref_in_stmt (gimple stmt)
   return ref;
 }
 
+/* From a controlling predicate in DOM determine the arguments from
+   the PHI node PHI that are chosen if the predicate evaluates to
+   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
+   they are non-NULL.  Returns true if the arguments can be determined,
+   else returns false.  */
+
+static bool
+extract_true_false_args_from_phi (basic_block dom, gimple phi,
+                                  tree *true_arg_p, tree *false_arg_p)
+{
+  basic_block bb = gimple_bb (phi);
+  edge true_edge, false_edge, tem;
+  tree arg0 = NULL_TREE, arg1 = NULL_TREE;
+
+  /* We have to verify that one edge into the PHI node is dominated
+     by the true edge of the predicate block and the other edge
+     dominated by the false edge.  This ensures that the PHI argument
+     we are going to take is completely determined by the path we
+     take from the predicate block.
+     We can only use BB dominance checks below if the destinations of
+     the true/false edges are dominated by their edge, thus only
+     have a single predecessor. 
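+
+     As a sketch, for a simple diamond
+
+            if (cond)    <-- DOM
+             /      \
+         bb_true  bb_false
+             \      /
+          x = PHI <a, b>
+
+     this determines a as the true argument and b as the false one. 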
*/
+  extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
+  tem = EDGE_PRED (bb, 0);
+  if (tem == true_edge
+      || (single_pred_p (true_edge->dest)
+          && (tem->src == true_edge->dest
+              || dominated_by_p (CDI_DOMINATORS,
+                                 tem->src, true_edge->dest))))
+    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
+  else if (tem == false_edge
+           || (single_pred_p (false_edge->dest)
+               && (tem->src == false_edge->dest
+                   || dominated_by_p (CDI_DOMINATORS,
+                                      tem->src, false_edge->dest))))
+    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
+  else
+    return false;
+  tem = EDGE_PRED (bb, 1);
+  if (tem == true_edge
+      || (single_pred_p (true_edge->dest)
+          && (tem->src == true_edge->dest
+              || dominated_by_p (CDI_DOMINATORS,
+                                 tem->src, true_edge->dest))))
+    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
+  else if (tem == false_edge
+           || (single_pred_p (false_edge->dest)
+               && (tem->src == false_edge->dest
+                   || dominated_by_p (CDI_DOMINATORS,
+                                      tem->src, false_edge->dest))))
+    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
+  else
+    return false;
+  if (!arg0 || !arg1)
+    return false;
+
+  if (true_arg_p)
+    *true_arg_p = arg0;
+  if (false_arg_p)
+    *false_arg_p = arg1;
+
+  return true;
+}
+
 /* Determine the outermost loop to that it is possible to hoist a statement
    STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
    the outermost loop in that the value computed by STMT is invariant.
@@ -677,9 +781,81 @@ determine_max_movement (gimple stmt, bool must_preserve_exec)
   level = superloop_at_depth (loop, 1);
   lim_data->max_loop = level;
 
-  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
-    if (!add_dependency (val, lim_data, loop, true))
-      return false;
+  if (gimple_code (stmt) == GIMPLE_PHI)
+    {
+      use_operand_p use_p;
+      unsigned min_cost = UINT_MAX;
+      unsigned total_cost = 0;
+      struct lim_aux_data *def_data;
+
+      /* We will end up promoting dependencies to be unconditionally
+         evaluated.  For this reason the PHI cost (and thus the
+         cost we remove from the loop by doing the invariant motion)
+         is that of the cheapest PHI argument dependency chain.  */
+      FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
+        {
+          val = USE_FROM_PTR (use_p);
+          if (TREE_CODE (val) != SSA_NAME)
+            continue;
+          if (!add_dependency (val, lim_data, loop, false))
+            return false;
+          def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
+          if (def_data)
+            {
+              min_cost = MIN (min_cost, def_data->cost);
+              total_cost += def_data->cost;
+            }
+        }
+
+      lim_data->cost += min_cost;
+
+      if (gimple_phi_num_args (stmt) > 1)
+        {
+          basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+          gimple cond;
+          if (gsi_end_p (gsi_last_bb (dom)))
+            return false;
+          cond = gsi_stmt (gsi_last_bb (dom));
+          if (gimple_code (cond) != GIMPLE_COND)
+            return false;
+          /* Verify that this is an extended form of a diamond and
+             the PHI arguments are completely controlled by the
+             predicate in DOM.  */
+          if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
+            return false;
+
+          /* Fold in dependencies and cost of the condition.  */
+          FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
+            {
+              if (!add_dependency (val, lim_data, loop, false))
+                return false;
+              def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
+              if (def_data)
+                total_cost += def_data->cost;
+            }
+
+          /* We want to avoid unconditionally executing very expensive
+             operations.  As costs for our dependencies cannot be
+             negative, just claim we are not invariant for this case.
+             We also are not sure whether the control-flow inside the
+             loop will vanish. 
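+             For instance, assuming the default --param lim-expensive=20,
+             a PHI whose cheapest argument chain costs 1 while all of
+             its chains cost 50 together is rejected (we would evaluate
+             cost 49 unconditionally to save only cost 1), whereas one
+             with two chains of cost 20 and 30 is still handled as
+             invariant. 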
*/
+          if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
+              && !(min_cost != 0
+                   && total_cost / min_cost <= 2))
+            return false;
+
+          /* Assume that the control-flow in the loop will vanish.
+             ???  We should verify this and not artificially increase
+             the cost if that is not the case.  */
+          lim_data->cost += stmt_cost (stmt);
+        }
+
+      return true;
+    }
+  else
+    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
+      if (!add_dependency (val, lim_data, loop, true))
+        return false;
 
   if (gimple_vuse (stmt))
     {
@@ -774,19 +950,7 @@ rewrite_reciprocal (gimple_stmt_iterator *bsi)
   add_referenced_var (var);
   DECL_GIMPLE_REG_P (var) = 1;
 
-  /* For vectors, create a VECTOR_CST full of 1's.  */
-  if (TREE_CODE (type) == VECTOR_TYPE)
-    {
-      int i, len;
-      tree list = NULL_TREE;
-      real_one = build_real (TREE_TYPE (type), dconst1);
-      len = TYPE_VECTOR_SUBPARTS (type);
-      for (i = 0; i < len; i++)
-        list = tree_cons (NULL, real_one, list);
-      real_one = build_vector (type, list);
-    }
-  else
-    real_one = build_real (type, dconst1);
+  real_one = build_one_cst (type);
 
   stmt1 = gimple_build_assign_with_ops (RDIV_EXPR,
                                         var, real_one, gimple_assign_rhs2 (stmt));
@@ -920,6 +1084,43 @@ determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
     fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
              bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
 
+  /* Look at PHI nodes, but only if there are at most two.
+     ???  We could relax this further by post-processing the inserted
+     code and transforming adjacent cond-exprs with the same predicate
+     to control flow again.  */
+  bsi = gsi_start_phis (bb);
+  if (!gsi_end_p (bsi)
+      && ((gsi_next (&bsi), gsi_end_p (bsi))
+          || (gsi_next (&bsi), gsi_end_p (bsi))))
+    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+      {
+        stmt = gsi_stmt (bsi);
+
+        pos = movement_possibility (stmt);
+        if (pos == MOVE_IMPOSSIBLE)
+          continue;
+
+        lim_data = init_lim_data (stmt);
+        lim_data->always_executed_in = outermost;
+
+        if (!determine_max_movement (stmt, false))
+          {
+            lim_data->max_loop = NULL;
+            continue;
+          }
+
+        if (dump_file && (dump_flags & TDF_DETAILS))
+          {
+            print_gimple_stmt (dump_file, stmt, 2, 0);
+            fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
+                     loop_depth (lim_data->max_loop),
+                     lim_data->cost);
+          }
+
+        if (lim_data->cost >= LIM_EXPENSIVE)
+          set_profitable_level (stmt);
+      }
+
   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
     {
       stmt = gsi_stmt (bsi);
@@ -1021,7 +1222,7 @@ determine_invariantness (void)
    for walk_dominator_tree. 
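 
    A movable PHI node is materialized as straight-line code on the
    preheader edge; e.g. a two-argument PHI whose controlling predicate
    is invariant, as in
 
      if (a_1 < b_2)
        ...
      x_3 = PHI <c_4, d_5>
 
    is emitted as
 
      x_3 = a_1 < b_2 ? c_4 : d_5;
 
    in which case CFG cleanup is requested via DW_DATA->global_data. 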
*/ static void -move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED, +move_computations_stmt (struct dom_walk_data *dw_data, basic_block bb) { struct loop *level; @@ -1033,6 +1234,65 @@ move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED, if (!loop_outer (bb->loop_father)) return; + for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); ) + { + gimple new_stmt; + stmt = gsi_stmt (bsi); + + lim_data = get_lim_data (stmt); + if (lim_data == NULL) + { + gsi_next (&bsi); + continue; + } + + cost = lim_data->cost; + level = lim_data->tgt_loop; + clear_lim_data (stmt); + + if (!level) + { + gsi_next (&bsi); + continue; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Moving PHI node\n"); + print_gimple_stmt (dump_file, stmt, 0, 0); + fprintf (dump_file, "(cost %u) out of loop %d.\n\n", + cost, level->num); + } + + if (gimple_phi_num_args (stmt) == 1) + { + tree arg = PHI_ARG_DEF (stmt, 0); + new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg), + gimple_phi_result (stmt), + arg, NULL_TREE); + SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt; + } + else + { + basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb); + gimple cond = gsi_stmt (gsi_last_bb (dom)); + tree arg0 = NULL_TREE, arg1 = NULL_TREE, t; + /* Get the PHI arguments corresponding to the true and false + edges of COND. */ + extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1); + gcc_assert (arg0 && arg1); + t = build2 (gimple_cond_code (cond), boolean_type_node, + gimple_cond_lhs (cond), gimple_cond_rhs (cond)); + new_stmt = gimple_build_assign_with_ops3 (COND_EXPR, + gimple_phi_result (stmt), + t, arg0, arg1); + SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt; + *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg; + } + gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); + remove_phi_node (&bsi, false); + } + for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); ) { stmt = gsi_stmt (bsi); @@ -1076,12 +1336,14 @@ move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED, /* Hoist the statements out of the loops prescribed by data stored in LIM_DATA structures associated with each statement.*/ -static void +static unsigned int move_computations (void) { struct dom_walk_data walk_data; + unsigned int todo = 0; memset (&walk_data, 0, sizeof (struct dom_walk_data)); + walk_data.global_data = &todo; walk_data.dom_direction = CDI_DOMINATORS; walk_data.before_dom_children = move_computations_stmt; @@ -1092,6 +1354,8 @@ move_computations (void) gsi_commit_edge_inserts (); if (need_ssa_update_p (cfun)) rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + + return todo; } /* Checks whether the statement defining variable *INDEX can be hoisted @@ -1207,7 +1471,7 @@ free_mem_ref_locs (mem_ref_locs_p accs) if (!accs) return; - for (i = 0; VEC_iterate (mem_ref_loc_p, accs->locs, i, loc); i++) + FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc) free (loc); VEC_free (mem_ref_loc_p, heap, accs->locs); free (accs); @@ -1228,11 +1492,10 @@ memref_free (void *obj) BITMAP_FREE (mem->indep_ref); BITMAP_FREE (mem->dep_ref); - for (i = 0; VEC_iterate (mem_ref_locs_p, mem->accesses_in_loop, i, accs); i++) + FOR_EACH_VEC_ELT (mem_ref_locs_p, mem->accesses_in_loop, i, accs) free_mem_ref_locs (accs); VEC_free (mem_ref_locs_p, heap, mem->accesses_in_loop); - BITMAP_FREE (mem->vops); free (mem); } @@ -1252,7 +1515,6 @@ mem_ref_alloc (tree mem, unsigned hash, unsigned id) ref->indep_ref = BITMAP_ALLOC (NULL); ref->dep_ref = BITMAP_ALLOC 
(NULL); ref->accesses_in_loop = NULL; - ref->vops = BITMAP_ALLOC (NULL); return ref; } @@ -1319,9 +1581,7 @@ gather_mem_refs_stmt (struct loop *loop, gimple stmt) hashval_t hash; PTR *slot; mem_ref_p ref; - tree vname; bool is_stored; - bitmap clvops; unsigned id; if (!gimple_vuse (stmt)) @@ -1329,7 +1589,20 @@ gather_mem_refs_stmt (struct loop *loop, gimple stmt) mem = simple_mem_ref_in_stmt (stmt, &is_stored); if (!mem) - goto fail; + { + id = VEC_length (mem_ref_p, memory_accesses.refs_list); + ref = mem_ref_alloc (error_mark_node, 0, id); + VEC_safe_push (mem_ref_p, heap, memory_accesses.refs_list, ref); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Unanalyzed memory reference %u: ", id); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + } + if (gimple_vdef (stmt)) + mark_ref_stored (ref, loop); + record_mem_ref_loc (ref, loop, stmt, mem); + return; + } hash = iterative_hash_expr (*mem, 0); slot = htab_find_slot_with_hash (memory_accesses.refs, *mem, hash, INSERT); @@ -1356,15 +1629,8 @@ gather_mem_refs_stmt (struct loop *loop, gimple stmt) if (is_stored) mark_ref_stored (ref, loop); - if ((vname = gimple_vuse (stmt)) != NULL_TREE) - bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname))); record_mem_ref_loc (ref, loop, stmt, mem); return; - -fail: - clvops = VEC_index (bitmap, memory_accesses.clobbered_vops, loop->num); - if ((vname = gimple_vuse (stmt)) != NULL_TREE) - bitmap_set_bit (clvops, DECL_UID (SSA_NAME_VAR (vname))); } /* Gathers memory references in loops. */ @@ -1376,7 +1642,6 @@ gather_mem_refs_in_loops (void) basic_block bb; struct loop *loop; loop_iterator li; - bitmap clvo, clvi; bitmap lrefs, alrefs, alrefso; FOR_EACH_BB (bb) @@ -1389,8 +1654,8 @@ gather_mem_refs_in_loops (void) gather_mem_refs_stmt (loop, gsi_stmt (bsi)); } - /* Propagate the information about clobbered vops and accessed memory - references up the loop hierarchy. */ + /* Propagate the information about accessed memory references up + the loop hierarchy. */ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { lrefs = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num); @@ -1400,133 +1665,12 @@ gather_mem_refs_in_loops (void) if (loop_outer (loop) == current_loops->tree_root) continue; - clvi = VEC_index (bitmap, memory_accesses.clobbered_vops, loop->num); - clvo = VEC_index (bitmap, memory_accesses.clobbered_vops, - loop_outer (loop)->num); - bitmap_ior_into (clvo, clvi); - alrefso = VEC_index (bitmap, memory_accesses.all_refs_in_loop, loop_outer (loop)->num); bitmap_ior_into (alrefso, alrefs); } } -/* Element of the hash table that maps vops to memory references. */ - -struct vop_to_refs_elt -{ - /* DECL_UID of the vop. */ - unsigned uid; - - /* List of the all references. */ - bitmap refs_all; - - /* List of stored references. */ - bitmap refs_stored; -}; - -/* A hash function for struct vop_to_refs_elt object OBJ. */ - -static hashval_t -vtoe_hash (const void *obj) -{ - const struct vop_to_refs_elt *const vtoe = - (const struct vop_to_refs_elt *) obj; - - return vtoe->uid; -} - -/* An equality function for struct vop_to_refs_elt object OBJ1 with - uid of a vop OBJ2. */ - -static int -vtoe_eq (const void *obj1, const void *obj2) -{ - const struct vop_to_refs_elt *const vtoe = - (const struct vop_to_refs_elt *) obj1; - const unsigned *const uid = (const unsigned *) obj2; - - return vtoe->uid == *uid; -} - -/* A function to free the struct vop_to_refs_elt object. 
*/ - -static void -vtoe_free (void *obj) -{ - struct vop_to_refs_elt *const vtoe = - (struct vop_to_refs_elt *) obj; - - BITMAP_FREE (vtoe->refs_all); - BITMAP_FREE (vtoe->refs_stored); - free (vtoe); -} - -/* Records REF to hashtable VOP_TO_REFS for the index VOP. STORED is true - if the reference REF is stored. */ - -static void -record_vop_access (htab_t vop_to_refs, unsigned vop, unsigned ref, bool stored) -{ - void **slot = htab_find_slot_with_hash (vop_to_refs, &vop, vop, INSERT); - struct vop_to_refs_elt *vtoe; - - if (!*slot) - { - vtoe = XNEW (struct vop_to_refs_elt); - vtoe->uid = vop; - vtoe->refs_all = BITMAP_ALLOC (NULL); - vtoe->refs_stored = BITMAP_ALLOC (NULL); - *slot = vtoe; - } - else - vtoe = (struct vop_to_refs_elt *) *slot; - - bitmap_set_bit (vtoe->refs_all, ref); - if (stored) - bitmap_set_bit (vtoe->refs_stored, ref); -} - -/* Returns the set of references that access VOP according to the table - VOP_TO_REFS. */ - -static bitmap -get_vop_accesses (htab_t vop_to_refs, unsigned vop) -{ - struct vop_to_refs_elt *const vtoe = - (struct vop_to_refs_elt *) htab_find_with_hash (vop_to_refs, &vop, vop); - return vtoe->refs_all; -} - -/* Returns the set of stores that access VOP according to the table - VOP_TO_REFS. */ - -static bitmap -get_vop_stores (htab_t vop_to_refs, unsigned vop) -{ - struct vop_to_refs_elt *const vtoe = - (struct vop_to_refs_elt *) htab_find_with_hash (vop_to_refs, &vop, vop); - return vtoe->refs_stored; -} - -/* Adds REF to mapping from virtual operands to references in LOOP. */ - -static void -add_vop_ref_mapping (struct loop *loop, mem_ref_p ref) -{ - htab_t map = VEC_index (htab_t, memory_accesses.vop_ref_map, loop->num); - bool stored = bitmap_bit_p (ref->stored, loop->num); - bitmap clobbers = VEC_index (bitmap, memory_accesses.clobbered_vops, - loop->num); - bitmap_iterator bi; - unsigned vop; - - EXECUTE_IF_AND_COMPL_IN_BITMAP (ref->vops, clobbers, 0, vop, bi) - { - record_vop_access (map, vop, ref->id, stored); - } -} - /* Create a mapping from virtual operands to references that touch them in LOOP. 
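 
    Each reference stored in LOOP is entered into the
    all_refs_stored_in_loop bitmap of LOOP and of every loop enclosing
    it, so that stores remain visible when dependences are queried for
    outer loops. 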
*/ @@ -1542,8 +1686,15 @@ create_vop_ref_mapping_loop (struct loop *loop) EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi) { ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i); - for (sloop = loop; sloop != current_loops->tree_root; sloop = loop_outer (sloop)) - add_vop_ref_mapping (sloop, ref); + for (sloop = loop; sloop != current_loops->tree_root; + sloop = loop_outer (sloop)) + if (bitmap_bit_p (ref->stored, loop->num)) + { + bitmap refs_stored + = VEC_index (bitmap, memory_accesses.all_refs_stored_in_loop, + sloop->num); + bitmap_set_bit (refs_stored, ref->id); + } } } @@ -1569,7 +1720,6 @@ analyze_memory_references (void) { unsigned i; bitmap empty; - htab_t hempty; memory_accesses.refs = htab_create (100, memref_hash, memref_eq, memref_free); @@ -1578,10 +1728,8 @@ analyze_memory_references (void) number_of_loops ()); memory_accesses.all_refs_in_loop = VEC_alloc (bitmap, heap, number_of_loops ()); - memory_accesses.clobbered_vops = VEC_alloc (bitmap, heap, - number_of_loops ()); - memory_accesses.vop_ref_map = VEC_alloc (htab_t, heap, - number_of_loops ()); + memory_accesses.all_refs_stored_in_loop = VEC_alloc (bitmap, heap, + number_of_loops ()); for (i = 0; i < number_of_loops (); i++) { @@ -1590,9 +1738,7 @@ analyze_memory_references (void) empty = BITMAP_ALLOC (NULL); VEC_quick_push (bitmap, memory_accesses.all_refs_in_loop, empty); empty = BITMAP_ALLOC (NULL); - VEC_quick_push (bitmap, memory_accesses.clobbered_vops, empty); - hempty = htab_create (10, vtoe_hash, vtoe_eq, vtoe_free); - VEC_quick_push (htab_t, memory_accesses.vop_ref_map, hempty); + VEC_quick_push (bitmap, memory_accesses.all_refs_stored_in_loop, empty); } memory_accesses.ttae_cache = NULL; @@ -1601,33 +1747,6 @@ analyze_memory_references (void) create_vop_ref_mapping (); } -/* Returns true if a region of size SIZE1 at position 0 and a region of - size SIZE2 at position DIFF cannot overlap. */ - -static bool -cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2) -{ - double_int d, bound; - - /* Unless the difference is a constant, we fail. */ - if (diff->n != 0) - return false; - - d = diff->offset; - if (double_int_negative_p (d)) - { - /* The second object is before the first one, we succeed if the last - element of the second object is before the start of the first one. */ - bound = double_int_add (d, double_int_add (size2, double_int_minus_one)); - return double_int_negative_p (bound); - } - else - { - /* We succeed if the second object starts after the first one ends. */ - return double_int_scmp (size1, d) <= 0; - } -} - /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in tree_to_aff_combination_expand. 
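 
    The two addresses are expanded into affine combinations; if their
    difference is a compile-time constant and the byte ranges
    [0, size1) and [diff, diff + size2) cannot overlap, the references
    are independent. 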
*/ @@ -1656,7 +1775,7 @@ mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache) aff_combination_scale (&off1, double_int_minus_one); aff_combination_add (&off2, &off1); - if (cannot_overlap_p (&off2, size1, size2)) + if (aff_comb_cannot_overlap_p (&off2, size1, size2)) return false; return true; @@ -1694,7 +1813,7 @@ get_all_locs_in_loop (struct loop *loop, mem_ref_p ref, accs = VEC_index (mem_ref_locs_p, ref->accesses_in_loop, loop->num); if (accs) { - for (i = 0; VEC_iterate (mem_ref_loc_p, accs->locs, i, loc); i++) + FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc) VEC_safe_push (mem_ref_loc_p, heap, *locs, loc); } } @@ -1713,7 +1832,7 @@ rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var) VEC (mem_ref_loc_p, heap) *locs = NULL; get_all_locs_in_loop (loop, ref, &locs); - for (i = 0; VEC_iterate (mem_ref_loc_p, locs, i, loc); i++) + FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc) rewrite_mem_ref_loc (loc, tmp_var); VEC_free (mem_ref_loc_p, heap, locs); } @@ -1747,13 +1866,16 @@ gen_lsm_tmp_name (tree ref) switch (TREE_CODE (ref)) { - case MISALIGNED_INDIRECT_REF: - case ALIGN_INDIRECT_REF: - case INDIRECT_REF: + case MEM_REF: + case TARGET_MEM_REF: gen_lsm_tmp_name (TREE_OPERAND (ref, 0)); lsm_tmp_name_add ("_"); break; + case ADDR_EXPR: + gen_lsm_tmp_name (TREE_OPERAND (ref, 0)); + break; + case BIT_FIELD_REF: case VIEW_CONVERT_EXPR: case ARRAY_RANGE_REF: @@ -1875,7 +1997,7 @@ execute_sm (struct loop *loop, VEC (edge, heap) *exits, mem_ref_p ref) all dependencies. */ gsi_insert_on_edge (loop_latch_edge (loop), load); - for (i = 0; VEC_iterate (edge, exits, i, ex); i++) + FOR_EACH_VEC_ELT (edge, exits, i, ex) { store = gimple_build_assign (unshare_expr (ref->mem), tmp_var); gsi_insert_on_edge (ex, store); @@ -1914,11 +2036,12 @@ ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p) tree base; base = get_base_address (ref->mem); - if (INDIRECT_REF_P (base)) + if (INDIRECT_REF_P (base) + || TREE_CODE (base) == MEM_REF) base = TREE_OPERAND (base, 0); get_all_locs_in_loop (loop, ref, &locs); - for (i = 0; VEC_iterate (mem_ref_loc_p, locs, i, loc); i++) + FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc) { if (!get_lim_data (loc->stmt)) continue; @@ -1933,7 +2056,8 @@ ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p) lhs = get_base_address (gimple_get_lhs (loc->stmt)); if (!lhs) continue; - if (INDIRECT_REF_P (lhs)) + if (INDIRECT_REF_P (lhs) + || TREE_CODE (lhs) == MEM_REF) lhs = TREE_OPERAND (lhs, 0); if (lhs != base) continue; @@ -1965,6 +2089,9 @@ refs_independent_p (mem_ref_p ref1, mem_ref_p ref2) return true; if (bitmap_bit_p (ref1->dep_ref, ref2->id)) return false; + if (!MEM_ANALYZABLE (ref1) + || !MEM_ANALYZABLE (ref2)) + return false; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Querying dependency of refs %u and %u: ", @@ -2007,35 +2134,25 @@ record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep) static bool ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref) { - bitmap clobbers, refs_to_check, refs; + bitmap refs_to_check; unsigned i; bitmap_iterator bi; bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num); - htab_t map; mem_ref_p aref; - /* If the reference is clobbered, it is not independent. 
*/
-  clobbers = VEC_index (bitmap, memory_accesses.clobbered_vops, loop->num);
-  if (bitmap_intersect_p (ref->vops, clobbers))
-    return false;
-
-  refs_to_check = BITMAP_ALLOC (NULL);
-
-  map = VEC_index (htab_t, memory_accesses.vop_ref_map, loop->num);
-  EXECUTE_IF_AND_COMPL_IN_BITMAP (ref->vops, clobbers, 0, i, bi)
-    {
-      if (stored)
-        refs = get_vop_accesses (map, i);
-      else
-        refs = get_vop_stores (map, i);
-
-      bitmap_ior_into (refs_to_check, refs);
-    }
+  if (stored)
+    refs_to_check = VEC_index (bitmap,
+                               memory_accesses.all_refs_in_loop, loop->num);
+  else
+    refs_to_check = VEC_index (bitmap,
+                               memory_accesses.all_refs_stored_in_loop,
+                               loop->num);
 
   EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
     {
       aref = VEC_index (mem_ref_p, memory_accesses.refs_list, i);
-      if (!refs_independent_p (ref, aref))
+      if (!MEM_ANALYZABLE (aref)
+          || !refs_independent_p (ref, aref))
        {
          ret = false;
          record_indep_loop (loop, aref, false);
@@ -2043,7 +2160,6 @@ ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref)
        }
     }
 
-  BITMAP_FREE (refs_to_check);
   return ret;
 }
 
@@ -2078,6 +2194,10 @@ can_sm_ref_p (struct loop *loop, mem_ref_p ref)
 {
   tree base;
 
+  /* Can't hoist unanalyzable refs.  */
+  if (!MEM_ANALYZABLE (ref))
+    return false;
+
   /* Unless the reference is stored in the loop, there is nothing to do.  */
   if (!bitmap_bit_p (ref->stored, loop->num))
     return false;
@@ -2088,6 +2208,10 @@ can_sm_ref_p (struct loop *loop, mem_ref_p ref)
       || !for_each_index (&ref->mem, may_move_till, loop))
     return false;
 
+  /* If it can throw, fail; we do not properly update EH info.  */
+  if (tree_could_throw_p (ref->mem))
+    return false;
+
   /* If it can trap, it must be always executed in LOOP.
      Readonly memory locations may trap when storing to them, but
      tree_could_trap_p is a predicate for rvalues, so check that
@@ -2138,8 +2262,8 @@ loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
   unsigned i;
   edge ex;
 
-  for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
-    if (ex->flags & EDGE_ABNORMAL)
+  FOR_EACH_VEC_ELT (edge, exits, i, ex)
+    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
       return false;
 
   return true;
@@ -2199,7 +2323,7 @@ fill_always_executed_in (struct loop *loop, sbitmap contains_call)
   edge e;
   struct loop *inn_loop = loop;
 
-  if (!loop->header->aux)
+  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
     {
       bbs = get_loop_body_in_dom_order (loop);
 
@@ -2241,7 +2365,7 @@ fill_always_executed_in (struct loop *loop, sbitmap contains_call)
 
       while (1)
        {
-         last->aux = loop;
+         SET_ALWAYS_EXECUTED_IN (last, loop);
          if (last == loop->header)
            break;
          last = get_immediate_dominator (CDI_DOMINATORS, last);
@@ -2283,6 +2407,9 @@ tree_ssa_lim_initialize (void)
   sbitmap_free (contains_call);
 
   lim_aux_data_map = pointer_map_create ();
+
+  if (flag_tm)
+    compute_transaction_bits ();
 }
 
 /* Cleans up after the invariant motion pass. 
*/ @@ -2293,33 +2420,26 @@ tree_ssa_lim_finalize (void) basic_block bb; unsigned i; bitmap b; - htab_t h; FOR_EACH_BB (bb) - { - bb->aux = NULL; - } + SET_ALWAYS_EXECUTED_IN (bb, NULL); pointer_map_destroy (lim_aux_data_map); VEC_free (mem_ref_p, heap, memory_accesses.refs_list); htab_delete (memory_accesses.refs); - for (i = 0; VEC_iterate (bitmap, memory_accesses.refs_in_loop, i, b); i++) + FOR_EACH_VEC_ELT (bitmap, memory_accesses.refs_in_loop, i, b) BITMAP_FREE (b); VEC_free (bitmap, heap, memory_accesses.refs_in_loop); - for (i = 0; VEC_iterate (bitmap, memory_accesses.all_refs_in_loop, i, b); i++) + FOR_EACH_VEC_ELT (bitmap, memory_accesses.all_refs_in_loop, i, b) BITMAP_FREE (b); VEC_free (bitmap, heap, memory_accesses.all_refs_in_loop); - for (i = 0; VEC_iterate (bitmap, memory_accesses.clobbered_vops, i, b); i++) + FOR_EACH_VEC_ELT (bitmap, memory_accesses.all_refs_stored_in_loop, i, b) BITMAP_FREE (b); - VEC_free (bitmap, heap, memory_accesses.clobbered_vops); - - for (i = 0; VEC_iterate (htab_t, memory_accesses.vop_ref_map, i, h); i++) - htab_delete (h); - VEC_free (htab_t, heap, memory_accesses.vop_ref_map); + VEC_free (bitmap, heap, memory_accesses.all_refs_stored_in_loop); if (memory_accesses.ttae_cache) pointer_map_destroy (memory_accesses.ttae_cache); @@ -2328,9 +2448,11 @@ tree_ssa_lim_finalize (void) /* Moves invariants from loops. Only "expensive" invariants are moved out -- i.e. those that are likely to be win regardless of the register pressure. */ -void +unsigned int tree_ssa_lim (void) { + unsigned int todo; + tree_ssa_lim_initialize (); /* Gathers information about memory accesses in the loops. */ @@ -2345,7 +2467,9 @@ tree_ssa_lim (void) store_motion (); /* Move the expressions that are expensive enough. */ - move_computations (); + todo = move_computations (); tree_ssa_lim_finalize (); + + return todo; }