o Provide heuristics to clamp inlining of recursive template
calls? */
+
+/* Weights that estimate_num_insns uses for the inlining-profitability
+   heuristics.  Initialized in init_inline_once.  */
+
+eni_weights eni_inlining_weights;
+
+/* Weights that estimate_num_insns uses to estimate the size of the
+   produced code.  */
+
+eni_weights eni_size_weights;
+
+/* Weights that estimate_num_insns uses to estimate the time necessary
+   to execute the produced code.  */
+
+eni_weights eni_time_weights;
+
/* Prototypes. */
static tree declare_return_variable (copy_body_data *, tree, tree, tree *);
copy_basic_block = create_basic_block (NULL, (void *) 0,
(basic_block) bb->prev_bb->aux);
copy_basic_block->count = bb->count * count_scale / REG_BR_PROB_BASE;
- copy_basic_block->frequency = (bb->frequency
+
+ /* We are going to rebuild frequencies from scratch.  These values have
+ only minor importance; they merely drive canonicalize_loop_headers. */
+ copy_basic_block->frequency = ((gcov_type)bb->frequency
* frequency_scale / REG_BR_PROB_BASE);
+ if (copy_basic_block->frequency > BB_FREQ_MAX)
+ copy_basic_block->frequency = BB_FREQ_MAX;
copy_bsi = bsi_start (copy_basic_block);
for (bsi = bsi_start (bb);
{
stmt = bsi_stmt (copy_bsi);
call = get_call_expr_in (stmt);
+
+ /* Statements produced by inlining can be unfolded, especially
+ when we have constant propagated some operands.  We can't fold
+ them right now for two reasons:
+ 1) folding requires SSA_NAME_DEF_STMTs to be correct
+ 2) we can't change function calls to builtins.
+ So we just mark the statement for later folding.  We mark
+ all new statements, instead of just the statements that were
+ changed by some nontrivial substitution, so that even statements
+ made foldable indirectly are updated.  If this turns out to be
+ expensive, copy_body can be told to watch for nontrivial
+ changes. */
+ if (id->statements_to_fold)
+ pointer_set_insert (id->statements_to_fold, stmt);
/* We're duplicating a CALL_EXPR. Find any corresponding
callgraph edges and update or duplicate them. */
if (call && (decl = get_callee_fndecl (call)))
edge = cgraph_edge (id->src_node, orig_stmt);
if (edge)
cgraph_clone_edge (edge, id->dst_node, stmt,
- REG_BR_PROB_BASE, 1, true);
+ REG_BR_PROB_BASE, 1, edge->frequency, true);
break;
case CB_CGE_MOVE_CLONES:
gcc_assert (lookup_stmt_eh_region_fn (id->src_cfun, orig_stmt)
!= 0);
- if (tree_could_throw_p (stmt))
+ if (tree_could_throw_p (stmt)
+ /* When we are cloning for inlining, we are supposed to
+ construct a clone that calls precisely the same functions
+ as the original.  However, IPA optimizers might earlier have
+ proved some function calls to be non-trapping, which could
+ render some basic blocks dead and hence unreachable.
+
+ We can't update SSA with unreachable blocks in the CFG, so
+ we prevent that scenario by preserving even the "dead" EH
+ edges until the point where they are later removed by the
+ fixup_cfg pass. */
+ || (id->transform_call_graph_edges == CB_CGE_MOVE_CLONES
+ && lookup_stmt_eh_region_fn (id->src_cfun, orig_stmt) > 0))
{
int region = lookup_stmt_eh_region_fn (id->src_cfun, orig_stmt);
/* Add an entry for the copied tree in the EH hashtable.
/* Register specific tree functions. */
tree_register_cfg_hooks ();
*new_cfun = *DECL_STRUCT_FUNCTION (callee_fndecl);
+ new_cfun->funcdef_no = get_next_funcdef_no ();
VALUE_HISTOGRAMS (new_cfun) = NULL;
new_cfun->unexpanded_var_list = NULL;
new_cfun->cfg = NULL;
represent multiple variables for purposes of debugging. */
if (gimple_in_ssa_p (cfun) && rhs && def && is_gimple_reg (p)
&& (TREE_CODE (rhs) == SSA_NAME
- || is_gimple_min_invariant (rhs)))
+ || is_gimple_min_invariant (rhs))
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def))
{
insert_decl_map (id, def, rhs);
return;
}
/* Generate code to initialize the parameters of the function at the
- top of the stack in ID from the ARGS (presented as a TREE_LIST). */
+ top of the stack in ID from the CALL_EXPR EXP. */
static void
-initialize_inlined_parameters (copy_body_data *id, tree args, tree static_chain,
+initialize_inlined_parameters (copy_body_data *id, tree exp,
tree fn, basic_block bb)
{
tree parms;
tree p;
tree vars = NULL_TREE;
int argnum = 0;
+ call_expr_arg_iterator iter;
+ tree static_chain = CALL_EXPR_STATIC_CHAIN (exp);
/* Figure out what the parameters are. */
parms = DECL_ARGUMENTS (fn);
/* Loop through the parameter declarations, replacing each with an
equivalent VAR_DECL, appropriately initialized. */
- for (p = parms, a = args; p;
- a = a ? TREE_CHAIN (a) : a, p = TREE_CHAIN (p))
+ for (p = parms, a = first_call_expr_arg (exp, &iter); p;
+ a = next_call_expr_arg (&iter), p = TREE_CHAIN (p))
{
tree value;
/* Find the initializer. */
value = lang_hooks.tree_inlining.convert_parm_for_inlining
- (p, a ? TREE_VALUE (a) : NULL_TREE, fn, argnum);
+ (p, a, fn, argnum);
setup_one_parameter (id, p, value, fn, bb, &vars);
}
return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
}
+/* Bundle of arguments threaded through the walk_tree callback
+   estimate_num_insns_1.  */
+
+struct eni_data
+{
+  /* Running total of the estimated number of insns; the result of
+     the walk is accumulated here.  */
+  int count;
+
+  /* Weights attributed to the various constructs being counted.  */
+  eni_weights *weights;
+};
+
/* Used by estimate_num_insns. Estimate number of instructions seen
by given statement. */
static tree
estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
{
- int *count = (int *) data;
+ struct eni_data *d = data;
tree x = *tp;
+ unsigned cost;
if (IS_TYPE_OR_DECL_P (x))
{
/* Otherwise it's a store, so fall through to compute the move cost. */
case CONSTRUCTOR:
- *count += estimate_move_cost (TREE_TYPE (x));
+ d->count += estimate_move_cost (TREE_TYPE (x));
break;
/* Assign cost of 1 to usual operations.
case POSTDECREMENT_EXPR:
case POSTINCREMENT_EXPR:
- case SWITCH_EXPR:
-
case ASM_EXPR:
case REALIGN_LOAD_EXPR:
case VEC_INTERLEAVE_LOW_EXPR:
case RESX_EXPR:
- *count += 1;
+ d->count += 1;
+ break;
+
+ case SWITCH_EXPR:
+ /* TODO: Cost of a switch should be derived from the number of
+ branches. */
+ d->count += d->weights->switch_cost;
break;
/* Few special cases of expensive operations. This is useful
case FLOOR_MOD_EXPR:
case ROUND_MOD_EXPR:
case RDIV_EXPR:
- *count += 10;
+ d->count += d->weights->div_mod_cost;
break;
case CALL_EXPR:
{
tree decl = get_callee_fndecl (x);
- tree arg;
+ cost = d->weights->call_cost;
if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (decl))
{
return NULL_TREE;
case BUILT_IN_EXPECT:
return NULL_TREE;
+ /* Prefetch instruction is not expensive. */
+ case BUILT_IN_PREFETCH:
+ cost = 1;
+ break;
default:
break;
}
that does use function declaration to figure out the arguments. */
if (!decl)
{
- for (arg = TREE_OPERAND (x, 1); arg; arg = TREE_CHAIN (arg))
- *count += estimate_move_cost (TREE_TYPE (TREE_VALUE (arg)));
+ tree a;
+ call_expr_arg_iterator iter;
+ FOR_EACH_CALL_EXPR_ARG (a, iter, x)
+ d->count += estimate_move_cost (TREE_TYPE (a));
}
else
{
+ tree arg;
for (arg = DECL_ARGUMENTS (decl); arg; arg = TREE_CHAIN (arg))
- *count += estimate_move_cost (TREE_TYPE (arg));
+ d->count += estimate_move_cost (TREE_TYPE (arg));
}
- *count += PARAM_VALUE (PARAM_INLINE_CALL_COST);
+ d->count += cost;
break;
}
case OMP_CRITICAL:
case OMP_ATOMIC:
/* OpenMP directives are generally very expensive. */
- *count += 40;
+ d->count += d->weights->omp_cost;
break;
default:
return NULL;
}
-/* Estimate number of instructions that will be created by expanding EXPR. */
+/* Estimate number of instructions that will be created by expanding EXPR.
+ WEIGHTS contains weights attributed to various constructs. */
int
-estimate_num_insns (tree expr)
+estimate_num_insns (tree expr, eni_weights *weights)
{
- int num = 0;
struct pointer_set_t *visited_nodes;
basic_block bb;
block_stmt_iterator bsi;
struct function *my_function;
+ struct eni_data data;
+
+ data.count = 0;
+ data.weights = weights;
/* If we're given an entire function, walk the CFG. */
if (TREE_CODE (expr) == FUNCTION_DECL)
bsi_next (&bsi))
{
walk_tree (bsi_stmt_ptr (bsi), estimate_num_insns_1,
- &num, visited_nodes);
+ &data, visited_nodes);
}
}
pointer_set_destroy (visited_nodes);
}
else
- walk_tree_without_duplicates (&expr, estimate_num_insns_1, &num);
+ walk_tree_without_duplicates (&expr, estimate_num_insns_1, &data);
- return num;
+ return data.count;
+}
+
+/* Initialize the weight sets used by estimate_num_insns: one tuned for
+   the inlining heuristics, one approximating code size, and one
+   approximating execution time.  */
+
+void
+init_inline_once (void)
+{
+  /* Weights driving the inliner's profitability heuristics.  */
+  eni_inlining_weights.call_cost = PARAM_VALUE (PARAM_INLINE_CALL_COST);
+  eni_inlining_weights.div_mod_cost = 10;
+  eni_inlining_weights.switch_cost = 1;
+  eni_inlining_weights.omp_cost = 40;
+
+  /* Weights approximating the size of the produced code.  */
+  eni_size_weights.call_cost = 1;
+  eni_size_weights.div_mod_cost = 1;
+  eni_size_weights.switch_cost = 10;
+  eni_size_weights.omp_cost = 40;
+
+  /* Estimating time for a call is difficult, since we have no idea what
+     the called function does.  In the current uses of eni_time_weights,
+     underestimating the cost does less harm than overestimating it, so
+     we choose a rather small value here.  */
+  eni_time_weights.call_cost = 10;
+  eni_time_weights.div_mod_cost = 10;
+  eni_time_weights.switch_cost = 4;
+  eni_time_weights.omp_cost = 40;
}
typedef struct function *function_p;
tree use_retvar;
tree fn;
splay_tree st;
- tree args;
tree return_slot;
tree modify_dest;
location_t saved_location;
(incorrect node sharing is most common reason for missing edges. */
gcc_assert (dest->needed || !flag_unit_at_a_time);
cgraph_create_edge (id->dst_node, dest, stmt,
- bb->count, bb->loop_depth)->inline_failed
+ bb->count, CGRAPH_FREQ_BASE,
+ bb->loop_depth)->inline_failed
= N_("originally indirect function call not considered for inlining");
+ if (dump_file)
+ {
+ fprintf (dump_file, "Created new direct edge to %s",
+ cgraph_node_name (dest));
+ }
goto egress;
}
id->decl_map = splay_tree_new (splay_tree_compare_pointers,
NULL, NULL);
- /* Initialize the parameters. */
- args = TREE_OPERAND (t, 1);
-
/* Record the function we are about to inline. */
id->src_fn = fn;
id->src_node = cg_edge->callee;
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
- initialize_inlined_parameters (id, args, TREE_OPERAND (t, 2), fn, bb);
+ initialize_inlined_parameters (id, t, fn, bb);
if (DECL_INITIAL (fn))
add_lexical_block (id->block, remap_blocks (DECL_INITIAL (fn), id));
return false;
}
+/* Walk all basic blocks created after FIRST (i.e. those appended during
+   inlining) and try to fold every statement found in the STATEMENTS
+   pointer set.  Folding may prove a statement non-throwing, in which
+   case the EH edges that became dead are purged as well.  */
+static void
+fold_marked_statements (int first, struct pointer_set_t *statements)
+{
+  for (;first < n_basic_blocks;first++)
+    if (BASIC_BLOCK (first))
+      {
+        block_stmt_iterator bsi;
+        for (bsi = bsi_start (BASIC_BLOCK (first));
+             !bsi_end_p (bsi); bsi_next (&bsi))
+          if (pointer_set_contains (statements, bsi_stmt (bsi)))
+            {
+              tree old_stmt = bsi_stmt (bsi);
+              if (fold_stmt (bsi_stmt_ptr (bsi)))
+                {
+                  update_stmt (bsi_stmt (bsi));
+                  /* If folding changed a statement that could throw into
+                     one that cannot, the EH edges out of this block may
+                     now be dead; remove them.  */
+                  if (maybe_clean_or_replace_eh_stmt (old_stmt, bsi_stmt (bsi)))
+                    tree_purge_dead_eh_edges (BASIC_BLOCK (first));
+                }
+            }
+      }
+}
+
+/* Return true if BB has at least one abnormal outgoing edge
+   (e.g. a computed/non-local goto or EH edge marked EDGE_ABNORMAL).  */
+
+static inline bool
+has_abnormal_outgoing_edge_p (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    if (e->flags & EDGE_ABNORMAL)
+      return true;
+
+  return false;
+}
+
+/* When a basic block from the inlined function contains, in its middle,
+   a call with side effects that can make an abnormal goto, and it gets
+   inlined into a function with non-local labels, that call becomes a
+   potential non-local goto, so we need to add the appropriate abnormal
+   edges.  Also mark for renaming the variables behind PHI results at
+   non-local goto receiver labels, since new edges may have been added
+   into those blocks.  */
+
+static void
+make_nonlocal_label_edges (void)
+{
+  block_stmt_iterator bsi;
+  basic_block bb;
+
+  FOR_EACH_BB (bb)
+    {
+      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+        {
+          tree stmt = bsi_stmt (bsi);
+          if (tree_can_make_abnormal_goto (stmt))
+            {
+              if (stmt == bsi_stmt (bsi_last (bb)))
+                {
+                  /* The call already ends the block; just make sure the
+                     abnormal edges out of it exist.  */
+                  if (!has_abnormal_outgoing_edge_p (bb))
+                    make_abnormal_goto_edges (bb, true);
+                }
+              else
+                {
+                  /* The call is in the middle of the block: split after
+                     it so the abnormal edges can originate there.  */
+                  edge e = split_block (bb, stmt);
+                  bb = e->src;
+                  make_abnormal_goto_edges (bb, true);
+                }
+              break;
+            }
+
+          /* Update PHIs on nonlocal goto receivers we (possibly)
+             just created new edges into.  */
+          if (TREE_CODE (stmt) == LABEL_EXPR
+              && gimple_in_ssa_p (cfun))
+            {
+              tree target = LABEL_EXPR_LABEL (stmt);
+              if (DECL_NONLOCAL (target))
+                {
+                  tree phi;
+
+                  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+                    {
+                      /* PHI results at a non-local receiver must already
+                         occur in abnormal PHIs; rename their underlying
+                         symbols so update_ssa fixes the arguments.  */
+                      gcc_assert (SSA_NAME_OCCURS_IN_ABNORMAL_PHI
+                                  (PHI_RESULT (phi)));
+                      mark_sym_for_renaming
+                        (SSA_NAME_VAR (PHI_RESULT (phi)));
+                    }
+                }
+            }
+        }
+    }
+}
+
/* Expand calls to inline functions in the body of FN. */
-void
+unsigned int
optimize_inline_calls (tree fn)
{
copy_body_data id;
tree prev_fn;
basic_block bb;
+ int last = n_basic_blocks;
/* There is no point in performing inlining if errors have already
occurred -- and we might crash if we try to inline invalid
code. */
if (errorcount || sorrycount)
- return;
+ return 0;
/* Clear out ID. */
memset (&id, 0, sizeof (id));
id.transform_new_cfg = false;
id.transform_return_to_modify = true;
id.transform_lang_insert_block = false;
+ id.statements_to_fold = pointer_set_create ();
push_gimplify_context ();
gcc_assert (e->inline_failed);
}
#endif
- /* We need to rescale frequencies again to peak at REG_BR_PROB_BASE
- as inlining loops might increase the maximum. */
- if (ENTRY_BLOCK_PTR->count)
- counts_to_freqs ();
- if (gimple_in_ssa_p (cfun))
- {
- /* We make no attempts to keep dominance info up-to-date. */
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
- delete_unreachable_blocks ();
- update_ssa (TODO_update_ssa);
- fold_cond_expr_cond ();
- if (need_ssa_update_p ())
- update_ssa (TODO_update_ssa);
- }
- else
- fold_cond_expr_cond ();
+
+ /* We are not going to maintain the cgraph edges up to date.
+ Kill it so it won't confuse us. */
+ cgraph_node_remove_callees (id.dst_node);
+
+ fold_marked_statements (last, id.statements_to_fold);
+ pointer_set_destroy (id.statements_to_fold);
+ fold_cond_expr_cond ();
+ if (current_function_has_nonlocal_label)
+ make_nonlocal_label_edges ();
+ /* We make no attempts to keep dominance info up-to-date. */
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
/* It would be nice to check SSA/CFG/statement consistency here, but it is
not possible yet - the IPA passes might make various functions to not
throw and they don't care to proactively update local EH info. This is
done later in fixup_cfg pass that also execute the verification. */
+ return (TODO_update_ssa | TODO_cleanup_cfg
+ | (gimple_in_ssa_p (cfun) ? TODO_remove_unused_locals : 0)
+ | (profile_status != PROFILE_ABSENT ? TODO_rebuild_frequencies : 0));
}
/* FN is a function that has a complete body, and CLONE is a function whose
struct ipa_replace_map *replace_info;
basic_block old_entry_block;
tree t_step;
+ tree old_current_function_decl = current_function_decl;
gcc_assert (TREE_CODE (old_decl) == FUNCTION_DECL
&& TREE_CODE (new_decl) == FUNCTION_DECL);
old_version_node = cgraph_node (old_decl);
new_version_node = cgraph_node (new_decl);
- allocate_struct_function (new_decl);
- /* Cfun points to the new allocated function struct at this point. */
- cfun->function_end_locus = DECL_SOURCE_LOCATION (new_decl);
-
DECL_ARTIFICIAL (new_decl) = 1;
DECL_ABSTRACT_ORIGIN (new_decl) = DECL_ORIGIN (old_decl);
+ /* Prepare the data structures for the tree copy. */
+ memset (&id, 0, sizeof (id));
+
/* Generate a new name for the new version. */
if (!update_clones)
{
DECL_NAME (new_decl) = create_tmp_var_name (NULL);
SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
SET_DECL_RTL (new_decl, NULL_RTX);
+ id.statements_to_fold = pointer_set_create ();
}
-
- /* Prepare the data structures for the tree copy. */
- memset (&id, 0, sizeof (id));
id.decl_map = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
id.src_fn = old_decl;
/* Clean up. */
splay_tree_delete (id.decl_map);
- fold_cond_expr_cond ();
+ if (!update_clones)
+ {
+ fold_marked_statements (0, id.statements_to_fold);
+ pointer_set_destroy (id.statements_to_fold);
+ fold_cond_expr_cond ();
+ }
if (gimple_in_ssa_p (cfun))
{
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+ if (!update_clones)
+ delete_unreachable_blocks ();
update_ssa (TODO_update_ssa);
+ if (!update_clones)
+ {
+ fold_cond_expr_cond ();
+ if (need_ssa_update_p ())
+ update_ssa (TODO_update_ssa);
+ }
}
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);
pop_cfun ();
- current_function_decl = NULL;
+ current_function_decl = old_current_function_decl;
+ gcc_assert (!current_function_decl
+ || DECL_STRUCT_FUNCTION (current_function_decl) == cfun);
return;
}