There are three major parts of this file:
- cgraph_mark_inline implementation
+ cgraph_mark_inline_edge implementation
This function allows to mark given call inline and performs necessary
modifications of cgraph (production of the clones and updating overall
optimized allowing it to unfold abstraction penalty on C++ effectively and
cheaply.
- pass_ipa_early_inlining
-
- With profiling, the early inlining is also necessary to reduce
- instrumentation costs on program with high abstraction penalty (doing
- many redundant calls). This can't happen in parallel with early
- optimization and profile instrumentation, because we would end up
- re-instrumenting already instrumented function bodies we brought in via
- inlining.
-
- To avoid this, this pass is executed as IPA pass before profiling. It is
- simple wrapper to pass_early_inlining and ensures first inlining.
-
pass_ipa_inline
This is the main pass implementing simple greedy algorithm to do inlining
inlining of functions called once. The pass compute just so called inline
plan (representation of inlining to be done in callgraph) and unlike early
inlining it is not performing the inlining itself.
-
- pass_apply_inline
-
- This pass performs actual inlining according to pass_ipa_inline on given
- function. Possible the function body before inlining is saved when it is
- needed for further inlining later.
*/
#include "config.h"
return time;
}
-/* Estimate self time of the function after inlining WHAT into TO. */
+/* Estimate self size of the function after inlining WHAT into TO. */
static inline int
-cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
+cgraph_estimate_size_after_inlining (struct cgraph_node *to,
struct cgraph_node *what)
{
int size = ((what->global.size - inline_summary (what)->size_inlining_benefit)
- * times + to->global.size);
+ + to->global.size);
gcc_assert (size >= 0);
return size;
}
/* We may eliminate the need for out-of-line copy to be output.
In that case just go ahead and re-use it. */
if (!e->callee->callers->next_caller
+ /* Recursive inlining never wants the master clone to be overwritten. */
+ && update_original
+ /* FIXME: When the address of a DECL_EXTERNAL function is taken we can still remove its
+ offline copy, but we would need to keep unanalyzed node in the callgraph so
+ references can point to it. */
+ && !e->callee->address_taken
&& cgraph_can_remove_if_no_direct_calls_p (e->callee)
+ /* Inlining might enable more devirtualizing, so we want to remove
+ those only after all devirtualizable virtual calls are processed.
+ Lacking may edges in callgraph we just preserve them post
+ inlining. */
+ && (!DECL_VIRTUAL_P (e->callee->decl)
+ || (!DECL_COMDAT (e->callee->decl) && !DECL_EXTERNAL (e->callee->decl)))
/* Don't reuse if more than one function shares a comdat group.
If the other function(s) are needed, we need to emit even
this function out of line. */
&& !cgraph_new_nodes)
{
gcc_assert (!e->callee->global.inlined_to);
- if (e->callee->analyzed)
+ if (e->callee->analyzed && !DECL_EXTERNAL (e->callee->decl))
{
overall_size -= e->callee->global.size;
nfunctions_inlined++;
struct cgraph_edge *curr = e;
int freq;
+ /* Don't inline inlined edges. */
gcc_assert (e->inline_failed);
+ /* Don't even think of inlining inline clone. */
+ gcc_assert (!e->callee->global.inlined_to);
+
e->inline_failed = CIF_OK;
DECL_POSSIBLY_INLINED (e->callee->decl) = true;
{
to = e->caller;
old_size = e->caller->global.size;
- new_size = cgraph_estimate_size_after_inlining (1, to, what);
+ new_size = cgraph_estimate_size_after_inlining (to, what);
to->global.size = new_size;
to->global.time = cgraph_estimate_time_after_inlining (freq, to, what);
}
overall_size += new_size - old_size;
ncalls_inlined++;
- if (flag_indirect_inlining)
+ /* FIXME: We should remove the optimize check after we ensure we never run
+ IPA passes when not optimizing. */
+ if (flag_indirect_inlining && optimize)
return ipa_propagate_indirect_call_infos (curr, new_edges);
else
return false;
}
-/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER. */
-
-static void
-cgraph_mark_inline (struct cgraph_edge *edge)
-{
- struct cgraph_node *to = edge->caller;
- struct cgraph_node *what = edge->callee;
- struct cgraph_edge *e, *next;
-
- gcc_assert (!edge->call_stmt_cannot_inline_p);
- /* Look for all calls, mark them inline and clone recursively
- all inlined functions. */
- for (e = what->callers; e; e = next)
- {
- next = e->next_caller;
- if (e->caller == to && e->inline_failed)
- {
- cgraph_mark_inline_edge (e, true, NULL);
- if (e == edge)
- edge = next;
- }
- }
-}
-
/* Estimate the growth caused by inlining NODE into all callees. */
static int
if (e->caller == node)
self_recursive = true;
if (e->inline_failed)
- growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
+ growth += (cgraph_estimate_size_after_inlining (e->caller, node)
- e->caller->global.size);
}
if (cgraph_will_be_removed_from_program_if_no_direct_calls (node)
&& !DECL_EXTERNAL (node->decl) && !self_recursive)
growth -= node->global.size;
+ /* COMDAT functions are very often not shared across multiple units since they
+ come from various template instantiations. Take this into account. */
+ else if (DECL_COMDAT (node->decl) && !self_recursive
+ && cgraph_can_remove_if_no_direct_calls_p (node))
+ growth -= (node->global.size
+ * (100 - PARAM_VALUE (PARAM_COMDAT_SHARING_PROBABILITY)) + 50) / 100;
node->global.estimated_growth = growth;
return growth;
static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
- cgraph_inline_failed_t *reason, bool one_only)
+ cgraph_inline_failed_t *reason)
{
- int times = 0;
- struct cgraph_edge *e;
int newsize;
int limit;
HOST_WIDE_INT stack_size_limit, inlined_stack;
- if (one_only)
- times = 1;
- else
- for (e = to->callees; e; e = e->next_callee)
- if (e->callee == what)
- times++;
-
if (to->global.inlined_to)
to = to->global.inlined_to;
/* Check the size after inlining against the function limits. But allow
the function to shrink if it went over the limits by forced inlining. */
- newsize = cgraph_estimate_size_after_inlining (times, to, what);
+ newsize = cgraph_estimate_size_after_inlining (to, what);
if (newsize >= to->global.size
&& newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
&& newsize > limit)
*reason = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
return false;
}
-
if (!n->analyzed)
{
if (reason)
*reason = CIF_BODY_NOT_AVAILABLE;
return false;
}
+ if (cgraph_function_body_availability (n) <= AVAIL_OVERWRITABLE)
+ {
+ if (reason)
+ *reason = CIF_OVERWRITABLE;
+ return false;
+ }
+
if (DECL_DECLARED_INLINE_P (decl))
{
{
gcov_type badness;
int growth =
- (cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee)
+ (cgraph_estimate_size_after_inlining (edge->caller, edge->callee)
- edge->caller->global.size);
if (edge->callee->local.disregard_inline_limits)
cgraph_inline_failed_t failed_reason;
if (!node->local.inlinable
+ || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
|| node->global.inlined_to)
return;
- if (bitmap_bit_p (updated_nodes, node->uid))
+ if (!bitmap_set_bit (updated_nodes, node->uid))
return;
- bitmap_set_bit (updated_nodes, node->uid);
node->global.estimated_growth = INT_MIN;
/* See if there is something to do. */
{
if (e->inline_failed
&& e->callee->local.inlinable
+ && cgraph_function_body_availability (e->callee) >= AVAIL_AVAILABLE
&& !bitmap_bit_p (updated_nodes, e->callee->uid))
{
node->global.estimated_growth = INT_MIN;
/* Make sure that function is small enough to be considered for inlining. */
if (!max_depth
- || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
+ || cgraph_estimate_size_after_inlining (node, node) >= limit)
return false;
heap = fibheap_new ();
lookup_recursive_calls (node, node, heap);
master_clone = cgraph_clone_node (node, node->decl,
node->count, CGRAPH_FREQ_BASE, 1,
false, NULL);
- master_clone->needed = true;
for (e = master_clone->callees; e; e = e->next_callee)
if (!e->inline_failed)
cgraph_clone_inlined_nodes (e, true, false);
/* Do the inlining and update list of recursive call during process. */
while (!fibheap_empty (heap)
- && (cgraph_estimate_size_after_inlining (1, node, master_clone)
+ && (cgraph_estimate_size_after_inlining (node, master_clone)
<= limit))
{
struct cgraph_edge *curr
gcc_assert (!edge->aux);
if (edge->callee->local.inlinable
+ && edge->inline_failed
&& cgraph_default_inline_p (edge->callee, &edge->inline_failed))
edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge, false), edge);
}
callee = edge->callee;
- growth = (cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee)
+ growth = (cgraph_estimate_size_after_inlining (edge->caller, edge->callee)
- edge->caller->global.size);
if (dump_file)
}
continue;
}
- if (!tree_can_inline_p (edge))
+ if (!tree_can_inline_p (edge)
+ || edge->call_stmt_cannot_inline_p)
{
if (dump_file)
fprintf (dump_file, " inline_failed:%s.\n",
else
{
struct cgraph_node *callee;
- if (edge->call_stmt_cannot_inline_p
- || !cgraph_check_inline_limits (edge->caller, edge->callee,
- &edge->inline_failed, true))
+ if (!cgraph_check_inline_limits (edge->caller, edge->callee,
+ &edge->inline_failed))
{
if (dump_file)
fprintf (dump_file, " Not inlining into %s:%s.\n",
struct cgraph_node *orig_callee;
if (e->call_stmt_cannot_inline_p)
- continue;
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not inlining: %s",
+ cgraph_inline_failed_string (e->inline_failed));
+ continue;
+ }
if (!e->callee->analyzed)
{
continue;
}
+ if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
+ != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not inlining: SSA form does not match.\n");
+ continue;
+ }
+
/* Inline the edge and flatten the inline clone. Avoid
recursing through the original node if the node was cloned. */
if (dump_file)
struct cgraph_edge *e;
gcc_assert (inline_summary (node)->self_size == node->global.size);
- initial_size += node->global.size;
+ if (!DECL_EXTERNAL (node->decl))
+ initial_size += node->global.size;
for (e = node->callees; e; e = e->next_callee)
if (max_count < e->count)
max_count = e->count;
if (node->callers
&& !node->callers->next_caller
+ && !node->global.inlined_to
&& cgraph_will_be_removed_from_program_if_no_direct_calls (node)
&& node->local.inlinable
+ && cgraph_function_body_availability (node) >= AVAIL_AVAILABLE
&& node->callers->inline_failed
&& node->callers->caller != node
&& node->callers->caller->global.inlined_to != node
}
if (cgraph_check_inline_limits (node->callers->caller, node,
- &reason, false))
+ &reason))
{
struct cgraph_node *caller = node->callers->caller;
- cgraph_mark_inline (node->callers);
+ cgraph_mark_inline_edge (node->callers, true, NULL);
if (dump_file)
fprintf (dump_file,
" Inlined into %s which now has %i size"
return 0;
}
-/* Return true when N is leaf function. Accept cheap (pure&const) builtins
+/* Return true when N is leaf function. Accept cheap builtins
in leaf functions. */
+
static bool
leaf_node_p (struct cgraph_node *n)
{
struct cgraph_edge *e;
for (e = n->callees; e; e = e->next_callee)
- if (!DECL_BUILT_IN (e->callee->decl)
- || (!TREE_READONLY (e->callee->decl)
- || DECL_PURE_P (e->callee->decl)))
+ if (!is_inexpensive_builtin (e->callee->decl))
return false;
return true;
}
if (!e->callee->local.disregard_inline_limits
&& (mode != INLINE_ALL || !e->callee->local.inlinable))
continue;
- if (e->call_stmt_cannot_inline_p)
- continue;
if (dump_file)
fprintf (dump_file,
"Considering to always inline inline candidate %s.\n",
fprintf (dump_file, "Not inlining: recursive call.\n");
continue;
}
- if (!tree_can_inline_p (e))
+ if (!tree_can_inline_p (e)
+ || e->call_stmt_cannot_inline_p)
{
if (dump_file)
fprintf (dump_file,
fprintf (dump_file, " Inlining %s into %s.\n",
cgraph_node_name (e->callee),
cgraph_node_name (e->caller));
- cgraph_mark_inline (e);
+ cgraph_mark_inline_edge (e, true, NULL);
inlined = true;
}
if (((mode == INLINE_SIZE || mode == INLINE_SIZE_NORECURSIVE)
|| (!flag_inline_functions
&& !DECL_DECLARED_INLINE_P (e->callee->decl)))
- && (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
+ && (cgraph_estimate_size_after_inlining (e->caller, e->callee)
> e->caller->global.size + allowed_growth)
&& cgraph_estimate_growth (e->callee) > allowed_growth)
{
if (dump_file)
fprintf (dump_file,
"Not inlining: code size would grow by %i.\n",
- cgraph_estimate_size_after_inlining (1, e->caller,
+ cgraph_estimate_size_after_inlining (e->caller,
e->callee)
- e->caller->global.size);
continue;
}
- if (!cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
- false)
- || e->call_stmt_cannot_inline_p)
+ if (e->call_stmt_cannot_inline_p
+ || !tree_can_inline_p (e))
{
if (dump_file)
- fprintf (dump_file, "Not inlining: %s.\n",
- cgraph_inline_failed_string (e->inline_failed));
+ fprintf (dump_file,
+ "Not inlining: call site not inlinable.\n");
continue;
}
if (!e->callee->analyzed)
"Not inlining: Function body no longer available.\n");
continue;
}
- if (!tree_can_inline_p (e))
+ if (!cgraph_check_inline_limits (node, e->callee, &e->inline_failed))
{
if (dump_file)
- fprintf (dump_file,
- "Not inlining: %s.",
+ fprintf (dump_file, "Not inlining: %s.\n",
cgraph_inline_failed_string (e->inline_failed));
continue;
}
fprintf (dump_file, " Inlining %s into %s.\n",
cgraph_node_name (e->callee),
cgraph_node_name (e->caller));
- cgraph_mark_inline (e);
+ cgraph_mark_inline_edge (e, true, NULL);
inlined = true;
}
}
}
};
-/* When inlining shall be performed. */
-static bool
-cgraph_gate_ipa_early_inlining (void)
-{
- return (flag_early_inlining
- && !in_lto_p
- && (flag_branch_probabilities || flag_test_coverage
- || profile_arc_flag));
-}
-
-/* IPA pass wrapper for early inlining pass. We need to run early inlining
- before tree profiling so we have stand alone IPA pass for doing so. */
-struct simple_ipa_opt_pass pass_ipa_early_inline =
-{
- {
- SIMPLE_IPA_PASS,
- "einline_ipa", /* name */
- cgraph_gate_ipa_early_inlining, /* gate */
- NULL, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_INLINE_HEURISTICS, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_dump_cgraph /* todo_flags_finish */
- }
-};
-/* See if statement might disappear after inlining. We are not terribly
- sophisficated, basically looking for simple abstraction penalty wrappers. */
+/* See if statement might disappear after inlining.
+ 0 - means not eliminated
+ 1 - half of statements goes away
+ 2 - for sure it is eliminated.
+ We are not terribly sophisticated, basically looking for simple abstraction
+ penalty wrappers. */
-static bool
-likely_eliminated_by_inlining_p (gimple stmt)
+static int
+eliminated_by_inlining_prob (gimple stmt)
{
enum gimple_code code = gimple_code (stmt);
switch (code)
{
case GIMPLE_RETURN:
- return true;
+ return 2;
case GIMPLE_ASSIGN:
if (gimple_num_ops (stmt) != 2)
- return false;
+ return 0;
/* Casts of parameters, loads from parameters passed by reference
- and stores to return value or parameters are probably free after
- inlining. */
+ and stores to return value or parameters are often free after
+ inlining due to SRA and further combining.
+ Assume that half of statements goes away. */
if (gimple_assign_rhs_code (stmt) == CONVERT_EXPR
|| gimple_assign_rhs_code (stmt) == NOP_EXPR
|| gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR
&& (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
rhs_free = true;
if (lhs_free && rhs_free)
- return true;
+ return 1;
}
- return false;
+ return 0;
default:
- return false;
+ return 0;
}
}
{
gcov_type time = 0;
gcov_type time_inlining_benefit = 0;
- int size = 0;
- int size_inlining_benefit = 0;
+ /* Estimate static overhead for function prologue/epilogue and alignment. */
+ int size = 2;
+ /* Benefits are scaled by probability of elimination that is in range
+ <0,2>. */
+ int size_inlining_benefit = 2 * 2;
basic_block bb;
gimple_stmt_iterator bsi;
struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
gimple stmt = gsi_stmt (bsi);
int this_size = estimate_num_insns (stmt, &eni_size_weights);
int this_time = estimate_num_insns (stmt, &eni_time_weights);
+ int prob;
if (dump_file && (dump_flags & TDF_DETAILS))
{
this_time *= freq;
time += this_time;
size += this_size;
- if (likely_eliminated_by_inlining_p (stmt))
- {
- size_inlining_benefit += this_size;
- time_inlining_benefit += this_time;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, " Likely eliminated\n");
- }
+ prob = eliminated_by_inlining_prob (stmt);
+ if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " 50%% will be eliminated by inlining\n");
+ if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " will eliminated by inlining\n");
+ size_inlining_benefit += this_size * prob;
+ time_inlining_benefit += this_time * prob;
gcc_assert (time >= 0);
gcc_assert (size >= 0);
}
}
time = (time + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
- time_inlining_benefit = ((time_inlining_benefit + CGRAPH_FREQ_BASE / 2)
- / CGRAPH_FREQ_BASE);
+ time_inlining_benefit = ((time_inlining_benefit + CGRAPH_FREQ_BASE)
+ / (CGRAPH_FREQ_BASE * 2));
+ size_inlining_benefit = (size_inlining_benefit + 1) / 2;
if (dump_file)
fprintf (dump_file, "Overall function body time: %i-%i size: %i-%i\n",
(int)time, (int)time_inlining_benefit,
current_function_decl = node->decl;
compute_inline_parameters (node);
- if (flag_indirect_inlining)
+ /* FIXME: We should remove the optimize check after we ensure we never run
+ IPA passes when not optimizing. */
+ if (flag_indirect_inlining && optimize)
inline_indirect_intraprocedural_analysis (node);
current_function_decl = NULL;