/* Inlining decision heuristics.
   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* Inlining decision heuristics

    We separate inlining decisions from the inliner itself and store them
    inside the callgraph as a so-called inline plan.  Refer to the cgraph.c
    documentation for the particular representation of inline plans in the
    callgraph.

    There are three major parts of this file:

    cgraph_mark_inline implementation

      This function marks a given call as inline and performs the necessary
      modifications of the cgraph (production of the clones and updating of
      overall statistics).

    inlining heuristics limits

      These functions check that a particular inlining is allowed by the
      limits specified by the user (allowed function growth, overall unit
      growth and so on).

    inlining heuristics

      This is the implementation of the IPA pass that aims to get as much
      benefit from inlining as possible while obeying the limits checked
      above.

    The implementation of the particular heuristics is separated from the
    rest of the code to make it easier to replace it with a more complicated
    implementation in the future.  The rest of the inlining code acts as a
    library aimed at modifying the callgraph and verifying that the
    parameters on code size growth fit.

    To mark a given call inline, use the cgraph_mark_inline function; the
    verification is performed by cgraph_default_inline_p and
    cgraph_check_inline_limits.

    The heuristics implement a simple knapsack-style algorithm, ordering
    all functions by their "profitability" (estimated by code size growth)
    and inlining them in priority order.

    cgraph_decide_inlining implements the heuristics taking the whole
    callgraph into account, while cgraph_decide_inlining_incrementally
    considers only one function at a time and is used in
    non-unit-at-a-time mode.

    The inliner itself is split into several passes:

    pass_inline_parameters

      This pass computes local properties of functions that are used by the
      inliner: the estimated function body size, whether the function is
      inlinable at all, and its stack frame consumption.

      Before executing any of the inliner passes, this local pass has to be
      applied to each function in the callgraph (ie run as a subpass of some
      earlier IPA pass).  The results are made out of date by any
      optimization applied to the function body.

    pass_early_inlining

      A simple local inlining pass inlining callees into the current
      function.  This pass makes no whole-compilation-unit analysis, and
      thus, when allowed to do inlining that expands code size, it might
      result in unbounded growth of the whole unit.

      This is the main inlining pass in non-unit-at-a-time mode.

      With unit-at-a-time the pass is run during conversion into SSA form.
      Only functions already converted into SSA form are inlined, so the
      conversion must happen in topological order on the callgraph (that is
      maintained by the pass manager).  The functions after inlining are
      early optimized, so the early inliner sees the unoptimized function
      itself, but all considered callees are already optimized, allowing it
      to unfold abstraction penalty on C++ effectively and cheaply.

    pass_ipa_early_inlining

      With profiling, the early inlining is also necessary to reduce
      instrumentation costs on programs with a high abstraction penalty
      (doing many redundant calls).  This can't happen in parallel with
      early optimization and profile instrumentation, because we would end
      up re-instrumenting already instrumented function bodies we brought
      in via inlining.

      To avoid this, this pass is executed as an IPA pass before profiling.
      It is a simple wrapper around pass_early_inlining and ensures the
      first inlining.

    pass_ipa_inline

      This is the main pass implementing a simple greedy algorithm to do
      inlining of small functions that results in overall growth of the
      compilation unit, and inlining of functions called once.  The pass
      computes just the so-called inline plan (a representation of the
      inlining to be done in the callgraph); unlike early inlining, it does
      not perform the inlining itself.

    pass_apply_inline

      This pass performs the actual inlining according to pass_ipa_inline
      on a given function.  Possibly the function body before inlining is
      saved when it is needed for further inlining later.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "coverage.h"
#include "ggc.h"
#include "tree-flow.h"
#include "rtl.h"

/* Mode the incremental inliner operates in:

   In ALWAYS_INLINE mode, only functions marked always_inline are inlined.
   This mode is used after detecting a cycle during flattening.

   In SIZE mode, only functions that reduce the function body size after
   inlining are inlined; this is used during early inlining.

   In SPEED mode, all small functions are inlined.  This might result in
   unbounded growth of the compilation unit and is used only in
   non-unit-at-a-time mode.

   In ALL mode, everything is inlined.  This is used during flattening.  */
enum inlining_mode {
  INLINE_NONE = 0,
  INLINE_ALWAYS_INLINE,
  INLINE_SIZE,
  INLINE_SPEED,
  INLINE_ALL
};
static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *, enum inlining_mode,
                                      int);

/* Statistics we collect about inlining algorithm.  */
static int ncalls_inlined;
static int nfunctions_inlined;
static int overall_insns;
static gcov_type max_count;

/* Estimate size of the function after inlining WHAT into TO.  */

static int
cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
                                     struct cgraph_node *what)
{
  int size;
  tree fndecl = what->decl, arg;
  int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);

  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
    call_insns += estimate_move_cost (TREE_TYPE (arg));
  size = (what->global.insns - call_insns) * times + to->global.insns;
  gcc_assert (size >= 0);
  return size;
}
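
/* Worked example of the formula above (illustrative only; the actual
   parameter defaults live in params.def and may differ): assuming an
   inline-call-cost of 16 and a callee taking no arguments, inlining a
   40-insn callee once into a 100-insn caller estimates to
   (40 - 16) * 1 + 100 = 124 insns, since the call overhead itself
   disappears at the call site.  */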

/* E is expected to be an edge being inlined.  Clone the destination node
   of the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating
   new clones or re-using the node originally representing the out-of-line
   function call.  */
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
                            bool update_original)
{
  HOST_WIDE_INT peak;
  if (duplicate)
    {
      /* We may eliminate the need for the out-of-line copy to be output.
         In that case just go ahead and re-use it.  */
      if (!e->callee->callers->next_caller
          && !e->callee->needed
          && flag_unit_at_a_time)
        {
          gcc_assert (!e->callee->global.inlined_to);
          if (DECL_SAVED_TREE (e->callee->decl))
            overall_insns -= e->callee->global.insns, nfunctions_inlined++;
          duplicate = false;
        }
      else
        {
          struct cgraph_node *n;
          n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest,
                                 update_original);
          cgraph_redirect_edge_callee (e, n);
        }
    }

  if (e->caller->global.inlined_to)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;
  e->callee->global.stack_frame_offset
    = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
  peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
    e->callee->global.inlined_to->global.estimated_stack_size = peak;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, duplicate, update_original);
}
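
/* Illustration of the stack accounting above (with assumed numbers): if
   the caller sits at stack_frame_offset 0 with an estimated self stack
   size of 48 bytes, the inlined callee is placed at offset 48, and a
   callee needing 32 bytes raises the peak to 80.  The peak is recorded on
   the function everything is ultimately inlined into, so later limit
   checks see the combined frame.  */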

/* Mark edge E as inlined and update the callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  */

void
cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
{
  int old_insns = 0, new_insns = 0;
  struct cgraph_node *to = NULL, *what;

  if (e->callee->inline_decl)
    cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));

  gcc_assert (e->inline_failed);
  e->inline_failed = NULL;

  if (!e->callee->global.inlined && flag_unit_at_a_time)
    DECL_POSSIBLY_INLINED (e->callee->decl) = true;
  e->callee->global.inlined = true;

  cgraph_clone_inlined_nodes (e, true, update_original);

  what = e->callee;

  /* Now update size of caller and all functions caller is inlined into.  */
  for (; e && !e->inline_failed; e = e->caller->callers)
    {
      old_insns = e->caller->global.insns;
      new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
                                                       what);
      gcc_assert (new_insns >= 0);
      to = e->caller;
      to->global.insns = new_insns;
    }
  gcc_assert (what->global.inlined_to == to);
  if (new_insns > old_insns)
    overall_insns += new_insns - old_insns;
  ncalls_inlined++;
}

/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
   Return the following unredirected edge in the list of callers
   of EDGE->CALLEE.  */

static struct cgraph_edge *
cgraph_mark_inline (struct cgraph_edge *edge)
{
  struct cgraph_node *to = edge->caller;
  struct cgraph_node *what = edge->callee;
  struct cgraph_edge *e, *next;

  gcc_assert (!CALL_CANNOT_INLINE_P (edge->call_stmt));
  /* Look for all calls, mark them inline and clone recursively
     all inlined functions.  */
  for (e = what->callers; e; e = next)
    {
      next = e->next_caller;
      if (e->caller == to && e->inline_failed)
        {
          cgraph_mark_inline_edge (e, true);
          if (e == edge)
            edge = next;
        }
    }

  return edge;
}

/* Estimate the growth caused by inlining NODE into all callees.  */

static int
cgraph_estimate_growth (struct cgraph_node *node)
{
  int growth = 0;
  struct cgraph_edge *e;
  if (node->global.estimated_growth != INT_MIN)
    return node->global.estimated_growth;

  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
                 - e->caller->global.insns);

  /* ??? Wrong for self-recursive functions or cases where we decide not to
     inline for different reasons, but it is not a big deal, as in that case
     we will keep the body around but also avoid some inlining.  */
  if (!node->needed && !DECL_EXTERNAL (node->decl))
    growth -= node->global.insns;

  node->global.estimated_growth = growth;
  return growth;
}
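
/* Worked example (illustrative only, with an assumed call cost of 16):
   a 30-insn function with two remaining call sites grows each caller by
   30 - 16 = 14 insns.  If the offline body can be eliminated afterwards
   (the function is not needed externally), the total is
   2 * 14 - 30 = -2 insns, i.e. a net shrink, so inlining everywhere is a
   clear win.  */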

/* Return false when inlining WHAT into TO is not a good idea because it
   would cause too large a growth of function bodies.
   When ONE_ONLY is true, assume that only one call site is going to be
   inlined; otherwise figure out how many call sites in TO call WHAT and
   verify that all can be inlined.  */

static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
                            const char **reason, bool one_only)
{
  int times = 0;
  struct cgraph_edge *e;
  int newsize;
  int limit;
  HOST_WIDE_INT stack_size_limit, inlined_stack;

  if (one_only)
    times = 1;
  else
    for (e = to->callees; e; e = e->next_callee)
      if (e->callee == what)
        times++;

  if (to->global.inlined_to)
    to = to->global.inlined_to;

  /* When inlining a large function body called once into a small function,
     take the inlined function as the base for limiting the growth.  */
  if (to->local.self_insns > what->local.self_insns)
    limit = to->local.self_insns;
  else
    limit = what->local.self_insns;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = cgraph_estimate_size_after_inlining (times, to, what);
  if (newsize >= to->global.insns
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      if (reason)
        *reason = N_("--param large-function-growth limit reached");
      return false;
    }

  stack_size_limit = to->local.estimated_self_stack_size;

  stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;

  inlined_stack = (to->global.stack_frame_offset
                   + to->local.estimated_self_stack_size
                   + what->global.estimated_stack_size);
  if (inlined_stack > stack_size_limit
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      if (reason)
        *reason = N_("--param large-stack-frame-growth limit reached");
      return false;
    }
  return true;
}
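
/* Worked example for the growth limit above (illustrative only; the
   defaults live in params.def): inlining a 50-insn callee into a
   200-insn caller takes limit = 200, and with a large-function-growth of
   100% the caller may grow up to 400 insns.  The check only fires once
   the result is also above large-function-insns, so small functions are
   never rejected by this limit alone.  */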

/* Return true when function N is small enough to be inlined.  */

bool
cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
{
  tree decl = n->decl;

  if (n->inline_decl)
    decl = n->inline_decl;
  if (!DECL_INLINE (decl))
    {
      if (reason)
        *reason = N_("function not inlinable");
      return false;
    }

  if (!DECL_STRUCT_FUNCTION (decl)->cfg)
    {
      if (reason)
        *reason = N_("function body not available");
      return false;
    }

  if (DECL_DECLARED_INLINE_P (decl))
    {
      if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
        {
          if (reason)
            *reason = N_("--param max-inline-insns-single limit reached");
          return false;
        }
    }
  else
    {
      if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
        {
          if (reason)
            *reason = N_("--param max-inline-insns-auto limit reached");
          return false;
        }
    }

  return true;
}

/* Return true when inlining WHAT would create recursive inlining.
   We call recursive inlining all cases where the same function appears
   more than once in the single recursion nest path in the inline graph.  */

static bool
cgraph_recursive_inlining_p (struct cgraph_node *to,
                             struct cgraph_node *what,
                             const char **reason)
{
  bool recursive;
  if (to->global.inlined_to)
    recursive = what->decl == to->global.inlined_to->decl;
  else
    recursive = what->decl == to->decl;
  /* Marking a recursive function inline has sane semantics and thus we
     should not warn on it.  */
  if (recursive && reason)
    *reason = (what->local.disregard_inline_limits
               ? N_("recursive inlining") : "");
  return recursive;
}

/* Return true if the call can be hot.  */
static bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
          <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
      || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
    return false;
  if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
    return true;
  if (flag_guess_branch_prob
      && edge->frequency < (CGRAPH_FREQ_MAX
                            / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of the nodes affected are recomputed so
   the metrics may accurately depend on values such as the number of
   inlinable callers of the function or the function body size.  */

static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
  int badness;
  int growth =
    cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

  growth -= edge->caller->global.insns;

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    badness = INT_MIN - growth;

  /* When profiling is available, base priorities on -(#calls / growth).
     So we optimize for the overall number of "executed" inlined calls.  */
  else if (max_count)
    badness = ((int)((double)edge->count * INT_MIN / max_count)) / growth;

  /* When a function-local profile is available, base priorities on
     growth / frequency, so we optimize for the overall frequency of
     inlined calls.  This is not too accurate since while the call might
     be frequent within the function, the function itself is infrequent.

     Another objective to optimize for is the number of different calls
     inlined.  We add the estimated growth after inlining all functions to
     bias the priorities slightly in this direction (so functions of the
     same size that are called fewer times get priority).  */
  else if (flag_guess_branch_prob)
    {
      int div = edge->frequency * 100 / CGRAPH_FREQ_BASE;
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      growth -= edge->caller->global.insns;
      badness = growth * 256;

      /* Decrease badness if call is nested.  */
      /* Compress the range so we don't overflow.  */
      if (div > 256)
        div = 256 + ceil_log2 (div) - 8;
      if (div < 1)
        div = 1;
      if (badness > 0)
        badness /= div;
      badness += cgraph_estimate_growth (edge->callee);
    }
  /* When a function-local profile is not available or does not give
     useful information (ie the frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall
     number of functions fully inlined into the program.  */
  else
    {
      int nest = MIN (edge->loop_nest, 8);
      badness = cgraph_estimate_growth (edge->callee) * 256;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
        badness >>= nest;
      else
        badness <<= nest;
    }
  /* Make recursive inlining happen always after other inlining is done.  */
  if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
    return badness + 1;
  else
    return badness;
}
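
/* Worked example for the guessed-profile branch above (illustrative
   only): an edge with frequency equal to CGRAPH_FREQ_BASE gives
   div = 100, so a growth of 20 insns yields badness = 20 * 256 / 100 = 51
   before the overall-growth bias is added, while the same growth on an
   edge with 1% frequency keeps badness at 20 * 256 / 1 = 5120.  The hot
   call is therefore strongly preferred.  */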

/* Recompute heap nodes for each caller edge.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *edge;
  const char *failed_reason;

  if (!node->local.inlinable || node->local.disregard_inline_limits
      || node->global.inlined_to)
    return;
  if (bitmap_bit_p (updated_nodes, node->uid))
    return;
  bitmap_set_bit (updated_nodes, node->uid);
  node->global.estimated_growth = INT_MIN;

  if (!node->local.inlinable)
    return;
  /* Prune out edges we won't inline into anymore.  */
  if (!cgraph_default_inline_p (node, &failed_reason))
    {
      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->aux)
          {
            fibheap_delete_node (heap, edge->aux);
            edge->aux = NULL;
            if (edge->inline_failed)
              edge->inline_failed = failed_reason;
          }
      return;
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        int badness = cgraph_edge_badness (edge);
        if (edge->aux)
          {
            fibnode_t n = edge->aux;
            gcc_assert (n->data == edge);
            if (n->key == badness)
              continue;

            /* fibheap_replace_key only increases the keys.  */
            if (fibheap_replace_key (heap, n, badness))
              continue;
            fibheap_delete_node (heap, edge->aux);
          }
        edge->aux = fibheap_insert (heap, badness, edge);
      }
}

/* Recompute heap nodes for the caller edges of each of the callees.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e;
  node->global.estimated_growth = INT_MIN;

  for (e = node->callees; e; e = e->next_callee)
    if (e->inline_failed)
      update_caller_keys (heap, e->callee, updated_nodes);
    else if (!e->inline_failed)
      update_callee_keys (heap, e->callee, updated_nodes);
}

/* Enqueue all recursive calls from NODE into the priority queue depending
   on how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        fibheap_t heap)
{
  static int priority;
  struct cgraph_edge *e;
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node)
      {
        /* When profile feedback is available, prioritize by the expected
           number of calls.  Without profile feedback we maintain a simple
           queue to order candidates by recursion depth.  */
        fibheap_insert (heap,
                        !max_count ? priority++
                        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
                        e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
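
/* Note on the scaling above (illustrative): dividing each count by
   ceil (max_count / 2^24) maps arbitrary gcov counts into roughly the
   0..2^24 range, so the negated value always fits comfortably in a
   fibheap key while preserving the relative ordering of hot and cold
   recursive edges.  */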

/* Decide on recursive inlining: in case the function has recursive calls,
   inline until the body size reaches the given limit.  */

static bool
cgraph_decide_recursive_inlining (struct cgraph_node *node)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
  fibheap_t heap;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone, *next;
  int depth = 0;
  int n = 0;

  if (optimize_size)
    return false;

  if (DECL_DECLARED_INLINE_P (node->decl))
    {
      limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
      max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
    }

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (!max_depth
      || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             cgraph_node_name (node));

  /* We need the original clone to copy around.  */
  master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1, false);
  master_clone->needed = true;
  for (e = master_clone->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, true, false);

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap)
         && (cgraph_estimate_size_after_inlining (1, node, master_clone)
             <= limit))
    {
      struct cgraph_edge *curr = fibheap_extract_min (heap);
      struct cgraph_node *cnode;

      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl == curr->callee->decl)
          depth++;
      if (depth > max_depth)
        {
          if (dump_file)
            fprintf (dump_file,
                     "   maximal depth reached\n");
          continue;
        }

      if (max_count)
        {
          if (!cgraph_maybe_hot_edge_p (curr))
            {
              if (dump_file)
                fprintf (dump_file, "   Not inlining cold call\n");
              continue;
            }
          if (curr->count * 100 / node->count < probability)
            {
              if (dump_file)
                fprintf (dump_file,
                         "   Probability of edge is too small\n");
              continue;
            }
        }

      if (dump_file)
        {
          fprintf (dump_file,
                   "   Inlining call of depth %i", depth);
          if (node->count)
            fprintf (dump_file, " called approx. %.2f times per call",
                     (double)curr->count / node->count);
          fprintf (dump_file, "\n");
        }
      cgraph_redirect_edge_callee (curr, master_clone);
      cgraph_mark_inline_edge (curr, false);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");

  fibheap_delete (heap);
  if (dump_file)
    fprintf (dump_file,
             "\n   Inlined %i times, body grown from %i to %i insns\n", n,
             master_clone->global.insns, node->global.insns);

  /* Remove the master clone we used for inlining.  We rely on the fact
     that clones inlined into the master clone get queued just before the
     master clone, so we don't need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = next)
    {
      next = node->next;
      if (node->global.inlined_to == master_clone)
        cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  /* FIXME: Recursive inlining actually reduces the number of calls of the
     function.  At this place we should probably walk the function and
     inline clones and compensate the counts accordingly.  This probably
     doesn't matter much in practice.  */
  return n > 0;
}

/* Set inline_failed for all callers of a given function to REASON.  */

static void
cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
{
  struct cgraph_edge *e;

  if (dump_file)
    fprintf (dump_file, "Inlining failed: %s\n", reason);
  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      e->inline_failed = reason;
}

/* Given the whole-compilation-unit estimate of INSNS, compute how large
   we can allow the unit to grow.  */

static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((HOST_WIDEST_INT) max_insns
          * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
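
/* Worked example (illustrative only; the defaults live in params.def):
   with an inline-unit-growth of 30%, a unit estimated at 50000 insns may
   grow to 50000 * 130 / 100 = 65000 insns.  Units smaller than
   large-unit-insns are first rounded up to that value, so tiny programs
   get essentially unconstrained inlining.  */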

/* We use a greedy algorithm for inlining of small functions:
   All inline candidates are put into a prioritized heap based on the
   estimated growth of the overall number of instructions, and then the
   estimates are updated as inlining progresses.

   INLINED and INLINED_CALLEES are just pointers to arrays large enough
   to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */

static void
cgraph_decide_inlining_of_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  const char *failed_reason;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
  int min_insns, max_insns;

  if (dump_file)
    fprintf (dump_file, "\nDeciding on smaller functions:\n");

  /* Put all inline candidates into the heap.  */

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->local.inlinable || !node->callers
          || node->local.disregard_inline_limits)
        continue;
      if (dump_file)
        fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));

      node->global.estimated_growth = INT_MIN;
      if (!cgraph_default_inline_p (node, &failed_reason))
        {
          cgraph_set_inline_failed (node, failed_reason);
          continue;
        }

      for (edge = node->callers; edge; edge = edge->next_caller)
        if (edge->inline_failed)
          {
            gcc_assert (!edge->aux);
            edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
          }
    }

  max_insns = compute_max_insns (overall_insns);
  min_insns = overall_insns;

  while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
    {
      int old_insns = overall_insns;
      struct cgraph_node *where;
      int growth =
        cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

      growth -= edge->caller->global.insns;

      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i insns\n",
                   cgraph_node_name (edge->callee),
                   edge->callee->global.insns);
          fprintf (dump_file,
                   " to be inlined into %s\n"
                   " Estimated growth after inlined into all callees is %+i insns.\n"
                   " Estimated badness is %i, frequency %.2f.\n",
                   cgraph_node_name (edge->caller),
                   cgraph_estimate_growth (edge->callee),
                   cgraph_edge_badness (edge),
                   edge->frequency / (double)CGRAPH_FREQ_BASE);
          if (edge->count)
            fprintf (dump_file, " Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
        }
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
        continue;

      /* Without profile information ready we don't weight in any way the
         position of the call in the procedure itself.  This means that if
         a call of function A from function B seems profitable to inline,
         the recursive call of function A in the inline copy of A in B will
         look profitable too, and we end up inlining until reaching the
         maximal function growth.  This is not a good idea, so prohibit the
         recursive inlining.

         ??? When the frequencies are taken into account we might not need
         this restriction.  */
      if (!max_count)
        {
          where = edge->caller;
          while (where->global.inlined_to)
            {
              if (where->decl == edge->callee->decl)
                break;
              where = where->callers->caller;
            }
          if (where->global.inlined_to)
            {
              edge->inline_failed
                = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
              if (dump_file)
                fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
              continue;
            }
        }

      if ((!cgraph_maybe_hot_edge_p (edge) || optimize_size) && growth > 0)
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              edge->inline_failed =
                N_("call is unlikely");
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
            }
          continue;
        }
      if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
        {
          if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                            &edge->inline_failed))
            {
              if (dump_file)
                fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
            }
          continue;
        }
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                       &edge->inline_failed))
        {
          where = edge->caller;
          if (where->global.inlined_to)
            where = where->global.inlined_to;
          if (!cgraph_decide_recursive_inlining (where))
            continue;
          update_callee_keys (heap, where, updated_nodes);
        }
      else
        {
          struct cgraph_node *callee;
          if (CALL_CANNOT_INLINE_P (edge->call_stmt)
              || !cgraph_check_inline_limits (edge->caller, edge->callee,
                                              &edge->inline_failed, true))
            {
              if (dump_file)
                fprintf (dump_file, " Not inlining into %s:%s.\n",
                         cgraph_node_name (edge->caller), edge->inline_failed);
              continue;
            }
          callee = edge->callee;
          cgraph_mark_inline_edge (edge, true);
          update_callee_keys (heap, callee, updated_nodes);
        }
      where = edge->caller;
      if (where->global.inlined_to)
        where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties such as
         the number of inlinable calls and the size of the function body.
         After inlining these properties might change for the function we
         inlined into (since its body size changed) and for the functions
         called by the function we inlined (since the number of their
         inlinable callers might change).  */
      update_caller_keys (heap, where, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_file)
        {
          fprintf (dump_file,
                   " Inlined into %s which now has %i insns,"
                   " net change of %+i insns.\n",
                   cgraph_node_name (edge->caller),
                   edge->caller->global.insns,
                   overall_insns - old_insns);
        }
      if (min_insns > overall_insns)
        {
          min_insns = overall_insns;
          max_insns = compute_max_insns (min_insns);

          if (dump_file)
            fprintf (dump_file, "New minimal insns reached: %i\n", min_insns);
        }
    }
  while ((edge = fibheap_extract_min (heap)) != NULL)
    {
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
          && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
                                           &edge->inline_failed))
        edge->inline_failed = N_("--param inline-unit-growth limit reached");
    }
  fibheap_delete (heap);
  BITMAP_FREE (updated_nodes);
}
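
/* Note on the limit ratcheting above (behavior as implemented, with an
   assumed 30% growth allowance): whenever inlining shrinks the unit below
   its previous minimum, max_insns is recomputed from the new minimum.
   Starting at 50000 insns gives a 65000 cap; if inlining shrinks the unit
   to 48000, the cap tightens to 62400, so early size wins do not buy
   extra growth budget later.  */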

/* Decide on the inlining.  We do so in topological order to avoid
   expenses on updating data structures.  */

static unsigned int
cgraph_decide_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int old_insns = 0;
  int i;
  int initial_insns = 0;

  max_count = 0;
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed && (node->needed || node->reachable))
      {
        struct cgraph_edge *e;

        initial_insns += node->local.self_insns;
        gcc_assert (node->local.self_insns == node->global.insns);
        for (e = node->callees; e; e = e->next_callee)
          if (max_count < e->count)
            max_count = e->count;
      }
  overall_insns = initial_insns;
  gcc_assert (!max_count || (profile_info && flag_branch_probabilities));

  nnodes = cgraph_postorder (order);

  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on inlining.  Starting with %i insns.\n",
             initial_insns);

  for (node = cgraph_nodes; node; node = node->next)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nInlining always_inline functions:\n");

  /* In the first pass mark all always_inline edges.  Do this with a priority
     so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      struct cgraph_edge *e, *next;

      node = order[i];

      /* Handle nodes to be flattened, but don't update overall unit size.  */
      if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Flattening %s\n", cgraph_node_name (node));
          cgraph_decide_inlining_incrementally (node, INLINE_ALL, 0);
        }

      if (!node->local.disregard_inline_limits)
        continue;
      if (dump_file)
        fprintf (dump_file,
                 "\nConsidering %s %i insns (always inline)\n",
                 cgraph_node_name (node), node->global.insns);
      old_insns = overall_insns;
      for (e = node->callers; e; e = next)
        {
          next = e->next_caller;
          if (!e->inline_failed || CALL_CANNOT_INLINE_P (e->call_stmt))
            continue;
          if (cgraph_recursive_inlining_p (e->caller, e->callee,
                                           &e->inline_failed))
            continue;
          cgraph_mark_inline_edge (e, true);
          if (dump_file)
            fprintf (dump_file,
                     " Inlined into %s which now has %i insns.\n",
                     cgraph_node_name (e->caller),
                     e->caller->global.insns);
        }
      /* Inlining a self-recursive function might introduce new calls to
         itself we didn't see in the loop above.  Fill in the proper reason
         why the inlining failed.  */
      for (e = node->callers; e; e = e->next_caller)
        if (e->inline_failed)
          e->inline_failed = N_("recursive inlining");
      if (dump_file)
        fprintf (dump_file,
                 " Inlined for a net change of %+i insns.\n",
                 overall_insns - old_insns);
    }
  if (!flag_really_no_inline)
    cgraph_decide_inlining_of_small_functions ();

  if (!flag_really_no_inline
      && flag_inline_functions_called_once)
    {
      if (dump_file)
        fprintf (dump_file, "\nDeciding on functions called once:\n");

      /* And finally decide what functions are called once.  */
      for (i = nnodes - 1; i >= 0; i--)
        {
          node = order[i];

          if (node->callers && !node->callers->next_caller && !node->needed
              && node->local.inlinable && node->callers->inline_failed
              && !CALL_CANNOT_INLINE_P (node->callers->call_stmt)
              && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
            {
              if (dump_file)
                {
                  fprintf (dump_file,
                           "\nConsidering %s %i insns.\n",
                           cgraph_node_name (node), node->global.insns);
                  fprintf (dump_file,
                           " Called once from %s %i insns.\n",
                           cgraph_node_name (node->callers->caller),
                           node->callers->caller->global.insns);
                }

              old_insns = overall_insns;

              if (cgraph_check_inline_limits (node->callers->caller, node,
                                              NULL, false))
                {
                  cgraph_mark_inline (node->callers);
                  if (dump_file)
                    fprintf (dump_file,
                             " Inlined into %s which now has %i insns"
                             " for a net change of %+i insns.\n",
                             cgraph_node_name (node->callers->caller),
                             node->callers->caller->global.insns,
                             overall_insns - old_insns);
                }
              else
                {
                  if (dump_file)
                    fprintf (dump_file,
                             " Inline limit reached, not inlined.\n");
                }
            }
        }
    }

  if (dump_file)
    fprintf (dump_file,
             "\nInlined %i calls, eliminated %i functions, "
             "%i insns turned to %i insns.\n\n",
             ncalls_inlined, nfunctions_inlined, initial_insns,
             overall_insns);
  free (order);
  return 0;
}

/* Try to inline edge E from the incremental inliner.  MODE specifies the
   mode of the inliner.

   We are detecting cycles by storing the mode of the inliner into
   cgraph_node last time we visited it in the recursion.  In general when
   the mode is set, we have recursive inlining, but as a special case, we
   want to try harder to inline ALWAYS_INLINE functions: consider the
   callgraph a->b->c->b, with a being flatten and b being always inline.
   Flattening 'a' will collapse a->b->c before hitting the cycle.  To
   accommodate always inline, we however need to inline a->b->c->b.

   So after hitting the cycle the first time, we switch into ALWAYS_INLINE
   mode and stop inlining only after hitting ALWAYS_INLINE in
   ALWAYS_INLINE mode.  */
static bool
try_inline (struct cgraph_edge *e, enum inlining_mode mode, int depth)
{
  struct cgraph_node *callee = e->callee;
  enum inlining_mode callee_mode = (enum inlining_mode) (size_t) callee->aux;
  bool always_inline = e->callee->local.disregard_inline_limits;

  /* Have we hit a cycle?  */
  if (callee_mode)
    {
      /* It is the first time we see it and we are not in ALWAYS_INLINE-only
         mode yet, and the function in question is always_inline.  */
      if (always_inline && mode != INLINE_ALWAYS_INLINE)
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Hit cycle in %s, switching to always inline only.\n",
                       cgraph_node_name (callee));
            }
          mode = INLINE_ALWAYS_INLINE;
        }
      /* Otherwise it is time to give up.  */
      else
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Not inlining %s into %s to avoid cycle.\n",
                       cgraph_node_name (callee),
                       cgraph_node_name (e->caller));
            }
          e->inline_failed = (e->callee->local.disregard_inline_limits
                              ? N_("recursive inlining") : "");
          return false;
        }
    }

  callee->aux = (void *)(size_t) mode;
  if (dump_file)
    {
      indent_to (dump_file, depth);
      fprintf (dump_file, " Inlining %s into %s.\n",
               cgraph_node_name (e->callee),
               cgraph_node_name (e->caller));
    }
  if (e->inline_failed)
    cgraph_mark_inline (e);

  /* In order to fully inline always_inline functions at -O0, we need to
     recurse here, since the inlined functions might not be processed by
     incremental inlining at all yet.

     Also flattening needs to be done recursively.  */
  if (!flag_unit_at_a_time || mode == INLINE_ALL || always_inline)
    cgraph_decide_inlining_incrementally (e->callee, mode, depth + 1);
  callee->aux = (void *)(size_t) callee_mode;
  return true;
}

/* Decide on the inlining.  We do so in topological order to avoid
   expenses on updating data structures.
   DEPTH is the depth of recursion, used only for debug output.  */

static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node,
                                      enum inlining_mode mode,
                                      int depth)
{
  struct cgraph_edge *e;
  bool inlined = false;
  const char *failed_reason;
  enum inlining_mode old_mode;

#ifdef ENABLE_CHECKING
  verify_cgraph_node (node);
#endif

  old_mode = (enum inlining_mode) (size_t) node->aux;

  if (mode != INLINE_ALWAYS_INLINE
      && lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      if (dump_file)
        {
          indent_to (dump_file, depth);
          fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node));
        }
      mode = INLINE_ALL;
    }

  node->aux = (void *)(size_t) mode;

  /* First of all look for always inline functions.  */
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->callee->local.disregard_inline_limits
          && (mode != INLINE_ALL || !e->callee->local.inlinable))
        continue;
      if (CALL_CANNOT_INLINE_P (e->call_stmt))
        continue;
      /* When the edge is already inlined, we just need to recurse into
         it in order to fully flatten the leaves.  */
      if (!e->inline_failed && mode == INLINE_ALL)
        {
          inlined |= try_inline (e, mode, depth);
          continue;
        }
      if (dump_file)
        {
          indent_to (dump_file, depth);
          fprintf (dump_file,
                   "Considering to always inline inline candidate %s.\n",
                   cgraph_node_name (e->callee));
        }
      if (cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed))
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file, "Not inlining: recursive call.\n");
            }
          continue;
        }
      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
          != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file, "Not inlining: SSA form does not match.\n");
            }
          continue;
        }
      if (!DECL_SAVED_TREE (e->callee->decl) && !e->callee->inline_decl)
        {
          if (dump_file)
            {
              indent_to (dump_file, depth);
              fprintf (dump_file,
                       "Not inlining: Function body no longer available.\n");
            }
          continue;
        }
      inlined |= try_inline (e, mode, depth);
    }

  /* Now do the automatic inlining.  */
  if (!flag_really_no_inline && mode != INLINE_ALL
      && mode != INLINE_ALWAYS_INLINE)
    for (e = node->callees; e; e = e->next_callee)
      {
        if (!e->callee->local.inlinable
            || !e->inline_failed
            || e->callee->local.disregard_inline_limits)
          continue;
        if (dump_file)
          fprintf (dump_file, "Considering inline candidate %s.\n",
                   cgraph_node_name (e->callee));
        if (cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed))
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file, "Not inlining: recursive call.\n");
              }
            continue;
          }
        if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
            != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file, "Not inlining: SSA form does not match.\n");
              }
            continue;
          }
        /* When the function body would grow and inlining the function
           won't eliminate the need for an offline copy of the function,
           don't inline.  */
        if (mode == INLINE_SIZE
            && (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
                > e->caller->global.insns)
            && cgraph_estimate_growth (e->callee) > 0)
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file,
                         "Not inlining: code size would grow by %i insns.\n",
                         cgraph_estimate_size_after_inlining (1, e->caller,
                                                              e->callee)
                         - e->caller->global.insns);
              }
            continue;
          }
        if (!cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
                                         false)
            || CALL_CANNOT_INLINE_P (e->call_stmt))
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file, "Not inlining: %s.\n", e->inline_failed);
              }
            continue;
          }
        if (!DECL_SAVED_TREE (e->callee->decl) && !e->callee->inline_decl)
          {
            if (dump_file)
              {
                indent_to (dump_file, depth);
                fprintf (dump_file,
                         "Not inlining: Function body no longer available.\n");
              }
            continue;
          }
        if (cgraph_default_inline_p (e->callee, &failed_reason))
          inlined |= try_inline (e, mode, depth);
        else if (!flag_unit_at_a_time)
          e->inline_failed = failed_reason;
      }
  node->aux = (void *)(size_t) old_mode;
  return inlined;
}

/* Decide whether inlining shall be performed.  */
static bool
cgraph_gate_inlining (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_ipa_inline =
{
  "inline",                             /* name */
  cgraph_gate_inlining,                 /* gate */
  cgraph_decide_inlining,               /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_remove_functions,                /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func
  | TODO_remove_functions,              /* todo_flags_finish */
  0                                     /* letter */
};

/* Because inlining might remove no-longer-reachable nodes, we need to
   keep the array visible to the garbage collector to avoid reading
   collected-out nodes.  */
static int nnodes;
static GTY ((length ("nnodes"))) struct cgraph_node **order;

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   the later real inlining pass for testcases with very many function calls.  */
static unsigned int
cgraph_early_inlining (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);
  unsigned int todo = 0;

  if (sorrycount || errorcount)
    return 0;
  if (cgraph_decide_inlining_incrementally (node,
                                            flag_unit_at_a_time || optimize_size
                                            ? INLINE_SIZE : INLINE_SPEED, 0))
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  return todo;
}

/* Decide whether early inlining shall be performed.  */
static bool
cgraph_gate_early_inlining (void)
{
  return flag_inline_trees && flag_early_inlining;
}

struct tree_opt_pass pass_early_inline =
{
  "einline",                            /* name */
  cgraph_gate_early_inlining,           /* gate */
  cgraph_early_inlining,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};

/* Decide whether IPA early inlining shall be performed.  */
static bool
cgraph_gate_ipa_early_inlining (void)
{
  return (flag_inline_trees && flag_early_inlining
          && (flag_branch_probabilities || flag_test_coverage
              || profile_arc_flag));
}

/* IPA pass wrapper for the early inlining pass.  We need to run early
   inlining before tree profiling, so we have a stand-alone IPA pass for
   doing so.  */
struct tree_opt_pass pass_ipa_early_inline =
{
  "einline_ipa",                        /* name */
  cgraph_gate_ipa_early_inlining,       /* gate */
  NULL,                                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_cgraph,                     /* todo_flags_finish */
  0                                     /* letter */
};

/* Compute parameters of functions used by the inliner.  */
static unsigned int
compute_inline_parameters (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);

  gcc_assert (!node->global.inlined_to);
  node->local.estimated_self_stack_size = estimated_stack_frame_size ();
  node->global.estimated_stack_size = node->local.estimated_self_stack_size;
  node->global.stack_frame_offset = 0;
  node->local.inlinable = tree_inlinable_function_p (current_function_decl);
  node->local.self_insns = estimate_num_insns (current_function_decl,
                                               &eni_inlining_weights);
  if (node->local.inlinable)
    node->local.disregard_inline_limits
      = lang_hooks.tree_inlining.disregard_inline_limits (current_function_decl);
  if (flag_really_no_inline && !node->local.disregard_inline_limits)
    node->local.inlinable = 0;
  /* Inlining characteristics are maintained by cgraph_mark_inline.  */
  node->global.insns = node->local.self_insns;
  return 0;
}

/* Decide whether the inline passes shall be performed.  */
static bool
gate_inline_passes (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_inline_parameters =
{
  NULL,                                 /* name */
  gate_inline_passes,                   /* gate */
  compute_inline_parameters,            /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0,                                    /* todo_flags_finish */
  0                                     /* letter */
};

/* Apply the inline plan to the function.  */
static unsigned int
apply_inline (void)
{
  unsigned int todo = 0;
  struct cgraph_edge *e;
  struct cgraph_node *node = cgraph_node (current_function_decl);

  /* Even when not optimizing, ensure that always_inline functions get
     inlined.  */
  if (!optimize)
    cgraph_decide_inlining_incrementally (node, INLINE_SPEED, 0);

  /* We might need the body of this function so that we can expand
     it inline somewhere else.  */
  if (cgraph_preserve_function_body_p (current_function_decl))
    save_inline_function_body (node);

  for (e = node->callees; e; e = e->next_callee)
    if (!e->inline_failed || warn_inline)
      break;

  if (e)
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  /* In non-unit-at-a-time mode we must mark all referenced functions as
     needed.  */
  if (!flag_unit_at_a_time)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
        if (e->callee->analyzed)
          cgraph_mark_needed_node (e->callee);
    }
  return todo | execute_fixup_cfg ();
}

struct tree_opt_pass pass_apply_inline =
{
  "apply_inline",                       /* name */
  NULL,                                 /* gate */
  apply_inline,                         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  PROP_cfg,                             /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_verify_flow
  | TODO_verify_stmts,                  /* todo_flags_finish */
  0                                     /* letter */
};

#include "gt-ipa-inline.h"