/* Inlining decision heuristics.
   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* Inlining decision heuristics

   We separate inlining decisions from the inliner itself and store them
   inside the callgraph as a so-called inline plan.  Refer to the cgraph.c
   documentation about the representation of inline plans in the callgraph.

   There are three major parts of this file:

   cgraph_mark_inline implementation

     This function marks a given call inline and performs the necessary
     modifications of the cgraph (production of the clones and updating of
     overall statistics).

   inlining heuristics limits

     These functions check that a particular inlining is allowed by the
     limits specified by the user (allowed function growth, overall unit
     growth and so on).

   inlining heuristics

     This is the implementation of the IPA pass aiming to get as much
     benefit from inlining as possible while obeying the limits checked
     above.

   The implementation of the particular heuristics is separated from the
   rest of the code to make it easier to replace it with a more complicated
   implementation in the future.  The rest of the inlining code acts as a
   library aimed at modifying the callgraph and verifying that the limits
   on code size growth are met.

   To mark a given call inline, use the cgraph_mark_inline function; the
   verification is performed by cgraph_default_inline_p and
   cgraph_check_inline_limits.

   The heuristics implement a simple knapsack-style algorithm, ordering
   all functions by their "profitability" (estimated by code size growth)
   and inlining them in priority order.

   cgraph_decide_inlining implements the heuristics taking the whole
   callgraph into account, while cgraph_decide_inlining_incrementally
   considers only one function at a time and is used in
   non-unit-at-a-time mode.

   The inliner itself is split into several passes:
   pass_inline_parameters

     This pass computes local properties of functions that are used by the
     inliner: the estimated function body size, whether the function is
     inlinable at all, and stack frame consumption.

     Before executing any of the inliner passes, this local pass has to be
     applied to each function in the callgraph (i.e. run as a subpass of
     some earlier IPA pass).  The results are made out of date by any
     optimization applied on the function body.

   pass_early_inlining

     Simple local inlining pass inlining callees into the current function.
     This pass makes no global whole-compilation-unit analysis, and thus
     when allowed to do inlining that expands code size it might result in
     unbounded growth of the whole compilation unit.

     This is the main inlining pass in non-unit-at-a-time mode.

     With unit-at-a-time, the pass is run during conversion into SSA form.
     Only functions already converted into SSA form are inlined, so the
     conversion must happen in topological order on the callgraph (that is
     maintained by the pass manager).  The functions are early optimized
     after inlining, so the early inliner sees the unoptimized function
     itself, but all considered callees are already optimized, allowing it
     to unfold the abstraction penalty on C++ effectively and cheaply.

   pass_ipa_early_inlining

     With profiling, the early inlining is also necessary to reduce
     instrumentation costs on programs with a high abstraction penalty
     (doing many redundant calls).  This can't happen in parallel with
     early optimization and profile instrumentation, because we would end
     up re-instrumenting already instrumented function bodies we brought
     in via inlining.

     To avoid this, this pass is executed as an IPA pass before profiling.
     It is a simple wrapper around pass_early_inlining and ensures that
     inlining happens first.

   pass_ipa_inline

     This is the main pass implementing a simple greedy algorithm for
     inlining of small functions that results in overall growth of the
     compilation unit, and for inlining of functions called once.  The
     pass computes only the so-called inline plan (the representation in
     the callgraph of the inlining to be done); unlike early inlining it
     does not perform the inlining itself.

   pass_apply_inline

     This pass performs the actual inlining according to pass_ipa_inline
     on a given function.  The function body before inlining is possibly
     saved when it is needed for further inlining later.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "coverage.h"
#include "ggc.h"
#include "tree-flow.h"
/* Modes the incremental inliner operates on:

   In ALWAYS_INLINE mode, only functions marked always_inline are inlined.
   This mode is used after detecting a cycle during flattening.

   In SIZE mode, only functions that reduce the function body size after
   inlining are inlined; this is used during early inlining.

   In SPEED mode, all small functions are inlined.  This might result in
   unbounded growth of the compilation unit and is used only in
   non-unit-at-a-time mode.

   In ALL mode, everything is inlined.  This is used during flattening.  */
enum inlining_mode {
  INLINE_NONE = 0,
  INLINE_ALWAYS_INLINE,
  INLINE_SIZE,
  INLINE_SPEED,
  INLINE_ALL
};
static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *, enum inlining_mode,
				      int);
/* Statistics we collect about the inlining algorithm.  */
static int ncalls_inlined;
static int nfunctions_inlined;
static int overall_insns;
static gcov_type max_count;
/* Estimate size of the function after inlining WHAT into TO.  */

static int
cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
				     struct cgraph_node *what)
{
  int size;
  tree fndecl = what->decl, arg;
  int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);

  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
    call_insns += estimate_move_cost (TREE_TYPE (arg));
  size = (what->global.insns - call_insns) * times + to->global.insns;
  gcc_assert (size >= 0);
  return size;
}
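
/* Worked example with illustrative numbers: with a call cost of 16 insns
   and no per-argument move costs, inlining a 100-insn callee twice into a
   50-insn caller is estimated as (100 - 16) * 2 + 50 = 218 insns; each
   inlined copy wins back the cost of the call it replaces.  */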
/* E is expected to be an edge being inlined.  Clone the destination node
   of the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating
   new clones or re-using a node originally representing an out-of-line
   function call.  */
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
{
  HOST_WIDE_INT peak;
  if (duplicate)
    {
      /* We may eliminate the need for the out-of-line copy to be output.
	 In that case just go ahead and re-use it.  */
      if (!e->callee->callers->next_caller
	  && !e->callee->needed
	  && !cgraph_new_nodes
	  && flag_unit_at_a_time)
	{
	  gcc_assert (!e->callee->global.inlined_to);
	  if (DECL_SAVED_TREE (e->callee->decl))
	    overall_insns -= e->callee->global.insns, nfunctions_inlined++;
	  duplicate = false;
	}
      else
	{
	  struct cgraph_node *n;
	  n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest,
				 update_original);
	  cgraph_redirect_edge_callee (e, n);
	}
    }

  if (e->caller->global.inlined_to)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;
  e->callee->global.stack_frame_offset
    = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
  peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
    e->callee->global.inlined_to->global.estimated_stack_size = peak;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, duplicate, update_original);
}
/* Mark edge E as inlined and update the callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  */

void
cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
{
  int old_insns = 0, new_insns = 0;
  struct cgraph_node *to = NULL, *what;

  if (e->callee->inline_decl)
    cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));

  gcc_assert (e->inline_failed);
  e->inline_failed = NULL;

  if (!e->callee->global.inlined && flag_unit_at_a_time)
    DECL_POSSIBLY_INLINED (e->callee->decl) = true;
  e->callee->global.inlined = true;

  cgraph_clone_inlined_nodes (e, true, update_original);

  what = e->callee;

  /* Now update the size of the caller and of all functions the caller is
     inlined into.  */
  for (; e && !e->inline_failed; e = e->caller->callers)
    {
      old_insns = e->caller->global.insns;
      new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
						       what);
      gcc_assert (new_insns >= 0);
      to = e->caller;
      to->global.insns = new_insns;
    }
  gcc_assert (what->global.inlined_to == to);
  if (new_insns > old_insns)
    overall_insns += new_insns - old_insns;
  ncalls_inlined++;
}
/* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
   Return the following unredirected edge in the list of callers
   of EDGE->CALLEE.  */

static struct cgraph_edge *
cgraph_mark_inline (struct cgraph_edge *edge)
{
  struct cgraph_node *to = edge->caller;
  struct cgraph_node *what = edge->callee;
  struct cgraph_edge *e, *next;
  int times = 0;

  /* Look for all calls, mark them inline and clone recursively
     all inlined functions.  */
  for (e = what->callers; e; e = next)
    {
      next = e->next_caller;
      if (e->caller == to && e->inline_failed)
	{
	  cgraph_mark_inline_edge (e, true);
	  if (e == edge)
	    edge = next;
	  times++;
	}
    }
  gcc_assert (times);
  return edge;
}
/* Estimate the growth caused by inlining NODE into all callers.  */

static int
cgraph_estimate_growth (struct cgraph_node *node)
{
  int growth = 0;
  struct cgraph_edge *e;
  if (node->global.estimated_growth != INT_MIN)
    return node->global.estimated_growth;

  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
		 - e->caller->global.insns);

  /* ??? Wrong for self-recursive functions or cases where we decide not to
     inline for different reasons, but it is not a big deal: in that case
     we will keep the body around, but we will also avoid some inlining.  */
  if (!node->needed && !DECL_EXTERNAL (node->decl))
    growth -= node->global.insns;

  node->global.estimated_growth = growth;
  return growth;
}
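
/* Worked example with illustrative numbers: a function of 40 insns that is
   neither needed nor external, with two remaining call sites each estimated
   to grow its caller by 30 insns, has estimated growth 30 + 30 - 40 = 20,
   because inlining everywhere lets the offline copy be eliminated.  */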
/* Return false when inlining WHAT into TO is not a good idea, as it would
   cause too large a growth of function bodies.
   When ONE_ONLY is true, assume that only one call site is going
   to be inlined; otherwise figure out how many call sites in
   TO call WHAT and verify that all can be inlined.  */

static bool
cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
			    const char **reason, bool one_only)
{
  int times = 0;
  struct cgraph_edge *e;
  int newsize;
  int limit;
  HOST_WIDE_INT stack_size_limit, inlined_stack;

  if (one_only)
    times = 1;
  else
    for (e = to->callees; e; e = e->next_callee)
      if (e->callee == what)
	times++;

  if (to->global.inlined_to)
    to = to->global.inlined_to;

  /* When inlining a large function body called once into a small function,
     take the inlined function as the base for limiting the growth.  */
  if (to->local.self_insns > what->local.self_insns)
    limit = to->local.self_insns;
  else
    limit = what->local.self_insns;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = cgraph_estimate_size_after_inlining (times, to, what);
  if (newsize >= to->global.insns
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      if (reason)
	*reason = N_("--param large-function-growth limit reached");
      return false;
    }

  stack_size_limit = to->local.estimated_self_stack_size;

  stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;

  inlined_stack = (to->global.stack_frame_offset
		   + to->local.estimated_self_stack_size
		   + what->global.estimated_stack_size);
  if (inlined_stack > stack_size_limit
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      if (reason)
	*reason = N_("--param large-stack-frame-growth limit reached");
      return false;
    }
  return true;
}
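
/* Worked example with illustrative numbers: inlining a 100-insn callee into
   a 400-insn caller with --param large-function-growth=100 gives a base
   limit of 400 insns, raised to 400 + 400 * 100 / 100 = 800.  Inlining is
   rejected only when the estimated new size exceeds this limit, exceeds
   --param large-function-insns, and does not shrink the caller.  */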
/* Return true when function N is small enough to be inlined.  */

bool
cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
{
  tree decl = n->decl;

  if (n->inline_decl)
    decl = n->inline_decl;
  if (!DECL_INLINE (decl))
    {
      if (reason)
	*reason = N_("function not inlinable");
      return false;
    }

  if (!DECL_STRUCT_FUNCTION (decl)->cfg)
    {
      if (reason)
	*reason = N_("function body not available");
      return false;
    }

  if (DECL_DECLARED_INLINE_P (decl))
    {
      if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
	{
	  if (reason)
	    *reason = N_("--param max-inline-insns-single limit reached");
	  return false;
	}
    }
  else
    {
      if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
	{
	  if (reason)
	    *reason = N_("--param max-inline-insns-auto limit reached");
	  return false;
	}
    }

  return true;
}
/* Return true when inlining WHAT would create recursive inlining.
   We call recursive inlining all cases where the same function appears
   more than once in a single recursion nest path in the inline graph.  */

static bool
cgraph_recursive_inlining_p (struct cgraph_node *to,
			     struct cgraph_node *what,
			     const char **reason)
{
  bool recursive;
  if (to->global.inlined_to)
    recursive = what->decl == to->global.inlined_to->decl;
  else
    recursive = what->decl == to->decl;
  /* Marking a recursive function inline has sane semantics and thus we
     should not warn on it.  */
  if (recursive && reason)
    *reason = (what->local.disregard_inline_limits
	       ? N_("recursive inlining") : "");
  return recursive;
}
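
/* For instance (foo and bar are hypothetical names used for illustration):
   if bar was inlined into foo and the inline copy of bar calls foo,
   inlining that call would place foo back into its own body.  The copy's
   inlined_to is foo, matching WHAT, so the call is flagged as recursive.  */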
/* Return true if the call can be hot.  */
static bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
	  <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
      || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
    return false;
  if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
    return true;
  if (flag_guess_branch_prob
      && edge->frequency < (CGRAPH_FREQ_MAX
			    / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}
/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed, so the
   metrics may accurately depend on values such as the number of inlinable
   callers of the function or the function body size.  */

static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
  int badness;
  int growth =
    cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

  growth -= edge->caller->global.insns;

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    badness = INT_MIN - growth;

  /* When profiling is available, base priorities on -(#calls / growth),
     so we optimize for the overall number of "executed" inlined calls.  */
  else if (max_count)
    badness = ((int)((double)edge->count * INT_MIN / max_count)) / growth;

  /* When a function-local profile is available, base priorities on
     growth / frequency, so we optimize for the overall frequency of inlined
     calls.  This is not too accurate since while the call might be frequent
     within the function, the function itself is infrequent.

     Another objective to optimize for is the number of different calls
     inlined.  We add the estimated growth after inlining all functions to
     bias the priorities slightly in this direction (so that less frequently
     called functions of the same size get priority).  */
  else if (flag_guess_branch_prob)
    {
      int div = edge->frequency * 100 / CGRAPH_FREQ_BASE;
      int growth =
	cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
      growth -= edge->caller->global.insns;
      badness = growth * 256;

      /* Decrease badness if call is nested.  */
      /* Compress the range so we don't overflow.  */
      if (div > 256)
	div = 256 + ceil_log2 (div) - 8;
      if (div < 1)
	div = 1;
      if (badness > 0)
	badness /= div;
      badness += cgraph_estimate_growth (edge->callee);
    }
  /* When a function-local profile is not available, or it does not give
     useful information (i.e. the frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall
     number of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (edge->loop_nest, 8);
      badness = cgraph_estimate_growth (edge->callee) * 256;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
	badness >>= nest;
      else
	badness <<= nest;
    }
  /* Make recursive inlining happen always after other inlining is done.  */
  if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
    return badness + 1;
  else
    return badness;
}
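
/* Illustrative example of the guessed-profile metric (numbers are
   hypothetical): a call executed once per caller invocation has frequency
   CGRAPH_FREQ_BASE, so div = 100; an edge growing its caller by 20 insns
   then starts from badness 20 * 256 / 100 = 51 before the overall-growth
   bias is added.  Hotter call sites yield a larger divisor and therefore
   sort earlier in the heap.  */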
/* Recompute heap nodes for each of the caller edges.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
		    bitmap updated_nodes)
{
  struct cgraph_edge *edge;
  const char *failed_reason;

  if (!node->local.inlinable || node->local.disregard_inline_limits
      || node->global.inlined_to)
    return;
  if (bitmap_bit_p (updated_nodes, node->uid))
    return;
  bitmap_set_bit (updated_nodes, node->uid);
  node->global.estimated_growth = INT_MIN;

  if (!node->local.inlinable)
    return;
  /* Prune out edges we won't inline into anymore.  */
  if (!cgraph_default_inline_p (node, &failed_reason))
    {
      for (edge = node->callers; edge; edge = edge->next_caller)
	if (edge->aux)
	  {
	    fibheap_delete_node (heap, edge->aux);
	    edge->aux = NULL;
	    if (edge->inline_failed)
	      edge->inline_failed = failed_reason;
	  }
      return;
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
	int badness = cgraph_edge_badness (edge);
	if (edge->aux)
	  {
	    fibnode_t n = edge->aux;
	    gcc_assert (n->data == edge);
	    if (n->key == badness)
	      continue;

	    /* fibheap_replace_key can only decrease the keys; it fails
	       (returns NULL) on an increase, so delete and re-insert.  */
	    if (fibheap_replace_key (heap, n, badness))
	      continue;
	    fibheap_delete_node (heap, edge->aux);
	  }
	edge->aux = fibheap_insert (heap, badness, edge);
      }
}
/* Recompute heap nodes for each of the caller edges of each of the
   callees.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
		    bitmap updated_nodes)
{
  struct cgraph_edge *e;
  node->global.estimated_growth = INT_MIN;

  for (e = node->callees; e; e = e->next_callee)
    if (e->inline_failed)
      update_caller_keys (heap, e->callee, updated_nodes);
    else if (!e->inline_failed)
      update_callee_keys (heap, e->callee, updated_nodes);
}
/* Enqueue all recursive calls from NODE into the priority queue depending
   on how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
			fibheap_t heap)
{
  static int priority;
  struct cgraph_edge *e;
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node)
      {
	/* When profile feedback is available, prioritize by expected number
	   of calls.  Without profile feedback we maintain a simple queue
	   to order candidates by recursive depths.  */
	fibheap_insert (heap,
			!max_count ? priority++
			: -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
			e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
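
/* The key scaling above divides every edge count by max_count / 2^24
   (rounded up), so even the hottest edge maps into roughly a 24-bit range
   that safely fits a fibheap key; negating the value makes hotter
   recursive calls extract first from the min-heap.  */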
/* Decide on recursive inlining: in the case the function has recursive
   calls, inline until the body size reaches the given limit.  */

static bool
cgraph_decide_recursive_inlining (struct cgraph_node *node)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
  int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
  fibheap_t heap;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone, *next;
  int depth = 0;
  int n = 0;

  if (DECL_DECLARED_INLINE_P (node->decl))
    {
      limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
      max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
    }

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (!max_depth
      || cgraph_estimate_size_after_inlining (1, node, node) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Performing recursive inlining on %s\n",
	     cgraph_node_name (node));

  /* We need the original clone to copy around.  */
  master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1, false);
  master_clone->needed = true;
  for (e = master_clone->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      cgraph_clone_inlined_nodes (e, true, false);

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap)
	 && (cgraph_estimate_size_after_inlining (1, node, master_clone)
	     <= limit))
    {
      struct cgraph_edge *curr = fibheap_extract_min (heap);
      struct cgraph_node *cnode;

      depth = 1;
      for (cnode = curr->caller;
	   cnode->global.inlined_to; cnode = cnode->callers->caller)
	if (node->decl == curr->callee->decl)
	  depth++;
      if (depth > max_depth)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "   maximal depth reached\n");
	  continue;
	}

      if (max_count)
	{
	  if (!cgraph_maybe_hot_edge_p (curr))
	    {
	      if (dump_file)
		fprintf (dump_file, "   Not inlining cold call\n");
	      continue;
	    }
	  if (curr->count * 100 / node->count < probability)
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "   Probability of edge is too small\n");
	      continue;
	    }
	}

      if (dump_file)
	{
	  fprintf (dump_file,
		   "   Inlining call of depth %i", depth);
	  if (node->count)
	    fprintf (dump_file, " called approx. %.2f times per call",
		     (double)curr->count / node->count);
	  fprintf (dump_file, "\n");
	}
      cgraph_redirect_edge_callee (curr, master_clone);
      cgraph_mark_inline_edge (curr, false);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");

  fibheap_delete (heap);
  if (dump_file)
    fprintf (dump_file,
	     "\n   Inlined %i times, body grown from %i to %i insns\n", n,
	     master_clone->global.insns, node->global.insns);

  /* Remove the master clone we used for inlining.  We rely on the fact
     that clones inlined into the master clone get queued just before the
     master clone, so we don't need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = next)
    {
      next = node->next;
      if (node->global.inlined_to == master_clone)
	cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  /* FIXME: Recursive inlining actually reduces the number of calls of the
     function.  At this point we should probably walk the function and
     inline clones and compensate the counts accordingly.  This probably
     doesn't matter much in practice.  */
  return n > 0;
}
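
/* In other words, a function declared inline may be self-inlined until its
   body reaches --param max-inline-insns-recursive, at up to
   --param max-inline-recursive-depth nested copies; with profile feedback,
   each candidate edge must additionally carry at least
   min-inline-recursive-probability percent of the function's entry count.  */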
/* Set inline_failed for all callers of a given function to REASON.  */

static void
cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
{
  struct cgraph_edge *e;

  if (dump_file)
    fprintf (dump_file, "Inlining failed: %s\n", reason);
  for (e = node->callers; e; e = e->next_caller)
    if (e->inline_failed)
      e->inline_failed = reason;
}
/* Given a whole-compilation-unit estimate of INSNS, compute how large we
   can allow the unit to grow.  */

static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((HOST_WIDEST_INT) max_insns
	  * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
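
/* Worked example with illustrative numbers: a 50000-insn unit compiled
   with --param inline-unit-growth=30 may grow to 50000 * 130 / 100 =
   65000 insns; units smaller than --param large-unit-insns are first
   rounded up to that value, so tiny programs still get some headroom.  */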
/* We use a greedy algorithm for inlining of small functions:
   all inline candidates are put into a prioritized heap based on the
   estimated growth of the overall number of instructions, and the
   estimates are then updated as inlining proceeds.  */

static void
cgraph_decide_inlining_of_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  const char *failed_reason;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
  int min_insns, max_insns;

  if (dump_file)
    fprintf (dump_file, "\nDeciding on smaller functions:\n");

  /* Put all inline candidates into the heap.  */

  for (node = cgraph_nodes; node; node = node->next)
    {
      if (!node->local.inlinable || !node->callers
	  || node->local.disregard_inline_limits)
	continue;
      if (dump_file)
	fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));

      node->global.estimated_growth = INT_MIN;
      if (!cgraph_default_inline_p (node, &failed_reason))
	{
	  cgraph_set_inline_failed (node, failed_reason);
	  continue;
	}

      for (edge = node->callers; edge; edge = edge->next_caller)
	if (edge->inline_failed)
	  {
	    gcc_assert (!edge->aux);
	    edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
	  }
    }

  max_insns = compute_max_insns (overall_insns);
  min_insns = overall_insns;

  while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
    {
      int old_insns = overall_insns;
      struct cgraph_node *where;
      int growth =
	cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);

      growth -= edge->caller->global.insns;

      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nConsidering %s with %i insns\n",
		   cgraph_node_name (edge->callee),
		   edge->callee->global.insns);
	  fprintf (dump_file,
		   " to be inlined into %s\n"
		   " Estimated growth after inlined into all callees is %+i insns.\n"
		   " Estimated badness is %i, frequency %.2f.\n",
		   cgraph_node_name (edge->caller),
		   cgraph_estimate_growth (edge->callee),
		   cgraph_edge_badness (edge),
		   edge->frequency / (double)CGRAPH_FREQ_BASE);
	  if (edge->count)
	    fprintf (dump_file, " Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
	}
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
	continue;

      /* When profile info is not ready, we do not weight the position of
	 the call within the procedure in any way.  This means that if a
	 call of function A from function B seems profitable to inline, the
	 recursive call of function A in the inline copy of A in B will look
	 profitable too, and we end up inlining until reaching the maximal
	 function growth.  This is not a good idea, so prohibit the
	 recursive inlining.

	 ??? When the frequencies are taken into account we might not need
	 this restriction.  */
      if (!max_count)
	{
	  where = edge->caller;
	  while (where->global.inlined_to)
	    {
	      if (where->decl == edge->callee->decl)
		break;
	      where = where->callers->caller;
	    }
	  if (where->global.inlined_to)
	    {
	      edge->inline_failed
		= (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
	      if (dump_file)
		fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
	      continue;
	    }
	}

      if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
	{
	  if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
					    &edge->inline_failed))
	    {
	      edge->inline_failed = N_("call is unlikely");
	      if (dump_file)
		fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
	    }
	  continue;
	}
      if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
	{
	  if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
					    &edge->inline_failed))
	    {
	      if (dump_file)
		fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
	    }
	  continue;
	}
      if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
				       &edge->inline_failed))
	{
	  where = edge->caller;
	  if (where->global.inlined_to)
	    where = where->global.inlined_to;
	  if (!cgraph_decide_recursive_inlining (where))
	    continue;
	  update_callee_keys (heap, where, updated_nodes);
	}
      else
	{
	  struct cgraph_node *callee;
	  if (!cgraph_check_inline_limits (edge->caller, edge->callee,
					   &edge->inline_failed, true))
	    {
	      if (dump_file)
		fprintf (dump_file, " Not inlining into %s:%s.\n",
			 cgraph_node_name (edge->caller), edge->inline_failed);
	      continue;
	    }
	  callee = edge->callee;
	  cgraph_mark_inline_edge (edge, true);
	  update_callee_keys (heap, callee, updated_nodes);
	}
      where = edge->caller;
      if (where->global.inlined_to)
	where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties such as
	 the number of inlinable calls and the size of the function body.
	 After inlining these properties might change for the function we
	 inlined into (since its body size changed) and for the functions
	 called by the function we inlined (since the number of their
	 inlinable callers might change).  */
      update_caller_keys (heap, where, updated_nodes);
      bitmap_clear (updated_nodes);

      if (dump_file)
	fprintf (dump_file,
		 " Inlined into %s which now has %i insns,"
		 " net change of %+i insns.\n",
		 cgraph_node_name (edge->caller),
		 edge->caller->global.insns,
		 overall_insns - old_insns);
      if (min_insns > overall_insns)
	{
	  min_insns = overall_insns;
	  max_insns = compute_max_insns (min_insns);

	  if (dump_file)
	    fprintf (dump_file, "New minimal insns reached: %i\n", min_insns);
	}
    }
  while ((edge = fibheap_extract_min (heap)) != NULL)
    {
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
	  && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
					   &edge->inline_failed))
	edge->inline_failed = N_("--param inline-unit-growth limit reached");
    }
  fibheap_delete (heap);
  BITMAP_FREE (updated_nodes);
}
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static unsigned int
cgraph_decide_inlining (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int old_insns = 0;
  int i;
  int initial_insns = 0;

  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed && (node->needed || node->reachable))
      {
	struct cgraph_edge *e;

	initial_insns += node->local.self_insns;
	gcc_assert (node->local.self_insns == node->global.insns);
	for (e = node->callees; e; e = e->next_callee)
	  if (max_count < e->count)
	    max_count = e->count;
      }
  overall_insns = initial_insns;
  gcc_assert (!max_count || (profile_info && flag_branch_probabilities));

  nnodes = cgraph_postorder (order);

  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on inlining.  Starting with %i insns.\n",
	     initial_insns);

  for (node = cgraph_nodes; node; node = node->next)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nInlining always_inline functions:\n");

  /* In the first pass mark all always_inline edges.  Do this with a priority
     so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      struct cgraph_edge *e, *next;

      node = order[i];

      /* Handle nodes to be flattened, but don't update overall unit size.  */
      if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Flattening %s\n", cgraph_node_name (node));
	  cgraph_decide_inlining_incrementally (node, INLINE_ALL, 0);
	}

      if (!node->local.disregard_inline_limits)
	continue;
      if (dump_file)
	fprintf (dump_file,
		 "\nConsidering %s %i insns (always inline)\n",
		 cgraph_node_name (node), node->global.insns);
      old_insns = overall_insns;
      for (e = node->callers; e; e = next)
	{
	  next = e->next_caller;
	  if (!e->inline_failed)
	    continue;
	  if (cgraph_recursive_inlining_p (e->caller, e->callee,
					   &e->inline_failed))
	    continue;
	  cgraph_mark_inline_edge (e, true);
	  if (dump_file)
	    fprintf (dump_file,
		     " Inlined into %s which now has %i insns.\n",
		     cgraph_node_name (e->caller),
		     e->caller->global.insns);
	}
      /* Inlining self-recursive functions might introduce new calls to
	 them that we didn't see in the loop above.  Fill in the proper
	 reason why the inlining failed.  */
      for (e = node->callers; e; e = e->next_caller)
	if (e->inline_failed)
	  e->inline_failed = N_("recursive inlining");
      if (dump_file)
	fprintf (dump_file,
		 " Inlined for a net change of %+i insns.\n",
		 overall_insns - old_insns);
    }

  if (!flag_really_no_inline)
    cgraph_decide_inlining_of_small_functions ();

  if (!flag_really_no_inline
      && flag_inline_functions_called_once)
    {
      if (dump_file)
	fprintf (dump_file, "\nDeciding on functions called once:\n");

      /* And finally decide what functions are called once.  */

      for (i = nnodes - 1; i >= 0; i--)
	{
	  node = order[i];

	  if (node->callers && !node->callers->next_caller && !node->needed
	      && node->local.inlinable && node->callers->inline_failed
	      && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file,
			   "\nConsidering %s %i insns.\n",
			   cgraph_node_name (node), node->global.insns);
		  fprintf (dump_file,
			   " Called once from %s %i insns.\n",
			   cgraph_node_name (node->callers->caller),
			   node->callers->caller->global.insns);
		}

	      old_insns = overall_insns;

	      if (cgraph_check_inline_limits (node->callers->caller, node,
					      NULL, false))
		{
		  cgraph_mark_inline (node->callers);
		  if (dump_file)
		    fprintf (dump_file,
			     " Inlined into %s which now has %i insns"
			     " for a net change of %+i insns.\n",
			     cgraph_node_name (node->callers->caller),
			     node->callers->caller->global.insns,
			     overall_insns - old_insns);
		}
	      else if (dump_file)
		fprintf (dump_file,
			 " Inline limit reached, not inlined.\n");
	    }
	}
    }

  if (dump_file)
    fprintf (dump_file,
	     "\nInlined %i calls, eliminated %i functions, "
	     "%i insns turned to %i insns.\n\n",
	     ncalls_inlined, nfunctions_inlined, initial_insns,
	     overall_insns);
  free (order);
  return 0;
}
/* Try to inline edge E from the incremental inliner.  MODE specifies the
   mode of inlining.

   We detect cycles by storing the mode of the inliner into a cgraph_node
   the last time we visited it in the recursion.  In general, when the mode
   is set, we have recursive inlining; but as a special case, we want to
   try harder to inline ALWAYS_INLINE functions: consider the callgraph
   a->b->c->b, with a being flatten and b being always inline.  Flattening
   'a' will collapse a->b->c before hitting the cycle.  To accommodate
   always inline, we however need to inline a->b->c->b.

   So after hitting a cycle for the first time, we switch into
   ALWAYS_INLINE mode and stop inlining only after hitting ALWAYS_INLINE
   in ALWAYS_INLINE mode.  */
static bool
try_inline (struct cgraph_edge *e, enum inlining_mode mode, int depth)
{
  struct cgraph_node *callee = e->callee;
  enum inlining_mode callee_mode = (size_t) callee->aux;
  bool always_inline = e->callee->local.disregard_inline_limits;

  /* Have we hit a cycle?  */
  if (callee_mode)
    {
      /* It is the first time we see it, we are not in ALWAYS_INLINE-only
	 mode yet, and the function in question is always_inline.  */
      if (always_inline && mode != INLINE_ALWAYS_INLINE)
	{
	  if (dump_file)
	    {
	      indent_to (dump_file, depth);
	      fprintf (dump_file,
		       "Hit cycle in %s, switching to always inline only.\n",
		       cgraph_node_name (callee));
	    }
	  mode = INLINE_ALWAYS_INLINE;
	}
      /* Otherwise it is time to give up.  */
      else
	{
	  if (dump_file)
	    {
	      indent_to (dump_file, depth);
	      fprintf (dump_file,
		       "Not inlining %s into %s to avoid cycle.\n",
		       cgraph_node_name (callee),
		       cgraph_node_name (e->caller));
	    }
	  e->inline_failed = (e->callee->local.disregard_inline_limits
			      ? N_("recursive inlining") : "");
	  return false;
	}
    }

  callee->aux = (void *)(size_t) mode;
  if (dump_file)
    {
      indent_to (dump_file, depth);
      fprintf (dump_file, " Inlining %s into %s.\n",
	       cgraph_node_name (e->callee),
	       cgraph_node_name (e->caller));
    }
  if (e->inline_failed)
    cgraph_mark_inline (e);

  /* In order to fully inline always_inline functions at -O0, we need to
     recurse here, since the inlined functions might not be processed by
     incremental inlining at all yet.

     Also flattening needs to be done recursively.  */
  if (!flag_unit_at_a_time || mode == INLINE_ALL || always_inline)
    cgraph_decide_inlining_incrementally (e->callee, mode, depth + 1);
  callee->aux = (void *)(size_t) callee_mode;
  return true;
}
/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.
   DEPTH is the depth of recursion, used only for debug output.  */

static bool
cgraph_decide_inlining_incrementally (struct cgraph_node *node,
				      enum inlining_mode mode,
				      int depth)
{
  struct cgraph_edge *e;
  bool inlined = false;
  const char *failed_reason;
  enum inlining_mode old_mode;

#ifdef ENABLE_CHECKING
  verify_cgraph_node (node);
#endif

  old_mode = (size_t) node->aux;

  if (mode != INLINE_ALWAYS_INLINE
      && lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      if (dump_file)
	{
	  indent_to (dump_file, depth);
	  fprintf (dump_file, "Flattening %s\n", cgraph_node_name (node));
	}
      mode = INLINE_ALL;
    }

  node->aux = (void *)(size_t) mode;

  /* First of all look for always inline functions.  */
  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->callee->local.disregard_inline_limits
	  && (mode != INLINE_ALL || !e->callee->local.inlinable))
	continue;
      /* When the edge is already inlined, we just need to recurse into
	 it in order to fully flatten the leaves.  */
      if (!e->inline_failed && mode == INLINE_ALL)
	{
	  inlined |= try_inline (e, mode, depth);
	  continue;
	}
      if (dump_file)
	{
	  indent_to (dump_file, depth);
	  fprintf (dump_file,
		   "Considering to always inline inline candidate %s.\n",
		   cgraph_node_name (e->callee));
	}
      if (cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed))
	{
	  if (dump_file)
	    {
	      indent_to (dump_file, depth);
	      fprintf (dump_file, "Not inlining: recursive call.\n");
	    }
	  continue;
	}
      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	  != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
	{
	  if (dump_file)
	    {
	      indent_to (dump_file, depth);
	      fprintf (dump_file, "Not inlining: SSA form does not match.\n");
	    }
	  continue;
	}
      if (!DECL_SAVED_TREE (e->callee->decl) && !e->callee->inline_decl)
	{
	  if (dump_file)
	    {
	      indent_to (dump_file, depth);
	      fprintf (dump_file,
		       "Not inlining: Function body no longer available.\n");
	    }
	  continue;
	}
      inlined |= try_inline (e, mode, depth);
    }

  /* Now do the automatic inlining.  */
  if (!flag_really_no_inline && mode != INLINE_ALL
      && mode != INLINE_ALWAYS_INLINE)
    for (e = node->callees; e; e = e->next_callee)
      {
	if (!e->callee->local.inlinable
	    || !e->inline_failed
	    || e->callee->local.disregard_inline_limits)
	  continue;
	if (dump_file)
	  fprintf (dump_file, "Considering inline candidate %s.\n",
		   cgraph_node_name (e->callee));
	if (cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed))
	  {
	    if (dump_file)
	      {
		indent_to (dump_file, depth);
		fprintf (dump_file, "Not inlining: recursive call.\n");
	      }
	    continue;
	  }
	if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	    != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->callee->decl)))
	  {
	    if (dump_file)
	      {
		indent_to (dump_file, depth);
		fprintf (dump_file, "Not inlining: SSA form does not match.\n");
	      }
	    continue;
	  }
	/* When the function body would grow and inlining the function
	   won't eliminate the need for an offline copy of the function,
	   don't inline.  */
	if (mode == INLINE_SIZE
	    && (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
		> e->caller->global.insns)
	    && cgraph_estimate_growth (e->callee) > 0)
	  {
	    if (dump_file)
	      {
		indent_to (dump_file, depth);
		fprintf (dump_file,
			 "Not inlining: code size would grow by %i insns.\n",
			 cgraph_estimate_size_after_inlining (1, e->caller,
							      e->callee)
			 - e->caller->global.insns);
	      }
	    continue;
	  }
	if (!cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
					 false))
	  {
	    if (dump_file)
	      {
		indent_to (dump_file, depth);
		fprintf (dump_file, "Not inlining: %s.\n", e->inline_failed);
	      }
	    continue;
	  }
	if (!DECL_SAVED_TREE (e->callee->decl) && !e->callee->inline_decl)
	  {
	    if (dump_file)
	      {
		indent_to (dump_file, depth);
		fprintf (dump_file,
			 "Not inlining: Function body no longer available.\n");
	      }
	    continue;
	  }
	if (cgraph_default_inline_p (e->callee, &failed_reason))
	  inlined |= try_inline (e, mode, depth);
	else if (!flag_unit_at_a_time)
	  e->inline_failed = failed_reason;
      }
  node->aux = (void *)(size_t) old_mode;
  return inlined;
}
/* When inlining shall be performed.  */
static bool
cgraph_gate_inlining (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_ipa_inline =
{
  "inline",				/* name */
  cgraph_gate_inlining,			/* gate */
  cgraph_decide_inlining,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  TODO_remove_functions,		/* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func
  | TODO_remove_functions,		/* todo_flags_finish */
  0					/* letter */
};
/* Because inlining might remove no-longer-reachable nodes, we need to
   keep the array visible to the garbage collector to avoid reading
   collected-out nodes.  */
static int nnodes;
static GTY ((length ("nnodes"))) struct cgraph_node **order;

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   the later real inlining pass for testcases with very many function calls.  */
static unsigned int
cgraph_early_inlining (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);
  unsigned int todo = 0;

  if (sorrycount || errorcount)
    return 0;
  if (cgraph_decide_inlining_incrementally (node,
					    flag_unit_at_a_time || optimize_size
					    ? INLINE_SIZE : INLINE_SPEED, 0))
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  return todo;
}
/* When inlining shall be performed.  */
static bool
cgraph_gate_early_inlining (void)
{
  return flag_inline_trees && flag_early_inlining;
}

struct tree_opt_pass pass_early_inline =
{
  "einline",				/* name */
  cgraph_gate_early_inlining,		/* gate */
  cgraph_early_inlining,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func,			/* todo_flags_finish */
  0					/* letter */
};
/* When inlining shall be performed.  */
static bool
cgraph_gate_ipa_early_inlining (void)
{
  return (flag_inline_trees && flag_early_inlining
	  && (flag_branch_probabilities || flag_test_coverage
	      || profile_arc_flag));
}

/* IPA pass wrapper for the early inlining pass.  We need to run early
   inlining before tree profiling, so we have a stand-alone IPA pass for
   doing so.  */
struct tree_opt_pass pass_ipa_early_inline =
{
  "einline_ipa",			/* name */
  cgraph_gate_ipa_early_inlining,	/* gate */
  NULL,					/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_cgraph,			/* todo_flags_finish */
  0					/* letter */
};
/* Compute parameters of functions used by the inliner.  */
static unsigned int
compute_inline_parameters (void)
{
  struct cgraph_node *node = cgraph_node (current_function_decl);

  gcc_assert (!node->global.inlined_to);
  node->local.estimated_self_stack_size = estimated_stack_frame_size ();
  node->global.estimated_stack_size = node->local.estimated_self_stack_size;
  node->global.stack_frame_offset = 0;
  node->local.inlinable = tree_inlinable_function_p (current_function_decl);
  node->local.self_insns = estimate_num_insns (current_function_decl,
					       &eni_inlining_weights);
  if (node->local.inlinable)
    node->local.disregard_inline_limits
      = lang_hooks.tree_inlining.disregard_inline_limits (current_function_decl);
  if (flag_really_no_inline && !node->local.disregard_inline_limits)
    node->local.inlinable = 0;
  /* Inlining characteristics are maintained by cgraph_mark_inline.  */
  node->global.insns = node->local.self_insns;
  return 0;
}
/* When inlining shall be performed.  */
static bool
gate_inline_passes (void)
{
  return flag_inline_trees;
}

struct tree_opt_pass pass_inline_parameters =
{
  NULL,					/* name */
  gate_inline_passes,			/* gate */
  compute_inline_parameters,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0,					/* todo_flags_finish */
  0					/* letter */
};
/* Apply inline plan to the function.  */
static unsigned int
apply_inline (void)
{
  unsigned int todo = 0;
  struct cgraph_edge *e;
  struct cgraph_node *node = cgraph_node (current_function_decl);

  /* Even when not optimizing, ensure that always_inline functions get
     inlined.  */
  if (!optimize)
    cgraph_decide_inlining_incrementally (node, INLINE_SPEED, 0);

  /* We might need the body of this function so that we can expand
     it inline somewhere else.  */
  if (cgraph_preserve_function_body_p (current_function_decl))
    save_inline_function_body (node);

  for (e = node->callees; e; e = e->next_callee)
    if (!e->inline_failed || warn_inline)
      break;

  if (e)
    {
      timevar_push (TV_INTEGRATION);
      todo = optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }
  /* In non-unit-at-a-time we must mark all referenced functions as needed.  */
  if (!flag_unit_at_a_time)
    {
      struct cgraph_edge *e;
      for (e = node->callees; e; e = e->next_callee)
	if (e->callee->analyzed)
	  cgraph_mark_needed_node (e->callee);
    }
  return todo | execute_fixup_cfg ();
}
struct tree_opt_pass pass_apply_inline =
{
  "apply_inline",			/* name */
  NULL,					/* gate */
  apply_inline,				/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_INLINE_HEURISTICS,			/* tv_id */
  0,					/* properties_required */
  PROP_cfg,				/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_verify_flow
  | TODO_verify_stmts,			/* todo_flags_finish */
  0					/* letter */
};

#include "gt-ipa-inline.h"