gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /*  Inlining decision heuristics
  23
  24     We separate inlining decisions from the inliner itself and store it
  25     inside callgraph as so called inline plan.  Refer to cgraph.c
  26     documentation about particular representation of inline plans in the
  27     callgraph.
  28
  29     There are three major parts of this file:
  30
  31     cgraph_mark_inline implementation
  32
  33       This function allows to mark given call inline and performs necessary
  34       modifications of cgraph (production of the clones and updating overall
  35       statistics)
  36
  37     inlining heuristics limits
  38
  39       These functions allow to check that particular inlining is allowed
  40       by the limits specified by user (allowed function growth, overall unit
  41       growth and so on).
  42
  43     inlining heuristics
  44
  45       This is implementation of IPA pass aiming to get as much of benefit
  46       from inlining obeying the limits checked above.
  47
  48       The implementation of particular heuristics is separated from
  49       the rest of code to make it easier to replace it with more complicated
  50       implementation in the future.  The rest of inlining code acts as a
  51       library aimed to modify the callgraph and verify that the parameters
  52       on code size growth fits.
  53
  54       To mark given call inline, use cgraph_mark_inline function, the
  55       verification is performed by cgraph_default_inline_p and
  56       cgraph_check_inline_limits.
  57
  58       The heuristics implements simple knapsack style algorithm ordering
  59       all functions by their "profitability" (estimated by code size growth)
  60       and inlining them in priority order.
  61
  62       cgraph_decide_inlining implements heuristics taking whole callgraph
  63       into account, while cgraph_decide_inlining_incrementally considers
  64       only one function at a time and is used in non-unit-at-a-time mode.  */
  65
  66 #include "config.h"
  67 #include "system.h"
  68 #include "coretypes.h"
  69 #include "tm.h"
  70 #include "tree.h"
  71 #include "tree-inline.h"
  72 #include "langhooks.h"
  73 #include "flags.h"
  74 #include "cgraph.h"
  75 #include "diagnostic.h"
  76 #include "timevar.h"
  77 #include "params.h"
  78 #include "fibheap.h"
  79 #include "intl.h"
  80 #include "tree-pass.h"
  81 #include "hashtab.h"
  82 #include "coverage.h"
  83 #include "ggc.h"
  84
  85 /* Statistics we collect about inlining algorithm.  */
  86 static int ncalls_inlined;
  87 static int nfunctions_inlined;
  88 static int initial_insns;
  89 static int overall_insns;
  90 static int max_insns;
  91 static gcov_type max_count;
  92
  93 /* Estimate size of the function after inlining WHAT into TO.  */
  94
  95 static int
  96 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
  97                                      struct cgraph_node *what)
  98 {
  99   int size;
 100   tree fndecl = what->decl, arg;
 101   int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);
 102
 103   for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
 104     call_insns += estimate_move_cost (TREE_TYPE (arg));
 105   size = (what->global.insns - call_insns) * times + to->global.insns;
 106   gcc_assert (size >= 0);
 107   return size;
 108 }
 109
 110 /* E is expected to be an edge being inlined.  Clone destination node of
 111    the edge and redirect it to the new clone.
 112    DUPLICATE is used for bookkeeping on whether we are actually creating new
 113    clones or re-using node originally representing out-of-line function call.
 114    */
 115 void
 116 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
 117 {
 118   HOST_WIDE_INT peak;
 119   if (duplicate)
 120     {
 121       /* We may eliminate the need for out-of-line copy to be output.
 122          In that case just go ahead and re-use it.  */
 123       if (!e->callee->callers->next_caller
 124           && !e->callee->needed
 125           && flag_unit_at_a_time)
 126         {
 127           gcc_assert (!e->callee->global.inlined_to);
 128           if (DECL_SAVED_TREE (e->callee->decl))
 129             overall_insns -= e->callee->global.insns, nfunctions_inlined++;
 130           duplicate = false;
 131         }
 132       else
 133         {
 134           struct cgraph_node *n;
 135           n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
 136                                  update_original);
 137           cgraph_redirect_edge_callee (e, n);
 138         }
 139     }
 140
 141   if (e->caller->global.inlined_to)
 142     e->callee->global.inlined_to = e->caller->global.inlined_to;
 143   else
 144     e->callee->global.inlined_to = e->caller;
 145   e->callee->global.stack_frame_offset
 146     = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
 147   peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
 148   if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
 149     e->callee->global.inlined_to->global.estimated_stack_size = peak;
 150
 151   /* Recursively clone all bodies.  */
 152   for (e = e->callee->callees; e; e = e->next_callee)
 153     if (!e->inline_failed)
 154       cgraph_clone_inlined_nodes (e, duplicate, update_original);
 155 }
 156
 157 /* Mark edge E as inlined and update callgraph accordingly.
 158    UPDATE_ORIGINAL specify whether profile of original function should be
 159    updated. */
 160
 161 void
 162 cgraph_mark_inline_edge (struct cgraph_edge *e, bool update_original)
 163 {
 164   int old_insns = 0, new_insns = 0;
 165   struct cgraph_node *to = NULL, *what;
 166
 167   if (e->callee->inline_decl)
 168     cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));
 169
 170   gcc_assert (e->inline_failed);
 171   e->inline_failed = NULL;
 172
 173   if (!e->callee->global.inlined && flag_unit_at_a_time)
 174     DECL_POSSIBLY_INLINED (e->callee->decl) = true;
 175   e->callee->global.inlined = true;
 176
 177   cgraph_clone_inlined_nodes (e, true, update_original);
 178
 179   what = e->callee;
 180
 181   /* Now update size of caller and all functions caller is inlined into.  */
 182   for (;e && !e->inline_failed; e = e->caller->callers)
 183     {
 184       old_insns = e->caller->global.insns;
 185       new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
 186                                                        what);
 187       gcc_assert (new_insns >= 0);
 188       to = e->caller;
 189       to->global.insns = new_insns;
 190     }
 191   gcc_assert (what->global.inlined_to == to);
 192   if (new_insns > old_insns)
 193     overall_insns += new_insns - old_insns;
 194   ncalls_inlined++;
 195 }
 196
 197 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
 198    Return following unredirected edge in the list of callers
 199    of EDGE->CALLEE  */
 200
 201 static struct cgraph_edge *
 202 cgraph_mark_inline (struct cgraph_edge *edge)
 203 {
 204   struct cgraph_node *to = edge->caller;
 205   struct cgraph_node *what = edge->callee;
 206   struct cgraph_edge *e, *next;
 207   int times = 0;
 208
 209   /* Look for all calls, mark them inline and clone recursively
 210      all inlined functions.  */
 211   for (e = what->callers; e; e = next)
 212     {
 213       next = e->next_caller;
 214       if (e->caller == to && e->inline_failed)
 215         {
 216           cgraph_mark_inline_edge (e, true);
 217           if (e == edge)
 218             edge = next;
 219           times++;
 220         }
 221     }
 222   gcc_assert (times);
 223   return edge;
 224 }
 225
 226 /* Estimate the growth caused by inlining NODE into all callees.  */
 227
 228 static int
 229 cgraph_estimate_growth (struct cgraph_node *node)
 230 {
 231   int growth = 0;
 232   struct cgraph_edge *e;
 233   if (node->global.estimated_growth != INT_MIN)
 234     return node->global.estimated_growth;
 235
 236   for (e = node->callers; e; e = e->next_caller)
 237     if (e->inline_failed)
 238       growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
 239                  - e->caller->global.insns);
 240
 241   /* ??? Wrong for self recursive functions or cases where we decide to not
 242      inline for different reasons, but it is not big deal as in that case
 243      we will keep the body around, but we will also avoid some inlining.  */
 244   if (!node->needed && !DECL_EXTERNAL (node->decl))
 245     growth -= node->global.insns;
 246
 247   node->global.estimated_growth = growth;
 248   return growth;
 249 }
 250
 251 /* Return false when inlining WHAT into TO is not good idea
 252    as it would cause too large growth of function bodies.
 253    When ONE_ONLY is true, assume that only one call site is going
 254    to be inlined, otherwise figure out how many call sites in
 255    TO calls WHAT and verify that all can be inlined.
 256    */
 257
 258 static bool
 259 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
 260                             const char **reason, bool one_only)
 261 {
 262   int times = 0;
 263   struct cgraph_edge *e;
 264   int newsize;
 265   int limit;
 266   HOST_WIDE_INT stack_size_limit, inlined_stack;
 267
 268   if (one_only)
 269     times = 1;
 270   else
 271     for (e = to->callees; e; e = e->next_callee)
 272       if (e->callee == what)
 273         times++;
 274
 275   if (to->global.inlined_to)
 276     to = to->global.inlined_to;
 277
 278   /* When inlining large function body called once into small function,
 279      take the inlined function as base for limiting the growth.  */
 280   if (to->local.self_insns > what->local.self_insns)
 281     limit = to->local.self_insns;
 282   else
 283     limit = what->local.self_insns;
 284
 285   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 286
 287   /* Check the size after inlining against the function limits.  But allow
 288      the function to shrink if it went over the limits by forced inlining.  */
 289   newsize = cgraph_estimate_size_after_inlining (times, to, what);
 290   if (newsize >= to->global.insns
 291       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 292       && newsize > limit)
 293     {
 294       if (reason)
 295         *reason = N_("--param large-function-growth limit reached");
 296       return false;
 297     }
 298
 299   stack_size_limit = to->local.estimated_self_stack_size;
 300
 301   stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;
 302
 303   inlined_stack = (to->global.stack_frame_offset
 304                    + to->local.estimated_self_stack_size
 305                    + what->global.estimated_stack_size);
 306   if (inlined_stack  > stack_size_limit
 307       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 308     {
 309       if (reason)
 310         *reason = N_("--param large-stack-frame-growth limit reached");
 311       return false;
 312     }
 313   return true;
 314 }
 315
 316 /* Return true when function N is small enough to be inlined.  */
 317
 318 bool
 319 cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
 320 {
 321   tree decl = n->decl;
 322
 323   if (n->inline_decl)
 324     decl = n->inline_decl;
 325   if (!DECL_INLINE (decl))
 326     {
 327       if (reason)
 328         *reason = N_("function not inlinable");
 329       return false;
 330     }
 331
 332   if (!DECL_STRUCT_FUNCTION (decl)->cfg)
 333     {
 334       if (reason)
 335         *reason = N_("function body not available");
 336       return false;
 337     }
 338
 339   if (DECL_DECLARED_INLINE_P (decl))
 340     {
 341       if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
 342         {
 343           if (reason)
 344             *reason = N_("--param max-inline-insns-single limit reached");
 345           return false;
 346         }
 347     }
 348   else
 349     {
 350       if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
 351         {
 352           if (reason)
 353             *reason = N_("--param max-inline-insns-auto limit reached");
 354           return false;
 355         }
 356     }
 357
 358   return true;
 359 }
 360
 361 /* Return true when inlining WHAT would create recursive inlining.
 362    We call recursive inlining all cases where same function appears more than
 363    once in the single recursion nest path in the inline graph.  */
 364
 365 static bool
 366 cgraph_recursive_inlining_p (struct cgraph_node *to,
 367                              struct cgraph_node *what,
 368                              const char **reason)
 369 {
 370   bool recursive;
 371   if (to->global.inlined_to)
 372     recursive = what->decl == to->global.inlined_to->decl;
 373   else
 374     recursive = what->decl == to->decl;
 375   /* Marking recursive function inline has sane semantic and thus we should
 376      not warn on it.  */
 377   if (recursive && reason)
 378     *reason = (what->local.disregard_inline_limits
 379                ? N_("recursive inlining") : "");
 380   return recursive;
 381 }
 382
 383 /* Return true if the call can be hot.  */
 384 static bool
 385 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 386 {
 387   if (profile_info && flag_branch_probabilities
 388       && (edge->count
 389           <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
 390     return false;
 391   return true;
 392 }
 393
 394 /* A cost model driving the inlining heuristics in a way so the edges with
 395    smallest badness are inlined first.  After each inlining is performed
 396    the costs of all caller edges of nodes affected are recomputed so the
 397    metrics may accurately depend on values such as number of inlinable callers
 398    of the function or function body size.
 399
 400    With profiling we use number of executions of each edge to drive the cost.
 401    We also should distinguish hot and cold calls where the cold calls are
 402    inlined into only when code size is overall improved.
 403    */
 404
 405 static int
 406 cgraph_edge_badness (struct cgraph_edge *edge)
 407 {
 408   if (max_count)
 409     {
 410       int growth =
 411         cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
 412       growth -= edge->caller->global.insns;
 413
 414       /* Always prefer inlining saving code size.  */
 415       if (growth <= 0)
 416         return INT_MIN - growth;
 417       return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
 418     }
 419   else
 420   {
 421     int nest = MIN (edge->loop_nest, 8);
 422     int badness = cgraph_estimate_growth (edge->callee) * 256;
 423
 424     /* Decrease badness if call is nested.  */
 425     if (badness > 0)
 426       badness >>= nest;
 427     else
 428       badness <<= nest;
 429
 430     /* Make recursive inlining happen always after other inlining is done.  */
 431     if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
 432       return badness + 1;
 433     else
 434       return badness;
 435   }
 436 }
 437
 438 /* Recompute heap nodes for each of caller edge.  */
 439
 440 static void
 441 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
 442                     bitmap updated_nodes)
 443 {
 444   struct cgraph_edge *edge;
 445   const char *failed_reason;
 446
 447   if (!node->local.inlinable || node->local.disregard_inline_limits
 448       || node->global.inlined_to)
 449     return;
 450   if (bitmap_bit_p (updated_nodes, node->uid))
 451     return;
 452   bitmap_set_bit (updated_nodes, node->uid);
 453   node->global.estimated_growth = INT_MIN;
 454
 455   if (!node->local.inlinable)
 456     return;
 457   /* Prune out edges we won't inline into anymore.  */
 458   if (!cgraph_default_inline_p (node, &failed_reason))
 459     {
 460       for (edge = node->callers; edge; edge = edge->next_caller)
 461         if (edge->aux)
 462           {
 463             fibheap_delete_node (heap, edge->aux);
 464             edge->aux = NULL;
 465             if (edge->inline_failed)
 466               edge->inline_failed = failed_reason;
 467           }
 468       return;
 469     }
 470
 471   for (edge = node->callers; edge; edge = edge->next_caller)
 472     if (edge->inline_failed)
 473       {
 474         int badness = cgraph_edge_badness (edge);
 475         if (edge->aux)
 476           {
 477             fibnode_t n = edge->aux;
 478             gcc_assert (n->data == edge);
 479             if (n->key == badness)
 480               continue;
 481
 482             /* fibheap_replace_key only increase the keys.  */
 483             if (fibheap_replace_key (heap, n, badness))
 484               continue;
 485             fibheap_delete_node (heap, edge->aux);
 486           }
 487         edge->aux = fibheap_insert (heap, badness, edge);
 488       }
 489 }
 490
 491 /* Recompute heap nodes for each of caller edges of each of callees.  */
 492
 493 static void
 494 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
 495                     bitmap updated_nodes)
 496 {
 497   struct cgraph_edge *e;
 498   node->global.estimated_growth = INT_MIN;
 499
 500   for (e = node->callees; e; e = e->next_callee)
 501     if (e->inline_failed)
 502       update_caller_keys (heap, e->callee, updated_nodes);
 503     else if (!e->inline_failed)
 504       update_callee_keys (heap, e->callee, updated_nodes);
 505 }
 506
 507 /* Enqueue all recursive calls from NODE into priority queue depending on
 508    how likely we want to recursively inline the call.  */
 509
 510 static void
 511 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
 512                         fibheap_t heap)
 513 {
 514   static int priority;
 515   struct cgraph_edge *e;
 516   for (e = where->callees; e; e = e->next_callee)
 517     if (e->callee == node)
 518       {
 519         /* When profile feedback is available, prioritize by expected number
 520            of calls.  Without profile feedback we maintain simple queue
 521            to order candidates via recursive depths.  */
 522         fibheap_insert (heap,
 523                         !max_count ? priority++
 524                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
 525                         e);
 526       }
 527   for (e = where->callees; e; e = e->next_callee)
 528     if (!e->inline_failed)
 529       lookup_recursive_calls (node, e->callee, heap);
 530 }
 531
 532 /* Find callgraph nodes closing a circle in the graph.  The
 533    resulting hashtab can be used to avoid walking the circles.
 534    Uses the cgraph nodes ->aux field which needs to be zero
 535    before and will be zero after operation.  */
 536
 537 static void
 538 cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
 539 {
 540   struct cgraph_edge *e;
 541
 542   if (node->aux)
 543     {
 544       void **slot;
 545       slot = htab_find_slot (cycles, node, INSERT);
 546       if (!*slot)
 547         {
 548           if (dump_file)
 549             fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
 550           *slot = node;
 551         }
 552       return;
 553     }
 554
 555   node->aux = node;
 556   for (e = node->callees; e; e = e->next_callee)
 557     cgraph_find_cycles (e->callee, cycles);
 558   node->aux = 0;
 559 }
 560
 561 /* Leafify the cgraph node.  We have to be careful in recursing
 562    as to not run endlessly in circles of the callgraph.
 563    We do so by using a hashtab of cycle entering nodes as generated
 564    by cgraph_find_cycles.  */
 565
 566 static void
 567 cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
 568 {
 569   struct cgraph_edge *e;
 570
 571   for (e = node->callees; e; e = e->next_callee)
 572     {
 573       /* Inline call, if possible, and recurse.  Be sure we are not
 574          entering callgraph circles here.  */
 575       if (e->inline_failed
 576           && e->callee->local.inlinable
 577           && !cgraph_recursive_inlining_p (node, e->callee,
 578                                            &e->inline_failed)
 579           && !htab_find (cycles, e->callee))
 580         {
 581           if (dump_file)
 582             fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
 583           cgraph_mark_inline_edge (e, true);
 584           cgraph_flatten_node (e->callee, cycles);
 585         }
 586       else if (dump_file)
 587         fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
 588     }
 589 }
 590
 591 /* Decide on recursive inlining: in the case function has recursive calls,
 592    inline until body size reaches given argument.  */
 593
 594 static bool
 595 cgraph_decide_recursive_inlining (struct cgraph_node *node)
 596 {
 597   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
 598   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 599   int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
 600   fibheap_t heap;
 601   struct cgraph_edge *e;
 602   struct cgraph_node *master_clone, *next;
 603   int depth = 0;
 604   int n = 0;
 605
 606   if (DECL_DECLARED_INLINE_P (node->decl))
 607     {
 608       limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
 609       max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 610     }
 611
 612   /* Make sure that function is small enough to be considered for inlining.  */
 613   if (!max_depth
 614       || cgraph_estimate_size_after_inlining (1, node, node)  >= limit)
 615     return false;
 616   heap = fibheap_new ();
 617   lookup_recursive_calls (node, node, heap);
 618   if (fibheap_empty (heap))
 619     {
 620       fibheap_delete (heap);
 621       return false;
 622     }
 623
 624   if (dump_file)
 625     fprintf (dump_file,
 626              "  Performing recursive inlining on %s\n",
 627              cgraph_node_name (node));
 628
 629   /* We need original clone to copy around.  */
 630   master_clone = cgraph_clone_node (node, node->count, 1, false);
 631   master_clone->needed = true;
 632   for (e = master_clone->callees; e; e = e->next_callee)
 633     if (!e->inline_failed)
 634       cgraph_clone_inlined_nodes (e, true, false);
 635
 636   /* Do the inlining and update list of recursive call during process.  */
 637   while (!fibheap_empty (heap)
 638          && (cgraph_estimate_size_after_inlining (1, node, master_clone)
 639              <= limit))
 640     {
 641       struct cgraph_edge *curr = fibheap_extract_min (heap);
 642       struct cgraph_node *cnode;
 643
 644       depth = 1;
 645       for (cnode = curr->caller;
 646            cnode->global.inlined_to; cnode = cnode->callers->caller)
 647         if (node->decl == curr->callee->decl)
 648           depth++;
 649       if (depth > max_depth)
 650         {
 651           if (dump_file)
 652             fprintf (dump_file,
 653                      "   maxmal depth reached\n");
 654           continue;
 655         }
 656
 657       if (max_count)
 658         {
 659           if (!cgraph_maybe_hot_edge_p (curr))
 660             {
 661               if (dump_file)
 662                 fprintf (dump_file, "   Not inlining cold call\n");
 663               continue;
 664             }
 665           if (curr->count * 100 / node->count < probability)
 666             {
 667               if (dump_file)
 668                 fprintf (dump_file,
 669                          "   Probability of edge is too small\n");
 670               continue;
 671             }
 672         }
 673
 674       if (dump_file)
 675         {
 676           fprintf (dump_file,
 677                    "   Inlining call of depth %i", depth);
 678           if (node->count)
 679             {
 680               fprintf (dump_file, " called approx. %.2f times per call",
 681                        (double)curr->count / node->count);
 682             }
 683           fprintf (dump_file, "\n");
 684         }
 685       cgraph_redirect_edge_callee (curr, master_clone);
 686       cgraph_mark_inline_edge (curr, false);
 687       lookup_recursive_calls (node, curr->callee, heap);
 688       n++;
 689     }
 690   if (!fibheap_empty (heap) && dump_file)
 691     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
 692
 693   fibheap_delete (heap);
 694   if (dump_file)
 695     fprintf (dump_file,
 696              "\n   Inlined %i times, body grown from %i to %i insns\n", n,
 697              master_clone->global.insns, node->global.insns);
 698
 699   /* Remove master clone we used for inlining.  We rely that clones inlined
 700      into master clone gets queued just before master clone so we don't
 701      need recursion.  */
 702   for (node = cgraph_nodes; node != master_clone;
 703        node = next)
 704     {
 705       next = node->next;
 706       if (node->global.inlined_to == master_clone)
 707         cgraph_remove_node (node);
 708     }
 709   cgraph_remove_node (master_clone);
 710   /* FIXME: Recursive inlining actually reduces number of calls of the
 711      function.  At this place we should probably walk the function and
 712      inline clones and compensate the counts accordingly.  This probably
 713      doesn't matter much in practice.  */
 714   return n > 0;
 715 }
 716
 717 /* Set inline_failed for all callers of given function to REASON.  */
 718
 719 static void
 720 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
 721 {
 722   struct cgraph_edge *e;
 723
 724   if (dump_file)
 725     fprintf (dump_file, "Inlining failed: %s\n", reason);
 726   for (e = node->callers; e; e = e->next_caller)
 727     if (e->inline_failed)
 728       e->inline_failed = reason;
 729 }
 730
 731 /* We use greedy algorithm for inlining of small functions:
 732    All inline candidates are put into prioritized heap based on estimated
 733    growth of the overall number of instructions and then update the estimates.
 734
 735    INLINED and INLINED_CALEES are just pointers to arrays large enough
 736    to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */
 737
 738 static void
 739 cgraph_decide_inlining_of_small_functions (void)
 740 {
 741   struct cgraph_node *node;
 742   struct cgraph_edge *edge;
 743   const char *failed_reason;
 744   fibheap_t heap = fibheap_new ();
 745   bitmap updated_nodes = BITMAP_ALLOC (NULL);
 746
 747   if (dump_file)
 748     fprintf (dump_file, "\nDeciding on smaller functions:\n");
 749
 750   /* Put all inline candidates into the heap.  */
 751
 752   for (node = cgraph_nodes; node; node = node->next)
 753     {
 754       if (!node->local.inlinable || !node->callers
 755           || node->local.disregard_inline_limits)
 756         continue;
 757       if (dump_file)
 758         fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));
 759
 760       node->global.estimated_growth = INT_MIN;
 761       if (!cgraph_default_inline_p (node, &failed_reason))
 762         {
 763           cgraph_set_inline_failed (node, failed_reason);
 764           continue;
 765         }
 766
 767       for (edge = node->callers; edge; edge = edge->next_caller)
 768         if (edge->inline_failed)
 769           {
 770             gcc_assert (!edge->aux);
 771             edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
 772           }
 773     }
 774   while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
 775     {
 776       int old_insns = overall_insns;
 777       struct cgraph_node *where;
 778       int growth =
 779         cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
 780
 781       growth -= edge->caller->global.insns;
 782
 783       if (dump_file)
 784         {
 785           fprintf (dump_file,
 786                    "\nConsidering %s with %i insns\n",
 787                    cgraph_node_name (edge->callee),
 788                    edge->callee->global.insns);
 789           fprintf (dump_file,
 790                    " to be inlined into %s\n"
 791                    " Estimated growth after inlined into all callees is %+i insns.\n"
 792                    " Estimated badness is %i.\n",
 793                    cgraph_node_name (edge->caller),
 794                    cgraph_estimate_growth (edge->callee),
 795                    cgraph_edge_badness (edge));
 796           if (edge->count)
 797             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
 798         }
 799       gcc_assert (edge->aux);
 800       edge->aux = NULL;
 801       if (!edge->inline_failed)
 802         continue;
 803
 804       /* When not having profile info ready we don't weight by any way the
 805          position of call in procedure itself.  This means if call of
 806          function A from function B seems profitable to inline, the recursive
 807          call of function A in inline copy of A in B will look profitable too
 808          and we end up inlining until reaching maximal function growth.  This
 809          is not good idea so prohibit the recursive inlining.
 810
 811          ??? When the frequencies are taken into account we might not need this
 812          restriction.   */
 813       if (!max_count)
 814         {
 815           where = edge->caller;
 816           while (where->global.inlined_to)
 817             {
 818               if (where->decl == edge->callee->decl)
 819                 break;
 820               where = where->callers->caller;
 821             }
 822           if (where->global.inlined_to)
 823             {
 824               edge->inline_failed
 825                 = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
 826               if (dump_file)
 827                 fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
 828               continue;
 829             }
 830         }
 831
 832       if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
 833         {
 834           if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
 835                                             &edge->inline_failed))
 836             {
 837               edge->inline_failed =
 838                 N_("call is unlikely");
 839               if (dump_file)
 840                 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
 841             }
 842           continue;
 843         }
 844       if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
 845         {
 846           if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
 847                                             &edge->inline_failed))
 848             {
 849               if (dump_file)
 850                 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
 851             }
 852           continue;
 853         }
 854       if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
 855                                        &edge->inline_failed))
 856         {
 857           where = edge->caller;
 858           if (where->global.inlined_to)
 859             where = where->global.inlined_to;
 860           if (!cgraph_decide_recursive_inlining (where))
 861             continue;
 862           update_callee_keys (heap, where, updated_nodes);
 863         }
 864       else
 865         {
 866           struct cgraph_node *callee;
 867           if (!cgraph_check_inline_limits (edge->caller, edge->callee,
 868                                            &edge->inline_failed, true))
 869             {
 870               if (dump_file)
 871                 fprintf (dump_file, " Not inlining into %s:%s.\n",
 872                          cgraph_node_name (edge->caller), edge->inline_failed);
 873               continue;
 874             }
 875           callee = edge->callee;
 876           cgraph_mark_inline_edge (edge, true);
 877           update_callee_keys (heap, callee, updated_nodes);
 878         }
 879       where = edge->caller;
 880       if (where->global.inlined_to)
 881         where = where->global.inlined_to;
 882
 883       /* Our profitability metric can depend on local properties
 884          such as number of inlinable calls and size of the function body.
 885          After inlining these properties might change for the function we
 886          inlined into (since it's body size changed) and for the functions
 887          called by function we inlined (since number of it inlinable callers
 888          might change).  */
 889       update_caller_keys (heap, where, updated_nodes);
 890       bitmap_clear (updated_nodes);
 891
 892       if (dump_file)
 893         {
 894           fprintf (dump_file,
 895                    " Inlined into %s which now has %i insns,"
 896                    "net change of %+i insns.\n",
 897                    cgraph_node_name (edge->caller),
 898                    edge->caller->global.insns,
 899                    overall_insns - old_insns);
 900         }
 901     }
 902   while ((edge = fibheap_extract_min (heap)) != NULL)
 903     {
 904       gcc_assert (edge->aux);
 905       edge->aux = NULL;
 906       if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
 907           && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
 908                                            &edge->inline_failed))
 909         edge->inline_failed = N_("--param inline-unit-growth limit reached");
 910     }
 911   fibheap_delete (heap);
 912   BITMAP_FREE (updated_nodes);
 913 }
 914
 915 /* Decide on the inlining.  We do so in the topological order to avoid
 916    expenses on updating data structures.  */
 917
 918 static unsigned int
 919 cgraph_decide_inlining (void)
 920 {
 921   struct cgraph_node *node;
 922   int nnodes;
 923   struct cgraph_node **order =
 924     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
 925   int old_insns = 0;
 926   int i;
 927
 928   timevar_push (TV_INLINE_HEURISTICS);
 929   max_count = 0;
 930   for (node = cgraph_nodes; node; node = node->next)
 931     if (node->analyzed && (node->needed || node->reachable))
 932       {
 933         struct cgraph_edge *e;
 934
 935         /* At the moment, no IPA passes change function bodies before inlining.
 936            Save some time by not recomputing function body sizes if early inlining
 937            already did so.  */
 938         if (!flag_early_inlining)
 939           node->local.self_insns = node->global.insns
 940              = estimate_num_insns (node->decl);
 941
 942         initial_insns += node->local.self_insns;
 943         gcc_assert (node->local.self_insns == node->global.insns);
 944         for (e = node->callees; e; e = e->next_callee)
 945           if (max_count < e->count)
 946             max_count = e->count;
 947       }
 948   overall_insns = initial_insns;
 949   gcc_assert (!max_count || (profile_info && flag_branch_probabilities));
 950
 951   max_insns = overall_insns;
 952   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
 953     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
 954
 955   max_insns = ((HOST_WIDEST_INT) max_insns
 956                * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
 957
 958   nnodes = cgraph_postorder (order);
 959
 960   if (dump_file)
 961     fprintf (dump_file,
 962              "\nDeciding on inlining.  Starting with %i insns.\n",
 963              initial_insns);
 964
 965   for (node = cgraph_nodes; node; node = node->next)
 966     node->aux = 0;
 967
 968   if (dump_file)
 969     fprintf (dump_file, "\nInlining always_inline functions:\n");
 970
 971   /* In the first pass mark all always_inline edges.  Do this with a priority
 972      so none of our later choices will make this impossible.  */
 973   for (i = nnodes - 1; i >= 0; i--)
 974     {
 975       struct cgraph_edge *e, *next;
 976
 977       node = order[i];
 978
 979       /* Handle nodes to be flattened, but don't update overall unit size.  */
 980       if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
 981         {
 982           int old_overall_insns = overall_insns;
 983           htab_t cycles;
 984           if (dump_file)
 985             fprintf (dump_file,
 986                      "Leafifying %s\n", cgraph_node_name (node));
 987           cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
 988           cgraph_find_cycles (node, cycles);
 989           cgraph_flatten_node (node, cycles);
 990           htab_delete (cycles);
 991           overall_insns = old_overall_insns;
 992           /* We don't need to consider always_inline functions inside the flattened
 993              function anymore.  */
 994           continue;
 995         }
 996
 997       if (!node->local.disregard_inline_limits)
 998         continue;
 999       if (dump_file)
1000         fprintf (dump_file,
1001                  "\nConsidering %s %i insns (always inline)\n",
1002                  cgraph_node_name (node), node->global.insns);
1003       old_insns = overall_insns;
1004       for (e = node->callers; e; e = next)
1005         {
1006           next = e->next_caller;
1007           if (!e->inline_failed)
1008             continue;
1009           if (cgraph_recursive_inlining_p (e->caller, e->callee,
1010                                            &e->inline_failed))
1011             continue;
1012           cgraph_mark_inline_edge (e, true);
1013           if (dump_file)
1014             fprintf (dump_file,
1015                      " Inlined into %s which now has %i insns.\n",
1016                      cgraph_node_name (e->caller),
1017                      e->caller->global.insns);
1018         }
1019       if (dump_file)
1020         fprintf (dump_file,
1021                  " Inlined for a net change of %+i insns.\n",
1022                  overall_insns - old_insns);
1023     }
1024
1025   if (!flag_really_no_inline)
1026     cgraph_decide_inlining_of_small_functions ();
1027
1028   if (!flag_really_no_inline
1029       && flag_inline_functions_called_once)
1030     {
1031       if (dump_file)
1032         fprintf (dump_file, "\nDeciding on functions called once:\n");
1033
1034       /* And finally decide what functions are called once.  */
1035
1036       for (i = nnodes - 1; i >= 0; i--)
1037         {
1038           node = order[i];
1039
1040           if (node->callers && !node->callers->next_caller && !node->needed
1041               && node->local.inlinable && node->callers->inline_failed
1042               && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
1043             {
1044               bool ok = true;
1045               struct cgraph_node *node1;
1046
1047               /* Verify that we won't duplicate the caller.  */
1048               for (node1 = node->callers->caller;
1049                    node1->callers && !node1->callers->inline_failed
1050                    && ok; node1 = node1->callers->caller)
1051                 if (node1->callers->next_caller || node1->needed)
1052                   ok = false;
1053               if (ok)
1054                 {
1055                   if (dump_file)
1056                     {
1057                       fprintf (dump_file,
1058                                "\nConsidering %s %i insns.\n",
1059                                cgraph_node_name (node), node->global.insns);
1060                       fprintf (dump_file,
1061                                " Called once from %s %i insns.\n",
1062                                cgraph_node_name (node->callers->caller),
1063                                node->callers->caller->global.insns);
1064                     }
1065
1066                   old_insns = overall_insns;
1067
1068                   if (cgraph_check_inline_limits (node->callers->caller, node,
1069                                                   NULL, false))
1070                     {
1071                       cgraph_mark_inline (node->callers);
1072                       if (dump_file)
1073                         fprintf (dump_file,
1074                                  " Inlined into %s which now has %i insns"
1075                                  " for a net change of %+i insns.\n",
1076                                  cgraph_node_name (node->callers->caller),
1077                                  node->callers->caller->global.insns,
1078                                  overall_insns - old_insns);
1079                     }
1080                   else
1081                     {
1082                       if (dump_file)
1083                         fprintf (dump_file,
1084                                  " Inline limit reached, not inlined.\n");
1085                     }
1086                 }
1087             }
1088         }
1089     }
1090
1091   if (dump_file)
1092     fprintf (dump_file,
1093              "\nInlined %i calls, eliminated %i functions, "
1094              "%i insns turned to %i insns.\n\n",
1095              ncalls_inlined, nfunctions_inlined, initial_insns,
1096              overall_insns);
1097   free (order);
1098   timevar_pop (TV_INLINE_HEURISTICS);
1099   return 0;
1100 }
1101
1102 /* Decide on the inlining.  We do so in the topological order to avoid
1103    expenses on updating data structures.  */
1104
1105 bool
1106 cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
1107 {
1108   struct cgraph_edge *e;
1109   bool inlined = false;
1110   const char *failed_reason;
1111
1112   /* First of all look for always inline functions.  */
1113   for (e = node->callees; e; e = e->next_callee)
1114     if (e->callee->local.disregard_inline_limits
1115         && e->inline_failed
1116         && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1117         /* ??? It is possible that renaming variable removed the function body
1118            in duplicate_decls. See gcc.c-torture/compile/20011119-2.c  */
1119         && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1120       {
1121         if (dump_file && early)
1122           {
1123             fprintf (dump_file, "  Early inlining %s",
1124                      cgraph_node_name (e->callee));
1125             fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1126           }
1127         cgraph_mark_inline (e);
1128         inlined = true;
1129       }
1130
1131   /* Now do the automatic inlining.  */
1132   if (!flag_really_no_inline)
1133     for (e = node->callees; e; e = e->next_callee)
1134       if (e->callee->local.inlinable
1135           && e->inline_failed
1136           && !e->callee->local.disregard_inline_limits
1137           && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1138           && (!early
1139               || (cgraph_estimate_size_after_inlining (1, e->caller, e->callee)
1140                   <= e->caller->global.insns))
1141           && cgraph_check_inline_limits (node, e->callee, &e->inline_failed,
1142                                          false)
1143           && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1144         {
1145           if (cgraph_default_inline_p (e->callee, &failed_reason))
1146             {
1147               if (dump_file && early)
1148                 {
1149                   fprintf (dump_file, "  Early inlining %s",
1150                            cgraph_node_name (e->callee));
1151                   fprintf (dump_file, " into %s\n", cgraph_node_name (node));
1152                 }
1153               cgraph_mark_inline (e);
1154               inlined = true;
1155             }
1156           else if (!early)
1157             e->inline_failed = failed_reason;
1158         }
1159   if (early && inlined)
1160     {
1161       push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1162       tree_register_cfg_hooks ();
1163       current_function_decl = node->decl;
1164       optimize_inline_calls (current_function_decl);
1165       node->local.self_insns = node->global.insns;
1166       current_function_decl = NULL;
1167       pop_cfun ();
1168     }
1169   return inlined;
1170 }
1171
1172 /* When inlining shall be performed.  */
1173 static bool
1174 cgraph_gate_inlining (void)
1175 {
1176   return flag_inline_trees;
1177 }
1178
1179 struct tree_opt_pass pass_ipa_inline =
1180 {
1181   "inline",                             /* name */
1182   cgraph_gate_inlining,                 /* gate */
1183   cgraph_decide_inlining,               /* execute */
1184   NULL,                                 /* sub */
1185   NULL,                                 /* next */
1186   0,                                    /* static_pass_number */
1187   TV_INTEGRATION,                       /* tv_id */
1188   0,                                    /* properties_required */
1189   PROP_cfg,                             /* properties_provided */
1190   0,                                    /* properties_destroyed */
1191   0,                                    /* todo_flags_start */
1192   TODO_dump_cgraph | TODO_dump_func,    /* todo_flags_finish */
1193   0                                     /* letter */
1194 };
1195
1196 /* Because inlining might remove no-longer reachable nodes, we need to
1197    keep the array visible to garbage collector to avoid reading collected
1198    out nodes.  */
1199 static int nnodes;
1200 static GTY ((length ("nnodes"))) struct cgraph_node **order;
1201
1202 /* Do inlining of small functions.  Doing so early helps profiling and other
1203    passes to be somewhat more effective and avoids some code duplication in
1204    later real inlining pass for testcases with very many function calls.  */
1205 static unsigned int
1206 cgraph_early_inlining (void)
1207 {
1208   struct cgraph_node *node;
1209   int i;
1210
1211   if (sorrycount || errorcount)
1212     return 0;
1213 #ifdef ENABLE_CHECKING
1214   for (node = cgraph_nodes; node; node = node->next)
1215     gcc_assert (!node->aux);
1216 #endif
1217
1218   order = ggc_alloc (sizeof (*order) * cgraph_n_nodes);
1219   nnodes = cgraph_postorder (order);
1220   for (i = nnodes - 1; i >= 0; i--)
1221     {
1222       node = order[i];
1223       if (node->analyzed && (node->needed || node->reachable))
1224         node->local.self_insns = node->global.insns
1225           = estimate_num_insns (node->decl);
1226     }
1227   for (i = nnodes - 1; i >= 0; i--)
1228     {
1229       node = order[i];
1230       if (node->analyzed && node->local.inlinable
1231           && (node->needed || node->reachable)
1232           && node->callers)
1233         {
1234           if (cgraph_decide_inlining_incrementally (node, true))
1235             ggc_collect ();
1236         }
1237     }
1238   cgraph_remove_unreachable_nodes (true, dump_file);
1239 #ifdef ENABLE_CHECKING
1240   for (node = cgraph_nodes; node; node = node->next)
1241     gcc_assert (!node->global.inlined_to);
1242 #endif
1243   ggc_free (order);
1244   order = NULL;
1245   nnodes = 0;
1246   return 0;
1247 }
1248
1249 /* When inlining shall be performed.  */
1250 static bool
1251 cgraph_gate_early_inlining (void)
1252 {
1253   return flag_inline_trees && flag_early_inlining;
1254 }
1255
1256 struct tree_opt_pass pass_early_ipa_inline =
1257 {
1258   "einline",                            /* name */
1259   cgraph_gate_early_inlining,           /* gate */
1260   cgraph_early_inlining,                /* execute */
1261   NULL,                                 /* sub */
1262   NULL,                                 /* next */
1263   0,                                    /* static_pass_number */
1264   TV_INTEGRATION,                       /* tv_id */
1265   0,                                    /* properties_required */
1266   PROP_cfg,                             /* properties_provided */
1267   0,                                    /* properties_destroyed */
1268   0,                                    /* todo_flags_start */
1269   TODO_dump_cgraph | TODO_dump_func,    /* todo_flags_finish */
1270   0                                     /* letter */
1271 };
1272
1273 #include "gt-ipa-inline.h"