1 /* Interprocedural constant propagation
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Interprocedural constant propagation. The aim of interprocedural constant
23 propagation (IPCP) is to find which function's argument has the same
24 constant value in each invocation throughout the whole program. For example,
25 consider the following program:
29 printf ("value is %d",y);
49 The IPCP algorithm will find that g's formal argument y is always called
52 The algorithm used is based on "Interprocedural Constant Propagation", by
53 David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
56 The optimization is divided into three stages:
58 First stage - intraprocedural analysis
59 =======================================
60 This phase computes jump_function and modification flags.
62 A jump function for a callsite represents the values passed as an actual
63 arguments of a given callsite. There are three types of values:
64 Pass through - the caller's formal parameter is passed as an actual argument.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 The jump function info, ipa_jump_func, is stored in ipa_edge_args
69 structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
70 modified_flags are defined in ipa_node_params structure
71 (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
73 -ipcp_generate_summary() is the first stage driver.
75 Second stage - interprocedural analysis
76 ========================================
77 This phase does the interprocedural constant propagation.
78 It computes lattices for all formal parameters in the program
79 and their value that may be:
81 BOTTOM - non constant.
82 CONSTANT - constant value.
84 Lattice describing a formal parameter p will have a constant value if all
85 callsites invoking this function have the same constant value passed to p.
87 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
88 structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
90 -ipcp_iterate_stage() is the second stage driver.
92 Third phase - transformation of function code
93 ============================================
94 Propagates the constant-valued formals into the function.
95 For each function whose parameters are constants, we create its clone.
97 Then we process the clone in two ways:
98 1. We insert an assignment statement 'parameter = const' at the beginning
99 of the cloned function.
100 2. For read-only parameters that do not live in memory, we replace all their
101 uses with the constant.
103 We also need to modify some callsites to call the cloned functions instead
104 of the original ones. For a callsite passing an argument found to be a
105 constant by IPCP, there are two different cases to handle:
106 1. A constant is passed as an argument. In this case the callsite in the
107 should be redirected to call the cloned callee.
108 2. A parameter (of the caller) passed as an argument (pass through
109 argument). In such cases both the caller and the callee have clones and
110 only the callsite in the cloned caller is redirected to call to the
113 This update is done in two steps: First all cloned functions are created
114 during a traversal of the call graph, during which all callsites are
115 redirected to call the cloned function. Then the callsites are traversed
116 and many calls redirected back to fit the description above.
118 -ipcp_insert_stage() is the third phase driver.
121 This pass also performs devirtualization - turns virtual calls into direct
122 ones if it can prove that all invocations of the function call the same
123 callee. This is achieved by building a list of all base types (actually,
124 their BINFOs) that individual parameters can have in an iterative matter
125 just like propagating scalar constants and then examining whether virtual
126 calls which take a parameter as their object fold to the same target for all
127 these types. If we cannot enumerate all types or there is a type which does
128 not have any BINFO associated with it, cannot_devirtualize of the associated
129 parameter descriptor is set which is an equivalent of BOTTOM lattice value
130 in standard IPA constant propagation.
135 #include "coretypes.h"
140 #include "ipa-prop.h"
141 #include "tree-flow.h"
142 #include "tree-pass.h"
145 #include "diagnostic.h"
146 #include "tree-pretty-print.h"
147 #include "tree-dump.h"
148 #include "tree-inline.h"
151 #include "ipa-inline.h"
153 /* Number of functions identified as candidates for cloning. When not cloning
154 we can simplify iterate stage not forcing it to go through the decision
155 on what is profitable and what not. */
156 static int n_cloning_candidates;
158 /* Maximal count found in program. */
159 static gcov_type max_count;
161 /* Cgraph nodes that has been completely replaced by cloning during iterate
162 * stage and will be removed after ipcp is finished. */
163 static bitmap dead_nodes;
165 static void ipcp_print_profile_data (FILE *);
166 static void ipcp_function_scale_print (FILE *);
168 /* Get the original node field of ipa_node_params associated with node NODE. */
169 static inline struct cgraph_node *
170 ipcp_get_orig_node (struct cgraph_node *node)
172 return IPA_NODE_REF (node)->ipcp_orig_node;
175 /* Return true if NODE describes a cloned/versioned function. */
177 ipcp_node_is_clone (struct cgraph_node *node)
179 return (ipcp_get_orig_node (node) != NULL);
182 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
183 as the ipcp_orig_node field in ipa_node_params. */
185 ipcp_init_cloned_node (struct cgraph_node *orig_node,
186 struct cgraph_node *new_node)
188 gcc_checking_assert (ipa_node_params_vector
189 && (VEC_length (ipa_node_params_t,
190 ipa_node_params_vector)
191 > (unsigned) cgraph_max_uid));
192 gcc_checking_assert (IPA_NODE_REF (new_node)->params);
193 IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
196 /* Return scale for NODE. */
197 static inline gcov_type
198 ipcp_get_node_scale (struct cgraph_node *node)
200 return IPA_NODE_REF (node)->count_scale;
203 /* Set COUNT as scale for NODE. */
205 ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
207 IPA_NODE_REF (node)->count_scale = count;
210 /* Return whether LAT is a constant lattice. */
212 ipcp_lat_is_const (struct ipcp_lattice *lat)
214 if (lat->type == IPA_CONST_VALUE)
220 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
221 into the code (i.e. constants excluding member pointers and pointers). */
223 ipcp_lat_is_insertable (struct ipcp_lattice *lat)
225 return lat->type == IPA_CONST_VALUE;
228 /* Return true if LAT1 and LAT2 are equal. */
230 ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
232 gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
233 if (lat1->type != lat2->type)
236 if (TREE_CODE (lat1->constant) == ADDR_EXPR
237 && TREE_CODE (lat2->constant) == ADDR_EXPR
238 && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL
239 && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL)
240 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)),
241 DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0);
243 return operand_equal_p (lat1->constant, lat2->constant, 0);
246 /* Compute Meet arithmetics:
247 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
249 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
250 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
252 ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
253 struct ipcp_lattice *lat2)
255 if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
257 res->type = IPA_BOTTOM;
260 if (lat1->type == IPA_TOP)
262 res->type = lat2->type;
263 res->constant = lat2->constant;
266 if (lat2->type == IPA_TOP)
268 res->type = lat1->type;
269 res->constant = lat1->constant;
272 if (!ipcp_lats_are_equal (lat1, lat2))
274 res->type = IPA_BOTTOM;
277 res->type = lat1->type;
278 res->constant = lat1->constant;
281 /* True when OLD_LAT and NEW_LAT values are not the same. */
284 ipcp_lattice_changed (struct ipcp_lattice *old_lat,
285 struct ipcp_lattice *new_lat)
287 if (old_lat->type == new_lat->type)
289 if (!ipcp_lat_is_const (old_lat))
291 if (ipcp_lats_are_equal (old_lat, new_lat))
297 /* Print all ipcp_lattices of all functions to F. */
299 ipcp_print_all_lattices (FILE * f)
301 struct cgraph_node *node;
304 fprintf (f, "\nLattice:\n");
305 for (node = cgraph_nodes; node; node = node->next)
307 struct ipa_node_params *info;
311 info = IPA_NODE_REF (node);
312 fprintf (f, " Node: %s:\n", cgraph_node_name (node));
313 count = ipa_get_param_count (info);
314 for (i = 0; i < count; i++)
316 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
318 fprintf (f, " param [%d]: ", i);
319 if (lat->type == IPA_CONST_VALUE)
321 tree cst = lat->constant;
322 fprintf (f, "type is CONST ");
323 print_generic_expr (f, cst, 0);
324 if (TREE_CODE (cst) == ADDR_EXPR
325 && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL)
328 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)),
332 else if (lat->type == IPA_TOP)
333 fprintf (f, "type is TOP");
335 fprintf (f, "type is BOTTOM");
336 if (ipa_param_cannot_devirtualize_p (info, i))
337 fprintf (f, " - cannot_devirtualize set\n");
338 else if (ipa_param_types_vec_empty (info, i))
339 fprintf (f, " - type list empty\n");
346 /* Return true if ipcp algorithms would allow cloning NODE. */
349 ipcp_versionable_function_p (struct cgraph_node *node)
351 struct cgraph_edge *edge;
353 /* We always version the actual function and redirect through the aliases. */
357 /* We don't know how to clone thunks. */
358 if (node->thunk.thunk_p)
361 /* There are a number of generic reasons functions cannot be versioned. We
362 also cannot remove parameters if there are type attributes such as fnspec
364 if (!inline_summary (node)->versionable
365 || TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
368 /* Removing arguments doesn't work if the function takes varargs
369 or use __builtin_apply_args.
370 FIXME: handle this together with can_change_signature flag. */
371 for (edge = node->callees; edge; edge = edge->next_callee)
373 tree t = edge->callee->decl;
374 if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
375 && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS
376 || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START))
383 /* Return true if this NODE is viable candidate for cloning. */
385 ipcp_cloning_candidate_p (struct cgraph_node *node)
389 gcov_type direct_call_sum = 0;
390 struct cgraph_edge *e;
392 /* We look through aliases, so we clone the aliased function instead. */
396 /* We never clone functions that are not visible from outside.
397 FIXME: in future we should clone such functions when they are called with
398 different constants, but current ipcp implementation is not good on this.
400 if (cgraph_only_called_directly_p (node) || !node->analyzed)
403 /* When function address is taken, we are pretty sure it will be called in hidden way. */
404 if (node->address_taken)
407 fprintf (dump_file, "Not considering %s for cloning; address is taken.\n",
408 cgraph_node_name (node));
412 if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
415 fprintf (dump_file, "Not considering %s for cloning; body is overwritable.\n",
416 cgraph_node_name (node));
419 if (!ipcp_versionable_function_p (node))
422 fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
423 cgraph_node_name (node));
426 for (e = node->callers; e; e = e->next_caller)
428 direct_call_sum += e->count;
430 if (cgraph_maybe_hot_edge_p (e))
437 fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
438 cgraph_node_name (node));
441 if (inline_summary (node)->self_size < n_calls)
444 fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
445 cgraph_node_name (node));
449 if (!flag_ipa_cp_clone)
452 fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
453 cgraph_node_name (node));
457 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
460 fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
461 cgraph_node_name (node));
465 /* When profile is available and function is hot, propagate into it even if
466 calls seems cold; constant propagation can improve function's speed
470 if (direct_call_sum > node->count * 90 / 100)
473 fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
474 cgraph_node_name (node));
481 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
482 cgraph_node_name (node));
486 fprintf (dump_file, "Considering %s for cloning.\n",
487 cgraph_node_name (node));
491 /* Mark parameter with index I of function described by INFO as unsuitable for
492 devirtualization. Return true if it has already been marked so. */
495 ipa_set_param_cannot_devirtualize (struct ipa_node_params *info, int i)
497 bool ret = info->params[i].cannot_devirtualize;
498 info->params[i].cannot_devirtualize = true;
499 if (info->params[i].types)
500 VEC_free (tree, heap, info->params[i].types);
504 /* Initialize ipcp_lattices array. The lattices corresponding to supported
505 types (integers, real types and Fortran constants defined as const_decls)
506 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
508 ipcp_initialize_node_lattices (struct cgraph_node *node)
511 struct ipa_node_params *info = IPA_NODE_REF (node);
512 enum ipa_lattice_type type;
514 if (ipa_is_called_with_var_arguments (info))
516 /* We don't know how to clone thunks even when they are local. */
517 else if (node->local.local
518 && !node->thunk.thunk_p)
520 /* When cloning is allowed, we can assume that externally visible functions
521 are not called. We will compensate this by cloning later. */
522 else if (ipcp_cloning_candidate_p (node))
523 type = IPA_TOP, n_cloning_candidates ++;
527 for (i = 0; i < ipa_get_param_count (info) ; i++)
529 ipa_get_lattice (info, i)->type = type;
530 if (type == IPA_BOTTOM)
531 ipa_set_param_cannot_devirtualize (info, i);
535 /* Build a constant tree with type TREE_TYPE and value according to LAT.
536 Return the tree, or, if it is not possible to convert such value
537 to TREE_TYPE, NULL. */
539 build_const_val (struct ipcp_lattice *lat, tree tree_type)
543 gcc_assert (ipcp_lat_is_const (lat));
546 if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
548 if (fold_convertible_p (tree_type, val))
549 return fold_build1 (NOP_EXPR, tree_type, val);
550 else if (TYPE_SIZE (tree_type) == TYPE_SIZE (TREE_TYPE (val)))
551 return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
558 /* Compute the proper scale for NODE. It is the ratio between the number of
559 direct calls (represented on the incoming cgraph_edges) and sum of all
560 invocations of NODE (represented as count in cgraph_node).
562 FIXME: This code is wrong. Since the callers can be also clones and
563 the clones are not scaled yet, the sums gets unrealistically high.
564 To properly compute the counts, we would need to do propagation across
565 callgraph (as external call to A might imply call to non-cloned B
566 if A's clone calls cloned B). */
568 ipcp_compute_node_scale (struct cgraph_node *node)
571 struct cgraph_edge *cs;
574 /* Compute sum of all counts of callers. */
575 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
577 /* Work around the unrealistically high sum problem. We just don't want
578 the non-cloned body to have negative or very low frequency. Since
579 majority of execution time will be spent in clones anyway, this should
580 give good enough profile. */
581 if (sum > node->count * 9 / 10)
582 sum = node->count * 9 / 10;
583 if (node->count == 0)
584 ipcp_set_node_scale (node, 0);
586 ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
589 /* Return true if there are some formal parameters whose value is IPA_TOP (in
590 the whole compilation unit). Change their values to IPA_BOTTOM, since they
591 most probably get their values from outside of this compilation unit. */
593 ipcp_change_tops_to_bottom (void)
596 struct cgraph_node *node;
600 for (node = cgraph_nodes; node; node = node->next)
603 struct ipa_node_params *info = IPA_NODE_REF (node);
604 count = ipa_get_param_count (info);
605 for (i = 0; i < count; i++)
607 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
608 if (lat->type == IPA_TOP)
613 fprintf (dump_file, "Forcing param ");
614 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
615 fprintf (dump_file, " of node %s to bottom.\n",
616 cgraph_node_name (node));
618 lat->type = IPA_BOTTOM;
620 if (!ipa_param_cannot_devirtualize_p (info, i)
621 && ipa_param_types_vec_empty (info, i))
624 ipa_set_param_cannot_devirtualize (info, i);
627 fprintf (dump_file, "Marking param ");
628 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
629 fprintf (dump_file, " of node %s as unusable for "
630 "devirtualization.\n",
631 cgraph_node_name (node));
639 /* Insert BINFO to the list of known types of parameter number I of the
640 function described by CALLEE_INFO. Return true iff the type information
641 associated with the callee parameter changed in any way. */
644 ipcp_add_param_type (struct ipa_node_params *callee_info, int i, tree binfo)
648 if (ipa_param_cannot_devirtualize_p (callee_info, i))
651 if (callee_info->params[i].types)
653 count = VEC_length (tree, callee_info->params[i].types);
654 for (j = 0; j < count; j++)
655 if (VEC_index (tree, callee_info->params[i].types, j) == binfo)
659 if (VEC_length (tree, callee_info->params[i].types)
660 == (unsigned) PARAM_VALUE (PARAM_DEVIRT_TYPE_LIST_SIZE))
661 return !ipa_set_param_cannot_devirtualize (callee_info, i);
663 VEC_safe_push (tree, heap, callee_info->params[i].types, binfo);
667 /* Copy known types information for parameter number CALLEE_IDX of CALLEE_INFO
668 from a parameter of CALLER_INFO as described by JF. Return true iff the
669 type information changed in any way. JF must be a pass-through or an
670 ancestor jump function. */
673 ipcp_copy_types (struct ipa_node_params *caller_info,
674 struct ipa_node_params *callee_info,
675 int callee_idx, struct ipa_jump_func *jf)
677 int caller_idx, j, count;
680 if (ipa_param_cannot_devirtualize_p (callee_info, callee_idx))
683 if (jf->type == IPA_JF_PASS_THROUGH)
685 if (jf->value.pass_through.operation != NOP_EXPR)
687 ipa_set_param_cannot_devirtualize (callee_info, callee_idx);
690 caller_idx = jf->value.pass_through.formal_id;
693 caller_idx = jf->value.ancestor.formal_id;
695 if (ipa_param_cannot_devirtualize_p (caller_info, caller_idx))
697 ipa_set_param_cannot_devirtualize (callee_info, callee_idx);
701 if (!caller_info->params[caller_idx].types)
705 count = VEC_length (tree, caller_info->params[caller_idx].types);
706 for (j = 0; j < count; j++)
708 tree binfo = VEC_index (tree, caller_info->params[caller_idx].types, j);
709 if (jf->type == IPA_JF_ANCESTOR)
711 binfo = get_binfo_at_offset (binfo, jf->value.ancestor.offset,
712 jf->value.ancestor.type);
715 ipa_set_param_cannot_devirtualize (callee_info, callee_idx);
719 res |= ipcp_add_param_type (callee_info, callee_idx, binfo);
724 /* Propagate type information for parameter of CALLEE_INFO number I as
725 described by JF. CALLER_INFO describes the caller. Return true iff the
726 type information changed in any way. */
729 ipcp_propagate_types (struct ipa_node_params *caller_info,
730 struct ipa_node_params *callee_info,
731 struct ipa_jump_func *jf, int i)
736 case IPA_JF_CONST_MEMBER_PTR:
740 case IPA_JF_KNOWN_TYPE:
741 return ipcp_add_param_type (callee_info, i, jf->value.base_binfo);
743 case IPA_JF_PASS_THROUGH:
744 case IPA_JF_ANCESTOR:
745 return ipcp_copy_types (caller_info, callee_info, i, jf);
748 /* If we reach this we cannot use this parameter for devirtualization. */
749 return !ipa_set_param_cannot_devirtualize (callee_info, i);
752 /* Interprocedural analysis. The algorithm propagates constants from the
753 caller's parameters to the callee's arguments. */
755 ipcp_propagate_stage (void)
758 struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
759 struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
760 struct ipcp_lattice *dest_lat;
761 struct cgraph_edge *cs;
762 struct ipa_jump_func *jump_func;
763 struct ipa_func_list *wl;
766 ipa_check_create_node_params ();
767 ipa_check_create_edge_args ();
769 /* Initialize worklist to contain all functions. */
770 wl = ipa_init_func_list ();
773 struct cgraph_node *node = ipa_pop_func_from_list (&wl);
774 struct ipa_node_params *info = IPA_NODE_REF (node);
776 for (cs = node->callees; cs; cs = cs->next_callee)
778 struct cgraph_node *callee = cgraph_function_or_thunk_node (cs->callee, NULL);
779 struct ipa_node_params *callee_info = IPA_NODE_REF (callee);
780 struct ipa_edge_args *args = IPA_EDGE_REF (cs);
782 if (ipa_is_called_with_var_arguments (callee_info)
783 || !cs->callee->analyzed
784 || ipa_is_called_with_var_arguments (callee_info))
787 count = ipa_get_cs_argument_count (args);
788 for (i = 0; i < count; i++)
790 jump_func = ipa_get_ith_jump_func (args, i);
791 ipa_lattice_from_jfunc (info, &inc_lat, jump_func);
792 dest_lat = ipa_get_lattice (callee_info, i);
793 ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
794 if (ipcp_lattice_changed (&new_lat, dest_lat))
796 dest_lat->type = new_lat.type;
797 dest_lat->constant = new_lat.constant;
798 ipa_push_func_to_list (&wl, callee);
801 if (ipcp_propagate_types (info, callee_info, jump_func, i))
802 ipa_push_func_to_list (&wl, callee);
808 /* Call the constant propagation algorithm and re-call it if necessary
809 (if there are undetermined values left). */
811 ipcp_iterate_stage (void)
813 struct cgraph_node *node;
814 n_cloning_candidates = 0;
817 fprintf (dump_file, "\nIPA iterate stage:\n\n");
820 ipa_update_after_lto_read ();
822 for (node = cgraph_nodes; node; node = node->next)
825 ipcp_initialize_node_lattices (node);
826 ipcp_compute_node_scale (node);
828 if (dump_file && (dump_flags & TDF_DETAILS))
830 ipcp_print_all_lattices (dump_file);
831 ipcp_function_scale_print (dump_file);
834 ipcp_propagate_stage ();
835 if (ipcp_change_tops_to_bottom ())
836 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
837 This change should be propagated. */
839 gcc_assert (n_cloning_candidates);
840 ipcp_propagate_stage ();
844 fprintf (dump_file, "\nIPA lattices after propagation:\n");
845 ipcp_print_all_lattices (dump_file);
846 if (dump_flags & TDF_DETAILS)
847 ipcp_print_profile_data (dump_file);
851 /* Check conditions to forbid constant insertion to function described by
854 ipcp_node_modifiable_p (struct cgraph_node *node)
856 /* Once we will be able to do in-place replacement, we can be more
858 return ipcp_versionable_function_p (node);
861 /* Print count scale data structures. */
863 ipcp_function_scale_print (FILE * f)
865 struct cgraph_node *node;
867 for (node = cgraph_nodes; node; node = node->next)
871 fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
872 fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
873 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
877 /* Print counts of all cgraph nodes. */
879 ipcp_print_func_profile_counts (FILE * f)
881 struct cgraph_node *node;
883 for (node = cgraph_nodes; node; node = node->next)
885 fprintf (f, "function %s: ", cgraph_node_name (node));
886 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
887 " \n", (HOST_WIDE_INT) node->count);
891 /* Print counts of all cgraph edges. */
893 ipcp_print_call_profile_counts (FILE * f)
895 struct cgraph_node *node;
896 struct cgraph_edge *cs;
898 for (node = cgraph_nodes; node; node = node->next)
900 for (cs = node->callees; cs; cs = cs->next_callee)
902 fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
903 cgraph_node_name (cs->callee));
904 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
905 (HOST_WIDE_INT) cs->count);
910 /* Print profile info for all functions. */
912 ipcp_print_profile_data (FILE * f)
914 fprintf (f, "\nNODE COUNTS :\n");
915 ipcp_print_func_profile_counts (f);
916 fprintf (f, "\nCS COUNTS stage:\n");
917 ipcp_print_call_profile_counts (f);
920 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
921 processed by versioning, which operates according to the flags set.
922 PARM_TREE is the formal parameter found to be constant. LAT represents the
924 static struct ipa_replace_map *
925 ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
927 struct ipa_replace_map *replace_map;
930 const_val = build_const_val (lat, TREE_TYPE (parm_tree));
931 if (const_val == NULL_TREE)
935 fprintf (dump_file, " const ");
936 print_generic_expr (dump_file, lat->constant, 0);
937 fprintf (dump_file, " can't be converted to param ");
938 print_generic_expr (dump_file, parm_tree, 0);
939 fprintf (dump_file, "\n");
943 replace_map = ggc_alloc_ipa_replace_map ();
946 fprintf (dump_file, " replacing param ");
947 print_generic_expr (dump_file, parm_tree, 0);
948 fprintf (dump_file, " with const ");
949 print_generic_expr (dump_file, const_val, 0);
950 fprintf (dump_file, "\n");
952 replace_map->old_tree = parm_tree;
953 replace_map->new_tree = const_val;
954 replace_map->replace_p = true;
955 replace_map->ref_p = false;
960 /* Return true if this callsite should be redirected to the original callee
961 (instead of the cloned one). */
963 ipcp_need_redirect_p (struct cgraph_edge *cs)
965 struct ipa_node_params *orig_callee_info;
967 struct cgraph_node *node = cgraph_function_or_thunk_node (cs->callee, NULL);
968 struct cgraph_node *orig;
970 if (!n_cloning_candidates)
973 /* We can't redirect anything in thunks, yet. */
974 if (cs->caller->thunk.thunk_p)
977 if ((orig = ipcp_get_orig_node (node)) != NULL)
979 if (ipcp_get_orig_node (cs->caller))
982 orig_callee_info = IPA_NODE_REF (node);
983 count = ipa_get_param_count (orig_callee_info);
984 for (i = 0; i < count; i++)
986 struct ipcp_lattice *lat = ipa_get_lattice (orig_callee_info, i);
987 struct ipa_jump_func *jump_func;
989 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
990 if ((ipcp_lat_is_const (lat)
991 && jump_func->type != IPA_JF_CONST)
992 || (!ipa_param_cannot_devirtualize_p (orig_callee_info, i)
993 && !ipa_param_types_vec_empty (orig_callee_info, i)
994 && jump_func->type != IPA_JF_CONST
995 && jump_func->type != IPA_JF_KNOWN_TYPE))
1002 /* Fix the callsites and the call graph after function cloning was done. */
1004 ipcp_update_callgraph (void)
1006 struct cgraph_node *node;
1008 for (node = cgraph_nodes; node; node = node->next)
1009 if (node->analyzed && ipcp_node_is_clone (node))
1011 bitmap args_to_skip = NULL;
1012 struct cgraph_node *orig_node = ipcp_get_orig_node (node);
1013 struct ipa_node_params *info = IPA_NODE_REF (orig_node);
1014 int i, count = ipa_get_param_count (info);
1015 struct cgraph_edge *cs, *next;
1017 if (node->local.can_change_signature)
1019 args_to_skip = BITMAP_ALLOC (NULL);
1020 for (i = 0; i < count; i++)
1022 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
1024 /* We can proactively remove obviously unused arguments. */
1025 if (!ipa_is_param_used (info, i))
1027 bitmap_set_bit (args_to_skip, i);
1031 if (lat->type == IPA_CONST_VALUE)
1032 bitmap_set_bit (args_to_skip, i);
1035 for (cs = node->callers; cs; cs = next)
1037 next = cs->next_caller;
1038 if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
1041 fprintf (dump_file, "Redirecting edge %s/%i -> %s/%i "
1043 cgraph_node_name (cs->caller), cs->caller->uid,
1044 cgraph_node_name (cs->callee), cs->callee->uid,
1045 cgraph_node_name (orig_node), orig_node->uid);
1046 cgraph_redirect_edge_callee (cs, orig_node);
1052 /* Update profiling info for versioned functions and the functions they were
1055 ipcp_update_profiling (void)
1057 struct cgraph_node *node, *orig_node;
1058 gcov_type scale, scale_complement;
1059 struct cgraph_edge *cs;
1061 for (node = cgraph_nodes; node; node = node->next)
1063 if (ipcp_node_is_clone (node))
1065 orig_node = ipcp_get_orig_node (node);
1066 scale = ipcp_get_node_scale (orig_node);
1067 node->count = orig_node->count * scale / REG_BR_PROB_BASE;
1068 scale_complement = REG_BR_PROB_BASE - scale;
1070 gcc_assert (scale_complement >= 0);
1072 orig_node->count * scale_complement / REG_BR_PROB_BASE;
1073 for (cs = node->callees; cs; cs = cs->next_callee)
1074 cs->count = cs->count * scale / REG_BR_PROB_BASE;
1075 for (cs = orig_node->callees; cs; cs = cs->next_callee)
1076 cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
1081 /* If NODE was cloned, how much would program grow? */
1083 ipcp_estimate_growth (struct cgraph_node *node)
1085 struct cgraph_edge *cs;
1086 int redirectable_node_callers = 0;
1087 int removable_args = 0;
1089 = !cgraph_will_be_removed_from_program_if_no_direct_calls (node);
1090 VEC (tree, heap) *known_vals = NULL;
1091 struct ipa_node_params *info;
1095 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1096 if (cs->caller == node || !ipcp_need_redirect_p (cs))
1097 redirectable_node_callers++;
1099 need_original = true;
1101 /* If we will be able to fully replace original node, we never increase
1106 info = IPA_NODE_REF (node);
1107 count = ipa_get_param_count (info);
1108 VEC_safe_grow_cleared (tree, heap, known_vals, count);
1109 if (node->local.can_change_signature)
1110 for (i = 0; i < count; i++)
1112 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
1114 /* We can proactively remove obviously unused arguments. */
1115 if (!ipa_is_param_used (info, i))
1118 if (lat->type == IPA_CONST_VALUE)
1121 VEC_replace (tree, known_vals, i, lat->constant);
1125 /* We make just very simple estimate of savings for removal of operand from
1126 call site. Precise cost is difficult to get, as our size metric counts
1127 constants and moves as free. Generally we are looking for cases that
1128 small function is called very many times. */
1129 estimate_ipcp_clone_size_and_time (node, known_vals, &growth, NULL);
1130 VEC_free (tree, heap, known_vals);
1132 - removable_args * redirectable_node_callers;
1139 /* Estimate cost of cloning NODE. */
1141 ipcp_estimate_cloning_cost (struct cgraph_node *node)
1144 gcov_type count_sum = 1;
1145 struct cgraph_edge *e;
1148 cost = ipcp_estimate_growth (node) * 1000;
1152 fprintf (dump_file, "Versioning of %s will save code size\n",
1153 cgraph_node_name (node));
1157 for (e = node->callers; e; e = e->next_caller)
1158 if (!bitmap_bit_p (dead_nodes, e->caller->uid)
1159 && !ipcp_need_redirect_p (e))
1161 count_sum += e->count;
1162 freq_sum += e->frequency + 1;
1166 cost /= count_sum * 1000 / max_count + 1;
1168 cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
1170 fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
1171 cgraph_node_name (node), cost, inline_summary (node)->self_size,
1176 /* Walk indirect calls of NODE and if any polymorphic can be turned into a
1177 direct one now, do so. */
1180 ipcp_process_devirtualization_opportunities (struct cgraph_node *node)
1182 struct ipa_node_params *info = IPA_NODE_REF (node);
1183 struct cgraph_edge *ie, *next_ie;
1185 for (ie = node->indirect_calls; ie; ie = next_ie)
1188 HOST_WIDE_INT token, anc_offset;
1189 tree target, delta, otr_type;
1190 struct ipcp_lattice *lat;
1192 next_ie = ie->next_callee;
1193 if (!ie->indirect_info->polymorphic)
1195 param_index = ie->indirect_info->param_index;
1196 if (param_index == -1)
1199 lat = ipa_get_lattice (info, param_index);
1200 token = ie->indirect_info->otr_token;
1201 anc_offset = ie->indirect_info->anc_offset;
1202 otr_type = ie->indirect_info->otr_type;
1204 if (lat->type == IPA_CONST_VALUE)
1206 tree binfo = gimple_extract_devirt_binfo_from_cst (lat->constant);
1209 binfo = get_binfo_at_offset (binfo, anc_offset, otr_type);
1212 target = gimple_get_virt_method_for_binfo (token, binfo, &delta);
1218 if (ipa_param_cannot_devirtualize_p (info, param_index)
1219 || ipa_param_types_vec_empty (info, param_index))
1222 types_count = VEC_length (tree, info->params[param_index].types);
1223 for (j = 0; j < types_count; j++)
1225 tree binfo = VEC_index (tree, info->params[param_index].types, j);
1228 binfo = get_binfo_at_offset (binfo, anc_offset, otr_type);
1235 t = gimple_get_virt_method_for_binfo (token, binfo, &d);
1246 else if (target != t || !tree_int_cst_equal (delta, d))
1255 ipa_make_edge_direct_to_target (ie, target, delta);
1259 /* Return number of live constant parameters. */
1261 ipcp_const_param_count (struct cgraph_node *node)
1263 int const_param = 0;
1264 struct ipa_node_params *info = IPA_NODE_REF (node);
1265 int count = ipa_get_param_count (info);
1268 for (i = 0; i < count; i++)
1270 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
1271 if ((ipcp_lat_is_insertable (lat)
1272 /* Do not count obviously unused arguments. */
1273 && ipa_is_param_used (info, i))
1274 || (!ipa_param_cannot_devirtualize_p (info, i)
1275 && !ipa_param_types_vec_empty (info, i)))
1281 /* Given that a formal parameter of NODE given by INDEX is known to be constant
1282 CST, try to find any indirect edges that can be made direct and make them
1283 so. Note that INDEX is the number the parameter at the time of analyzing
1284 parameter uses and parameter removals should not be considered for it. (In
1285 fact, the parameter itself has just been removed.) */
1288 ipcp_discover_new_direct_edges (struct cgraph_node *node, int index, tree cst)
1290 struct cgraph_edge *ie, *next_ie;
1292 for (ie = node->indirect_calls; ie; ie = next_ie)
1294 struct cgraph_indirect_call_info *ici = ie->indirect_info;
1296 next_ie = ie->next_callee;
1297 if (ici->param_index != index
1298 || ici->polymorphic)
1301 ipa_make_edge_direct_to_target (ie, cst, NULL_TREE);
1306 /* Propagate the constant parameters found by ipcp_iterate_stage()
1307 to the function's code. */
1309 ipcp_insert_stage (void)
1311 struct cgraph_node *node, *node1 = NULL;
1313 VEC (cgraph_edge_p, heap) * redirect_callers;
1314 VEC (ipa_replace_map_p,gc)* replace_trees;
1317 struct ipa_replace_map *replace_param;
1319 long overall_size = 0, new_size = 0;
1322 ipa_check_create_node_params ();
1323 ipa_check_create_edge_args ();
1325 fprintf (dump_file, "\nIPA insert stage:\n\n");
1327 dead_nodes = BITMAP_ALLOC (NULL);
1329 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1331 if (node->count > max_count)
1332 max_count = node->count;
1333 overall_size += inline_summary (node)->self_size;
1336 max_new_size = overall_size;
1337 if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1338 max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1339 max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
1341 /* First collect all functions we proved to have constant arguments to
1343 heap = fibheap_new ();
1344 for (node = cgraph_nodes; node; node = node->next)
1346 struct ipa_node_params *info;
1347 /* Propagation of the constant is forbidden in certain conditions. */
1348 if (!node->analyzed || !ipcp_node_modifiable_p (node))
1350 info = IPA_NODE_REF (node);
1351 if (ipa_is_called_with_var_arguments (info))
1353 if (ipcp_const_param_count (node))
1354 node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node),
1358 /* Now clone in priority order until code size growth limits are met or
1360 while (!fibheap_empty (heap))
1362 struct ipa_node_params *info;
1364 bitmap args_to_skip;
1365 struct cgraph_edge *cs;
1367 node = (struct cgraph_node *)fibheap_extract_min (heap);
1370 fprintf (dump_file, "considering function %s\n",
1371 cgraph_node_name (node));
1373 growth = ipcp_estimate_growth (node);
1375 if (new_size + growth > max_new_size)
1378 && cgraph_optimize_for_size_p (node))
1381 fprintf (dump_file, "Not versioning, cold code would grow");
1385 info = IPA_NODE_REF (node);
1386 count = ipa_get_param_count (info);
1388 replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
1390 if (node->local.can_change_signature)
1391 args_to_skip = BITMAP_GGC_ALLOC ();
1393 args_to_skip = NULL;
1394 for (i = 0; i < count; i++)
1396 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
1397 parm_tree = ipa_get_param (info, i);
1399 /* We can proactively remove obviously unused arguments. */
1400 if (!ipa_is_param_used (info, i))
1403 bitmap_set_bit (args_to_skip, i);
1407 if (lat->type == IPA_CONST_VALUE)
1410 ipcp_create_replace_map (parm_tree, lat);
1411 if (replace_param == NULL)
1413 VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
1415 bitmap_set_bit (args_to_skip, i);
1421 fprintf (dump_file, "Not versioning, some parameters couldn't be replaced");
1427 /* Look if original function becomes dead after cloning. */
1428 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1429 if (cs->caller == node || ipcp_need_redirect_p (cs))
1431 if (!cs && cgraph_will_be_removed_from_program_if_no_direct_calls (node))
1432 bitmap_set_bit (dead_nodes, node->uid);
1434 redirect_callers = collect_callers_of_node (node);
1436 /* Redirecting all the callers of the node to the
1437 new versioned node. */
1439 cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
1440 args_to_skip, "constprop");
1441 args_to_skip = NULL;
1442 VEC_free (cgraph_edge_p, heap, redirect_callers);
1443 replace_trees = NULL;
1447 ipcp_process_devirtualization_opportunities (node1);
1450 fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
1451 cgraph_node_name (node), (int)growth, (int)new_size);
1452 ipcp_init_cloned_node (node, node1);
1454 info = IPA_NODE_REF (node);
1455 for (i = 0; i < count; i++)
1457 struct ipcp_lattice *lat = ipa_get_lattice (info, i);
1458 if (lat->type == IPA_CONST_VALUE)
1459 ipcp_discover_new_direct_edges (node1, i, lat->constant);
1463 dump_function_to_file (node1->decl, dump_file, dump_flags);
1465 for (cs = node->callees; cs; cs = cs->next_callee)
1467 struct cgraph_node *callee = cgraph_function_or_thunk_node (cs->callee, NULL);
1470 fibheap_delete_node (heap, (fibnode_t) callee->aux);
1471 callee->aux = fibheap_insert (heap,
1472 ipcp_estimate_cloning_cost (callee),
1478 while (!fibheap_empty (heap))
1481 fprintf (dump_file, "skipping function %s\n",
1482 cgraph_node_name (node));
1483 node = (struct cgraph_node *) fibheap_extract_min (heap);
1486 fibheap_delete (heap);
1487 BITMAP_FREE (dead_nodes);
1488 ipcp_update_callgraph ();
1489 ipcp_update_profiling ();
1492 /* The IPCP driver. */
1496 cgraph_remove_unreachable_nodes (true,dump_file);
1499 fprintf (dump_file, "\nIPA structures before propagation:\n");
1500 if (dump_flags & TDF_DETAILS)
1501 ipa_print_all_params (dump_file);
1502 ipa_print_all_jump_functions (dump_file);
1504 ipa_check_create_node_params ();
1505 ipa_check_create_edge_args ();
1506 /* 2. Do the interprocedural propagation. */
1507 ipcp_iterate_stage ();
1508 /* 3. Insert the constants found to the functions. */
1509 ipcp_insert_stage ();
1510 if (dump_file && (dump_flags & TDF_DETAILS))
1512 fprintf (dump_file, "\nProfiling info after insert stage:\n");
1513 ipcp_print_profile_data (dump_file);
1515 /* Free all IPCP structures. */
1516 ipa_free_all_structures_after_ipa_cp ();
1518 fprintf (dump_file, "\nIPA constant propagation end\n");
1522 /* Initialization and computation of IPCP data structures. This is the initial
1523 intraprocedural analysis of functions, which gathers information to be
1524 propagated later on. */
1527 ipcp_generate_summary (void)
1529 struct cgraph_node *node;
1532 fprintf (dump_file, "\nIPA constant propagation start:\n");
1533 ipa_register_cgraph_hooks ();
1535 /* FIXME: We could propagate through thunks happily and we could be
1536 even able to clone them, if needed. Do that later. */
1537 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1539 /* Unreachable nodes should have been eliminated before ipcp. */
1540 gcc_assert (node->needed || node->reachable);
1542 inline_summary (node)->versionable = tree_versionable_function_p (node->decl);
1543 ipa_analyze_node (node);
1547 /* Write ipcp summary for nodes in SET. */
1549 ipcp_write_summary (cgraph_node_set set,
1550 varpool_node_set vset ATTRIBUTE_UNUSED)
1552 ipa_prop_write_jump_functions (set);
1555 /* Read ipcp summary. */
1557 ipcp_read_summary (void)
1559 ipa_prop_read_jump_functions ();
1562 /* Gate for IPCP optimization. */
1564 cgraph_gate_cp (void)
1566 /* FIXME: We should remove the optimize check after we ensure we never run
1567 IPA passes when not optimizing. */
1568 return flag_ipa_cp && optimize;
1571 struct ipa_opt_pass_d pass_ipa_cp =
1576 cgraph_gate_cp, /* gate */
1577 ipcp_driver, /* execute */
1580 0, /* static_pass_number */
1581 TV_IPA_CONSTANT_PROP, /* tv_id */
1582 0, /* properties_required */
1583 0, /* properties_provided */
1584 0, /* properties_destroyed */
1585 0, /* todo_flags_start */
1587 TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
1589 ipcp_generate_summary, /* generate_summary */
1590 ipcp_write_summary, /* write_summary */
1591 ipcp_read_summary, /* read_summary */
1592 NULL, /* write_optimization_summary */
1593 NULL, /* read_optimization_summary */
1594 NULL, /* stmt_fixup */
1596 NULL, /* function_transform */
1597 NULL, /* variable_transform */