X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fipa-cp.c;h=45cb00b21694514a0d041bf632415e99a71915ec;hb=f7dba16a88a2911be8b6e6c09412c14d4a3dd317;hp=8f9937320a1de4a2ed3af1a50d0262a6dcf3ca21;hpb=16ac1565a5e2535909827ed5a0ce26a9fd6944cd;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 8f9937320a1..45cb00b2169 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -1,7 +1,9 @@ /* Interprocedural constant propagation - Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. - Contributed by Razya Ladelsky + + Contributed by Razya Ladelsky and Martin Jambor + This file is part of GCC. @@ -19,111 +21,92 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ -/* Interprocedural constant propagation. The aim of interprocedural constant - propagation (IPCP) is to find which function's argument has the same - constant value in each invocation throughout the whole program. For example, - consider the following program: - - int g (int y) - { - printf ("value is %d",y); - } +/* Interprocedural constant propagation (IPA-CP). - int f (int x) - { - g (x); - } + The goal of this transformation is to - int h (int y) - { - g (y); - } + 1) discover functions which are always invoked with some arguments with the + same known constant values and modify the functions so that the + subsequent optimizations can take advantage of the knowledge, and - void main (void) - { - f (3); - h (3); - } + 2) partial specialization - create specialized versions of functions + transformed in this way if some parameters are known constants only in + certain contexts but the estimated tradeoff between speedup and cost size + is deemed good. + The algorithm also propagates types and attempts to perform type based + devirtualization. Types are propagated much like constants. - The IPCP algorithm will find that g's formal argument y is always called - with the value 3. + The algorithm basically consists of three stages. In the first, functions + are analyzed one at a time and jump functions are constructed for all known + call-sites. In the second phase, the pass propagates information from the + jump functions across the call to reveal what values are available at what + call sites, performs estimations of effects of known values on functions and + their callees, and finally decides what specialized extra versions should be + created. In the third, the special versions materialize and appropriate + calls are redirected. - The algorithm used is based on "Interprocedural Constant Propagation", by - Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg - 152-161 + The algorithm used is to a certain extent based on "Interprocedural Constant + Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, + Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D + Cooper, Mary W. Hall, and Ken Kennedy. - The optimization is divided into three stages: First stage - intraprocedural analysis ======================================= + This phase computes jump_function and modification flags. - A jump function for a callsite represents the values passed as an actual - arguments of a given callsite. There are three types of values: - Pass through - the caller's formal parameter is passed as an actual argument. + A jump function for a call-site represents the values passed as an actual + arguments of a given call-site. In principle, there are three types of + values: + + Pass through - the caller's formal parameter is passed as an actual + argument, plus an operation on it can be performed. Constant - a constant is passed as an actual argument. Unknown - neither of the above. - The jump function info, ipa_jump_func, is stored in ipa_edge_args - structure (defined in ipa_prop.h and pointed to by cgraph_node->aux) - modified_flags are defined in ipa_node_params structure - (defined in ipa_prop.h and pointed to by cgraph_edge->aux). + All jump function types are described in detail in ipa-prop.h, together with + the data structures that represent them and methods of accessing them. - -ipcp_init_stage() is the first stage driver. + ipcp_generate_summary() is the main function of the first stage. Second stage - interprocedural analysis ======================================== - This phase does the interprocedural constant propagation. - It computes lattices for all formal parameters in the program - and their value that may be: - TOP - unknown. - BOTTOM - non constant. - CONSTANT - constant value. - Lattice describing a formal parameter p will have a constant value if all - callsites invoking this function have the same constant value passed to p. + This stage is itself divided into two phases. In the first, we propagate + known values over the call graph, in the second, we make cloning decisions. + It uses a different algorithm than the original Callahan's paper. - The lattices are stored in ipcp_lattice which is itself in ipa_node_params - structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux). + First, we traverse the functions topologically from callers to callees and, + for each strongly connected component (SCC), we propagate constants + according to previously computed jump functions. We also record what known + values depend on other known values and estimate local effects. Finally, we + propagate cumulative information about these effects from dependant values + to those on which they depend. - -ipcp_iterate_stage() is the second stage driver. + Second, we again traverse the call graph in the same topological order and + make clones for functions which we know are called with the same values in + all contexts and decide about extra specialized clones of functions just for + some contexts - these decisions are based on both local estimates and + cumulative estimates propagated from callees. - Third phase - transformation of function code + ipcp_propagate_stage() and ipcp_decision_stage() together constitute the + third stage. + + Third phase - materialization of clones, call statement updates. ============================================ - Propagates the constant-valued formals into the function. - For each function whose parameters are constants, we create its clone. - - Then we process the clone in two ways: - 1. We insert an assignment statement 'parameter = const' at the beginning - of the cloned function. - 2. For read-only parameters that do not live in memory, we replace all their - uses with the constant. - - We also need to modify some callsites to call the cloned functions instead - of the original ones. For a callsite passing an argument found to be a - constant by IPCP, there are two different cases to handle: - 1. A constant is passed as an argument. In this case the callsite in the - should be redirected to call the cloned callee. - 2. A parameter (of the caller) passed as an argument (pass through - argument). In such cases both the caller and the callee have clones and - only the callsite in the cloned caller is redirected to call to the - cloned callee. - - This update is done in two steps: First all cloned functions are created - during a traversal of the call graph, during which all callsites are - redirected to call the cloned function. Then the callsites are traversed - and many calls redirected back to fit the description above. - - -ipcp_insert_stage() is the third phase driver. - -*/ + + This stage is currently performed by call graph code (mainly in cgraphunit.c + and tree-inline.c) according to instructions inserted to the call graph by + the second stage. */ #include "config.h" #include "system.h" #include "coretypes.h" #include "tree.h" #include "target.h" +#include "gimple.h" #include "cgraph.h" #include "ipa-prop.h" #include "tree-flow.h" @@ -136,381 +119,372 @@ along with GCC; see the file COPYING3. If not see #include "tree-inline.h" #include "fibheap.h" #include "params.h" +#include "ipa-inline.h" +#include "ipa-utils.h" -/* Number of functions identified as candidates for cloning. When not cloning - we can simplify iterate stage not forcing it to go through the decision - on what is profitable and what not. */ -static int n_cloning_candidates; +struct ipcp_value; -/* Maximal count found in program. */ -static gcov_type max_count; +/* Describes a particular source for an IPA-CP value. */ -/* Cgraph nodes that has been completely replaced by cloning during iterate - * stage and will be removed after ipcp is finished. */ -static bitmap dead_nodes; +struct ipcp_value_source +{ + /* The incoming edge that brought the value. */ + struct cgraph_edge *cs; + /* If the jump function that resulted into his value was a pass-through or an + ancestor, this is the ipcp_value of the caller from which the described + value has been derived. Otherwise it is NULL. */ + struct ipcp_value *val; + /* Next pointer in a linked list of sources of a value. */ + struct ipcp_value_source *next; + /* If the jump function that resulted into his value was a pass-through or an + ancestor, this is the index of the parameter of the caller the jump + function references. */ + int index; +}; -static void ipcp_print_profile_data (FILE *); -static void ipcp_function_scale_print (FILE *); +/* Describes one particular value stored in struct ipcp_lattice. */ -/* Get the original node field of ipa_node_params associated with node NODE. */ -static inline struct cgraph_node * -ipcp_get_orig_node (struct cgraph_node *node) +struct ipcp_value { - return IPA_NODE_REF (node)->ipcp_orig_node; -} + /* The actual value for the given parameter. This is either an IPA invariant + or a TREE_BINFO describing a type that can be used for + devirtualization. */ + tree value; + /* The list of sources from which this value originates. */ + struct ipcp_value_source *sources; + /* Next pointers in a linked list of all values in a lattice. */ + struct ipcp_value *next; + /* Next pointers in a linked list of values in a strongly connected component + of values. */ + struct ipcp_value *scc_next; + /* Next pointers in a linked list of SCCs of values sorted topologically + according their sources. */ + struct ipcp_value *topo_next; + /* A specialized node created for this value, NULL if none has been (so far) + created. */ + struct cgraph_node *spec_node; + /* Depth first search number and low link for topological sorting of + values. */ + int dfs, low_link; + /* Time benefit and size cost that specializing the function for this value + would bring about in this function alone. */ + int local_time_benefit, local_size_cost; + /* Time benefit and size cost that specializing the function for this value + can bring about in it's callees (transitively). */ + int prop_time_benefit, prop_size_cost; + /* True if this valye is currently on the topo-sort stack. */ + bool on_stack; +}; -/* Return true if NODE describes a cloned/versioned function. */ -static inline bool -ipcp_node_is_clone (struct cgraph_node *node) -{ - return (ipcp_get_orig_node (node) != NULL); -} +/* Allocation pools for values and their sources in ipa-cp. */ -/* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE - as the ipcp_orig_node field in ipa_node_params. */ -static void -ipcp_init_cloned_node (struct cgraph_node *orig_node, - struct cgraph_node *new_node) -{ - gcc_checking_assert (ipa_node_params_vector - && (VEC_length (ipa_node_params_t, - ipa_node_params_vector) - > (unsigned) cgraph_max_uid)); - gcc_checking_assert (IPA_NODE_REF (new_node)->params); - IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node; -} +alloc_pool ipcp_values_pool; +alloc_pool ipcp_sources_pool; -/* Return scale for NODE. */ -static inline gcov_type -ipcp_get_node_scale (struct cgraph_node *node) -{ - return IPA_NODE_REF (node)->count_scale; -} +/* Lattice describing potential values of a formal parameter of a function and + some of their other properties. TOP is represented by a lattice with zero + values and with contains_variable and bottom flags cleared. BOTTOM is + represented by a lattice with the bottom flag set. In that case, values and + contains_variable flag should be disregarded. */ -/* Set COUNT as scale for NODE. */ -static inline void -ipcp_set_node_scale (struct cgraph_node *node, gcov_type count) +struct ipcp_lattice { - IPA_NODE_REF (node)->count_scale = count; -} + /* The list of known values and types in this lattice. Note that values are + not deallocated if a lattice is set to bottom because there may be value + sources referencing them. */ + struct ipcp_value *values; + /* Number of known values and types in this lattice. */ + int values_count; + /* The lattice contains a variable component (in addition to values). */ + bool contains_variable; + /* The value of the lattice is bottom (i.e. variable and unusable for any + propagation). */ + bool bottom; + /* There is a virtual call based on this parameter. */ + bool virt_call; +}; -/* Return whether LAT is a constant lattice. */ -static inline bool -ipcp_lat_is_const (struct ipcp_lattice *lat) -{ - if (lat->type == IPA_CONST_VALUE) - return true; - else - return false; -} +/* Maximal count found in program. */ -/* Return whether LAT is a constant lattice that ipa-cp can actually insert - into the code (i.e. constants excluding member pointers and pointers). */ -static inline bool -ipcp_lat_is_insertable (struct ipcp_lattice *lat) +static gcov_type max_count; + +/* Original overall size of the program. */ + +static long overall_size, max_new_size; + +/* Head of the linked list of topologically sorted values. */ + +static struct ipcp_value *values_topo; + +/* Return the lattice corresponding to the Ith formal parameter of the function + described by INFO. */ +static inline struct ipcp_lattice * +ipa_get_lattice (struct ipa_node_params *info, int i) { - return lat->type == IPA_CONST_VALUE; + gcc_assert (i >= 0 && i < ipa_get_param_count (info)); + gcc_checking_assert (!info->ipcp_orig_node); + gcc_checking_assert (info->lattices); + return &(info->lattices[i]); } -/* Return true if LAT1 and LAT2 are equal. */ +/* Return whether LAT is a lattice with a single constant and without an + undefined value. */ + static inline bool -ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2) +ipa_lat_is_single_const (struct ipcp_lattice *lat) { - gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2)); - if (lat1->type != lat2->type) + if (lat->bottom + || lat->contains_variable + || lat->values_count != 1) return false; - - if (TREE_CODE (lat1->constant) == ADDR_EXPR - && TREE_CODE (lat2->constant) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL - && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL) - return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)), - DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0); else - return operand_equal_p (lat1->constant, lat2->constant, 0); + return true; } -/* Compute Meet arithmetics: - Meet (IPA_BOTTOM, x) = IPA_BOTTOM - Meet (IPA_TOP,x) = x - Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b. - MEET (const_a,const_b) = const_a, if const_a == const_b.*/ -static void -ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1, - struct ipcp_lattice *lat2) -{ - if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM) - { - res->type = IPA_BOTTOM; - return; - } - if (lat1->type == IPA_TOP) - { - res->type = lat2->type; - res->constant = lat2->constant; - return; - } - if (lat2->type == IPA_TOP) - { - res->type = lat1->type; - res->constant = lat1->constant; - return; - } - if (!ipcp_lats_are_equal (lat1, lat2)) - { - res->type = IPA_BOTTOM; - return; - } - res->type = lat1->type; - res->constant = lat1->constant; -} +/* Return true iff the CS is an edge within a strongly connected component as + computed by ipa_reduced_postorder. */ -/* Return the lattice corresponding to the Ith formal parameter of the function - described by INFO. */ -static inline struct ipcp_lattice * -ipcp_get_lattice (struct ipa_node_params *info, int i) +static inline bool +edge_within_scc (struct cgraph_edge *cs) { - return &(info->params[i].ipcp_lattice); + struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux; + struct ipa_dfs_info *callee_dfs; + struct cgraph_node *callee = cgraph_function_node (cs->callee, NULL); + + callee_dfs = (struct ipa_dfs_info *) callee->aux; + return (caller_dfs + && callee_dfs + && caller_dfs->scc_no == callee_dfs->scc_no); } -/* Given the jump function JFUNC, compute the lattice LAT that describes the - value coming down the callsite. INFO describes the caller node so that - pass-through jump functions can be evaluated. */ +/* Print V which is extracted from a value in a lattice to F. */ + static void -ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, - struct ipa_jump_func *jfunc) +print_ipcp_constant_value (FILE * f, tree v) { - if (jfunc->type == IPA_JF_CONST) - { - lat->type = IPA_CONST_VALUE; - lat->constant = jfunc->value.constant; - } - else if (jfunc->type == IPA_JF_PASS_THROUGH) + if (TREE_CODE (v) == TREE_BINFO) { - struct ipcp_lattice *caller_lat; - tree cst; - - caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id); - lat->type = caller_lat->type; - if (caller_lat->type != IPA_CONST_VALUE) - return; - cst = caller_lat->constant; - - if (jfunc->value.pass_through.operation != NOP_EXPR) - { - tree restype; - if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) - == tcc_comparison) - restype = boolean_type_node; - else - restype = TREE_TYPE (cst); - cst = fold_binary (jfunc->value.pass_through.operation, - restype, cst, jfunc->value.pass_through.operand); - } - if (!cst || !is_gimple_ip_invariant (cst)) - lat->type = IPA_BOTTOM; - lat->constant = cst; + fprintf (f, "BINFO "); + print_generic_expr (f, BINFO_TYPE (v), 0); } - else if (jfunc->type == IPA_JF_ANCESTOR) + else if (TREE_CODE (v) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL) { - struct ipcp_lattice *caller_lat; - tree t; - bool ok; - - caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id); - lat->type = caller_lat->type; - if (caller_lat->type != IPA_CONST_VALUE) - return; - if (TREE_CODE (caller_lat->constant) != ADDR_EXPR) - { - /* This can happen when the constant is a NULL pointer. */ - lat->type = IPA_BOTTOM; - return; - } - t = TREE_OPERAND (caller_lat->constant, 0); - ok = build_ref_for_offset (&t, TREE_TYPE (t), - jfunc->value.ancestor.offset, - jfunc->value.ancestor.type, false); - if (!ok) - { - lat->type = IPA_BOTTOM; - lat->constant = NULL_TREE; - } - else - lat->constant = build_fold_addr_expr (t); + fprintf (f, "& "); + print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)), 0); } else - lat->type = IPA_BOTTOM; -} - -/* True when OLD_LAT and NEW_LAT values are not the same. */ - -static bool -ipcp_lattice_changed (struct ipcp_lattice *old_lat, - struct ipcp_lattice *new_lat) -{ - if (old_lat->type == new_lat->type) - { - if (!ipcp_lat_is_const (old_lat)) - return false; - if (ipcp_lats_are_equal (old_lat, new_lat)) - return false; - } - return true; + print_generic_expr (f, v, 0); } /* Print all ipcp_lattices of all functions to F. */ + static void -ipcp_print_all_lattices (FILE * f) +print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits) { struct cgraph_node *node; int i, count; - fprintf (f, "\nLattice:\n"); - for (node = cgraph_nodes; node; node = node->next) + fprintf (f, "\nLattices:\n"); + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) { struct ipa_node_params *info; - if (!node->analyzed) - continue; info = IPA_NODE_REF (node); - fprintf (f, " Node: %s:\n", cgraph_node_name (node)); + fprintf (f, " Node: %s/%i:\n", cgraph_node_name (node), node->uid); count = ipa_get_param_count (info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + struct ipcp_value *val; + bool prev = false; fprintf (f, " param [%d]: ", i); - if (lat->type == IPA_CONST_VALUE) + if (lat->bottom) + { + fprintf (f, "BOTTOM\n"); + continue; + } + + if (!lat->values_count && !lat->contains_variable) + { + fprintf (f, "TOP\n"); + continue; + } + + if (lat->contains_variable) + { + fprintf (f, "VARIABLE"); + prev = true; + if (dump_benefits) + fprintf (f, "\n"); + } + + for (val = lat->values; val; val = val->next) { - tree cst = lat->constant; - fprintf (f, "type is CONST "); - print_generic_expr (f, cst, 0); - if (TREE_CODE (cst) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL) + if (dump_benefits && prev) + fprintf (f, " "); + else if (!dump_benefits && prev) + fprintf (f, ", "); + else + prev = true; + + print_ipcp_constant_value (f, val->value); + + if (dump_sources) { - fprintf (f, " -> "); - print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)), - 0); + struct ipcp_value_source *s; + + fprintf (f, " [from:"); + for (s = val->sources; s; s = s->next) + fprintf (f, " %i(%i)", s->cs->caller->uid,s->cs->frequency); + fprintf (f, "]"); } - fprintf (f, "\n"); + + if (dump_benefits) + fprintf (f, " [loc_time: %i, loc_size: %i, " + "prop_time: %i, prop_size: %i]\n", + val->local_time_benefit, val->local_size_cost, + val->prop_time_benefit, val->prop_size_cost); } - else if (lat->type == IPA_TOP) - fprintf (f, "type is TOP\n"); - else - fprintf (f, "type is BOTTOM\n"); + if (!dump_benefits) + fprintf (f, "\n"); } } } -/* Return true if ipcp algorithms would allow cloning NODE. */ +/* Determine whether it is at all technically possible to create clones of NODE + and store this information in the ipa_node_params structure associated + with NODE. */ + +static void +determine_versionability (struct cgraph_node *node) +{ + const char *reason = NULL; + + /* There are a number of generic reasons functions cannot be versioned. We + also cannot remove parameters if there are type attributes such as fnspec + present. */ + if (node->alias || node->thunk.thunk_p) + reason = "alias or thunk"; + else if (!node->local.versionable) + reason = "not a tree_versionable_function"; + else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) + reason = "insufficient body availability"; + + if (reason && dump_file && !node->alias && !node->thunk.thunk_p) + fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n", + cgraph_node_name (node), node->uid, reason); + + node->local.versionable = (reason == NULL); +} + +/* Return true if it is at all technically possible to create clones of a + NODE. */ static bool ipcp_versionable_function_p (struct cgraph_node *node) { - struct cgraph_edge *edge; + return node->local.versionable; +} - /* There are a number of generic reasons functions cannot be versioned. */ - if (!node->local.versionable) - return false; +/* Structure holding accumulated information about callers of a node. */ - /* Removing arguments doesn't work if the function takes varargs - or use __builtin_apply_args. */ - for (edge = node->callees; edge; edge = edge->next_callee) - { - tree t = edge->callee->decl; - if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL - && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS - || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START)) - return false; - } +struct caller_statistics +{ + gcov_type count_sum; + int n_calls, n_hot_calls, freq_sum; +}; + +/* Initialize fields of STAT to zeroes. */ + +static inline void +init_caller_stats (struct caller_statistics *stats) +{ + stats->count_sum = 0; + stats->n_calls = 0; + stats->n_hot_calls = 0; + stats->freq_sum = 0; +} + +/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of + non-thunk incoming edges to NODE. */ + +static bool +gather_caller_stats (struct cgraph_node *node, void *data) +{ + struct caller_statistics *stats = (struct caller_statistics *) data; + struct cgraph_edge *cs; + + for (cs = node->callers; cs; cs = cs->next_caller) + if (cs->caller->thunk.thunk_p) + cgraph_for_node_and_aliases (cs->caller, gather_caller_stats, + stats, false); + else + { + stats->count_sum += cs->count; + stats->freq_sum += cs->frequency; + stats->n_calls++; + if (cgraph_maybe_hot_edge_p (cs)) + stats->n_hot_calls ++; + } + return false; - return true; } /* Return true if this NODE is viable candidate for cloning. */ + static bool ipcp_cloning_candidate_p (struct cgraph_node *node) { - int n_calls = 0; - int n_hot_calls = 0; - gcov_type direct_call_sum = 0; - struct cgraph_edge *e; + struct caller_statistics stats; - /* We never clone functions that are not visible from outside. - FIXME: in future we should clone such functions when they are called with - different constants, but current ipcp implementation is not good on this. - */ - if (cgraph_only_called_directly_p (node) || !node->analyzed) - return false; + gcc_checking_assert (cgraph_function_with_gimple_body_p (node)); - if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) - { - if (dump_file) - fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n", - cgraph_node_name (node)); - return false; - } - if (!ipcp_versionable_function_p (node)) + if (!flag_ipa_cp_clone) { if (dump_file) - fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n", + fprintf (dump_file, "Not considering %s for cloning; " + "-fipa-cp-clone disabled.\n", cgraph_node_name (node)); return false; } - for (e = node->callers; e; e = e->next_caller) - { - direct_call_sum += e->count; - n_calls ++; - if (cgraph_maybe_hot_edge_p (e)) - n_hot_calls ++; - } - if (!n_calls) + if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) { if (dump_file) - fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n", + fprintf (dump_file, "Not considering %s for cloning; " + "optimizing it for size.\n", cgraph_node_name (node)); return false; } - if (node->local.inline_summary.self_size < n_calls) - { - if (dump_file) - fprintf (dump_file, "Considering %s for cloning; code would shrink.\n", - cgraph_node_name (node)); - return true; - } - if (!flag_ipa_cp_clone) - { - if (dump_file) - fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n", - cgraph_node_name (node)); - return false; - } + init_caller_stats (&stats); + cgraph_for_node_and_aliases (node, gather_caller_stats, &stats, false); - if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) + if (inline_summary (node)->self_size < stats.n_calls) { if (dump_file) - fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n", + fprintf (dump_file, "Considering %s for cloning; code might shrink.\n", cgraph_node_name (node)); - return false; + return true; } /* When profile is available and function is hot, propagate into it even if calls seems cold; constant propagation can improve function's speed - significandly. */ + significantly. */ if (max_count) { - if (direct_call_sum > node->count * 90 / 100) + if (stats.count_sum > node->count * 90 / 100) { if (dump_file) - fprintf (dump_file, "Considering %s for cloning; usually called directly.\n", + fprintf (dump_file, "Considering %s for cloning; " + "usually called directly.\n", cgraph_node_name (node)); return true; } } - if (!n_hot_calls) + if (!stats.n_hot_calls) { if (dump_file) fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n", @@ -523,704 +497,1947 @@ ipcp_cloning_candidate_p (struct cgraph_node *node) return true; } -/* Initialize ipcp_lattices array. The lattices corresponding to supported - types (integers, real types and Fortran constants defined as const_decls) - are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */ +/* Arrays representing a topological ordering of call graph nodes and a stack + of noes used during constant propagation. */ + +struct topo_info +{ + struct cgraph_node **order; + struct cgraph_node **stack; + int nnodes, stack_top; +}; + +/* Allocate the arrays in TOPO and topologically sort the nodes into order. */ + static void -ipcp_initialize_node_lattices (struct cgraph_node *node) +build_toporder_info (struct topo_info *topo) { - int i; - struct ipa_node_params *info = IPA_NODE_REF (node); - enum ipa_lattice_type type; - - if (ipa_is_called_with_var_arguments (info)) - type = IPA_BOTTOM; - else if (cgraph_only_called_directly_p (node)) - type = IPA_TOP; - /* When cloning is allowed, we can assume that externally visible functions - are not called. We will compensate this by cloning later. */ - else if (ipcp_cloning_candidate_p (node)) - type = IPA_TOP, n_cloning_candidates ++; - else - type = IPA_BOTTOM; + topo->order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); + topo->stack = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); + topo->stack_top = 0; + topo->nnodes = ipa_reduced_postorder (topo->order, true, true, NULL); +} + +/* Free information about strongly connected components and the arrays in + TOPO. */ - for (i = 0; i < ipa_get_param_count (info) ; i++) - ipcp_get_lattice (info, i)->type = type; +static void +free_toporder_info (struct topo_info *topo) +{ + ipa_free_postorder_info (); + free (topo->order); + free (topo->stack); } -/* build INTEGER_CST tree with type TREE_TYPE and value according to LAT. - Return the tree. */ -static tree -build_const_val (struct ipcp_lattice *lat, tree tree_type) +/* Add NODE to the stack in TOPO, unless it is already there. */ + +static inline void +push_node_to_stack (struct topo_info *topo, struct cgraph_node *node) { - tree val; + struct ipa_node_params *info = IPA_NODE_REF (node); + if (info->node_enqueued) + return; + info->node_enqueued = 1; + topo->stack[topo->stack_top++] = node; +} - gcc_assert (ipcp_lat_is_const (lat)); - val = lat->constant; +/* Pop a node from the stack in TOPO and return it or return NULL if the stack + is empty. */ - if (!useless_type_conversion_p (tree_type, TREE_TYPE (val))) +static struct cgraph_node * +pop_node_from_stack (struct topo_info *topo) +{ + if (topo->stack_top) { - if (fold_convertible_p (tree_type, val)) - return fold_build1 (NOP_EXPR, tree_type, val); - else - return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val); + struct cgraph_node *node; + topo->stack_top--; + node = topo->stack[topo->stack_top]; + IPA_NODE_REF (node)->node_enqueued = 0; + return node; } - return val; + else + return NULL; } -/* Compute the proper scale for NODE. It is the ratio between the number of - direct calls (represented on the incoming cgraph_edges) and sum of all - invocations of NODE (represented as count in cgraph_node). +/* Set lattice LAT to bottom and return true if it previously was not set as + such. */ - FIXME: This code is wrong. Since the callers can be also clones and - the clones are not scaled yet, the sums gets unrealistically high. - To properly compute the counts, we would need to do propagation across - callgraph (as external call to A might imply call to non-clonned B - if A's clone calls clonned B). */ -static void -ipcp_compute_node_scale (struct cgraph_node *node) +static inline bool +set_lattice_to_bottom (struct ipcp_lattice *lat) { - gcov_type sum; - struct cgraph_edge *cs; + bool ret = !lat->bottom; + lat->bottom = true; + return ret; +} - sum = 0; - /* Compute sum of all counts of callers. */ - for (cs = node->callers; cs != NULL; cs = cs->next_caller) - sum += cs->count; - /* Work around the unrealistically high sum problem. We just don't want - the non-cloned body to have negative or very low frequency. Since - majority of execution time will be spent in clones anyway, this should - give good enough profile. */ - if (sum > node->count * 9 / 10) - sum = node->count * 9 / 10; - if (node->count == 0) - ipcp_set_node_scale (node, 0); - else - ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count); +/* Mark lattice as containing an unknown value and return true if it previously + was not marked as such. */ + +static inline bool +set_lattice_contains_variable (struct ipcp_lattice *lat) +{ + bool ret = !lat->contains_variable; + lat->contains_variable = true; + return ret; } -/* Initialization and computation of IPCP data structures. This is the initial - intraprocedural analysis of functions, which gathers information to be - propagated later on. */ +/* Initialize ipcp_lattices. */ static void -ipcp_init_stage (void) +initialize_node_lattices (struct cgraph_node *node) { - struct cgraph_node *node; + struct ipa_node_params *info = IPA_NODE_REF (node); + struct cgraph_edge *ie; + bool disable = false, variable = false; + int i; - for (node = cgraph_nodes; node; node = node->next) - if (node->analyzed) - { - /* Unreachable nodes should have been eliminated before ipcp. */ - gcc_assert (node->needed || node->reachable); + gcc_checking_assert (cgraph_function_with_gimple_body_p (node)); + if (!node->local.local) + { + /* When cloning is allowed, we can assume that externally visible + functions are not called. We will compensate this by cloning + later. */ + if (ipcp_versionable_function_p (node) + && ipcp_cloning_candidate_p (node)) + variable = true; + else + disable = true; + } - node->local.versionable = tree_versionable_function_p (node->decl); - ipa_analyze_node (node); + if (disable || variable) + { + for (i = 0; i < ipa_get_param_count (info) ; i++) + { + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + if (disable) + set_lattice_to_bottom (lat); + else + set_lattice_contains_variable (lat); + } + if (dump_file && (dump_flags & TDF_DETAILS) + && node->alias && node->thunk.thunk_p) + fprintf (dump_file, "Marking all lattices of %s/%i as %s\n", + cgraph_node_name (node), node->uid, + disable ? "BOTTOM" : "VARIABLE"); + } + + for (ie = node->indirect_calls; ie; ie = ie->next_callee) + if (ie->indirect_info->polymorphic) + { + gcc_checking_assert (ie->indirect_info->param_index >= 0); + ipa_get_lattice (info, ie->indirect_info->param_index)->virt_call = 1; } } -/* Return true if there are some formal parameters whose value is IPA_TOP (in - the whole compilation unit). Change their values to IPA_BOTTOM, since they - most probably get their values from outside of this compilation unit. */ -static bool -ipcp_change_tops_to_bottom (void) +/* Return the result of a (possibly arithmetic) pass through jump function + JFUNC on the constant value INPUT. Return NULL_TREE if that cannot be + determined or itself is considered an interprocedural invariant. */ + +static tree +ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input) { - int i, count; - struct cgraph_node *node; - bool prop_again; + tree restype, res; + + gcc_checking_assert (is_gimple_ip_invariant (input)); + if (jfunc->value.pass_through.operation == NOP_EXPR) + return input; + + if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) + == tcc_comparison) + restype = boolean_type_node; + else + restype = TREE_TYPE (input); + res = fold_binary (jfunc->value.pass_through.operation, restype, + input, jfunc->value.pass_through.operand); + + if (res && !is_gimple_ip_invariant (res)) + return NULL_TREE; + + return res; +} - prop_again = false; - for (node = cgraph_nodes; node; node = node->next) +/* Return the result of an ancestor jump function JFUNC on the constant value + INPUT. Return NULL_TREE if that cannot be determined. */ + +static tree +ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input) +{ + if (TREE_CODE (input) == ADDR_EXPR) { - struct ipa_node_params *info = IPA_NODE_REF (node); - count = ipa_get_param_count (info); - for (i = 0; i < count; i++) - { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); - if (lat->type == IPA_TOP) - { - prop_again = true; - if (dump_file) - { - fprintf (dump_file, "Forcing param "); - print_generic_expr (dump_file, ipa_get_param (info, i), 0); - fprintf (dump_file, " of node %s to bottom.\n", - cgraph_node_name (node)); - } - lat->type = IPA_BOTTOM; - } - } + tree t = TREE_OPERAND (input, 0); + t = build_ref_for_offset (EXPR_LOCATION (t), t, + jfunc->value.ancestor.offset, + jfunc->value.ancestor.type, NULL, false); + return build_fold_addr_expr (t); } - return prop_again; + else + return NULL_TREE; } -/* Interprocedural analysis. The algorithm propagates constants from the - caller's parameters to the callee's arguments. */ -static void -ipcp_propagate_stage (void) +/* Extract the acual BINFO being described by JFUNC which must be a known type + jump function. */ + +static tree +ipa_value_from_known_type_jfunc (struct ipa_jump_func *jfunc) { - int i; - struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL }; - struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL }; - struct ipcp_lattice *dest_lat; - struct cgraph_edge *cs; - struct ipa_jump_func *jump_func; - struct ipa_func_list *wl; - int count; + tree base_binfo = TYPE_BINFO (jfunc->value.known_type.base_type); + if (!base_binfo) + return NULL_TREE; + return get_binfo_at_offset (base_binfo, + jfunc->value.known_type.offset, + jfunc->value.known_type.component_type); +} - ipa_check_create_node_params (); - ipa_check_create_edge_args (); +/* Determine whether JFUNC evaluates to a known value (that is either a + constant or a binfo) and if so, return it. Otherwise return NULL. INFO + describes the caller node so that pass-through jump functions can be + evaluated. */ - /* Initialize worklist to contain all functions. */ - wl = ipa_init_func_list (); - while (wl) +static tree +ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc) +{ + if (jfunc->type == IPA_JF_CONST) + return jfunc->value.constant; + else if (jfunc->type == IPA_JF_KNOWN_TYPE) + return ipa_value_from_known_type_jfunc (jfunc); + else if (jfunc->type == IPA_JF_PASS_THROUGH + || jfunc->type == IPA_JF_ANCESTOR) { - struct cgraph_node *node = ipa_pop_func_from_list (&wl); - struct ipa_node_params *info = IPA_NODE_REF (node); + tree input; + int idx; - for (cs = node->callees; cs; cs = cs->next_callee) - { - struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee); - struct ipa_edge_args *args = IPA_EDGE_REF (cs); + if (jfunc->type == IPA_JF_PASS_THROUGH) + idx = jfunc->value.pass_through.formal_id; + else + idx = jfunc->value.ancestor.formal_id; - if (ipa_is_called_with_var_arguments (callee_info) - || !cs->callee->analyzed - || ipa_is_called_with_var_arguments (callee_info)) - continue; + if (info->ipcp_orig_node) + input = VEC_index (tree, info->known_vals, idx); + else + { + struct ipcp_lattice *lat; - count = ipa_get_cs_argument_count (args); - for (i = 0; i < count; i++) + if (!info->lattices) { - jump_func = ipa_get_ith_jump_func (args, i); - ipcp_lattice_from_jfunc (info, &inc_lat, jump_func); - dest_lat = ipcp_get_lattice (callee_info, i); - ipa_lattice_meet (&new_lat, &inc_lat, dest_lat); - if (ipcp_lattice_changed (&new_lat, dest_lat)) - { - dest_lat->type = new_lat.type; - dest_lat->constant = new_lat.constant; - ipa_push_func_to_list (&wl, cs->callee); - } + gcc_checking_assert (!flag_ipa_cp); + return NULL_TREE; } + lat = ipa_get_lattice (info, idx); + if (!ipa_lat_is_single_const (lat)) + return NULL_TREE; + input = lat->values->value; } - } -} -/* Call the constant propagation algorithm and re-call it if necessary - (if there are undetermined values left). */ -static void -ipcp_iterate_stage (void) + if (!input) + return NULL_TREE; + + if (jfunc->type == IPA_JF_PASS_THROUGH) + { + if (jfunc->value.pass_through.operation == NOP_EXPR) + return input; + else if (TREE_CODE (input) == TREE_BINFO) + return NULL_TREE; + else + return ipa_get_jf_pass_through_result (jfunc, input); + } + else + { + if (TREE_CODE (input) == TREE_BINFO) + return get_binfo_at_offset (input, jfunc->value.ancestor.offset, + jfunc->value.ancestor.type); + else + return ipa_get_jf_ancestor_result (jfunc, input); + } + } + else + return NULL_TREE; +} + +/* Determine whether JFUNC evaluates to a constant and if so, return it. + Otherwise return NULL. INFO describes the caller node so that pass-through + jump functions can be evaluated. */ + +tree +ipa_cst_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc) +{ + tree res = ipa_value_from_jfunc (info, jfunc); + + if (res && TREE_CODE (res) == TREE_BINFO) + return NULL_TREE; + else + return res; +} + + +/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not + bottom, not containing a variable component and without any known value at + the same time. */ + +DEBUG_FUNCTION void +ipcp_verify_propagated_values (void) { struct cgraph_node *node; - n_cloning_candidates = 0; - if (dump_file) - fprintf (dump_file, "\nIPA iterate stage:\n\n"); + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + { + struct ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); - if (in_lto_p) - ipa_update_after_lto_read (); + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + + if (!lat->bottom + && !lat->contains_variable + && lat->values_count == 0) + { + if (dump_file) + { + fprintf (dump_file, "\nIPA lattices after constant " + "propagation:\n"); + print_all_lattices (dump_file, true, false); + } + + gcc_unreachable (); + } + } + } +} + +/* Return true iff X and Y should be considered equal values by IPA-CP. */ + +static bool +values_equal_for_ipcp_p (tree x, tree y) +{ + gcc_checking_assert (x != NULL_TREE && y != NULL_TREE); + + if (x == y) + return true; + + if (TREE_CODE (x) == TREE_BINFO || TREE_CODE (y) == TREE_BINFO) + return false; + + if (TREE_CODE (x) == ADDR_EXPR + && TREE_CODE (y) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL + && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL) + return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)), + DECL_INITIAL (TREE_OPERAND (y, 0)), 0); + else + return operand_equal_p (x, y, 0); +} + +/* Add a new value source to VAL, marking that a value comes from edge CS and + (if the underlying jump function is a pass-through or an ancestor one) from + a caller value SRC_VAL of a caller parameter described by SRC_INDEX. */ + +static void +add_value_source (struct ipcp_value *val, struct cgraph_edge *cs, + struct ipcp_value *src_val, int src_idx) +{ + struct ipcp_value_source *src; + + src = (struct ipcp_value_source *) pool_alloc (ipcp_sources_pool); + src->cs = cs; + src->val = src_val; + src->index = src_idx; + + src->next = val->sources; + val->sources = src; +} + + +/* Try to add NEWVAL to LAT, potentially creating a new struct ipcp_value for + it. CS, SRC_VAL and SRC_INDEX are meant for add_value_source and have the + same meaning. */ + +static bool +add_value_to_lattice (struct ipcp_lattice *lat, tree newval, + struct cgraph_edge *cs, struct ipcp_value *src_val, + int src_idx) +{ + struct ipcp_value *val; + + if (lat->bottom) + return false; + + + for (val = lat->values; val; val = val->next) + if (values_equal_for_ipcp_p (val->value, newval)) + { + if (edge_within_scc (cs)) + { + struct ipcp_value_source *s; + for (s = val->sources; s ; s = s->next) + if (s->cs == cs) + break; + if (s) + return false; + } + + add_value_source (val, cs, src_val, src_idx); + return false; + } + + if (lat->values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) + { + /* We can only free sources, not the values themselves, because sources + of other values in this this SCC might point to them. */ + for (val = lat->values; val; val = val->next) + { + while (val->sources) + { + struct ipcp_value_source *src = val->sources; + val->sources = src->next; + pool_free (ipcp_sources_pool, src); + } + } + + lat->values = NULL; + return set_lattice_to_bottom (lat); + } + + lat->values_count++; + val = (struct ipcp_value *) pool_alloc (ipcp_values_pool); + memset (val, 0, sizeof (*val)); + + add_value_source (val, cs, src_val, src_idx); + val->value = newval; + val->next = lat->values; + lat->values = val; + return true; +} + +/* Propagate values through a pass-through jump function JFUNC associated with + edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX + is the index of the source parameter. */ + +static bool +propagate_vals_accross_pass_through (struct cgraph_edge *cs, + struct ipa_jump_func *jfunc, + struct ipcp_lattice *src_lat, + struct ipcp_lattice *dest_lat, + int src_idx) +{ + struct ipcp_value *src_val; + bool ret = false; + + if (jfunc->value.pass_through.operation == NOP_EXPR) + for (src_val = src_lat->values; src_val; src_val = src_val->next) + ret |= add_value_to_lattice (dest_lat, src_val->value, cs, + src_val, src_idx); + /* Do not create new values when propagating within an SCC because if there + arithmetic functions with circular dependencies, there is infinite number + of them and we would just make lattices bottom. */ + else if (edge_within_scc (cs)) + ret = set_lattice_contains_variable (dest_lat); + else + for (src_val = src_lat->values; src_val; src_val = src_val->next) + { + tree cstval = src_val->value; + + if (TREE_CODE (cstval) == TREE_BINFO) + { + ret |= set_lattice_contains_variable (dest_lat); + continue; + } + cstval = ipa_get_jf_pass_through_result (jfunc, cstval); + + if (cstval) + ret |= add_value_to_lattice (dest_lat, cstval, cs, src_val, src_idx); + else + ret |= set_lattice_contains_variable (dest_lat); + } + + return ret; +} + +/* Propagate values through an ancestor jump function JFUNC associated with + edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX + is the index of the source parameter. */ - for (node = cgraph_nodes; node; node = node->next) +static bool +propagate_vals_accross_ancestor (struct cgraph_edge *cs, + struct ipa_jump_func *jfunc, + struct ipcp_lattice *src_lat, + struct ipcp_lattice *dest_lat, + int src_idx) +{ + struct ipcp_value *src_val; + bool ret = false; + + if (edge_within_scc (cs)) + return set_lattice_contains_variable (dest_lat); + + for (src_val = src_lat->values; src_val; src_val = src_val->next) { - ipcp_initialize_node_lattices (node); - ipcp_compute_node_scale (node); + tree t = src_val->value; + + if (TREE_CODE (t) == TREE_BINFO) + t = get_binfo_at_offset (t, jfunc->value.ancestor.offset, + jfunc->value.ancestor.type); + else + t = ipa_get_jf_ancestor_result (jfunc, t); + + if (t) + ret |= add_value_to_lattice (dest_lat, t, cs, src_val, src_idx); + else + ret |= set_lattice_contains_variable (dest_lat); } + + return ret; +} + +/* Propagate values across jump function JFUNC that is associated with edge CS + and put the values into DEST_LAT. */ + +static bool +propagate_accross_jump_function (struct cgraph_edge *cs, + struct ipa_jump_func *jfunc, + struct ipcp_lattice *dest_lat) +{ + if (dest_lat->bottom) + return false; + + if (jfunc->type == IPA_JF_CONST + || jfunc->type == IPA_JF_KNOWN_TYPE) + { + tree val; + + if (jfunc->type == IPA_JF_KNOWN_TYPE) + { + val = ipa_value_from_known_type_jfunc (jfunc); + if (!val) + return set_lattice_contains_variable (dest_lat); + } + else + val = jfunc->value.constant; + return add_value_to_lattice (dest_lat, val, cs, NULL, 0); + } + else if (jfunc->type == IPA_JF_PASS_THROUGH + || jfunc->type == IPA_JF_ANCESTOR) + { + struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + struct ipcp_lattice *src_lat; + int src_idx; + bool ret; + + if (jfunc->type == IPA_JF_PASS_THROUGH) + src_idx = jfunc->value.pass_through.formal_id; + else + src_idx = jfunc->value.ancestor.formal_id; + + src_lat = ipa_get_lattice (caller_info, src_idx); + if (src_lat->bottom) + return set_lattice_contains_variable (dest_lat); + + /* If we would need to clone the caller and cannot, do not propagate. */ + if (!ipcp_versionable_function_p (cs->caller) + && (src_lat->contains_variable + || (src_lat->values_count > 1))) + return set_lattice_contains_variable (dest_lat); + + if (jfunc->type == IPA_JF_PASS_THROUGH) + ret = propagate_vals_accross_pass_through (cs, jfunc, src_lat, + dest_lat, src_idx); + else + ret = propagate_vals_accross_ancestor (cs, jfunc, src_lat, dest_lat, + src_idx); + + if (src_lat->contains_variable) + ret |= set_lattice_contains_variable (dest_lat); + + return ret; + } + + /* TODO: We currently do not handle member method pointers in IPA-CP (we only + use it for indirect inlining), we should propagate them too. */ + return set_lattice_contains_variable (dest_lat); +} + +/* Propagate constants from the caller to the callee of CS. INFO describes the + caller. */ + +static bool +propagate_constants_accross_call (struct cgraph_edge *cs) +{ + struct ipa_node_params *callee_info; + enum availability availability; + struct cgraph_node *callee, *alias_or_thunk; + struct ipa_edge_args *args; + bool ret = false; + int i, args_count, parms_count; + + callee = cgraph_function_node (cs->callee, &availability); + if (!callee->analyzed) + return false; + gcc_checking_assert (cgraph_function_with_gimple_body_p (callee)); + callee_info = IPA_NODE_REF (callee); + + args = IPA_EDGE_REF (cs); + args_count = ipa_get_cs_argument_count (args); + parms_count = ipa_get_param_count (callee_info); + + /* If this call goes through a thunk we must not propagate to the first (0th) + parameter. However, we might need to uncover a thunk from below a series + of aliases first. */ + alias_or_thunk = cs->callee; + while (alias_or_thunk->alias) + alias_or_thunk = cgraph_alias_aliased_node (alias_or_thunk); + if (alias_or_thunk->thunk.thunk_p) + { + ret |= set_lattice_contains_variable (ipa_get_lattice (callee_info, 0)); + i = 1; + } + else + i = 0; + + for (; (i < args_count) && (i < parms_count); i++) + { + struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i); + struct ipcp_lattice *dest_lat = ipa_get_lattice (callee_info, i); + + if (availability == AVAIL_OVERWRITABLE) + ret |= set_lattice_contains_variable (dest_lat); + else + ret |= propagate_accross_jump_function (cs, jump_func, dest_lat); + } + for (; i < parms_count; i++) + ret |= set_lattice_contains_variable (ipa_get_lattice (callee_info, i)); + + return ret; +} + +/* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS + (which can contain both constants and binfos) or KNOWN_BINFOS (which can be + NULL) return the destination. */ + +static tree +get_indirect_edge_target (struct cgraph_edge *ie, + VEC (tree, heap) *known_vals, + VEC (tree, heap) *known_binfos) +{ + int param_index = ie->indirect_info->param_index; + HOST_WIDE_INT token, anc_offset; + tree otr_type; + tree t; + + if (param_index == -1) + return NULL_TREE; + + if (!ie->indirect_info->polymorphic) + { + tree t = VEC_index (tree, known_vals, param_index); + if (t && + TREE_CODE (t) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL) + return TREE_OPERAND (t, 0); + else + return NULL_TREE; + } + + token = ie->indirect_info->otr_token; + anc_offset = ie->indirect_info->anc_offset; + otr_type = ie->indirect_info->otr_type; + + t = VEC_index (tree, known_vals, param_index); + if (!t && known_binfos) + t = VEC_index (tree, known_binfos, param_index); + if (!t) + return NULL_TREE; + + if (TREE_CODE (t) != TREE_BINFO) + { + tree binfo; + binfo = gimple_extract_devirt_binfo_from_cst (t); + if (!binfo) + return NULL_TREE; + binfo = get_binfo_at_offset (binfo, anc_offset, otr_type); + if (!binfo) + return NULL_TREE; + return gimple_get_virt_method_for_binfo (token, binfo); + } + else + { + tree binfo; + + binfo = get_binfo_at_offset (t, anc_offset, otr_type); + if (!binfo) + return NULL_TREE; + return gimple_get_virt_method_for_binfo (token, binfo); + } +} + +/* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS + and KNOWN_BINFOS. */ + +static int +devirtualization_time_bonus (struct cgraph_node *node, + VEC (tree, heap) *known_csts, + VEC (tree, heap) *known_binfos) +{ + struct cgraph_edge *ie; + int res = 0; + + for (ie = node->indirect_calls; ie; ie = ie->next_callee) + { + struct cgraph_node *callee; + struct inline_summary *isummary; + tree target; + + target = get_indirect_edge_target (ie, known_csts, known_binfos); + if (!target) + continue; + + /* Only bare minimum benefit for clearly un-inlineable targets. */ + res += 1; + callee = cgraph_get_node (target); + if (!callee || !callee->analyzed) + continue; + isummary = inline_summary (callee); + if (!isummary->inlinable) + continue; + + /* FIXME: The values below need re-considering and perhaps also + integrating into the cost metrics, at lest in some very basic way. */ + if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4) + res += 31; + else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2) + res += 15; + else if (isummary->size <= MAX_INLINE_INSNS_AUTO + || DECL_DECLARED_INLINE_P (callee->decl)) + res += 7; + } + + return res; +} + +/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT + and SIZE_COST and with the sum of frequencies of incoming edges to the + potential new clone in FREQUENCIES. */ + +static bool +good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit, + int freq_sum, gcov_type count_sum, int size_cost) +{ + if (time_benefit == 0 + || !flag_ipa_cp_clone + || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) + return false; + + gcc_checking_assert (size_cost >= 0); + + /* FIXME: These decisions need tuning. */ + if (max_count) + { + int evaluation, factor = (count_sum * 1000) / max_count; + + evaluation = (time_benefit * factor) / size_cost; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " + "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC + ") -> evaluation: %i, threshold: %i\n", + time_benefit, size_cost, (HOST_WIDE_INT) count_sum, + evaluation, 500); + + return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD); + } + else + { + int evaluation = (time_benefit * freq_sum) / size_cost; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " + "size: %i, freq_sum: %i) -> evaluation: %i, threshold: %i\n", + time_benefit, size_cost, freq_sum, evaluation, + CGRAPH_FREQ_BASE /2); + + return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD); + } +} + + +/* Allocate KNOWN_CSTS and KNOWN_BINFOS and populate them with values of + parameters that are known independent of the context. INFO describes the + function. If REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all + removable parameters will be stored in it. */ + +static bool +gather_context_independent_values (struct ipa_node_params *info, + VEC (tree, heap) **known_csts, + VEC (tree, heap) **known_binfos, + int *removable_params_cost) +{ + int i, count = ipa_get_param_count (info); + bool ret = false; + + *known_csts = NULL; + *known_binfos = NULL; + VEC_safe_grow_cleared (tree, heap, *known_csts, count); + VEC_safe_grow_cleared (tree, heap, *known_binfos, count); + + if (removable_params_cost) + *removable_params_cost = 0; + + for (i = 0; i < count ; i++) + { + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + + if (ipa_lat_is_single_const (lat)) + { + struct ipcp_value *val = lat->values; + if (TREE_CODE (val->value) != TREE_BINFO) + { + VEC_replace (tree, *known_csts, i, val->value); + if (removable_params_cost) + *removable_params_cost + += estimate_move_cost (TREE_TYPE (val->value)); + ret = true; + } + else if (lat->virt_call) + { + VEC_replace (tree, *known_binfos, i, val->value); + ret = true; + } + else if (removable_params_cost + && !ipa_is_param_used (info, i)) + *removable_params_cost + += estimate_move_cost (TREE_TYPE (ipa_get_param (info, i))); + } + else if (removable_params_cost + && !ipa_is_param_used (info, i)) + *removable_params_cost + += estimate_move_cost (TREE_TYPE (ipa_get_param (info, i))); + } + + return ret; +} + +/* Iterate over known values of parameters of NODE and estimate the local + effects in terms of time and size they have. */ + +static void +estimate_local_effects (struct cgraph_node *node) +{ + struct ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + VEC (tree, heap) *known_csts, *known_binfos; + bool always_const; + int base_time = inline_summary (node)->time; + int removable_params_cost; + + if (!count || !ipcp_versionable_function_p (node)) + return; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nEstimating effects for %s/%i, base_time: %i.\n", + cgraph_node_name (node), node->uid, base_time); + + always_const = gather_context_independent_values (info, &known_csts, + &known_binfos, + &removable_params_cost); + if (always_const) { - ipcp_print_all_lattices (dump_file); - ipcp_function_scale_print (dump_file); + struct caller_statistics stats; + int time, size; + + init_caller_stats (&stats); + cgraph_for_node_and_aliases (node, gather_caller_stats, &stats, false); + estimate_ipcp_clone_size_and_time (node, known_csts, &size, &time); + time -= devirtualization_time_bonus (node, known_csts, known_binfos); + time -= removable_params_cost; + size -= stats.n_calls * removable_params_cost; + + if (dump_file) + fprintf (dump_file, " - context independent values, size: %i, " + "time_benefit: %i\n", size, base_time - time); + + if (size <= 0 + || cgraph_will_be_removed_from_program_if_no_direct_calls (node)) + { + info->clone_for_all_contexts = true; + base_time = time; + + if (dump_file) + fprintf (dump_file, " Decided to specialize for all " + "known contexts, code not going to grow.\n"); + } + else if (good_cloning_opportunity_p (node, base_time - time, + stats.freq_sum, stats.count_sum, + size)) + { + if (size + overall_size <= max_new_size) + { + info->clone_for_all_contexts = true; + base_time = time; + overall_size += size; + + if (dump_file) + fprintf (dump_file, " Decided to specialize for all " + "known contexts, growth deemed beneficial.\n"); + } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Not cloning for all contexts because " + "max_new_size would be reached with %li.\n", + size + overall_size); + } } - ipcp_propagate_stage (); - if (ipcp_change_tops_to_bottom ()) - /* Some lattices have changed from IPA_TOP to IPA_BOTTOM. - This change should be propagated. */ + for (i = 0; i < count ; i++) { - gcc_assert (n_cloning_candidates); - ipcp_propagate_stage (); + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + struct ipcp_value *val; + int emc; + + if (lat->bottom + || !lat->values + || VEC_index (tree, known_csts, i) + || VEC_index (tree, known_binfos, i)) + continue; + + for (val = lat->values; val; val = val->next) + { + int time, size, time_benefit; + + if (TREE_CODE (val->value) != TREE_BINFO) + { + VEC_replace (tree, known_csts, i, val->value); + VEC_replace (tree, known_binfos, i, NULL_TREE); + emc = estimate_move_cost (TREE_TYPE (val->value)); + } + else if (lat->virt_call) + { + VEC_replace (tree, known_csts, i, NULL_TREE); + VEC_replace (tree, known_binfos, i, val->value); + emc = 0; + } + else + continue; + + estimate_ipcp_clone_size_and_time (node, known_csts, &size, &time); + time_benefit = base_time - time + + devirtualization_time_bonus (node, known_csts, known_binfos) + + removable_params_cost + emc; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " - estimates for value "); + print_ipcp_constant_value (dump_file, val->value); + fprintf (dump_file, " for parameter "); + print_generic_expr (dump_file, ipa_get_param (info, i), 0); + fprintf (dump_file, ": time_benefit: %i, size: %i\n", + time_benefit, size); + } + + val->local_time_benefit = time_benefit; + val->local_size_cost = size; + } } - if (dump_file) + + VEC_free (tree, heap, known_csts); + VEC_free (tree, heap, known_binfos); +} + + +/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the + topological sort of values. */ + +static void +add_val_to_toposort (struct ipcp_value *cur_val) +{ + static int dfs_counter = 0; + static struct ipcp_value *stack; + struct ipcp_value_source *src; + + if (cur_val->dfs) + return; + + dfs_counter++; + cur_val->dfs = dfs_counter; + cur_val->low_link = dfs_counter; + + cur_val->topo_next = stack; + stack = cur_val; + cur_val->on_stack = true; + + for (src = cur_val->sources; src; src = src->next) + if (src->val) + { + if (src->val->dfs == 0) + { + add_val_to_toposort (src->val); + if (src->val->low_link < cur_val->low_link) + cur_val->low_link = src->val->low_link; + } + else if (src->val->on_stack + && src->val->dfs < cur_val->low_link) + cur_val->low_link = src->val->dfs; + } + + if (cur_val->dfs == cur_val->low_link) { - fprintf (dump_file, "\nIPA lattices after propagation:\n"); - ipcp_print_all_lattices (dump_file); - if (dump_flags & TDF_DETAILS) - ipcp_print_profile_data (dump_file); + struct ipcp_value *v, *scc_list = NULL; + + do + { + v = stack; + stack = v->topo_next; + v->on_stack = false; + + v->scc_next = scc_list; + scc_list = v; + } + while (v != cur_val); + + cur_val->topo_next = values_topo; + values_topo = cur_val; } } -/* Check conditions to forbid constant insertion to function described by - NODE. */ -static inline bool -ipcp_node_modifiable_p (struct cgraph_node *node) +/* Add all values in lattices associated with NODE to the topological sort if + they are not there yet. */ + +static void +add_all_node_vals_to_toposort (struct cgraph_node *node) { - /* Once we will be able to do in-place replacement, we can be more - lax here. */ - return ipcp_versionable_function_p (node); + struct ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + + for (i = 0; i < count ; i++) + { + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + struct ipcp_value *val; + + if (lat->bottom || !lat->values) + continue; + for (val = lat->values; val; val = val->next) + add_val_to_toposort (val); + } } -/* Print count scale data structures. */ +/* One pass of constants propagation along the call graph edges, from callers + to callees (requires topological ordering in TOPO), iterate over strongly + connected components. */ + static void -ipcp_function_scale_print (FILE * f) +propagate_constants_topo (struct topo_info *topo) { - struct cgraph_node *node; + int i; - for (node = cgraph_nodes; node; node = node->next) + for (i = topo->nnodes - 1; i >= 0; i--) { - if (!node->analyzed) + struct cgraph_node *v, *node = topo->order[i]; + struct ipa_dfs_info *node_dfs_info; + + if (!cgraph_function_with_gimple_body_p (node)) continue; - fprintf (f, "printing scale for %s: ", cgraph_node_name (node)); - fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC - " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node)); + + node_dfs_info = (struct ipa_dfs_info *) node->aux; + /* First, iteratively propagate within the strongly connected component + until all lattices stabilize. */ + v = node_dfs_info->next_cycle; + while (v) + { + push_node_to_stack (topo, v); + v = ((struct ipa_dfs_info *) v->aux)->next_cycle; + } + + v = node; + while (v) + { + struct cgraph_edge *cs; + + for (cs = v->callees; cs; cs = cs->next_callee) + if (edge_within_scc (cs) + && propagate_constants_accross_call (cs)) + push_node_to_stack (topo, cs->callee); + v = pop_node_from_stack (topo); + } + + /* Afterwards, propagate along edges leading out of the SCC, calculates + the local effects of the discovered constants and all valid values to + their topological sort. */ + v = node; + while (v) + { + struct cgraph_edge *cs; + + estimate_local_effects (v); + add_all_node_vals_to_toposort (v); + for (cs = v->callees; cs; cs = cs->next_callee) + if (!edge_within_scc (cs)) + propagate_constants_accross_call (cs); + + v = ((struct ipa_dfs_info *) v->aux)->next_cycle; + } + } +} + +/* Propagate the estimated effects of individual values along the topological + from the dependant values to those they depend on. */ + +static void +propagate_effects (void) +{ + struct ipcp_value *base; + + for (base = values_topo; base; base = base->topo_next) + { + struct ipcp_value_source *src; + struct ipcp_value *val; + int time = 0, size = 0; + + for (val = base; val; val = val->scc_next) + { + time += val->local_time_benefit + val->prop_time_benefit; + size += val->local_size_cost + val->prop_size_cost; + } + + for (val = base; val; val = val->scc_next) + for (src = val->sources; src; src = src->next) + if (src->val + && cgraph_maybe_hot_edge_p (src->cs)) + { + src->val->prop_time_benefit += time; + src->val->prop_size_cost += size; + } } } -/* Print counts of all cgraph nodes. */ + +/* Propagate constants, binfos and their effects from the summaries + interprocedurally. */ + static void -ipcp_print_func_profile_counts (FILE * f) +ipcp_propagate_stage (struct topo_info *topo) { struct cgraph_node *node; - for (node = cgraph_nodes; node; node = node->next) + if (dump_file) + fprintf (dump_file, "\n Propagating constants:\n\n"); + + if (in_lto_p) + ipa_update_after_lto_read (); + + + FOR_EACH_DEFINED_FUNCTION (node) + { + struct ipa_node_params *info = IPA_NODE_REF (node); + + determine_versionability (node); + if (cgraph_function_with_gimple_body_p (node)) + { + info->lattices = XCNEWVEC (struct ipcp_lattice, + ipa_get_param_count (info)); + initialize_node_lattices (node); + } + if (node->count > max_count) + max_count = node->count; + overall_size += inline_summary (node)->self_size; + } + + max_new_size = overall_size; + if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) + max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); + max_new_size += max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1; + + if (dump_file) + fprintf (dump_file, "\noverall_size: %li, max_new_size: %li\n", + overall_size, max_new_size); + + propagate_constants_topo (topo); +#ifdef ENABLE_CHECKING + ipcp_verify_propagated_values (); +#endif + propagate_effects (); + + if (dump_file) + { + fprintf (dump_file, "\nIPA lattices after all propagation:\n"); + print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true); + } +} + +/* Discover newly direct outgoing edges from NODE which is a new clone with + known KNOWN_VALS and make them direct. */ + +static void +ipcp_discover_new_direct_edges (struct cgraph_node *node, + VEC (tree, heap) *known_vals) +{ + struct cgraph_edge *ie, *next_ie; + + for (ie = node->indirect_calls; ie; ie = next_ie) + { + tree target; + + next_ie = ie->next_callee; + target = get_indirect_edge_target (ie, known_vals, NULL); + if (target) + ipa_make_edge_direct_to_target (ie, target); + } +} + +/* Vector of pointers which for linked lists of clones of an original crgaph + edge. */ + +static VEC (cgraph_edge_p, heap) *next_edge_clone; + +static inline void +grow_next_edge_clone_vector (void) +{ + if (VEC_length (cgraph_edge_p, next_edge_clone) + <= (unsigned) cgraph_edge_max_uid) + VEC_safe_grow_cleared (cgraph_edge_p, heap, next_edge_clone, + cgraph_edge_max_uid + 1); +} + +/* Edge duplication hook to grow the appropriate linked list in + next_edge_clone. */ + +static void +ipcp_edge_duplication_hook (struct cgraph_edge *src, struct cgraph_edge *dst, + __attribute__((unused)) void *data) +{ + grow_next_edge_clone_vector (); + VEC_replace (cgraph_edge_p, next_edge_clone, dst->uid, + VEC_index (cgraph_edge_p, next_edge_clone, src->uid)); + VEC_replace (cgraph_edge_p, next_edge_clone, src->uid, dst); +} + +/* Get the next clone in the linked list of clones of an edge. */ + +static inline struct cgraph_edge * +get_next_cgraph_edge_clone (struct cgraph_edge *cs) +{ + return VEC_index (cgraph_edge_p, next_edge_clone, cs->uid); +} + +/* Return true if edge CS does bring about the value described by SRC. */ + +static bool +cgraph_edge_brings_value_p (struct cgraph_edge *cs, + struct ipcp_value_source *src) +{ + struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + + if (IPA_NODE_REF (cs->callee)->ipcp_orig_node + || caller_info->node_dead) + return false; + if (!src->val) + return true; + + if (caller_info->ipcp_orig_node) + { + tree t = VEC_index (tree, caller_info->known_vals, src->index); + return (t != NULL_TREE + && values_equal_for_ipcp_p (src->val->value, t)); + } + else + { + struct ipcp_lattice *lat = ipa_get_lattice (caller_info, src->index); + if (ipa_lat_is_single_const (lat) + && values_equal_for_ipcp_p (src->val->value, lat->values->value)) + return true; + else + return false; + } +} + +/* Given VAL, iterate over all its sources and if they still hold, add their + edge frequency and their number into *FREQUENCY and *CALLER_COUNT + respectively. */ + +static bool +get_info_about_necessary_edges (struct ipcp_value *val, int *freq_sum, + gcov_type *count_sum, int *caller_count) +{ + struct ipcp_value_source *src; + int freq = 0, count = 0; + gcov_type cnt = 0; + bool hot = false; + + for (src = val->sources; src; src = src->next) { - fprintf (f, "function %s: ", cgraph_node_name (node)); - fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC - " \n", (HOST_WIDE_INT) node->count); + struct cgraph_edge *cs = src->cs; + while (cs) + { + if (cgraph_edge_brings_value_p (cs, src)) + { + count++; + freq += cs->frequency; + cnt += cs->count; + hot |= cgraph_maybe_hot_edge_p (cs); + } + cs = get_next_cgraph_edge_clone (cs); + } } + + *freq_sum = freq; + *count_sum = cnt; + *caller_count = count; + return hot; } -/* Print counts of all cgraph edges. */ -static void -ipcp_print_call_profile_counts (FILE * f) +/* Return a vector of incoming edges that do bring value VAL. It is assumed + their number is known and equal to CALLER_COUNT. */ + +static VEC (cgraph_edge_p,heap) * +gather_edges_for_value (struct ipcp_value *val, int caller_count) { - struct cgraph_node *node; - struct cgraph_edge *cs; + struct ipcp_value_source *src; + VEC (cgraph_edge_p,heap) *ret; - for (node = cgraph_nodes; node; node = node->next) + ret = VEC_alloc (cgraph_edge_p, heap, caller_count); + for (src = val->sources; src; src = src->next) { - for (cs = node->callees; cs; cs = cs->next_callee) + struct cgraph_edge *cs = src->cs; + while (cs) { - fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller), - cgraph_node_name (cs->callee)); - fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n", - (HOST_WIDE_INT) cs->count); + if (cgraph_edge_brings_value_p (cs, src)) + VEC_quick_push (cgraph_edge_p, ret, cs); + cs = get_next_cgraph_edge_clone (cs); } } -} -/* Print profile info for all functions. */ -static void -ipcp_print_profile_data (FILE * f) -{ - fprintf (f, "\nNODE COUNTS :\n"); - ipcp_print_func_profile_counts (f); - fprintf (f, "\nCS COUNTS stage:\n"); - ipcp_print_call_profile_counts (f); + return ret; } -/* Build and initialize ipa_replace_map struct according to LAT. This struct is - processed by versioning, which operates according to the flags set. - PARM_TREE is the formal parameter found to be constant. LAT represents the - constant. */ +/* Construct a replacement map for a know VALUE for a formal parameter PARAM. + Return it or NULL if for some reason it cannot be created. */ + static struct ipa_replace_map * -ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat) +get_replacement_map (tree value, tree parm) { + tree req_type = TREE_TYPE (parm); struct ipa_replace_map *replace_map; - tree const_val; + + if (!useless_type_conversion_p (req_type, TREE_TYPE (value))) + { + if (fold_convertible_p (req_type, value)) + value = fold_build1 (NOP_EXPR, req_type, value); + else if (TYPE_SIZE (req_type) == TYPE_SIZE (TREE_TYPE (value))) + value = fold_build1 (VIEW_CONVERT_EXPR, req_type, value); + else + { + if (dump_file) + { + fprintf (dump_file, " const "); + print_generic_expr (dump_file, value, 0); + fprintf (dump_file, " can't be converted to param "); + print_generic_expr (dump_file, parm, 0); + fprintf (dump_file, "\n"); + } + return NULL; + } + } replace_map = ggc_alloc_ipa_replace_map (); - const_val = build_const_val (lat, TREE_TYPE (parm_tree)); if (dump_file) { - fprintf (dump_file, " replacing param "); - print_generic_expr (dump_file, parm_tree, 0); + fprintf (dump_file, " replacing param "); + print_generic_expr (dump_file, parm, 0); fprintf (dump_file, " with const "); - print_generic_expr (dump_file, const_val, 0); + print_generic_expr (dump_file, value, 0); fprintf (dump_file, "\n"); } - replace_map->old_tree = parm_tree; - replace_map->new_tree = const_val; + replace_map->old_tree = parm; + replace_map->new_tree = value; replace_map->replace_p = true; replace_map->ref_p = false; return replace_map; } -/* Return true if this callsite should be redirected to the original callee - (instead of the cloned one). */ -static bool -ipcp_need_redirect_p (struct cgraph_edge *cs) -{ - struct ipa_node_params *orig_callee_info; - int i, count; - struct ipa_jump_func *jump_func; - struct cgraph_node *node = cs->callee, *orig; - - if (!n_cloning_candidates) - return false; - - if ((orig = ipcp_get_orig_node (node)) != NULL) - node = orig; - if (ipcp_get_orig_node (cs->caller)) - return false; +/* Dump new profiling counts */ - orig_callee_info = IPA_NODE_REF (node); - count = ipa_get_param_count (orig_callee_info); - for (i = 0; i < count; i++) - { - struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i); - if (ipcp_lat_is_const (lat)) - { - jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); - if (jump_func->type != IPA_JF_CONST) - return true; - } - } +static void +dump_profile_updates (struct cgraph_node *orig_node, + struct cgraph_node *new_node) +{ + struct cgraph_edge *cs; - return false; + fprintf (dump_file, " setting count of the specialized node to " + HOST_WIDE_INT_PRINT_DEC "\n", (HOST_WIDE_INT) new_node->count); + for (cs = new_node->callees; cs ; cs = cs->next_callee) + fprintf (dump_file, " edge to %s has count " + HOST_WIDE_INT_PRINT_DEC "\n", + cgraph_node_name (cs->callee), (HOST_WIDE_INT) cs->count); + + fprintf (dump_file, " setting count of the original node to " + HOST_WIDE_INT_PRINT_DEC "\n", (HOST_WIDE_INT) orig_node->count); + for (cs = orig_node->callees; cs ; cs = cs->next_callee) + fprintf (dump_file, " edge to %s is left with " + HOST_WIDE_INT_PRINT_DEC "\n", + cgraph_node_name (cs->callee), (HOST_WIDE_INT) cs->count); } -/* Fix the callsites and the call graph after function cloning was done. */ +/* After a specialized NEW_NODE version of ORIG_NODE has been created, update + their profile information to reflect this. */ + static void -ipcp_update_callgraph (void) +update_profiling_info (struct cgraph_node *orig_node, + struct cgraph_node *new_node) { - struct cgraph_node *node; + struct cgraph_edge *cs; + struct caller_statistics stats; + gcov_type new_sum, orig_sum; + gcov_type remainder, orig_node_count = orig_node->count; - for (node = cgraph_nodes; node; node = node->next) - if (node->analyzed && ipcp_node_is_clone (node)) - { - bitmap args_to_skip = BITMAP_ALLOC (NULL); - struct cgraph_node *orig_node = ipcp_get_orig_node (node); - struct ipa_node_params *info = IPA_NODE_REF (orig_node); - int i, count = ipa_get_param_count (info); - struct cgraph_edge *cs, *next; + if (orig_node_count == 0) + return; - for (i = 0; i < count; i++) - { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + init_caller_stats (&stats); + cgraph_for_node_and_aliases (orig_node, gather_caller_stats, &stats, false); + orig_sum = stats.count_sum; + init_caller_stats (&stats); + cgraph_for_node_and_aliases (new_node, gather_caller_stats, &stats, false); + new_sum = stats.count_sum; + + if (orig_node_count < orig_sum + new_sum) + { + if (dump_file) + fprintf (dump_file, " Problem: node %s/%i has too low count " + HOST_WIDE_INT_PRINT_DEC " while the sum of incoming " + "counts is " HOST_WIDE_INT_PRINT_DEC "\n", + cgraph_node_name (orig_node), orig_node->uid, + (HOST_WIDE_INT) orig_node_count, + (HOST_WIDE_INT) (orig_sum + new_sum)); + + orig_node_count = (orig_sum + new_sum) * 12 / 10; + if (dump_file) + fprintf (dump_file, " proceeding by pretending it was " + HOST_WIDE_INT_PRINT_DEC "\n", + (HOST_WIDE_INT) orig_node_count); + } - /* We can proactively remove obviously unused arguments. */ - if (!ipa_is_param_used (info, i)) - { - bitmap_set_bit (args_to_skip, i); - continue; - } + new_node->count = new_sum; + remainder = orig_node_count - new_sum; + orig_node->count = remainder; - if (lat->type == IPA_CONST_VALUE) - bitmap_set_bit (args_to_skip, i); - } - for (cs = node->callers; cs; cs = next) - { - next = cs->next_caller; - if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs)) - cgraph_redirect_edge_callee (cs, orig_node); - } - } + for (cs = new_node->callees; cs ; cs = cs->next_callee) + if (cs->frequency) + cs->count = cs->count * (new_sum * REG_BR_PROB_BASE + / orig_node_count) / REG_BR_PROB_BASE; + else + cs->count = 0; + + for (cs = orig_node->callees; cs ; cs = cs->next_callee) + cs->count = cs->count * (remainder * REG_BR_PROB_BASE + / orig_node_count) / REG_BR_PROB_BASE; + + if (dump_file) + dump_profile_updates (orig_node, new_node); } -/* Update profiling info for versioned functions and the functions they were - versioned from. */ +/* Update the respective profile of specialized NEW_NODE and the original + ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM + have been redirected to the specialized version. */ + static void -ipcp_update_profiling (void) +update_specialized_profile (struct cgraph_node *new_node, + struct cgraph_node *orig_node, + gcov_type redirected_sum) { - struct cgraph_node *node, *orig_node; - gcov_type scale, scale_complement; struct cgraph_edge *cs; + gcov_type new_node_count, orig_node_count = orig_node->count; + + if (dump_file) + fprintf (dump_file, " the sum of counts of redirected edges is " + HOST_WIDE_INT_PRINT_DEC "\n", (HOST_WIDE_INT) redirected_sum); + if (orig_node_count == 0) + return; + + gcc_assert (orig_node_count >= redirected_sum); + + new_node_count = new_node->count; + new_node->count += redirected_sum; + orig_node->count -= redirected_sum; + + for (cs = new_node->callees; cs ; cs = cs->next_callee) + if (cs->frequency) + cs->count += cs->count * redirected_sum / new_node_count; + else + cs->count = 0; - for (node = cgraph_nodes; node; node = node->next) + for (cs = orig_node->callees; cs ; cs = cs->next_callee) { - if (ipcp_node_is_clone (node)) - { - orig_node = ipcp_get_orig_node (node); - scale = ipcp_get_node_scale (orig_node); - node->count = orig_node->count * scale / REG_BR_PROB_BASE; - scale_complement = REG_BR_PROB_BASE - scale; - orig_node->count = - orig_node->count * scale_complement / REG_BR_PROB_BASE; - for (cs = node->callees; cs; cs = cs->next_callee) - cs->count = cs->count * scale / REG_BR_PROB_BASE; - for (cs = orig_node->callees; cs; cs = cs->next_callee) - cs->count = cs->count * scale_complement / REG_BR_PROB_BASE; - } + gcov_type dec = cs->count * (redirected_sum * REG_BR_PROB_BASE + / orig_node_count) / REG_BR_PROB_BASE; + if (dec < cs->count) + cs->count -= dec; + else + cs->count = 0; } + + if (dump_file) + dump_profile_updates (orig_node, new_node); } -/* If NODE was cloned, how much would program grow? */ -static long -ipcp_estimate_growth (struct cgraph_node *node) +/* Create a specialized version of NODE with known constants and types of + parameters in KNOWN_VALS and redirect all edges in CALLERS to it. */ + +static struct cgraph_node * +create_specialized_node (struct cgraph_node *node, + VEC (tree, heap) *known_vals, + VEC (cgraph_edge_p,heap) *callers) { - struct cgraph_edge *cs; - int redirectable_node_callers = 0; - int removable_args = 0; - bool need_original = !cgraph_only_called_directly_p (node); - struct ipa_node_params *info; - int i, count; - int growth; + struct ipa_node_params *new_info, *info = IPA_NODE_REF (node); + VEC (ipa_replace_map_p,gc)* replace_trees = NULL; + struct cgraph_node *new_node; + int i, count = ipa_get_param_count (info); + bitmap args_to_skip; - for (cs = node->callers; cs != NULL; cs = cs->next_caller) - if (cs->caller == node || !ipcp_need_redirect_p (cs)) - redirectable_node_callers++; - else - need_original = true; + gcc_assert (!info->ipcp_orig_node); - /* If we will be able to fully replace orignal node, we never increase - program size. */ - if (!need_original) - return 0; + if (node->local.can_change_signature) + { + args_to_skip = BITMAP_GGC_ALLOC (); + for (i = 0; i < count; i++) + { + tree t = VEC_index (tree, known_vals, i); - info = IPA_NODE_REF (node); - count = ipa_get_param_count (info); - for (i = 0; i < count; i++) + if ((t && TREE_CODE (t) != TREE_BINFO) + || !ipa_is_param_used (info, i)) + bitmap_set_bit (args_to_skip, i); + } + } + else { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + args_to_skip = NULL; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " cannot change function signature\n"); + } - /* We can proactively remove obviously unused arguments. */ - if (!ipa_is_param_used (info, i)) - removable_args++; + for (i = 0; i < count ; i++) + { + tree t = VEC_index (tree, known_vals, i); + if (t && TREE_CODE (t) != TREE_BINFO) + { + struct ipa_replace_map *replace_map; - if (lat->type == IPA_CONST_VALUE) - removable_args++; + replace_map = get_replacement_map (t, ipa_get_param (info, i)); + if (replace_map) + VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_map); + } } - /* We make just very simple estimate of savings for removal of operand from - call site. Precise cost is dificult to get, as our size metric counts - constants and moves as free. Generally we are looking for cases that - small function is called very many times. */ - growth = node->local.inline_summary.self_size - - removable_args * redirectable_node_callers; - if (growth < 0) - return 0; - return growth; + new_node = cgraph_create_virtual_clone (node, callers, replace_trees, + args_to_skip, "constprop"); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " the new node is %s/%i.\n", + cgraph_node_name (new_node), new_node->uid); + gcc_checking_assert (ipa_node_params_vector + && (VEC_length (ipa_node_params_t, + ipa_node_params_vector) + > (unsigned) cgraph_max_uid)); + update_profiling_info (node, new_node); + new_info = IPA_NODE_REF (new_node); + new_info->ipcp_orig_node = node; + new_info->known_vals = known_vals; + + ipcp_discover_new_direct_edges (new_node, known_vals); + + VEC_free (cgraph_edge_p, heap, callers); + return new_node; } +/* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in + KNOWN_VALS with constants and types that are also known for all of the + CALLERS. */ -/* Estimate cost of cloning NODE. */ -static long -ipcp_estimate_cloning_cost (struct cgraph_node *node) +static void +find_more_values_for_callers_subset (struct cgraph_node *node, + VEC (tree, heap) *known_vals, + VEC (cgraph_edge_p,heap) *callers) { - int freq_sum = 1; - gcov_type count_sum = 1; - struct cgraph_edge *e; - int cost; + struct ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); - cost = ipcp_estimate_growth (node) * 1000; - if (!cost) + for (i = 0; i < count ; i++) { - if (dump_file) - fprintf (dump_file, "Versioning of %s will save code size\n", - cgraph_node_name (node)); - return 0; - } + struct cgraph_edge *cs; + tree newval = NULL_TREE; + int j; - for (e = node->callers; e; e = e->next_caller) - if (!bitmap_bit_p (dead_nodes, e->caller->uid) - && !ipcp_need_redirect_p (e)) - { - count_sum += e->count; - freq_sum += e->frequency + 1; - } + if (ipa_get_lattice (info, i)->bottom + || VEC_index (tree, known_vals, i)) + continue; - if (max_count) - cost /= count_sum * 1000 / max_count + 1; - else - cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1; - if (dump_file) - fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n", - cgraph_node_name (node), cost, node->local.inline_summary.self_size, - freq_sum); - return cost + 1; + FOR_EACH_VEC_ELT (cgraph_edge_p, callers, j, cs) + { + struct ipa_jump_func *jump_func; + tree t; + + if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) + { + newval = NULL_TREE; + break; + } + jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); + t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func); + if (!t + || (newval + && !values_equal_for_ipcp_p (t, newval))) + { + newval = NULL_TREE; + break; + } + else + newval = t; + } + + if (newval) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " adding an extra known value "); + print_ipcp_constant_value (dump_file, newval); + fprintf (dump_file, " for parameter "); + print_generic_expr (dump_file, ipa_get_param (info, i), 0); + fprintf (dump_file, "\n"); + } + + VEC_replace (tree, known_vals, i, newval); + } + } } -/* Return number of live constant parameters. */ -static int -ipcp_const_param_count (struct cgraph_node *node) +/* Given an original NODE and a VAL for which we have already created a + specialized clone, look whether there are incoming edges that still lead + into the old node but now also bring the requested value and also conform to + all other criteria such that they can be redirected the the special node. + This function can therefore redirect the final edge in a SCC. */ + +static void +perhaps_add_new_callers (struct cgraph_node *node, struct ipcp_value *val) { - int const_param = 0; - struct ipa_node_params *info = IPA_NODE_REF (node); - int count = ipa_get_param_count (info); - int i; + struct ipa_node_params *dest_info = IPA_NODE_REF (val->spec_node); + struct ipcp_value_source *src; + int count = ipa_get_param_count (dest_info); + gcov_type redirected_sum = 0; - for (i = 0; i < count; i++) + for (src = val->sources; src; src = src->next) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); - if (ipcp_lat_is_insertable (lat) - /* Do not count obviously unused arguments. */ - && ipa_is_param_used (info, i)) - const_param++; + struct cgraph_edge *cs = src->cs; + while (cs) + { + enum availability availability; + bool insufficient = false; + + if (cgraph_function_node (cs->callee, &availability) == node + && availability > AVAIL_OVERWRITABLE + && cgraph_edge_brings_value_p (cs, src)) + { + struct ipa_node_params *caller_info; + struct ipa_edge_args *args; + int i; + + caller_info = IPA_NODE_REF (cs->caller); + args = IPA_EDGE_REF (cs); + for (i = 0; i < count; i++) + { + struct ipa_jump_func *jump_func; + tree val, t; + + val = VEC_index (tree, dest_info->known_vals, i); + if (!val) + continue; + + if (i >= ipa_get_cs_argument_count (args)) + { + insufficient = true; + break; + } + jump_func = ipa_get_ith_jump_func (args, i); + t = ipa_value_from_jfunc (caller_info, jump_func); + if (!t || !values_equal_for_ipcp_p (val, t)) + { + insufficient = true; + break; + } + } + + if (!insufficient) + { + if (dump_file) + fprintf (dump_file, " - adding an extra caller %s/%i" + " of %s/%i\n", + cgraph_node_name (cs->caller), cs->caller->uid, + cgraph_node_name (val->spec_node), + val->spec_node->uid); + + cgraph_redirect_edge_callee (cs, val->spec_node); + redirected_sum += cs->count; + } + } + cs = get_next_cgraph_edge_clone (cs); + } } - return const_param; + + if (redirected_sum) + update_specialized_profile (val->spec_node, node, redirected_sum); } -/* Propagate the constant parameters found by ipcp_iterate_stage() - to the function's code. */ + +/* Copy KNOWN_BINFOS to KNOWN_VALS. */ + static void -ipcp_insert_stage (void) +move_binfos_to_values (VEC (tree, heap) *known_vals, + VEC (tree, heap) *known_binfos) { - struct cgraph_node *node, *node1 = NULL; + tree t; int i; - VEC (cgraph_edge_p, heap) * redirect_callers; - VEC (ipa_replace_map_p,gc)* replace_trees; - int node_callers, count; - tree parm_tree; - struct ipa_replace_map *replace_param; - fibheap_t heap; - long overall_size = 0, new_size = 0; - long max_new_size; - - ipa_check_create_node_params (); - ipa_check_create_edge_args (); - if (dump_file) - fprintf (dump_file, "\nIPA insert stage:\n\n"); - - dead_nodes = BITMAP_ALLOC (NULL); - for (node = cgraph_nodes; node; node = node->next) - if (node->analyzed) - { - if (node->count > max_count) - max_count = node->count; - overall_size += node->local.inline_summary.self_size; - } - - max_new_size = overall_size; - if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) - max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); - max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1; + for (i = 0; VEC_iterate (tree, known_binfos, i, t); i++) + if (t) + VEC_replace (tree, known_vals, i, t); +} - /* First collect all functions we proved to have constant arguments to heap. */ - heap = fibheap_new (); - for (node = cgraph_nodes; node; node = node->next) - { - struct ipa_node_params *info; - /* Propagation of the constant is forbidden in certain conditions. */ - if (!node->analyzed || !ipcp_node_modifiable_p (node)) - continue; - info = IPA_NODE_REF (node); - if (ipa_is_called_with_var_arguments (info)) - continue; - if (ipcp_const_param_count (node)) - node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node); - } - /* Now clone in priority order until code size growth limits are met or - heap is emptied. */ - while (!fibheap_empty (heap)) - { - struct ipa_node_params *info; - int growth = 0; - bitmap args_to_skip; - struct cgraph_edge *cs; +/* Decide whether and what specialized clones of NODE should be created. */ - node = (struct cgraph_node *)fibheap_extract_min (heap); - node->aux = NULL; - if (dump_file) - fprintf (dump_file, "considering function %s\n", - cgraph_node_name (node)); +static bool +decide_whether_version_node (struct cgraph_node *node) +{ + struct ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + VEC (tree, heap) *known_csts, *known_binfos; + bool ret = false; - growth = ipcp_estimate_growth (node); + if (count == 0) + return false; - if (new_size + growth > max_new_size) - break; - if (growth - && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))) - { - if (dump_file) - fprintf (dump_file, "Not versioning, cold code would grow"); - continue; - } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nEvaluating opportunities for %s/%i.\n", + cgraph_node_name (node), node->uid); - new_size += growth; + gather_context_independent_values (info, &known_csts, &known_binfos, + NULL); - /* Look if original function becomes dead after clonning. */ - for (cs = node->callers; cs != NULL; cs = cs->next_caller) - if (cs->caller == node || ipcp_need_redirect_p (cs)) - break; - if (!cs && cgraph_only_called_directly_p (node)) - bitmap_set_bit (dead_nodes, node->uid); + for (i = 0; i < count ; i++) + { + struct ipcp_lattice *lat = ipa_get_lattice (info, i); + struct ipcp_value *val; - info = IPA_NODE_REF (node); - count = ipa_get_param_count (info); + if (lat->bottom + || VEC_index (tree, known_csts, i) + || VEC_index (tree, known_binfos, i)) + continue; - replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1); - args_to_skip = BITMAP_GGC_ALLOC (); - for (i = 0; i < count; i++) + for (val = lat->values; val; val = val->next) { - struct ipcp_lattice *lat = ipcp_get_lattice (info, i); - parm_tree = ipa_get_param (info, i); + int freq_sum, caller_count; + gcov_type count_sum; + VEC (cgraph_edge_p, heap) *callers; + VEC (tree, heap) *kv; - /* We can proactively remove obviously unused arguments. */ - if (!ipa_is_param_used (info, i)) + if (val->spec_node) + { + perhaps_add_new_callers (node, val); + continue; + } + else if (val->local_size_cost + overall_size > max_new_size) { - bitmap_set_bit (args_to_skip, i); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Ignoring candidate value because " + "max_new_size would be reached with %li.\n", + val->local_size_cost + overall_size); continue; } + else if (!get_info_about_necessary_edges (val, &freq_sum, &count_sum, + &caller_count)) + continue; - if (lat->type == IPA_CONST_VALUE) + if (dump_file && (dump_flags & TDF_DETAILS)) { - replace_param = - ipcp_create_replace_map (parm_tree, lat); - VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param); - bitmap_set_bit (args_to_skip, i); + fprintf (dump_file, " - considering value "); + print_ipcp_constant_value (dump_file, val->value); + fprintf (dump_file, " for parameter "); + print_generic_expr (dump_file, ipa_get_param (info, i), 0); + fprintf (dump_file, " (caller_count: %i)\n", caller_count); } + + + if (!good_cloning_opportunity_p (node, val->local_time_benefit, + freq_sum, count_sum, + val->local_size_cost) + && !good_cloning_opportunity_p (node, + val->local_time_benefit + + val->prop_time_benefit, + freq_sum, count_sum, + val->local_size_cost + + val->prop_size_cost)) + continue; + + if (dump_file) + fprintf (dump_file, " Creating a specialized node of %s/%i.\n", + cgraph_node_name (node), node->uid); + + callers = gather_edges_for_value (val, caller_count); + kv = VEC_copy (tree, heap, known_csts); + move_binfos_to_values (kv, known_binfos); + VEC_replace (tree, kv, i, val->value); + find_more_values_for_callers_subset (node, kv, callers); + val->spec_node = create_specialized_node (node, kv, callers); + overall_size += val->local_size_cost; + info = IPA_NODE_REF (node); + + /* TODO: If for some lattice there is only one other known value + left, make a special node for it too. */ + ret = true; + + VEC_replace (tree, kv, i, val->value); } + } - /* Compute how many callers node has. */ - node_callers = 0; - for (cs = node->callers; cs != NULL; cs = cs->next_caller) - node_callers++; - redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers); - for (cs = node->callers; cs != NULL; cs = cs->next_caller) - VEC_quick_push (cgraph_edge_p, redirect_callers, cs); - - /* Redirecting all the callers of the node to the - new versioned node. */ - node1 = - cgraph_create_virtual_clone (node, redirect_callers, replace_trees, - args_to_skip, "constprop"); - args_to_skip = NULL; - VEC_free (cgraph_edge_p, heap, redirect_callers); - replace_trees = NULL; + if (info->clone_for_all_contexts) + { + VEC (cgraph_edge_p, heap) *callers; - if (node1 == NULL) - continue; if (dump_file) - fprintf (dump_file, "versioned function %s with growth %i, overall %i\n", - cgraph_node_name (node), (int)growth, (int)new_size); - ipcp_init_cloned_node (node, node1); + fprintf (dump_file, " - Creating a specialized node of %s/%i " + "for all known contexts.\n", cgraph_node_name (node), + node->uid); + + callers = collect_callers_of_node (node); + move_binfos_to_values (known_csts, known_binfos); + create_specialized_node (node, known_csts, callers); + info = IPA_NODE_REF (node); + info->clone_for_all_contexts = false; + ret = true; + } + else + VEC_free (tree, heap, known_csts); + + VEC_free (tree, heap, known_binfos); + return ret; +} - /* TODO: We can use indirect inlning info to produce new calls. */ +/* Transitively mark all callees of NODE within the same SCC as not dead. */ - if (dump_file) - dump_function_to_file (node1->decl, dump_file, dump_flags); +static void +spread_undeadness (struct cgraph_node *node) +{ + struct cgraph_edge *cs; + + for (cs = node->callees; cs; cs = cs->next_callee) + if (edge_within_scc (cs)) + { + struct cgraph_node *callee; + struct ipa_node_params *info; + + callee = cgraph_function_node (cs->callee, NULL); + info = IPA_NODE_REF (callee); - for (cs = node->callees; cs; cs = cs->next_callee) - if (cs->callee->aux) + if (info->node_dead) { - fibheap_delete_node (heap, (fibnode_t) cs->callee->aux); - cs->callee->aux = fibheap_insert (heap, - ipcp_estimate_cloning_cost (cs->callee), - cs->callee); + info->node_dead = 0; + spread_undeadness (callee); } + } +} + +/* Return true if NODE has a caller from outside of its SCC that is not + dead. Worker callback for cgraph_for_node_and_aliases. */ + +static bool +has_undead_caller_from_outside_scc_p (struct cgraph_node *node, + void *data ATTRIBUTE_UNUSED) +{ + struct cgraph_edge *cs; + + for (cs = node->callers; cs; cs = cs->next_caller) + if (cs->caller->thunk.thunk_p + && cgraph_for_node_and_aliases (cs->caller, + has_undead_caller_from_outside_scc_p, + NULL, true)) + return true; + else if (!edge_within_scc (cs) + && !IPA_NODE_REF (cs->caller)->node_dead) + return true; + return false; +} + + +/* Identify nodes within the same SCC as NODE which are no longer needed + because of new clones and will be removed as unreachable. */ + +static void +identify_dead_nodes (struct cgraph_node *node) +{ + struct cgraph_node *v; + for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) + if (cgraph_will_be_removed_from_program_if_no_direct_calls (v) + && !cgraph_for_node_and_aliases (v, + has_undead_caller_from_outside_scc_p, + NULL, true)) + IPA_NODE_REF (v)->node_dead = 1; + + for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) + if (!IPA_NODE_REF (v)->node_dead) + spread_undeadness (v); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) + if (IPA_NODE_REF (v)->node_dead) + fprintf (dump_file, " Marking node as dead: %s/%i.\n", + cgraph_node_name (v), v->uid); } +} + +/* The decision stage. Iterate over the topological order of call graph nodes + TOPO and make specialized clones if deemed beneficial. */ + +static void +ipcp_decision_stage (struct topo_info *topo) +{ + int i; + + if (dump_file) + fprintf (dump_file, "\nIPA decision stage:\n\n"); - while (!fibheap_empty (heap)) + for (i = topo->nnodes - 1; i >= 0; i--) { - if (dump_file) - fprintf (dump_file, "skipping function %s\n", - cgraph_node_name (node)); - node = (struct cgraph_node *) fibheap_extract_min (heap); - node->aux = NULL; + struct cgraph_node *node = topo->order[i]; + bool change = false, iterate = true; + + while (iterate) + { + struct cgraph_node *v; + iterate = false; + for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) + if (cgraph_function_with_gimple_body_p (v) + && ipcp_versionable_function_p (v)) + iterate |= decide_whether_version_node (v); + + change |= iterate; + } + if (change) + identify_dead_nodes (node); } - fibheap_delete (heap); - BITMAP_FREE (dead_nodes); - ipcp_update_callgraph (); - ipcp_update_profiling (); } /* The IPCP driver. */ + static unsigned int ipcp_driver (void) { + struct cgraph_2edge_hook_list *edge_duplication_hook_holder; + struct topo_info topo; + cgraph_remove_unreachable_nodes (true,dump_file); + ipa_check_create_node_params (); + ipa_check_create_edge_args (); + grow_next_edge_clone_vector (); + edge_duplication_hook_holder = + cgraph_add_edge_duplication_hook (&ipcp_edge_duplication_hook, NULL); + ipcp_values_pool = create_alloc_pool ("IPA-CP values", + sizeof (struct ipcp_value), 32); + ipcp_sources_pool = create_alloc_pool ("IPA-CP value sources", + sizeof (struct ipcp_value_source), 64); if (dump_file) { fprintf (dump_file, "\nIPA structures before propagation:\n"); @@ -1228,37 +2445,48 @@ ipcp_driver (void) ipa_print_all_params (dump_file); ipa_print_all_jump_functions (dump_file); } - /* 2. Do the interprocedural propagation. */ - ipcp_iterate_stage (); - /* 3. Insert the constants found to the functions. */ - ipcp_insert_stage (); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "\nProfiling info after insert stage:\n"); - ipcp_print_profile_data (dump_file); - } + + /* Topological sort. */ + build_toporder_info (&topo); + /* Do the interprocedural propagation. */ + ipcp_propagate_stage (&topo); + /* Decide what constant propagation and cloning should be performed. */ + ipcp_decision_stage (&topo); + /* Free all IPCP structures. */ + free_toporder_info (&topo); + VEC_free (cgraph_edge_p, heap, next_edge_clone); + cgraph_remove_edge_duplication_hook (edge_duplication_hook_holder); ipa_free_all_structures_after_ipa_cp (); if (dump_file) fprintf (dump_file, "\nIPA constant propagation end\n"); return 0; } -/* Note function body size. */ +/* Initialization and computation of IPCP data structures. This is the initial + intraprocedural analysis of functions, which gathers information to be + propagated later on. */ + static void ipcp_generate_summary (void) { + struct cgraph_node *node; + if (dump_file) fprintf (dump_file, "\nIPA constant propagation start:\n"); - ipa_check_create_node_params (); - ipa_check_create_edge_args (); ipa_register_cgraph_hooks (); - /* 1. Call the init stage to initialize - the ipa_node_params and ipa_edge_args structures. */ - ipcp_init_stage (); + + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + { + /* Unreachable nodes should have been eliminated before ipcp. */ + gcc_assert (node->needed || node->reachable); + node->local.versionable = tree_versionable_function_p (node->decl); + ipa_analyze_node (node); + } } /* Write ipcp summary for nodes in SET. */ + static void ipcp_write_summary (cgraph_node_set set, varpool_node_set vset ATTRIBUTE_UNUSED) @@ -1267,6 +2495,7 @@ ipcp_write_summary (cgraph_node_set set, } /* Read ipcp summary. */ + static void ipcp_read_summary (void) { @@ -1274,10 +2503,13 @@ ipcp_read_summary (void) } /* Gate for IPCP optimization. */ + static bool cgraph_gate_cp (void) { - return flag_ipa_cp; + /* FIXME: We should remove the optimize check after we ensure we never run + IPA passes when not optimizing. */ + return flag_ipa_cp && optimize; } struct ipa_opt_pass_d pass_ipa_cp = @@ -1295,7 +2527,7 @@ struct ipa_opt_pass_d pass_ipa_cp = 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_dump_cgraph | TODO_dump_func | + TODO_dump_cgraph | TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */ }, ipcp_generate_summary, /* generate_summary */