X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fipa-cp.c;h=f4aab5d2d8997fb093e9a95c7c2e419650f7a725;hb=1577e1c17a0e33424963d1dee28d9f819522eab6;hp=a129a74c7ffd3f510bcaf4927b43d57179a9b5e2;hpb=0acacf9e39c2885869ed8cea6b583626b83d13ee;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index a129a74c7ff..f4aab5d2d89 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -1,19 +1,20 @@ /* Interprocedural constant propagation - Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. Contributed by Razya Ladelsky - + This file is part of GCC. - + GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. - + GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ @@ -27,7 +28,7 @@ along with GCC; see the file COPYING3. If not see { printf ("value is %d",y); } - + int f (int x) { g (x); @@ -43,32 +44,32 @@ along with GCC; see the file COPYING3. If not see f (3); h (3); } - - + + The IPCP algorithm will find that g's formal argument y is always called with the value 3. The algorithm used is based on "Interprocedural Constant Propagation", by Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg 152-161 - + The optimization is divided into three stages: First stage - intraprocedural analysis ======================================= This phase computes jump_function and modification flags. - + A jump function for a callsite represents the values passed as an actual arguments of a given callsite. There are three types of values: Pass through - the caller's formal parameter is passed as an actual argument. Constant - a constant is passed as an actual argument. Unknown - neither of the above. - + The jump function info, ipa_jump_func, is stored in ipa_edge_args structure (defined in ipa_prop.h and pointed to by cgraph_node->aux) modified_flags are defined in ipa_node_params structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux). - + -ipcp_init_stage() is the first stage driver. Second stage - interprocedural analysis @@ -79,10 +80,10 @@ along with GCC; see the file COPYING3. If not see TOP - unknown. BOTTOM - non constant. CONSTANT - constant value. - + Lattice describing a formal parameter p will have a constant value if all callsites invoking this function have the same constant value passed to p. - + The lattices are stored in ipcp_lattice which is itself in ipa_node_params structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux). @@ -115,7 +116,7 @@ along with GCC; see the file COPYING3. If not see and many calls redirected back to fit the description above. -ipcp_insert_stage() is the third phase driver. - + */ #include "config.h" @@ -132,6 +133,23 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic.h" #include "tree-dump.h" #include "tree-inline.h" +#include "fibheap.h" +#include "params.h" + +/* Number of functions identified as candidates for cloning. When not cloning + we can simplify iterate stage not forcing it to go through the decision + on what is profitable and what not. */ +static int n_cloning_candidates; + +/* Maximal count found in program. */ +static gcov_type max_count; + +/* Cgraph nodes that has been completely replaced by cloning during iterate + * stage and will be removed after ipcp is finished. */ +static bitmap dead_nodes; + +static void ipcp_print_profile_data (FILE *); +static void ipcp_function_scale_print (FILE *); /* Get the original node field of ipa_node_params associated with node NODE. */ static inline struct cgraph_node * @@ -154,9 +172,20 @@ ipcp_init_cloned_node (struct cgraph_node *orig_node, struct cgraph_node *new_node) { ipa_check_create_node_params (); + ipa_initialize_node_params (new_node); IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node; - ipa_count_formal_params (new_node); - ipa_create_param_decls_array (new_node); +} + +/* Perform intraprocedrual analysis needed for ipcp. */ +static void +ipcp_analyze_node (struct cgraph_node *node) +{ + /* Unreachable nodes should have been eliminated before ipcp. */ + gcc_assert (node->needed || node->reachable); + + node->local.versionable = tree_versionable_function_p (node->decl); + ipa_initialize_node_params (node); + ipa_detect_param_modifications (node); } /* Return scale for NODE. */ @@ -177,7 +206,7 @@ ipcp_set_node_scale (struct cgraph_node *node, gcov_type count) static inline bool ipcp_lat_is_const (struct ipcp_lattice *lat) { - if (lat->type == IPA_CONST_VALUE || lat->type == IPA_CONST_VALUE_REF) + if (lat->type == IPA_CONST_VALUE) return true; else return false; @@ -188,11 +217,7 @@ ipcp_lat_is_const (struct ipcp_lattice *lat) static inline bool ipcp_lat_is_insertable (struct ipcp_lattice *lat) { - if ((lat->type == IPA_CONST_VALUE || lat->type == IPA_CONST_VALUE_REF) - && !POINTER_TYPE_P (TREE_TYPE (lat->constant))) - return true; - else - return false; + return lat->type == IPA_CONST_VALUE; } /* Return true if LAT1 and LAT2 are equal. */ @@ -203,10 +228,14 @@ ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2) if (lat1->type != lat2->type) return false; - if (operand_equal_p (lat1->constant, lat2->constant, 0)) - return true; - - return false; + if (TREE_CODE (lat1->constant) == ADDR_EXPR + && TREE_CODE (lat2->constant) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL + && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL) + return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)), + DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0); + else + return operand_equal_p (lat1->constant, lat2->constant, 0); } /* Compute Meet arithmetics: @@ -247,9 +276,9 @@ ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1, /* Return the lattice corresponding to the Ith formal parameter of the function described by INFO. */ static inline struct ipcp_lattice * -ipcp_get_ith_lattice (struct ipa_node_params *info, int i) +ipcp_get_lattice (struct ipa_node_params *info, int i) { - return &(info->ipcp_lattices[i]); + return &(info->params[i].ipcp_lattice); } /* Given the jump function JFUNC, compute the lattice LAT that describes the @@ -259,23 +288,64 @@ static void ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, struct ipa_jump_func *jfunc) { - if (jfunc->type == IPA_CONST) + if (jfunc->type == IPA_JF_CONST) { lat->type = IPA_CONST_VALUE; lat->constant = jfunc->value.constant; } - else if (jfunc->type == IPA_CONST_REF) + else if (jfunc->type == IPA_JF_PASS_THROUGH) { - lat->type = IPA_CONST_VALUE_REF; - lat->constant = jfunc->value.constant; + struct ipcp_lattice *caller_lat; + tree cst; + + caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id); + lat->type = caller_lat->type; + if (caller_lat->type != IPA_CONST_VALUE) + return; + cst = caller_lat->constant; + + if (jfunc->value.pass_through.operation != NOP_EXPR) + { + tree restype; + if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) + == tcc_comparison) + restype = boolean_type_node; + else + restype = TREE_TYPE (cst); + cst = fold_binary (jfunc->value.pass_through.operation, + restype, cst, jfunc->value.pass_through.operand); + } + if (!cst || !is_gimple_ip_invariant (cst)) + lat->type = IPA_BOTTOM; + lat->constant = cst; } - else if (jfunc->type == IPA_PASS_THROUGH) + else if (jfunc->type == IPA_JF_ANCESTOR) { struct ipcp_lattice *caller_lat; + tree t; + bool ok; - caller_lat = ipcp_get_ith_lattice (info, jfunc->value.formal_id); + caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id); lat->type = caller_lat->type; - lat->constant = caller_lat->constant; + if (caller_lat->type != IPA_CONST_VALUE) + return; + if (TREE_CODE (caller_lat->constant) != ADDR_EXPR) + { + /* This can happen when the constant is a NULL pointer. */ + lat->type = IPA_BOTTOM; + return; + } + t = TREE_OPERAND (caller_lat->constant, 0); + ok = build_ref_for_offset (&t, TREE_TYPE (t), + jfunc->value.ancestor.offset, + jfunc->value.ancestor.type, false); + if (!ok) + { + lat->type = IPA_BOTTOM; + lat->constant = NULL_TREE; + } + else + lat->constant = build_fold_addr_expr (t); } else lat->type = IPA_BOTTOM; @@ -304,7 +374,7 @@ ipcp_print_all_lattices (FILE * f) struct cgraph_node *node; int i, count; - fprintf (f, "\nLATTICE PRINT\n"); + fprintf (f, "\nLattice:\n"); for (node = cgraph_nodes; node; node = node->next) { struct ipa_node_params *info; @@ -312,17 +382,25 @@ ipcp_print_all_lattices (FILE * f) if (!node->analyzed) continue; info = IPA_NODE_REF (node); - fprintf (f, "Printing lattices %s:\n", cgraph_node_name (node)); + fprintf (f, " Node: %s:\n", cgraph_node_name (node)); count = ipa_get_param_count (info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); - fprintf (f, " param [%d]: ", i); - if (lat->type == IPA_CONST_VALUE || lat->type == IPA_CONST_VALUE_REF) + fprintf (f, " param [%d]: ", i); + if (lat->type == IPA_CONST_VALUE) { + tree cst = lat->constant; fprintf (f, "type is CONST "); - print_generic_expr (f, lat->constant, 0); + print_generic_expr (f, cst, 0); + if (TREE_CODE (cst) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL) + { + fprintf (f, " -> "); + print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)), + 0); + } fprintf (f, "\n"); } else if (lat->type == IPA_TOP) @@ -333,43 +411,149 @@ ipcp_print_all_lattices (FILE * f) } } -/* Initialize ipcp_lattices array. The lattices corresponding to supported - types (integers, real types and Fortran constants defined as const_decls) - are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */ -static void -ipcp_initialize_node_lattices (struct cgraph_node *node) +/* Return true if ipcp algorithms would allow cloning NODE. */ + +static bool +ipcp_versionable_function_p (struct cgraph_node *node) { - int i; - struct ipa_node_params *info = IPA_NODE_REF (node); + struct cgraph_edge *edge; - info->ipcp_lattices = XCNEWVEC (struct ipcp_lattice, - ipa_get_param_count (info)); - for (i = 0; i < ipa_get_param_count (info) ; i++) + /* There are a number of generic reasons functions cannot be versioned. */ + if (!node->local.versionable) + return false; + + /* Removing arguments doesn't work if the function takes varargs + or use __builtin_apply_args. */ + for (edge = node->callees; edge; edge = edge->next_callee) { - tree parm_tree = ipa_get_ith_param (info, i); - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); + tree t = edge->callee->decl; + if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL + && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS + || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START)) + return false; + } - if (INTEGRAL_TYPE_P (TREE_TYPE (parm_tree)) - || SCALAR_FLOAT_TYPE_P (TREE_TYPE (parm_tree)) - || POINTER_TYPE_P (TREE_TYPE (parm_tree))) - lat->type = IPA_TOP; - else - lat->type = IPA_BOTTOM; + return true; +} + +/* Return true if this NODE is viable candidate for cloning. */ +static bool +ipcp_cloning_candidate_p (struct cgraph_node *node) +{ + int n_calls = 0; + int n_hot_calls = 0; + gcov_type direct_call_sum = 0; + struct cgraph_edge *e; + + /* We never clone functions that are not visible from outside. + FIXME: in future we should clone such functions when they are called with + different constants, but current ipcp implementation is not good on this. + */ + if (cgraph_only_called_directly_p (node) || !node->analyzed) + return false; + + if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n", + cgraph_node_name (node)); + return false; + } + if (!ipcp_versionable_function_p (node)) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n", + cgraph_node_name (node)); + return false; + } + for (e = node->callers; e; e = e->next_caller) + { + direct_call_sum += e->count; + n_calls ++; + if (cgraph_maybe_hot_edge_p (e)) + n_hot_calls ++; + } + + if (!n_calls) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n", + cgraph_node_name (node)); + return false; + } + if (node->local.inline_summary.self_size < n_calls) + { + if (dump_file) + fprintf (dump_file, "Considering %s for cloning; code would shrink.\n", + cgraph_node_name (node)); + return true; + } + + if (!flag_ipa_cp_clone) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n", + cgraph_node_name (node)); + return false; + } + + if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n", + cgraph_node_name (node)); + return false; + } + + /* When profile is available and function is hot, propagate into it even if + calls seems cold; constant propagation can improve function's speed + significandly. */ + if (max_count) + { + if (direct_call_sum > node->count * 90 / 100) + { + if (dump_file) + fprintf (dump_file, "Considering %s for cloning; usually called directly.\n", + cgraph_node_name (node)); + return true; + } } + if (!n_hot_calls) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n", + cgraph_node_name (node)); + return false; + } + if (dump_file) + fprintf (dump_file, "Considering %s for cloning.\n", + cgraph_node_name (node)); + return true; } -/* Create a new assignment statement and make it the first statement in the - function. PARM1 is the lhs of the assignment and VAL is the rhs. */ +/* Initialize ipcp_lattices array. The lattices corresponding to supported + types (integers, real types and Fortran constants defined as const_decls) + are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */ static void -constant_val_insert (tree parm1 ATTRIBUTE_UNUSED, tree val ATTRIBUTE_UNUSED) +ipcp_initialize_node_lattices (struct cgraph_node *node) { - gimple init_stmt = NULL; - edge e_step; + int i; + struct ipa_node_params *info = IPA_NODE_REF (node); + enum ipa_lattice_type type; + + if (ipa_is_called_with_var_arguments (info)) + type = IPA_BOTTOM; + else if (cgraph_only_called_directly_p (node)) + type = IPA_TOP; + /* When cloning is allowed, we can assume that externally visible functions + are not called. We will compensate this by cloning later. */ + else if (ipcp_cloning_candidate_p (node)) + type = IPA_TOP, n_cloning_candidates ++; + else + type = IPA_BOTTOM; - init_stmt = gimple_build_assign (parm1, val); - gcc_assert (init_stmt); - e_step = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)); - gsi_insert_on_edge_immediate (e_step, init_stmt); + for (i = 0; i < ipa_get_param_count (info) ; i++) + ipcp_get_lattice (info, i)->type = type; } /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT. @@ -377,31 +561,30 @@ constant_val_insert (tree parm1 ATTRIBUTE_UNUSED, tree val ATTRIBUTE_UNUSED) static tree build_const_val (struct ipcp_lattice *lat, tree tree_type) { - tree const_val = NULL; + tree val; gcc_assert (ipcp_lat_is_const (lat)); - const_val = fold_convert (tree_type, lat->constant); - return const_val; -} + val = lat->constant; -/* Build the tree representing the constant and call constant_val_insert(). */ -static void -ipcp_propagate_one_const (struct cgraph_node *node, int param, - struct ipcp_lattice *lat) -{ - tree const_val; - tree parm_tree; - - if (dump_file) - fprintf (dump_file, "propagating const to %s\n", cgraph_node_name (node)); - parm_tree = ipa_get_ith_param (IPA_NODE_REF (node), param); - const_val = build_const_val (lat, TREE_TYPE (parm_tree)); - constant_val_insert (parm_tree, const_val); + if (!useless_type_conversion_p (tree_type, TREE_TYPE (val))) + { + if (fold_convertible_p (tree_type, val)) + return fold_build1 (NOP_EXPR, tree_type, val); + else + return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val); + } + return val; } /* Compute the proper scale for NODE. It is the ratio between the number of direct calls (represented on the incoming cgraph_edges) and sum of all - invocations of NODE (represented as count in cgraph_node). */ + invocations of NODE (represented as count in cgraph_node). + + FIXME: This code is wrong. Since the callers can be also clones and + the clones are not scaled yet, the sums gets unrealistically high. + To properly compute the counts, we would need to do propagation across + callgraph (as external call to A might imply call to non-clonned B + if A's clone calls clonned B). */ static void ipcp_compute_node_scale (struct cgraph_node *node) { @@ -412,6 +595,12 @@ ipcp_compute_node_scale (struct cgraph_node *node) /* Compute sum of all counts of callers. */ for (cs = node->callers; cs != NULL; cs = cs->next_caller) sum += cs->count; + /* Work around the unrealistically high sum problem. We just don't want + the non-cloned body to have negative or very low frequency. Since + majority of execution time will be spent in clones anyway, this should + give good enough profile. */ + if (sum > node->count * 9 / 10) + sum = node->count * 9 / 10; if (node->count == 0) ipcp_set_node_scale (node, 0); else @@ -428,18 +617,8 @@ ipcp_init_stage (void) struct cgraph_edge *cs; for (node = cgraph_nodes; node; node = node->next) - { - if (!node->analyzed) - continue; - /* Unreachable nodes should have been eliminated before ipcp. */ - gcc_assert (node->needed || node->reachable); - - ipa_count_formal_params (node); - ipa_create_param_decls_array (node); - ipcp_initialize_node_lattices (node); - ipa_detect_param_modifications (node); - ipcp_compute_node_scale (node); - } + if (node->analyzed) + ipcp_analyze_node (node); for (node = cgraph_nodes; node; node = node->next) { if (!node->analyzed) @@ -447,18 +626,15 @@ ipcp_init_stage (void) /* building jump functions */ for (cs = node->callees; cs; cs = cs->next_callee) { - if (!cs->callee->analyzed) + /* We do not need to bother analyzing calls to unknown + functions unless they may become known during lto/whopr. */ + if (!cs->callee->analyzed && !flag_lto && !flag_whopr) continue; ipa_count_arguments (cs); if (ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) != ipa_get_param_count (IPA_NODE_REF (cs->callee))) - { - /* Handle cases of functions with - a variable number of parameters. */ - ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee)); - } - else - ipa_compute_jump_functions (cs); + ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee)); + ipa_compute_jump_functions (cs); } } } @@ -480,10 +656,17 @@ ipcp_change_tops_to_bottom (void) count = ipa_get_param_count (info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); if (lat->type == IPA_TOP) { prop_again = true; + if (dump_file) + { + fprintf (dump_file, "Forcing param "); + print_generic_expr (dump_file, ipa_get_param (info, i), 0); + fprintf (dump_file, " of node %s to bottom.\n", + cgraph_node_name (node)); + } lat->type = IPA_BOTTOM; } } @@ -507,6 +690,7 @@ ipcp_propagate_stage (void) ipa_check_create_node_params (); ipa_check_create_edge_args (); + /* Initialize worklist to contain all functions. */ wl = ipa_init_func_list (); while (wl) @@ -519,7 +703,9 @@ ipcp_propagate_stage (void) struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee); struct ipa_edge_args *args = IPA_EDGE_REF (cs); - if (ipa_is_called_with_var_arguments (callee_info)) + if (ipa_is_called_with_var_arguments (callee_info) + || !cs->callee->analyzed + || ipa_is_called_with_var_arguments (callee_info)) continue; count = ipa_get_cs_argument_count (args); @@ -527,7 +713,7 @@ ipcp_propagate_stage (void) { jump_func = ipa_get_ith_jump_func (args, i); ipcp_lattice_from_jfunc (info, &inc_lat, jump_func); - dest_lat = ipcp_get_ith_lattice (callee_info, i); + dest_lat = ipcp_get_lattice (callee_info, i); ipa_lattice_meet (&new_lat, &inc_lat, dest_lat); if (ipcp_lattice_changed (&new_lat, dest_lat)) { @@ -545,22 +731,51 @@ ipcp_propagate_stage (void) static void ipcp_iterate_stage (void) { + struct cgraph_node *node; + n_cloning_candidates = 0; + + if (dump_file) + fprintf (dump_file, "\nIPA iterate stage:\n\n"); + + if (in_lto_p) + ipa_update_after_lto_read (); + + for (node = cgraph_nodes; node; node = node->next) + { + ipcp_initialize_node_lattices (node); + ipcp_compute_node_scale (node); + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + ipcp_print_all_lattices (dump_file); + ipcp_function_scale_print (dump_file); + } + ipcp_propagate_stage (); if (ipcp_change_tops_to_bottom ()) /* Some lattices have changed from IPA_TOP to IPA_BOTTOM. This change should be propagated. */ - ipcp_propagate_stage (); + { + gcc_assert (n_cloning_candidates); + ipcp_propagate_stage (); + } + if (dump_file) + { + fprintf (dump_file, "\nIPA lattices after propagation:\n"); + ipcp_print_all_lattices (dump_file); + if (dump_flags & TDF_DETAILS) + ipcp_print_profile_data (dump_file); + } } /* Check conditions to forbid constant insertion to function described by NODE. */ static inline bool -ipcp_node_not_modifiable_p (struct cgraph_node *node) +ipcp_node_modifiable_p (struct cgraph_node *node) { - /* ??? Handle pending sizes case. */ - if (DECL_UNINLINABLE (node->decl)) - return true; - return false; + /* Once we will be able to do in-place replacement, we can be more + lax here. */ + return ipcp_versionable_function_p (node); } /* Print count scale data structures. */ @@ -612,109 +827,6 @@ ipcp_print_call_profile_counts (FILE * f) } } -/* Print all counts and probabilities of cfg edges of all functions. */ -static void -ipcp_print_edge_profiles (FILE * f) -{ - struct cgraph_node *node; - basic_block bb; - edge_iterator ei; - edge e; - - for (node = cgraph_nodes; node; node = node->next) - { - fprintf (f, "function %s: \n", cgraph_node_name (node)); - if (node->analyzed) - { - bb = - ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl)); - fprintf (f, "ENTRY: "); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC - " %d\n", (HOST_WIDE_INT) bb->count, bb->frequency); - - if (bb->succs) - FOR_EACH_EDGE (e, ei, bb->succs) - { - if (e->dest == - EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION - (node->decl))) - fprintf (f, "edge ENTRY -> EXIT, Count"); - else - fprintf (f, "edge ENTRY -> %d, Count", e->dest->index); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC - " Prob %d\n", (HOST_WIDE_INT) e->count, - e->probability); - } - FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - { - fprintf (f, "bb[%d]: ", bb->index); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC - " %d\n", (HOST_WIDE_INT) bb->count, bb->frequency); - FOR_EACH_EDGE (e, ei, bb->succs) - { - if (e->dest == - EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION - (node->decl))) - fprintf (f, "edge %d -> EXIT, Count", e->src->index); - else - fprintf (f, "edge %d -> %d, Count", e->src->index, - e->dest->index); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC " Prob %d\n", - (HOST_WIDE_INT) e->count, e->probability); - } - } - } - } -} - -/* Print counts and frequencies for all basic blocks of all functions. */ -static void -ipcp_print_bb_profiles (FILE * f) -{ - basic_block bb; - struct cgraph_node *node; - - for (node = cgraph_nodes; node; node = node->next) - { - fprintf (f, "function %s: \n", cgraph_node_name (node)); - if (node->analyzed) - { - bb = - ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl)); - fprintf (f, "ENTRY: Count"); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC - " Frequency %d\n", (HOST_WIDE_INT) bb->count, - bb->frequency); - - FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - { - fprintf (f, "bb[%d]: Count", bb->index); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC - " Frequency %d\n", (HOST_WIDE_INT) bb->count, - bb->frequency); - } - bb = - EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl)); - fprintf (f, "EXIT: Count"); - fprintf (f, " " HOST_WIDE_INT_PRINT_DEC - " Frequency %d\n", (HOST_WIDE_INT) bb->count, - bb->frequency); - - } - } -} - -/* Print all IPCP data structures to F. */ -static void -ipcp_print_all_structures (FILE * f) -{ - ipcp_print_all_lattices (f); - ipcp_function_scale_print (f); - ipa_print_all_tree_maps (f); - ipa_print_all_param_flags (f); - ipa_print_all_jump_functions (f); -} - /* Print profile info for all functions. */ static void ipcp_print_profile_data (FILE * f) @@ -723,10 +835,6 @@ ipcp_print_profile_data (FILE * f) ipcp_print_func_profile_counts (f); fprintf (f, "\nCS COUNTS stage:\n"); ipcp_print_call_profile_counts (f); - fprintf (f, "\nBB COUNTS and FREQUENCIES :\n"); - ipcp_print_bb_profiles (f); - fprintf (f, "\nCFG EDGES COUNTS and PROBABILITIES :\n"); - ipcp_print_edge_profiles (f); } /* Build and initialize ipa_replace_map struct according to LAT. This struct is @@ -734,32 +842,25 @@ ipcp_print_profile_data (FILE * f) PARM_TREE is the formal parameter found to be constant. LAT represents the constant. */ static struct ipa_replace_map * -ipcp_create_replace_map (struct function *func, tree parm_tree, - struct ipcp_lattice *lat) +ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat) { struct ipa_replace_map *replace_map; tree const_val; - replace_map = XCNEW (struct ipa_replace_map); - if (is_gimple_reg (parm_tree) && gimple_default_def (func, parm_tree) - && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_default_def (func, - parm_tree))) - { - if (dump_file) - fprintf (dump_file, "replacing param with const\n"); - const_val = build_const_val (lat, TREE_TYPE (parm_tree)); - replace_map->old_tree =gimple_default_def (func, parm_tree); - replace_map->new_tree = const_val; - replace_map->replace_p = true; - replace_map->ref_p = false; - } - else + replace_map = GGC_NEW (struct ipa_replace_map); + const_val = build_const_val (lat, TREE_TYPE (parm_tree)); + if (dump_file) { - replace_map->old_tree = NULL; - replace_map->new_tree = NULL; - replace_map->replace_p = false; - replace_map->ref_p = false; + fprintf (dump_file, " replacing param "); + print_generic_expr (dump_file, parm_tree, 0); + fprintf (dump_file, " with const "); + print_generic_expr (dump_file, const_val, 0); + fprintf (dump_file, "\n"); } + replace_map->old_tree = parm_tree; + replace_map->new_tree = const_val; + replace_map->replace_p = true; + replace_map->ref_p = false; return replace_map; } @@ -772,16 +873,25 @@ ipcp_need_redirect_p (struct cgraph_edge *cs) struct ipa_node_params *orig_callee_info; int i, count; struct ipa_jump_func *jump_func; + struct cgraph_node *node = cs->callee, *orig; - orig_callee_info = IPA_NODE_REF (ipcp_get_orig_node (cs->callee)); + if (!n_cloning_candidates) + return false; + + if ((orig = ipcp_get_orig_node (node)) != NULL) + node = orig; + if (ipcp_get_orig_node (cs->caller)) + return false; + + orig_callee_info = IPA_NODE_REF (node); count = ipa_get_param_count (orig_callee_info); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (orig_callee_info, i); + struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i); if (ipcp_lat_is_const (lat)) { jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); - if (!ipcp_lat_is_const (lat)) + if (jump_func->type != IPA_JF_CONST) return true; } } @@ -793,49 +903,38 @@ ipcp_need_redirect_p (struct cgraph_edge *cs) static void ipcp_update_callgraph (void) { - struct cgraph_node *node, *orig_callee; - struct cgraph_edge *cs; + struct cgraph_node *node; for (node = cgraph_nodes; node; node = node->next) - { - /* want to fix only original nodes */ - if (!node->analyzed || ipcp_node_is_clone (node)) - continue; - for (cs = node->callees; cs; cs = cs->next_callee) - if (ipcp_node_is_clone (cs->callee)) + if (node->analyzed && ipcp_node_is_clone (node)) + { + bitmap args_to_skip = BITMAP_ALLOC (NULL); + struct cgraph_node *orig_node = ipcp_get_orig_node (node); + struct ipa_node_params *info = IPA_NODE_REF (orig_node); + int i, count = ipa_get_param_count (info); + struct cgraph_edge *cs, *next; + + for (i = 0; i < count; i++) { - /* Callee is a cloned node */ - orig_callee = ipcp_get_orig_node (cs->callee); - if (ipcp_need_redirect_p (cs)) + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + + /* We can proactively remove obviously unused arguments. */ + if (!ipa_is_param_used (info, i)) { - cgraph_redirect_edge_callee (cs, orig_callee); - gimple_call_set_fndecl (cs->call_stmt, orig_callee->decl); + bitmap_set_bit (args_to_skip, i); + continue; } - } - } -} - -/* Update all cfg basic blocks in NODE according to SCALE. */ -static void -ipcp_update_bb_counts (struct cgraph_node *node, gcov_type scale) -{ - basic_block bb; - - FOR_ALL_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - bb->count = bb->count * scale / REG_BR_PROB_BASE; -} - -/* Update all cfg edges in NODE according to SCALE. */ -static void -ipcp_update_edges_counts (struct cgraph_node *node, gcov_type scale) -{ - basic_block bb; - edge_iterator ei; - edge e; - FOR_ALL_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) - FOR_EACH_EDGE (e, ei, bb->succs) - e->count = e->count * scale / REG_BR_PROB_BASE; + if (lat->type == IPA_CONST_VALUE) + bitmap_set_bit (args_to_skip, i); + } + for (cs = node->callers; cs; cs = next) + { + next = cs->next_caller; + if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs)) + cgraph_redirect_edge_callee (cs, orig_node); + } + } } /* Update profiling info for versioned functions and the functions they were @@ -861,64 +960,229 @@ ipcp_update_profiling (void) cs->count = cs->count * scale / REG_BR_PROB_BASE; for (cs = orig_node->callees; cs; cs = cs->next_callee) cs->count = cs->count * scale_complement / REG_BR_PROB_BASE; - ipcp_update_bb_counts (node, scale); - ipcp_update_bb_counts (orig_node, scale_complement); - ipcp_update_edges_counts (node, scale); - ipcp_update_edges_counts (orig_node, scale_complement); } } } +/* If NODE was cloned, how much would program grow? */ +static long +ipcp_estimate_growth (struct cgraph_node *node) +{ + struct cgraph_edge *cs; + int redirectable_node_callers = 0; + int removable_args = 0; + bool need_original = !cgraph_only_called_directly_p (node); + struct ipa_node_params *info; + int i, count; + int growth; + + for (cs = node->callers; cs != NULL; cs = cs->next_caller) + if (cs->caller == node || !ipcp_need_redirect_p (cs)) + redirectable_node_callers++; + else + need_original = true; + + /* If we will be able to fully replace orignal node, we never increase + program size. */ + if (!need_original) + return 0; + + info = IPA_NODE_REF (node); + count = ipa_get_param_count (info); + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + + /* We can proactively remove obviously unused arguments. */ + if (!ipa_is_param_used (info, i)) + removable_args++; + + if (lat->type == IPA_CONST_VALUE) + removable_args++; + } + + /* We make just very simple estimate of savings for removal of operand from + call site. Precise cost is dificult to get, as our size metric counts + constants and moves as free. Generally we are looking for cases that + small function is called very many times. */ + growth = node->local.inline_summary.self_size + - removable_args * redirectable_node_callers; + if (growth < 0) + return 0; + return growth; +} + + +/* Estimate cost of cloning NODE. */ +static long +ipcp_estimate_cloning_cost (struct cgraph_node *node) +{ + int freq_sum = 1; + gcov_type count_sum = 1; + struct cgraph_edge *e; + int cost; + + cost = ipcp_estimate_growth (node) * 1000; + if (!cost) + { + if (dump_file) + fprintf (dump_file, "Versioning of %s will save code size\n", + cgraph_node_name (node)); + return 0; + } + + for (e = node->callers; e; e = e->next_caller) + if (!bitmap_bit_p (dead_nodes, e->caller->uid) + && !ipcp_need_redirect_p (e)) + { + count_sum += e->count; + freq_sum += e->frequency + 1; + } + + if (max_count) + cost /= count_sum * 1000 / max_count + 1; + else + cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1; + if (dump_file) + fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n", + cgraph_node_name (node), cost, node->local.inline_summary.self_size, + freq_sum); + return cost + 1; +} + +/* Return number of live constant parameters. */ +static int +ipcp_const_param_count (struct cgraph_node *node) +{ + int const_param = 0; + struct ipa_node_params *info = IPA_NODE_REF (node); + int count = ipa_get_param_count (info); + int i; + + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + if (ipcp_lat_is_insertable (lat) + /* Do not count obviously unused arguments. */ + && ipa_is_param_used (info, i)) + const_param++; + } + return const_param; +} + /* Propagate the constant parameters found by ipcp_iterate_stage() to the function's code. */ static void ipcp_insert_stage (void) { struct cgraph_node *node, *node1 = NULL; - int i, const_param; + int i; VEC (cgraph_edge_p, heap) * redirect_callers; - varray_type replace_trees; - struct cgraph_edge *cs; + VEC (ipa_replace_map_p,gc)* replace_trees; int node_callers, count; tree parm_tree; struct ipa_replace_map *replace_param; + fibheap_t heap; + long overall_size = 0, new_size = 0; + long max_new_size; ipa_check_create_node_params (); ipa_check_create_edge_args (); + if (dump_file) + fprintf (dump_file, "\nIPA insert stage:\n\n"); + + dead_nodes = BITMAP_ALLOC (NULL); for (node = cgraph_nodes; node; node = node->next) + if (node->analyzed) + { + if (node->count > max_count) + max_count = node->count; + overall_size += node->local.inline_summary.self_size; + } + + max_new_size = overall_size; + if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) + max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); + max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1; + + /* First collect all functions we proved to have constant arguments to heap. */ + heap = fibheap_new (); + for (node = cgraph_nodes; node; node = node->next) { struct ipa_node_params *info; /* Propagation of the constant is forbidden in certain conditions. */ - if (!node->analyzed || ipcp_node_not_modifiable_p (node)) + if (!node->analyzed || !ipcp_node_modifiable_p (node)) continue; info = IPA_NODE_REF (node); if (ipa_is_called_with_var_arguments (info)) continue; - const_param = 0; - count = ipa_get_param_count (info); - for (i = 0; i < count; i++) + if (ipcp_const_param_count (node)) + node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node); + } + + /* Now clone in priority order until code size growth limits are met or + heap is emptied. */ + while (!fibheap_empty (heap)) + { + struct ipa_node_params *info; + int growth = 0; + bitmap args_to_skip; + struct cgraph_edge *cs; + + node = (struct cgraph_node *)fibheap_extract_min (heap); + node->aux = NULL; + if (dump_file) + fprintf (dump_file, "considering function %s\n", + cgraph_node_name (node)); + + growth = ipcp_estimate_growth (node); + + if (new_size + growth > max_new_size) + break; + if (growth + && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))) { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); - if (ipcp_lat_is_insertable (lat)) - const_param++; + if (dump_file) + fprintf (dump_file, "Not versioning, cold code would grow"); + continue; } - if (const_param == 0) - continue; - VARRAY_GENERIC_PTR_INIT (replace_trees, const_param, "replace_trees"); + + new_size += growth; + + /* Look if original function becomes dead after clonning. */ + for (cs = node->callers; cs != NULL; cs = cs->next_caller) + if (cs->caller == node || ipcp_need_redirect_p (cs)) + break; + if (!cs && cgraph_only_called_directly_p (node)) + bitmap_set_bit (dead_nodes, node->uid); + + info = IPA_NODE_REF (node); + count = ipa_get_param_count (info); + + replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1); + args_to_skip = BITMAP_GGC_ALLOC (); for (i = 0; i < count; i++) { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); - if (lat->type == IPA_CONST_VALUE - && !POINTER_TYPE_P (TREE_TYPE (lat->constant))) + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + parm_tree = ipa_get_param (info, i); + + /* We can proactively remove obviously unused arguments. */ + if (!ipa_is_param_used (info, i)) + { + bitmap_set_bit (args_to_skip, i); + continue; + } + + if (lat->type == IPA_CONST_VALUE) { - parm_tree = ipa_get_ith_param (info, i); replace_param = - ipcp_create_replace_map (DECL_STRUCT_FUNCTION (node->decl), - parm_tree, lat); - VARRAY_PUSH_GENERIC_PTR (replace_trees, replace_param); + ipcp_create_replace_map (parm_tree, lat); + VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param); + bitmap_set_bit (args_to_skip, i); } } + /* Compute how many callers node has. */ node_callers = 0; for (cs = node->callers; cs != NULL; cs = cs->next_caller) @@ -926,50 +1190,48 @@ ipcp_insert_stage (void) redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers); for (cs = node->callers; cs != NULL; cs = cs->next_caller) VEC_quick_push (cgraph_edge_p, redirect_callers, cs); + /* Redirecting all the callers of the node to the new versioned node. */ node1 = - cgraph_function_versioning (node, redirect_callers, replace_trees); + cgraph_create_virtual_clone (node, redirect_callers, replace_trees, + args_to_skip); + args_to_skip = NULL; VEC_free (cgraph_edge_p, heap, redirect_callers); - VARRAY_CLEAR (replace_trees); + replace_trees = NULL; + if (node1 == NULL) continue; if (dump_file) - fprintf (dump_file, "versioned function %s\n", - cgraph_node_name (node)); + fprintf (dump_file, "versioned function %s with growth %i, overall %i\n", + cgraph_node_name (node), (int)growth, (int)new_size); ipcp_init_cloned_node (node, node1); - if (const_param > 0) - { - push_cfun (DECL_STRUCT_FUNCTION (node1->decl)); - gimple_register_cfg_hooks (); - current_function_decl = node1->decl; - for (i = 0; i < count; i++) - { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); - if (ipcp_lat_is_insertable (lat)) - { - parm_tree = ipa_get_ith_param (info, i); - if (lat->type != IPA_CONST_VALUE_REF - && !is_gimple_reg (parm_tree)) - ipcp_propagate_one_const (node1, i, lat); - } - } - if (gimple_in_ssa_p (cfun)) - { - update_ssa (TODO_update_ssa); -#ifdef ENABLE_CHECKING - verify_ssa (true); -#endif - } - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); - pop_cfun (); - current_function_decl = NULL; - } + /* TODO: We can use indirect inlning info to produce new calls. */ + if (dump_file) dump_function_to_file (node1->decl, dump_file, dump_flags); + + for (cs = node->callees; cs; cs = cs->next_callee) + if (cs->callee->aux) + { + fibheap_delete_node (heap, (fibnode_t) cs->callee->aux); + cs->callee->aux = fibheap_insert (heap, + ipcp_estimate_cloning_cost (cs->callee), + cs->callee); + } } + + while (!fibheap_empty (heap)) + { + if (dump_file) + fprintf (dump_file, "skipping function %s\n", + cgraph_node_name (node)); + node = (struct cgraph_node *) fibheap_extract_min (heap); + node->aux = NULL; + } + fibheap_delete (heap); + BITMAP_FREE (dead_nodes); ipcp_update_callgraph (); ipcp_update_profiling (); } @@ -978,43 +1240,59 @@ ipcp_insert_stage (void) static unsigned int ipcp_driver (void) { - if (dump_file) - fprintf (dump_file, "\nIPA constant propagation start:\n"); - ipa_check_create_node_params (); - ipa_check_create_edge_args (); - ipa_register_cgraph_hooks (); - /* 1. Call the init stage to initialize - the ipa_node_params and ipa_edge_args structures. */ - ipcp_init_stage (); + cgraph_remove_unreachable_nodes (true,dump_file); if (dump_file) { fprintf (dump_file, "\nIPA structures before propagation:\n"); - ipcp_print_all_structures (dump_file); + if (dump_flags & TDF_DETAILS) + ipa_print_all_params (dump_file); + ipa_print_all_jump_functions (dump_file); } /* 2. Do the interprocedural propagation. */ ipcp_iterate_stage (); - if (dump_file) - { - fprintf (dump_file, "\nIPA structures after propagation:\n"); - ipcp_print_all_structures (dump_file); - fprintf (dump_file, "\nProfiling info before insert stage:\n"); - ipcp_print_profile_data (dump_file); - } /* 3. Insert the constants found to the functions. */ ipcp_insert_stage (); - if (dump_file) + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "\nProfiling info after insert stage:\n"); ipcp_print_profile_data (dump_file); } /* Free all IPCP structures. */ - free_all_ipa_structures_after_ipa_cp (); + ipa_free_all_structures_after_ipa_cp (); if (dump_file) fprintf (dump_file, "\nIPA constant propagation end\n"); - cgraph_remove_unreachable_nodes (true, NULL); return 0; } +/* Note function body size. */ +static void +ipcp_generate_summary (void) +{ + if (dump_file) + fprintf (dump_file, "\nIPA constant propagation start:\n"); + ipa_check_create_node_params (); + ipa_check_create_edge_args (); + ipa_register_cgraph_hooks (); + /* 1. Call the init stage to initialize + the ipa_node_params and ipa_edge_args structures. */ + ipcp_init_stage (); +} + +/* Write ipcp summary for nodes in SET. */ +static void +ipcp_write_summary (cgraph_node_set set, + varpool_node_set vset ATTRIBUTE_UNUSED) +{ + ipa_prop_write_jump_functions (set); +} + +/* Read ipcp summary. */ +static void +ipcp_read_summary (void) +{ + ipa_prop_read_jump_functions (); +} + /* Gate for IPCP optimization. */ static bool cgraph_gate_cp (void) @@ -1022,10 +1300,10 @@ cgraph_gate_cp (void) return flag_ipa_cp; } -struct simple_ipa_opt_pass pass_ipa_cp = +struct ipa_opt_pass_d pass_ipa_cp = { { - SIMPLE_IPA_PASS, + IPA_PASS, "cp", /* name */ cgraph_gate_cp, /* gate */ ipcp_driver, /* execute */ @@ -1034,9 +1312,19 @@ struct simple_ipa_opt_pass pass_ipa_cp = 0, /* static_pass_number */ TV_IPA_CONSTANT_PROP, /* tv_id */ 0, /* properties_required */ - PROP_trees, /* properties_provided */ + 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */ - } + TODO_dump_cgraph | TODO_dump_func | + TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */ + }, + ipcp_generate_summary, /* generate_summary */ + ipcp_write_summary, /* write_summary */ + ipcp_read_summary, /* read_summary */ + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* TODOs */ + NULL, /* function_transform */ + NULL, /* variable_transform */ };