/* Tree inlining.
- Copyright 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007
+ Free Software Foundation, Inc.
Contributed by Alexandre Oliva <aoliva@redhat.com>
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "ipa-prop.h"
#include "value-prof.h"
#include "tree-pass.h"
+#include "target.h"
+#include "integrate.h"
/* I'm not real happy about this, but we need to handle gimple and
non-gimple trees. */
o Provide heuristics to clamp inlining of recursive template
calls? */
+
+/* Weights that estimate_num_insns uses for heuristics in inlining.
+   (All three weight sets below are filled in by init_inline_once.)  */
+
+eni_weights eni_inlining_weights;
+
+/* Weights that estimate_num_insns uses to estimate the size of the
+ produced code. */
+
+eni_weights eni_size_weights;
+
+/* Weights that estimate_num_insns uses to estimate the time necessary
+ to execute the produced code. */
+
+eni_weights eni_time_weights;
+
/* Prototypes. */
static tree declare_return_variable (copy_body_data *, tree, tree, tree *);
void
insert_decl_map (copy_body_data *id, tree key, tree value)
{
+  /* Record in ID->decl_map that KEY is remapped to VALUE for this
+     copy/inline operation.  This patch replaces the splay-tree based
+     map with the lighter-weight pointer_map representation.  */
- splay_tree_insert (id->decl_map, (splay_tree_key) key,
- (splay_tree_value) value);
+ *pointer_map_insert (id->decl_map, key) = value;
/* Always insert an identity map as well. If we see this same new
node again, we won't want to duplicate it a second time. */
if (key != value)
- splay_tree_insert (id->decl_map, (splay_tree_key) value,
- (splay_tree_value) value);
+ *pointer_map_insert (id->decl_map, value) = value;
}
/* Construct new SSA name for old NAME. ID is the inline context. */
remap_ssa_name (tree name, copy_body_data *id)
{
tree new;
- splay_tree_node n;
+ tree *n;
gcc_assert (TREE_CODE (name) == SSA_NAME);
- n = splay_tree_lookup (id->decl_map, (splay_tree_key) name);
+ n = (tree *) pointer_map_contains (id->decl_map, name);
if (n)
- return (tree) n->value;
+ return *n;
/* Do not set DEF_STMT yet as statement is not copied yet. We do that
in copy_bb. */
tree
remap_decl (tree decl, copy_body_data *id)
{
- splay_tree_node n;
+ tree *n;
tree fn;
/* We only remap local variables in the current function. */
/* See if we have remapped this declaration. */
- n = splay_tree_lookup (id->decl_map, (splay_tree_key) decl);
+ n = (tree *) pointer_map_contains (id->decl_map, decl);
/* If we didn't already have an equivalent for this declaration,
create one now. */
return t;
}
- return unshare_expr ((tree) n->value);
+ return unshare_expr (*n);
}
static tree
remap_type_1 (tree type, copy_body_data *id)
{
- splay_tree_node node;
+ tree *node;
tree new, t;
if (type == NULL)
return type;
/* See if we have remapped this type. */
- node = splay_tree_lookup (id->decl_map, (splay_tree_key) type);
+ node = (tree *) pointer_map_contains (id->decl_map, type);
if (node)
- return (tree) node->value;
+ return *node;
/* The type only needs remapping if it's variably modified. */
if (! variably_modified_type_p (type, id->src_fn))
{
case INTEGER_TYPE:
case REAL_TYPE:
+ case FIXED_POINT_TYPE:
case ENUMERAL_TYPE:
case BOOLEAN_TYPE:
t = TYPE_MIN_VALUE (new);
tree
remap_type (tree type, copy_body_data *id)
{
- splay_tree_node node;
+ tree *node;
if (type == NULL)
return type;
/* See if we have remapped this type. */
- node = splay_tree_lookup (id->decl_map, (splay_tree_key) type);
+ node = (tree *) pointer_map_contains (id->decl_map, type);
if (node)
- return (tree) node->value;
+ return *node;
/* The type only needs remapping if it's variably modified. */
if (! variably_modified_type_p (type, id->src_fn))
/* We can not chain the local static declarations into the unexpanded_var_list
as we can't duplicate them or break one decl rule. Go ahead and link
them into unexpanded_var_list. */
- if (!lang_hooks.tree_inlining.auto_var_in_fn_p (old_var, id->src_fn)
+ if (!auto_var_in_fn_p (old_var, id->src_fn)
&& !DECL_EXTERNAL (old_var))
{
cfun->unexpanded_var_list = tree_cons (NULL_TREE, old_var,
variables. We don't want to copy static variables; there's only
one of those, no matter how many times we inline the containing
function. Similarly for globals from an outer function. */
- else if (lang_hooks.tree_inlining.auto_var_in_fn_p (*tp, fn))
+ else if (auto_var_in_fn_p (*tp, fn))
{
tree new_decl;
discarding. */
if (TREE_CODE (*tp) == GIMPLE_MODIFY_STMT
&& GIMPLE_STMT_OPERAND (*tp, 0) == GIMPLE_STMT_OPERAND (*tp, 1)
- && (lang_hooks.tree_inlining.auto_var_in_fn_p
- (GIMPLE_STMT_OPERAND (*tp, 0), fn)))
+ && (auto_var_in_fn_p (GIMPLE_STMT_OPERAND (*tp, 0), fn)))
{
/* Some assignments VAR = VAR; don't generate any rtl code
and thus don't count as variable modification. Avoid
keeping bogosities like 0 = 0. */
tree decl = GIMPLE_STMT_OPERAND (*tp, 0), value;
- splay_tree_node n;
+ tree *n;
- n = splay_tree_lookup (id->decl_map, (splay_tree_key) decl);
+ n = (tree *) pointer_map_contains (id->decl_map, decl);
if (n)
{
- value = (tree) n->value;
+ value = *n;
STRIP_TYPE_NOPS (value);
if (TREE_CONSTANT (value) || TREE_READONLY_DECL_P (value))
{
/* Get rid of *& from inline substitutions that can happen when a
pointer argument is an ADDR_EXPR. */
tree decl = TREE_OPERAND (*tp, 0);
- splay_tree_node n;
+ tree *n;
- n = splay_tree_lookup (id->decl_map, (splay_tree_key) decl);
+ n = (tree *) pointer_map_contains (id->decl_map, decl);
if (n)
{
tree new;
build_fold_indirect_ref wouldn't strip the INDIRECT_REF,
but we absolutely rely on that. As fold_indirect_ref
does other useful transformations, try that first, though. */
- tree type = TREE_TYPE (TREE_TYPE ((tree)n->value));
- new = unshare_expr ((tree)n->value);
+ tree type = TREE_TYPE (TREE_TYPE (*n));
+ new = unshare_expr (*n);
old = *tp;
*tp = fold_indirect_ref_1 (type, new);
if (! *tp)
new_block = id->block;
if (TREE_BLOCK (*tp))
{
- splay_tree_node n;
- n = splay_tree_lookup (id->decl_map,
- (splay_tree_key) TREE_BLOCK (*tp));
+ tree *n;
+ n = (tree *) pointer_map_contains (id->decl_map,
+ TREE_BLOCK (*tp));
gcc_assert (n);
- new_block = (tree) n->value;
+ new_block = *n;
}
TREE_BLOCK (*tp) = new_block;
}
(NULL_TREE,
id->eh_region_offset + TREE_INT_CST_LOW (TREE_OPERAND (*tp, 0)));
- if (!GIMPLE_TUPLE_P (*tp))
+ if (!GIMPLE_TUPLE_P (*tp) && TREE_CODE (*tp) != OMP_CLAUSE)
TREE_TYPE (*tp) = remap_type (TREE_TYPE (*tp), id);
/* The copied TARGET_EXPR has never been expanded, even if the
copy_basic_block = create_basic_block (NULL, (void *) 0,
(basic_block) bb->prev_bb->aux);
copy_basic_block->count = bb->count * count_scale / REG_BR_PROB_BASE;
- copy_basic_block->frequency = (bb->frequency
+
+ /* We are going to rebuild frequencies from scratch.  These values are of
+ only minor importance; they merely drive canonicalize_loop_headers. */
+ copy_basic_block->frequency = ((gcov_type)bb->frequency
* frequency_scale / REG_BR_PROB_BASE);
+ if (copy_basic_block->frequency > BB_FREQ_MAX)
+ copy_basic_block->frequency = BB_FREQ_MAX;
copy_bsi = bsi_start (copy_basic_block);
for (bsi = bsi_start (bb);
into multiple statements, we need to process all of them. */
while (!bsi_end_p (copy_bsi))
{
- stmt = bsi_stmt (copy_bsi);
+ tree *stmtp = bsi_stmt_ptr (copy_bsi);
+ tree stmt = *stmtp;
call = get_call_expr_in (stmt);
+
+ if (call && CALL_EXPR_VA_ARG_PACK (call) && id->call_expr)
+ {
+ /* __builtin_va_arg_pack () should be replaced by
+ all arguments corresponding to ... in the caller. */
+ tree p, *argarray, new_call, *call_ptr;
+ int nargs = call_expr_nargs (id->call_expr);
+
+ for (p = DECL_ARGUMENTS (id->src_fn); p; p = TREE_CHAIN (p))
+ nargs--;
+
+ argarray = (tree *) alloca ((nargs + call_expr_nargs (call))
+ * sizeof (tree));
+
+ memcpy (argarray, CALL_EXPR_ARGP (call),
+ call_expr_nargs (call) * sizeof (*argarray));
+ memcpy (argarray + call_expr_nargs (call),
+ CALL_EXPR_ARGP (id->call_expr)
+ + (call_expr_nargs (id->call_expr) - nargs),
+ nargs * sizeof (*argarray));
+
+ new_call = build_call_array (TREE_TYPE (call),
+ CALL_EXPR_FN (call),
+ nargs + call_expr_nargs (call),
+ argarray);
+ /* Copy all CALL_EXPR flags, locus and block, except
+ CALL_EXPR_VA_ARG_PACK flag. */
+ CALL_EXPR_STATIC_CHAIN (new_call)
+ = CALL_EXPR_STATIC_CHAIN (call);
+ CALL_EXPR_TAILCALL (new_call) = CALL_EXPR_TAILCALL (call);
+ CALL_EXPR_RETURN_SLOT_OPT (new_call)
+ = CALL_EXPR_RETURN_SLOT_OPT (call);
+ CALL_FROM_THUNK_P (new_call) = CALL_FROM_THUNK_P (call);
+ CALL_CANNOT_INLINE_P (new_call)
+ = CALL_CANNOT_INLINE_P (call);
+ TREE_NOTHROW (new_call) = TREE_NOTHROW (call);
+ SET_EXPR_LOCUS (new_call, EXPR_LOCUS (call));
+ TREE_BLOCK (new_call) = TREE_BLOCK (call);
+
+ call_ptr = stmtp;
+ if (TREE_CODE (*call_ptr) == GIMPLE_MODIFY_STMT)
+ call_ptr = &GIMPLE_STMT_OPERAND (*call_ptr, 1);
+ if (TREE_CODE (*call_ptr) == WITH_SIZE_EXPR)
+ call_ptr = &TREE_OPERAND (*call_ptr, 0);
+ gcc_assert (*call_ptr == call);
+ *call_ptr = new_call;
+ stmt = *stmtp;
+ update_stmt (stmt);
+ }
+
+ /* Statements produced by inlining can be unfolded, especially
+ when we constant propagated some operands. We can't fold
+ them right now for two reasons:
+ 1) folding require SSA_NAME_DEF_STMTs to be correct
+ 2) we can't change function calls to builtins.
+ So we just mark the statement for later folding.  We mark
+ all new statements, instead of just the statements that have
+ changed by some nontrivial substitution, so even statements made
+ foldable indirectly are updated. If this turns out to be
+ expensive, copy_body can be told to watch for nontrivial
+ changes. */
+ if (id->statements_to_fold)
+ pointer_set_insert (id->statements_to_fold, stmt);
/* We're duplicating a CALL_EXPR. Find any corresponding
callgraph edges and update or duplicate them. */
if (call && (decl = get_callee_fndecl (call)))
edge = cgraph_edge (id->src_node, orig_stmt);
if (edge)
cgraph_clone_edge (edge, id->dst_node, stmt,
- REG_BR_PROB_BASE, 1, true);
+ REG_BR_PROB_BASE, 1, edge->frequency, true);
break;
case CB_CGE_MOVE_CLONES:
gcc_assert (lookup_stmt_eh_region_fn (id->src_cfun, orig_stmt)
!= 0);
- if (tree_could_throw_p (stmt))
+ if (tree_could_throw_p (stmt)
+ /* When we are cloning for inlining, we are supposed to
+ construct a clone that calls precisely the same functions
+ as original. However IPA optimizers might've proved
+ earlier some function calls as non-trapping that might
+ render some basic blocks dead that might become
+ unreachable.
+
+ We can't update SSA with unreachable blocks in CFG and thus
+ we prevent the scenario by preserving even the "dead" eh
+ edges until the point they are later removed by
+ fixup_cfg pass. */
+ || (id->transform_call_graph_edges == CB_CGE_MOVE_CLONES
+ && lookup_stmt_eh_region_fn (id->src_cfun, orig_stmt) > 0))
{
int region = lookup_stmt_eh_region_fn (id->src_cfun, orig_stmt);
/* Add an entry for the copied tree in the EH hashtable.
/* Register specific tree functions. */
tree_register_cfg_hooks ();
*new_cfun = *DECL_STRUCT_FUNCTION (callee_fndecl);
+ new_cfun->funcdef_no = get_next_funcdef_no ();
VALUE_HISTOGRAMS (new_cfun) = NULL;
new_cfun->unexpanded_var_list = NULL;
new_cfun->cfg = NULL;
new_cfun->decl = new_fndecl /*= copy_node (callee_fndecl)*/;
- new_cfun->ib_boundaries_block = NULL;
DECL_STRUCT_FUNCTION (new_fndecl) = new_cfun;
push_cfun (new_cfun);
init_empty_tree_cfg ();
new->aux = bb;
}
- last = n_basic_blocks;
+ last = last_basic_block;
/* Now that we've duplicated the blocks, duplicate their edges. */
FOR_ALL_BB_FN (bb, cfun_to_copy)
copy_edges_for_bb (bb, count_scale);
}
/* Zero out AUX fields of newly created block during EH edge
insertion. */
- for (; last < n_basic_blocks; last++)
+ for (; last < last_basic_block; last++)
BASIC_BLOCK (last)->aux = NULL;
entry_block_map->aux = NULL;
exit_block_map->aux = NULL;
var = get_base_address (TREE_OPERAND (value, 0));
- return var && lang_hooks.tree_inlining.auto_var_in_fn_p (var, fn);
+ return var && auto_var_in_fn_p (var, fn);
}
static void
tree init_stmt;
tree var;
tree var_sub;
- tree rhs = value ? fold_convert (TREE_TYPE (p), value) : NULL;
+ tree rhs = value;
tree def = (gimple_in_ssa_p (cfun)
? gimple_default_def (id->src_cfun, p) : NULL);
+ if (value
+ && value != error_mark_node
+ && !useless_type_conversion_p (TREE_TYPE (p), TREE_TYPE (value)))
+ rhs = fold_build1 (NOP_EXPR, TREE_TYPE (p), value);
+
/* If the parameter is never assigned to, has no SSA_NAMEs created,
we may not need to create a new variable here at all. Instead, we may
be able to just use the argument value. */
It is not big deal to prohibit constant propagation here as
we will constant propagate in DOM1 pass anyway. */
if (is_gimple_min_invariant (value)
- && lang_hooks.types_compatible_p (TREE_TYPE (value), TREE_TYPE (p))
+ && useless_type_conversion_p (TREE_TYPE (p),
+ TREE_TYPE (value))
/* We have to be very careful about ADDR_EXPR. Make sure
the base variable isn't a local variable of the inlined
function, e.g., when doing recursive inlining, direct or
represent multiple variables for purposes of debugging. */
if (gimple_in_ssa_p (cfun) && rhs && def && is_gimple_reg (p)
&& (TREE_CODE (rhs) == SSA_NAME
- || is_gimple_min_invariant (rhs)))
+ || is_gimple_min_invariant (rhs))
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def))
{
insert_decl_map (id, def, rhs);
return;
if (def && gimple_in_ssa_p (cfun) && is_gimple_reg (p))
{
def = remap_ssa_name (def, id);
- init_stmt = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (var), def, rhs);
+ init_stmt = build_gimple_modify_stmt (def, rhs);
SSA_NAME_DEF_STMT (def) = init_stmt;
SSA_NAME_IS_DEFAULT_DEF (def) = 0;
set_default_def (var, NULL);
}
else
- init_stmt = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (var), var, rhs);
+ init_stmt = build_gimple_modify_stmt (var, rhs);
/* If we did not create a gimple value and we did not create a gimple
cast of a gimple value, then we will need to gimplify INIT_STMTS
}
/* Generate code to initialize the parameters of the function at the
- top of the stack in ID from the ARGS (presented as a TREE_LIST). */
+ top of the stack in ID from the CALL_EXPR EXP. */
static void
-initialize_inlined_parameters (copy_body_data *id, tree args, tree static_chain,
+initialize_inlined_parameters (copy_body_data *id, tree exp,
tree fn, basic_block bb)
{
tree parms;
tree a;
tree p;
tree vars = NULL_TREE;
- int argnum = 0;
+ call_expr_arg_iterator iter;
+ tree static_chain = CALL_EXPR_STATIC_CHAIN (exp);
/* Figure out what the parameters are. */
parms = DECL_ARGUMENTS (fn);
/* Loop through the parameter declarations, replacing each with an
equivalent VAR_DECL, appropriately initialized. */
- for (p = parms, a = args; p;
- a = a ? TREE_CHAIN (a) : a, p = TREE_CHAIN (p))
- {
- tree value;
-
- ++argnum;
-
- /* Find the initializer. */
- value = lang_hooks.tree_inlining.convert_parm_for_inlining
- (p, a ? TREE_VALUE (a) : NULL_TREE, fn, argnum);
-
- setup_one_parameter (id, p, value, fn, bb, &vars);
- }
+ for (p = parms, a = first_call_expr_arg (exp, &iter); p;
+ a = next_call_expr_arg (&iter), p = TREE_CHAIN (p))
+ setup_one_parameter (id, p, a, fn, bb, &vars);
/* Initialize the static chain. */
p = DECL_STRUCT_FUNCTION (fn)->static_chain_decl;
bool use_it = false;
/* We can't use MODIFY_DEST if there's type promotion involved. */
- if (!lang_hooks.types_compatible_p (caller_type, callee_type))
+ if (!useless_type_conversion_p (callee_type, caller_type))
use_it = false;
/* ??? If we're assigning to a variable sized type, then we must
/* Build the use expr. If the return type of the function was
promoted, convert it back to the expected type. */
use = var;
- if (!lang_hooks.types_compatible_p (TREE_TYPE (var), caller_type))
+ if (!useless_type_conversion_p (caller_type, TREE_TYPE (var)))
use = fold_convert (caller_type, var);
STRIP_USELESS_TYPE_CONVERSION (use);
inlinable_function_p (tree fn)
{
bool inlinable = true;
+ bool do_warning;
+ tree always_inline;
/* If we've already decided this function shouldn't be inlined,
there's no need to check again. */
if (DECL_UNINLINABLE (fn))
return false;
- /* See if there is any language-specific reason it cannot be
- inlined. (It is important that this hook be called early because
- in C++ it may result in template instantiation.)
- If the function is not inlinable for language-specific reasons,
- it is left up to the langhook to explain why. */
- inlinable = !lang_hooks.tree_inlining.cannot_inline_tree_fn (&fn);
+ /* We only warn for functions declared `inline' by the user. */
+ do_warning = (warn_inline
+ && DECL_INLINE (fn)
+ && DECL_DECLARED_INLINE_P (fn)
+ && !DECL_IN_SYSTEM_HEADER (fn));
+
+ always_inline = lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn));
+
+ if (flag_really_no_inline
+ && always_inline == NULL)
+ {
+ if (do_warning)
+ warning (OPT_Winline, "function %q+F can never be inlined because it "
+ "is suppressed using -fno-inline", fn);
+ inlinable = false;
+ }
+
+ /* Don't auto-inline anything that might not be bound within
+ this unit of translation. */
+ else if (!DECL_DECLARED_INLINE_P (fn)
+ && DECL_REPLACEABLE_P (fn))
+ inlinable = false;
+
+ else if (!function_attribute_inlinable_p (fn))
+ {
+ if (do_warning)
+ warning (OPT_Winline, "function %q+F can never be inlined because it "
+ "uses attributes conflicting with inlining", fn);
+ inlinable = false;
+ }
/* If we don't have the function body available, we can't inline it.
However, this should not be recorded since we also get here for
about functions that would for example call alloca. But since
this a property of the function, just one warning is enough.
As a bonus we can now give more details about the reason why a
- function is not inlinable.
- We only warn for functions declared `inline' by the user. */
- bool do_warning = (warn_inline
- && DECL_INLINE (fn)
- && DECL_DECLARED_INLINE_P (fn)
- && !DECL_IN_SYSTEM_HEADER (fn));
-
- if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn)))
+ function is not inlinable. */
+ if (always_inline)
sorry (inline_forbidden_reason, fn);
else if (do_warning)
warning (OPT_Winline, inline_forbidden_reason, fn);
return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
}
+/* Arguments for estimate_num_insns_1, threaded through walk_tree's
+   DATA pointer.  */
+
+struct eni_data
+{
+ /* Running total of estimated insns; read back by the caller.  */
+ int count;
+
+ /* Weights of various constructs. */
+ eni_weights *weights;
+};
+
/* Used by estimate_num_insns. Estimate number of instructions seen
by given statement. */
static tree
estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
{
- int *count = (int *) data;
+ struct eni_data *d = data;
tree x = *tp;
+ unsigned cost;
if (IS_TYPE_OR_DECL_P (x))
{
case BIND_EXPR:
case WITH_CLEANUP_EXPR:
case NOP_EXPR:
+ case CONVERT_EXPR:
case VIEW_CONVERT_EXPR:
case SAVE_EXPR:
case ADDR_EXPR:
case OMP_CLAUSE:
case OMP_RETURN:
case OMP_CONTINUE:
+ case OMP_SECTIONS_SWITCH:
break;
/* We don't account constants for now. Assume that the cost is amortized
case IDENTIFIER_NODE:
case INTEGER_CST:
case REAL_CST:
+ case FIXED_CST:
case COMPLEX_CST:
case VECTOR_CST:
case STRING_CST:
*walk_subtrees = 0;
return NULL;
+ /* CHANGE_DYNAMIC_TYPE_EXPR explicitly expands to nothing. */
+ case CHANGE_DYNAMIC_TYPE_EXPR:
+ *walk_subtrees = 0;
+ return NULL;
+
/* Try to estimate the cost of assignments. We have three cases to
deal with:
1) Simple assignments to registers;
/* Otherwise it's a store, so fall through to compute the move cost. */
case CONSTRUCTOR:
- *count += estimate_move_cost (TREE_TYPE (x));
+ d->count += estimate_move_cost (TREE_TYPE (x));
break;
/* Assign cost of 1 to usual operations.
case VEC_COND_EXPR:
case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ case FIXED_CONVERT_EXPR:
case FIX_TRUNC_EXPR:
case NEGATE_EXPR:
case UNEQ_EXPR:
case LTGT_EXPR:
- case CONVERT_EXPR:
-
case CONJ_EXPR:
case PREDECREMENT_EXPR:
case POSTDECREMENT_EXPR:
case POSTINCREMENT_EXPR:
- case SWITCH_EXPR:
-
case ASM_EXPR:
case REALIGN_LOAD_EXPR:
case VEC_WIDEN_MULT_LO_EXPR:
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
- case VEC_PACK_MOD_EXPR:
+ case VEC_UNPACK_FLOAT_HI_EXPR:
+ case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
+ case VEC_PACK_FIX_TRUNC_EXPR:
case WIDEN_MULT_EXPR:
case VEC_INTERLEAVE_LOW_EXPR:
case RESX_EXPR:
- *count += 1;
+ d->count += 1;
+ break;
+
+ case SWITCH_EXPR:
+ /* TODO: Cost of a switch should be derived from the number of
+ branches. */
+ d->count += d->weights->switch_cost;
break;
/* Few special cases of expensive operations. This is useful
case FLOOR_MOD_EXPR:
case ROUND_MOD_EXPR:
case RDIV_EXPR:
- *count += 10;
+ d->count += d->weights->div_mod_cost;
break;
case CALL_EXPR:
{
tree decl = get_callee_fndecl (x);
- tree arg;
+ cost = d->weights->call_cost;
if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
switch (DECL_FUNCTION_CODE (decl))
{
return NULL_TREE;
case BUILT_IN_EXPECT:
return NULL_TREE;
+ /* Prefetch instruction is not expensive. */
+ case BUILT_IN_PREFETCH:
+ cost = 1;
+ break;
default:
break;
}
that does use function declaration to figure out the arguments. */
if (!decl)
{
- for (arg = TREE_OPERAND (x, 1); arg; arg = TREE_CHAIN (arg))
- *count += estimate_move_cost (TREE_TYPE (TREE_VALUE (arg)));
+ tree a;
+ call_expr_arg_iterator iter;
+ FOR_EACH_CALL_EXPR_ARG (a, iter, x)
+ d->count += estimate_move_cost (TREE_TYPE (a));
}
else
{
+ tree arg;
for (arg = DECL_ARGUMENTS (decl); arg; arg = TREE_CHAIN (arg))
- *count += estimate_move_cost (TREE_TYPE (arg));
+ d->count += estimate_move_cost (TREE_TYPE (arg));
}
- *count += PARAM_VALUE (PARAM_INLINE_CALL_COST);
+ d->count += cost;
break;
}
case OMP_CRITICAL:
case OMP_ATOMIC:
/* OpenMP directives are generally very expensive. */
- *count += 40;
+ d->count += d->weights->omp_cost;
break;
default:
return NULL;
}
-/* Estimate number of instructions that will be created by expanding EXPR. */
+/* Estimate number of instructions that will be created by expanding EXPR.
+ WEIGHTS contains weights attributed to various constructs. */
int
-estimate_num_insns (tree expr)
+estimate_num_insns (tree expr, eni_weights *weights)
{
- int num = 0;
struct pointer_set_t *visited_nodes;
basic_block bb;
block_stmt_iterator bsi;
struct function *my_function;
+ struct eni_data data;
+
+ data.count = 0;
+ data.weights = weights;
/* If we're given an entire function, walk the CFG. */
if (TREE_CODE (expr) == FUNCTION_DECL)
bsi_next (&bsi))
{
walk_tree (bsi_stmt_ptr (bsi), estimate_num_insns_1,
- &num, visited_nodes);
+ &data, visited_nodes);
}
}
pointer_set_destroy (visited_nodes);
}
else
- walk_tree_without_duplicates (&expr, estimate_num_insns_1, &num);
+ walk_tree_without_duplicates (&expr, estimate_num_insns_1, &data);
- return num;
+ return data.count;
}
-typedef struct function *function_p;
-
-DEF_VEC_P(function_p);
-DEF_VEC_ALLOC_P(function_p,heap);
-
-/* Initialized with NOGC, making this poisonous to the garbage collector. */
-static VEC(function_p,heap) *cfun_stack;
+/* Initializes weights used by estimate_num_insns. */
void
-push_cfun (struct function *new_cfun)
+init_inline_once (void)
{
- VEC_safe_push (function_p, heap, cfun_stack, cfun);
- cfun = new_cfun;
-}
-
-void
-pop_cfun (void)
-{
- cfun = VEC_pop (function_p, cfun_stack);
+  /* Weights used for the inlining heuristics; the call cost is taken
+     from the --param inline-call-cost knob.  */
+ eni_inlining_weights.call_cost = PARAM_VALUE (PARAM_INLINE_CALL_COST);
+ eni_inlining_weights.div_mod_cost = 10;
+ eni_inlining_weights.switch_cost = 1;
+ eni_inlining_weights.omp_cost = 40;
+
+  /* Weights used to estimate the size of the produced code.  */
+ eni_size_weights.call_cost = 1;
+ eni_size_weights.div_mod_cost = 1;
+ eni_size_weights.switch_cost = 10;
+ eni_size_weights.omp_cost = 40;
+
+ /* Estimating time for call is difficult, since we have no idea what the
+ called function does. In the current uses of eni_time_weights,
+ underestimating the cost does less harm than overestimating it, so
+ we choose a rather small value here. */
+ eni_time_weights.call_cost = 10;
+ eni_time_weights.div_mod_cost = 10;
+ eni_time_weights.switch_cost = 4;
+ eni_time_weights.omp_cost = 40;
}
/* Install new lexical TREE_BLOCK underneath 'current_block'. */
tree t;
tree use_retvar;
tree fn;
- splay_tree st;
- tree args;
+ struct pointer_map_t *st;
tree return_slot;
tree modify_dest;
location_t saved_location;
(incorrect node sharing is most common reason for missing edges. */
gcc_assert (dest->needed || !flag_unit_at_a_time);
cgraph_create_edge (id->dst_node, dest, stmt,
- bb->count, bb->loop_depth)->inline_failed
+ bb->count, CGRAPH_FREQ_BASE,
+ bb->loop_depth)->inline_failed
= N_("originally indirect function call not considered for inlining");
+ if (dump_file)
+ {
+ fprintf (dump_file, "Created new direct edge to %s",
+ cgraph_node_name (dest));
+ }
goto egress;
}
/* Local declarations will be replaced by their equivalents in this
map. */
st = id->decl_map;
- id->decl_map = splay_tree_new (splay_tree_compare_pointers,
- NULL, NULL);
-
- /* Initialize the parameters. */
- args = TREE_OPERAND (t, 1);
+ id->decl_map = pointer_map_create ();
/* Record the function we are about to inline. */
id->src_fn = fn;
id->src_node = cg_edge->callee;
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
+ id->call_expr = t;
- initialize_inlined_parameters (id, args, TREE_OPERAND (t, 2), fn, bb);
+ initialize_inlined_parameters (id, t, fn, bb);
if (DECL_INITIAL (fn))
add_lexical_block (id->block, remap_blocks (DECL_INITIAL (fn), id));
}
/* Clean up. */
- splay_tree_delete (id->decl_map);
+ pointer_map_destroy (id->decl_map);
id->decl_map = st;
/* If the inlined function returns a result that we care about,
return false;
}
+/* Walk all basic blocks created after FIRST and try to fold every statement
+ in the STATEMENTS pointer set. */
+static void
+fold_marked_statements (int first, struct pointer_set_t *statements)
+{
+ for (;first < n_basic_blocks;first++)
+    /* Blocks may have been removed, leaving holes in the block array;
+       skip those indices.  */
+ if (BASIC_BLOCK (first))
+ {
+ block_stmt_iterator bsi;
+ for (bsi = bsi_start (BASIC_BLOCK (first));
+ !bsi_end_p (bsi); bsi_next (&bsi))
+ if (pointer_set_contains (statements, bsi_stmt (bsi)))
+ {
+ tree old_stmt = bsi_stmt (bsi);
+ if (fold_stmt (bsi_stmt_ptr (bsi)))
+ {
+            /* Folding changed the statement: refresh its operand
+               caches, and if the folded form can no longer throw,
+               remove the now-dead EH edges from this block.  */
+ update_stmt (bsi_stmt (bsi));
+ if (maybe_clean_or_replace_eh_stmt (old_stmt, bsi_stmt (bsi)))
+ tree_purge_dead_eh_edges (BASIC_BLOCK (first));
+ }
+ }
+ }
+}
+
+/* Return true if BB has at least one abnormal outgoing edge.  Used by
+   make_nonlocal_label_edges to avoid creating duplicate abnormal edges.  */
+
+static inline bool
+has_abnormal_outgoing_edge_p (basic_block bb)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->flags & EDGE_ABNORMAL)
+ return true;
+
+ return false;
+}
+
+/* When a block from the inlined function that contains a call with
+   side-effects in its middle gets inlined into a function with non-local
+   labels, the call becomes a potential non-local goto, so we need to add
+   the appropriate abnormal edge.  */
+
+static void
+make_nonlocal_label_edges (void)
+{
+ block_stmt_iterator bsi;
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ {
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+ if (tree_can_make_abnormal_goto (stmt))
+ {
+ if (stmt == bsi_stmt (bsi_last (bb)))
+ {
+            /* STMT already ends the block; just make sure the abnormal
+               edges exist.  */
+ if (!has_abnormal_outgoing_edge_p (bb))
+ make_abnormal_goto_edges (bb, true);
+ }
+ else
+ {
+            /* STMT is in the middle of the block: split after it so it
+               becomes the last statement of the head half, then attach
+               the abnormal edges to that head half.  */
+ edge e = split_block (bb, stmt);
+ bb = e->src;
+ make_abnormal_goto_edges (bb, true);
+ }
+ break;
+ }
+
+ /* Update PHIs on nonlocal goto receivers we (possibly)
+ just created new edges into. */
+ if (TREE_CODE (stmt) == LABEL_EXPR
+ && gimple_in_ssa_p (cfun))
+ {
+ tree target = LABEL_EXPR_LABEL (stmt);
+ if (DECL_NONLOCAL (target))
+ {
+ tree phi;
+
+ for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ {
+ gcc_assert (SSA_NAME_OCCURS_IN_ABNORMAL_PHI
+ (PHI_RESULT (phi)));
+ mark_sym_for_renaming
+ (SSA_NAME_VAR (PHI_RESULT (phi)));
+ }
+ }
+ }
+ }
+ }
+}
+
/* Expand calls to inline functions in the body of FN. */
-void
+unsigned int
optimize_inline_calls (tree fn)
{
copy_body_data id;
tree prev_fn;
basic_block bb;
+ int last = n_basic_blocks;
/* There is no point in performing inlining if errors have already
occurred -- and we might crash if we try to inline invalid
code. */
if (errorcount || sorrycount)
- return;
+ return 0;
/* Clear out ID. */
memset (&id, 0, sizeof (id));
id.transform_new_cfg = false;
id.transform_return_to_modify = true;
id.transform_lang_insert_block = false;
+ id.statements_to_fold = pointer_set_create ();
push_gimplify_context ();
+ /* We make no attempts to keep dominance info up-to-date. */
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+
/* Reach the trees by walking over the CFG, and note the
enclosing basic-blocks in the call edges. */
/* We walk the blocks going forward, because inlined function bodies
gimple_expand_calls_inline (bb, &id);
pop_gimplify_context (NULL);
- /* Renumber the (code) basic_blocks consecutively. */
- compact_blocks ();
- /* Renumber the lexical scoping (non-code) blocks consecutively. */
- number_blocks (fn);
#ifdef ENABLE_CHECKING
{
gcc_assert (e->inline_failed);
}
#endif
- /* We need to rescale frequencies again to peak at REG_BR_PROB_BASE
- as inlining loops might increase the maximum. */
- if (ENTRY_BLOCK_PTR->count)
- counts_to_freqs ();
- if (gimple_in_ssa_p (cfun))
- {
- /* We make no attempts to keep dominance info up-to-date. */
- free_dominance_info (CDI_DOMINATORS);
- free_dominance_info (CDI_POST_DOMINATORS);
- delete_unreachable_blocks ();
- update_ssa (TODO_update_ssa);
- fold_cond_expr_cond ();
- if (need_ssa_update_p ())
- update_ssa (TODO_update_ssa);
- }
- else
- fold_cond_expr_cond ();
+
+ /* Fold the statements before compacting/renumbering the basic blocks. */
+ fold_marked_statements (last, id.statements_to_fold);
+ pointer_set_destroy (id.statements_to_fold);
+
+ /* Renumber the (code) basic_blocks consecutively. */
+ compact_blocks ();
+ /* Renumber the lexical scoping (non-code) blocks consecutively. */
+ number_blocks (fn);
+
+ /* We are not going to maintain the cgraph edges up to date.
+ Kill it so it won't confuse us. */
+ cgraph_node_remove_callees (id.dst_node);
+
+ fold_cond_expr_cond ();
+ if (current_function_has_nonlocal_label)
+ make_nonlocal_label_edges ();
/* It would be nice to check SSA/CFG/statement consistency here, but it is
not possible yet - the IPA passes might make various functions to not
throw and they don't care to proactively update local EH info. This is
done later in fixup_cfg pass that also execute the verification. */
+ return (TODO_update_ssa | TODO_cleanup_cfg
+ | (gimple_in_ssa_p (cfun) ? TODO_remove_unused_locals : 0)
+ | (profile_status != PROFILE_ABSENT ? TODO_rebuild_frequencies : 0));
}
/* FN is a function that has a complete body, and CLONE is a function whose
id.src_fn = fn;
id.dst_fn = clone;
id.src_cfun = DECL_STRUCT_FUNCTION (fn);
- id.decl_map = (splay_tree)arg_map;
+ id.decl_map = (struct pointer_map_t *)arg_map;
id.copy_decl = copy_decl_no_change;
id.transform_call_graph_edges = CB_CGE_DUPLICATE;
static void
remap_save_expr (tree *tp, void *st_, int *walk_subtrees)
{
- splay_tree st = (splay_tree) st_;
- splay_tree_node n;
+ struct pointer_map_t *st = (struct pointer_map_t *) st_;
+ tree *n;
tree t;
/* See if we already encountered this SAVE_EXPR. */
- n = splay_tree_lookup (st, (splay_tree_key) *tp);
+ n = (tree *) pointer_map_contains (st, *tp);
/* If we didn't already remap this SAVE_EXPR, do so now. */
if (!n)
t = copy_node (*tp);
/* Remember this SAVE_EXPR. */
- splay_tree_insert (st, (splay_tree_key) *tp, (splay_tree_value) t);
+ *pointer_map_insert (st, *tp) = t;
/* Make sure we don't remap an already-remapped SAVE_EXPR. */
- splay_tree_insert (st, (splay_tree_key) t, (splay_tree_value) t);
+ *pointer_map_insert (st, t) = t;
}
else
{
/* We've already walked into this SAVE_EXPR; don't do it again. */
*walk_subtrees = 0;
- t = (tree) n->value;
+ t = *n;
}
/* Replace this SAVE_EXPR with the copy. */
unsave_r (tree *tp, int *walk_subtrees, void *data)
{
copy_body_data *id = (copy_body_data *) data;
- splay_tree st = id->decl_map;
- splay_tree_node n;
+ struct pointer_map_t *st = id->decl_map;
+ tree *n;
/* Only a local declaration (variable or label). */
if ((TREE_CODE (*tp) == VAR_DECL && !TREE_STATIC (*tp))
|| TREE_CODE (*tp) == LABEL_DECL)
{
/* Lookup the declaration. */
- n = splay_tree_lookup (st, (splay_tree_key) *tp);
+ n = (tree *) pointer_map_contains (st, *tp);
/* If it's there, remap it. */
if (n)
- *tp = (tree) n->value;
+ *tp = *n;
}
else if (TREE_CODE (*tp) == STATEMENT_LIST)
memset (&id, 0, sizeof (id));
id.src_fn = current_function_decl;
id.dst_fn = current_function_decl;
- id.decl_map = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ id.decl_map = pointer_map_create ();
id.copy_decl = copy_decl_no_change;
id.transform_call_graph_edges = CB_CGE_DUPLICATE;
walk_tree (&expr, unsave_r, &id, NULL);
/* Clean up. */
- splay_tree_delete (id.decl_map);
+ pointer_map_destroy (id.decl_map);
return expr;
}
TREE_READONLY (copy) = TREE_READONLY (decl);
TREE_THIS_VOLATILE (copy) = TREE_THIS_VOLATILE (decl);
DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (decl);
+ DECL_NO_TBAA_P (copy) = DECL_NO_TBAA_P (decl);
return copy_decl_for_dup_finish (id, decl, copy);
}
{
TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (decl);
DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (decl);
+ DECL_NO_TBAA_P (copy) = DECL_NO_TBAA_P (decl);
}
return copy_decl_for_dup_finish (id, decl, copy);
struct ipa_replace_map *replace_info;
basic_block old_entry_block;
tree t_step;
+ tree old_current_function_decl = current_function_decl;
gcc_assert (TREE_CODE (old_decl) == FUNCTION_DECL
&& TREE_CODE (new_decl) == FUNCTION_DECL);
old_version_node = cgraph_node (old_decl);
new_version_node = cgraph_node (new_decl);
- allocate_struct_function (new_decl);
- /* Cfun points to the new allocated function struct at this point. */
- cfun->function_end_locus = DECL_SOURCE_LOCATION (new_decl);
-
DECL_ARTIFICIAL (new_decl) = 1;
DECL_ABSTRACT_ORIGIN (new_decl) = DECL_ORIGIN (old_decl);
+ /* Prepare the data structures for the tree copy. */
+ memset (&id, 0, sizeof (id));
+
/* Generate a new name for the new version. */
if (!update_clones)
{
DECL_NAME (new_decl) = create_tmp_var_name (NULL);
SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
SET_DECL_RTL (new_decl, NULL_RTX);
+ id.statements_to_fold = pointer_set_create ();
}
-
- /* Prepare the data structures for the tree copy. */
- memset (&id, 0, sizeof (id));
- id.decl_map = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ id.decl_map = pointer_map_create ();
id.src_fn = old_decl;
id.dst_fn = new_decl;
id.src_node = old_version_node;
number_blocks (new_decl);
/* Clean up. */
- splay_tree_delete (id.decl_map);
- fold_cond_expr_cond ();
+ pointer_map_destroy (id.decl_map);
+ if (!update_clones)
+ {
+ fold_marked_statements (0, id.statements_to_fold);
+ pointer_set_destroy (id.statements_to_fold);
+ fold_cond_expr_cond ();
+ }
if (gimple_in_ssa_p (cfun))
{
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+ if (!update_clones)
+ delete_unreachable_blocks ();
update_ssa (TODO_update_ssa);
+ if (!update_clones)
+ {
+ fold_cond_expr_cond ();
+ if (need_ssa_update_p ())
+ update_ssa (TODO_update_ssa);
+ }
}
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);
pop_cfun ();
- current_function_decl = NULL;
+ current_function_decl = old_current_function_decl;
+ gcc_assert (!current_function_decl
+ || DECL_STRUCT_FUNCTION (current_function_decl) == cfun);
return;
}
id.src_fn = current_function_decl;
id.dst_fn = current_function_decl;
id.src_cfun = cfun;
- id.decl_map = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ id.decl_map = pointer_map_create ();
type = remap_type_1 (type, &id);
- splay_tree_delete (id.decl_map);
+ pointer_map_destroy (id.decl_map);
return type;
}