-/* Callgraph based intraprocedural optimizations.
- Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Callgraph based interprocedural optimizations.
+ Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
Contributed by Jan Hubicka
This file is part of GCC.
02110-1301, USA. */
/* This module implements main driver of compilation process as well as
- few basic intraprocedural optimizers.
+ few basic interprocedural optimizers.
The main scope of this file is to act as an interface in between
tree based frontends and the backend (and middle end)
This function is called once front-end has parsed whole body of function
and it is certain that the function body nor the declaration will change.
- (There is one exception needed for implementing GCC extern inline function.)
+ (There is one exception needed for implementing GCC extern inline
+ function.)
- - cgraph_varpool_finalize_variable
+ - varpool_finalize_decl
This function has same behavior as the above but is used for static
variables.
- cgraph_finalize_compilation_unit
- This function is called once compilation unit is finalized and it will
- no longer change.
+ This function is called once (source level) compilation unit is finalized
+ and it will no longer change.
In the unit-at-a-time the call-graph construction and local function
analysis takes place here. Bodies of unreachable functions are released
to conserve memory usage.
- ??? The compilation unit in this point of view should be compilation
- unit as defined by the language - for instance C frontend allows multiple
- compilation units to be parsed at once and it should call function each
- time parsing is done so we save memory.
+ The function can be called multiple times when multiple source level
+ compilation units are combined (such as in the C frontend).
- cgraph_optimize
taken are marked as local. Backend can then use this information to
modify calling conventions, do better inlining or similar optimizations.
- - cgraph_assemble_pending_functions
- - cgraph_varpool_assemble_pending_variables
-
- In non-unit-at-a-time mode these functions can be used to force compilation
- of functions or variables that are known to be needed at given stage
- of compilation
-
- cgraph_mark_needed_node
- - cgraph_varpool_mark_needed_node
+ - varpool_mark_needed_node
- When function or variable is referenced by some hidden way (for instance
- via assembly code and marked by attribute "used"), the call-graph data structure
- must be updated accordingly by this function.
+ When function or variable is referenced by some hidden way the call-graph
+ data structure must be updated accordingly by this function.
+ There should be little need to call this function and all the references
+ should be made explicit to cgraph code. At present these functions are
+ used by C++ frontend to explicitly mark the keyed methods.
- analyze_expr callback
This function is used to expand function and pass it into RTL back-end.
Front-end should not make any assumptions about when this function can be
called. In particular cgraph_assemble_pending_functions,
- cgraph_varpool_assemble_pending_variables, cgraph_finalize_function,
- cgraph_varpool_finalize_function, cgraph_optimize can cause arbitrarily
+ varpool_assemble_pending_decls, cgraph_finalize_function,
+ varpool_finalize_decl, cgraph_optimize can cause arbitrarily
previously finalized functions to be expanded.
We implement two compilation modes.
static void cgraph_expand_function (struct cgraph_node *);
static tree record_reference (tree *, int *, void *);
static void cgraph_output_pending_asms (void);
+static void cgraph_increase_alignment (void);
/* Records tree nodes seen in record_reference. Simply using
walk_tree_without_duplicates doesn't guarantee each node is visited
}
/* If the user told us it is used, then it must be so. */
- if (node->local.externally_visible
- || lookup_attribute ("used", DECL_ATTRIBUTES (decl)))
+ if (node->local.externally_visible)
+ return true;
+
+ if (!flag_unit_at_a_time && lookup_attribute ("used", DECL_ATTRIBUTES (decl)))
return true;
/* ??? If the assembler name is set by hand, it is possible to assemble
return false;
}
-/* Walk the decls we marked as necessary and see if they reference new
- variables or functions and add them into the worklists. */
-static bool
-cgraph_varpool_analyze_pending_decls (void)
-{
- bool changed = false;
- timevar_push (TV_CGRAPH);
-
- while (cgraph_varpool_first_unanalyzed_node)
- {
- tree decl = cgraph_varpool_first_unanalyzed_node->decl;
-
- cgraph_varpool_first_unanalyzed_node->analyzed = true;
-
- cgraph_varpool_first_unanalyzed_node = cgraph_varpool_first_unanalyzed_node->next_needed;
-
- /* Compute the alignment early so function body expanders are
- already informed about increased alignment. */
- align_variable (decl, 0);
-
- if (DECL_INITIAL (decl))
- {
- visited_nodes = pointer_set_create ();
- walk_tree (&DECL_INITIAL (decl), record_reference, NULL, visited_nodes);
- pointer_set_destroy (visited_nodes);
- visited_nodes = NULL;
- }
- changed = true;
- }
- timevar_pop (TV_CGRAPH);
- return changed;
-}
-
-/* Optimization of function bodies might've rendered some variables as
- unnecessary so we want to avoid these from being compiled.
-
- This is done by pruning the queue and keeping only the variables that
- really appear needed (ie they are either externally visible or referenced
- by compiled function). Re-doing the reachability analysis on variables
- brings back the remaining variables referenced by these. */
-static void
-cgraph_varpool_remove_unreferenced_decls (void)
-{
- struct cgraph_varpool_node *next, *node = cgraph_varpool_nodes_queue;
-
- cgraph_varpool_reset_queue ();
-
- if (errorcount || sorrycount)
- return;
-
- while (node)
- {
- tree decl = node->decl;
- next = node->next_needed;
- node->needed = 0;
-
- if (node->finalized
- && ((DECL_ASSEMBLER_NAME_SET_P (decl)
- && TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
- || node->force_output
- || decide_is_variable_needed (node, decl)
- /* ??? Cgraph does not yet rule the world with an iron hand,
- and does not control the emission of debug information.
- After a variable has its DECL_RTL set, we must assume that
- it may be referenced by the debug information, and we can
- no longer elide it. */
- || DECL_RTL_SET_P (decl)))
- cgraph_varpool_mark_needed_node (node);
-
- node = next;
- }
- /* Make sure we mark alias targets as used targets. */
- finish_aliases_1 ();
- cgraph_varpool_analyze_pending_decls ();
-}
-
-
/* When not doing unit-at-a-time, output all functions enqueued.
Return true when such a functions were found. */
-bool
+static bool
cgraph_assemble_pending_functions (void)
{
bool output = false;
if (!flag_unit_at_a_time)
{
- struct cgraph_node *n;
+ struct cgraph_node *n, *next;
- for (n = cgraph_nodes; n; n = n->next)
- if (n->global.inlined_to == node)
- cgraph_remove_node (n);
+ for (n = cgraph_nodes; n; n = next)
+ {
+ next = n->next;
+ if (n->global.inlined_to == node)
+ cgraph_remove_node (n);
+ }
}
cgraph_node_remove_callees (node);
after rtl has been generated. */
if (TREE_STATIC (t) || DECL_EXTERNAL (t))
{
- cgraph_varpool_mark_needed_node (cgraph_varpool_node (t));
+ varpool_mark_needed_node (varpool_node (t));
if (lang_hooks.callgraph.analyze_expr)
return lang_hooks.callgraph.analyze_expr (tp, walk_subtrees,
data);
bb->loop_depth);
walk_tree (&TREE_OPERAND (call, 1),
record_reference, node, visited_nodes);
- if (TREE_CODE (stmt) == MODIFY_EXPR)
- walk_tree (&TREE_OPERAND (stmt, 0),
+ if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
+ walk_tree (&GIMPLE_STMT_OPERAND (stmt, 0),
record_reference, node, visited_nodes);
}
else
if (TREE_CODE (decl) == VAR_DECL
&& (TREE_STATIC (decl) && !DECL_EXTERNAL (decl))
&& flag_unit_at_a_time)
- cgraph_varpool_finalize_decl (decl);
+ varpool_finalize_decl (decl);
else if (TREE_CODE (decl) == VAR_DECL && DECL_INITIAL (decl))
walk_tree (&DECL_INITIAL (decl), record_reference, node, visited_nodes);
}
visited_nodes = NULL;
}
+/* Record the references made by the initializer of DECL.
+
+ Walks DECL_INITIAL (decl) with the record_reference callback, passing
+ NULL as the callback data. A fresh pointer set is installed in
+ visited_nodes for the duration of the walk and destroyed afterwards,
+ leaving visited_nodes set to NULL on return.
+
+ NOTE(review): visited_nodes is not declared in this function, so a call
+ made while another walk is using it would replace and then clear that
+ set -- confirm no caller does this. DECL_INITIAL is not tested here;
+ presumably the caller only passes decls that have an initializer, or
+ walk_tree tolerates an empty one -- TODO confirm. */
+void
+record_references_in_initializer (tree decl)
+{
+ visited_nodes = pointer_set_create ();
+ walk_tree (&DECL_INITIAL (decl), record_reference, NULL, visited_nodes);
+ pointer_set_destroy (visited_nodes);
+ visited_nodes = NULL;
+}
+
+
/* Give initial reasons why inlining would fail. Those gets
either NULLified or usually overwritten by more precise reason
later. */
block_stmt_iterator bsi;
bool error_found = false;
+ if (errorcount || sorrycount)
+ return;
+
timevar_push (TV_CGRAPH_VERIFY);
for (e = node->callees; e; e = e->next_callee)
if (e->aux)
verify_cgraph_node (node);
}
-/* Output one variable, if necessary. Return whether we output it. */
-static bool
-cgraph_varpool_assemble_decl (struct cgraph_varpool_node *node)
-{
- tree decl = node->decl;
-
- if (!TREE_ASM_WRITTEN (decl)
- && !node->alias
- && !DECL_EXTERNAL (decl)
- && (TREE_CODE (decl) != VAR_DECL || !DECL_HAS_VALUE_EXPR_P (decl)))
- {
- assemble_variable (decl, 0, 1, 0);
- /* Local static variables are never seen by check_global_declarations
- so we need to output debug info by hand. */
- if (DECL_CONTEXT (decl)
- && (TREE_CODE (DECL_CONTEXT (decl)) == BLOCK
- || TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL)
- && errorcount == 0 && sorrycount == 0)
- {
- timevar_push (TV_SYMOUT);
- (*debug_hooks->global_decl) (decl);
- timevar_pop (TV_SYMOUT);
- }
- return true;
- }
-
- return false;
-}
-
-/* Output all variables enqueued to be assembled. */
-bool
-cgraph_varpool_assemble_pending_decls (void)
-{
- bool changed = false;
-
- if (errorcount || sorrycount)
- return false;
-
- /* EH might mark decls as needed during expansion. This should be safe since
- we don't create references to new function, but it should not be used
- elsewhere. */
- cgraph_varpool_analyze_pending_decls ();
-
- while (cgraph_varpool_nodes_queue)
- {
- struct cgraph_varpool_node *node = cgraph_varpool_nodes_queue;
-
- cgraph_varpool_nodes_queue = cgraph_varpool_nodes_queue->next_needed;
- if (cgraph_varpool_assemble_decl (node))
- changed = true;
- node->next_needed = NULL;
- }
- return changed;
-}
-
/* Output all asm statements we have stored up to be output. */
static void
/* First kill forward declaration so reverse inlining works properly. */
cgraph_create_edges (node, decl);
+ node->local.estimated_self_stack_size = estimated_stack_frame_size ();
+ node->global.estimated_stack_size = node->local.estimated_self_stack_size;
+ node->global.stack_frame_offset = 0;
node->local.inlinable = tree_inlinable_function_p (decl);
- node->local.self_insns = estimate_num_insns (decl);
+ if (!flag_unit_at_a_time)
+ node->local.self_insns = estimate_num_insns (decl);
if (node->local.inlinable)
node->local.disregard_inline_limits
= lang_hooks.tree_inlining.disregard_inline_limits (decl);
current_function_decl = NULL;
}
+/* Look for externally_visible and used attributes and mark cgraph nodes
+ accordingly.
+
+ We cannot mark the nodes at the point the attributes are processed (in
+ handle_*_attribute) because the copy of the declarations available at that
+ point may not be canonical. For example, in:
+
+ void f();
+ void f() __attribute__((used));
+
+ the declaration we see in handle_used_attribute will be the second
+ declaration -- but the front end will subsequently merge that declaration
+ with the original declaration and discard the second declaration.
+
+ Furthermore, we can't mark these nodes in cgraph_finalize_function because:
+
+ void f() {}
+ void f() __attribute__((externally_visible));
+
+ is valid.
+
+ So, we walk the nodes at the end of the translation unit, applying the
+ attributes at that point.
+
+ FIRST and FIRST_VAR bound the walk: functions are visited from the head
+ of cgraph_nodes up to, but not including, FIRST, and variables from the
+ head of varpool_nodes up to, but not including, FIRST_VAR. Passing NULL
+ for either walks that whole list. NOTE(review): this presumably relies
+ on new nodes being added at the head of each list, so that the nodes in
+ front of the previous head are exactly the unprocessed ones -- confirm. */
+
+static void
+process_function_and_variable_attributes (struct cgraph_node *first,
+ struct varpool_node *first_var)
+{
+ struct cgraph_node *node;
+ struct varpool_node *vnode;
+
+ /* Functions. */
+ for (node = cgraph_nodes; node != first; node = node->next)
+ {
+ tree decl = node->decl;
+ /* A "used" decl is always marked referenced; the node is marked needed
+ only if it has already been finalized. */
+ if (lookup_attribute ("used", DECL_ATTRIBUTES (decl)))
+ {
+ mark_decl_referenced (decl);
+ if (node->local.finalized)
+ cgraph_mark_needed_node (node);
+ }
+ /* "externally_visible" is honored only on TREE_PUBLIC decls; on
+ anything else it is diagnosed under -Wattributes and has no effect. */
+ if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (decl)))
+ {
+ if (! TREE_PUBLIC (node->decl))
+ warning (OPT_Wattributes,
+ "%J%<externally_visible%> attribute have effect only on public objects",
+ node->decl);
+ else
+ {
+ if (node->local.finalized)
+ cgraph_mark_needed_node (node);
+ node->local.externally_visible = true;
+ }
+ }
+ }
+ /* Variables: the same two attributes, applied to varpool nodes. */
+ for (vnode = varpool_nodes; vnode != first_var; vnode = vnode->next)
+ {
+ tree decl = vnode->decl;
+ if (lookup_attribute ("used", DECL_ATTRIBUTES (decl)))
+ {
+ mark_decl_referenced (decl);
+ if (vnode->finalized)
+ varpool_mark_needed_node (vnode);
+ }
+ if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (decl)))
+ {
+ if (! TREE_PUBLIC (vnode->decl))
+ warning (OPT_Wattributes,
+ "%J%<externally_visible%> attribute have effect only on public objects",
+ vnode->decl);
+ else
+ {
+ if (vnode->finalized)
+ varpool_mark_needed_node (vnode);
+ vnode->externally_visible = true;
+ }
+ }
+ }
+}
+
/* Analyze the whole compilation unit once it is parsed completely. */
void
cgraph_finalize_compilation_unit (void)
{
- struct cgraph_node *node;
+ struct cgraph_node *node, *next;
/* Keep track of already processed nodes when called multiple times for
intermodule optimization. */
static struct cgraph_node *first_analyzed;
+ struct cgraph_node *first_processed = first_analyzed;
+ static struct varpool_node *first_analyzed_var;
+
+ if (errorcount || sorrycount)
+ return;
finish_aliases_1 ();
{
cgraph_output_pending_asms ();
cgraph_assemble_pending_functions ();
+ varpool_output_debug_info ();
return;
}
if (!quiet_flag)
{
- fprintf (stderr, "\nAnalyzing compilation unit");
+ fprintf (stderr, "\nAnalyzing compilation unit\n");
fflush (stderr);
}
timevar_push (TV_CGRAPH);
- cgraph_varpool_analyze_pending_decls ();
+ process_function_and_variable_attributes (first_processed,
+ first_analyzed_var);
+ first_processed = cgraph_nodes;
+ first_analyzed_var = varpool_nodes;
+ varpool_analyze_pending_decls ();
if (cgraph_dump_file)
{
fprintf (cgraph_dump_file, "Initial entry points:");
if (!edge->callee->reachable)
cgraph_mark_reachable_node (edge->callee);
- cgraph_varpool_analyze_pending_decls ();
+ /* We finalize local static variables during constructing callgraph
+ edges. Process their attributes too. */
+ process_function_and_variable_attributes (first_processed,
+ first_analyzed_var);
+ first_processed = cgraph_nodes;
+ first_analyzed_var = varpool_nodes;
+ varpool_analyze_pending_decls ();
}
/* Collect entry points to the unit. */
-
if (cgraph_dump_file)
{
fprintf (cgraph_dump_file, "Unit entry points:");
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\nReclaiming functions:");
- for (node = cgraph_nodes; node != first_analyzed; node = node->next)
+ for (node = cgraph_nodes; node != first_analyzed; node = next)
{
tree decl = node->decl;
+ next = node->next;
if (node->local.finalized && !DECL_SAVED_TREE (decl))
cgraph_reset_node (node);
union
{
struct cgraph_node *f;
- struct cgraph_varpool_node *v;
+ struct varpool_node *v;
struct cgraph_asm_node *a;
} u;
};
struct cgraph_order_sort *nodes;
int i;
struct cgraph_node *pf;
- struct cgraph_varpool_node *pv;
+ struct varpool_node *pv;
struct cgraph_asm_node *pa;
max = cgraph_order;
nodes = (struct cgraph_order_sort *) alloca (size);
memset (nodes, 0, size);
- cgraph_varpool_analyze_pending_decls ();
+ varpool_analyze_pending_decls ();
for (pf = cgraph_nodes; pf; pf = pf->next)
{
}
}
- for (pv = cgraph_varpool_nodes_queue; pv; pv = pv->next_needed)
+ for (pv = varpool_nodes_queue; pv; pv = pv->next_needed)
{
i = pv->order;
gcc_assert (nodes[i].kind == ORDER_UNDEFINED);
break;
case ORDER_VAR:
- cgraph_varpool_assemble_decl (nodes[i].u.v);
+ varpool_assemble_decl (nodes[i].u.v);
break;
case ORDER_ASM:
cgraph_function_and_variable_visibility (void)
{
struct cgraph_node *node;
- struct cgraph_varpool_node *vnode;
+ struct varpool_node *vnode;
for (node = cgraph_nodes; node; node = node->next)
{
&& !DECL_EXTERNAL (node->decl)
&& !node->local.externally_visible);
}
- for (vnode = cgraph_varpool_nodes_queue; vnode; vnode = vnode->next_needed)
+ for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
{
if (vnode->needed
&& !flag_whole_program
{
struct cgraph_node *node;
if (!cgraph_global_info_ready)
- return (DECL_INLINE (decl) && !flag_really_no_inline);
+ return (flag_really_no_inline
+ ? lang_hooks.tree_inlining.disregard_inline_limits (decl)
+ : DECL_INLINE (decl));
/* Look if there is any clone around. */
for (node = cgraph_node (decl); node; node = node->next_clone)
if (node->global.inlined_to)
void
cgraph_optimize (void)
{
+ if (errorcount || sorrycount)
+ return;
+
#ifdef ENABLE_CHECKING
verify_cgraph ();
#endif
if (!flag_unit_at_a_time)
{
cgraph_output_pending_asms ();
- cgraph_varpool_assemble_pending_decls ();
+ varpool_assemble_pending_decls ();
+ varpool_output_debug_info ();
return;
}
- process_pending_assemble_externals ();
-
/* Frontend may output common variables after the unit has been finalized.
It is safe to deal with them here as they are always zero initialized. */
- cgraph_varpool_analyze_pending_decls ();
+ varpool_analyze_pending_decls ();
timevar_push (TV_CGRAPHOPT);
if (!quiet_flag)
- fprintf (stderr, "Performing intraprocedural optimizations\n");
+ fprintf (stderr, "Performing interprocedural optimizations\n");
cgraph_function_and_variable_visibility ();
if (cgraph_dump_file)
/* This pass remove bodies of extern inline functions we never inlined.
Do this later so other IPA passes see what is really going on. */
cgraph_remove_unreachable_nodes (false, dump_file);
+ cgraph_increase_alignment ();
cgraph_global_info_ready = true;
if (cgraph_dump_file)
{
cgraph_output_pending_asms ();
cgraph_expand_all_functions ();
- cgraph_varpool_remove_unreferenced_decls ();
+ varpool_remove_unreferenced_decls ();
- cgraph_varpool_assemble_pending_decls ();
+ varpool_assemble_pending_decls ();
+ varpool_output_debug_info ();
}
if (cgraph_dump_file)
/* Double check that all inline clones are gone and that all
function bodies have been released from memory. */
if (flag_unit_at_a_time
- && !dump_enabled_p (TDI_tree_all)
&& !(sorrycount || errorcount))
{
struct cgraph_node *node;
#endif
}
+/* Increase alignment of global arrays to improve vectorization potential.
+
+   Does nothing unless both flag_section_anchors and flag_tree_vectorize
+   are set.  Otherwise each variable on varpool_nodes_queue that has array
+   type is considered: if a vector type exists for its element type and
+   that vector type is more strictly aligned than the decl currently is,
+   the decl is given the vector type's alignment, provided
+   vect_can_force_dr_alignment_p accepts the decl at that alignment.
+   Every change is reported on a line of its own in cgraph_dump_file when
+   that file is open.
+
+   TODO:
+   - Consider also structs that have an array field.
+   - Use ipa analysis to prune arrays that can't be vectorized?
+     This should involve global alignment analysis and in the future also
+     array padding.  */
+
+static void
+cgraph_increase_alignment (void)
+{
+  if (flag_section_anchors && flag_tree_vectorize)
+    {
+      struct varpool_node *vnode;
+
+      /* Increase the alignment of all global arrays for vectorization.  */
+      for (vnode = varpool_nodes_queue;
+           vnode;
+           vnode = vnode->next_needed)
+        {
+          tree vectype, decl = vnode->decl;
+          unsigned int alignment;
+
+          if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
+            continue;
+          vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
+          if (!vectype)
+            continue;
+          alignment = TYPE_ALIGN (vectype);
+          /* Already aligned at least as strictly as the vector type.  */
+          if (DECL_ALIGN (decl) >= alignment)
+            continue;
+
+          if (vect_can_force_dr_alignment_p (decl, alignment))
+            {
+              /* Assign the very value that was just checked rather than
+                 recomputing it from VECTYPE.  */
+              DECL_ALIGN (decl) = alignment;
+              /* NOTE(review): presumably set so that later alignment
+                 computation treats the new value as binding -- confirm.  */
+              DECL_USER_ALIGN (decl) = 1;
+              if (cgraph_dump_file)
+                {
+                  fprintf (cgraph_dump_file, "Increasing alignment of decl: ");
+                  print_generic_expr (cgraph_dump_file, decl, TDF_SLIM);
+                  /* Terminate the record so successive entries do not run
+                     together on one line of the dump.  */
+                  fprintf (cgraph_dump_file, "\n");
+                }
+            }
+        }
+    }
+}
+
/* Generate and emit a static constructor or destructor. WHICH must be
one of 'I' or 'D'. BODY should be a STATEMENT_LIST containing
GENERIC statements. */
tree decl, name, resdecl;
sprintf (which_buf, "%c_%d", which, counter++);
- name = get_file_function_name_long (which_buf);
+ name = get_file_function_name (which_buf);
decl = build_decl (FUNCTION_DECL, name,
build_function_type (void_type_node, void_list_node));
#endif
return first_clone;
}
-