#include "system.h"
#include "coretypes.h"
#include "tm.h"
-#include "toplev.h"
#include "tree.h"
#include "expr.h"
#include "flags.h"
#include "gcov-io.h"
static void output_varpool (cgraph_node_set, varpool_node_set);
-static void output_cgraph_opt_summary (void);
+static void output_cgraph_opt_summary (cgraph_node_set set);
static void input_cgraph_opt_summary (VEC (cgraph_node_ptr, heap) * nodes);
gcc_assert (!(flags & (ECF_LOOPING_CONST_OR_PURE
| ECF_MAY_BE_ALLOCA
| ECF_SIBCALL
+ | ECF_LEAF
| ECF_NOVOPS)));
}
lto_output_bitpack (&bp);
lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
lto_output_sleb128_stream (ob->main_stream, node->count);
+ lto_output_sleb128_stream (ob->main_stream, node->count_materialization_scale);
if (tag == LTO_cgraph_analyzed_node)
{
bp_pack_value (&bp, node->alias, 1);
bp_pack_value (&bp, node->finalized_by_frontend, 1);
bp_pack_value (&bp, node->frequency, 2);
+ bp_pack_value (&bp, node->only_called_at_startup, 1);
+ bp_pack_value (&bp, node->only_called_at_exit, 1);
lto_output_bitpack (&bp);
+ lto_output_uleb128_stream (ob->main_stream, node->resolution);
if (node->same_body)
{
lto_output_fn_decl_index (ob->decl_state, ob->main_stream,
alias->thunk.alias);
}
+ lto_output_uleb128_stream (ob->main_stream, alias->resolution);
alias = alias->previous;
}
while (alias);
bp_pack_value (&bp, node->force_output, 1);
bp_pack_value (&bp, node->finalized, 1);
bp_pack_value (&bp, node->alias, 1);
- bp_pack_value (&bp, node->const_value_known, 1);
gcc_assert (!node->alias || !node->extra_name);
gcc_assert (node->finalized || !node->analyzed);
gcc_assert (node->needed);
else
ref = LCC_NOT_FOUND;
lto_output_sleb128_stream (ob->main_stream, ref);
+ lto_output_uleb128_stream (ob->main_stream, node->resolution);
if (count)
{
lto_output_uleb128_stream (ob->main_stream, count);
for (alias = node->extra_name; alias; alias = alias->next)
- lto_output_var_decl_index (ob->decl_state, ob->main_stream, alias->decl);
+ {
+ lto_output_var_decl_index (ob->decl_state, ob->main_stream, alias->decl);
+ lto_output_uleb128_stream (ob->main_stream, alias->resolution);
+ }
}
}
{
if (profile_info)
{
- /* We do not output num, it is not terribly useful. */
+ /* We do not output num, sum_all and run_max, they are not used by
+ GCC profile feedback and they are difficult to merge from multiple
+ units. */
gcc_assert (profile_info->runs);
lto_output_uleb128_stream (ob->main_stream, profile_info->runs);
- lto_output_sleb128_stream (ob->main_stream, profile_info->sum_all);
- lto_output_sleb128_stream (ob->main_stream, profile_info->run_max);
- lto_output_sleb128_stream (ob->main_stream, profile_info->sum_max);
+ lto_output_uleb128_stream (ob->main_stream, profile_info->sum_max);
}
else
lto_output_uleb128_stream (ob->main_stream, 0);
static bool asm_nodes_output = false;
if (flag_wpa)
- output_cgraph_opt_summary ();
+ output_cgraph_opt_summary (set);
ob = lto_create_simple_output_block (LTO_section_cgraph);
unsigned int self_time,
unsigned int time_inlining_benefit,
unsigned int self_size,
- unsigned int size_inlining_benefit)
+ unsigned int size_inlining_benefit,
+ enum ld_plugin_symbol_resolution resolution)
{
node->aux = (void *) tag;
node->local.inline_summary.estimated_self_stack_size = stack_size;
node->alias = bp_unpack_value (bp, 1);
node->finalized_by_frontend = bp_unpack_value (bp, 1);
node->frequency = (enum node_frequency)bp_unpack_value (bp, 2);
+ node->only_called_at_startup = bp_unpack_value (bp, 1);
+ node->only_called_at_exit = bp_unpack_value (bp, 1);
+ node->resolution = resolution;
}
/* Output the part of the cgraph in SET. */
int size_inlining_benefit = 0;
unsigned long same_body_count = 0;
int clone_ref;
+ enum ld_plugin_symbol_resolution resolution;
clone_ref = lto_input_sleb128 (ib);
node = cgraph_node (fn_decl);
node->count = lto_input_sleb128 (ib);
+ node->count_materialization_scale = lto_input_sleb128 (ib);
if (tag == LTO_cgraph_analyzed_node)
{
"node %d", node->uid);
bp = lto_input_bitpack (ib);
+ resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib);
input_overwrite_node (file_data, node, tag, &bp, stack_size, self_time,
time_inlining_benefit, self_size,
- size_inlining_benefit);
+ size_inlining_benefit, resolution);
/* Store a reference for now, and fix up later to be a pointer. */
node->global.inlined_to = (cgraph_node_ptr) (intptr_t) ref;
{
tree alias_decl;
int type;
+ struct cgraph_node *alias;
decl_index = lto_input_uleb128 (ib);
alias_decl = lto_file_decl_data_get_fn_decl (file_data, decl_index);
type = lto_input_uleb128 (ib);
tree real_alias;
decl_index = lto_input_uleb128 (ib);
real_alias = lto_file_decl_data_get_fn_decl (file_data, decl_index);
- cgraph_same_body_alias (alias_decl, real_alias);
+ alias = cgraph_same_body_alias (alias_decl, real_alias);
}
else
{
tree real_alias;
decl_index = lto_input_uleb128 (ib);
real_alias = lto_file_decl_data_get_fn_decl (file_data, decl_index);
- cgraph_add_thunk (alias_decl, fn_decl, type & 2, fixed_offset,
- virtual_value,
- (type & 4) ? size_int (virtual_value) : NULL_TREE,
- real_alias);
+ alias = cgraph_add_thunk (alias_decl, fn_decl, type & 2, fixed_offset,
+ virtual_value,
+ (type & 4) ? size_int (virtual_value) : NULL_TREE,
+ real_alias);
}
+ alias->resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib);
}
return node;
}
node->force_output = bp_unpack_value (&bp, 1);
node->finalized = bp_unpack_value (&bp, 1);
node->alias = bp_unpack_value (&bp, 1);
- node->const_value_known = bp_unpack_value (&bp, 1);
node->analyzed = node->finalized;
node->used_from_other_partition = bp_unpack_value (&bp, 1);
node->in_other_partition = bp_unpack_value (&bp, 1);
ref = lto_input_sleb128 (ib);
/* Store a reference for now, and fix up later to be a pointer. */
node->same_comdat_group = (struct varpool_node *) (intptr_t) ref;
+ node->resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib);
if (aliases_p)
{
count = lto_input_uleb128 (ib);
{
tree decl = lto_file_decl_data_get_var_decl (file_data,
lto_input_uleb128 (ib));
- varpool_extra_name_alias (decl, var_decl);
+ struct varpool_node *alias;
+ alias = varpool_extra_name_alias (decl, var_decl);
+ alias->resolution = (enum ld_plugin_symbol_resolution)lto_input_uleb128 (ib);
}
}
return node;
/* Input profile_info from IB. */
static void
-input_profile_summary (struct lto_input_block *ib)
+input_profile_summary (struct lto_input_block *ib,
+ struct lto_file_decl_data *file_data)
{
unsigned int runs = lto_input_uleb128 (ib);
if (runs)
{
- if (!profile_info)
- {
- profile_info = &lto_gcov_summary;
- lto_gcov_summary.runs = runs;
- lto_gcov_summary.sum_all = lto_input_sleb128 (ib);
- lto_gcov_summary.run_max = lto_input_sleb128 (ib);
- lto_gcov_summary.sum_max = lto_input_sleb128 (ib);
- }
- /* We can support this by scaling all counts to nearest common multiple
- of all different runs, but it is perhaps not worth the effort. */
- else if (profile_info->runs != runs
- || profile_info->sum_all != lto_input_sleb128 (ib)
- || profile_info->run_max != lto_input_sleb128 (ib)
- || profile_info->sum_max != lto_input_sleb128 (ib))
- sorry ("Combining units with different profiles is not supported.");
- /* We allow some units to have profile and other to not have one. This will
- just make unprofiled units to be size optimized that is sane. */
+ file_data->profile_info.runs = runs;
+ file_data->profile_info.sum_max = lto_input_uleb128 (ib);
+ if (runs > file_data->profile_info.sum_max)
+ fatal_error ("Corrupted profile info in %s: sum_max is smaller than runs",
+ file_data->file_name);
}
}
+/* Rescale profile summaries to the same number of runs in the whole unit.
+
+   FILE_DATA_VEC is a NULL-terminated vector of per-file LTO data whose
+   profile_info was filled in by input_profile_summary.  The file with the
+   largest number of runs sets the scale.  The merged summary is stored in
+   lto_gcov_summary and made current through profile_info.  Unless we are
+   in LTRANS, the counts of every node (and of its callee edges) that comes
+   from a profiled file are rescaled as well.  */
+
+static void
+merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
+{
+  struct lto_file_decl_data *file_data;
+  unsigned int j;
+  gcov_unsigned_t max_runs = 0;
+  struct cgraph_node *node;
+  struct cgraph_edge *edge;
+
+  /* Find unit with maximal number of runs.  If we ever get serious about
+     roundoff errors, we might also consider computing smallest common
+     multiple.  */
+  for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
+    if (max_runs < file_data->profile_info.runs)
+      max_runs = file_data->profile_info.runs;
+
+  /* No unit carries a profile; nothing to merge.  */
+  if (!max_runs)
+    return;
+
+  /* Simple overflow check.  We probably don't need to support that many train
+     runs.  Such a large value probably implies data corruption anyway.  */
+  if (max_runs > INT_MAX / REG_BR_PROB_BASE)
+    {
+      sorry ("At most %i profile runs is supported. Perhaps corrupted profile?",
+             INT_MAX / REG_BR_PROB_BASE);
+      return;
+    }
+
+  profile_info = &lto_gcov_summary;
+  lto_gcov_summary.runs = max_runs;
+  lto_gcov_summary.sum_max = 0;
+
+  /* Rescale all units to the maximal number of runs.
+     sum_max cannot be easily merged, as we have no idea what files come from
+     the same run.  Approximate it by the largest per-file sum_max after
+     rescaling.  */
+  for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
+    if (file_data->profile_info.runs)
+      {
+        int scale = ((REG_BR_PROB_BASE * max_runs
+                      + file_data->profile_info.runs / 2)
+                     / file_data->profile_info.runs);
+        lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max,
+                                        (file_data->profile_info.sum_max
+                                         * scale
+                                         + REG_BR_PROB_BASE / 2)
+                                        / REG_BR_PROB_BASE);
+      }
+
+  /* Watch roundoff errors.  */
+  if (lto_gcov_summary.sum_max < max_runs)
+    lto_gcov_summary.sum_max = max_runs;
+
+  /* If merging already happened at WPA time, we are done.  */
+  if (flag_ltrans)
+    return;
+
+  /* Now compute count_materialization_scale of each node.
+     During LTRANS we already have values of count_materialization_scale
+     computed, so just update them.  Nodes without associated file data
+     have no profile to rescale and are skipped.  */
+  for (node = cgraph_nodes; node; node = node->next)
+    if (node->local.lto_file_data
+        && node->local.lto_file_data->profile_info.runs)
+      {
+        int scale;
+
+        scale =
+          ((node->count_materialization_scale * max_runs
+            + node->local.lto_file_data->profile_info.runs / 2)
+           / node->local.lto_file_data->profile_info.runs);
+        node->count_materialization_scale = scale;
+        /* FILE_DATA is NULL here (both loops above run to the terminating
+           NULL entry), so name the file through the node itself.  */
+        if (scale < 0)
+          fatal_error ("Profile information in %s corrupted",
+                       node->local.lto_file_data->file_name);
+
+        /* A scale equal to REG_BR_PROB_BASE leaves the counts unchanged.  */
+        if (scale == REG_BR_PROB_BASE)
+          continue;
+        for (edge = node->callees; edge; edge = edge->next_callee)
+          edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2)
+                         / REG_BR_PROB_BASE);
+        node->count = ((node->count * scale + REG_BR_PROB_BASE / 2)
+                       / REG_BR_PROB_BASE);
+      }
+}
+
/* Input and merge the cgraph from each of the .o files passed to
lto1. */
ib = lto_create_simple_input_block (file_data, LTO_section_cgraph,
&data, &len);
- input_profile_summary (ib);
+ if (!ib)
+ fatal_error ("cannot find LTO cgraph in %s", file_data->file_name);
+ input_profile_summary (ib, file_data);
file_data->cgraph_node_encoder = lto_cgraph_encoder_new ();
nodes = input_cgraph_1 (file_data, ib);
lto_destroy_simple_input_block (file_data, LTO_section_cgraph,
ib = lto_create_simple_input_block (file_data, LTO_section_varpool,
&data, &len);
+ if (!ib)
+ fatal_error ("cannot find LTO varpool in %s", file_data->file_name);
varpool = input_varpool_1 (file_data, ib);
lto_destroy_simple_input_block (file_data, LTO_section_varpool,
ib, data, len);
ib = lto_create_simple_input_block (file_data, LTO_section_refs,
&data, &len);
+ if (!ib)
+ fatal_error("cannot find LTO section refs in %s", file_data->file_name);
input_refs (ib, nodes, varpool);
lto_destroy_simple_input_block (file_data, LTO_section_refs,
ib, data, len);
VEC_free (cgraph_node_ptr, heap, nodes);
VEC_free (varpool_node_ptr, heap, varpool);
}
+ merge_profile_summaries (file_data_vec);
+
/* Clear out the aux field that was used to store enough state to
tell which nodes should be overwritten. */
/* True when we need optimization summary for NODE. */
static int
-output_cgraph_opt_summary_p (struct cgraph_node *node)
+output_cgraph_opt_summary_p (struct cgraph_node *node, cgraph_node_set set)
{
- if (!node->clone_of)
- return false;
- return (node->clone.tree_map
- || node->clone.args_to_skip
- || node->clone.combined_args_to_skip);
+ struct cgraph_edge *e;
+
+ if (cgraph_node_in_set_p (node, set))
+ {
+ for (e = node->callees; e; e = e->next_callee)
+ if (e->indirect_info
+ && e->indirect_info->thunk_delta != 0)
+ return true;
+
+ for (e = node->indirect_calls; e; e = e->next_callee)
+ if (e->indirect_info->thunk_delta != 0)
+ return true;
+ }
+
+ return (node->clone_of
+ && (node->clone.tree_map
+ || node->clone.args_to_skip
+ || node->clone.combined_args_to_skip));
+}
+
+/* Stream the optimization summary of EDGE to OB: the thunk_delta held in
+   the edge's indirect_info, or 0 when the edge has no indirect_info.  */
+static void
+output_edge_opt_summary (struct output_block *ob,
+                         struct cgraph_edge *edge)
+{
+  HOST_WIDE_INT delta = 0;
+
+  if (edge->indirect_info)
+    delta = edge->indirect_info->thunk_delta;
+  lto_output_sleb128_stream (ob->main_stream, delta);
}
/* Output optimization summary for NODE to OB. */
static void
output_node_opt_summary (struct output_block *ob,
- struct cgraph_node *node)
+ struct cgraph_node *node,
+ cgraph_node_set set)
{
unsigned int index;
bitmap_iterator bi;
struct ipa_replace_map *map;
struct bitpack_d bp;
int i;
+ struct cgraph_edge *e;
lto_output_uleb128_stream (ob->main_stream,
bitmap_count_bits (node->clone.args_to_skip));
bp_pack_value (&bp, map->ref_p, 1);
lto_output_bitpack (&bp);
}
+
+ if (cgraph_node_in_set_p (node, set))
+ {
+ for (e = node->callees; e; e = e->next_callee)
+ output_edge_opt_summary (ob, e);
+ for (e = node->indirect_calls; e; e = e->next_callee)
+ output_edge_opt_summary (ob, e);
+ }
}
/* Output optimization summaries stored in callgraph.
At the moment it is the clone info structure. */
static void
-output_cgraph_opt_summary (void)
+output_cgraph_opt_summary (cgraph_node_set set)
{
struct cgraph_node *node;
int i, n_nodes;
encoder = ob->decl_state->cgraph_node_encoder;
n_nodes = lto_cgraph_encoder_size (encoder);
for (i = 0; i < n_nodes; i++)
- if (output_cgraph_opt_summary_p (lto_cgraph_encoder_deref (encoder, i)))
+ if (output_cgraph_opt_summary_p (lto_cgraph_encoder_deref (encoder, i),
+ set))
count++;
lto_output_uleb128_stream (ob->main_stream, count);
for (i = 0; i < n_nodes; i++)
{
node = lto_cgraph_encoder_deref (encoder, i);
- if (output_cgraph_opt_summary_p (node))
+ if (output_cgraph_opt_summary_p (node, set))
{
lto_output_uleb128_stream (ob->main_stream, i);
- output_node_opt_summary (ob, node);
+ output_node_opt_summary (ob, node, set);
}
}
produce_asm (ob, NULL);
destroy_output_block (ob);
}
-/* Input optimiation summary of NODE. */
+/* Read the optimisation summary of EDGE from IB_MAIN.  The stream holds one
+   signed value, the thunk delta; a nonzero delta is stored in a freshly
+   allocated indirect_info, which EDGE must not have yet.  */
+
+static void
+input_edge_opt_summary (struct cgraph_edge *edge,
+                        struct lto_input_block *ib_main)
+{
+  HOST_WIDE_INT delta = lto_input_sleb128 (ib_main);
+
+  /* Zero means there is nothing to record for this edge.  */
+  if (delta == 0)
+    return;
+
+  gcc_assert (!edge->indirect_info);
+  edge->indirect_info = cgraph_allocate_init_indirect_info ();
+  edge->indirect_info->thunk_delta = delta;
+}
+
+/* Input optimisation summary of NODE. */
static void
input_node_opt_summary (struct cgraph_node *node,
int count;
int bit;
struct bitpack_d bp;
+ struct cgraph_edge *e;
count = lto_input_uleb128 (ib_main);
if (count)
map->replace_p = bp_unpack_value (&bp, 1);
map->ref_p = bp_unpack_value (&bp, 1);
}
+ for (e = node->callees; e; e = e->next_callee)
+ input_edge_opt_summary (e, ib_main);
+ for (e = node->indirect_calls; e; e = e->next_callee)
+ input_edge_opt_summary (e, ib_main);
}
/* Read section in file FILE_DATA of length LEN with data DATA. */
input_node_opt_summary (VEC_index (cgraph_node_ptr, nodes, ref),
&ib_main, data_in);
}
- lto_free_section_data (file_data, LTO_section_jump_functions, NULL, data,
+ lto_free_section_data (file_data, LTO_section_cgraph_opt_sum, NULL, data,
len);
lto_data_in_delete (data_in);
}