return acc && acc->first_child;
}
+/* Return true iff ACC is (partly) covered by at least one replacement. */
+
+static bool
+access_has_replacements_p (struct access *acc)
+{
+ struct access *child;
+ if (acc->grp_to_be_replaced)
+ return true;
+ for (child = acc->first_child; child; child = child->next_sibling)
+ if (access_has_replacements_p (child))
+ return true;
+ return false;
+}
+
/* Return a vector of pointers to accesses for the variable given in BASE or
NULL if there is none. */
{
*msg = "structure field size not fixed";
return true;
- }
+ }
+ if (!host_integerp (bit_position (fld), 0))
+ {
+ *msg = "structure field size too big";
+ return true;
+ }
if (AGGREGATE_TYPE_P (ft)
&& int_bit_position (fld) % BITS_PER_UNIT != 0)
{
disqualify_base_of_expr (tree t, const char *reason)
{
t = get_base_address (t);
- if (sra_mode == SRA_MODE_EARLY_IPA
+ if (t
+ && sra_mode == SRA_MODE_EARLY_IPA
&& TREE_CODE (t) == MEM_REF)
t = get_ssa_base_param (TREE_OPERAND (t, 0));
return false;
}
+/* Return true if EXP is a memory reference less aligned than what the access
+ ACC would require. This is invoked only on strict-alignment targets. */
+
+static bool
+tree_non_aligned_mem_for_access_p (tree exp, struct access *acc)
+{
+ unsigned int acc_align;
+
+ /* The alignment of the access is that of its expression. However, it may
+ have been artificially increased, e.g. by a local alignment promotion,
+ so we cap it to the alignment of the type of the base, on the grounds
+ that valid sub-accesses cannot be more aligned than that. */
+ acc_align = get_object_alignment (acc->expr);
+ if (acc->base && acc_align > TYPE_ALIGN (TREE_TYPE (acc->base)))
+ acc_align = TYPE_ALIGN (TREE_TYPE (acc->base));
+
+ return tree_non_aligned_mem_p (exp, acc_align);
+}
+
/* Scan expressions occurring in STMT, create access structures for all accesses
to candidates for scalarization and remove those candidates which occur in
statements or expressions that prevent them from being split apart. Return
if (lacc)
{
lacc->grp_assignment_write = 1;
- if (STRICT_ALIGNMENT
- && tree_non_aligned_mem_p (rhs, get_object_alignment (lhs)))
+ if (STRICT_ALIGNMENT && tree_non_aligned_mem_for_access_p (rhs, lacc))
lacc->grp_unscalarizable_region = 1;
}
if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
&& !is_gimple_reg_type (racc->type))
bitmap_set_bit (should_scalarize_away_bitmap, DECL_UID (racc->base));
- if (STRICT_ALIGNMENT
- && tree_non_aligned_mem_p (lhs, get_object_alignment (rhs)))
+ if (STRICT_ALIGNMENT && tree_non_aligned_mem_for_access_p (lhs, racc))
racc->grp_unscalarizable_region = 1;
}
tree prev_base = base;
tree off;
HOST_WIDE_INT base_offset;
+ unsigned HOST_WIDE_INT misalign;
+ unsigned int align;
gcc_checking_assert (offset % BITS_PER_UNIT == 0);
base = build_fold_addr_expr (unshare_expr (base));
}
+ /* If prev_base were always an originally performed access
+ we could extract more optimistic alignment information
+ by looking at the access mode. That would constrain the
+ alignment of base + base_offset, which we would need to
+ adjust according to offset. */
+ align = get_pointer_alignment_1 (base, &misalign);
+ if (misalign == 0
+ && (TREE_CODE (prev_base) == MEM_REF
+ || TREE_CODE (prev_base) == TARGET_MEM_REF))
+ align = MAX (align, TYPE_ALIGN (TREE_TYPE (prev_base)));
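+ /* Fold the byte offset OFF (converted to bits) into the misalignment,
+ reduce ALIGN to the largest power of two still guaranteed for
+ base + off, and give the reference a correspondingly aligned type. */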
+ misalign += (double_int_sext (tree_to_double_int (off),
+ TYPE_PRECISION (TREE_TYPE (off))).low
+ * BITS_PER_UNIT);
+ misalign = misalign & (align - 1);
+ if (misalign != 0)
+ align = (misalign & -misalign);
+ if (align < TYPE_ALIGN (exp_type))
+ exp_type = build_aligned_type (exp_type, align);
+
return fold_build2_loc (loc, MEM_REF, exp_type, base, off);
}
&& (root->grp_scalar_write || root->grp_assignment_write))))
{
bool new_integer_type;
- if (TREE_CODE (root->type) == ENUMERAL_TYPE)
+ /* Always create access replacements that cover the whole access.
+ For integral types this means the precision has to match.
+ Avoid assumptions based on the integral type kind, too. */
+ if (INTEGRAL_TYPE_P (root->type)
+ && (TREE_CODE (root->type) != INTEGER_TYPE
+ || TYPE_PRECISION (root->type) != root->size)
+ /* But leave bitfield accesses alone. */
+ && (TREE_CODE (root->expr) != COMPONENT_REF
+ || !DECL_BIT_FIELD (TREE_OPERAND (root->expr, 1))))
{
tree rt = root->type;
- root->type = build_nonstandard_integer_type (TYPE_PRECISION (rt),
+ gcc_assert ((root->offset % BITS_PER_UNIT) == 0
+ && (root->size % BITS_PER_UNIT) == 0);
+ root->type = build_nonstandard_integer_type (root->size,
TYPE_UNSIGNED (rt));
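+ /* Rebuild the access expression so that it refers to the new integer
+ type at the original offset within the base. */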
+ root->expr = build_ref_for_offset (UNKNOWN_LOCATION,
+ root->base, root->offset,
+ root->type, NULL, false);
new_integer_type = true;
}
else
if (!acc)
return SRA_AM_NONE;
+ if (gimple_clobber_p (*stmt))
+ {
+ /* Remove clobbers of fully scalarized variables, otherwise
+ do nothing. */
+ if (acc->grp_covered)
+ {
+ unlink_stmt_vdef (*stmt);
+ gsi_remove (gsi, true);
+ return SRA_AM_REMOVED;
+ }
+ else
+ return SRA_AM_NONE;
+ }
+
loc = gimple_location (*stmt);
if (VEC_length (constructor_elt,
CONSTRUCTOR_ELTS (gimple_assign_rhs1 (*stmt))) > 0)
force_gimple_rhs = true;
sra_stats.exprs++;
}
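+ /* The RHS aggregate has no scalar replacements and no unscalarized
+ data, so a load of it into an SSA name can be replaced by the
+ default-definition SSA name of its replacement. */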
+ else if (racc
+ && !racc->grp_unscalarized_data
+ && TREE_CODE (lhs) == SSA_NAME
+ && !access_has_replacements_p (racc))
+ {
+ rhs = get_repl_default_def_ssa_name (racc);
+ modify_this_stmt = true;
+ sra_stats.exprs++;
+ }
if (modify_this_stmt)
{
??? This should move to fold_stmt which we simply should
call after building a VIEW_CONVERT_EXPR here. */
if (AGGREGATE_TYPE_P (TREE_TYPE (lhs))
- && !contains_bitfld_comp_ref_p (lhs)
- && !access_has_children_p (lacc))
+ && !contains_bitfld_comp_ref_p (lhs))
{
lhs = build_ref_for_model (loc, lhs, 0, racc, gsi, false);
gimple_assign_set_lhs (*stmt, lhs);
}
else if (AGGREGATE_TYPE_P (TREE_TYPE (rhs))
- && !contains_vce_or_bfcref_p (rhs)
- && !access_has_children_p (racc))
+ && !contains_vce_or_bfcref_p (rhs))
rhs = build_ref_for_model (loc, rhs, 0, lacc, gsi, false);
if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0,
gsi, true, true, loc);
sra_stats.separate_lhs_rhs_handling++;
+
+ /* This gimplification must be done after generate_subtree_copies,
+ lest we insert the subtree copies in the middle of the gimplified
+ sequence. */
+ if (force_gimple_rhs)
+ rhs = force_gimple_operand_gsi (&orig_gsi, rhs, true, NULL_TREE,
+ true, GSI_SAME_STMT);
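+ /* If any of the above changed the RHS, put the new operand back into
+ the original statement. */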
+ if (gimple_assign_rhs1 (*stmt) != rhs)
+ {
+ modify_this_stmt = true;
+ gimple_assign_set_rhs_from_tree (&orig_gsi, rhs);
+ gcc_assert (*stmt == gsi_stmt (orig_gsi));
+ }
+
+ return modify_this_stmt ? SRA_AM_MODIFIED : SRA_AM_NONE;
}
else
{
- if (access_has_children_p (lacc) && access_has_children_p (racc))
+ if (access_has_children_p (lacc)
+ && access_has_children_p (racc)
+ /* When an access represents an unscalarizable region, it usually
+ represents accesses with variable offset and thus must not be used
+ to generate new memory accesses. */
+ && !lacc->grp_unscalarizable_region
+ && !racc->grp_unscalarizable_region)
{
gimple_stmt_iterator orig_gsi = *gsi;
enum unscalarized_data_handling refreshed;
}
else
{
- if (racc)
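+ /* Everything in the RHS is covered by scalar replacements; copy them
+ into the corresponding pieces of the LHS and remove the original
+ aggregate load. */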
+ if (access_has_children_p (racc)
+ && !racc->grp_unscalarized_data)
{
- if (!racc->grp_to_be_replaced && !racc->grp_unscalarized_data)
+ if (dump_file)
{
- if (dump_file)
- {
- fprintf (dump_file, "Removing load: ");
- print_gimple_stmt (dump_file, *stmt, 0, 0);
- }
-
- if (TREE_CODE (lhs) == SSA_NAME)
- {
- rhs = get_repl_default_def_ssa_name (racc);
- if (!useless_type_conversion_p (TREE_TYPE (lhs),
- TREE_TYPE (rhs)))
- rhs = fold_build1_loc (loc, VIEW_CONVERT_EXPR,
- TREE_TYPE (lhs), rhs);
- }
- else
- {
- if (racc->first_child)
- generate_subtree_copies (racc->first_child, lhs,
- racc->offset, 0, 0, gsi,
- false, false, loc);
-
- gcc_assert (*stmt == gsi_stmt (*gsi));
- unlink_stmt_vdef (*stmt);
- gsi_remove (gsi, true);
- sra_stats.deleted++;
- return SRA_AM_REMOVED;
- }
+ fprintf (dump_file, "Removing load: ");
+ print_gimple_stmt (dump_file, *stmt, 0, 0);
}
- else if (racc->first_child)
- generate_subtree_copies (racc->first_child, lhs, racc->offset,
- 0, 0, gsi, false, true, loc);
+ generate_subtree_copies (racc->first_child, lhs,
+ racc->offset, 0, 0, gsi,
+ false, false, loc);
+ gcc_assert (*stmt == gsi_stmt (*gsi));
+ unlink_stmt_vdef (*stmt);
+ gsi_remove (gsi, true);
+ sra_stats.deleted++;
+ return SRA_AM_REMOVED;
}
+ /* Restore the aggregate RHS from its components so the
+ prevailing aggregate copy does the right thing. */
+ if (access_has_children_p (racc))
+ generate_subtree_copies (racc->first_child, racc->base, 0, 0, 0,
+ gsi, false, false, loc);
+ /* Re-load the components of the aggregate copy destination.
+ But use the RHS aggregate to load from to expose more
+ optimization opportunities. */
if (access_has_children_p (lacc))
generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
0, 0, gsi, true, true, loc);
}
- }
- /* This gimplification must be done after generate_subtree_copies, lest we
- insert the subtree copies in the middle of the gimplified sequence. */
- if (force_gimple_rhs)
- rhs = force_gimple_operand_gsi (&orig_gsi, rhs, true, NULL_TREE,
- true, GSI_SAME_STMT);
- if (gimple_assign_rhs1 (*stmt) != rhs)
- {
- modify_this_stmt = true;
- gimple_assign_set_rhs_from_tree (&orig_gsi, rhs);
- gcc_assert (*stmt == gsi_stmt (orig_gsi));
+ return SRA_AM_NONE;
}
-
- return modify_this_stmt ? SRA_AM_MODIFIED : SRA_AM_NONE;
}
/* Traverse the function body and all modifications as decided in
if (by_ref && repr->non_addressable)
return 0;
+ /* Do not decompose a non-BLKmode param in a way that would
+ create BLKmode params. Especially for by-reference passing
+ (thus, pointer-type param) this is hardly worthwhile. */
+ if (DECL_MODE (parm) != BLKmode
+ && TYPE_MODE (repr->type) == BLKmode)
+ return 0;
+
if (!by_ref || (!repr->grp_maybe_modified
&& !repr->grp_not_necessarilly_dereferenced))
total_size += repr->size;
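+ /* cgraph_node_name may return a pointer into a shared buffer, so copy
+ the first name before the second call can overwrite it. */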
if (dump_file)
fprintf (dump_file, "Adjusting call (%i -> %i) %s -> %s\n",
cs->caller->uid, cs->callee->uid,
- cgraph_node_name (cs->caller),
- cgraph_node_name (cs->callee));
+ xstrdup (cgraph_node_name (cs->caller)),
+ xstrdup (cgraph_node_name (cs->callee)));
ipa_modify_call_arguments (cs, cs->call_stmt, adjustments);
current_function_decl = NULL_TREE;
new_node = cgraph_function_versioning (node, redirect_callers, NULL, NULL,
- NULL, NULL, "isra");
+ false, NULL, NULL, "isra");
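+ /* The vector of edges to redirect is no longer needed; release it. */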
+ VEC_free (cgraph_edge_p, heap, redirect_callers);
+
current_function_decl = new_node->decl;
push_cfun (DECL_STRUCT_FUNCTION (new_node->decl));