X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-sra.c;h=267655d4bdfabc328a5e599023ee36fd28c9086a;hb=e557e5850b3200b0dd00123f4e77ed23a9184aa2;hp=46a3c59e117c188b657a0d5bcc66288361e4a89a;hpb=ae5a4794476d324a0190599d68449fb489c9f14e;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 46a3c59e117..267655d4bdf 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -1,21 +1,21 @@ /* Scalar Replacement of Aggregates (SRA) converts some structure references into scalar references, exposing them to the scalar optimizers. - Copyright (C) 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. Contributed by Diego Novillo This file is part of GCC. - + GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA @@ -44,27 +44,37 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "timevar.h" #include "flags.h" #include "bitmap.h" +#include "obstack.h" +#include "target.h" +/* expr.h is needed for MOVE_RATIO. */ +#include "expr.h" +#include "params.h" + + +/* This object of this pass is to replace a non-addressable aggregate with a + set of independent variables. Most of the time, all of these variables + will be scalars. But a secondary objective is to break up larger + aggregates into smaller aggregates. In the process we may find that some + bits of the larger aggregate can be deleted as unreferenced. + + This substitution is done globally. More localized substitutions would + be the purvey of a load-store motion pass. + + The optimization proceeds in phases: + + (1) Identify variables that have types that are candidates for + decomposition. + (2) Scan the function looking for the ways these variables are used. + In particular we're interested in the number of times a variable + (or member) is needed as a complete unit, and the number of times + a variable (or member) is copied. -/* Maximum number of fields that a structure should have to be scalarized. - FIXME This limit has been arbitrarily set to 5. Experiment to find a - sensible setting. */ -#define MAX_NFIELDS_FOR_SRA 5 + (3) Based on the usage profile, instantiate substitution variables. -/* Codes indicating how to copy one structure into another. */ -enum sra_copy_mode { SCALAR_SCALAR, FIELD_SCALAR, SCALAR_FIELD }; + (4) Scan the function making replacements. +*/ -/* Local functions. 
*/ -static inline bool can_be_scalarized_p (tree); -static inline void insert_edge_copies (tree stmt, basic_block bb); -static tree create_scalar_copies (tree lhs, tree rhs, enum sra_copy_mode mode); -static inline void scalarize_component_ref (tree, tree *tp); -static void scalarize_structures (void); -static void scalarize_stmt (block_stmt_iterator *); -static void scalarize_modify_expr (block_stmt_iterator *); -static void scalarize_call_expr (block_stmt_iterator *); -static void scalarize_asm_expr (block_stmt_iterator *); -static void scalarize_return_expr (block_stmt_iterator *); /* The set of aggregate variables that are candidates for scalarization. */ static bitmap sra_candidates; @@ -73,65 +83,70 @@ static bitmap sra_candidates; beginning of the function. */ static bitmap needs_copy_in; -/* This structure holds the mapping between and element of an aggregate - and the scalar replacement variable. */ +/* Sets of bit pairs that cache type decomposition and instantiation. */ +static bitmap sra_type_decomp_cache; +static bitmap sra_type_inst_cache; + +/* One of these structures is created for each candidate aggregate + and each (accessed) member of such an aggregate. */ struct sra_elt { - enum tree_code kind; - tree base; - tree field; - tree replace; -}; - -static htab_t sra_map; + /* A tree of the elements. Used when we want to traverse everything. */ + struct sra_elt *parent; + struct sra_elt *children; + struct sra_elt *sibling; -static hashval_t -sra_elt_hash (const void *x) -{ - const struct sra_elt *e = x; - hashval_t h = (size_t) e->base * e->kind; - if (e->kind == COMPONENT_REF) - h ^= (size_t) e->field; - return h; -} + /* If this element is a root, then this is the VAR_DECL. If this is + a sub-element, this is some token used to identify the reference. + In the case of COMPONENT_REF, this is the FIELD_DECL. In the case + of an ARRAY_REF, this is the (constant) index. In the case of a + complex number, this is a zero or one. */ + tree element; -static int -sra_elt_eq (const void *x, const void *y) -{ - const struct sra_elt *a = x; - const struct sra_elt *b = y; + /* The type of the element. */ + tree type; - if (a->kind != b->kind) - return false; - if (a->base != b->base) - return false; - if (a->kind == COMPONENT_REF) - if (a->field != b->field) - return false; + /* A VAR_DECL, for any sub-element we've decided to replace. */ + tree replacement; - return true; -} + /* The number of times the element is referenced as a whole. I.e. + given "a.b.c", this would be incremented for C, but not for A or B. */ + unsigned int n_uses; -/* Mark all the variables in VDEF operands for STMT for renaming. - This becomes necessary when we modify all of a non-scalar. */ + /* The number of times the element is copied to or from another + scalarizable element. */ + unsigned int n_copies; -static void -mark_all_vdefs (tree stmt) -{ - vdef_optype vdefs; - size_t i, n; + /* True if TYPE is scalar. */ + bool is_scalar; - get_stmt_operands (stmt); - vdefs = VDEF_OPS (stmt_ann (stmt)); - n = NUM_VDEFS (vdefs); + /* True if we saw something about this element that prevents scalarization, + such as non-constant indexing. */ + bool cannot_scalarize; - for (i = 0; i < n; i++) - { - tree sym = VDEF_RESULT (vdefs, i); - bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); - } -} + /* True if we've decided that structure-to-structure assignment + should happen via memcpy and not per-element. */ + bool use_block_copy; + + /* A flag for use with/after random access traversals. 
*/ + bool visited; +}; + +/* Random access to the child of a parent is performed by hashing. + This prevents quadratic behavior, and allows SRA to function + reasonably on larger records. */ +static htab_t sra_map; + +/* All structures are allocated out of the following obstack. */ +static struct obstack sra_obstack; + +/* Debugging functions. */ +static void dump_sra_elt_name (FILE *, struct sra_elt *); +extern void debug_sra_elt_name (struct sra_elt *); +/* Forward declarations. */ +static tree generate_element_ref (struct sra_elt *); + /* Return true if DECL is an SRA candidate. */ static bool @@ -140,143 +155,103 @@ is_sra_candidate_decl (tree decl) return DECL_P (decl) && bitmap_bit_p (sra_candidates, var_ann (decl)->uid); } -/* Return true if EXP is of the form , where REF is one of the - field access references we handle and DECL is an SRA candidate. - - Set ALLOW_BIT_FIELD_REF to accept BIT_FIELD_REF as well. This is - normally false, except when we're trying to work around it. */ +/* Return true if TYPE is a scalar type. */ static bool -is_sra_candidate_ref (tree exp, bool allow_bit_field_ref) +is_sra_scalar_type (tree type) { - switch (TREE_CODE (exp)) - { - case BIT_FIELD_REF: - if (!allow_bit_field_ref) - break; - /* FALLTHRU */ + enum tree_code code = TREE_CODE (type); + return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE + || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE + || code == CHAR_TYPE || code == POINTER_TYPE || code == OFFSET_TYPE + || code == REFERENCE_TYPE); +} - case COMPONENT_REF: - case REALPART_EXPR: - case IMAGPART_EXPR: - return is_sra_candidate_decl (TREE_OPERAND (exp, 0)); +/* Return true if TYPE can be decomposed into a set of independent variables. - default: - break; - } + Note that this doesn't imply that all elements of TYPE can be + instantiated, just that if we decide to break up the type into + separate pieces that it can be done. */ - return false; -} +static bool +type_can_be_decomposed_p (tree type) +{ + unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; + tree t; -/* Return the scalar in SRA_MAP[VAR_IX][FIELD_IX]. If none exists, create - a new scalar with type TYPE. */ + /* Avoid searching the same type twice. */ + if (bitmap_bit_p (sra_type_decomp_cache, cache+0)) + return true; + if (bitmap_bit_p (sra_type_decomp_cache, cache+1)) + return false; -static tree -lookup_scalar (struct sra_elt *key, tree type) -{ - struct sra_elt **slot, *res; + /* The type must have a definite nonzero size. */ + if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || integer_zerop (TYPE_SIZE (type))) + goto fail; - slot = (struct sra_elt **) htab_find_slot (sra_map, key, INSERT); - res = *slot; - if (!res) + /* The type must be a non-union aggregate. 
*/ + switch (TREE_CODE (type)) { - res = xmalloc (sizeof (*res)); - *slot = res; - *res = *key; - res->replace = make_rename_temp (type, "SR"); + case RECORD_TYPE: + { + bool saw_one_field = false; - if (DECL_NAME (key->base) && !DECL_IGNORED_P (key->base)) - { - char *name = NULL; - switch (key->kind) - { - case COMPONENT_REF: - if (!DECL_NAME (key->field)) - break; - name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)), - "$", - IDENTIFIER_POINTER (DECL_NAME (key->field)), - NULL); - break; - case REALPART_EXPR: - name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)), - "$real", NULL); - break; - case IMAGPART_EXPR: - name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)), - "$imag", NULL); - break; - default: - abort (); - } - if (name) + for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t)) + if (TREE_CODE (t) == FIELD_DECL) { - DECL_NAME (res->replace) = get_identifier (name); - free (name); - } - } - - DECL_SOURCE_LOCATION (res->replace) = DECL_SOURCE_LOCATION (key->base); - TREE_NO_WARNING (res->replace) = TREE_NO_WARNING (key->base); - DECL_ARTIFICIAL (res->replace) = DECL_ARTIFICIAL (key->base); - } - - return res->replace; -} - + /* Reject incorrectly represented bit fields. */ + if (DECL_BIT_FIELD (t) + && (tree_low_cst (DECL_SIZE (t), 1) + != TYPE_PRECISION (TREE_TYPE (t)))) + goto fail; -/* Given a structure reference VAR.FIELD, return a scalar representing it. - If no scalar is found, a new one is created and added to the SRA_MAP - matrix. */ - -static tree -get_scalar_for_field (tree var, tree field) -{ - struct sra_elt key; - -#ifdef ENABLE_CHECKING - /* Validate that FIELD actually exists in VAR's type. */ - { - tree f; - for (f = TYPE_FIELDS (TREE_TYPE (var)); f ; f = TREE_CHAIN (f)) - if (f == field) - goto found; - abort (); - found:; - } -#endif - - key.kind = COMPONENT_REF; - key.base = var; - key.field = field; + saw_one_field = true; + } - return lookup_scalar (&key, TREE_TYPE (field)); -} + /* Record types must have at least one field. */ + if (!saw_one_field) + goto fail; + } + break; + case ARRAY_TYPE: + /* Array types must have a fixed lower and upper bound. */ + t = TYPE_DOMAIN (type); + if (t == NULL) + goto fail; + if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t))) + goto fail; + if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t))) + goto fail; + break; -/* Similarly for the parts of a complex type. */ + case COMPLEX_TYPE: + break; -static tree -get_scalar_for_complex_part (tree var, enum tree_code part) -{ - struct sra_elt key; + default: + goto fail; + } - key.kind = part; - key.base = var; + bitmap_set_bit (sra_type_decomp_cache, cache+0); + return true; - return lookup_scalar (&key, TREE_TYPE (TREE_TYPE (var))); + fail: + bitmap_set_bit (sra_type_decomp_cache, cache+1); + return false; } -/* Return true if the fields of VAR can be replaced by scalar temporaries. - This only happens if VAR is not call-clobbered and it contains less - than MAX_NFIELDS_FOR_SRA scalar fields. */ +/* Return true if DECL can be decomposed into a set of independent + (though not necessarily scalar) variables. */ -static inline bool -can_be_scalarized_p (tree var) +static bool +decl_can_be_decomposed_p (tree var) { - tree field, type; - int nfields; + /* Early out for scalars. */ + if (is_sra_scalar_type (TREE_TYPE (var))) + return false; + /* The variable must not be aliased. 
*/ if (!is_gimple_non_addressable (var)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -288,6 +263,7 @@ can_be_scalarized_p (tree var) return false; } + /* The variable must not be volatile. */ if (TREE_THIS_VOLATILE (var)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -299,863 +275,1838 @@ can_be_scalarized_p (tree var) return false; } - /* Any COMPLEX_TYPE that has reached this point can be scalarized. */ - if (TREE_CODE (TREE_TYPE (var)) == COMPLEX_TYPE) + /* We must be able to decompose the variable's type. */ + if (!type_can_be_decomposed_p (TREE_TYPE (var))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cannot scalarize variable "); + print_generic_expr (dump_file, var, dump_flags); + fprintf (dump_file, " because its type cannot be decomposed\n"); + } + return false; + } + + return true; +} + +/* Return true if TYPE can be *completely* decomposed into scalars. */ + +static bool +type_can_instantiate_all_elements (tree type) +{ + if (is_sra_scalar_type (type)) return true; + if (!type_can_be_decomposed_p (type)) + return false; - type = TREE_TYPE (var); - nfields = 0; - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + switch (TREE_CODE (type)) { - if (TREE_CODE (field) != FIELD_DECL) - continue; + case RECORD_TYPE: + { + unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; + tree f; - /* FIXME: We really should recurse down the type hierarchy and - scalarize the fields at the leaves. */ - if (AGGREGATE_TYPE_P (TREE_TYPE (field))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains an aggregate type field, "); - print_generic_expr (dump_file, field, dump_flags); - fprintf (dump_file, "\n"); - } + if (bitmap_bit_p (sra_type_inst_cache, cache+0)) + return true; + if (bitmap_bit_p (sra_type_inst_cache, cache+1)) return false; - } - /* FIXME: Similarly. Indeed, considering that we treat complex - as an aggregate, this is exactly the same problem. - Structures with __complex__ fields are tested in the libstdc++ - testsuite: 26_numerics/complex_inserters_extractors.cc. */ - if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) - { - if (dump_file && (dump_flags & TDF_DETAILS)) + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains a __complex__ field, "); - print_generic_expr (dump_file, field, dump_flags); - fprintf (dump_file, "\n"); + if (!type_can_instantiate_all_elements (TREE_TYPE (f))) + { + bitmap_set_bit (sra_type_inst_cache, cache+1); + return false; + } } - return false; - } - /* FIXME. We don't scalarize structures with bit fields yet. To - support this, we should make sure that all the fields fit in one - word and modify every operation done on the scalarized bit fields - to mask them properly. 
*/ - if (DECL_BIT_FIELD (field)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains a bit-field, "); - print_generic_expr (dump_file, field, dump_flags); - fprintf (dump_file, "\n"); - } - return false; - } + bitmap_set_bit (sra_type_inst_cache, cache+0); + return true; + } - nfields++; - if (nfields > MAX_NFIELDS_FOR_SRA) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains more than %d fields\n", - MAX_NFIELDS_FOR_SRA); - } - return false; - } - } + case ARRAY_TYPE: + return type_can_instantiate_all_elements (TREE_TYPE (type)); - /* If the structure had no FIELD_DECLs, then don't bother - scalarizing it. */ - return nfields > 0; -} + case COMPLEX_TYPE: + return true; + default: + gcc_unreachable (); + } +} -/* Replace the COMPONENT_REF, REALPART_EXPR or IMAGPART_EXPR pointed-to by - TP inside STMT with the corresponding scalar replacement from SRA_MAP. */ +/* Test whether ELT or some sub-element cannot be scalarized. */ -static inline void -scalarize_component_ref (tree stmt, tree *tp) +static bool +can_completely_scalarize_p (struct sra_elt *elt) { - tree t = *tp, obj = TREE_OPERAND (t, 0); + struct sra_elt *c; + + if (elt->cannot_scalarize) + return false; - /* When scalarizing a function argument, we will need to insert copy-in - operations from the original PARM_DECLs. Note that these copy-in - operations may end up being dead, but we won't know until we rename - the new variables into SSA. */ - if (TREE_CODE (obj) == PARM_DECL) - bitmap_set_bit (needs_copy_in, var_ann (obj)->uid); + for (c = elt->children; c ; c = c->sibling) + if (!can_completely_scalarize_p (c)) + return false; + + return true; +} + + +/* A simplified tree hashing algorithm that only handles the types of + trees we expect to find in sra_elt->element. */ + +static hashval_t +sra_hash_tree (tree t) +{ + hashval_t h; switch (TREE_CODE (t)) { - case COMPONENT_REF: - t = get_scalar_for_field (obj, TREE_OPERAND (t, 1)); + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + h = DECL_UID (t); break; - case REALPART_EXPR: - case IMAGPART_EXPR: - t = get_scalar_for_complex_part (obj, TREE_CODE (t)); + + case INTEGER_CST: + h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t); + break; + + case FIELD_DECL: + /* We can have types that are compatible, but have different member + lists, so we can't hash fields by ID. Use offsets instead. */ + h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0); + h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h); break; + default: - abort (); + gcc_unreachable (); } - *tp = t; - modify_stmt (stmt); + return h; } +/* Hash function for type SRA_PAIR. */ + +static hashval_t +sra_elt_hash (const void *x) +{ + const struct sra_elt *e = x; + const struct sra_elt *p; + hashval_t h; -/* Scalarize the structure assignment for the statement pointed by SI_P. */ + h = sra_hash_tree (e->element); -static void -scalarize_structure_assignment (block_stmt_iterator *si_p) + /* Take into account everything back up the chain. Given that chain + lengths are rarely very long, this should be acceptable. If we + truly identify this as a performance problem, it should work to + hash the pointer value "e->parent". 
*/ + for (p = e->parent; p ; p = p->parent) + h = (h * 65521) ^ sra_hash_tree (p->element); + + return h; +} + +/* Equality function for type SRA_PAIR. */ + +static int +sra_elt_eq (const void *x, const void *y) { - var_ann_t lhs_ann, rhs_ann; - tree lhs, rhs, list, orig_stmt; - bool lhs_can, rhs_can; - - orig_stmt = bsi_stmt (*si_p); - lhs = TREE_OPERAND (orig_stmt, 0); - rhs = TREE_OPERAND (orig_stmt, 1); - list = NULL_TREE; - -#if defined ENABLE_CHECKING - if (TREE_CODE (orig_stmt) != MODIFY_EXPR) - abort (); -#endif + const struct sra_elt *a = x; + const struct sra_elt *b = y; + tree ae, be; - /* Remove all type casts from RHS. This may seem heavy handed but - it's actually safe and it is necessary in the presence of C++ - reinterpret_cast<> where structure assignments of different - structures will be present in the IL. This was the case of PR - 13347 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13347) which - had something like this: - - struct A f; - struct B g; - f = (struct A)g; - - Both 'f' and 'g' were scalarizable, but the presence of the type - cast was causing SRA to not replace the RHS of the assignment - with g's scalar replacements. Furthermore, the fact that this - assignment reached this point without causing syntax errors means - that the type cast is safe and that a field-by-field assignment - from 'g' into 'f' is the right thing to do. */ - STRIP_NOPS (rhs); - - lhs_ann = DECL_P (lhs) ? var_ann (lhs) : NULL; - rhs_ann = DECL_P (rhs) ? var_ann (rhs) : NULL; - -#if defined ENABLE_CHECKING - /* Two different variables should not have the same UID. */ - if (lhs_ann - && rhs_ann - && lhs != rhs - && lhs_ann->uid == rhs_ann->uid) - abort (); -#endif + if (a->parent != b->parent) + return false; - lhs_can = lhs_ann && bitmap_bit_p (sra_candidates, lhs_ann->uid); - rhs_can = rhs_ann && bitmap_bit_p (sra_candidates, rhs_ann->uid); + ae = a->element; + be = b->element; - /* Both LHS and RHS are scalarizable. */ - if (lhs_can && rhs_can) - list = create_scalar_copies (lhs, rhs, SCALAR_SCALAR); + if (ae == be) + return true; + if (TREE_CODE (ae) != TREE_CODE (be)) + return false; - /* Only RHS is scalarizable. */ - else if (rhs_can) - list = create_scalar_copies (lhs, rhs, FIELD_SCALAR); + switch (TREE_CODE (ae)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + /* These are all pointer unique. */ + return false; - /* Only LHS is scalarizable. */ - else if (lhs_can) - list = create_scalar_copies (lhs, rhs, SCALAR_FIELD); + case INTEGER_CST: + /* Integers are not pointer unique, so compare their values. */ + return tree_int_cst_equal (ae, be); - /* If neither side is scalarizable, do nothing else. */ - else - return; + case FIELD_DECL: + /* Fields are unique within a record, but not between + compatible records. */ + if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be)) + return false; + return fields_compatible_p (ae, be); - /* Set line number information for our replacements. */ - if (EXPR_HAS_LOCATION (orig_stmt)) - annotate_all_with_locus (&list, EXPR_LOCATION (orig_stmt)); - - /* Replace the existing statement with the newly created list of - scalarized copies. When replacing the original statement, the first - copy replaces it and the remaining copies are inserted either after - the first copy or on the outgoing edges of the original statement's - block. 
*/ - { - tree_stmt_iterator tsi = tsi_start (list); - bsi_replace (si_p, tsi_stmt (tsi), true); - tsi_delink (&tsi); - if (stmt_ends_bb_p (orig_stmt)) - insert_edge_copies (list, bb_for_stmt (orig_stmt)); - else - bsi_insert_after (si_p, list, BSI_CONTINUE_LINKING); - } + default: + gcc_unreachable (); + } } +/* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT + may be null, in which case CHILD must be a DECL. */ -/* Traverse all the referenced variables in the program looking for - structures that could be replaced with scalars. */ - -static bool -find_candidates_for_sra (void) +static struct sra_elt * +lookup_element (struct sra_elt *parent, tree child, tree type, + enum insert_option insert) { - size_t i; - bool any_set = false; + struct sra_elt dummy; + struct sra_elt **slot; + struct sra_elt *elt; - for (i = 0; i < num_referenced_vars; i++) + dummy.parent = parent; + dummy.element = child; + + slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert); + if (!slot && insert == NO_INSERT) + return NULL; + + elt = *slot; + if (!elt && insert == INSERT) { - tree var = referenced_var (i); + *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt)); + memset (elt, 0, sizeof (*elt)); + + elt->parent = parent; + elt->element = child; + elt->type = type; + elt->is_scalar = is_sra_scalar_type (type); - if ((TREE_CODE (TREE_TYPE (var)) == RECORD_TYPE - || TREE_CODE (TREE_TYPE (var)) == COMPLEX_TYPE) - && can_be_scalarized_p (var)) + if (parent) { - bitmap_set_bit (sra_candidates, var_ann (var)->uid); - any_set = true; + elt->sibling = parent->children; + parent->children = elt; + } + + /* If this is a parameter, then if we want to scalarize, we have + one copy from the true function parameter. Count it now. */ + if (TREE_CODE (child) == PARM_DECL) + { + elt->n_copies = 1; + bitmap_set_bit (needs_copy_in, var_ann (child)->uid); } } - return any_set; + return elt; } +/* Return true if the ARRAY_REF in EXPR is a constant, in bounds access. */ -/* Insert STMT on all the outgoing edges out of BB. Note that if BB - has more than one edge, STMT will be replicated for each edge. Also, - abnormal edges will be ignored. */ - -static inline void -insert_edge_copies (tree stmt, basic_block bb) +static bool +is_valid_const_index (tree expr) { - edge e; - bool first_copy; + tree dom, t, index = TREE_OPERAND (expr, 1); - first_copy = true; - for (e = bb->succ; e; e = e->succ_next) - { - /* We don't need to insert copies on abnormal edges. The - value of the scalar replacement is not guaranteed to - be valid through an abnormal edge. */ - if (!(e->flags & EDGE_ABNORMAL)) - { - if (first_copy) - { - bsi_insert_on_edge (e, stmt); - first_copy = false; - } - else - bsi_insert_on_edge (e, lhd_unsave_expr_now (stmt)); - } - } -} + if (TREE_CODE (index) != INTEGER_CST) + return false; + /* Watch out for stupid user tricks, indexing outside the array. -/* Append a new assignment statement to TSI. */ + Careful, we're not called only on scalarizable types, so do not + assume constant array bounds. We needn't do anything with such + cases, since they'll be referring to objects that we should have + already rejected for scalarization, so returning false is fine. 
*/ -static tree -csc_assign (tree_stmt_iterator *tsi, tree lhs, tree rhs) -{ - tree stmt = build (MODIFY_EXPR, TREE_TYPE (lhs), lhs, rhs); - modify_stmt (stmt); - tsi_link_after (tsi, stmt, TSI_NEW_STMT); - return stmt; + dom = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (expr, 0))); + if (dom == NULL) + return false; + + t = TYPE_MIN_VALUE (dom); + if (!t || TREE_CODE (t) != INTEGER_CST) + return false; + if (tree_int_cst_lt (index, t)) + return false; + + t = TYPE_MAX_VALUE (dom); + if (!t || TREE_CODE (t) != INTEGER_CST) + return false; + if (tree_int_cst_lt (t, index)) + return false; + + return true; } -/* A subroutine of create_scalar_copies. Construct a COMPONENT_REF - expression for BASE referencing FIELD. INDEX is the field index. */ +/* Create or return the SRA_ELT structure for EXPR if the expression + refers to a scalarizable variable. */ -static tree -csc_build_component_ref (tree base, tree field) +static struct sra_elt * +maybe_lookup_element_for_expr (tree expr) { - switch (TREE_CODE (base)) + struct sra_elt *elt; + tree child; + + switch (TREE_CODE (expr)) { - case CONSTRUCTOR: - /* Only appears on RHS. The only remaining CONSTRUCTORS for - record types that should remain are empty, and imply that - the entire structure should be zeroed. */ - if (CONSTRUCTOR_ELTS (base)) - abort (); - return convert (TREE_TYPE (field), integer_zero_node); + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + if (is_sra_candidate_decl (expr)) + return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT); + return NULL; + + case ARRAY_REF: + /* We can't scalarize variable array indicies. */ + if (is_valid_const_index (expr)) + child = TREE_OPERAND (expr, 1); + else + return NULL; + break; - default: - /* Avoid sharing BASE when building the different COMPONENT_REFs. - We let the first field have the original version. */ - if (field != TYPE_FIELDS (TREE_TYPE (base))) - base = unshare_expr (base); + case COMPONENT_REF: + /* Don't look through unions. */ + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE) + return NULL; + child = TREE_OPERAND (expr, 1); break; - case VAR_DECL: - case PARM_DECL: - /* Special case for the above -- decls are always shared. */ + case REALPART_EXPR: + child = integer_zero_node; break; + case IMAGPART_EXPR: + child = integer_one_node; + break; + + default: + return NULL; } - return build (COMPONENT_REF, TREE_TYPE (field), base, field); + elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0)); + if (elt) + return lookup_element (elt, child, TREE_TYPE (expr), INSERT); + return NULL; } -/* Similarly for REALPART_EXPR and IMAGPART_EXPR for complex types. */ + +/* Functions to walk just enough of the tree to see all scalarizable + references, and categorize them. */ -static tree -csc_build_complex_part (tree base, enum tree_code part) +/* A set of callbacks for phases 2 and 4. They'll be invoked for the + various kinds of references seen. In all cases, *BSI is an iterator + pointing to the statement being processed. */ +struct sra_walk_fns { - switch (TREE_CODE (base)) - { - case COMPLEX_CST: - if (part == REALPART_EXPR) - return TREE_REALPART (base); - else - return TREE_IMAGPART (base); + /* Invoked when ELT is required as a unit. Note that ELT might refer to + a leaf node, in which case this is a simple scalar reference. *EXPR_P + points to the location of the expression. IS_OUTPUT is true if this + is a left-hand-side reference. 
*/ + void (*use) (struct sra_elt *elt, tree *expr_p, + block_stmt_iterator *bsi, bool is_output); + + /* Invoked when we have a copy between two scalarizable references. */ + void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + block_stmt_iterator *bsi); + + /* Invoked when ELT is initialized from a constant. VALUE may be NULL, + in which case it should be treated as an empty CONSTRUCTOR. */ + void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi); + + /* Invoked when we have a copy between one scalarizable reference ELT + and one non-scalarizable reference OTHER. IS_OUTPUT is true if ELT + is on the left-hand side. */ + void (*ldst) (struct sra_elt *elt, tree other, + block_stmt_iterator *bsi, bool is_output); + + /* True during phase 2, false during phase 4. */ + /* ??? This is a hack. */ + bool initial_scan; +}; - case COMPLEX_EXPR: - if (part == REALPART_EXPR) - return TREE_OPERAND (base, 0); - else - return TREE_OPERAND (base, 1); +#ifdef ENABLE_CHECKING +/* Invoked via walk_tree, if *TP contains a candidate decl, return it. */ - default: - /* Avoid sharing BASE when building the different references. - We let the real part have the original version. */ - if (part != REALPART_EXPR) - base = unshare_expr (base); - break; +static tree +sra_find_candidate_decl (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + tree t = *tp; + enum tree_code code = TREE_CODE (t); - case VAR_DECL: - case PARM_DECL: - /* Special case for the above -- decls are always shared. */ - break; + if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL) + { + *walk_subtrees = 0; + if (is_sra_candidate_decl (t)) + return t; } + else if (TYPE_P (t)) + *walk_subtrees = 0; - return build1 (part, TREE_TYPE (TREE_TYPE (base)), base); + return NULL; } +#endif -/* Create and return a list of assignments to perform a scalarized - structure assignment 'LHS = RHS'. Both LHS and RHS are assumed to be - of an aggregate or complex type. Three types of copies may be specified: +/* Walk most expressions looking for a scalarizable aggregate. + If we find one, invoke FNS->USE. */ - SCALAR_SCALAR will emit assignments for all the scalar temporaries - corresponding to the fields of LHS and RHS. +static void +sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output, + const struct sra_walk_fns *fns) +{ + tree expr = *expr_p; + tree inner = expr; + bool disable_scalarization = false; + + /* We're looking to collect a reference expression between EXPR and INNER, + such that INNER is a scalarizable decl and all other nodes through EXPR + are references that we can scalarize. If we come across something that + we can't scalarize, we reset EXPR. This has the effect of making it + appear that we're referring to the larger expression as a whole. */ + + while (1) + switch (TREE_CODE (inner)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + /* If there is a scalarizable decl at the bottom, then process it. */ + if (is_sra_candidate_decl (inner)) + { + struct sra_elt *elt = maybe_lookup_element_for_expr (expr); + if (disable_scalarization) + elt->cannot_scalarize = true; + else + fns->use (elt, expr_p, bsi, is_output); + } + return; + + case ARRAY_REF: + /* Non-constant index means any member may be accessed. Prevent the + expression from being scalarized. If we were to treat this as a + reference to the whole array, we can wind up with a single dynamic + index reference inside a loop being overridden by several constant + index references during loop setup. 
It's possible that this could + be avoided by using dynamic usage counts based on BB trip counts + (based on loop analysis or profiling), but that hardly seems worth + the effort. */ + /* ??? Hack. Figure out how to push this into the scan routines + without duplicating too much code. */ + if (!is_valid_const_index (inner)) + { + disable_scalarization = true; + goto use_all; + } + /* ??? Are we assured that non-constant bounds and stride will have + the same value everywhere? I don't think Fortran will... */ + if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) + goto use_all; + inner = TREE_OPERAND (inner, 0); + break; - FIELD_SCALAR will emit assignments from the scalar replacements of - RHS into each of the fields of the LHS. + case COMPONENT_REF: + /* A reference to a union member constitutes a reference to the + entire union. */ + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE) + goto use_all; + /* ??? See above re non-constant stride. */ + if (TREE_OPERAND (inner, 2)) + goto use_all; + inner = TREE_OPERAND (inner, 0); + break; - SCALAR_FIELD will emit assignments from each field of the RHS into - the scalar replacements of the LHS. */ + case REALPART_EXPR: + case IMAGPART_EXPR: + inner = TREE_OPERAND (inner, 0); + break; -static tree -create_scalar_copies (tree lhs, tree rhs, enum sra_copy_mode mode) -{ - tree type, list; - tree_stmt_iterator tsi; - -#if defined ENABLE_CHECKING - /* Sanity checking. Check that we are not trying to scalarize a - non-decl. */ - if (!DECL_P (lhs) && (mode == SCALAR_FIELD || mode == SCALAR_SCALAR)) - abort (); - if (!DECL_P (rhs) && (mode == FIELD_SCALAR || mode == SCALAR_SCALAR)) - abort (); + case BIT_FIELD_REF: + /* A bit field reference (access to *multiple* fields simultaneously) + is not currently scalarized. Consider this an access to the + complete outer element, to which walk_tree will bring us next. */ + goto use_all; + + case ARRAY_RANGE_REF: + /* Similarly, an subrange reference is used to modify indexing. Which + means that the canonical element names that we have won't work. */ + goto use_all; + + case VIEW_CONVERT_EXPR: + case NOP_EXPR: + /* Similarly, a view/nop explicitly wants to look at an object in a + type other than the one we've scalarized. */ + goto use_all; + + case WITH_SIZE_EXPR: + /* This is a transparent wrapper. The entire inner expression really + is being used. */ + goto use_all; + + use_all: + expr_p = &TREE_OPERAND (inner, 0); + inner = expr = *expr_p; + break; + + default: +#ifdef ENABLE_CHECKING + /* Validate that we're not missing any references. */ + gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL)); #endif + return; + } +} - type = TREE_TYPE (lhs); - list = alloc_stmt_list (); - tsi = tsi_start (list); +/* Walk a TREE_LIST of values looking for scalarizable aggregates. + If we find one, invoke FNS->USE. */ - /* VA_ARG_EXPRs have side effects, so we need to copy it first to a - temporary before scalarizing. FIXME: This should disappear once - VA_ARG_EXPRs are properly lowered. */ - if (TREE_CODE (rhs) == VA_ARG_EXPR) - { - tree stmt, tmp; +static void +sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output, + const struct sra_walk_fns *fns) +{ + tree op; + for (op = list; op ; op = TREE_CHAIN (op)) + sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns); +} + +/* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates. + If we find one, invoke FNS->USE. 
*/ + +static void +sra_walk_call_expr (tree expr, block_stmt_iterator *bsi, + const struct sra_walk_fns *fns) +{ + sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns); +} + +/* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable + aggregates. If we find one, invoke FNS->USE. */ + +static void +sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi, + const struct sra_walk_fns *fns) +{ + sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns); + sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns); +} - /* Add TMP = VA_ARG_EXPR <> */ - tmp = make_rename_temp (TREE_TYPE (rhs), NULL); - stmt = csc_assign (&tsi, tmp, rhs); +/* Walk a MODIFY_EXPR and categorize the assignment appropriately. */ + +static void +sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi, + const struct sra_walk_fns *fns) +{ + struct sra_elt *lhs_elt, *rhs_elt; + tree lhs, rhs; - /* Mark all the variables in VDEF operands for renaming, because - the VA_ARG_EXPR will now be in a different statement. */ - mark_all_vdefs (stmt); + lhs = TREE_OPERAND (expr, 0); + rhs = TREE_OPERAND (expr, 1); + lhs_elt = maybe_lookup_element_for_expr (lhs); + rhs_elt = maybe_lookup_element_for_expr (rhs); - /* Set RHS to be the new temporary TMP. */ - rhs = tmp; + /* If both sides are scalarizable, this is a COPY operation. */ + if (lhs_elt && rhs_elt) + { + fns->copy (lhs_elt, rhs_elt, bsi); + return; } - /* When making *_SCALAR copies from PARM_DECLs, we will need to insert - copy-in operations from the original PARM_DECLs. Note that these - copy-in operations may end up being dead, but we won't know until - we rename the new variables into SSA. */ - if ((mode == SCALAR_SCALAR || mode == FIELD_SCALAR) - && TREE_CODE (rhs) == PARM_DECL) - bitmap_set_bit (needs_copy_in, var_ann (rhs)->uid); + /* If the RHS is scalarizable, handle it. There are only two cases. */ + if (rhs_elt) + { + if (!rhs_elt->is_scalar) + fns->ldst (rhs_elt, lhs, bsi, false); + else + fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false); + } - /* Now create scalar copies for each individual field according to MODE. */ - if (TREE_CODE (type) == COMPLEX_TYPE) + /* If it isn't scalarizable, there may be scalarizable variables within, so + check for a call or else walk the RHS to see if we need to do any + copy-in operations. We need to do it before the LHS is scalarized so + that the statements get inserted in the proper place, before any + copy-out operations. */ + else { - /* Create scalar copies of both the real and imaginary parts. */ - tree real_lhs, real_rhs, imag_lhs, imag_rhs; + tree call = get_call_expr_in (rhs); + if (call) + sra_walk_call_expr (call, bsi, fns); + else + sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns); + } - if (mode == SCALAR_FIELD) - { - real_rhs = csc_build_complex_part (rhs, REALPART_EXPR); - imag_rhs = csc_build_complex_part (rhs, IMAGPART_EXPR); - } + /* Likewise, handle the LHS being scalarizable. We have cases similar + to those above, but also want to handle RHS being constant. */ + if (lhs_elt) + { + /* If this is an assignment from a constant, or constructor, then + we have access to all of the elements individually. Invoke INIT. */ + if (TREE_CODE (rhs) == COMPLEX_EXPR + || TREE_CODE (rhs) == COMPLEX_CST + || TREE_CODE (rhs) == CONSTRUCTOR) + fns->init (lhs_elt, rhs, bsi); + + /* If this is an assignment from read-only memory, treat this as if + we'd been passed the constructor directly. Invoke INIT. 
*/ + else if (TREE_CODE (rhs) == VAR_DECL + && TREE_STATIC (rhs) + && TREE_READONLY (rhs) + && targetm.binds_local_p (rhs)) + fns->init (lhs_elt, DECL_INITIAL (rhs), bsi); + + /* If this is a copy from a non-scalarizable lvalue, invoke LDST. + The lvalue requirement prevents us from trying to directly scalarize + the result of a function call. Which would result in trying to call + the function multiple times, and other evil things. */ + else if (!lhs_elt->is_scalar && is_gimple_addressable (rhs)) + fns->ldst (lhs_elt, rhs, bsi, true); + + /* Otherwise we're being used in some context that requires the + aggregate to be seen as a whole. Invoke USE. */ else + fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true); + } + + /* Similarly to above, LHS_ELT being null only means that the LHS as a + whole is not a scalarizable reference. There may be occurrences of + scalarizable variables within, which implies a USE. */ + else + sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns); +} + +/* Entry point to the walk functions. Search the entire function, + invoking the callbacks in FNS on each of the references to + scalarizable variables. */ + +static void +sra_walk_function (const struct sra_walk_fns *fns) +{ + basic_block bb; + block_stmt_iterator si, ni; + + /* ??? Phase 4 could derive some benefit to walking the function in + dominator tree order. */ + + FOR_EACH_BB (bb) + for (si = bsi_start (bb); !bsi_end_p (si); si = ni) + { + tree stmt, t; + stmt_ann_t ann; + + stmt = bsi_stmt (si); + ann = stmt_ann (stmt); + + ni = si; + bsi_next (&ni); + + /* If the statement has no virtual operands, then it doesn't + make any structure references that we care about. */ + if (NUM_V_MAY_DEFS (V_MAY_DEF_OPS (ann)) == 0 + && NUM_VUSES (VUSE_OPS (ann)) == 0 + && NUM_V_MUST_DEFS (V_MUST_DEF_OPS (ann)) == 0) + continue; + + switch (TREE_CODE (stmt)) + { + case RETURN_EXPR: + /* If we have "return " then the return value is + already exposed for our pleasure. Walk it as a USE to + force all the components back in place for the return. + + If we have an embedded assignment, then is of + a type that gets returned in registers in this ABI, and + we do not wish to extend their lifetimes. Treat this + as a USE of the variable on the RHS of this assignment. */ + + t = TREE_OPERAND (stmt, 0); + if (TREE_CODE (t) == MODIFY_EXPR) + sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns); + else + sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns); + break; + + case MODIFY_EXPR: + sra_walk_modify_expr (stmt, &si, fns); + break; + case CALL_EXPR: + sra_walk_call_expr (stmt, &si, fns); + break; + case ASM_EXPR: + sra_walk_asm_expr (stmt, &si, fns); + break; + + default: + break; + } + } +} + +/* Phase One: Scan all referenced variables in the program looking for + structures that could be decomposed. */ + +static bool +find_candidates_for_sra (void) +{ + size_t i; + bool any_set = false; + + for (i = 0; i < num_referenced_vars; i++) + { + tree var = referenced_var (i); + if (decl_can_be_decomposed_p (var)) + { + bitmap_set_bit (sra_candidates, var_ann (var)->uid); + any_set = true; + } + } + + return any_set; +} + + +/* Phase Two: Scan all references to scalarizable variables. Count the + number of times they are used or copied respectively. */ + +/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is + considered a copy, because we can decompose the reference such that + the sub-elements needn't be contiguous. 
*/ + +static void +scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED, + bool is_output ATTRIBUTE_UNUSED) +{ + elt->n_uses += 1; +} + +static void +scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED) +{ + lhs_elt->n_copies += 1; + rhs_elt->n_copies += 1; +} + +static void +scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED) +{ + lhs_elt->n_copies += 1; +} + +static void +scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED, + bool is_output ATTRIBUTE_UNUSED) +{ + elt->n_copies += 1; +} + +/* Dump the values we collected during the scanning phase. */ + +static void +scan_dump (struct sra_elt *elt) +{ + struct sra_elt *c; + + dump_sra_elt_name (dump_file, elt); + fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies); + + for (c = elt->children; c ; c = c->sibling) + scan_dump (c); +} + +/* Entry point to phase 2. Scan the entire function, building up + scalarization data structures, recording copies and uses. */ + +static void +scan_function (void) +{ + static const struct sra_walk_fns fns = { + scan_use, scan_copy, scan_init, scan_ldst, true + }; + bitmap_iterator bi; + + sra_walk_function (&fns); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned i; + + fputs ("\nScan results:\n", dump_file); + EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) { - real_rhs = get_scalar_for_complex_part (rhs, REALPART_EXPR); - imag_rhs = get_scalar_for_complex_part (rhs, IMAGPART_EXPR); + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + if (elt) + scan_dump (elt); } + fputc ('\n', dump_file); + } +} + +/* Phase Three: Make decisions about which variables to scalarize, if any. + All elements to be scalarized have replacement variables made for them. */ - if (mode == FIELD_SCALAR) - { - /* In this case we do not need to create but one statement, - since we can create a new complex value whole. */ +/* A subroutine of build_element_name. Recursively build the element + name on the obstack. */ + +static void +build_element_name_1 (struct sra_elt *elt) +{ + tree t; + char buffer[32]; - if (TREE_CONSTANT (real_rhs) && TREE_CONSTANT (imag_rhs)) - rhs = build_complex (type, real_rhs, imag_rhs); + if (elt->parent) + { + build_element_name_1 (elt->parent); + obstack_1grow (&sra_obstack, '$'); + + if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE) + { + if (elt->element == integer_zero_node) + obstack_grow (&sra_obstack, "real", 4); else - rhs = build (COMPLEX_EXPR, type, real_rhs, imag_rhs); - csc_assign (&tsi, lhs, rhs); + obstack_grow (&sra_obstack, "imag", 4); + return; } + } + + t = elt->element; + if (TREE_CODE (t) == INTEGER_CST) + { + /* ??? Eh. Don't bother doing double-wide printing. 
*/ + sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + } + else + { + tree name = DECL_NAME (t); + if (name) + obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name), + IDENTIFIER_LENGTH (name)); else { - real_lhs = get_scalar_for_complex_part (lhs, REALPART_EXPR); - imag_lhs = get_scalar_for_complex_part (lhs, IMAGPART_EXPR); - - csc_assign (&tsi, real_lhs, real_rhs); - csc_assign (&tsi, imag_lhs, imag_rhs); + sprintf (buffer, "D%u", DECL_UID (t)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); } } +} + +/* Construct a pretty variable name for an element's replacement variable. + The name is built on the obstack. */ + +static char * +build_element_name (struct sra_elt *elt) +{ + build_element_name_1 (elt); + obstack_1grow (&sra_obstack, '\0'); + return obstack_finish (&sra_obstack); +} + +/* Instantiate an element as an independent variable. */ + +static void +instantiate_element (struct sra_elt *elt) +{ + struct sra_elt *base_elt; + tree var, base; + + for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent) + continue; + base = base_elt->element; + + elt->replacement = var = make_rename_temp (elt->type, "SR"); + DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base); + DECL_ARTIFICIAL (var) = 1; + + if (TREE_THIS_VOLATILE (elt->type)) + { + TREE_THIS_VOLATILE (var) = 1; + TREE_SIDE_EFFECTS (var) = 1; + } + + if (DECL_NAME (base) && !DECL_IGNORED_P (base)) + { + char *pretty_name = build_element_name (elt); + DECL_NAME (var) = get_identifier (pretty_name); + obstack_free (&sra_obstack, pretty_name); + + DECL_DEBUG_EXPR (var) = generate_element_ref (elt); + DECL_DEBUG_EXPR_IS_FROM (var) = 1; + + DECL_IGNORED_P (var) = 0; + TREE_NO_WARNING (var) = TREE_NO_WARNING (base); + } else { - tree lf, rf; + DECL_IGNORED_P (var) = 1; + /* ??? We can't generate any warning that would be meaningful. */ + TREE_NO_WARNING (var) = 1; + } - /* ??? C++ generates copies between different pointer-to-member - structures of different types. To combat this, we must track - the field of both the left and right structures, so that we - index the variables with fields of their own type. */ + if (dump_file) + { + fputs (" ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputs (" -> ", dump_file); + print_generic_expr (dump_file, var, dump_flags); + fputc ('\n', dump_file); + } +} - for (lf = TYPE_FIELDS (type), rf = TYPE_FIELDS (TREE_TYPE (rhs)); - lf; - lf = TREE_CHAIN (lf), rf = TREE_CHAIN (rf)) - { - tree lhs_var, rhs_var; +/* Make one pass across an element tree deciding whether or not it's + profitable to instantiate individual leaf scalars. - /* Only copy FIELD_DECLs. */ - if (TREE_CODE (lf) != FIELD_DECL) - continue; + PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES + fields all the way up the tree. 
*/ - if (mode == FIELD_SCALAR) - lhs_var = csc_build_component_ref (lhs, lf); - else - lhs_var = get_scalar_for_field (lhs, lf); +static void +decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses, + unsigned int parent_copies) +{ + if (dump_file && !elt->parent) + { + fputs ("Initial instantiation for ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } - if (mode == SCALAR_FIELD) - rhs_var = csc_build_component_ref (rhs, rf); - else - rhs_var = get_scalar_for_field (rhs, rf); + if (elt->cannot_scalarize) + return; + + if (elt->is_scalar) + { + /* The decision is simple: instantiate if we're used more frequently + than the parent needs to be seen as a complete unit. */ + if (elt->n_uses + elt->n_copies + parent_copies > parent_uses) + instantiate_element (elt); + } + else + { + struct sra_elt *c; + unsigned int this_uses = elt->n_uses + parent_uses; + unsigned int this_copies = elt->n_copies + parent_copies; + + for (c = elt->children; c ; c = c->sibling) + decide_instantiation_1 (c, this_uses, this_copies); + } +} + +/* Compute the size and number of all instantiated elements below ELT. + We will only care about this if the size of the complete structure + fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */ + +static unsigned int +sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep) +{ + if (elt->replacement) + { + *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type)); + return 1; + } + else + { + struct sra_elt *c; + unsigned int count = 0; + + for (c = elt->children; c ; c = c->sibling) + count += sum_instantiated_sizes (c, sizep); + + return count; + } +} + +/* Instantiate fields in ELT->TYPE that are not currently present as + children of ELT. */ + +static void instantiate_missing_elements (struct sra_elt *elt); + +static void +instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type) +{ + struct sra_elt *sub = lookup_element (elt, child, type, INSERT); + if (sub->is_scalar) + { + if (sub->replacement == NULL) + instantiate_element (sub); + } + else + instantiate_missing_elements (sub); +} - csc_assign (&tsi, lhs_var, rhs_var); +static void +instantiate_missing_elements (struct sra_elt *elt) +{ + tree type = elt->type; + + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + tree f; + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + instantiate_missing_elements_1 (elt, f, TREE_TYPE (f)); + break; + } + + case ARRAY_TYPE: + { + tree i, max, subtype; + + i = TYPE_MIN_VALUE (TYPE_DOMAIN (type)); + max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + subtype = TREE_TYPE (type); + + while (1) + { + instantiate_missing_elements_1 (elt, i, subtype); + if (tree_int_cst_equal (i, max)) + break; + i = int_const_binop (PLUS_EXPR, i, integer_one_node, true); + } + + break; + } + + case COMPLEX_TYPE: + type = TREE_TYPE (type); + instantiate_missing_elements_1 (elt, integer_zero_node, type); + instantiate_missing_elements_1 (elt, integer_one_node, type); + break; + + default: + gcc_unreachable (); + } +} + +/* Make one pass across an element tree deciding whether to perform block + or element copies. If we decide on element copies, instantiate all + elements. Return true if there are any instantiated sub-elements. */ + +static bool +decide_block_copy (struct sra_elt *elt) +{ + struct sra_elt *c; + bool any_inst; + + /* If scalarization is disabled, respect it. 
*/ + if (elt->cannot_scalarize) + { + elt->use_block_copy = 1; + + if (dump_file) + { + fputs ("Scalarization disabled for ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); } + + return false; } - /* All the scalar copies just created will either create new definitions - or remove existing definitions of LHS, so we need to mark it for - renaming. */ - if (TREE_SIDE_EFFECTS (list)) + /* Don't decide if we've no uses. */ + if (elt->n_uses == 0 && elt->n_copies == 0) + ; + + else if (!elt->is_scalar) { - if (mode == SCALAR_FIELD || mode == SCALAR_SCALAR) + tree size_tree = TYPE_SIZE_UNIT (elt->type); + bool use_block_copy = true; + + /* Tradeoffs for COMPLEX types pretty much always make it better + to go ahead and split the components. */ + if (TREE_CODE (elt->type) == COMPLEX_TYPE) + use_block_copy = false; + + /* Don't bother trying to figure out the rest if the structure is + so large we can't do easy arithmetic. This also forces block + copies for variable sized structures. */ + else if (host_integerp (size_tree, 1)) { - /* If the LHS has been scalarized, mark it for renaming. */ - bitmap_set_bit (vars_to_rename, var_ann (lhs)->uid); + unsigned HOST_WIDE_INT full_size, inst_size = 0; + unsigned int inst_count; + unsigned int max_size; + + /* If the sra-max-structure-size parameter is 0, then the + user has not overridden the parameter and we can choose a + sensible default. */ + max_size = SRA_MAX_STRUCTURE_SIZE + ? SRA_MAX_STRUCTURE_SIZE + : MOVE_RATIO * UNITS_PER_WORD; + + full_size = tree_low_cst (size_tree, 1); + + /* ??? What to do here. If there are two fields, and we've only + instantiated one, then instantiating the other is clearly a win. + If there are a large number of fields then the size of the copy + is much more of a factor. */ + + /* If the structure is small, and we've made copies, go ahead + and instantiate, hoping that the copies will go away. */ + if (full_size <= max_size + && elt->n_copies > elt->n_uses) + use_block_copy = false; + else + { + inst_count = sum_instantiated_sizes (elt, &inst_size); + + if (inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO) + use_block_copy = false; + } + + /* In order to avoid block copy, we have to be able to instantiate + all elements of the type. See if this is possible. */ + if (!use_block_copy + && (!can_completely_scalarize_p (elt) + || !type_can_instantiate_all_elements (elt->type))) + use_block_copy = true; } - else if (mode == FIELD_SCALAR) + elt->use_block_copy = use_block_copy; + + if (dump_file) { - /* Otherwise, mark all the symbols in the VDEFs for the last - scalarized statement just created. Since all the statements - introduce the same VDEFs, we only need to check the last one. */ - mark_all_vdefs (tsi_stmt (tsi)); + fprintf (dump_file, "Using %s for ", + use_block_copy ? "block-copy" : "element-copy"); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + if (!use_block_copy) + { + instantiate_missing_elements (elt); + return true; } - else - abort (); } - return list; + any_inst = elt->replacement != NULL; + + for (c = elt->children; c ; c = c->sibling) + any_inst |= decide_block_copy (c); + + return any_inst; } -/* A helper function that creates the copies, updates line info, - and emits the code either before or after BSI. */ +/* Entry point to phase 3. Instantiate scalar replacement variables. 
*/ static void -emit_scalar_copies (block_stmt_iterator *bsi, tree lhs, tree rhs, - enum sra_copy_mode mode) +decide_instantiations (void) { - tree list = create_scalar_copies (lhs, rhs, mode); - tree stmt = bsi_stmt (*bsi); + unsigned int i; + bool cleared_any; + bitmap_head done_head; + bitmap_iterator bi; - if (EXPR_HAS_LOCATION (stmt)) - annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); + /* We cannot clear bits from a bitmap we're iterating over, + so save up all the bits to clear until the end. */ + bitmap_initialize (&done_head, &bitmap_default_obstack); + cleared_any = false; - bsi_insert_before (bsi, list, BSI_SAME_STMT); + EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + if (elt) + { + decide_instantiation_1 (elt, 0, 0); + if (!decide_block_copy (elt)) + elt = NULL; + } + if (!elt) + { + bitmap_set_bit (&done_head, i); + cleared_any = true; + } + } + + if (cleared_any) + { + bitmap_and_compl_into (sra_candidates, &done_head); + bitmap_and_compl_into (needs_copy_in, &done_head); + } + bitmap_clear (&done_head); + + if (dump_file) + fputc ('\n', dump_file); } -/* Traverse all the statements in the function replacing references to - scalarizable structures with their corresponding scalar temporaries. */ + +/* Phase Four: Update the function to match the replacements created. */ + +/* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for + renaming. This becomes necessary when we modify all of a non-scalar. */ static void -scalarize_structures (void) +mark_all_v_defs (tree stmt) { - basic_block bb; - block_stmt_iterator si; - size_t i; + tree sym; + ssa_op_iter iter; - FOR_EACH_BB (bb) - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - tree stmt; - stmt_ann_t ann; + get_stmt_operands (stmt); - stmt = bsi_stmt (si); - ann = stmt_ann (stmt); + FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS) + { + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); + } +} - /* If the statement has no virtual operands, then it doesn't make - structure references that we care about. */ - if (NUM_VDEFS (VDEF_OPS (ann)) == 0 - && NUM_VUSES (VUSE_OPS (ann)) == 0) - continue; +/* Build a single level component reference to ELT rooted at BASE. */ - /* Structure references may only appear in certain statements. */ - if (TREE_CODE (stmt) != MODIFY_EXPR - && TREE_CODE (stmt) != CALL_EXPR - && TREE_CODE (stmt) != RETURN_EXPR - && TREE_CODE (stmt) != ASM_EXPR) - continue; +static tree +generate_one_element_ref (struct sra_elt *elt, tree base) +{ + switch (TREE_CODE (TREE_TYPE (base))) + { + case RECORD_TYPE: + { + tree field = elt->element; - scalarize_stmt (&si); + /* Watch out for compatible records with differing field lists. */ + if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base))) + field = find_compatible_field (TREE_TYPE (base), field); + + return build (COMPONENT_REF, elt->type, base, field, NULL); } - /* Initialize the scalar replacements for every structure that is a - function argument. 
*/ - EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, - { - tree var = referenced_var (i); - tree list = create_scalar_copies (var, var, SCALAR_FIELD); - bsi_insert_on_edge (ENTRY_BLOCK_PTR->succ, list); - }); + case ARRAY_TYPE: + return build (ARRAY_REF, elt->type, base, elt->element, NULL, NULL); + + case COMPLEX_TYPE: + if (elt->element == integer_zero_node) + return build (REALPART_EXPR, elt->type, base); + else + return build (IMAGPART_EXPR, elt->type, base); - /* Commit edge insertions. */ - bsi_commit_edge_inserts (NULL); + default: + gcc_unreachable (); + } } +/* Build a full component reference to ELT rooted at its native variable. */ + +static tree +generate_element_ref (struct sra_elt *elt) +{ + if (elt->parent) + return generate_one_element_ref (elt, generate_element_ref (elt->parent)); + else + return elt->element; +} -/* Scalarize structure references in the statement pointed by SI_P. */ +/* Generate a set of assignment statements in *LIST_P to copy all + instantiated elements under ELT to or from the equivalent structure + rooted at EXPR. COPY_OUT controls the direction of the copy, with + true meaning to copy out of EXPR into ELT. */ static void -scalarize_stmt (block_stmt_iterator *si_p) +generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr, + tree *list_p) { - tree stmt = bsi_stmt (*si_p); - - /* Handle assignments. */ - if (TREE_CODE (stmt) == MODIFY_EXPR - && TREE_CODE (TREE_OPERAND (stmt, 1)) != CALL_EXPR) - scalarize_modify_expr (si_p); - - /* Handle RETURN_EXPR. */ - else if (TREE_CODE (stmt) == RETURN_EXPR) - scalarize_return_expr (si_p); - - /* Handle function calls (note that this must be handled after - MODIFY_EXPR and RETURN_EXPR because a function call can appear in - both). */ - else if (get_call_expr_in (stmt) != NULL_TREE) - scalarize_call_expr (si_p); - - /* Handle ASM_EXPRs. */ - else if (TREE_CODE (stmt) == ASM_EXPR) - scalarize_asm_expr (si_p); -} + struct sra_elt *c; + tree t; + if (elt->replacement) + { + if (copy_out) + t = build (MODIFY_EXPR, void_type_node, elt->replacement, expr); + else + t = build (MODIFY_EXPR, void_type_node, expr, elt->replacement); + append_to_statement_list (t, list_p); + } + else + { + for (c = elt->children; c ; c = c->sibling) + { + t = generate_one_element_ref (c, unshare_expr (expr)); + generate_copy_inout (c, copy_out, t, list_p); + } + } +} -/* Helper for scalarize_stmt to handle assignments. */ +/* Generate a set of assignment statements in *LIST_P to copy all instantiated + elements under SRC to their counterparts under DST. There must be a 1-1 + correspondence of instantiated elements. */ static void -scalarize_modify_expr (block_stmt_iterator *si_p) +generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p) { - tree stmt = bsi_stmt (*si_p); - tree lhs = TREE_OPERAND (stmt, 0); - tree rhs = TREE_OPERAND (stmt, 1); + struct sra_elt *dc, *sc; - /* Found AGGREGATE.FIELD = ... */ - if (is_sra_candidate_ref (lhs, false)) + for (dc = dst->children; dc ; dc = dc->sibling) { - tree sym; - vdef_optype vdefs; - - scalarize_component_ref (stmt, &TREE_OPERAND (stmt, 0)); - - /* Mark the LHS to be renamed, as we have just removed the previous - VDEF for AGGREGATE. The statement should have exactly one VDEF - for variable AGGREGATE. 
*/ - vdefs = STMT_VDEF_OPS (stmt); - if (NUM_VDEFS (vdefs) != 1) - abort (); - sym = SSA_NAME_VAR (VDEF_RESULT (vdefs, 0)); - bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); + sc = lookup_element (src, dc->element, NULL, NO_INSERT); + gcc_assert (sc); + generate_element_copy (dc, sc, list_p); } - /* Found ... = AGGREGATE.FIELD */ - else if (is_sra_candidate_ref (rhs, false)) - scalarize_component_ref (stmt, &TREE_OPERAND (stmt, 1)); - - /* Found ... = BIT_FIELD_REF <>. This is similar to a CALL_EXPR, if the - operand of the BIT_FIELD_REF is a scalarizable structure, we need to - copy from its scalar replacements before doing the bitfield operation. - - FIXME: BIT_FIELD_REFs are often generated by fold-const.c. This is - not always desirable because they obfuscate the original predicates, - limiting what the tree optimizers may do. For instance, in - testsuite/g++.dg/opt/nrv4.C the use of SRA allows the optimizers to - optimize function main() to 'return 0;'. However, the folder - generates a BIT_FIELD_REF operation for one of the comparisons, - preventing the optimizers from removing all the redundant - operations. */ - else if (is_sra_candidate_ref (rhs, true)) + if (dst->replacement) { - tree var = TREE_OPERAND (rhs, 0); - emit_scalar_copies (si_p, var, var, FIELD_SCALAR); + tree t; + + gcc_assert (src->replacement); + + t = build (MODIFY_EXPR, void_type_node, dst->replacement, + src->replacement); + append_to_statement_list (t, list_p); } +} - /* Found AGGREGATE = ... or ... = AGGREGATE */ - else if (DECL_P (lhs) || DECL_P (rhs)) - scalarize_structure_assignment (si_p); +/* Generate a set of assignment statements in *LIST_P to zero all instantiated + elements under ELT. In addition, do not assign to elements that have been + marked VISITED but do reset the visited flag; this allows easy coordination + with generate_element_init. */ + +static void +generate_element_zero (struct sra_elt *elt, tree *list_p) +{ + struct sra_elt *c; + + if (elt->visited) + { + elt->visited = false; + return; + } + + for (c = elt->children; c ; c = c->sibling) + generate_element_zero (c, list_p); + + if (elt->replacement) + { + tree t; + + gcc_assert (elt->is_scalar); + t = fold_convert (elt->type, integer_zero_node); + + t = build (MODIFY_EXPR, void_type_node, elt->replacement, t); + append_to_statement_list (t, list_p); + } } +/* Generate an assignment VAR = INIT, where INIT may need gimplification. + Add the result to *LIST_P. */ + +static void +generate_one_element_init (tree var, tree init, tree *list_p) +{ + /* The replacement can be almost arbitrarily complex. Gimplify. */ + tree stmt = build (MODIFY_EXPR, void_type_node, var, init); + gimplify_and_add (stmt, list_p); +} -/* Scalarize structure references in LIST. Use DONE to avoid duplicates. */ +/* Generate a set of assignment statements in *LIST_P to set all instantiated + elements under ELT with the contents of the initializer INIT. In addition, + mark all assigned elements VISITED; this allows easy coordination with + generate_element_zero. Return false if we found a case we couldn't + handle. */ -static inline void -scalarize_tree_list (tree list, block_stmt_iterator *si_p, bitmap done) +static bool +generate_element_init_1 (struct sra_elt *elt, tree init, tree *list_p) { - tree op; + bool result = true; + enum tree_code init_code; + struct sra_elt *sub; + tree t; - for (op = list; op; op = TREE_CHAIN (op)) + /* We can be passed DECL_INITIAL of a static variable. It might have a + conversion, which we strip off here. 
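+
+     Illustration only, with assumed shapes: a CONSTRUCTOR such as
+     { 1, 2 } for a two-field record is decomposed by the cases below
+     into one initialization per instantiated element, and a
+     COMPLEX_CST or COMPLEX_EXPR is split into its real and imaginary
+     parts.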
*/ + STRIP_USELESS_TYPE_CONVERSION (init); + init_code = TREE_CODE (init); + + if (elt->is_scalar) { - tree arg = TREE_VALUE (op); + if (elt->replacement) + { + generate_one_element_init (elt->replacement, init, list_p); + elt->visited = true; + } + return result; + } - if (is_sra_candidate_decl (arg)) + switch (init_code) + { + case COMPLEX_CST: + case COMPLEX_EXPR: + for (sub = elt->children; sub ; sub = sub->sibling) { - int index = var_ann (arg)->uid; - if (!bitmap_bit_p (done, index)) - { - emit_scalar_copies (si_p, arg, arg, FIELD_SCALAR); - bitmap_set_bit (done, index); - } + if (sub->element == integer_zero_node) + t = (init_code == COMPLEX_EXPR + ? TREE_OPERAND (init, 0) : TREE_REALPART (init)); + else + t = (init_code == COMPLEX_EXPR + ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init)); + result &= generate_element_init_1 (sub, t, list_p); } - else if (is_sra_candidate_ref (arg, false)) + break; + + case CONSTRUCTOR: + for (t = CONSTRUCTOR_ELTS (init); t ; t = TREE_CHAIN (t)) { - tree stmt = bsi_stmt (*si_p); - scalarize_component_ref (stmt, &TREE_VALUE (op)); + sub = lookup_element (elt, TREE_PURPOSE (t), NULL, NO_INSERT); + if (sub == NULL) + continue; + result &= generate_element_init_1 (sub, TREE_VALUE (t), list_p); } + break; + + default: + elt->visited = true; + result = false; } -} + return result; +} -/* Helper for scalarize_stmt to handle function calls. */ +/* A wrapper function for generate_element_init_1 that handles cleanup after + gimplification. */ -static void -scalarize_call_expr (block_stmt_iterator *si_p) +static bool +generate_element_init (struct sra_elt *elt, tree init, tree *list_p) { - tree stmt = bsi_stmt (*si_p); - tree call = (TREE_CODE (stmt) == MODIFY_EXPR) ? TREE_OPERAND (stmt, 1) : stmt; - struct bitmap_head_def done_head; - - /* First scalarize the arguments. Order is important, because the copy - operations for the arguments need to go before the call. - Scalarization of the return value needs to go after the call. */ - bitmap_initialize (&done_head, 1); - scalarize_tree_list (TREE_OPERAND (call, 1), si_p, &done_head); - bitmap_clear (&done_head); + bool ret; + + push_gimplify_context (); + ret = generate_element_init_1 (elt, init, list_p); + pop_gimplify_context (NULL); - /* Scalarize the return value, if any. */ - if (TREE_CODE (stmt) == MODIFY_EXPR) + /* The replacement can expose previously unreferenced variables. */ + if (ret && *list_p) { - tree var = TREE_OPERAND (stmt, 0); + tree_stmt_iterator i; + size_t old, new, j; + + old = num_referenced_vars; + + for (i = tsi_start (*list_p); !tsi_end_p (i); tsi_next (&i)) + find_new_referenced_vars (tsi_stmt_ptr (i)); + + new = num_referenced_vars; + for (j = old; j < new; ++j) + bitmap_set_bit (vars_to_rename, j); + } + + return ret; +} + +/* Insert STMT on all the outgoing edges out of BB. Note that if BB + has more than one edge, STMT will be replicated for each edge. Also, + abnormal edges will be ignored. */ + +void +insert_edge_copies (tree stmt, basic_block bb) +{ + edge e; + edge_iterator ei; + bool first_copy; - /* If the LHS of the assignment is a scalarizable structure, insert - copies into the scalar replacements after the call. */ - if (is_sra_candidate_decl (var)) + first_copy = true; + FOR_EACH_EDGE (e, ei, bb->succs) + { + /* We don't need to insert copies on abnormal edges. The + value of the scalar replacement is not guaranteed to + be valid through an abnormal edge. 
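+
+	 As an illustration: with two normal successors, the first edge
+	 receives STMT itself and the second an unshared copy made by
+	 unsave_expr_now, while an abnormal (e.g. EH) successor is simply
+	 skipped.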
*/ + if (!(e->flags & EDGE_ABNORMAL)) { - tree list = create_scalar_copies (var, var, SCALAR_FIELD); - if (EXPR_HAS_LOCATION (stmt)) - annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); - if (stmt_ends_bb_p (stmt)) - insert_edge_copies (list, bb_for_stmt (stmt)); + if (first_copy) + { + bsi_insert_on_edge (e, stmt); + first_copy = false; + } else - bsi_insert_after (si_p, list, BSI_NEW_STMT); + bsi_insert_on_edge (e, unsave_expr_now (stmt)); } } } +/* Helper function to insert LIST before BSI, and set up line number info. */ + +static void +sra_insert_before (block_stmt_iterator *bsi, tree list) +{ + tree stmt = bsi_stmt (*bsi); + + if (EXPR_HAS_LOCATION (stmt)) + annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); + bsi_insert_before (bsi, list, BSI_SAME_STMT); +} -/* Helper for scalarize_stmt to handle ASM_EXPRs. */ +/* Similarly, but insert after BSI. Handles insertion onto edges as well. */ static void -scalarize_asm_expr (block_stmt_iterator *si_p) +sra_insert_after (block_stmt_iterator *bsi, tree list) { - tree stmt = bsi_stmt (*si_p); - struct bitmap_head_def done_head; + tree stmt = bsi_stmt (*bsi); - bitmap_initialize (&done_head, 1); - scalarize_tree_list (ASM_INPUTS (stmt), si_p, &done_head); - scalarize_tree_list (ASM_OUTPUTS (stmt), si_p, &done_head); - bitmap_clear (&done_head); + if (EXPR_HAS_LOCATION (stmt)) + annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); - /* ??? Process outputs after the asm. */ + if (stmt_ends_bb_p (stmt)) + insert_edge_copies (list, bsi->bb); + else + bsi_insert_after (bsi, list, BSI_SAME_STMT); } - -/* Helper for scalarize_stmt to handle return expressions. */ +/* Similarly, but replace the statement at BSI. */ static void -scalarize_return_expr (block_stmt_iterator *si_p) +sra_replace (block_stmt_iterator *bsi, tree list) { - tree stmt = bsi_stmt (*si_p); - tree op = TREE_OPERAND (stmt, 0); + sra_insert_before (bsi, list); + bsi_remove (bsi); + if (bsi_end_p (*bsi)) + *bsi = bsi_last (bsi->bb); + else + bsi_prev (bsi); +} - if (op == NULL_TREE) - return; +/* Scalarize a USE. To recap, this is either a simple reference to ELT, + if elt is scalar, or some occurrence of ELT that requires a complete + aggregate. IS_OUTPUT is true if ELT is being modified. */ - /* Handle a bare RESULT_DECL. This will handle for types needed - constructors, or possibly after NRV type optimizations. */ - if (is_sra_candidate_decl (op)) - emit_scalar_copies (si_p, op, op, FIELD_SCALAR); - else if (TREE_CODE (op) == MODIFY_EXPR) +static void +scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi, + bool is_output) +{ + tree list = NULL, stmt = bsi_stmt (*bsi); + + if (elt->replacement) + { + /* If we have a replacement, then updating the reference is as + simple as modifying the existing statement in place. */ + if (is_output) + mark_all_v_defs (stmt); + *expr_p = elt->replacement; + modify_stmt (stmt); + } + else { - tree *rhs_p = &TREE_OPERAND (op, 1); - tree rhs = *rhs_p; + /* Otherwise we need some copies. If ELT is being read, then we want + to store all (modified) sub-elements back into the structure before + the reference takes place. If ELT is being written, then we want to + load the changed values back into our shadow variables. */ + /* ??? We don't check modified for reads, we just always write all of + the values. We should be able to record the SSA number of the VOP + for which the values were last read. If that number matches the + SSA number of the VOP in the current statement, then we needn't + emit an assignment. 
This would also eliminate double writes when + a structure is passed as more than one argument to a function call. + This optimization would be most effective if sra_walk_function + processed the blocks in dominator order. */ + + generate_copy_inout (elt, is_output, generate_element_ref (elt), &list); + if (list == NULL) + return; + mark_all_v_defs (expr_first (list)); + if (is_output) + sra_insert_after (bsi, list); + else + sra_insert_before (bsi, list); + } +} - /* Handle 'return STRUCTURE;' */ - if (is_sra_candidate_decl (rhs)) - emit_scalar_copies (si_p, rhs, rhs, FIELD_SCALAR); +/* Scalarize a COPY. To recap, this is an assignment statement between + two scalarizable references, LHS_ELT and RHS_ELT. */ - /* Handle 'return STRUCTURE.FIELD;' */ - else if (is_sra_candidate_ref (rhs, false)) - scalarize_component_ref (stmt, rhs_p); +static void +scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + block_stmt_iterator *bsi) +{ + tree list, stmt; + + if (lhs_elt->replacement && rhs_elt->replacement) + { + /* If we have two scalar operands, modify the existing statement. */ + stmt = bsi_stmt (*bsi); - /* Handle 'return CALL_EXPR;' */ - else if (TREE_CODE (rhs) == CALL_EXPR) + /* See the commentary in sra_walk_function concerning + RETURN_EXPR, and why we should never see one here. */ + gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); + + TREE_OPERAND (stmt, 0) = lhs_elt->replacement; + TREE_OPERAND (stmt, 1) = rhs_elt->replacement; + modify_stmt (stmt); + } + else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy) + { + /* If either side requires a block copy, then sync the RHS back + to the original structure, leave the original assignment + statement (which will perform the block copy), then load the + LHS values out of its now-updated original structure. */ + /* ??? Could perform a modified pair-wise element copy. That + would at least allow those elements that are instantiated in + both structures to be optimized well. */ + + list = NULL; + generate_copy_inout (rhs_elt, false, + generate_element_ref (rhs_elt), &list); + if (list) { - struct bitmap_head_def done_head; - bitmap_initialize (&done_head, 1); - scalarize_tree_list (TREE_OPERAND (rhs, 1), si_p, &done_head); - bitmap_clear (&done_head); + mark_all_v_defs (expr_first (list)); + sra_insert_before (bsi, list); } + + list = NULL; + generate_copy_inout (lhs_elt, true, + generate_element_ref (lhs_elt), &list); + if (list) + sra_insert_after (bsi, list); + } + else + { + /* Otherwise both sides must be fully instantiated. In which + case perform pair-wise element assignments and replace the + original block copy statement. */ + + stmt = bsi_stmt (*bsi); + mark_all_v_defs (stmt); + + list = NULL; + generate_element_copy (lhs_elt, rhs_elt, &list); + gcc_assert (list); + sra_replace (bsi, list); } } +/* Scalarize an INIT. To recap, this is an assignment to a scalarizable + reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or + COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty + CONSTRUCTOR. */ + +static void +scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi) +{ + bool result = true; + tree list = NULL; + + /* Generate initialization statements for all members extant in the RHS. */ + if (rhs) + { + /* Unshare the expression just in case this is from a decl's initial. */ + rhs = unshare_expr (rhs); + result = generate_element_init (lhs_elt, rhs, &list); + } + + /* CONSTRUCTOR is defined such that any member not mentioned is assigned + a zero value. 
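+     (Illustration with made-up names: if the initializer mentions only
+     field a of a two-field structure, the replacement for a receives
+     the constant and the replacement for b is explicitly zeroed by
+     generate_element_zero below.)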
Initialize the rest of the instantiated elements. */ + generate_element_zero (lhs_elt, &list); -/* Debugging dump for the scalar replacement map. */ + if (!result) + { + /* If we failed to convert the entire initializer, then we must + leave the structure assignment in place and must load values + from the structure into the slots for which we did not find + constants. The easiest way to do this is to generate a complete + copy-out, and then follow that with the constant assignments + that we were able to build. DCE will clean things up. */ + tree list0 = NULL; + generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt), + &list0); + append_to_statement_list (list, &list0); + list = list0; + } -static int -dump_sra_map_trav (void **slot, void *data) + if (lhs_elt->use_block_copy || !result) + { + /* Since LHS is not fully instantiated, we must leave the structure + assignment in place. Treating this case differently from a USE + exposes constants to later optimizations. */ + if (list) + { + mark_all_v_defs (expr_first (list)); + sra_insert_after (bsi, list); + } + } + else + { + /* The LHS is fully instantiated. The list of initializations + replaces the original structure assignment. */ + gcc_assert (list); + mark_all_v_defs (bsi_stmt (*bsi)); + sra_replace (bsi, list); + } +} + +/* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP + on all INDIRECT_REFs. */ + +static tree +mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) { - struct sra_elt *e = *slot; - FILE *f = data; + tree t = *tp; - switch (e->kind) + if (TREE_CODE (t) == INDIRECT_REF) { - case REALPART_EXPR: - fputs ("__real__ ", f); - print_generic_expr (dump_file, e->base, dump_flags); - fprintf (f, " -> %s\n", get_name (e->replace)); - break; - case IMAGPART_EXPR: - fputs ("__imag__ ", f); - print_generic_expr (dump_file, e->base, dump_flags); - fprintf (f, " -> %s\n", get_name (e->replace)); - break; - case COMPONENT_REF: - print_generic_expr (dump_file, e->base, dump_flags); - fprintf (f, ".%s -> %s\n", get_name (e->field), get_name (e->replace)); - break; - default: - abort (); + TREE_THIS_NOTRAP (t) = 1; + *walk_subtrees = 0; } + else if (IS_TYPE_OR_DECL_P (t)) + *walk_subtrees = 0; - return 1; + return NULL; } +/* Scalarize a LDST. To recap, this is an assignment between one scalarizable + reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true + if ELT is on the left-hand side. */ + static void -dump_sra_map (FILE *f) +scalarize_ldst (struct sra_elt *elt, tree other, + block_stmt_iterator *bsi, bool is_output) { - fputs ("Scalar replacements:\n", f); - htab_traverse_noresize (sra_map, dump_sra_map_trav, f); - fputs ("\n\n", f); + /* Shouldn't have gotten called for a scalar. */ + gcc_assert (!elt->replacement); + + if (elt->use_block_copy) + { + /* Since ELT is not fully instantiated, we have to leave the + block copy in place. Treat this as a USE. */ + scalarize_use (elt, NULL, bsi, is_output); + } + else + { + /* The interesting case is when ELT is fully instantiated. In this + case we can have each element stored/loaded directly to/from the + corresponding slot in OTHER. This avoids a block copy. */ + + tree list = NULL, stmt = bsi_stmt (*bsi); + + mark_all_v_defs (stmt); + generate_copy_inout (elt, is_output, other, &list); + gcc_assert (list); + + /* Preserve EH semantics. */ + if (stmt_ends_bb_p (stmt)) + { + tree_stmt_iterator tsi; + tree first; + + /* Extract the first statement from LIST. 
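+
+	     As an illustration: if LIST holds several element copies, only
+	     this first one replaces the original, possibly trapping,
+	     statement, so any EH edge out of the block stays attached to
+	     it; the remaining copies are emitted on the outgoing edges
+	     below.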
*/ + tsi = tsi_start (list); + first = tsi_stmt (tsi); + tsi_delink (&tsi); + + /* Replace the old statement with this new representative. */ + bsi_replace (bsi, first, true); + + if (!tsi_end_p (tsi)) + { + /* If any reference would trap, then they all would. And more + to the point, the first would. Therefore none of the rest + will trap since the first didn't. Indicate this by + iterating over the remaining statements and set + TREE_THIS_NOTRAP in all INDIRECT_REFs. */ + do + { + walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL); + tsi_next (&tsi); + } + while (!tsi_end_p (tsi)); + + insert_edge_copies (list, bsi->bb); + } + } + else + sra_replace (bsi, list); + } } -/* Main entry point to Scalar Replacement of Aggregates (SRA). This pass - re-writes non-aliased structure references into scalar temporaries. The - goal is to expose some/all structures to the scalar optimizers. +/* Generate initializations for all scalarizable parameters. */ - FNDECL is the function to process. - - VARS_TO_RENAME_P is a pointer to the set of variables that need to be - renamed into SSA after this pass is done. These are going to be all the - new scalars created by the SRA process. Notice that since this pass - creates new variables, the bitmap representing all the variables in the - program will be re-sized here. +static void +scalarize_parms (void) +{ + tree list = NULL; + unsigned i; + bitmap_iterator bi; - PHASE indicates which dump file from the DUMP_FILES array to use when - dumping debugging information. + EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi) + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + generate_copy_inout (elt, true, var, &list); + } - TODO + if (list) + insert_edge_copies (list, ENTRY_BLOCK_PTR); +} - 1- Scalarize COMPLEX_TYPEs - 2- Scalarize ARRAY_REFs that are always referenced with a - constant index. - 3- Timings to determine when scalarization is not profitable. - 4- Determine what's a good value for MAX_NFIELDS_FOR_SRA. */ +/* Entry point to phase 4. Update the function to match replacements. */ static void -tree_sra (void) +scalarize_function (void) { - /* Initialize local variables. */ - sra_candidates = BITMAP_XMALLOC (); - sra_map = NULL; - needs_copy_in = NULL; + static const struct sra_walk_fns fns = { + scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false + }; - /* Find structures to be scalarized. */ - if (!find_candidates_for_sra ()) + sra_walk_function (&fns); + scalarize_parms (); + bsi_commit_edge_inserts (); +} + + +/* Debug helper function. Print ELT in a nice human-readable format. */ + +static void +dump_sra_elt_name (FILE *f, struct sra_elt *elt) +{ + if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE) { - BITMAP_XFREE (sra_candidates); - return; + fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f); + dump_sra_elt_name (f, elt->parent); + } + else + { + if (elt->parent) + dump_sra_elt_name (f, elt->parent); + if (DECL_P (elt->element)) + { + if (TREE_CODE (elt->element) == FIELD_DECL) + fputc ('.', f); + print_generic_expr (f, elt->element, dump_flags); + } + else + fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", + TREE_INT_CST_LOW (elt->element)); } +} - /* If we found any, re-write structure references with their - corresponding scalar replacement. */ - sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, free); - needs_copy_in = BITMAP_XMALLOC (); +/* Likewise, but callable from the debugger. 
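+
+   For reference, the format produced above is, e.g., "a.b" for a field
+   of variable a, "c[4]" for a constant array index, and "__real__ z"
+   or "__imag__ z" for the parts of a complex variable.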
*/ - scalarize_structures (); +void +debug_sra_elt_name (struct sra_elt *elt) +{ + dump_sra_elt_name (stderr, elt); + fputc ('\n', stderr); +} - if (dump_file) - dump_sra_map (dump_file); +/* Main entry point. */ + +static void +tree_sra (void) +{ + /* Initialize local variables. */ + gcc_obstack_init (&sra_obstack); + sra_candidates = BITMAP_ALLOC (NULL); + needs_copy_in = BITMAP_ALLOC (NULL); + sra_type_decomp_cache = BITMAP_ALLOC (NULL); + sra_type_inst_cache = BITMAP_ALLOC (NULL); + sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL); + + /* Scan. If we find anything, instantiate and scalarize. */ + if (find_candidates_for_sra ()) + { + scan_function (); + decide_instantiations (); + scalarize_function (); + } /* Free allocated memory. */ htab_delete (sra_map); sra_map = NULL; - BITMAP_XFREE (needs_copy_in); - BITMAP_XFREE (sra_candidates); + BITMAP_FREE (sra_candidates); + BITMAP_FREE (needs_copy_in); + BITMAP_FREE (sra_type_decomp_cache); + BITMAP_FREE (sra_type_inst_cache); + obstack_free (&sra_obstack, NULL); } static bool @@ -1164,7 +2115,7 @@ gate_sra (void) return flag_tree_sra != 0; } -struct tree_opt_pass pass_sra = +struct tree_opt_pass pass_sra = { "sra", /* name */ gate_sra, /* gate */ @@ -1173,10 +2124,11 @@ struct tree_opt_pass pass_sra = NULL, /* next */ 0, /* static_pass_number */ TV_TREE_SRA, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ + PROP_cfg | PROP_ssa | PROP_alias, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_dump_func | TODO_rename_vars - | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */ + | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */ + 0 /* letter */ };
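As an illustration of the overall rewrite this pass performs, here is a
minimal, compilable sketch (not part of the patch; the struct, the function
names and the hand-written "replacement" scalars are invented for the
example):

    /* Illustrative sketch of Scalar Replacement of Aggregates, assuming a
       simple non-addressable candidate.  */
    #include <assert.h>

    struct point { int x; int y; };

    /* Before SRA: the members are accessed through the aggregate.  */
    static int
    before_sra (int a, int b)
    {
      struct point p;
      p.x = a;
      p.y = b;
      return p.x + p.y;
    }

    /* After SRA: each accessed member has an independent scalar
       replacement, exposing the values to the scalar optimizers.  */
    static int
    after_sra (int a, int b)
    {
      int p_x = a;   /* replacement for p.x */
      int p_y = b;   /* replacement for p.y */
      return p_x + p_y;
    }

    int
    main (void)
    {
      assert (before_sra (3, 4) == after_sra (3, 4));
      return 0;
    }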