+ case COMPONENT_REF:
+ /* Don't look through unions. */
+ if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
+ return NULL;
+ child = TREE_OPERAND (expr, 1);
+ break;
+
+ case REALPART_EXPR:
+ child = integer_zero_node;
+ break;
+ case IMAGPART_EXPR:
+ child = integer_one_node;
+ break;
+
+ default:
+ return NULL;
+ }
+
+ elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
+ if (elt)
+ return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
+ return NULL;
+}
+
+\f
+/* Functions to walk just enough of the tree to see all scalarizable
+ references, and categorize them. */
+
+/* A set of callbacks for phases 2 and 4. They'll be invoked for the
+ various kinds of references seen. In all cases, *BSI is an iterator
+ pointing to the statement being processed. */
+struct sra_walk_fns
+{
+ /* Invoked when ELT is required as a unit. Note that ELT might refer to
+ a leaf node, in which case this is a simple scalar reference. *EXPR_P
+ points to the location of the expression. IS_OUTPUT is true if this
+ is a left-hand-side reference. USE_ALL is true if we saw something we
+ couldn't quite identify and had to force the use of the entire object. */
+ void (*use) (struct sra_elt *elt, tree *expr_p,
+ block_stmt_iterator *bsi, bool is_output, bool use_all);
+
+ /* Invoked when we have a copy between two scalarizable references. */
+ void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
+ block_stmt_iterator *bsi);
+
+ /* Invoked when ELT is initialized from a constant. VALUE may be NULL,
+ in which case it should be treated as an empty CONSTRUCTOR. */
+ void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
+
+ /* Invoked when we have a copy between one scalarizable reference ELT
+ and one non-scalarizable reference OTHER without side-effects.
+ IS_OUTPUT is true if ELT is on the left-hand side. */
+ void (*ldst) (struct sra_elt *elt, tree other,
+ block_stmt_iterator *bsi, bool is_output);
+
+ /* True during phase 2, false during phase 4. */
+ /* ??? This is a hack. */
+ bool initial_scan;
+};
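+
+/* For instance, given candidate variables A and A2 and a non-candidate
+ aggregate B of the same type, the walk would roughly classify:
+
+ A.x = 3;   USE of the element for A.x, with IS_OUTPUT true
+ A = A2;    COPY between the two root elements
+ A = {};    INIT with an empty CONSTRUCTOR
+ A = B;     LDST with OTHER == B and IS_OUTPUT true
+
+ The names are illustrative; the exact elements reported depend on the
+ walkers below.  */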
+
+#ifdef ENABLE_CHECKING
+/* Invoked via walk_tree; if *TP is a candidate decl, return it.  */
+
+static tree
+sra_find_candidate_decl (tree *tp, int *walk_subtrees,
+ void *data ATTRIBUTE_UNUSED)
+{
+ tree t = *tp;
+ enum tree_code code = TREE_CODE (t);
+
+ if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
+ {
+ *walk_subtrees = 0;
+ if (is_sra_candidate_decl (t))
+ return t;
+ }
+ else if (TYPE_P (t))
+ *walk_subtrees = 0;
+
+ return NULL;
+}
+#endif
+
+/* Walk most expressions looking for a scalarizable aggregate.
+ If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
+ const struct sra_walk_fns *fns)
+{
+ tree expr = *expr_p;
+ tree inner = expr;
+ bool disable_scalarization = false;
+ bool use_all_p = false;
+
+ /* We're looking to collect a reference expression between EXPR and INNER,
+ such that INNER is a scalarizable decl and all other nodes through EXPR
+ are references that we can scalarize. If we come across something that
+ we can't scalarize, we reset EXPR. This has the effect of making it
+ appear that we're referring to the larger expression as a whole. */
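+
+ /* For instance, with A a candidate decl, walking A.b.c[2] descends
+ through the ARRAY_REF and COMPONENT_REFs to reach A, and FNS->USE is
+ invoked on the element for A.b.c[2].  Walking A.b.c[i] with a variable
+ index instead resets EXPR at the ARRAY_REF and marks the element for
+ A.b.c as unscalarizable.  Illustrative only; see the individual cases
+ below.  */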
+
+ while (1)
+ switch (TREE_CODE (inner))
+ {
+ case VAR_DECL:
+ case PARM_DECL:
+ case RESULT_DECL:
+ /* If there is a scalarizable decl at the bottom, then process it. */
+ if (is_sra_candidate_decl (inner))
+ {
+ struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
+ if (disable_scalarization)
+ elt->cannot_scalarize = true;
+ else
+ fns->use (elt, expr_p, bsi, is_output, use_all_p);
+ }
+ return;
+
+ case ARRAY_REF:
+ /* Non-constant index means any member may be accessed. Prevent the
+ expression from being scalarized. If we were to treat this as a
+ reference to the whole array, we could wind up with a single dynamic
+ index reference inside a loop being overridden by several constant
+ index references during loop setup. It's possible that this could
+ be avoided by using dynamic usage counts based on BB trip counts
+ (based on loop analysis or profiling), but that hardly seems worth
+ the effort. */
+ /* ??? Hack. Figure out how to push this into the scan routines
+ without duplicating too much code. */
+ if (!in_array_bounds_p (inner))
+ {
+ disable_scalarization = true;
+ goto use_all;
+ }
+ /* ??? Are we assured that non-constant bounds and stride will have
+ the same value everywhere? I don't think Fortran will... */
+ if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
+ goto use_all;
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case ARRAY_RANGE_REF:
+ if (!range_in_array_bounds_p (inner))
+ {
+ disable_scalarization = true;
+ goto use_all;
+ }
+ /* ??? See above re non-constant bounds and stride.  */
+ if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
+ goto use_all;
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case COMPONENT_REF:
+ /* A reference to a union member constitutes a reference to the
+ entire union. */
+ if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
+ goto use_all;
+ /* ??? See above re non-constant stride. */
+ if (TREE_OPERAND (inner, 2))
+ goto use_all;
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case REALPART_EXPR:
+ case IMAGPART_EXPR:
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case BIT_FIELD_REF:
+ /* A bit field reference to a vector is still scalarized, but a
+ reference on the left-hand side must be recorded so that, when we
+ scalarize the vector, we can mark its replacement variable as not
+ renamable (not a gimple register).  */
+ if (is_output
+ && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE)
+ {
+ struct sra_elt *elt
+ = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0));
+ if (elt)
+ elt->is_vector_lhs = true;
+ }
+ /* A bit field reference (access to *multiple* fields simultaneously)
+ is not currently scalarized. Consider this an access to the
+ complete outer element, to which walk_tree will bring us next. */
+
+ goto use_all;
+
+ case VIEW_CONVERT_EXPR:
+ case NOP_EXPR:
+ /* Similarly, a view/nop explicitly wants to look at an object in a
+ type other than the one we've scalarized. */
+ goto use_all;
+
+ case WITH_SIZE_EXPR:
+ /* This is a transparent wrapper. The entire inner expression really
+ is being used. */
+ goto use_all;
+
+ use_all:
+ expr_p = &TREE_OPERAND (inner, 0);
+ inner = expr = *expr_p;
+ use_all_p = true;
+ break;
+
+ default:
+#ifdef ENABLE_CHECKING
+ /* Validate that we're not missing any references. */
+ gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
+#endif
+ return;
+ }
+}
+
+/* Walk a TREE_LIST of values looking for scalarizable aggregates.
+ If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
+ const struct sra_walk_fns *fns)
+{
+ tree op;
+ for (op = list; op ; op = TREE_CHAIN (op))
+ sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
+}
+
+/* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
+ If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
+ const struct sra_walk_fns *fns)
+{
+ int i;
+ int nargs = call_expr_nargs (expr);
+ for (i = 0; i < nargs; i++)
+ sra_walk_expr (&CALL_EXPR_ARG (expr, i), bsi, false, fns);
+}
+
+/* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
+ aggregates. If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
+ const struct sra_walk_fns *fns)
+{
+ sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
+ sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
+}
+
+/* Walk a GIMPLE_MODIFY_STMT and categorize the assignment appropriately. */
+
+static void
+sra_walk_gimple_modify_stmt (tree expr, block_stmt_iterator *bsi,
+ const struct sra_walk_fns *fns)
+{
+ struct sra_elt *lhs_elt, *rhs_elt;
+ tree lhs, rhs;
+
+ lhs = GIMPLE_STMT_OPERAND (expr, 0);
+ rhs = GIMPLE_STMT_OPERAND (expr, 1);
+ lhs_elt = maybe_lookup_element_for_expr (lhs);
+ rhs_elt = maybe_lookup_element_for_expr (rhs);
+
+ /* If both sides are scalarizable, this is a COPY operation. */
+ if (lhs_elt && rhs_elt)
+ {
+ fns->copy (lhs_elt, rhs_elt, bsi);
+ return;
+ }
+
+ /* If the RHS is scalarizable, handle it. There are only two cases. */
+ if (rhs_elt)
+ {
+ if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
+ fns->ldst (rhs_elt, lhs, bsi, false);
+ else
+ fns->use (rhs_elt, &GIMPLE_STMT_OPERAND (expr, 1), bsi, false, false);
+ }
+
+ /* If it isn't scalarizable, there may be scalarizable variables within, so
+ check for a call or else walk the RHS to see if we need to do any
+ copy-in operations. We need to do it before the LHS is scalarized so
+ that the statements get inserted in the proper place, before any
+ copy-out operations. */
+ else
+ {
+ tree call = get_call_expr_in (rhs);
+ if (call)
+ sra_walk_call_expr (call, bsi, fns);
+ else
+ sra_walk_expr (&GIMPLE_STMT_OPERAND (expr, 1), bsi, false, fns);
+ }
+
+ /* Likewise, handle the LHS being scalarizable. We have cases similar
+ to those above, but also want to handle RHS being constant. */
+ if (lhs_elt)
+ {
+      /* If this is an assignment from a constant or constructor, then
+ we have access to all of the elements individually. Invoke INIT. */
+ if (TREE_CODE (rhs) == COMPLEX_EXPR
+ || TREE_CODE (rhs) == COMPLEX_CST
+ || TREE_CODE (rhs) == CONSTRUCTOR)
+ fns->init (lhs_elt, rhs, bsi);
+
+ /* If this is an assignment from read-only memory, treat this as if
+ we'd been passed the constructor directly. Invoke INIT. */
+ else if (TREE_CODE (rhs) == VAR_DECL
+ && TREE_STATIC (rhs)
+ && TREE_READONLY (rhs)
+ && targetm.binds_local_p (rhs))
+ fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);
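+
+      /* E.g., given "static const struct S c = { 1, 2 };" and the
+	 assignment "a = c", INIT is invoked with c's DECL_INITIAL just
+	 as if the source had been "a = (struct S) { 1, 2 }".  The
+	 struct name here is illustrative.  */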
+
+      /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
+	 The lvalue requirement prevents us from trying to directly scalarize
+	 the result of a function call, which would mean calling the
+	 function multiple times, among other evil things.  */
+ else if (!lhs_elt->is_scalar
+ && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
+ fns->ldst (lhs_elt, rhs, bsi, true);
+
+ /* Otherwise we're being used in some context that requires the
+ aggregate to be seen as a whole. Invoke USE. */
+ else
+ fns->use (lhs_elt, &GIMPLE_STMT_OPERAND (expr, 0), bsi, true, false);
+ }
+
+ /* Similarly to above, LHS_ELT being null only means that the LHS as a
+ whole is not a scalarizable reference. There may be occurrences of
+ scalarizable variables within, which implies a USE. */
+ else
+ sra_walk_expr (&GIMPLE_STMT_OPERAND (expr, 0), bsi, true, fns);
+}
+
+/* Entry point to the walk functions. Search the entire function,
+ invoking the callbacks in FNS on each of the references to
+ scalarizable variables. */
+
+static void
+sra_walk_function (const struct sra_walk_fns *fns)
+{
+ basic_block bb;
+ block_stmt_iterator si, ni;
+
+  /* ??? Phase 4 could derive some benefit from walking the function in
+     dominator tree order.  */
+
+ FOR_EACH_BB (bb)
+ for (si = bsi_start (bb); !bsi_end_p (si); si = ni)
+ {
+ tree stmt, t;
+ stmt_ann_t ann;
+
+ stmt = bsi_stmt (si);
+ ann = stmt_ann (stmt);
+
+ ni = si;
+ bsi_next (&ni);
+
+ /* If the statement has no virtual operands, then it doesn't
+ make any structure references that we care about. */
+ if (gimple_aliases_computed_p (cfun)
+ && ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE)))
+ continue;
+
+ switch (TREE_CODE (stmt))
+ {
+ case RETURN_EXPR:
+ /* If we have "return <retval>" then the return value is
+ already exposed for our pleasure. Walk it as a USE to
+ force all the components back in place for the return.
+
+ If we have an embedded assignment, then <retval> is of
+ a type that gets returned in registers in this ABI, and
+ we do not wish to extend their lifetimes. Treat this
+ as a USE of the variable on the RHS of this assignment. */
+
+ t = TREE_OPERAND (stmt, 0);
+ if (t == NULL_TREE)
+ ;
+ else if (TREE_CODE (t) == GIMPLE_MODIFY_STMT)
+ sra_walk_expr (&GIMPLE_STMT_OPERAND (t, 1), &si, false, fns);
+ else
+ sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
+ break;
+
+ case GIMPLE_MODIFY_STMT:
+ sra_walk_gimple_modify_stmt (stmt, &si, fns);
+ break;
+ case CALL_EXPR:
+ sra_walk_call_expr (stmt, &si, fns);
+ break;
+ case ASM_EXPR:
+ sra_walk_asm_expr (stmt, &si, fns);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+\f
+/* Phase One: Scan all referenced variables in the program looking for
+ structures that could be decomposed. */
+
+static bool
+find_candidates_for_sra (void)
+{
+ bool any_set = false;
+ tree var;
+ referenced_var_iterator rvi;
+
+ FOR_EACH_REFERENCED_VAR (var, rvi)
+ {
+ if (decl_can_be_decomposed_p (var))
+ {
+ bitmap_set_bit (sra_candidates, DECL_UID (var));
+ any_set = true;
+ }
+ }
+
+ return any_set;
+}
+
+\f
+/* Phase Two: Scan all references to scalarizable variables.  Count the
+   number of times each is used or copied.  */
+
+/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is
+ considered a copy, because we can decompose the reference such that
+ the sub-elements needn't be contiguous. */
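+
+/* For instance, "A = A2" between two candidates increments N_COPIES on
+   both root elements; a read of A.x increments N_USES on the element
+   for A.x; and a load from or store to a non-candidate aggregate
+   counts as a copy on the candidate side.  */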
+
+static void
+scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
+ bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED)
+{
+ elt->n_uses += 1;
+}
+
+static void
+scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
+{
+ lhs_elt->n_copies += 1;
+ rhs_elt->n_copies += 1;
+}
+
+static void
+scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
+{
+ lhs_elt->n_copies += 1;
+}
+
+static void
+scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
+ bool is_output ATTRIBUTE_UNUSED)
+{
+ elt->n_copies += 1;
+}
+
+/* Dump the values we collected during the scanning phase. */
+
+static void
+scan_dump (struct sra_elt *elt)
+{
+ struct sra_elt *c;
+
+ dump_sra_elt_name (dump_file, elt);
+ fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
+
+ for (c = elt->children; c ; c = c->sibling)
+ scan_dump (c);
+
+ for (c = elt->groups; c ; c = c->sibling)
+ scan_dump (c);
+}
+
+/* Entry point to phase 2. Scan the entire function, building up
+ scalarization data structures, recording copies and uses. */
+
+static void
+scan_function (void)
+{
+ static const struct sra_walk_fns fns = {
+ scan_use, scan_copy, scan_init, scan_ldst, true
+ };
+ bitmap_iterator bi;
+
+ sra_walk_function (&fns);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ unsigned i;
+
+ fputs ("\nScan results:\n", dump_file);
+ EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
+ {
+ tree var = referenced_var (i);
+ struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
+ if (elt)
+ scan_dump (elt);
+ }
+ fputc ('\n', dump_file);
+ }
+}
+\f
+/* Phase Three: Make decisions about which variables to scalarize, if any.
+ All elements to be scalarized have replacement variables made for them. */
+
+/* A subroutine of build_element_name. Recursively build the element
+ name on the obstack. */
+
+static void
+build_element_name_1 (struct sra_elt *elt)
+{
+ tree t;
+ char buffer[32];
+
+ if (elt->parent)
+ {
+ build_element_name_1 (elt->parent);
+ obstack_1grow (&sra_obstack, '$');
+
+ if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
+ {
+ if (elt->element == integer_zero_node)
+ obstack_grow (&sra_obstack, "real", 4);
+ else
+ obstack_grow (&sra_obstack, "imag", 4);
+ return;
+ }
+ }
+
+ t = elt->element;
+ if (TREE_CODE (t) == INTEGER_CST)
+ {
+ /* ??? Eh. Don't bother doing double-wide printing. */
+ sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
+ obstack_grow (&sra_obstack, buffer, strlen (buffer));
+ }
+ else
+ {
+ tree name = DECL_NAME (t);
+ if (name)
+ obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
+ IDENTIFIER_LENGTH (name));
+ else
+ {
+ sprintf (buffer, "D%u", DECL_UID (t));
+ obstack_grow (&sra_obstack, buffer, strlen (buffer));
+ }
+ }
+}
+
+/* Construct a pretty variable name for an element's replacement variable.
+ The name is built on the obstack. */
+
+static char *
+build_element_name (struct sra_elt *elt)
+{
+ build_element_name_1 (elt);
+ obstack_1grow (&sra_obstack, '\0');
+ return XOBFINISH (&sra_obstack, char *);
+}
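+
+/* E.g., for "struct { int i; } a[2]", the replacement for a[1].i would
+   be named something like "a$1$i", and the real part of a complex
+   variable "c" would be named "c$real".  */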
+
+/* Instantiate an element as an independent variable. */
+
+static void
+instantiate_element (struct sra_elt *elt)
+{
+ struct sra_elt *base_elt;
+ tree var, base;
+
+ for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
+ continue;
+ base = base_elt->element;
+
+ elt->replacement = var = make_rename_temp (elt->type, "SR");
+
+  /* A vector that appears on the left-hand side of a BIT_FIELD_REF
+     cannot be a gimple register.  */
+ if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs)
+ DECL_GIMPLE_REG_P (var) = 0;
+
+ DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
+ DECL_ARTIFICIAL (var) = 1;
+
+ if (TREE_THIS_VOLATILE (elt->type))
+ {
+ TREE_THIS_VOLATILE (var) = 1;
+ TREE_SIDE_EFFECTS (var) = 1;
+ }
+
+ if (DECL_NAME (base) && !DECL_IGNORED_P (base))
+ {
+ char *pretty_name = build_element_name (elt);
+ DECL_NAME (var) = get_identifier (pretty_name);
+ obstack_free (&sra_obstack, pretty_name);
+
+ SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
+ DECL_DEBUG_EXPR_IS_FROM (var) = 1;
+
+ DECL_IGNORED_P (var) = 0;
+ TREE_NO_WARNING (var) = TREE_NO_WARNING (base);
+ if (elt->element && TREE_NO_WARNING (elt->element))
+ TREE_NO_WARNING (var) = 1;
+ }
+ else
+ {
+ DECL_IGNORED_P (var) = 1;
+ /* ??? We can't generate any warning that would be meaningful. */
+ TREE_NO_WARNING (var) = 1;
+ }
+
+ if (dump_file)
+ {
+ fputs (" ", dump_file);
+ dump_sra_elt_name (dump_file, elt);
+ fputs (" -> ", dump_file);
+ print_generic_expr (dump_file, var, dump_flags);
+ fputc ('\n', dump_file);
+ }
+}
+
+/* Make one pass across an element tree deciding whether or not it's
+ profitable to instantiate individual leaf scalars.
+
+ PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
+ fields all the way up the tree. */
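+
+/* For instance, with PARENT_USES == 2 and PARENT_COPIES == 0, a scalar
+   leaf with N_USES == 4 is instantiated (4 > 2), while one with a
+   single use and no copies is not.  */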
+
+static void
+decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
+ unsigned int parent_copies)
+{
+ if (dump_file && !elt->parent)
+ {
+ fputs ("Initial instantiation for ", dump_file);
+ dump_sra_elt_name (dump_file, elt);
+ fputc ('\n', dump_file);
+ }
+
+ if (elt->cannot_scalarize)
+ return;
+
+ if (elt->is_scalar)
+ {
+ /* The decision is simple: instantiate if we're used more frequently
+ than the parent needs to be seen as a complete unit. */
+ if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
+ instantiate_element (elt);
+ }
+ else
+ {
+ struct sra_elt *c, *group;
+ unsigned int this_uses = elt->n_uses + parent_uses;
+ unsigned int this_copies = elt->n_copies + parent_copies;
+
+ /* Consider groups of sub-elements as weighing in favour of
+ instantiation whatever their size. */
+ for (group = elt->groups; group ; group = group->sibling)
+ FOR_EACH_ACTUAL_CHILD (c, group)
+ {
+ c->n_uses += group->n_uses;
+ c->n_copies += group->n_copies;
+ }
+
+ for (c = elt->children; c ; c = c->sibling)
+ decide_instantiation_1 (c, this_uses, this_copies);
+ }
+}
+
+/* Compute the size and number of all instantiated elements below ELT.
+ We will only care about this if the size of the complete structure
+ fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */
+
+static unsigned int
+sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
+{
+ if (elt->replacement)
+ {
+ *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
+ return 1;
+ }
+ else
+ {
+ struct sra_elt *c;
+ unsigned int count = 0;
+
+ for (c = elt->children; c ; c = c->sibling)
+ count += sum_instantiated_sizes (c, sizep);
+
+ return count;
+ }
+}
+
+/* Instantiate fields in ELT->TYPE that are not currently present as
+ children of ELT. */
+
+static void instantiate_missing_elements (struct sra_elt *elt);
+
+static void
+instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
+{
+ struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
+ if (sub->is_scalar)
+ {
+ if (sub->replacement == NULL)
+ instantiate_element (sub);
+ }
+ else
+ instantiate_missing_elements (sub);
+}
+
+static void
+instantiate_missing_elements (struct sra_elt *elt)
+{
+ tree type = elt->type;
+
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ {
+ tree f;
+ for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
+ {
+ tree field_type = TREE_TYPE (f);
+
+	      /* canonicalize_component_ref () unwidens some bit-field
+		 types (not marked as DECL_BIT_FIELD in C++), so we
+		 must do the same, lest we introduce type
+		 mismatches.  */
+ if (INTEGRAL_TYPE_P (field_type)
+ && DECL_MODE (f) != TYPE_MODE (field_type))
+ field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF,
+ field_type,
+ elt->element,
+ f, NULL_TREE),
+ NULL_TREE));
+
+ instantiate_missing_elements_1 (elt, f, field_type);
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ {
+ tree i, max, subtype;
+
+ i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
+ max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
+ subtype = TREE_TYPE (type);
+
+ while (1)
+ {
+ instantiate_missing_elements_1 (elt, i, subtype);
+ if (tree_int_cst_equal (i, max))
+ break;
+ i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
+ }
+
+ break;
+ }
+
+ case COMPLEX_TYPE:
+ type = TREE_TYPE (type);
+ instantiate_missing_elements_1 (elt, integer_zero_node, type);
+ instantiate_missing_elements_1 (elt, integer_one_node, type);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if there is exactly one non-aggregate field in the
+   record TYPE, and false otherwise.  */
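+/* E.g., "struct { int i; }" qualifies, while "struct { int i; int j; }"
+   and "struct { struct t inner; }" do not.  */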
+
+static bool
+single_scalar_field_in_record_p (tree type)
+{
+ int num_fields = 0;
+ tree field;
+ if (TREE_CODE (type) != RECORD_TYPE)
+ return false;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ num_fields++;
+
+ if (num_fields == 2)
+ return false;
+
+ if (AGGREGATE_TYPE_P (TREE_TYPE (field)))
+ return false;
+ }
+
+ return true;
+}
+
+/* Make one pass across an element tree deciding whether to perform block
+ or element copies. If we decide on element copies, instantiate all
+ elements. Return true if there are any instantiated sub-elements. */
+
+static bool
+decide_block_copy (struct sra_elt *elt)
+{
+ struct sra_elt *c;
+ bool any_inst;
+
+ /* We shouldn't be invoked on groups of sub-elements as they must
+ behave like their parent as far as block copy is concerned. */
+ gcc_assert (!elt->is_group);
+
+ /* If scalarization is disabled, respect it. */
+ if (elt->cannot_scalarize)
+ {
+ elt->use_block_copy = 1;
+
+ if (dump_file)
+ {
+ fputs ("Scalarization disabled for ", dump_file);
+ dump_sra_elt_name (dump_file, elt);
+ fputc ('\n', dump_file);
+ }
+
+      /* Disable scalarization of sub-elements.  */
+ for (c = elt->children; c; c = c->sibling)
+ {
+ c->cannot_scalarize = 1;
+ decide_block_copy (c);
+ }
+
+ /* Groups behave like their parent. */
+ for (c = elt->groups; c; c = c->sibling)
+ {
+ c->cannot_scalarize = 1;
+ c->use_block_copy = 1;
+ }
+
+ return false;
+ }
+
+ /* Don't decide if we've no uses. */
+ if (elt->n_uses == 0 && elt->n_copies == 0)
+ ;
+
+ else if (!elt->is_scalar)
+ {
+ tree size_tree = TYPE_SIZE_UNIT (elt->type);
+ bool use_block_copy = true;
+
+ /* Tradeoffs for COMPLEX types pretty much always make it better
+ to go ahead and split the components. */
+ if (TREE_CODE (elt->type) == COMPLEX_TYPE)
+ use_block_copy = false;
+
+ /* Don't bother trying to figure out the rest if the structure is
+ so large we can't do easy arithmetic. This also forces block
+ copies for variable sized structures. */
+ else if (host_integerp (size_tree, 1))
+ {
+ unsigned HOST_WIDE_INT full_size, inst_size = 0;
+ unsigned int max_size, max_count, inst_count, full_count;
+
+ /* If the sra-max-structure-size parameter is 0, then the
+ user has not overridden the parameter and we can choose a
+ sensible default. */
+ max_size = SRA_MAX_STRUCTURE_SIZE
+ ? SRA_MAX_STRUCTURE_SIZE
+ : MOVE_RATIO * UNITS_PER_WORD;
+ max_count = SRA_MAX_STRUCTURE_COUNT
+ ? SRA_MAX_STRUCTURE_COUNT
+ : MOVE_RATIO;
+
+ full_size = tree_low_cst (size_tree, 1);
+ full_count = count_type_elements (elt->type, false);
+ inst_count = sum_instantiated_sizes (elt, &inst_size);
+
+	  /* If there is only one scalar field in the record, don't
+	     block copy.  */
+ if (single_scalar_field_in_record_p (elt->type))
+ use_block_copy = false;
+
+ /* ??? What to do here. If there are two fields, and we've only
+ instantiated one, then instantiating the other is clearly a win.
+ If there are a large number of fields then the size of the copy
+ is much more of a factor. */
+
+ /* If the structure is small, and we've made copies, go ahead
+ and instantiate, hoping that the copies will go away. */
+ if (full_size <= max_size
+ && (full_count - inst_count) <= max_count
+ && elt->n_copies > elt->n_uses)
+ use_block_copy = false;
+ else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO
+ && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO)
+ use_block_copy = false;
+
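+	  /* E.g., with MOVE_RATIO == 4 and 4-byte words, the defaults
+	     are max_size == 16 and max_count == 4, so a 16-byte struct
+	     with four scalar fields that is copied more often than it
+	     is used as a unit is element-copied by the first test; a
+	     larger but mostly instantiated struct can still qualify
+	     via the ratio test.  The numbers are illustrative.  */
+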
+ /* In order to avoid block copy, we have to be able to instantiate
+ all elements of the type. See if this is possible. */
+ if (!use_block_copy
+ && (!can_completely_scalarize_p (elt)
+ || !type_can_instantiate_all_elements (elt->type)))
+ use_block_copy = true;
+ }
+
+ elt->use_block_copy = use_block_copy;
+
+ /* Groups behave like their parent. */
+ for (c = elt->groups; c; c = c->sibling)
+ c->use_block_copy = use_block_copy;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Using %s for ",
+ use_block_copy ? "block-copy" : "element-copy");
+ dump_sra_elt_name (dump_file, elt);
+ fputc ('\n', dump_file);
+ }
+
+ if (!use_block_copy)
+ {
+ instantiate_missing_elements (elt);
+ return true;
+ }
+ }
+
+ any_inst = elt->replacement != NULL;
+
+ for (c = elt->children; c ; c = c->sibling)
+ any_inst |= decide_block_copy (c);
+
+ return any_inst;
+}
+
+/* Entry point to phase 3. Instantiate scalar replacement variables. */
+
+static void
+decide_instantiations (void)
+{
+ unsigned int i;
+ bool cleared_any;
+ bitmap_head done_head;
+ bitmap_iterator bi;
+
+ /* We cannot clear bits from a bitmap we're iterating over,
+ so save up all the bits to clear until the end. */
+ bitmap_initialize (&done_head, &bitmap_default_obstack);
+ cleared_any = false;
+
+ EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
+ {
+ tree var = referenced_var (i);
+ struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
+ if (elt)
+ {
+ decide_instantiation_1 (elt, 0, 0);
+ if (!decide_block_copy (elt))
+ elt = NULL;
+ }
+ if (!elt)
+ {
+ bitmap_set_bit (&done_head, i);
+ cleared_any = true;
+ }
+ }
+
+ if (cleared_any)
+ {
+ bitmap_and_compl_into (sra_candidates, &done_head);
+ bitmap_and_compl_into (needs_copy_in, &done_head);
+ }
+ bitmap_clear (&done_head);
+
+ mark_set_for_renaming (sra_candidates);
+
+ if (dump_file)
+ fputc ('\n', dump_file);
+}
+
+\f
+/* Phase Four: Update the function to match the replacements created. */
+
+/* Mark all the variables in VDEF/VUSE operands for STMT for
+ renaming. This becomes necessary when we modify all of a
+ non-scalar. */
+
+static void
+mark_all_v_defs_1 (tree stmt)
+{
+ tree sym;
+ ssa_op_iter iter;
+
+ update_stmt_if_modified (stmt);
+
+ FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS)
+ {
+ if (TREE_CODE (sym) == SSA_NAME)
+ sym = SSA_NAME_VAR (sym);
+ mark_sym_for_renaming (sym);
+ }
+}
+
+
+/* Mark all the variables in virtual operands in all the statements in
+ LIST for renaming. */
+
+static void
+mark_all_v_defs (tree list)
+{
+ if (TREE_CODE (list) != STATEMENT_LIST)
+ mark_all_v_defs_1 (list);
+ else
+ {
+ tree_stmt_iterator i;
+ for (i = tsi_start (list); !tsi_end_p (i); tsi_next (&i))
+ mark_all_v_defs_1 (tsi_stmt (i));
+ }
+}
+
+
+/* Mark every replacement under ELT with TREE_NO_WARNING. */
+
+static void
+mark_no_warning (struct sra_elt *elt)
+{
+ if (!elt->all_no_warning)
+ {
+ if (elt->replacement)
+ TREE_NO_WARNING (elt->replacement) = 1;
+ else
+ {
+ struct sra_elt *c;
+ FOR_EACH_ACTUAL_CHILD (c, elt)
+ mark_no_warning (c);
+ }
+ elt->all_no_warning = true;
+ }
+}
+
+/* Build a single level component reference to ELT rooted at BASE. */
+
+static tree
+generate_one_element_ref (struct sra_elt *elt, tree base)
+{
+ switch (TREE_CODE (TREE_TYPE (base)))
+ {
+ case RECORD_TYPE:
+ {
+ tree field = elt->element;
+
+ /* Watch out for compatible records with differing field lists. */
+ if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base)))
+ field = find_compatible_field (TREE_TYPE (base), field);
+
+ return build3 (COMPONENT_REF, elt->type, base, field, NULL);
+ }
+
+ case ARRAY_TYPE:
+ if (TREE_CODE (elt->element) == RANGE_EXPR)
+ return build4 (ARRAY_RANGE_REF, elt->type, base,
+ TREE_OPERAND (elt->element, 0), NULL, NULL);