1 /* Scalar Replacement of Aggregates (SRA) converts some structure
2 references into scalar references, exposing them to the scalar optimizers.
4 Copyright (C) 2003, 2004, 2005, 2006, 2007
5 Free Software Foundation, Inc.
6 Contributed by Diego Novillo <dnovillo@redhat.com>
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
27 #include "coretypes.h"
32 /* These RTL headers are needed for basic-block.h. */
35 #include "hard-reg-set.h"
36 #include "basic-block.h"
37 #include "diagnostic.h"
38 #include "langhooks.h"
39 #include "tree-inline.h"
40 #include "tree-flow.h"
41 #include "tree-gimple.h"
42 #include "tree-dump.h"
43 #include "tree-pass.h"
49 /* expr.h is needed for MOVE_RATIO. */
54 /* The objective of this pass is to replace a non-addressable aggregate with a
55 set of independent variables. Most of the time, all of these variables
56 will be scalars. But a secondary objective is to break up larger
57 aggregates into smaller aggregates. In the process we may find that some
58 bits of the larger aggregate can be deleted as unreferenced.
60 This substitution is done globally. More localized substitutions would
61 be the purview of a load-store motion pass.
63 The optimization proceeds in phases:
65 (1) Identify variables that have types that are candidates for decomposition.
68 (2) Scan the function looking for the ways these variables are used.
69 In particular we're interested in the number of times a variable
70 (or member) is needed as a complete unit, and the number of times
71 a variable (or member) is copied.
73 (3) Based on the usage profile, instantiate substitution variables.
75 (4) Scan the function making replacements.
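     For illustration only (an editor-added sketch, not taken from the
     original sources), given source along the lines of

	struct point { int x; int y; } p;
	p.x = 1;
	p.y = 2;
	use (p.x + p.y);

     the pass would replace the aggregate P with two independent scalar
     temporaries (named roughly p$x and p$y by build_element_name),
     rewrite the member accesses to use them, and leave any
     now-unreferenced pieces of P to be deleted later.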
79 /* True if this is the "early" pass, before inlining. */
80 static bool early_sra;
82 /* The set of todo flags to return from tree_sra. */
83 static unsigned int todoflags;
85 /* The set of aggregate variables that are candidates for scalarization. */
86 static bitmap sra_candidates;
88 /* Set of scalarizable PARM_DECLs that need copy-in operations at the
89 beginning of the function. */
90 static bitmap needs_copy_in;
92 /* Sets of bit pairs that cache type decomposition and instantiation. */
93 static bitmap sra_type_decomp_cache;
94 static bitmap sra_type_inst_cache;
96 /* One of these structures is created for each candidate aggregate and
97 each (accessed) member or group of members of such an aggregate. */
100 /* A tree of the elements. Used when we want to traverse everything. */
101 struct sra_elt *parent;
102 struct sra_elt *groups;
103 struct sra_elt *children;
104 struct sra_elt *sibling;
106 /* If this element is a root, then this is the VAR_DECL. If this is
107 a sub-element, this is some token used to identify the reference.
108 In the case of COMPONENT_REF, this is the FIELD_DECL. In the case
109 of an ARRAY_REF, this is the (constant) index. In the case of an
110 ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR. In the case
111 of a complex number, this is a zero or one. */
114 /* The type of the element. */
117 /* A VAR_DECL, for any sub-element we've decided to replace. */
120 /* The number of times the element is referenced as a whole. I.e.
121 given "a.b.c", this would be incremented for C, but not for A or B. */
124 /* The number of times the element is copied to or from another
125 scalarizable element. */
126 unsigned int n_copies;
128 /* True if TYPE is scalar. */
131 /* True if this element is a group of members of its parent. */
134 /* True if we saw something about this element that prevents scalarization,
135 such as non-constant indexing. */
136 bool cannot_scalarize;
138 /* True if we've decided that structure-to-structure assignment
139 should happen via memcpy and not per-element. */
142 /* True if everything under this element has been marked TREE_NO_WARNING. */
145 /* A flag for use with/after random access traversals. */
148 /* True if there is BIT_FIELD_REF on the lhs with a vector. */
151 /* 1 if the element is a field that is part of a block, 2 if the field
152 is the block itself, 0 if it's neither. */
153 char in_bitfld_block;
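/* Editor-added illustration (not part of the original sources): for a
   candidate such as

     struct { int i; _Complex float c; } v;

   the element tree has a root whose ELEMENT is the VAR_DECL for V, one
   child keyed on the FIELD_DECL for I, and one child for C whose own
   children are keyed on integer_zero_node (real part) and
   integer_one_node (imaginary part), as described above.  */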
156 #define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR)
158 #define FOR_EACH_ACTUAL_CHILD(CHILD, ELT) \
159 for ((CHILD) = (ELT)->is_group \
160 ? next_child_for_group (NULL, (ELT)) \
163 (CHILD) = (ELT)->is_group \
164 ? next_child_for_group ((CHILD), (ELT)) \
167 /* Helper function for above macro. Return next child in group. */
168 static struct sra_elt *
169 next_child_for_group (struct sra_elt *child, struct sra_elt *group)
171 gcc_assert (group->is_group);
173 /* Find the next child in the parent. */
175 child = child->sibling;
177 child = group->parent->children;
179 /* Skip siblings that do not belong to the group. */
182 tree g_elt = group->element;
183 if (TREE_CODE (g_elt) == RANGE_EXPR)
185 if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0))
186 && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element))
192 child = child->sibling;
198 /* Random access to the child of a parent is performed by hashing.
199 This prevents quadratic behavior, and allows SRA to function
200 reasonably on larger records. */
201 static htab_t sra_map;
203 /* All structures are allocated out of the following obstack. */
204 static struct obstack sra_obstack;
206 /* Debugging functions. */
207 static void dump_sra_elt_name (FILE *, struct sra_elt *);
208 extern void debug_sra_elt_name (struct sra_elt *);
210 /* Forward declarations. */
211 static tree generate_element_ref (struct sra_elt *);
213 /* Return true if DECL is an SRA candidate. */
216 is_sra_candidate_decl (tree decl)
218 return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl));
221 /* Return true if TYPE is a scalar type. */
224 is_sra_scalar_type (tree type)
226 enum tree_code code = TREE_CODE (type);
227 return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
228 || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
229 || code == POINTER_TYPE || code == OFFSET_TYPE
230 || code == REFERENCE_TYPE);
233 /* Return true if TYPE can be decomposed into a set of independent variables.
235 Note that this doesn't imply that all elements of TYPE can be
236 instantiated, just that if we decide to break up the type into
237 separate pieces that it can be done. */
240 sra_type_can_be_decomposed_p (tree type)
242 unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
245 /* Avoid searching the same type twice. */
246 if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
248 if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
251 /* The type must have a definite nonzero size. */
252 if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
253 || integer_zerop (TYPE_SIZE (type)))
256 /* The type must be a non-union aggregate. */
257 switch (TREE_CODE (type))
261 bool saw_one_field = false;
263 for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
264 if (TREE_CODE (t) == FIELD_DECL)
266 /* Reject incorrectly represented bit fields. */
267 if (DECL_BIT_FIELD (t)
268 && (tree_low_cst (DECL_SIZE (t), 1)
269 != TYPE_PRECISION (TREE_TYPE (t))))
272 saw_one_field = true;
275 /* Record types must have at least one field. */
282 /* Array types must have a fixed lower and upper bound. */
283 t = TYPE_DOMAIN (type);
286 if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
288 if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
299 bitmap_set_bit (sra_type_decomp_cache, cache+0);
303 bitmap_set_bit (sra_type_decomp_cache, cache+1);
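/* Editor-added example (a sketch, not from the original sources): a
   record such as

     struct rgb { unsigned char r, g, b; };

   passes the checks above and is cached as decomposable, whereas a
   union, a variable-sized or zero-sized type, or a record containing a
   bit-field whose DECL_SIZE does not match its TYPE_PRECISION is
   rejected and cached as non-decomposable.  */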
307 /* Return true if DECL can be decomposed into a set of independent
308 (though not necessarily scalar) variables. */
311 decl_can_be_decomposed_p (tree var)
313 /* Early out for scalars. */
314 if (is_sra_scalar_type (TREE_TYPE (var)))
317 /* The variable must not be aliased. */
318 if (!is_gimple_non_addressable (var))
320 if (dump_file && (dump_flags & TDF_DETAILS))
322 fprintf (dump_file, "Cannot scalarize variable ");
323 print_generic_expr (dump_file, var, dump_flags);
324 fprintf (dump_file, " because it must live in memory\n");
329 /* The variable must not be volatile. */
330 if (TREE_THIS_VOLATILE (var))
332 if (dump_file && (dump_flags & TDF_DETAILS))
334 fprintf (dump_file, "Cannot scalarize variable ");
335 print_generic_expr (dump_file, var, dump_flags);
336 fprintf (dump_file, " because it is declared volatile\n");
341 /* We must be able to decompose the variable's type. */
342 if (!sra_type_can_be_decomposed_p (TREE_TYPE (var)))
344 if (dump_file && (dump_flags & TDF_DETAILS))
346 fprintf (dump_file, "Cannot scalarize variable ");
347 print_generic_expr (dump_file, var, dump_flags);
348 fprintf (dump_file, " because its type cannot be decomposed\n");
353 /* HACK: if we decompose a va_list_type_node before inlining, then we'll
354 confuse tree-stdarg.c, and we won't be able to figure out which and
355 how many arguments are accessed. This really should be improved in
356 tree-stdarg.c, as the decomposition is truly a win. This could also
357 be fixed if the stdarg pass ran early, but this can't be done until
358 we have aliasing information early too. See PR 30791. */
360 && TYPE_MAIN_VARIANT (TREE_TYPE (var))
361 == TYPE_MAIN_VARIANT (va_list_type_node))
367 /* Return true if TYPE can be *completely* decomposed into scalars. */
370 type_can_instantiate_all_elements (tree type)
372 if (is_sra_scalar_type (type))
374 if (!sra_type_can_be_decomposed_p (type))
377 switch (TREE_CODE (type))
381 unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
384 if (bitmap_bit_p (sra_type_inst_cache, cache+0))
386 if (bitmap_bit_p (sra_type_inst_cache, cache+1))
389 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
390 if (TREE_CODE (f) == FIELD_DECL)
392 if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
394 bitmap_set_bit (sra_type_inst_cache, cache+1);
399 bitmap_set_bit (sra_type_inst_cache, cache+0);
404 return type_can_instantiate_all_elements (TREE_TYPE (type));
414 /* Return true if ELT and all of its sub-elements can be scalarized. */
417 can_completely_scalarize_p (struct sra_elt *elt)
421 if (elt->cannot_scalarize)
424 for (c = elt->children; c; c = c->sibling)
425 if (!can_completely_scalarize_p (c))
428 for (c = elt->groups; c; c = c->sibling)
429 if (!can_completely_scalarize_p (c))
436 /* A simplified tree hashing algorithm that only handles the types of
437 trees we expect to find in sra_elt->element. */
440 sra_hash_tree (tree t)
444 switch (TREE_CODE (t))
453 h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
457 h = iterative_hash_expr (TREE_OPERAND (t, 0), 0);
458 h = iterative_hash_expr (TREE_OPERAND (t, 1), h);
462 /* We can have types that are compatible, but have different member
463 lists, so we can't hash fields by ID. Use offsets instead. */
464 h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0);
465 h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h);
469 /* Don't take operand 0 into account, that's our parent. */
470 h = iterative_hash_expr (TREE_OPERAND (t, 1), 0);
471 h = iterative_hash_expr (TREE_OPERAND (t, 2), h);
481 /* Hash function for type SRA_PAIR. */
484 sra_elt_hash (const void *x)
486 const struct sra_elt *e = x;
487 const struct sra_elt *p;
490 h = sra_hash_tree (e->element);
492 /* Take into account everything except bitfield blocks back up the
493 chain. Given that chain lengths are rarely very long, this
494 should be acceptable. If we truly identify this as a performance
495 problem, it should work to hash the pointer value of the parent instead. */
497 for (p = e->parent; p ; p = p->parent)
498 if (!p->in_bitfld_block)
499 h = (h * 65521) ^ sra_hash_tree (p->element);
504 /* Equality function for type SRA_PAIR. */
507 sra_elt_eq (const void *x, const void *y)
509 const struct sra_elt *a = x;
510 const struct sra_elt *b = y;
512 const struct sra_elt *ap = a->parent;
513 const struct sra_elt *bp = b->parent;
516 while (ap->in_bitfld_block)
519 while (bp->in_bitfld_block)
530 if (TREE_CODE (ae) != TREE_CODE (be))
533 switch (TREE_CODE (ae))
538 /* These are all pointer unique. */
542 /* Integers are not pointer unique, so compare their values. */
543 return tree_int_cst_equal (ae, be);
547 tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0))
548 && tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1));
551 /* Fields are unique within a record, but not between
552 compatible records. */
553 if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be))
555 return fields_compatible_p (ae, be);
559 tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1))
560 && tree_int_cst_equal (TREE_OPERAND (ae, 2), TREE_OPERAND (be, 2));
567 /* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT
568 may be null, in which case CHILD must be a DECL. */
570 static struct sra_elt *
571 lookup_element (struct sra_elt *parent, tree child, tree type,
572 enum insert_option insert)
574 struct sra_elt dummy;
575 struct sra_elt **slot;
579 dummy.parent = parent->is_group ? parent->parent : parent;
582 dummy.element = child;
584 slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
585 if (!slot && insert == NO_INSERT)
589 if (!elt && insert == INSERT)
591 *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt));
592 memset (elt, 0, sizeof (*elt));
594 elt->parent = parent;
595 elt->element = child;
597 elt->is_scalar = is_sra_scalar_type (type);
601 if (IS_ELEMENT_FOR_GROUP (elt->element))
603 elt->is_group = true;
604 elt->sibling = parent->groups;
605 parent->groups = elt;
609 elt->sibling = parent->children;
610 parent->children = elt;
614 /* If this is a parameter, then if we want to scalarize, we have
615 one copy from the true function parameter. Count it now. */
616 if (TREE_CODE (child) == PARM_DECL)
619 bitmap_set_bit (needs_copy_in, DECL_UID (child));
626 /* Create or return the SRA_ELT structure for EXPR if the expression
627 refers to a scalarizable variable. */
629 static struct sra_elt *
630 maybe_lookup_element_for_expr (tree expr)
635 switch (TREE_CODE (expr))
640 if (is_sra_candidate_decl (expr))
641 return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
645 /* We can't scalarize variable array indices. */
646 if (in_array_bounds_p (expr))
647 child = TREE_OPERAND (expr, 1);
652 case ARRAY_RANGE_REF:
653 /* We can't scalarize variable array indices. */
654 if (range_in_array_bounds_p (expr))
656 tree domain = TYPE_DOMAIN (TREE_TYPE (expr));
657 child = build2 (RANGE_EXPR, integer_type_node,
658 TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain));
665 /* Don't look through unions. */
666 if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
668 child = TREE_OPERAND (expr, 1);
672 child = integer_zero_node;
675 child = integer_one_node;
682 elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
684 return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
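/* Editor-added note (not from the original sources): for an expression
   such as a.b[2].c, where A is an SRA candidate, the recursion above
   looks up or creates elements for A, A.B, A.B[2] and A.B[2].C in turn,
   keying each child on the FIELD_DECL or constant index involved.  */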
689 /* Functions to walk just enough of the tree to see all scalarizable
690 references, and categorize them. */
692 /* A set of callbacks for phases 2 and 4. They'll be invoked for the
693 various kinds of references seen. In all cases, *BSI is an iterator
694 pointing to the statement being processed. */
697 /* Invoked when ELT is required as a unit. Note that ELT might refer to
698 a leaf node, in which case this is a simple scalar reference. *EXPR_P
699 points to the location of the expression. IS_OUTPUT is true if this
700 is a left-hand-side reference. */
701 void (*use) (struct sra_elt *elt, tree *expr_p,
702 block_stmt_iterator *bsi, bool is_output);
704 /* Invoked when we have a copy between two scalarizable references. */
705 void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
706 block_stmt_iterator *bsi);
708 /* Invoked when ELT is initialized from a constant. VALUE may be NULL,
709 in which case it should be treated as an empty CONSTRUCTOR. */
710 void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
712 /* Invoked when we have a copy between one scalarizable reference ELT
713 and one non-scalarizable reference OTHER without side-effects.
714 IS_OUTPUT is true if ELT is on the left-hand side. */
715 void (*ldst) (struct sra_elt *elt, tree other,
716 block_stmt_iterator *bsi, bool is_output);
718 /* True during phase 2, false during phase 4. */
719 /* ??? This is a hack. */
723 #ifdef ENABLE_CHECKING
724 /* Invoked via walk_tree, if *TP contains a candidate decl, return it. */
727 sra_find_candidate_decl (tree *tp, int *walk_subtrees,
728 void *data ATTRIBUTE_UNUSED)
731 enum tree_code code = TREE_CODE (t);
733 if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
736 if (is_sra_candidate_decl (t))
746 /* Walk most expressions looking for a scalarizable aggregate.
747 If we find one, invoke FNS->USE. */
750 sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
751 const struct sra_walk_fns *fns)
755 bool disable_scalarization = false;
757 /* We're looking to collect a reference expression between EXPR and INNER,
758 such that INNER is a scalarizable decl and all other nodes through EXPR
759 are references that we can scalarize. If we come across something that
760 we can't scalarize, we reset EXPR. This has the effect of making it
761 appear that we're referring to the larger expression as a whole. */
764 switch (TREE_CODE (inner))
769 /* If there is a scalarizable decl at the bottom, then process it. */
770 if (is_sra_candidate_decl (inner))
772 struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
773 if (disable_scalarization)
774 elt->cannot_scalarize = true;
776 fns->use (elt, expr_p, bsi, is_output);
781 /* Non-constant index means any member may be accessed. Prevent the
782 expression from being scalarized. If we were to treat this as a
783 reference to the whole array, we can wind up with a single dynamic
784 index reference inside a loop being overridden by several constant
785 index references during loop setup. It's possible that this could
786 be avoided by using dynamic usage counts based on BB trip counts
787 (based on loop analysis or profiling), but that hardly seems worth the effort. */
789 /* ??? Hack. Figure out how to push this into the scan routines
790 without duplicating too much code. */
791 if (!in_array_bounds_p (inner))
793 disable_scalarization = true;
796 /* ??? Are we assured that non-constant bounds and stride will have
797 the same value everywhere? I don't think Fortran will... */
798 if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
800 inner = TREE_OPERAND (inner, 0);
803 case ARRAY_RANGE_REF:
804 if (!range_in_array_bounds_p (inner))
806 disable_scalarization = true;
809 /* ??? See above re non-constant bounds and stride. */
810 if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
812 inner = TREE_OPERAND (inner, 0);
816 /* A reference to a union member constitutes a reference to the entire union. */
818 if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
820 /* ??? See above re non-constant stride. */
821 if (TREE_OPERAND (inner, 2))
823 inner = TREE_OPERAND (inner, 0);
828 inner = TREE_OPERAND (inner, 0);
832 /* A bit field reference to a specific vector is scalarized, but ones
833 used as outputs need to be noted as left-hand-side uses, so that when
834 we scalarize the vector we can mark that variable as not renamable. */
836 && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE)
839 = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0));
841 elt->is_vector_lhs = true;
843 /* A bit field reference (access to *multiple* fields simultaneously)
844 is not currently scalarized. Consider this an access to the
845 complete outer element, to which walk_tree will bring us next. */
849 case VIEW_CONVERT_EXPR:
851 /* Similarly, a view/nop explicitly wants to look at an object in a
852 type other than the one we've scalarized. */
856 /* This is a transparent wrapper. The entire inner expression really is being used. */
861 expr_p = &TREE_OPERAND (inner, 0);
862 inner = expr = *expr_p;
866 #ifdef ENABLE_CHECKING
867 /* Validate that we're not missing any references. */
868 gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
874 /* Walk a TREE_LIST of values looking for scalarizable aggregates.
875 If we find one, invoke FNS->USE. */
878 sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
879 const struct sra_walk_fns *fns)
882 for (op = list; op ; op = TREE_CHAIN (op))
883 sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
886 /* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
887 If we find one, invoke FNS->USE. */
890 sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
891 const struct sra_walk_fns *fns)
894 int nargs = call_expr_nargs (expr);
895 for (i = 0; i < nargs; i++)
896 sra_walk_expr (&CALL_EXPR_ARG (expr, i), bsi, false, fns);
899 /* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
900 aggregates. If we find one, invoke FNS->USE. */
903 sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
904 const struct sra_walk_fns *fns)
906 sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
907 sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
910 static void sra_replace (block_stmt_iterator *bsi, tree list);
911 static tree sra_build_elt_assignment (struct sra_elt *elt, tree src);
913 /* Walk a GIMPLE_MODIFY_STMT and categorize the assignment appropriately. */
916 sra_walk_gimple_modify_stmt (tree expr, block_stmt_iterator *bsi,
917 const struct sra_walk_fns *fns)
919 struct sra_elt *lhs_elt, *rhs_elt;
922 lhs = GIMPLE_STMT_OPERAND (expr, 0);
923 rhs = GIMPLE_STMT_OPERAND (expr, 1);
924 lhs_elt = maybe_lookup_element_for_expr (lhs);
925 rhs_elt = maybe_lookup_element_for_expr (rhs);
927 /* If both sides are scalarizable, this is a COPY operation. */
928 if (lhs_elt && rhs_elt)
930 fns->copy (lhs_elt, rhs_elt, bsi);
934 /* If the RHS is scalarizable, handle it. There are only two cases. */
937 if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
938 fns->ldst (rhs_elt, lhs, bsi, false);
940 fns->use (rhs_elt, &GIMPLE_STMT_OPERAND (expr, 1), bsi, false);
943 /* If it isn't scalarizable, there may be scalarizable variables within, so
944 check for a call or else walk the RHS to see if we need to do any
945 copy-in operations. We need to do it before the LHS is scalarized so
946 that the statements get inserted in the proper place, before any
947 copy-out operations. */
950 tree call = get_call_expr_in (rhs);
952 sra_walk_call_expr (call, bsi, fns);
954 sra_walk_expr (&GIMPLE_STMT_OPERAND (expr, 1), bsi, false, fns);
957 /* Likewise, handle the LHS being scalarizable. We have cases similar
958 to those above, but also want to handle RHS being constant. */
961 /* If this is an assignment from a constant, or constructor, then
962 we have access to all of the elements individually. Invoke INIT. */
963 if (TREE_CODE (rhs) == COMPLEX_EXPR
964 || TREE_CODE (rhs) == COMPLEX_CST
965 || TREE_CODE (rhs) == CONSTRUCTOR)
966 fns->init (lhs_elt, rhs, bsi);
968 /* If this is an assignment from read-only memory, treat this as if
969 we'd been passed the constructor directly. Invoke INIT. */
970 else if (TREE_CODE (rhs) == VAR_DECL
972 && TREE_READONLY (rhs)
973 && targetm.binds_local_p (rhs))
974 fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);
976 /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
977 The lvalue requirement prevents us from trying to directly scalarize
978 the result of a function call. Which would result in trying to call
979 the function multiple times, and other evil things. */
980 else if (!lhs_elt->is_scalar
981 && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
982 fns->ldst (lhs_elt, rhs, bsi, true);
984 /* Otherwise we're being used in some context that requires the
985 aggregate to be seen as a whole. Invoke USE. */
988 fns->use (lhs_elt, &GIMPLE_STMT_OPERAND (expr, 0), bsi, true);
992 /* Similarly to above, LHS_ELT being null only means that the LHS as a
993 whole is not a scalarizable reference. There may be occurrences of
994 scalarizable variables within, which implies a USE. */
996 sra_walk_expr (&GIMPLE_STMT_OPERAND (expr, 0), bsi, true, fns);
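/* Editor-added summary (a sketch, not from the original sources) of the
   classification above, where A and B are scalarizable aggregates and X
   is some non-scalarizable object:

     a = b;          both sides scalarizable           -> fns->copy
     a = <ctor>;     COMPLEX_EXPR/CST or CONSTRUCTOR   -> fns->init
     a = x; x = a;   one non-scalarizable lvalue       -> fns->ldst
     a = foo (...);  aggregate needed as a whole       -> fns->use  */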
999 /* Entry point to the walk functions. Search the entire function,
1000 invoking the callbacks in FNS on each of the references to
1001 scalarizable variables. */
1004 sra_walk_function (const struct sra_walk_fns *fns)
1007 block_stmt_iterator si, ni;
1009 /* ??? Phase 4 could derive some benefit from walking the function in
1010 dominator tree order. */
1013 for (si = bsi_start (bb); !bsi_end_p (si); si = ni)
1018 stmt = bsi_stmt (si);
1019 ann = stmt_ann (stmt);
1024 /* If the statement has no virtual operands, then it doesn't
1025 make any structure references that we care about. */
1026 if (gimple_aliases_computed_p (cfun)
1027 && ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE)))
1030 switch (TREE_CODE (stmt))
1033 /* If we have "return <retval>" then the return value is
1034 already exposed for our pleasure. Walk it as a USE to
1035 force all the components back in place for the return.
1037 If we have an embedded assignment, then <retval> is of
1038 a type that gets returned in registers in this ABI, and
1039 we do not wish to extend their lifetimes. Treat this
1040 as a USE of the variable on the RHS of this assignment. */
1042 t = TREE_OPERAND (stmt, 0);
1045 else if (TREE_CODE (t) == GIMPLE_MODIFY_STMT)
1046 sra_walk_expr (&GIMPLE_STMT_OPERAND (t, 1), &si, false, fns);
1048 sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
1051 case GIMPLE_MODIFY_STMT:
1052 sra_walk_gimple_modify_stmt (stmt, &si, fns);
1055 sra_walk_call_expr (stmt, &si, fns);
1058 sra_walk_asm_expr (stmt, &si, fns);
1067 /* Phase One: Scan all referenced variables in the program looking for
1068 structures that could be decomposed. */
1071 find_candidates_for_sra (void)
1073 bool any_set = false;
1075 referenced_var_iterator rvi;
1077 FOR_EACH_REFERENCED_VAR (var, rvi)
1079 if (decl_can_be_decomposed_p (var))
1081 bitmap_set_bit (sra_candidates, DECL_UID (var));
1090 /* Phase Two: Scan all references to scalarizable variables. Count the
1091 number of times they are used or copied respectively. */
1093 /* Callbacks to fill in SRA_WALK_FNS. Everything but USE is
1094 considered a copy, because we can decompose the reference such that
1095 the sub-elements needn't be contiguous. */
1098 scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
1099 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1100 bool is_output ATTRIBUTE_UNUSED)
1106 scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
1107 block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
1109 lhs_elt->n_copies += 1;
1110 rhs_elt->n_copies += 1;
1114 scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
1115 block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
1117 lhs_elt->n_copies += 1;
1121 scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
1122 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1123 bool is_output ATTRIBUTE_UNUSED)
1128 /* Dump the values we collected during the scanning phase. */
1131 scan_dump (struct sra_elt *elt)
1135 dump_sra_elt_name (dump_file, elt);
1136 fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
1138 for (c = elt->children; c ; c = c->sibling)
1141 for (c = elt->groups; c ; c = c->sibling)
1145 /* Entry point to phase 2. Scan the entire function, building up
1146 scalarization data structures, recording copies and uses. */
1149 scan_function (void)
1151 static const struct sra_walk_fns fns = {
1152 scan_use, scan_copy, scan_init, scan_ldst, true
1156 sra_walk_function (&fns);
1158 if (dump_file && (dump_flags & TDF_DETAILS))
1162 fputs ("\nScan results:\n", dump_file);
1163 EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
1165 tree var = referenced_var (i);
1166 struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
1170 fputc ('\n', dump_file);
1174 /* Phase Three: Make decisions about which variables to scalarize, if any.
1175 All elements to be scalarized have replacement variables made for them. */
1177 /* A subroutine of build_element_name. Recursively build the element
1178 name on the obstack. */
1181 build_element_name_1 (struct sra_elt *elt)
1188 build_element_name_1 (elt->parent);
1189 obstack_1grow (&sra_obstack, '$');
1191 if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
1193 if (elt->element == integer_zero_node)
1194 obstack_grow (&sra_obstack, "real", 4);
1196 obstack_grow (&sra_obstack, "imag", 4);
1202 if (TREE_CODE (t) == INTEGER_CST)
1204 /* ??? Eh. Don't bother doing double-wide printing. */
1205 sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
1206 obstack_grow (&sra_obstack, buffer, strlen (buffer));
1208 else if (TREE_CODE (t) == BIT_FIELD_REF)
1210 sprintf (buffer, "B" HOST_WIDE_INT_PRINT_DEC,
1211 tree_low_cst (TREE_OPERAND (t, 2), 1));
1212 obstack_grow (&sra_obstack, buffer, strlen (buffer));
1213 sprintf (buffer, "F" HOST_WIDE_INT_PRINT_DEC,
1214 tree_low_cst (TREE_OPERAND (t, 1), 1));
1215 obstack_grow (&sra_obstack, buffer, strlen (buffer));
1219 tree name = DECL_NAME (t);
1221 obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
1222 IDENTIFIER_LENGTH (name));
1225 sprintf (buffer, "D%u", DECL_UID (t));
1226 obstack_grow (&sra_obstack, buffer, strlen (buffer));
1231 /* Construct a pretty variable name for an element's replacement variable.
1232 The name is built on the obstack. */
1235 build_element_name (struct sra_elt *elt)
1237 build_element_name_1 (elt);
1238 obstack_1grow (&sra_obstack, '\0');
1239 return XOBFINISH (&sra_obstack, char *);
1242 /* Instantiate an element as an independent variable. */
1245 instantiate_element (struct sra_elt *elt)
1247 struct sra_elt *base_elt;
1249 bool nowarn = TREE_NO_WARNING (elt->element);
1251 for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
1253 nowarn = base_elt->parent->n_uses
1254 || TREE_NO_WARNING (base_elt->parent->element);
1255 base = base_elt->element;
1257 elt->replacement = var = make_rename_temp (elt->type, "SR");
1259 /* For vectors, if used on the left hand side with BIT_FIELD_REF,
1260 they are not a gimple register. */
1261 if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs)
1262 DECL_GIMPLE_REG_P (var) = 0;
1264 DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
1265 DECL_ARTIFICIAL (var) = 1;
1267 if (TREE_THIS_VOLATILE (elt->type))
1269 TREE_THIS_VOLATILE (var) = 1;
1270 TREE_SIDE_EFFECTS (var) = 1;
1273 if (DECL_NAME (base) && !DECL_IGNORED_P (base))
1275 char *pretty_name = build_element_name (elt);
1276 DECL_NAME (var) = get_identifier (pretty_name);
1277 obstack_free (&sra_obstack, pretty_name);
1279 SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
1280 DECL_DEBUG_EXPR_IS_FROM (var) = 1;
1282 DECL_IGNORED_P (var) = 0;
1283 TREE_NO_WARNING (var) = nowarn;
1287 DECL_IGNORED_P (var) = 1;
1288 /* ??? We can't generate any warning that would be meaningful. */
1289 TREE_NO_WARNING (var) = 1;
1294 fputs (" ", dump_file);
1295 dump_sra_elt_name (dump_file, elt);
1296 fputs (" -> ", dump_file);
1297 print_generic_expr (dump_file, var, dump_flags);
1298 fputc ('\n', dump_file);
1302 /* Make one pass across an element tree deciding whether or not it's
1303 profitable to instantiate individual leaf scalars.
1305 PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
1306 fields all the way up the tree. */
1309 decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
1310 unsigned int parent_copies)
1312 if (dump_file && !elt->parent)
1314 fputs ("Initial instantiation for ", dump_file);
1315 dump_sra_elt_name (dump_file, elt);
1316 fputc ('\n', dump_file);
1319 if (elt->cannot_scalarize)
1324 /* The decision is simple: instantiate if we're used more frequently
1325 than the parent needs to be seen as a complete unit (a worked example follows this function). */
1326 if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
1327 instantiate_element (elt);
1331 struct sra_elt *c, *group;
1332 unsigned int this_uses = elt->n_uses + parent_uses;
1333 unsigned int this_copies = elt->n_copies + parent_copies;
1335 /* Consider groups of sub-elements as weighing in favour of
1336 instantiation whatever their size. */
1337 for (group = elt->groups; group ; group = group->sibling)
1338 FOR_EACH_ACTUAL_CHILD (c, group)
1340 c->n_uses += group->n_uses;
1341 c->n_copies += group->n_copies;
1344 for (c = elt->children; c ; c = c->sibling)
1345 decide_instantiation_1 (c, this_uses, this_copies);
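/* Editor-added worked example (not from the original sources): suppose
   A.X has n_uses == 3 and n_copies == 1, while A itself is needed as a
   complete unit twice and never copied (parent_uses == 2,
   parent_copies == 0).  Then 3 + 1 + 0 > 2, so A.X is instantiated;
   had A been used whole five times instead, A.X would be left alone.  */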
1349 /* Compute the size and number of all instantiated elements below ELT.
1350 We will only care about this if the size of the complete structure
1351 fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */
1354 sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
1356 if (elt->replacement)
1358 *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
1364 unsigned int count = 0;
1366 for (c = elt->children; c ; c = c->sibling)
1367 count += sum_instantiated_sizes (c, sizep);
1373 /* Instantiate fields in ELT->TYPE that are not currently present as children of ELT. */
1376 static void instantiate_missing_elements (struct sra_elt *elt);
1378 static struct sra_elt *
1379 instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
1381 struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
1384 if (sub->replacement == NULL)
1385 instantiate_element (sub);
1388 instantiate_missing_elements (sub);
1392 /* Obtain the canonical type for field F of ELEMENT. */
1395 canon_type_for_field (tree f, tree element)
1397 tree field_type = TREE_TYPE (f);
1399 /* canonicalize_component_ref() unwidens some bit-field types (not
1400 marked as DECL_BIT_FIELD in C++), so we must do the same, lest we
1401 introduce type mismatches. */
1402 if (INTEGRAL_TYPE_P (field_type)
1403 && DECL_MODE (f) != TYPE_MODE (field_type))
1404 field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF,
1413 /* Look for adjacent fields of ELT starting at F that we'd like to
1414 scalarize as a single variable. Return the last field of the group. */
1418 try_instantiate_multiple_fields (struct sra_elt *elt, tree f)
1420 unsigned HOST_WIDE_INT align, oalign, word, bit, size, alchk;
1421 enum machine_mode mode;
1422 tree first = f, prev;
1424 struct sra_elt *block;
1426 if (!is_sra_scalar_type (TREE_TYPE (f))
1427 || !host_integerp (DECL_FIELD_OFFSET (f), 1)
1428 || !host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)
1429 || !host_integerp (DECL_SIZE (f), 1)
1430 || lookup_element (elt, f, NULL, NO_INSERT))
1433 /* Taking the alignment of elt->element is not enough, since it
1434 might be just an array index or some such. We shouldn't need to
1435 initialize align here, but our optimizers don't always realize
1436 that, if we leave the loop without initializing align, we'll fail
1437 the assertion right after the loop. */
1438 align = (unsigned HOST_WIDE_INT)-1;
1439 for (block = elt; block; block = block->parent)
1440 if (DECL_P (block->element))
1442 align = DECL_ALIGN (block->element);
1447 oalign = DECL_OFFSET_ALIGN (f);
1448 word = tree_low_cst (DECL_FIELD_OFFSET (f), 1);
1449 bit = tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
1450 size = tree_low_cst (DECL_SIZE (f), 1);
1458 if ((bit & alchk) != ((bit + size - 1) & alchk))
1461 /* Find adjacent fields in the same alignment word. */
1463 for (prev = f, f = TREE_CHAIN (f);
1464 f && TREE_CODE (f) == FIELD_DECL
1465 && is_sra_scalar_type (TREE_TYPE (f))
1466 && host_integerp (DECL_FIELD_OFFSET (f), 1)
1467 && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)
1468 && host_integerp (DECL_SIZE (f), 1)
1469 && (HOST_WIDE_INT)word == tree_low_cst (DECL_FIELD_OFFSET (f), 1)
1470 && !lookup_element (elt, f, NULL, NO_INSERT);
1471 prev = f, f = TREE_CHAIN (f))
1473 unsigned HOST_WIDE_INT nbit, nsize;
1475 nbit = tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
1476 nsize = tree_low_cst (DECL_SIZE (f), 1);
1478 if (bit + size == nbit)
1480 if ((bit & alchk) != ((nbit + nsize - 1) & alchk))
1484 else if (nbit + nsize == bit)
1486 if ((nbit & alchk) != ((bit + size - 1) & alchk))
1500 gcc_assert ((bit & alchk) == ((bit + size - 1) & alchk));
1502 /* Try to widen the bit range so as to cover padding bits as well. */
1504 if ((bit & ~alchk) || size != align)
1506 unsigned HOST_WIDE_INT mbit = bit & alchk;
1507 unsigned HOST_WIDE_INT msize = align;
1509 for (f = TYPE_FIELDS (elt->type);
1510 f; f = TREE_CHAIN (f))
1512 unsigned HOST_WIDE_INT fword, fbit, fsize;
1514 /* Skip the fields from first to prev. */
1521 if (!(TREE_CODE (f) == FIELD_DECL
1522 && host_integerp (DECL_FIELD_OFFSET (f), 1)
1523 && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)))
1526 fword = tree_low_cst (DECL_FIELD_OFFSET (f), 1);
1527 /* If we're past the selected word, we're fine. */
1531 fbit = tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
1533 if (host_integerp (DECL_SIZE (f), 1))
1534 fsize = tree_low_cst (DECL_SIZE (f), 1);
1536 /* Assume a variable-sized field takes up all space till
1537 the end of the word. ??? Endianness issues? */
1538 fsize = align - fbit;
1542 /* A large field might start at a previous word and
1543 extend into the selected word. Exclude those
1544 bits. ??? Endianness issues? */
1545 HOST_WIDE_INT diff = fbit + fsize
1546 - (HOST_WIDE_INT)((word - fword) * BITS_PER_UNIT + mbit);
1556 gcc_assert (fword == word);
1558 /* Non-overlapping, great. */
1559 if (fbit + fsize <= mbit
1560 || mbit + msize <= fbit)
1565 unsigned HOST_WIDE_INT diff = fbit + fsize - mbit;
1569 else if (fbit > mbit)
1570 msize -= (mbit + msize - fbit);
1580 /* Now we know the bit range we're interested in. Find the smallest
1581 machine mode we can use to access it. */
1583 for (mode = smallest_mode_for_size (size, MODE_INT);
1585 mode = GET_MODE_WIDER_MODE (mode))
1587 gcc_assert (mode != VOIDmode);
1589 alchk = GET_MODE_PRECISION (mode) - 1;
1592 if ((bit & alchk) == ((bit + size - 1) & alchk))
1596 gcc_assert (~alchk < align);
1598 /* Create the field group as a single variable. */
1600 type = lang_hooks.types.type_for_mode (mode, 1);
1602 var = build3 (BIT_FIELD_REF, type, NULL_TREE,
1604 bitsize_int (word * BITS_PER_UNIT + bit));
1605 BIT_FIELD_REF_UNSIGNED (var) = 1;
1607 block = instantiate_missing_elements_1 (elt, var, type);
1608 gcc_assert (block && block->is_scalar);
1610 var = block->replacement;
1612 if (((word * BITS_PER_UNIT + bit) & ~alchk)
1613 || (HOST_WIDE_INT)size != tree_low_cst (DECL_SIZE (var), 1))
1615 block->replacement = build3 (BIT_FIELD_REF,
1616 TREE_TYPE (block->element), var,
1618 bitsize_int ((word * BITS_PER_UNIT
1620 BIT_FIELD_REF_UNSIGNED (block->replacement) = 1;
1621 TREE_NO_WARNING (block->replacement) = 1;
1624 block->in_bitfld_block = 2;
1626 /* Add the member fields to the group, such that they access
1627 portions of the group variable. */
1629 for (f = first; f != TREE_CHAIN (prev); f = TREE_CHAIN (f))
1631 tree field_type = canon_type_for_field (f, elt->element);
1632 struct sra_elt *fld = lookup_element (block, f, field_type, INSERT);
1634 gcc_assert (fld && fld->is_scalar && !fld->replacement);
1636 fld->replacement = build3 (BIT_FIELD_REF, field_type, var,
1639 ((word * BITS_PER_UNIT
1641 (DECL_FIELD_BIT_OFFSET (f))))
1643 BIT_FIELD_REF_UNSIGNED (fld->replacement) = TYPE_UNSIGNED (field_type);
1644 TREE_NO_WARNING (block->replacement) = 1;
1645 fld->in_bitfld_block = 1;
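/* Editor-added example (a sketch under assumed 32-bit int alignment, not
   from the original sources): for

     struct s { unsigned a : 8; unsigned b : 8; unsigned c : 16; };

   the three fields are adjacent within one alignment word, so the code
   above would create a single group variable of the mode covering all
   32 bits and give each field a BIT_FIELD_REF replacement into that
   variable, with in_bitfld_block set to 2 for the group and 1 for the
   fields.  */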
1652 instantiate_missing_elements (struct sra_elt *elt)
1654 tree type = elt->type;
1656 switch (TREE_CODE (type))
1661 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
1662 if (TREE_CODE (f) == FIELD_DECL)
1664 tree last = try_instantiate_multiple_fields (elt, f);
1672 instantiate_missing_elements_1 (elt, f,
1673 canon_type_for_field
1681 tree i, max, subtype;
1683 i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
1684 max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
1685 subtype = TREE_TYPE (type);
1689 instantiate_missing_elements_1 (elt, i, subtype);
1690 if (tree_int_cst_equal (i, max))
1692 i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
1699 type = TREE_TYPE (type);
1700 instantiate_missing_elements_1 (elt, integer_zero_node, type);
1701 instantiate_missing_elements_1 (elt, integer_one_node, type);
1709 /* Return true if there is only one non-aggregate field in the record, TYPE.
1710 Return false otherwise. */
1713 single_scalar_field_in_record_p (tree type)
1717 if (TREE_CODE (type) != RECORD_TYPE)
1720 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1721 if (TREE_CODE (field) == FIELD_DECL)
1725 if (num_fields == 2)
1728 if (AGGREGATE_TYPE_P (TREE_TYPE (field)))
1735 /* Make one pass across an element tree deciding whether to perform block
1736 or element copies. If we decide on element copies, instantiate all
1737 elements. Return true if there are any instantiated sub-elements. */
1740 decide_block_copy (struct sra_elt *elt)
1745 /* We shouldn't be invoked on groups of sub-elements as they must
1746 behave like their parent as far as block copy is concerned. */
1747 gcc_assert (!elt->is_group);
1749 /* If scalarization is disabled, respect it. */
1750 if (elt->cannot_scalarize)
1752 elt->use_block_copy = 1;
1756 fputs ("Scalarization disabled for ", dump_file);
1757 dump_sra_elt_name (dump_file, elt);
1758 fputc ('\n', dump_file);
1761 /* Disable scalarization of sub-elements */
1762 for (c = elt->children; c; c = c->sibling)
1764 c->cannot_scalarize = 1;
1765 decide_block_copy (c);
1768 /* Groups behave like their parent. */
1769 for (c = elt->groups; c; c = c->sibling)
1771 c->cannot_scalarize = 1;
1772 c->use_block_copy = 1;
1778 /* Don't decide if we've no uses. */
1779 if (elt->n_uses == 0 && elt->n_copies == 0)
1782 else if (!elt->is_scalar)
1784 tree size_tree = TYPE_SIZE_UNIT (elt->type);
1785 bool use_block_copy = true;
1787 /* Tradeoffs for COMPLEX types pretty much always make it better
1788 to go ahead and split the components. */
1789 if (TREE_CODE (elt->type) == COMPLEX_TYPE)
1790 use_block_copy = false;
1792 /* Don't bother trying to figure out the rest if the structure is
1793 so large we can't do easy arithmetic. This also forces block
1794 copies for variable sized structures. */
1795 else if (host_integerp (size_tree, 1))
1797 unsigned HOST_WIDE_INT full_size, inst_size = 0;
1798 unsigned int max_size, max_count, inst_count, full_count;
1800 /* If the sra-max-structure-size parameter is 0, then the
1801 user has not overridden the parameter and we can choose a
1802 sensible default. */
1803 max_size = SRA_MAX_STRUCTURE_SIZE
1804 ? SRA_MAX_STRUCTURE_SIZE
1805 : MOVE_RATIO * UNITS_PER_WORD;
1806 max_count = SRA_MAX_STRUCTURE_COUNT
1807 ? SRA_MAX_STRUCTURE_COUNT
1810 full_size = tree_low_cst (size_tree, 1);
1811 full_count = count_type_elements (elt->type, false);
1812 inst_count = sum_instantiated_sizes (elt, &inst_size);
1814 /* If there is only one scalar field in the record, don't block copy. */
1815 if (single_scalar_field_in_record_p (elt->type))
1816 use_block_copy = false;
1818 /* ??? What to do here. If there are two fields, and we've only
1819 instantiated one, then instantiating the other is clearly a win.
1820 If there are a large number of fields then the size of the copy
1821 is much more of a factor. */
1823 /* If the structure is small, and we've made copies, go ahead
1824 and instantiate, hoping that the copies will go away. */
1825 if (full_size <= max_size
1826 && (full_count - inst_count) <= max_count
1827 && elt->n_copies > elt->n_uses)
1828 use_block_copy = false;
1829 else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO
1830 && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO)
1831 use_block_copy = false;
1833 /* In order to avoid block copy, we have to be able to instantiate
1834 all elements of the type. See if this is possible. */
1836 && (!can_completely_scalarize_p (elt)
1837 || !type_can_instantiate_all_elements (elt->type)))
1838 use_block_copy = true;
1841 elt->use_block_copy = use_block_copy;
1843 /* Groups behave like their parent. */
1844 for (c = elt->groups; c; c = c->sibling)
1845 c->use_block_copy = use_block_copy;
1849 fprintf (dump_file, "Using %s for ",
1850 use_block_copy ? "block-copy" : "element-copy");
1851 dump_sra_elt_name (dump_file, elt);
1852 fputc ('\n', dump_file);
1855 if (!use_block_copy)
1857 instantiate_missing_elements (elt);
1862 any_inst = elt->replacement != NULL;
1864 for (c = elt->children; c ; c = c->sibling)
1865 any_inst |= decide_block_copy (c);
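/* Editor-added illustration (not from the original sources): with the
   parameters left at their defaults, max_size is MOVE_RATIO *
   UNITS_PER_WORD bytes.  On a hypothetical 64-bit target with a
   MOVE_RATIO of 4, a 24-byte record that is copied more often than it
   is used as a whole passes the size/count test above and is split into
   element copies, while a large or only partially scalarizable record
   keeps using block copy.  */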
1870 /* Entry point to phase 3. Instantiate scalar replacement variables. */
1873 decide_instantiations (void)
1877 bitmap_head done_head;
1880 /* We cannot clear bits from a bitmap we're iterating over,
1881 so save up all the bits to clear until the end. */
1882 bitmap_initialize (&done_head, &bitmap_default_obstack);
1883 cleared_any = false;
1885 EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
1887 tree var = referenced_var (i);
1888 struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
1891 decide_instantiation_1 (elt, 0, 0);
1892 if (!decide_block_copy (elt))
1897 bitmap_set_bit (&done_head, i);
1904 bitmap_and_compl_into (sra_candidates, &done_head);
1905 bitmap_and_compl_into (needs_copy_in, &done_head);
1907 bitmap_clear (&done_head);
1909 mark_set_for_renaming (sra_candidates);
1912 fputc ('\n', dump_file);
1916 /* Phase Four: Update the function to match the replacements created. */
1918 /* Mark all the variables in VDEF/VUSE operators for STMT for
1919 renaming. This becomes necessary when we modify all of a non-scalar. */
1923 mark_all_v_defs_1 (tree stmt)
1928 update_stmt_if_modified (stmt);
1930 FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS)
1932 if (TREE_CODE (sym) == SSA_NAME)
1933 sym = SSA_NAME_VAR (sym);
1934 mark_sym_for_renaming (sym);
1939 /* Mark all the variables in virtual operands in all the statements in
1940 LIST for renaming. */
1943 mark_all_v_defs (tree list)
1945 if (TREE_CODE (list) != STATEMENT_LIST)
1946 mark_all_v_defs_1 (list);
1949 tree_stmt_iterator i;
1950 for (i = tsi_start (list); !tsi_end_p (i); tsi_next (&i))
1951 mark_all_v_defs_1 (tsi_stmt (i));
1956 /* Mark every replacement under ELT with TREE_NO_WARNING. */
1959 mark_no_warning (struct sra_elt *elt)
1961 if (!elt->all_no_warning)
1963 if (elt->replacement)
1964 TREE_NO_WARNING (elt->replacement) = 1;
1968 FOR_EACH_ACTUAL_CHILD (c, elt)
1969 mark_no_warning (c);
1971 elt->all_no_warning = true;
1975 /* Build a single level component reference to ELT rooted at BASE. */
1978 generate_one_element_ref (struct sra_elt *elt, tree base)
1980 switch (TREE_CODE (TREE_TYPE (base)))
1984 tree field = elt->element;
1986 /* We can't test elt->in_bitfld_block here because, when this is
1987 called from instantiate_element, we haven't set this field yet. */
1989 if (TREE_CODE (field) == BIT_FIELD_REF)
1991 tree ret = copy_node (field);
1992 TREE_OPERAND (ret, 0) = base;
1996 /* Watch out for compatible records with differing field lists. */
1997 if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base)))
1998 field = find_compatible_field (TREE_TYPE (base), field);
2000 return build3 (COMPONENT_REF, elt->type, base, field, NULL);
2004 if (TREE_CODE (elt->element) == RANGE_EXPR)
2005 return build4 (ARRAY_RANGE_REF, elt->type, base,
2006 TREE_OPERAND (elt->element, 0), NULL, NULL);
2008 return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);
2011 if (elt->element == integer_zero_node)
2012 return build1 (REALPART_EXPR, elt->type, base);
2014 return build1 (IMAGPART_EXPR, elt->type, base);
2021 /* Build a full component reference to ELT rooted at its native variable. */
2024 generate_element_ref (struct sra_elt *elt)
2027 return generate_one_element_ref (elt, generate_element_ref (elt->parent));
2029 return elt->element;
2032 /* Create an assignment statement from SRC to DST. */
2035 sra_build_assignment (tree dst, tree src)
2037 /* It was hoped that we could perform some type sanity checking
2038 here, but since front-ends can emit accesses of fields in types
2039 different from their nominal types and copy structures containing
2040 them as a whole, we'd have to handle such differences here.
2041 Since such accesses under different types require compatibility
2042 anyway, there's little point in making tests and/or adding
2043 conversions to ensure the types of src and dst are the same.
2044 So we just assume type differences at this point are ok. */
2045 return build_gimple_modify_stmt (dst, src);
2048 /* BIT_FIELD_REFs must not be shared. sra_build_elt_assignment()
2049 takes care of assignments, but we must create copies for uses. */
2050 #define REPLDUP(t) (TREE_CODE (t) != BIT_FIELD_REF ? (t) : copy_node (t))
2053 sra_build_elt_assignment (struct sra_elt *elt, tree src)
2055 tree dst = elt->replacement;
2056 tree var, type, tmp, tmp2, tmp3;
2058 tree cst, cst2, mask;
2059 tree minshift, maxshift;
2061 if (TREE_CODE (dst) != BIT_FIELD_REF
2062 || !elt->in_bitfld_block)
2063 return sra_build_assignment (REPLDUP (dst), src);
2065 var = TREE_OPERAND (dst, 0);
2067 /* Try to widen the assignment to the entire variable.
2068 We need the source to be a BIT_FIELD_REF as well, such that, for
2069 BIT_FIELD_REF<d,sz,dp> = BIT_FIELD_REF<s,sz,sp>,
2070 if sp >= dp, we can turn it into
2071 d = BIT_FIELD_REF<s,sp+sz,sp-dp>. */
2072 if (elt->in_bitfld_block == 2
2073 && TREE_CODE (src) == BIT_FIELD_REF
2074 && !tree_int_cst_lt (TREE_OPERAND (src, 2), TREE_OPERAND (dst, 2)))
2076 src = fold_build3 (BIT_FIELD_REF, TREE_TYPE (var),
2077 TREE_OPERAND (src, 0),
2078 size_binop (PLUS_EXPR, TREE_OPERAND (src, 1),
2079 TREE_OPERAND (dst, 2)),
2080 size_binop (MINUS_EXPR, TREE_OPERAND (src, 2),
2081 TREE_OPERAND (dst, 2)));
2082 BIT_FIELD_REF_UNSIGNED (src) = 1;
2084 return sra_build_assignment (var, src);
2087 if (!is_gimple_reg (var))
2088 return sra_build_assignment (REPLDUP (dst), src);
2090 list = alloc_stmt_list ();
2092 cst = TREE_OPERAND (dst, 2);
2093 if (WORDS_BIG_ENDIAN)
2095 cst = size_binop (MINUS_EXPR, DECL_SIZE (var), cst);
2101 cst2 = size_binop (PLUS_EXPR, TREE_OPERAND (dst, 1),
2102 TREE_OPERAND (dst, 2));
2103 if (WORDS_BIG_ENDIAN)
2105 cst2 = size_binop (MINUS_EXPR, DECL_SIZE (var), cst2);
2111 type = TREE_TYPE (var);
2113 mask = build_int_cst_wide (type, 1, 0);
2114 cst = int_const_binop (LSHIFT_EXPR, mask, maxshift, 1);
2115 cst2 = int_const_binop (LSHIFT_EXPR, mask, minshift, 1);
2116 mask = int_const_binop (MINUS_EXPR, cst, cst2, 1);
2117 mask = fold_build1 (BIT_NOT_EXPR, type, mask);
2119 if (!WORDS_BIG_ENDIAN)
2120 cst2 = TREE_OPERAND (dst, 2);
2122 tmp = make_rename_temp (type, "SR");
2123 stmt = build_gimple_modify_stmt (tmp,
2124 fold_build2 (BIT_AND_EXPR, type,
2126 append_to_statement_list (stmt, &list);
2128 if (is_gimple_reg (src))
2132 tmp2 = make_rename_temp (TREE_TYPE (src), "SR");
2133 stmt = sra_build_assignment (tmp2, src);
2134 append_to_statement_list (stmt, &list);
2137 if (!TYPE_UNSIGNED (TREE_TYPE (tmp2))
2138 || TYPE_MAIN_VARIANT (TREE_TYPE (tmp2)) != TYPE_MAIN_VARIANT (type))
2140 tmp3 = make_rename_temp (type, "SR");
2141 tmp2 = fold_build3 (BIT_FIELD_REF, type, tmp2, TREE_OPERAND (dst, 1),
2143 if (TREE_CODE (tmp2) == BIT_FIELD_REF)
2144 BIT_FIELD_REF_UNSIGNED (tmp2) = 1;
2145 stmt = sra_build_assignment (tmp3, tmp2);
2146 append_to_statement_list (stmt, &list);
2150 if (!integer_zerop (minshift))
2152 tmp3 = make_rename_temp (type, "SR");
2153 stmt = build_gimple_modify_stmt (tmp3,
2154 fold_build2 (LSHIFT_EXPR, type,
2156 append_to_statement_list (stmt, &list);
2160 stmt = build_gimple_modify_stmt (var,
2161 fold_build2 (BIT_IOR_EXPR, type,
2163 append_to_statement_list (stmt, &list);
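/* Editor-added sketch (not from the original sources) of the statement
   sequence built above when storing SRC into a bit-field region of a
   register variable VAR (little-endian word order assumed):

     SR.1 = VAR & MASK;          MASK has the field's bits cleared
     SR.2 = (TYPE) SRC;          SRC converted to VAR's type if needed
     SR.3 = SR.2 << MINSHIFT;    shifted into position
     VAR  = SR.1 | SR.3;         merged back into the variable  */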
2168 /* Generate a set of assignment statements in *LIST_P to copy all
2169 instantiated elements under ELT to or from the equivalent structure
2170 rooted at EXPR. COPY_OUT controls the direction of the copy, with
2171 true meaning to copy out of EXPR into ELT. */
2174 generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
2180 if (!copy_out && TREE_CODE (expr) == SSA_NAME
2181 && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE)
2185 c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT);
2187 c = lookup_element (elt, integer_one_node, NULL, NO_INSERT);
2190 t = build2 (COMPLEX_EXPR, elt->type, r, i);
2191 t = sra_build_assignment (expr, t);
2192 SSA_NAME_DEF_STMT (expr) = t;
2193 append_to_statement_list (t, list_p);
2195 else if (elt->replacement)
2198 t = sra_build_elt_assignment (elt, expr);
2200 t = sra_build_assignment (expr, REPLDUP (elt->replacement));
2201 append_to_statement_list (t, list_p);
2205 FOR_EACH_ACTUAL_CHILD (c, elt)
2207 t = generate_one_element_ref (c, unshare_expr (expr));
2208 generate_copy_inout (c, copy_out, t, list_p);
2213 /* Generate a set of assignment statements in *LIST_P to copy all instantiated
2214 elements under SRC to their counterparts under DST. There must be a 1-1
2215 correspondence of instantiated elements. */
2218 generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p)
2220 struct sra_elt *dc, *sc;
2222 FOR_EACH_ACTUAL_CHILD (dc, dst)
2224 sc = lookup_element (src, dc->element, NULL, NO_INSERT);
2225 if (!sc && dc->in_bitfld_block == 2)
2227 struct sra_elt *dcs;
2229 FOR_EACH_ACTUAL_CHILD (dcs, dc)
2231 sc = lookup_element (src, dcs->element, NULL, NO_INSERT);
2233 generate_element_copy (dcs, sc, list_p);
2239 generate_element_copy (dc, sc, list_p);
2242 if (dst->replacement)
2246 gcc_assert (src->replacement);
2248 t = sra_build_elt_assignment (dst, REPLDUP (src->replacement));
2249 append_to_statement_list (t, list_p);
2253 /* Generate a set of assignment statements in *LIST_P to zero all instantiated
2254 elements under ELT. In addition, do not assign to elements that have been
2255 marked VISITED but do reset the visited flag; this allows easy coordination
2256 with generate_element_init. */
2259 generate_element_zero (struct sra_elt *elt, tree *list_p)
2265 elt->visited = false;
2269 if (!elt->in_bitfld_block)
2270 FOR_EACH_ACTUAL_CHILD (c, elt)
2271 generate_element_zero (c, list_p);
2273 if (elt->replacement)
2277 gcc_assert (elt->is_scalar);
2278 t = fold_convert (elt->type, integer_zero_node);
2280 t = sra_build_elt_assignment (elt, t);
2281 append_to_statement_list (t, list_p);
2285 /* Generate an assignment VAR = INIT, where INIT may need gimplification.
2286 Add the result to *LIST_P. */
2289 generate_one_element_init (struct sra_elt *elt, tree init, tree *list_p)
2291 /* The replacement can be almost arbitrarily complex. Gimplify. */
2292 tree stmt = sra_build_elt_assignment (elt, init);
2293 gimplify_and_add (stmt, list_p);
2296 /* Generate a set of assignment statements in *LIST_P to set all instantiated
2297 elements under ELT with the contents of the initializer INIT. In addition,
2298 mark all assigned elements VISITED; this allows easy coordination with
2299 generate_element_zero. Return false if we found a case we couldn't handle. */
2303 generate_element_init_1 (struct sra_elt *elt, tree init, tree *list_p)
2306 enum tree_code init_code;
2307 struct sra_elt *sub;
2309 unsigned HOST_WIDE_INT idx;
2310 tree value, purpose;
2312 /* We can be passed DECL_INITIAL of a static variable. It might have a
2313 conversion, which we strip off here. */
2314 STRIP_USELESS_TYPE_CONVERSION (init);
2315 init_code = TREE_CODE (init);
2319 if (elt->replacement)
2321 generate_one_element_init (elt, init, list_p);
2322 elt->visited = true;
2331 FOR_EACH_ACTUAL_CHILD (sub, elt)
2333 if (sub->element == integer_zero_node)
2334 t = (init_code == COMPLEX_EXPR
2335 ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
2337 t = (init_code == COMPLEX_EXPR
2338 ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
2339 result &= generate_element_init_1 (sub, t, list_p);
2344 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value)
2346 if (TREE_CODE (purpose) == RANGE_EXPR)
2348 tree lower = TREE_OPERAND (purpose, 0);
2349 tree upper = TREE_OPERAND (purpose, 1);
2353 sub = lookup_element (elt, lower, NULL, NO_INSERT);
2355 result &= generate_element_init_1 (sub, value, list_p);
2356 if (tree_int_cst_equal (lower, upper))
2358 lower = int_const_binop (PLUS_EXPR, lower,
2359 integer_one_node, true);
2364 sub = lookup_element (elt, purpose, NULL, NO_INSERT);
2366 result &= generate_element_init_1 (sub, value, list_p);
2372 elt->visited = true;
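/* Editor-added example (not from the original sources): a GNU C
   initializer such as

     int a[4] = { [0 ... 2] = 7 };

   reaches the CONSTRUCTOR case above with a RANGE_EXPR purpose, so the
   loop walks the indices 0, 1 and 2 and initializes each instantiated
   element to 7, marking it visited.  */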
2379 /* A wrapper function for generate_element_init_1 that handles cleanup after gimplification. */
2383 generate_element_init (struct sra_elt *elt, tree init, tree *list_p)
2387 push_gimplify_context ();
2388 ret = generate_element_init_1 (elt, init, list_p);
2389 pop_gimplify_context (NULL);
2391 /* The replacement can expose previously unreferenced variables. */
2394 tree_stmt_iterator i;
2396 for (i = tsi_start (*list_p); !tsi_end_p (i); tsi_next (&i))
2397 find_new_referenced_vars (tsi_stmt_ptr (i));

/* Insert STMT on all the outgoing edges out of BB.  Note that if BB
   has more than one edge, STMT will be replicated for each edge.  Also,
   abnormal edges will be ignored.  */
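
/* Illustrative note (not from the sources): if BB ends in a conditional
   jump with two normal successors, the same statement is queued once on
   the first edge and as an unshared copy (via unsave_expr_now) on the
   second, so each path gets its own instance.  */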

void
insert_edge_copies (tree stmt, basic_block bb)
{
  edge e;
  edge_iterator ei;
  bool first_copy = true;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* We don't need to insert copies on abnormal edges.  The
	 value of the scalar replacement is not guaranteed to
	 be valid through an abnormal edge.  */
      if (!(e->flags & EDGE_ABNORMAL))
	{
	  if (first_copy)
	    {
	      bsi_insert_on_edge (e, stmt);
	      first_copy = false;
	    }
	  else
	    bsi_insert_on_edge (e, unsave_expr_now (stmt));
	}
    }
}

/* Helper function to insert LIST before BSI, and set up line number info.  */

void
sra_insert_before (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
  bsi_insert_before (bsi, list, BSI_SAME_STMT);
}

/* Similarly, but insert after BSI.  Handles insertion onto edges as well.  */

void
sra_insert_after (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));

  if (stmt_ends_bb_p (stmt))
    insert_edge_copies (list, bsi->bb);
  else
    bsi_insert_after (bsi, list, BSI_SAME_STMT);
}

/* Similarly, but replace the statement at BSI.  */

static void
sra_replace (block_stmt_iterator *bsi, tree list)
{
  sra_insert_before (bsi, list);
  bsi_remove (bsi, false);
  if (bsi_end_p (*bsi))
    *bsi = bsi_last (bsi->bb);
}

/* Scalarize a USE.  To recap, this is either a simple reference to ELT,
   if elt is scalar, or some occurrence of ELT that requires a complete
   aggregate.  IS_OUTPUT is true if ELT is being modified.  */
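
/* Illustrative sketch (example names, not from the sources): if ELT is
   the access x.i and a scalar replacement x$i exists, the use is
   rewritten in place, e.g.
       y = x.i;   becomes   y = x$i;
   If instead the whole of x is needed (say, as a call argument), the
   instantiated elements are flushed back into x just before the
   statement, or reloaded from x just after it when x is written.  */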

static void
scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi,
	       bool is_output)
{
  tree list = NULL, stmt = bsi_stmt (*bsi);

  if (elt->replacement)
    {
      /* If we have a replacement, then updating the reference is as
	 simple as modifying the existing statement in place.  */
      if (TREE_CODE (elt->replacement) == BIT_FIELD_REF
	  && is_gimple_reg (TREE_OPERAND (elt->replacement, 0))
	  && TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
	  && &GIMPLE_STMT_OPERAND (stmt, 0) == expr_p)
	{
	  tree newstmt = sra_build_elt_assignment
	    (elt, GIMPLE_STMT_OPERAND (stmt, 1));
	  if (TREE_CODE (newstmt) != STATEMENT_LIST)
	    {
	      tree list = alloc_stmt_list ();
	      append_to_statement_list (newstmt, &list);
	      newstmt = list;
	    }
	  sra_replace (bsi, newstmt);
	  return;
	}

      mark_all_v_defs (stmt);

      *expr_p = REPLDUP (elt->replacement);
      update_stmt (stmt);
    }
  else
    {
      /* Otherwise we need some copies.  If ELT is being read, then we want
	 to store all (modified) sub-elements back into the structure before
	 the reference takes place.  If ELT is being written, then we want to
	 load the changed values back into our shadow variables.  */
      /* ??? We don't check modified for reads, we just always write all of
	 the values.  We should be able to record the SSA number of the VOP
	 for which the values were last read.  If that number matches the
	 SSA number of the VOP in the current statement, then we needn't
	 emit an assignment.  This would also eliminate double writes when
	 a structure is passed as more than one argument to a function call.
	 This optimization would be most effective if sra_walk_function
	 processed the blocks in dominator order.  */

      generate_copy_inout (elt, false, generate_element_ref (elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_before (bsi, list);
	  mark_no_warning (elt);
	}

      if (is_output)
	{
	  list = NULL;
	  generate_copy_inout (elt, true, generate_element_ref (elt), &list);
	  if (list)
	    {
	      mark_all_v_defs (list);
	      sra_insert_after (bsi, list);
	    }
	}
    }
}

/* Scalarize a COPY.  To recap, this is an assignment statement between
   two scalarizable references, LHS_ELT and RHS_ELT.  */
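
/* Illustrative sketch (example names, not from the sources): for
   struct { int i; float f; } a, b;  the copy  a = b;  becomes, when
   both sides are fully instantiated,
       a$i = b$i;
       a$f = b$f;
   and the original block copy is removed.  If either side must keep
   its block form, the block copy stays and is bracketed by copy-out
   and copy-in statements instead.  */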

static void
scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
		block_stmt_iterator *bsi)
{
  tree list, stmt;

  if (lhs_elt->replacement && rhs_elt->replacement)
    {
      /* If we have two scalar operands, modify the existing statement.  */
      stmt = bsi_stmt (*bsi);

      /* See the commentary in sra_walk_function concerning
	 RETURN_EXPR, and why we should never see one here.  */
      gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

      GIMPLE_STMT_OPERAND (stmt, 0) = lhs_elt->replacement;
      GIMPLE_STMT_OPERAND (stmt, 1) = REPLDUP (rhs_elt->replacement);
      update_stmt (stmt);
    }
  else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
    {
      /* If either side requires a block copy, then sync the RHS back
	 to the original structure, leave the original assignment
	 statement (which will perform the block copy), then load the
	 LHS values out of its now-updated original structure.  */
      /* ??? Could perform a modified pair-wise element copy.  That
	 would at least allow those elements that are instantiated in
	 both structures to be optimized well.  */

      list = NULL;
      generate_copy_inout (rhs_elt, false,
			   generate_element_ref (rhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_before (bsi, list);
	}

      list = NULL;
      generate_copy_inout (lhs_elt, true,
			   generate_element_ref (lhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_after (bsi, list);
	}
    }
  else
    {
      /* Otherwise both sides must be fully instantiated.  In which
	 case perform pair-wise element assignments and replace the
	 original block copy statement.  */

      stmt = bsi_stmt (*bsi);
      mark_all_v_defs (stmt);

      list = NULL;
      generate_element_copy (lhs_elt, rhs_elt, &list);
      gcc_assert (list);
      mark_all_v_defs (list);
      sra_replace (bsi, list);
    }
}

/* Scalarize an INIT.  To recap, this is an assignment to a scalarizable
   reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
   COMPLEX_EXPR.  If RHS is NULL, it should be treated as an empty
   CONSTRUCTOR.  */
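
/* Illustrative sketch (example names, not from the sources): given
   struct S { int i; int j; } x;  the assignment  x = (struct S){ .i = 5 };
   becomes, when x is fully instantiated,
       x$i = 5;
       x$j = 0;
   because members not mentioned in a CONSTRUCTOR are zero-initialized.  */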

static void
scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi)
{
  bool result = true;
  tree list = NULL;

  /* Generate initialization statements for all members extant in the RHS.  */
  if (rhs)
    {
      /* Unshare the expression just in case this is from a decl's initial.  */
      rhs = unshare_expr (rhs);
      result = generate_element_init (lhs_elt, rhs, &list);
    }

  /* CONSTRUCTOR is defined such that any member not mentioned is assigned
     a zero value.  Initialize the rest of the instantiated elements.  */
  generate_element_zero (lhs_elt, &list);

  if (!result)
    {
      /* If we failed to convert the entire initializer, then we must
	 leave the structure assignment in place and must load values
	 from the structure into the slots for which we did not find
	 constants.  The easiest way to do this is to generate a complete
	 copy-out, and then follow that with the constant assignments
	 that we were able to build.  DCE will clean things up.  */
      tree list0 = NULL;
      generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt),
			   &list0);
      append_to_statement_list (list, &list0);
      list = list0;
    }

  if (lhs_elt->use_block_copy || !result)
    {
      /* Since LHS is not fully instantiated, we must leave the structure
	 assignment in place.  Treating this case differently from a USE
	 exposes constants to later optimizations.  */
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_after (bsi, list);
	}
    }
  else
    {
      /* The LHS is fully instantiated.  The list of initializations
	 replaces the original structure assignment.  */
      gcc_assert (list);
      mark_all_v_defs (bsi_stmt (*bsi));
      mark_all_v_defs (list);
      sra_replace (bsi, list);
    }
}

/* A subroutine of scalarize_ldst called via walk_tree.  Set TREE_NO_TRAP
   on all INDIRECT_REFs.  */

static tree
mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;

  if (TREE_CODE (t) == INDIRECT_REF)
    {
      TREE_THIS_NOTRAP (t) = 1;
      *walk_subtrees = 0;
    }
  else if (IS_TYPE_OR_DECL_P (t))
    *walk_subtrees = 0;

  return NULL;
}

/* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
   reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
   if ELT is on the left-hand side.  */
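
/* Illustrative sketch (example names, not from the sources): if a is a
   fully instantiated local and p points to a struct of the same type,
   the load  a = *p;  becomes roughly
       a$i = p->i;
       a$f = p->f;
   so no block copy remains; a store  *p = a;  is handled the same way
   with the directions reversed.  */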

static void
scalarize_ldst (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  gcc_assert (!elt->replacement);

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
	 block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, bsi, is_output);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
	 case we can have each element stored/loaded directly to/from the
	 corresponding slot in OTHER.  This avoids a block copy.  */

      tree list = NULL, stmt = bsi_stmt (*bsi);

      mark_all_v_defs (stmt);
      generate_copy_inout (elt, is_output, other, &list);
      gcc_assert (list);
      mark_all_v_defs (list);

      /* Preserve EH semantics.  */
      if (stmt_ends_bb_p (stmt))
	{
	  tree_stmt_iterator tsi;
	  tree first;

	  /* Extract the first statement from LIST.  */
	  tsi = tsi_start (list);
	  first = tsi_stmt (tsi);
	  tsi_delink (&tsi);

	  /* Replace the old statement with this new representative.  */
	  bsi_replace (bsi, first, true);

	  if (!tsi_end_p (tsi))
	    {
	      /* If any reference would trap, then they all would.  And more
		 to the point, the first would.  Therefore none of the rest
		 will trap since the first didn't.  Indicate this by
		 iterating over the remaining statements and set
		 TREE_THIS_NOTRAP in all INDIRECT_REFs.  */
	      do
		{
		  walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL);
		  tsi_next (&tsi);
		}
	      while (!tsi_end_p (tsi));

	      insert_edge_copies (list, bsi->bb);
	    }
	}
      else
	sra_replace (bsi, list);
    }
}

/* Generate initializations for all scalarizable parameters.  */
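
/* Illustrative note (not from the sources): for a parameter
   struct S s  that is scalarized, copy-in assignments such as
       s$i = s.i;
   are queued on the edge leaving the entry block, so the replacements
   start out holding the incoming parameter values.  */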

static void
scalarize_parms (void)
{
  tree list = NULL;
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi)
    {
      tree var = referenced_var (i);
      struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
      generate_copy_inout (elt, true, var, &list);
    }

  if (list)
    {
      insert_edge_copies (list, ENTRY_BLOCK_PTR);
      mark_all_v_defs (list);
    }
}

/* Entry point to phase 4.  Update the function to match replacements.  */

static void
scalarize_function (void)
{
  static const struct sra_walk_fns fns = {
    scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
  };

  sra_walk_function (&fns);
  scalarize_parms ();
  bsi_commit_edge_inserts ();
}

/* Debug helper function.  Print ELT in a nice human-readable format.  */
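
/* Illustrative note (examples, not from the sources): a nested element
   prints as its access path, e.g. "x.f[3]" for element 3 of array
   member f of variable x, "__real__ c" for the real part of a complex
   variable c, and "$B<position>F<size>" for a synthesized bit-field
   group.  */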

void
dump_sra_elt_name (FILE *f, struct sra_elt *elt)
{
  if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
    {
      fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
      dump_sra_elt_name (f, elt->parent);
    }
  else
    {
      if (elt->parent)
	dump_sra_elt_name (f, elt->parent);
      if (DECL_P (elt->element))
	{
	  if (TREE_CODE (elt->element) == FIELD_DECL)
	    fputc ('.', f);
	  print_generic_expr (f, elt->element, dump_flags);
	}
      else if (TREE_CODE (elt->element) == BIT_FIELD_REF)
	fprintf (f, "$B" HOST_WIDE_INT_PRINT_DEC "F" HOST_WIDE_INT_PRINT_DEC,
		 tree_low_cst (TREE_OPERAND (elt->element, 2), 1),
		 tree_low_cst (TREE_OPERAND (elt->element, 1), 1));
      else if (TREE_CODE (elt->element) == RANGE_EXPR)
	fprintf (f, "["HOST_WIDE_INT_PRINT_DEC".."HOST_WIDE_INT_PRINT_DEC"]",
		 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)),
		 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1)));
      else
	fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
		 TREE_INT_CST_LOW (elt->element));
    }
}

/* Likewise, but callable from the debugger.  */

void
debug_sra_elt_name (struct sra_elt *elt)
{
  dump_sra_elt_name (stderr, elt);
  fputc ('\n', stderr);
}

void
sra_init_cache (void)
{
  if (sra_type_decomp_cache)
    return;

  sra_type_decomp_cache = BITMAP_ALLOC (NULL);
  sra_type_inst_cache = BITMAP_ALLOC (NULL);
}

/* Main entry point.  */

static unsigned int
tree_sra (void)
{
  /* Initialize local variables.  */
  todoflags = 0;
  gcc_obstack_init (&sra_obstack);
  sra_candidates = BITMAP_ALLOC (NULL);
  needs_copy_in = BITMAP_ALLOC (NULL);
  sra_init_cache ();
  sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);

  /* Scan.  If we find anything, instantiate and scalarize.  */
  if (find_candidates_for_sra ())
    {
      scan_function ();
      decide_instantiations ();
      scalarize_function ();
    }

  /* Free allocated memory.  */
  htab_delete (sra_map);
  sra_map = NULL;
  BITMAP_FREE (sra_candidates);
  BITMAP_FREE (needs_copy_in);
  BITMAP_FREE (sra_type_decomp_cache);
  BITMAP_FREE (sra_type_inst_cache);
  obstack_free (&sra_obstack, NULL);

  return todoflags;
}

static unsigned int
tree_sra_early (void)
{
  unsigned int ret;

  early_sra = true;
  ret = tree_sra ();
  early_sra = false;

  return ret;
}

static bool
gate_sra (void)
{
  return flag_tree_sra != 0;
}

struct tree_opt_pass pass_sra_early =
{
  "esra",				/* name */
  gate_sra,				/* gate */
  tree_sra_early,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_SRA,				/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func
  | TODO_update_ssa
  | TODO_ggc_collect
  | TODO_verify_ssa,			/* todo_flags_finish */
  0					/* letter */
};

struct tree_opt_pass pass_sra =
{
  "sra",				/* name */
  gate_sra,				/* gate */
  tree_sra,				/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_SRA,				/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func
  | TODO_update_ssa
  | TODO_ggc_collect
  | TODO_verify_ssa,			/* todo_flags_finish */
  0					/* letter */
};