1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
23 Matrix flattening optimization tries to replace a N-dimensional
24 matrix with its equivalent M-dimensional matrix, where M < N.
25 This first implementation focuses on global matrices defined dynamically.
27 When N==1, we actually flatten the whole matrix.
28 For instance consider a two-dimensional array a [dim1] [dim2].
29 The code for allocating space for it usually looks like:
31 a = (int **) malloc(dim1 * sizeof(int *));
32 for (i=0; i<dim1; i++)
33 a[i] = (int *) malloc (dim2 * sizeof(int));
35 If the array "a" is found suitable for this optimization,
36 its allocation is replaced by:
38 a = (int *) malloc (dim1 * dim2 *sizeof(int));
40 and all the references to a[i][j] are replaced by a[i * dim2 + j].
42 The two main phases of the optimization are the analysis
44 The driver of the optimization is matrix_reorg ().
51 We'll number the dimensions outside-in, meaning the most external
52 is 0, then 1, and so on.
53 The analysis part of the optimization determines K, the escape
54 level of a N-dimensional matrix (K <= N), that allows flattening of
55 the external dimensions 0,1,..., K-1. Escape level 0 means that the
56 whole matrix escapes and no flattening is possible.
58 The analysis part is implemented in analyze_matrix_allocation_site()
59 and analyze_matrix_accesses().
63 In this phase we define the new flattened matrices that replace the
64 original matrices in the code.
65 Implemented in transform_allocation_sites(),
66 transform_access_sites().
70 The idea of Matrix Transposing is organizing the matrix in a different
71 layout such that the dimensions are reordered.
72 This could produce better cache behavior in some cases.
74 For example, let's look at the matrix accesses in the following loop:
80 This loop can produce good cache behavior because the elements of
81 the inner dimension are accessed sequentially.
83 However, if the accesses of the matrix were of the following form:
89 In this loop we iterate the columns and not the rows.
90 Therefore, replacing the rows and columns
91 would have had an organization with better (cache) locality.
92 Replacing the dimensions of the matrix is called matrix transposing.
94 This example, of course, could be enhanced to multi-dimensional matrices
97 Since a program could include all kind of accesses, there is a decision
98 mechanism, implemented in analyze_transpose(), which implements a
99 heuristic that tries to determine whether to transpose the matrix or not,
100 according to the form of the more dominant accesses.
101 This decision is transferred to the flattening mechanism, and whether
102 the matrix was transposed or not, the matrix is flattened (if possible).
104 This decision making is based on profiling information and loop information.
105 If profiling information is available, decision making mechanism will be
106 operated, otherwise the matrix will only be flattened (if possible).
108 Both optimizations are described in the paper "Matrix flattening and
109 transposing in GCC" which was presented in GCC summit 2006.
110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf
116 #include "coretypes.h"
121 #include "tree-inline.h"
122 #include "tree-flow.h"
123 #include "tree-flow-inline.h"
124 #include "langhooks.h"
132 #include "diagnostic.h"
136 #include "c-common.h"
138 #include "function.h"
139 #include "basic-block.h"
141 #include "tree-iterator.h"
142 #include "tree-pass.h"
144 #include "tree-data-ref.h"
145 #include "tree-chrec.h"
146 #include "tree-scalar-evolution.h"
149 We need to collect a lot of data from the original malloc,
150 particularly as the gimplifier has converted:
152 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
156 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
158 T5 = (struct_type *) T4;
161 The following struct fields allow us to collect all the necessary data from
162 the gimplified program. The comments in the struct below are all based
163 on the gimple example above. */
/* Collected data describing one gimplified malloc call (see the
   gimple example above).  */
165 struct malloc_call_data
167 tree call_stmt; /* Tree for "T4 = malloc (T3);" */
168 tree size_var; /* Var decl for T3. */
169 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
172 /* The front end of the compiler, when parsing statements of the form:
174 var = (type_cast) malloc (sizeof (type));
176 always converts this single statement into the following statements
181 T.3 = (type_cast) T.2;
184 Since we need to create new malloc statements and modify the original
185 statements somewhat, we need to find all four of the above statements.
186 Currently record_call_1 (called for building cgraph edges) finds and
187 records the statements containing the actual call to malloc, but we
188 need to find the rest of the variables/statements on our own. That
189 is what the following function does. */
/* Walk the malloc statement STMT ("T4 = malloc (T3)") and fill M_DATA
   with the call statement, the size argument, and -- when the size is
   not held in a VAR_DECL -- the constant size itself.
   NOTE(review): this listing has gaps here (e.g. the code that derives
   SIZE_VAR from ARG1 is not visible); verify against the full file.  */
191 collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
193 tree size_var = NULL;
/* Only a GIMPLE_MODIFY_STMT can carry the malloc call's result.  */
198 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
200 tmp = get_call_expr_in (stmt);
201 malloc_fn_decl = CALL_EXPR_FN (tmp);
/* Bail out unless the callee is a plain FUNCTION_DECL address.  */
202 if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
203 || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
204 || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
/* The single argument of malloc is the requested size.  */
208 arg1 = CALL_EXPR_ARG (tmp, 0);
211 m_data->call_stmt = stmt;
212 m_data->size_var = size_var;
/* A non-VAR_DECL size is the malloc amount itself (a constant).  */
213 if (TREE_CODE (size_var) != VAR_DECL)
214 m_data->malloc_size = size_var;
216 m_data->malloc_size = NULL_TREE;
219 /* Information about matrix access site.
220 For example: if an access site of matrix arr is arr[i][j]
221 the ACCESS_SITE_INFO structure will have the address
222 of arr as its stmt. The INDEX_INFO will hold information about the
223 initial address and index of each dimension. */
224 struct access_site_info
226 /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR). */
229 /* In case of POINTER_PLUS_EXPR, what is the offset. */
232 /* The index which created the offset. */
235 /* The indirection level of this statement. */
238 /* TRUE for allocation site FALSE for access site. */
241 /* The function containing the access site. */
244 /* This access is iterated in the innermost loop.  */
245 bool iterated_by_inner_most_loop_p;
248 typedef struct access_site_info *access_site_info_p;
/* Heap-allocated vector of access-site pointers.  */
249 DEF_VEC_P (access_site_info_p);
250 DEF_VEC_ALLOC_P (access_site_info_p, heap);
252 /* Information about matrix to flatten. */
255 /* Decl tree of this matrix. */
257 /* Number of dimensions; number
258 of "*" in the type declaration. */
261 /* Minimum indirection level that escapes, 0 means that
262 the whole matrix escapes, k means that dimensions
263 0 to ACTUAL_DIM - k escapes. */
264 int min_indirect_level_escape;
/* The statement at which the minimum escape level was detected.  */
266 tree min_indirect_level_escape_stmt;
268 /* Is the matrix transposed. */
269 bool is_transposed_p;
271 /* Hold the allocation site for each level (dimension).
272 We can use NUM_DIMS as the upper bound and allocate the array
273 once with this number of elements and no need to use realloc and
274 MAX_MALLOCED_LEVEL. */
275 tree *malloc_for_level;
/* Number of entries currently allocated in MALLOC_FOR_LEVEL.  */
277 int max_malloced_level;
279 /* The location of the allocation sites (they must be in one
281 tree allocation_function_decl;
283 /* The calls to free for each level of indirection. */
290 /* An array which holds for each dimension its size, where
291 dimension 0 is the outer most (one that contains all the others).
293 tree *dimension_size;
295 /* An array which holds for each dimension its original size
296 (before transposing and flattening take place). */
297 tree *dimension_size_orig;
299 /* An array which holds for each dimension the size of the type
300 of elements accessed in that level (in bytes). */
301 HOST_WIDE_INT *dimension_type_size;
/* Number of entries allocated in DIMENSION_TYPE_SIZE.  */
303 int dimension_type_size_len;
305 /* An array collecting the count of accesses for each dimension. */
306 gcov_type *dim_hot_level;
308 /* An array of the accesses to be flattened.
309 elements are of type "struct access_site_info *". */
310 VEC (access_site_info_p, heap) * access_l;
312 /* A map of how the dimensions will be organized at the end of
312 /* A map of how the dimensions will be organized at the end of
317 /* In each phi node we want to record the indirection level we have when we
318 get to the phi node. Usually we will have phi nodes with more than two
319 arguments, then we must assure that all of them get to the phi node with
320 the same indirection level, otherwise it's not safe to do the flattening.
321 So we record the information regarding the indirection level each time we
322 get to the phi node in this hash table. */
324 struct matrix_access_phi_node
/* Indirection level recorded the first time the traversal reached
   this PHI node (the PHI field itself is not visible in this
   listing).  */
327 int indirection_level;
330 /* We use this structure to find if the SSA variable is accessed inside the
331 tree and record the tree containing it. */
333 struct ssa_acc_in_tree
335 /* The variable whose accesses in the tree we are looking for. */
337 /* The tree and code inside which the ssa_var is accessed, currently
338 it could be an INDIRECT_REF or CALL_EXPR. */
339 enum tree_code t_code;
341 /* The place in the containing tree. */
347 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
349 static int transform_allocation_sites (void **, void *);
350 static int transform_access_sites (void **, void *);
351 static int analyze_transpose (void **, void *);
352 static int dump_matrix_reorg_analysis (void **, void *);
354 static bool check_transpose_p;
356 /* Hash function used for the phi nodes. */
/* Hash a matrix_access_phi_node by the pointer identity of its
   PHI node.  */
359 mat_acc_phi_hash (const void *p)
361 const struct matrix_access_phi_node *ma_phi = p;
363 return htab_hash_pointer (ma_phi->phi);
366 /* Equality means phi node pointers are the same. */
369 mat_acc_phi_eq (const void *p1, const void *p2)
371 const struct matrix_access_phi_node *phi1 = p1;
372 const struct matrix_access_phi_node *phi2 = p2;
/* Equality is pointer identity of the wrapped PHI nodes; the return
   statements are not visible in this listing.  */
374 if (phi1->phi == phi2->phi)
380 /* Hold the PHI nodes we visit during the traversal for escaping
382 static htab_t htab_mat_acc_phi_nodes = NULL;
384 /* This hash-table holds the information about the matrices we are
386 static htab_t matrices_to_reorg = NULL;
388 /* Return a hash for MTT, which is really a "matrix_info *". */
390 mtt_info_hash (const void *mtt)
/* Key the hash on the matrix variable's DECL pointer.  */
392 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
395 /* Return true if MTT1 and MTT2 (which are really both of type
396 "matrix_info *") refer to the same decl. */
398 mtt_info_eq (const void *mtt1, const void *mtt2)
400 const struct matrix_info *i1 = mtt1;
401 const struct matrix_info *i2 = mtt2;
/* Two entries match when they describe the same variable DECL.  */
403 if (i1->decl == i2->decl)
409 /* Return the inner most tree that is not a cast. */
411 get_inner_of_cast_expr (tree t)
/* Peel off any chain of value-preserving conversions to reach the
   underlying operand.  */
413 while (TREE_CODE (t) == CONVERT_EXPR || TREE_CODE (t) == NOP_EXPR
414 || TREE_CODE (t) == VIEW_CONVERT_EXPR)
415 t = TREE_OPERAND (t, 0);
420 /* Return false if STMT may contain a vector expression.
421 In this situation, all matrices should not be flattened. */
423 may_flatten_matrices_1 (tree stmt)
427 switch (TREE_CODE (stmt))
429 case GIMPLE_MODIFY_STMT:
430 t = GIMPLE_STMT_OPERAND (stmt, 1);
/* Look through conversions on the rhs: if any intermediate pointer
   type ultimately points to a vector type, flattening is unsafe.  */
431 while (TREE_CODE (t) == CONVERT_EXPR || TREE_CODE (t) == NOP_EXPR)
433 if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
437 pointee = TREE_TYPE (t);
/* Strip pointers until the ultimate pointee type is reached.  */
438 while (POINTER_TYPE_P (pointee))
439 pointee = TREE_TYPE (pointee);
440 if (TREE_CODE (pointee) == VECTOR_TYPE)
444 "Found vector type, don't flatten matrix\n");
448 t = TREE_OPERAND (t, 0);
452 /* Asm code could contain vector operations. */
461 /* Return false if there are hand-written vectors in the program.
462 We disable the flattening in such a case. */
464 may_flatten_matrices (struct cgraph_node *node)
467 struct function *func;
469 block_stmt_iterator bsi;
474 func = DECL_STRUCT_FUNCTION (decl);
/* Scan every statement of every basic block in NODE's function;
   a single vector-typed statement disables flattening.  */
475 FOR_EACH_BB_FN (bb, func)
476 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
477 if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
483 /* Given a VAR_DECL, check its type to determine whether it is
484 a definition of a dynamic allocated matrix and therefore is
485 a suitable candidate for the matrix flattening optimization.
486 Return NULL if VAR_DECL is not such decl. Otherwise, allocate
487 a MATRIX_INFO structure, fill it with the relevant information
488 and return a pointer to it.
489 TODO: handle also statically defined arrays. */
490 static struct matrix_info *
491 analyze_matrix_decl (tree var_decl)
493 struct matrix_info *m_node, tmpmi, *mi;
497 gcc_assert (matrices_to_reorg);
/* Only PARM_DECLs and VAR_DECLs are candidates.  */
499 if (TREE_CODE (var_decl) == PARM_DECL)
500 var_type = DECL_ARG_TYPE (var_decl);
501 else if (TREE_CODE (var_decl) == VAR_DECL)
502 var_type = TREE_TYPE (var_decl);
/* A dynamically allocated matrix is declared as a pointer chain.  */
506 if (!POINTER_TYPE_P (var_type))
/* Strip one pointer per dimension to reach the element type;
   presumably DIM_NUM is counted here (loop body not fully visible
   in this listing -- TODO confirm).  */
509 while (POINTER_TYPE_P (var_type))
511 var_type = TREE_TYPE (var_type);
/* The element size must be a compile-time constant, otherwise the
   flattened address arithmetic cannot be generated.  */
518 if (!COMPLETE_TYPE_P (var_type)
519 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
522 /* Check to see if this pointer is already in there. */
523 tmpmi.decl = var_decl;
524 mi = htab_find (matrices_to_reorg, &tmpmi);
529 /* Record the matrix. */
531 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
532 m_node->decl = var_decl;
533 m_node->num_dims = dim_num;
535 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
537 /* Init min_indirect_level_escape to -1 to indicate that no escape
538 analysis has been done yet. */
539 m_node->min_indirect_level_escape = -1;
540 m_node->is_transposed_p = false;
/* Destructor fragment: releases the per-matrix arrays owned by a
   matrix_info entry.  NOTE(review): the enclosing function header is
   not visible in this listing.  */
549 struct matrix_info *mat = (struct matrix_info *) e;
555 free (mat->free_stmts);
556 if (mat->dim_hot_level)
557 free (mat->dim_hot_level);
558 if (mat->malloc_for_level)
559 free (mat->malloc_for_level);
562 /* Find all potential matrices.
563 TODO: currently we handle only multidimensional
564 dynamically allocated arrays. */
566 find_matrices_decl (void)
568 struct matrix_info *tmp;
570 struct varpool_node *vnode;
572 gcc_assert (matrices_to_reorg);
574 /* For every global variable in the program:
575 Check to see if it's of a candidate type and record it. */
576 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
578 tree var_decl = vnode->decl;
580 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
583 if (matrices_to_reorg)
584 if ((tmp = analyze_matrix_decl (var_decl)))
/* Only non-addressable matrices can be tracked safely.  */
586 if (!TREE_ADDRESSABLE (var_decl))
588 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
596 /* Mark that the matrix MI escapes at level L. */
598 mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
/* -1 means "no escape recorded yet"; otherwise keep the minimum of
   the levels seen, together with the statement that caused it.  */
600 if (mi->min_indirect_level_escape == -1
601 || (mi->min_indirect_level_escape > l))
603 mi->min_indirect_level_escape = l;
604 mi->min_indirect_level_escape_stmt = s;
608 /* Find if the SSA variable is accessed inside the
609 tree and record the tree containing it.
610 The only relevant uses are the case of SSA_NAME, or SSA inside
611 INDIRECT_REF, CALL_EXPR, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
613 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
617 call_expr_arg_iterator iter;
619 a->t_code = TREE_CODE (t);
/* INDIRECT_REF case: the searched var is the dereferenced base.  */
629 if (SSA_VAR_P (TREE_OPERAND (t, 0))
630 && TREE_OPERAND (t, 0) == a->ssa_var)
/* CALL_EXPR case: look for the searched var among the arguments.  */
634 FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
636 if (arg == a->ssa_var)
639 call = get_call_expr_in (t);
640 if (call && (decl = get_callee_fndecl (call)))
/* Binary cases (e.g. POINTER_PLUS_EXPR): record on which operand
   side the searched var appears.  */
646 case POINTER_PLUS_EXPR:
649 op1 = TREE_OPERAND (t, 0);
650 op2 = TREE_OPERAND (t, 1);
652 if (op1 == a->ssa_var)
657 else if (op2 == a->ssa_var)
668 /* Record the access/allocation site information for matrix MI so we can
669 handle it later in transformation. */
671 record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
672 tree index, int level, bool is_alloc)
674 struct access_site_info *acc_info;
/* Lazily create the access vector with a generous initial size.  */
677 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
680 = (struct access_site_info *)
681 xcalloc (1, sizeof (struct access_site_info));
682 acc_info->stmt = stmt;
683 acc_info->offset = offset;
684 acc_info->index = index;
685 acc_info->function_decl = current_function_decl;
686 acc_info->level = level;
687 acc_info->is_alloc = is_alloc;
/* Append the new record so the transformation phase can find it.  */
689 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
693 /* Record the malloc as the allocation site of the given LEVEL. But
694 first we make sure that all the size parameters passed to malloc in
695 all the allocation sites could be pre-calculated before the call to
696 the malloc of level 0 (the main malloc call). */
698 add_allocation_site (struct matrix_info *mi, tree stmt, int level)
700 struct malloc_call_data mcd;
702 /* Make sure that the allocation sites are in the same function. */
703 if (!mi->allocation_function_decl)
704 mi->allocation_function_decl = current_function_decl;
705 else if (mi->allocation_function_decl != current_function_decl)
707 int min_malloc_level;
709 gcc_assert (mi->malloc_for_level);
711 /* Find the minimum malloc level that already has been seen;
712 we know its allocation function must be
713 MI->allocation_function_decl since it's different than
714 CURRENT_FUNCTION_DECL then the escaping level should be
715 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1 , and the allocation function
716 must be set accordingly. */
717 for (min_malloc_level = 0;
718 min_malloc_level < mi->max_malloced_level
719 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
720 if (level < min_malloc_level)
722 mi->allocation_function_decl = current_function_decl;
723 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
727 mark_min_matrix_escape_level (mi, level, stmt);
728 /* cannot be that (level == min_malloc_level)
729 we would have returned earlier. */
734 /* Find the correct malloc information. */
735 collect_data_for_malloc_call (stmt, &mcd);
737 /* We accept only calls to malloc function; we do not accept
738 calls like calloc and realloc. */
739 if (!mi->malloc_for_level)
741 mi->malloc_for_level = xcalloc (level + 1, sizeof (tree));
742 mi->max_malloced_level = level + 1;
744 else if (mi->max_malloced_level <= level)
/* Grow the per-level table.  NOTE(review): the memset below starts
   at index MAX_MALLOCED_LEVEL + 1 and clears LEVEL -
   MAX_MALLOCED_LEVEL entries, leaving slot [MAX_MALLOCED_LEVEL]
   unzeroed -- looks off by one; verify against the full file.  */
747 = xrealloc (mi->malloc_for_level, (level + 1) * sizeof (tree));
749 /* Zero the newly allocated items. */
750 memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
751 0, (level - mi->max_malloced_level) * sizeof (tree));
753 mi->max_malloced_level = level + 1;
/* Record this malloc as the allocation site for LEVEL.  */
755 mi->malloc_for_level[level] = stmt;
758 /* Given an assignment statement STMT that we know that its
759 left-hand-side is the matrix MI variable, we traverse the immediate
760 uses backwards until we get to a malloc site. We make sure that
761 there is one and only one malloc site that sets this variable. When
762 we are performing the flattening we generate a new variable that
763 will hold the size for each dimension; each malloc that allocates a
764 dimension has the size parameter; we use that parameter to
765 initialize the dimension size variable so we can use it later in
766 the address calculations. LEVEL is the dimension we're inspecting.
767 Return if STMT is related to an allocation site. */
770 analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
771 int level, sbitmap visited)
773 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
775 tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
/* Follow SSA copies backwards to the defining statement.  */
777 rhs = get_inner_of_cast_expr (rhs);
778 if (TREE_CODE (rhs) == SSA_NAME)
780 tree def = SSA_NAME_DEF_STMT (rhs);
782 analyze_matrix_allocation_site (mi, def, level, visited);
786 /* A result of call to malloc. */
787 else if (TREE_CODE (rhs) == CALL_EXPR)
789 int call_flags = call_expr_flags (rhs);
/* Any allocator without the malloc attribute makes LEVEL escape.  */
791 if (!(call_flags & ECF_MALLOC))
793 mark_min_matrix_escape_level (mi, level, stmt);
799 const char *malloc_fname;
801 malloc_fn_decl = CALL_EXPR_FN (rhs);
802 if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
803 || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
806 mark_min_matrix_escape_level (mi, level, stmt);
809 malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
810 malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
/* Only the built-in malloc itself is supported.  */
811 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
815 "Matrix %s is an argument to function %s\n",
816 get_name (mi->decl), get_name (malloc_fn_decl));
817 mark_min_matrix_escape_level (mi, level, stmt);
821 /* This is a call to malloc. Check to see if this is the first
822 call in this indirection level; if so, mark it; if not, mark
824 if (mi->malloc_for_level
825 && mi->malloc_for_level[level]
826 && mi->malloc_for_level[level] != stmt)
/* A second, different malloc for the same level is unsupported.  */
828 mark_min_matrix_escape_level (mi, level, stmt);
832 add_allocation_site (mi, stmt, level);
835 /* If we are back to the original matrix variable then we
836 are sure that this is analyzed as an access site. */
837 else if (rhs == mi->decl)
840 /* Looks like we don't know what is happening in this
841 statement so be on the safe side and mark it as escaping. */
842 mark_min_matrix_escape_level (mi, level, stmt);
845 /* The transposing decision making.
846 In order to calculate the profitability of transposing, we collect two
847 types of information regarding the accesses:
848 1. profiling information used to express the hotness of an access, that
849 is how often the matrix is accessed by this access site (count of the
851 2. which dimension in the access site is iterated by the inner
852 most loop containing this access.
854 The matrix will have a calculated value of weighted hotness for each
856 Intuitively the hotness level of a dimension is a function of how
857 many times it was the most frequently accessed dimension in the
858 highly executed access sites of this matrix.
860 As computed by following equation:
863 \ \ dim_hot_level[i] +=
866 acc[j]->dim[i]->iter_by_inner_loop * count(j)
868 Where n is the number of dims and m is the number of the matrix
869 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
870 iterates over dim[i] in innermost loop, and is 0 otherwise.
872 The organization of the new matrix should be according to the
873 hotness of each dimension. The hotness of the dimension implies
874 the locality of the elements.*/
876 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
878 struct matrix_info *mi = *slot;
879 int min_escape_l = mi->min_indirect_level_escape;
882 struct access_site_info *acc_info;
/* Matrices with fewer than two flattenable dimensions, or with no
   recorded accesses, cannot be transposed; release the access list
   and bail out.  */
885 if (min_escape_l < 2 || !mi->access_l)
890 VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
893 VEC_free (access_site_info_p, heap, mi->access_l);
898 if (!mi->dim_hot_level)
900 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
/* Accumulate, per dimension, the execution counts of accesses whose
   offset is a simple induction variable of the innermost loop (see
   the hotness equation in the comment above).  */
903 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
906 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 1)) == POINTER_PLUS_EXPR
907 && acc_info->level < min_escape_l)
909 loop = loop_containing_stmt (acc_info->stmt);
/* Only accesses in innermost loops count towards hotness.  */
910 if (!loop || loop->inner)
915 if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
921 istep = int_cst_value (iv.step);
924 acc_info->iterated_by_inner_most_loop_p = 1;
925 mi->dim_hot_level[acc_info->level] +=
926 bb_for_stmt (acc_info->stmt)->count;
934 VEC_free (access_site_info_p, heap, mi->access_l);
939 /* Find the index which defines the OFFSET from base.
940 We walk from use to def until we find how the offset was defined. */
942 get_index_from_offset (tree offset, tree def_stmt)
944 tree op1, op2, expr, index;
/* Offsets defined by PHI nodes are not traced further.  */
946 if (TREE_CODE (def_stmt) == PHI_NODE)
948 expr = get_inner_of_cast_expr (GIMPLE_STMT_OPERAND (def_stmt, 1));
949 if (TREE_CODE (expr) == SSA_NAME)
950 return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
951 else if (TREE_CODE (expr) == MULT_EXPR)
/* OFFSET = INDEX * <constant element size>; the non-constant
   operand is the index we are after.  */
953 op1 = TREE_OPERAND (expr, 0);
954 op2 = TREE_OPERAND (expr, 1);
955 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
957 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
964 /* update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
965 of the type related to the SSA_VAR, or the type related to the
966 lhs of STMT, in the case that it is an INDIRECT_REF. */
968 update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
969 int current_indirect_level)
972 HOST_WIDE_INT type_size;
974 /* Update type according to the type of the INDIRECT_REF expr. */
975 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
976 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == INDIRECT_REF)
978 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
979 gcc_assert (POINTER_TYPE_P
980 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
982 int_size_in_bytes (TREE_TYPE
984 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
/* Otherwise take the size of SSA_VAR's own type.  */
987 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
989 /* Record the size of elements accessed (as a whole)
990 in the current indirection level (dimension). If the size of
991 elements is not known at compile time, mark it as escaping. */
993 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
996 int l = current_indirect_level;
/* Lazily allocate or grow the per-dimension size table, zeroing the
   newly added entries.  */
998 if (!mi->dimension_type_size)
1000 mi->dimension_type_size
1001 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1002 mi->dimension_type_size_len = l + 1;
1004 else if (mi->dimension_type_size_len < l + 1)
1006 mi->dimension_type_size
1007 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1008 (l + 1) * sizeof (HOST_WIDE_INT));
1009 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1010 0, (l + 1 - mi->dimension_type_size_len)
1011 * sizeof (HOST_WIDE_INT));
1012 mi->dimension_type_size_len = l + 1;
1014 /* Make sure all the accesses in the same level have the same size
1016 if (!mi->dimension_type_size[l])
1017 mi->dimension_type_size[l] = type_size;
/* Conflicting element sizes at the same level force an escape.  */
1018 else if (mi->dimension_type_size[l] != type_size)
1019 mark_min_matrix_escape_level (mi, l, stmt);
1023 /* USE_STMT represents a call_expr, where one of the arguments is the
1024 ssa var that we want to check because it came from some use of matrix
1025 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1029 analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
1030 int current_indirect_level)
1032 tree call = get_call_expr_in (use_stmt);
/* Any callee other than the built-in free makes this level escape.  */
1033 if (call && get_callee_fndecl (call))
1035 if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
1039 "Matrix %s: Function call %s, level %d escapes.\n",
1040 get_name (mi->decl), get_name (get_callee_fndecl (call)),
1041 current_indirect_level);
1042 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
/* Two different free sites for the same level also force escape.  */
1044 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1045 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1046 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1049 /* Record the free statements so we can delete them
1051 int l = current_indirect_level;
1053 mi->free_stmts[l].stmt = use_stmt;
1054 mi->free_stmts[l].func = current_function_decl;
1059 /* USE_STMT represents a phi node of the ssa var that we want to
1060 check because it came from some use of matrix
1062 We check all the escaping levels that get to the PHI node
1063 and make sure they are all the same escaping;
1064 if not (which is rare) we let the escaping level be the
1065 minimum level that gets into that PHI because starting from
1066 that level we cannot expect the behavior of the indirections.
1067 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1070 analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
1071 int current_indirect_level, sbitmap visited,
1072 bool record_accesses)
1075 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1077 tmp_maphi.phi = use_stmt;
/* Already-seen PHI: all incoming paths must agree on the indirection
   level; on disagreement, fall back to the minimum level and mark
   the matrix as escaping there.  */
1078 if ((maphi = htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1080 if (maphi->indirection_level == current_indirect_level)
1084 int level = MIN (maphi->indirection_level,
1085 current_indirect_level);
1089 maphi->indirection_level = level;
1090 for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
1092 tree def = PHI_ARG_DEF (use_stmt, j);
1094 if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
1095 t = SSA_NAME_DEF_STMT (def);
1097 mark_min_matrix_escape_level (mi, level, t);
/* First visit: record this PHI node with the current level.  */
1101 maphi = (struct matrix_access_phi_node *)
1102 xcalloc (1, sizeof (struct matrix_access_phi_node));
1103 maphi->phi = use_stmt;
1104 maphi->indirection_level = current_indirect_level;
1106 /* Insert to hash table. */
1107 pmaphi = (struct matrix_access_phi_node **)
1108 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1109 gcc_assert (pmaphi);
/* Continue the traversal through the PHI result, using VISITED to
   guard against SSA cycles.  */
1112 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1114 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1115 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1116 current_indirect_level, false, visited,
1118 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1122 /* USE_STMT represents a modify statement (the rhs or lhs include
1123 the ssa var that we want to check because it came from some use of matrix
1125 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1128 analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
1129 tree use_stmt, int current_indirect_level,
1130 bool last_op, sbitmap visited,
1131 bool record_accesses)
1134 tree lhs = GIMPLE_STMT_OPERAND (use_stmt, 0);
1135 tree rhs = GIMPLE_STMT_OPERAND (use_stmt, 1);
1136 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1138 memset (&lhs_acc, 0, sizeof (lhs_acc));
1139 memset (&rhs_acc, 0, sizeof (rhs_acc));
1141 lhs_acc.ssa_var = ssa_var;
1142 lhs_acc.t_code = ERROR_MARK;
1143 ssa_accessed_in_tree (lhs, &lhs_acc);
1144 rhs_acc.ssa_var = ssa_var;
1145 rhs_acc.t_code = ERROR_MARK;
1146 ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);
1148 /* The SSA must be either in the left side or in the right side,
1149 to understand what is happening.
1150 In case the SSA_NAME is found in both sides we should be escaping
1151 at this level because in this case we cannot calculate the
1152 address correctly. */
1153 if ((lhs_acc.var_found && rhs_acc.var_found
1154 && lhs_acc.t_code == INDIRECT_REF)
1155 || (!rhs_acc.var_found && !lhs_acc.var_found))
1157 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1158 return current_indirect_level;
1160 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1162 /* If we are storing to the matrix at some level, then mark it as
1163 escaping at that level. */
1164 if (lhs_acc.var_found)
1167 int l = current_indirect_level + 1;
1169 gcc_assert (lhs_acc.t_code == INDIRECT_REF);
1170 def = get_inner_of_cast_expr (rhs);
1171 if (TREE_CODE (def) != SSA_NAME)
1172 mark_min_matrix_escape_level (mi, l, use_stmt);
1175 def = SSA_NAME_DEF_STMT (def);
1176 analyze_matrix_allocation_site (mi, def, l, visited);
1177 if (record_accesses)
1178 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1179 NULL_TREE, l, true);
1180 update_type_size (mi, use_stmt, NULL, l);
1182 return current_indirect_level;
1184 /* Now, check the right-hand-side, to see how the SSA variable
1186 if (rhs_acc.var_found)
1188 /* If we are passing the ssa name to a function call and
1189 the pointer escapes when passed to the function
1190 (not the case of free), then we mark the matrix as
1191 escaping at this level. */
1192 if (rhs_acc.t_code == CALL_EXPR)
1194 analyze_accesses_for_call_expr (mi, use_stmt,
1195 current_indirect_level);
1197 return current_indirect_level;
1199 if (rhs_acc.t_code != INDIRECT_REF
1200 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1202 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1203 return current_indirect_level;
1205 /* If the access in the RHS has an indirection increase the
1206 indirection level. */
1207 if (rhs_acc.t_code == INDIRECT_REF)
1209 if (record_accesses)
1210 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1212 current_indirect_level, true);
1213 current_indirect_level += 1;
1215 else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1217 gcc_assert (rhs_acc.second_op);
1219 /* Currently we support only one PLUS expression on the
1220 SSA_NAME that holds the base address of the current
1221 indirection level; to support more general case there
1222 is a need to hold a stack of expressions and regenerate
1223 the calculation later. */
1224 mark_min_matrix_escape_level (mi, current_indirect_level,
1231 op1 = TREE_OPERAND (rhs, 0);
1232 op2 = TREE_OPERAND (rhs, 1);
1234 op2 = (op1 == ssa_var) ? op2 : op1;
1235 if (TREE_CODE (op2) == INTEGER_CST)
1237 build_int_cst (TREE_TYPE (op1),
1238 TREE_INT_CST_LOW (op2) /
1239 int_size_in_bytes (TREE_TYPE (op1)));
1243 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1244 if (index == NULL_TREE)
1246 mark_min_matrix_escape_level (mi,
1247 current_indirect_level,
1249 return current_indirect_level;
1252 if (record_accesses)
1253 record_access_alloc_site_info (mi, use_stmt, op2,
1255 current_indirect_level, false);
1258 /* If we are storing this level of indirection mark it as
1260 if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
1262 int l = current_indirect_level;
1264 /* One exception is when we are storing to the matrix
1265 variable itself; this is the case of malloc, we must make
1266 sure that it's the one and only one call to malloc so
1267 we call analyze_matrix_allocation_site to check
1269 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1270 mark_min_matrix_escape_level (mi, current_indirect_level,
1274 /* Also update the escaping level. */
1275 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1276 if (record_accesses)
1277 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1278 NULL_TREE, l, true);
1283 /* We are placing it in an SSA, follow that SSA. */
1284 analyze_matrix_accesses (mi, lhs,
1285 current_indirect_level,
1286 rhs_acc.t_code == POINTER_PLUS_EXPR,
1287 visited, record_accesses);
1290 return current_indirect_level;
1293 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1294 follow its uses and level of indirection and find out the minimum
1295 indirection level it escapes in (the highest dimension) and the maximum
1296 level it is accessed in (this will be the actual dimension of the
1297 matrix). The information is accumulated in MI.
1298 We look at the immediate uses, if one escapes we finish; if not,
1299 we make a recursive call for each one of the immediate uses of the
1300 resulting SSA name. */
1302 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1303 int current_indirect_level, bool last_op,
1304 sbitmap visited, bool record_accesses)
1306 imm_use_iterator imm_iter;
1307 use_operand_p use_p;
/* Record the element-type size observed for SSA_VAR at this
   indirection level; used later when computing the flat layout.  */
1309 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1310 current_indirect_level);
1312 /* We don't go beyond the escaping level when we are performing the
1313 flattening. NOTE: we keep the last indirection level that doesn't
1315 if (mi->min_indirect_level_escape > -1
1316 && mi->min_indirect_level_escape <= current_indirect_level)
1319 /* Now go over the uses of the SSA_NAME and check how it is used in
1320 each one of them. We are mainly looking for the pattern INDIRECT_REF,
1321 then a POINTER_PLUS_EXPR, then INDIRECT_REF etc. while in between there could
1322 be any number of copies and casts. */
1323 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1327 tree use_stmt = USE_STMT (use_p);
/* Dispatch on the kind of the using statement; each kind has a
   dedicated helper that updates MI's escape/access information.  */
1328 if (TREE_CODE (use_stmt) == PHI_NODE)
1329 /* We check all the escaping levels that get to the PHI node
1330 and make sure they are all the same escaping;
1331 if not (which is rare) we let the escaping level be the
1332 minimum level that gets into that PHI because starting from
1333 that level we cannot expect the behavior of the indirections. */
1335 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1336 visited, record_accesses);
1338 else if (TREE_CODE (use_stmt) == CALL_EXPR)
1339 analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
1340 else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
/* A modify statement may raise the indirection level; keep the
   updated level for the remaining uses in this loop.  */
1341 current_indirect_level =
1342 analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
1343 current_indirect_level, last_op,
1344 visited, record_accesses);
1349 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1350 the malloc size expression and check that those aren't changed
1351 over the function. */
1353 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1358 block_stmt_iterator bsi;
/* Only VAR_DECL and PARM_DECL nodes can be "modified"; any other
   tree node is of no interest to this walker.  */
1361 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
/* Scan every statement of FN looking for a store whose LHS is T.
   NOTE(review): this listing elides the lines that bind T/FN and the
   return statements, so the walker's exact return convention cannot
   be confirmed from this view.  */
1364 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1366 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1368 stmt = bsi_stmt (bsi);
1369 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
/* A direct assignment to T means it is modified within FN.  */
1371 if (GIMPLE_STMT_OPERAND (stmt, 0) == t)
1379 /* Go backwards in the use-def chains and find out the expression
1380 represented by the possible SSA name in EXPR, until it is composed
1381 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1382 we make sure that all the arguments represent the same subexpression,
1383 otherwise we fail. */
1385 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1387 tree def_stmt, op1, op2, res;
1389 switch (TREE_CODE (expr))
/* SSA_NAME: chase the defining statement.  VISITED guards against
   cycles (loop-carried definitions).  */
1392 /* Case of a loop: we do not know how to represent this expression.  */
1393 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1396 SET_BIT (visited, SSA_NAME_VERSION (expr));
1397 def_stmt = SSA_NAME_DEF_STMT (expr);
1398 res = can_calculate_expr_before_stmt (def_stmt, visited);
/* Unmark on the way out so sibling paths may revisit this name.  */
1399 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1405 case POINTER_PLUS_EXPR:
/* Binary expressions: both operands must themselves be computable
   before the statement; rebuild the expression from the results.  */
1409 op1 = TREE_OPERAND (expr, 0);
1410 op2 = TREE_OPERAND (expr, 1);
1412 op1 = can_calculate_expr_before_stmt (op1, visited);
1415 op2 = can_calculate_expr_before_stmt (op2, visited);
1417 return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
1419 case GIMPLE_MODIFY_STMT:
/* For an assignment, analyze its right-hand side.  */
1420 return can_calculate_expr_before_stmt (GIMPLE_STMT_OPERAND (expr, 1),
1427 /* Make sure all the arguments represent the same value. */
1428 for (j = 0; j < PHI_NUM_ARGS (expr); j++)
1431 tree def = PHI_ARG_DEF (expr, j);
1433 new_res = can_calculate_expr_before_stmt (def, visited);
1434 if (res == NULL_TREE)
1436 else if (!new_res || !expressions_equal_p (res, new_res))
/* Unary node: rebuild it around the analyzed operand.  */
1443 res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
1444 if (res != NULL_TREE)
1445 return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
1454 /* There should be only one allocation function for the dimensions
1455 that don't escape. Here we check the allocation sites in this
1456 function. We must make sure that all the dimensions are allocated
1457 using malloc and that the malloc size parameter expression could be
1458 pre-calculated before the call to the malloc of dimension 0.
1460 Given a candidate matrix for flattening -- MI -- check if it's
1461 appropriate for flattening -- we analyze the allocation
1462 sites that we recorded in the previous analysis. The result of the
1463 analysis is a level of indirection (matrix dimension) in which the
1464 flattening is safe. We check the following conditions:
1465 1. There is only one allocation site for each dimension.
1466 2. The allocation sites of all the dimensions are in the same
1468 (The above two are being taken care of during the analysis when
1469 we check the allocation site).
1470 3. All the dimensions that we flatten are allocated at once; thus
1471 the total size must be known before the allocation of the
1472 dimension 0 (top level) -- we must make sure we represent the
1473 size of the allocation as an expression of global parameters or
1474 constants and that those don't change over the function. */
1477 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1480 block_stmt_iterator bsi;
1481 basic_block bb_level_0;
1482 struct matrix_info *mi = *slot;
/* Nothing to check if no malloc sites were recorded for this matrix.  */
1485 if (!mi->malloc_for_level)
1488 visited = sbitmap_alloc (num_ssa_names);
1490 /* Do nothing if the current function is not the allocation
1492 if (mi->allocation_function_decl != current_function_decl
1493 /* We aren't in the main allocation function yet. */
1494 || !mi->malloc_for_level[0])
/* Every dimension below the maximum malloced level must have a
   recorded malloc; a missing one caps the flattening depth there.  */
1497 for (level = 1; level < mi->max_malloced_level; level++)
1498 if (!mi->malloc_for_level[level])
1501 mark_min_matrix_escape_level (mi, level, NULL_TREE);
1503 bsi = bsi_for_stmt (mi->malloc_for_level[0]);
1504 bb_level_0 = bsi.bb;
1506 /* Check if the expression of the size passed to malloc could be
1507 pre-calculated before the malloc of level 0. */
1508 for (level = 1; level < mi->min_indirect_level_escape; level++)
1510 tree call_stmt, size;
1511 struct malloc_call_data mcd;
1513 call_stmt = mi->malloc_for_level[level];
1515 /* Find the correct malloc information. */
1516 collect_data_for_malloc_call (call_stmt, &mcd);
1518 /* No need to check anticipation for constants. */
1519 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
/* Lazily allocate the per-dimension size arrays on first use.  */
1521 if (!mi->dimension_size)
1523 mi->dimension_size =
1524 (tree *) xcalloc (mi->min_indirect_level_escape,
1526 mi->dimension_size_orig =
1527 (tree *) xcalloc (mi->min_indirect_level_escape,
1530 mi->dimension_size[level] = mcd.size_var;
1531 mi->dimension_size_orig[level] = mcd.size_var;
1534 /* ??? Here we should also add the way to calculate the size
1535 expression not only know that it is anticipated. */
1536 sbitmap_zero (visited);
1537 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
/* If the size cannot be computed before the level-0 malloc, this
   dimension (and all deeper ones) cannot be flattened.  */
1538 if (size == NULL_TREE)
1540 mark_min_matrix_escape_level (mi, level, call_stmt);
1543 "Matrix %s: Cannot calculate the size of allocation. escaping at level %d\n",
1544 get_name (mi->decl), level);
1547 if (!mi->dimension_size)
1549 mi->dimension_size =
1550 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1551 mi->dimension_size_orig =
1552 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1554 mi->dimension_size[level] = size;
1555 mi->dimension_size_orig[level] = size;
1558 /* We don't need those anymore. */
1559 for (level = mi->min_indirect_level_escape;
1560 level < mi->max_malloced_level; level++)
1561 mi->malloc_for_level[level] = NULL;
1565 /* Track all access and allocation sites. */
1567 find_sites_in_func (bool record)
1569 sbitmap visited_stmts_1;
1571 block_stmt_iterator bsi;
1574 struct matrix_info tmpmi, *mi;
1576 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1580 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1582 stmt = bsi_stmt (bsi);
/* "matrix_var = ..." -- a store into the matrix variable itself is a
   potential allocation site.  */
1583 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1584 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == VAR_DECL)
1586 tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 0);
1587 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1589 sbitmap_zero (visited_stmts_1);
1590 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
/* "ssa_name = matrix_var" -- a load of the matrix variable starts an
   access chain; follow the uses of the SSA name.  */
1593 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1594 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME
1595 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == VAR_DECL)
1597 tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 1);
1598 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1600 sbitmap_zero (visited_stmts_1);
1601 analyze_matrix_accesses (mi,
1602 GIMPLE_STMT_OPERAND (stmt, 0), 0,
1603 false, visited_stmts_1, record);
1608 sbitmap_free (visited_stmts_1);
1611 /* Traverse the use-def chains to see if there are matrices that
1612 are passed through pointers and we cannot know how they are accessed.
1613 For each SSA-name defined by a global variable of our interest,
1614 we traverse the use-def chains of the SSA and follow the indirections,
1615 and record in what level of indirection the use of the variable
1616 escapes. A use of a pointer escapes when it is passed to a function,
1617 stored into memory or assigned (except in malloc and free calls). */
1620 record_all_accesses_in_func (void)
1623 sbitmap visited_stmts_1;
1625 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
/* Walk every SSA name in the function and pick those whose defining
   statement is a copy to or from a candidate matrix variable.  */
1627 for (i = 0; i < num_ssa_names; i++)
1629 struct matrix_info tmpmi, *mi;
1630 tree ssa_var = ssa_name (i);
1634 || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
1636 rhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
1637 lhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
1638 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1641 /* If the RHS is a matrix that we want to analyze, follow the def-use
1642 chain for this SSA_VAR and check for escapes or apply the
1645 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1647 /* This variable will track the visited PHI nodes, so we can limit
1648 its size to the maximum number of SSA names. */
1649 sbitmap_zero (visited_stmts_1);
1650 analyze_matrix_accesses (mi, ssa_var,
1651 0, false, visited_stmts_1, true);
1655 sbitmap_free (visited_stmts_1);
1658 /* Used when we want to convert the expression
   RESULT = something * ORIG into RESULT = something * NEW.
   If ORIG and NEW are powers of 2, shift operations can be done;
   otherwise division and multiplication are used.  */
1660 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
1664 tree result1, ratio, log, orig_tree, new_tree;
/* exact_log2 returns -1 when its argument is not a power of two.  */
1666 x = exact_log2 (orig);
1667 y = exact_log2 (new);
/* Both sizes are powers of two: rescale with a single shift by the
   difference of the exponents (left if growing, right if shrinking).  */
1669 if (x != -1 && y != -1)
1675 log = build_int_cst (TREE_TYPE (result), x - y);
1677 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1680 log = build_int_cst (TREE_TYPE (result), y - x);
1681 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
/* General case: RESULT / ORIG * NEW, using truncating division.  */
1685 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1686 new_tree = build_int_cst (TREE_TYPE (result), new);
1687 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1688 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1694 /* We know that we are allowed to perform matrix flattening (according to the
1695 escape analysis), so we traverse the use-def chains of the SSA vars
1696 defined by the global variables pointing to the matrices of our interest.
1697 in each use of the SSA we calculate the offset from the base address
1698 according to the following equation:
1700 a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
1701 escaping level is m <= k, and a' is the new allocated matrix,
1702 will be translated to :
1707 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1711 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1713 block_stmt_iterator bsi;
1714 struct matrix_info *mi = *slot;
1715 int min_escape_l = mi->min_indirect_level_escape;
1716 struct access_site_info *acc_info;
/* Flattening needs at least two non-escaping levels and at least one
   recorded access site; otherwise there is nothing to transform.  */
1719 if (min_escape_l < 2 || !mi->access_l)
1721 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1726 /* This is possible because we collect the access sites before
1727 we determine the final minimum indirection level. */
1728 if (acc_info->level >= min_escape_l)
1733 if (acc_info->is_alloc)
1735 if (acc_info->level >= 0 && bb_for_stmt (acc_info->stmt))
1739 tree stmt = acc_info->stmt;
/* The intermediate pointer arrays of flattened levels go away, so
   their SSA defs must be scheduled for renaming.  */
1741 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1742 mark_sym_for_renaming (SSA_NAME_VAR (def));
1743 bsi = bsi_for_stmt (stmt);
1744 gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
1745 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 0)) ==
1746 SSA_NAME && acc_info->level < min_escape_l - 1)
1748 imm_use_iterator imm_iter;
1749 use_operand_p use_p;
/* Rewrite every use of the removed SSA definition to use a freshly
   created, properly converted temporary instead.  */
1752 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
1753 GIMPLE_STMT_OPERAND (acc_info->stmt,
1755 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1757 tree conv, tmp, stmts;
1759 /* Emit convert statement to convert to type of use. */
1761 fold_build1 (CONVERT_EXPR,
1762 TREE_TYPE (GIMPLE_STMT_OPERAND
1763 (acc_info->stmt, 0)),
1764 TREE_OPERAND (GIMPLE_STMT_OPERAND
1765 (acc_info->stmt, 1), 0));
1767 create_tmp_var (TREE_TYPE
1768 (GIMPLE_STMT_OPERAND
1769 (acc_info->stmt, 0)), "new");
1770 add_referenced_var (tmp);
1772 fold_build2 (GIMPLE_MODIFY_STMT,
1773 TREE_TYPE (GIMPLE_STMT_OPERAND
1774 (acc_info->stmt, 0)), tmp,
1776 tmp = make_ssa_name (tmp, stmts);
1777 GIMPLE_STMT_OPERAND (stmts, 0) = tmp;
1778 bsi = bsi_for_stmt (acc_info->stmt);
1779 bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
1780 SET_USE (use_p, tmp);
/* Accesses below the last flattened level are now redundant.  */
1783 if (acc_info->level < min_escape_l - 1)
1784 bsi_remove (&bsi, true);
1789 orig = GIMPLE_STMT_OPERAND (acc_info->stmt, 1);
1790 type = TREE_TYPE (orig);
1791 if (TREE_CODE (orig) == INDIRECT_REF
1792 && acc_info->level < min_escape_l - 1)
1794 /* Replace the INDIRECT_REF with NOP (cast) usually we are casting
1795 from "pointer to type" to "type". */
1797 build1 (NOP_EXPR, TREE_TYPE (orig),
1798 GIMPLE_STMT_OPERAND (orig, 0));
1799 GIMPLE_STMT_OPERAND (acc_info->stmt, 1) = orig;
1801 else if (TREE_CODE (orig) == POINTER_PLUS_EXPR
1802 && acc_info->level < (min_escape_l))
1804 imm_use_iterator imm_iter;
1805 use_operand_p use_p;
1808 int k = acc_info->level;
1809 tree num_elements, total_elements;
1811 tree d_size = mi->dimension_size[k];
1813 /* We already make sure in the analysis that the first operand
1814 is the base and the second is the offset. */
1815 offset = acc_info->offset;
/* Innermost (last kept) dimension: the offset only needs rescaling
   when dimensions were transposed; see compute_offset.  */
1816 if (mi->dim_map[k] == min_escape_l - 1)
1818 if (!check_transpose_p || mi->is_transposed_p == false)
1823 tree d_type_size, d_type_size_k;
1825 d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
1826 d_type_size_k = size_int (mi->dimension_type_size[k + 1]);
1829 compute_offset (mi->dimension_type_size[min_escape_l],
1830 mi->dimension_type_size[k + 1], offset);
1832 total_elements = new_offset;
1833 if (new_offset != offset)
1835 bsi = bsi_for_stmt (acc_info->stmt);
1836 tmp1 = force_gimple_operand_bsi (&bsi, total_elements,
1838 true, BSI_SAME_STMT);
/* Outer dimension: scale the index by the size of the next (possibly
   remapped) dimension to linearize the address.  */
1846 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1848 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1849 fold_convert (sizetype, d_size));
1850 add_referenced_var (d_size);
1851 bsi = bsi_for_stmt (acc_info->stmt);
1852 tmp1 = force_gimple_operand_bsi (&bsi, num_elements, true,
1853 NULL, true, BSI_SAME_STMT);
1855 /* Replace the offset if needed. */
1858 if (TREE_CODE (offset) == SSA_NAME)
1862 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1863 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1864 if (use_stmt == acc_info->stmt)
1865 SET_USE (use_p, tmp1);
1869 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1870 TREE_OPERAND (orig, 1) = tmp1;
1874 /* ??? meanwhile this happens because we record the same access
1875 site more than once; we should be using a hash table to
1876 avoid this and insert the STMT of the access site only
1879 gcc_unreachable (); */
1882 VEC_free (access_site_info_p, heap, mi->access_l);
1884 update_ssa (TODO_update_ssa);
1885 #ifdef ENABLE_CHECKING
1891 /* Sort A array of counts. Arrange DIM_MAP to reflect the new order. */
1894 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
/* Simple bubble sort: order the hotness counts in A ascending and
   apply the same swaps to DIM_MAP so it tracks the new order of the
   dimensions.  N is small (the number of matrix dimensions), so the
   quadratic cost is irrelevant here.  */
1899 for (i = 0; i < n - 1; i++)
1901 for (j = 0; j < n - 1 - i; j++)
1903 if (a[j + 1] < a[j])
1905 tmp = a[j]; /* swap a[j] and a[j+1] */
1909 dim_map[j] = dim_map[j + 1];
1910 dim_map[j + 1] = tmp1;
1916 /* Replace multiple mallocs (one for each dimension) to one malloc
1917 with the size of DIM1*DIM2*...*DIMN*size_of_element
1918 Make sure that we hold the size in the malloc site inside a
1919 new global variable; this way we ensure that the size doesn't
1920 change and it is accessible from all the other functions that
1921 uses the matrix. Also, the original calls to free are deleted,
1922 and replaced by a new call to free the flattened matrix. */
1925 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1928 struct matrix_info *mi;
1929 tree type, call_stmt_0, malloc_stmt, oldfn, prev_dim_size, use_stmt;
1930 struct cgraph_node *c_node;
1931 struct cgraph_edge *e;
1932 block_stmt_iterator bsi;
1933 struct malloc_call_data mcd;
1934 HOST_WIDE_INT element_size;
1936 imm_use_iterator imm_iter;
1937 use_operand_p use_p;
1938 tree old_size_0, tmp;
1944 min_escape_l = mi->min_indirect_level_escape;
/* No recorded mallocs means nothing can be flattened.  */
1946 if (!mi->malloc_for_level)
1947 mi->min_indirect_level_escape = 0;
1949 if (mi->min_indirect_level_escape < 2)
/* dim_map starts out as the identity permutation; analyze_transpose
   results may reorder it below.  */
1952 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
1953 for (i = 0; i < mi->min_indirect_level_escape; i++)
1955 if (check_transpose_p)
1961 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
1962 for (i = 0; i < min_escape_l; i++)
1964 fprintf (dump_file, "dim %d before sort ", i);
1965 if (mi->dim_hot_level)
1967 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
1968 mi->dim_hot_level[i]);
/* Reorder the dimensions by access hotness (profile counts) so the
   hottest dimension becomes the innermost one.  */
1971 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
1972 mi->min_indirect_level_escape);
1974 for (i = 0; i < min_escape_l; i++)
1976 fprintf (dump_file, "dim %d after sort\n", i);
1977 if (mi->dim_hot_level)
1978 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
1979 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
1981 for (i = 0; i < mi->min_indirect_level_escape; i++)
1984 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
1986 if (mi->dim_map[i] != i)
1990 "Transposed dimensions: dim %d is now dim %d\n",
1992 mi->is_transposed_p = true;
1998 for (i = 0; i < mi->min_indirect_level_escape; i++)
2001 /* Call statement of allocation site of level 0. */
2002 call_stmt_0 = mi->malloc_for_level[0];
2004 /* Finds the correct malloc information. */
2005 collect_data_for_malloc_call (call_stmt_0, &mcd);
2007 mi->dimension_size[0] = mcd.size_var;
2008 mi->dimension_size_orig[0] = mcd.size_var;
2009 /* Make sure that the variables in the size expression for
2010 all the dimensions (above level 0) aren't modified in
2011 the allocation function. */
2012 for (i = 1; i < mi->min_indirect_level_escape; i++)
2016 /* mi->dimension_size must contain the expression of the size calculated
2017 in check_allocation_function. */
2018 gcc_assert (mi->dimension_size[i]);
2020 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2021 check_var_notmodified_p,
2022 mi->allocation_function_decl);
2025 mark_min_matrix_escape_level (mi, i, t);
/* The modification checks above may have lowered the escape level
   below the flattening threshold; re-test before transforming.  */
2030 if (mi->min_indirect_level_escape < 2)
2033 /* Since we should make sure that the size expression is available
2034 before the call to malloc of level 0. */
2035 bsi = bsi_for_stmt (call_stmt_0);
2037 /* Find out the size of each dimension by looking at the malloc
2038 sites and create a global variable to hold it.
2039 We add the assignment to the global before the malloc of level 0. */
2041 /* To be able to produce gimple temporaries. */
2042 oldfn = current_function_decl;
2043 current_function_decl = mi->allocation_function_decl;
2044 cfun = DECL_STRUCT_FUNCTION (mi->allocation_function_decl);
2046 /* Set the dimension sizes as follows:
2047 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2048 where n is the maximum non escaping level. */
2049 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2050 prev_dim_size = NULL_TREE;
/* Walk the kept dimensions innermost-to-outermost, accumulating the
   running product of sizes in PREV_DIM_SIZE.  */
2052 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2054 tree dim_size, dim_var, tmp;
2057 /* Now put the size expression in a global variable and initialize it to
2058 the size expression before the malloc of level 0. */
2060 add_new_static_var (TREE_TYPE
2061 (mi->dimension_size_orig[mi->dim_map[i]]));
2062 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2064 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2065 /* Find which dim ID becomes dim I. */
2066 for (id = 0; id < mi->min_indirect_level_escape; id++)
2067 if (mi->dim_map[id] == i)
2070 build_int_cst (type, mi->dimension_type_size[id + 1]);
2072 prev_dim_size = build_int_cst (type, element_size);
2073 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2075 dim_size = mi->dimension_size_orig[id];
2080 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2083 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2085 dim_size = force_gimple_operand_bsi (&bsi, dim_size, true, NULL,
2086 true, BSI_SAME_STMT);
2087 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2088 tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
2089 GIMPLE_STMT_OPERAND (tmp, 0) = dim_var;
2090 mark_symbols_for_renaming (tmp);
2091 bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
2093 prev_dim_size = mi->dimension_size[i] = dim_var;
2095 update_ssa (TODO_update_ssa);
2096 /* Replace the malloc size argument in the malloc of level 0 to be
2097 the size of all the dimensions. */
2098 malloc_stmt = GIMPLE_STMT_OPERAND (call_stmt_0, 1);
2099 c_node = cgraph_node (mi->allocation_function_decl);
2100 old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
2101 tmp = force_gimple_operand_bsi (&bsi, mi->dimension_size[0], true,
2102 NULL, true, BSI_SAME_STMT);
2103 if (TREE_CODE (old_size_0) == SSA_NAME)
2105 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2106 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2107 if (use_stmt == call_stmt_0)
2108 SET_USE (use_p, tmp);
2110 /* When deleting the calls to malloc we need also to remove the edge from
2111 the call graph to keep it consistent. Notice that cgraph_edge may
2112 create a new node in the call graph if there is no node for the given
2113 declaration; this shouldn't be the case but currently there is no way to
2114 check this outside of "cgraph.c". */
2115 for (i = 1; i < mi->min_indirect_level_escape; i++)
2117 block_stmt_iterator bsi;
2118 tree use_stmt1 = NULL;
2121 tree call_stmt = mi->malloc_for_level[i];
2122 call = GIMPLE_STMT_OPERAND (call_stmt, 1);
2123 gcc_assert (TREE_CODE (call) == CALL_EXPR);
2124 e = cgraph_edge (c_node, call_stmt);
2126 cgraph_remove_edge (e);
2127 bsi = bsi_for_stmt (call_stmt);
2128 /* Remove the call stmt. */
2129 bsi_remove (&bsi, true);
2130 /* remove the type cast stmt. */
2131 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2132 GIMPLE_STMT_OPERAND (call_stmt, 0))
2134 use_stmt1 = use_stmt;
2135 bsi = bsi_for_stmt (use_stmt);
2136 bsi_remove (&bsi, true);
2138 /* Remove the assignment of the allocated area. */
2139 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2140 GIMPLE_STMT_OPERAND (use_stmt1, 0))
2142 bsi = bsi_for_stmt (use_stmt);
2143 bsi_remove (&bsi, true);
2146 update_ssa (TODO_update_ssa);
2147 #ifdef ENABLE_CHECKING
2150 /* Delete the calls to free. */
2151 for (i = 1; i < mi->min_indirect_level_escape; i++)
2153 block_stmt_iterator bsi;
2156 /* ??? wonder why this case is possible but we failed on it once. */
2157 if (!mi->free_stmts[i].stmt)
2160 call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
2161 c_node = cgraph_node (mi->free_stmts[i].func);
2163 gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
2164 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2166 cgraph_remove_edge (e);
/* The free calls may live in a different function than the mallocs;
   switch context so bsi_remove operates on the right CFG.  */
2167 current_function_decl = mi->free_stmts[i].func;
2168 cfun = DECL_STRUCT_FUNCTION (mi->free_stmts[i].func);
2169 bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
2170 bsi_remove (&bsi, true);
2172 /* Return to the previous situation. */
2173 current_function_decl = oldfn;
2174 cfun = oldfn ? DECL_STRUCT_FUNCTION (oldfn) : NULL;
2180 /* Print out the results of the escape analysis. */
2182 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2184 struct matrix_info *mi = *slot;
/* Emit a one-line summary of the escape analysis for this matrix.  */
2188 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2189 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2190 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2191 fprintf (dump_file, "\n");
/* Flattening only happens when at least two levels do not escape.  */
2192 if (mi->min_indirect_level_escape >= 2)
2193 fprintf (dump_file, "Flattened %d dimensions \n",
2194 mi->min_indirect_level_escape);
2199 /* Perform matrix flattening. */
2204 struct cgraph_node *node;
2207 check_transpose_p = true;
2209 check_transpose_p = false;
2210 /* If there are hand written vectors, we skip this optimization. */
2211 for (node = cgraph_nodes; node; node = node->next)
2212 if (!may_flatten_matrices (node))
2214 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2215 /* Find and record all potential matrices in the program. */
2216 find_matrices_decl ();
2217 /* Analyze the accesses of the matrices (escaping analysis). */
2218 for (node = cgraph_nodes; node; node = node->next)
/* Enter the function's context so CFG/SSA iterators work on it.  */
2223 temp_fn = current_function_decl;
2224 current_function_decl = node->decl;
2225 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2226 bitmap_obstack_initialize (NULL);
2227 tree_register_cfg_hooks ();
/* Skip functions not in SSA form; restore context before moving on.  */
2229 if (!gimple_in_ssa_p (cfun))
2231 free_dominance_info (CDI_DOMINATORS);
2232 free_dominance_info (CDI_POST_DOMINATORS);
2234 current_function_decl = temp_fn;
2239 #ifdef ENABLE_CHECKING
2240 verify_flow_info ();
2243 if (!matrices_to_reorg)
2245 free_dominance_info (CDI_DOMINATORS);
2246 free_dominance_info (CDI_POST_DOMINATORS);
2248 current_function_decl = temp_fn;
2253 /* Create htab for phi nodes. */
2254 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2255 mat_acc_phi_eq, free);
/* Without transpose checking, a single non-recording pass suffices;
   otherwise record sites and run the loop-based transpose analysis.  */
2256 if (!check_transpose_p)
2257 find_sites_in_func (false);
2260 find_sites_in_func (true);
2261 loop_optimizer_init (LOOPS_NORMAL);
2264 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2268 loop_optimizer_finalize ();
2269 current_loops = NULL;
2272 /* If the current function is the allocation function for any of
2273 the matrices we check its allocation and the escaping level. */
2274 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2275 free_dominance_info (CDI_DOMINATORS);
2276 free_dominance_info (CDI_POST_DOMINATORS);
2278 current_function_decl = temp_fn;
/* Analysis is done; rewrite the allocation sites first, then every
   access site in each function.  */
2280 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2281 /* Now transform the accesses. */
2282 for (node = cgraph_nodes; node; node = node->next)
2285 /* Remember that allocation sites have been handled. */
2288 temp_fn = current_function_decl;
2289 current_function_decl = node->decl;
2290 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2291 bitmap_obstack_initialize (NULL);
2292 tree_register_cfg_hooks ();
2293 record_all_accesses_in_func ();
2294 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2295 free_dominance_info (CDI_DOMINATORS);
2296 free_dominance_info (CDI_POST_DOMINATORS);
2298 current_function_decl = temp_fn;
2300 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
/* Reset global state so later passes see a clean slate.  */
2302 current_function_decl = NULL;
2304 matrices_to_reorg = NULL;
2309 /* The condition for matrix flattening to be performed. */
2311 gate_matrix_reorg (void)
/* Run only under -fipa-matrix-reorg; the whole-program requirement
   is currently disabled (commented out).  */
2313 return flag_ipa_matrix_reorg /*&& flag_whole_program */ ;
2316 struct tree_opt_pass pass_ipa_matrix_reorg = {
2317 "matrix-reorg", /* name */
2318 gate_matrix_reorg, /* gate */
2319 matrix_reorg, /* execute */
2322 0, /* static_pass_number */
2324 0, /* properties_required */
2325 PROP_trees, /* properties_provided */
2326 0, /* properties_destroyed */
2327 0, /* todo_flags_start */
2328 TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */