1 /* Gimple Represented as Polyhedra.
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@inria.fr>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass converts GIMPLE to GRAPHITE, performs some loop
22 transformations and then converts the resulting representation back
25 An early description of this pass can be found in the GCC Summit'06
26 paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC".
27 The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to
30 One important document to read is CLooG's internal manual:
31 http://repo.or.cz/w/cloog-ppl.git?a=blob_plain;f=doc/cloog.texi;hb=HEAD
32 that describes the data structure of loops used in this file, and
33 the functions that are used for transforming the code. */
37 #include "coretypes.h"
42 #include "basic-block.h"
43 #include "diagnostic.h"
44 #include "tree-flow.h"
46 #include "tree-dump.h"
49 #include "tree-chrec.h"
50 #include "tree-data-ref.h"
51 #include "tree-scalar-evolution.h"
52 #include "tree-pass.h"
54 #include "value-prof.h"
55 #include "pointer-set.h"
59 #include "cloog/cloog.h"
/* The SCoPs detected in the current function, filled by the scop
   detection and consumed/freed at the end of the pass.  */
62 static VEC (scop_p, heap) *current_scops;
64 /* Converts a GMP constant V to a tree and returns it. */
67 gmp_cst_to_tree (Value v)
69 return build_int_cst (integer_type_node, value_get_si (v));
72 /* Debug the list of old induction variables for this SCOP. */
75 debug_oldivs (scop_p scop)
80 fprintf (stderr, "Old IVs:");
82 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++)
84 fprintf (stderr, "(");
85 print_generic_expr (stderr, oldiv->t, 0);
86 fprintf (stderr, ", %s, %d)\n", oldiv->name, oldiv->loop->num);
88 fprintf (stderr, "\n");
91 /* Debug the loops around basic block GB. */
94 debug_loop_vec (graphite_bb_p gb)
99 fprintf (stderr, "Loop Vec:");
101 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
102 fprintf (stderr, "%d: %d, ", i, loop ? loop->num : -1);
104 fprintf (stderr, "\n");
107 /* Returns true if stack ENTRY is a constant. */
110 iv_stack_entry_is_constant (iv_stack_entry *entry)
112 return entry->kind == iv_stack_entry_const;
115 /* Returns true if stack ENTRY is an induction variable. */
118 iv_stack_entry_is_iv (iv_stack_entry *entry)
120 return entry->kind == iv_stack_entry_iv;
123 /* Push (IV, NAME) on STACK. */
126 loop_iv_stack_push_iv (loop_iv_stack stack, tree iv, const char *name)
128 iv_stack_entry *entry = XNEW (iv_stack_entry);
129 name_tree named_iv = XNEW (struct name_tree);
132 named_iv->name = name;
134 entry->kind = iv_stack_entry_iv;
135 entry->data.iv = named_iv;
137 VEC_safe_push (iv_stack_entry_p, heap, *stack, entry);
140 /* Inserts a CONSTANT in STACK at INDEX. */
143 loop_iv_stack_insert_constant (loop_iv_stack stack, int index,
146 iv_stack_entry *entry = XNEW (iv_stack_entry);
148 entry->kind = iv_stack_entry_const;
149 entry->data.constant = constant;
151 VEC_safe_insert (iv_stack_entry_p, heap, *stack, index, entry);
154 /* Pops and frees an element out of STACK. */
157 loop_iv_stack_pop (loop_iv_stack stack)
159 iv_stack_entry_p entry = VEC_pop (iv_stack_entry_p, *stack);
161 free (entry->data.iv);
165 /* Get the IV at INDEX in STACK. */
168 loop_iv_stack_get_iv (loop_iv_stack stack, int index)
170 iv_stack_entry_p entry = VEC_index (iv_stack_entry_p, *stack, index);
171 iv_stack_entry_data data = entry->data;
173 return iv_stack_entry_is_iv (entry) ? data.iv->t : data.constant;
176 /* Get the IV from its NAME in STACK. */
179 loop_iv_stack_get_iv_from_name (loop_iv_stack stack, const char* name)
182 iv_stack_entry_p entry;
184 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
186 name_tree iv = entry->data.iv;
187 if (!strcmp (name, iv->name))
194 /* Prints on stderr the contents of STACK. */
197 debug_loop_iv_stack (loop_iv_stack stack)
200 iv_stack_entry_p entry;
203 fprintf (stderr, "(");
205 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
210 fprintf (stderr, " ");
212 if (iv_stack_entry_is_iv (entry))
214 name_tree iv = entry->data.iv;
215 fprintf (stderr, "%s:", iv->name);
216 print_generic_expr (stderr, iv->t, 0);
220 tree constant = entry->data.constant;
221 print_generic_expr (stderr, constant, 0);
222 fprintf (stderr, ":");
223 print_generic_expr (stderr, constant, 0);
227 fprintf (stderr, ")\n");
233 free_loop_iv_stack (loop_iv_stack stack)
236 iv_stack_entry_p entry;
238 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
240 free (entry->data.iv);
244 VEC_free (iv_stack_entry_p, heap, *stack);
247 /* Inserts constants derived from the USER_STMT argument list into the
248 STACK. This is needed to map old ivs to constants when loops have
252 loop_iv_stack_patch_for_consts (loop_iv_stack stack,
253 struct clast_user_stmt *user_stmt)
255 struct clast_stmt *t;
257 for (t = user_stmt->substitutions; t; t = t->next)
259 struct clast_term *term = (struct clast_term*)
260 ((struct clast_assignment *)t)->RHS;
262 /* FIXME: What should be done with expr_bin, expr_red? */
263 if (((struct clast_assignment *)t)->RHS->type == expr_term
266 tree value = gmp_cst_to_tree (term->val);
267 loop_iv_stack_insert_constant (stack, index, value);
273 /* Removes all constants in the iv STACK. */
276 loop_iv_stack_remove_constants (loop_iv_stack stack)
279 iv_stack_entry *entry;
281 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry);)
283 if (iv_stack_entry_is_constant (entry))
285 free (VEC_index (iv_stack_entry_p, *stack, i));
286 VEC_ordered_remove (iv_stack_entry_p, *stack, i);
293 /* Returns a new loop_to_cloog_loop_str structure. */
295 static inline struct loop_to_cloog_loop_str *
296 new_loop_to_cloog_loop_str (int loop_num,
298 CloogLoop *cloog_loop)
300 struct loop_to_cloog_loop_str *result;
302 result = XNEW (struct loop_to_cloog_loop_str);
303 result->loop_num = loop_num;
304 result->cloog_loop = cloog_loop;
305 result->loop_position = loop_position;
310 /* Hash function for SCOP_LOOP2CLOOG_LOOP hash table. */
313 hash_loop_to_cloog_loop (const void *elt)
315 return ((const struct loop_to_cloog_loop_str *) elt)->loop_num;
318 /* Equality function for SCOP_LOOP2CLOOG_LOOP hash table. */
321 eq_loop_to_cloog_loop (const void *el1, const void *el2)
323 const struct loop_to_cloog_loop_str *elt1, *elt2;
325 elt1 = (const struct loop_to_cloog_loop_str *) el1;
326 elt2 = (const struct loop_to_cloog_loop_str *) el2;
327 return elt1->loop_num == elt2->loop_num;
/* Compares two graphite bbs and returns an integer less than, equal to, or
   greater than zero if the first argument is considered to be respectively
   less than, equal to, or greater than the second.
   We compare using the lexicographic order of the static schedules.  */

static int
gbb_compare (const void *p_1, const void *p_2)
{
  const struct graphite_bb *const gbb_1
    = *(const struct graphite_bb *const *) p_1;
  const struct graphite_bb *const gbb_2
    = *(const struct graphite_bb *const *) p_2;

  return lambda_vector_compare (GBB_STATIC_SCHEDULE (gbb_1),
				gbb_nb_loops (gbb_1) + 1,
				GBB_STATIC_SCHEDULE (gbb_2),
				gbb_nb_loops (gbb_2) + 1);
}
349 /* Sort graphite bbs in SCOP. */
352 graphite_sort_gbbs (scop_p scop)
354 VEC (graphite_bb_p, heap) *bbs = SCOP_BBS (scop);
356 qsort (VEC_address (graphite_bb_p, bbs),
357 VEC_length (graphite_bb_p, bbs),
358 sizeof (graphite_bb_p), gbb_compare);
361 /* Dump conditions of a graphite basic block GBB on FILE. */
364 dump_gbb_conditions (FILE *file, graphite_bb_p gbb)
368 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gbb);
370 if (VEC_empty (gimple, conditions))
373 fprintf (file, "\tbb %d\t: cond = {", GBB_BB (gbb)->index);
375 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
376 print_gimple_stmt (file, stmt, 0, 0);
378 fprintf (file, "}\n");
381 /* Converts the graphite scheduling function into a cloog scattering
382 matrix. This scattering matrix is used to limit the possible cloog
383 output to valid programs in respect to the scheduling function.
385 SCATTERING_DIMENSIONS specifies the dimensionality of the scattering
386 matrix. CLooG 0.14.0 and previous versions require, that all scattering
387 functions of one CloogProgram have the same dimensionality, therefore we
388 allow to specify it. (Should be removed in future versions) */
391 schedule_to_scattering (graphite_bb_p gb, int scattering_dimensions)
394 scop_p scop = GBB_SCOP (gb);
396 int nb_iterators = gbb_nb_loops (gb);
398 /* The cloog scattering matrix consists of these colums:
400 scattering_dimensions cols = Scattering dimensions,
401 nb_iterators cols = bb's iterators,
402 scop_nb_params cols = Parameters,
407 scattering_dimensions = 5
417 s1 s2 s3 s4 s5 i p1 p2 1
418 1 0 0 0 0 0 0 0 -4 = 0
419 0 1 0 0 0 -1 0 0 0 = 0
420 0 0 1 0 0 0 0 0 -5 = 0 */
421 int nb_params = scop_nb_params (scop);
422 int nb_cols = 1 + scattering_dimensions + nb_iterators + nb_params + 1;
423 int col_const = nb_cols - 1;
424 int col_iter_offset = 1 + scattering_dimensions;
426 CloogMatrix *scat = cloog_matrix_alloc (scattering_dimensions, nb_cols);
428 gcc_assert (scattering_dimensions >= nb_iterators * 2 + 1);
430 /* Initialize the identity matrix. */
431 for (i = 0; i < scattering_dimensions; i++)
432 value_set_si (scat->p[i][i + 1], 1);
434 /* Textual order outside the first loop */
435 value_set_si (scat->p[0][col_const], -GBB_STATIC_SCHEDULE (gb)[0]);
437 /* For all surrounding loops. */
438 for (i = 0; i < nb_iterators; i++)
440 int schedule = GBB_STATIC_SCHEDULE (gb)[i + 1];
442 /* Iterations of this loop. */
443 value_set_si (scat->p[2 * i + 1][col_iter_offset + i], -1);
445 /* Textual order inside this loop. */
446 value_set_si (scat->p[2 * i + 2][col_const], -schedule);
452 /* Print the schedules of GB to FILE with INDENT white spaces before.
453 VERBOSITY determines how verbose the code pretty printers are. */
456 print_graphite_bb (FILE *file, graphite_bb_p gb, int indent, int verbosity)
458 CloogMatrix *scattering;
461 fprintf (file, "\nGBB (\n");
463 print_loops_bb (file, GBB_BB (gb), indent+2, verbosity);
467 fprintf (file, " (domain: \n");
468 cloog_matrix_print (file, GBB_DOMAIN (gb));
469 fprintf (file, " )\n");
472 if (GBB_STATIC_SCHEDULE (gb))
474 fprintf (file, " (static schedule: ");
475 print_lambda_vector (file, GBB_STATIC_SCHEDULE (gb),
476 gbb_nb_loops (gb) + 1);
477 fprintf (file, " )\n");
482 fprintf (file, " (contained loops: \n");
483 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
485 fprintf (file, " iterator %d => NULL \n", i);
487 fprintf (file, " iterator %d => loop %d \n", i,
489 fprintf (file, " )\n");
492 if (GBB_DATA_REFS (gb))
493 dump_data_references (file, GBB_DATA_REFS (gb));
495 if (GBB_CONDITIONS (gb))
497 fprintf (file, " (conditions: \n");
498 dump_gbb_conditions (file, gb);
499 fprintf (file, " )\n");
503 && GBB_STATIC_SCHEDULE (gb))
505 fprintf (file, " (scattering: \n");
506 scattering = schedule_to_scattering (gb, 2 * gbb_nb_loops (gb) + 1);
507 cloog_matrix_print (file, scattering);
508 cloog_matrix_free (scattering);
509 fprintf (file, " )\n");
512 fprintf (file, ")\n");
515 /* Print to STDERR the schedules of GB with VERBOSITY level. */
518 debug_gbb (graphite_bb_p gb, int verbosity)
520 print_graphite_bb (stderr, gb, 0, verbosity);
524 /* Print SCOP to FILE. VERBOSITY determines how verbose the pretty
528 print_scop (FILE *file, scop_p scop, int verbosity)
533 fprintf (file, "\nSCoP_%d_%d (\n",
534 SCOP_ENTRY (scop)->index, SCOP_EXIT (scop)->index);
536 fprintf (file, " (cloog: \n");
537 cloog_program_print (file, SCOP_PROG (scop));
538 fprintf (file, " )\n");
545 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
546 print_graphite_bb (file, gb, 0, verbosity);
549 fprintf (file, ")\n");
552 /* Print all the SCOPs to FILE. VERBOSITY determines how verbose the
553 code pretty printers are. */
556 print_scops (FILE *file, int verbosity)
561 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
562 print_scop (file, scop, verbosity);
565 /* Debug SCOP. VERBOSITY determines how verbose the code pretty
569 debug_scop (scop_p scop, int verbosity)
571 print_scop (stderr, scop, verbosity);
/* Debug all SCoPs from CURRENT_SCOPS on stderr.  VERBOSITY determines
   how verbose the code pretty printers are.  */

void
debug_scops (int verbosity)
{
  print_scops (stderr, verbosity);
}
583 /* Pretty print to FILE the SCOP in DOT format. */
586 dot_scop_1 (FILE *file, scop_p scop)
591 basic_block entry = SCOP_ENTRY (scop);
592 basic_block exit = SCOP_EXIT (scop);
594 fprintf (file, "digraph SCoP_%d_%d {\n", entry->index,
600 fprintf (file, "%d [shape=triangle];\n", bb->index);
603 fprintf (file, "%d [shape=box];\n", bb->index);
605 if (bb_in_scop_p (bb, scop))
606 fprintf (file, "%d [color=red];\n", bb->index);
608 FOR_EACH_EDGE (e, ei, bb->succs)
609 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
612 fputs ("}\n\n", file);
615 /* Display SCOP using dotty. */
618 dot_scop (scop_p scop)
620 dot_scop_1 (stderr, scop);
623 /* Pretty print all SCoPs in DOT format and mark them with different colors.
624 If there are not enough colors, paint later SCoPs gray.
626 - "*" after the node number: entry of a SCoP,
627 - "#" after the node number: exit of a SCoP,
628 - "()" entry or exit not part of SCoP. */
/* NOTE(review): this listing is elided -- the declarations, the
   per-SCoP color-selection switch and several control-flow lines are
   missing from this view; do not edit this function from this
   fragment.  */
631 dot_all_scops_1 (FILE *file)
640 /* Disable debugging while printing graph. */
641 int tmp_dump_flags = dump_flags;
644 fprintf (file, "digraph all {\n");
648 int part_of_scop = false;
650 /* Use HTML for every bb label. So we are able to print bbs
651 which are part of two different SCoPs, with two different
652 background colors. */
653 fprintf (file, "%d [label=<\n <TABLE BORDER=\"0\" CELLBORDER=\"1\" ",
655 fprintf (file, "CELLSPACING=\"0\">\n");
657 /* Select color for SCoP. */
658 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
659 if (bb_in_scop_p (bb, scop)
660 || (SCOP_EXIT (scop) == bb)
661 || (SCOP_ENTRY (scop) == bb))
/* NOTE(review): the switch assigning COLOR per SCoP index (original
   lines 662-719) is elided here.  */
720 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"%s\">", color);
722 if (!bb_in_scop_p (bb, scop))
723 fprintf (file, " (");
725 if (bb == SCOP_ENTRY (scop)
726 && bb == SCOP_EXIT (scop))
727 fprintf (file, " %d*# ", bb->index);
728 else if (bb == SCOP_ENTRY (scop))
729 fprintf (file, " %d* ", bb->index);
730 else if (bb == SCOP_EXIT (scop))
731 fprintf (file, " %d# ", bb->index);
733 fprintf (file, " %d ", bb->index);
735 if (!bb_in_scop_p (bb, scop))
738 fprintf (file, "</TD></TR>\n");
/* Fallback row for bbs that belong to no SCoP.  */
744 fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"#ffffff\">");
745 fprintf (file, " %d </TD></TR>\n", bb->index);
748 fprintf (file, " </TABLE>>, shape=box, style=\"setlinewidth(0)\"]\n");
753 FOR_EACH_EDGE (e, ei, bb->succs)
754 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
757 fputs ("}\n\n", file);
759 /* Enable debugging again. */
760 dump_flags = tmp_dump_flags;
763 /* Display all SCoPs using dotty. */
768 /* When debugging, enable the following code. This cannot be used
769 in production compilers because it calls "system". */
771 FILE *stream = fopen ("/tmp/allscops.dot", "w");
774 dot_all_scops_1 (stream);
777 system ("dotty /tmp/allscops.dot");
779 dot_all_scops_1 (stderr);
783 /* Returns the outermost loop in SCOP that contains BB. */
786 outermost_loop_in_scop (scop_p scop, basic_block bb)
790 nest = bb->loop_father;
791 while (loop_outer (nest) && loop_in_scop_p (loop_outer (nest), scop))
792 nest = loop_outer (nest);
797 /* Returns the block preceding the entry of SCOP. */
800 block_before_scop (scop_p scop)
802 return SESE_ENTRY (SCOP_REGION (scop))->src;
805 /* Return true when EXPR is an affine function in LOOP with parameters
806 instantiated relative to SCOP_ENTRY. */
809 loop_affine_expr (basic_block scop_entry, struct loop *loop, tree expr)
812 tree scev = analyze_scalar_evolution (loop, expr);
814 scev = instantiate_scev (scop_entry, loop, scev);
816 return (evolution_function_is_invariant_p (scev, n)
817 || evolution_function_is_affine_multivariate_p (scev, n));
820 /* Return false if the tree_code of the operand OP or any of its operands
824 exclude_component_ref (tree op)
831 if (TREE_CODE (op) == COMPONENT_REF)
835 len = TREE_OPERAND_LENGTH (op);
836 for (i = 0; i < len; ++i)
838 if (!exclude_component_ref (TREE_OPERAND (op, i)))
847 /* Return true if the operand OP is simple. */
850 is_simple_operand (loop_p loop, gimple stmt, tree op)
852 /* It is not a simple operand when it is a declaration, */
854 /* or a structure, */
855 || AGGREGATE_TYPE_P (TREE_TYPE (op))
856 /* or a memory access that cannot be analyzed by the data
857 reference analysis. */
858 || ((handled_component_p (op) || INDIRECT_REF_P (op))
859 && !stmt_simple_memref_p (loop, stmt, op)))
862 return exclude_component_ref (op);
865 /* Return true only when STMT is simple enough for being handled by
866 Graphite. This depends on SCOP_ENTRY, as the parametetrs are
867 initialized relatively to this basic block. */
/* NOTE(review): this listing is elided -- the return type, braces,
   several switch cases and their returns are missing from this view;
   do not edit this function from this fragment.  */
870 stmt_simple_for_scop_p (basic_block scop_entry, gimple stmt)
872 basic_block bb = gimple_bb (stmt);
873 struct loop *loop = bb->loop_father;
875 /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects.
876 Calls have side-effects, except those to const or pure
878 if (gimple_has_volatile_ops (stmt)
879 || (gimple_code (stmt) == GIMPLE_CALL
880 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
881 || (gimple_code (stmt) == GIMPLE_ASM))
884 switch (gimple_code (stmt))
894 enum tree_code code = gimple_cond_code (stmt);
896 /* We can only handle this kind of conditional expressions.
897 For inequalities like "if (i != 3 * k)" we need unions of
898 polyhedrons. Expressions like "if (a)" or "if (a == 15)" need
899 them for the else branch. */
900 if (!(code == LT_EXPR
/* NOTE(review): the remaining comparison codes accepted here
   (original lines 901-908) are elided.  */
909 FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
910 if (!loop_affine_expr (scop_entry, loop, op))
918 enum tree_code code = gimple_assign_rhs_code (stmt);
920 switch (get_gimple_rhs_class (code))
922 case GIMPLE_UNARY_RHS:
923 case GIMPLE_SINGLE_RHS:
924 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
925 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));
927 case GIMPLE_BINARY_RHS:
928 return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
929 && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
930 && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));
932 case GIMPLE_INVALID_RHS:
941 size_t n = gimple_call_num_args (stmt);
942 tree lhs = gimple_call_lhs (stmt);
944 for (i = 0; i < n; i++)
946 tree arg = gimple_call_arg (stmt, i);
948 if (!(is_simple_operand (loop, stmt, lhs)
949 && is_simple_operand (loop, stmt, arg)))
957 /* These nodes cut a new scope. */
964 /* Returns the statement of BB that contains a harmful operation: that
965 can be a function call with side effects, the induction variables
966 are not linear with respect to SCOP_ENTRY, etc. The current open
967 scop should end before this statement. */
970 harmful_stmt_in_bb (basic_block scop_entry, basic_block bb)
972 gimple_stmt_iterator gsi;
974 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
975 if (!stmt_simple_for_scop_p (scop_entry, gsi_stmt (gsi)))
976 return gsi_stmt (gsi);
981 /* Returns true when BB will be represented in graphite. Return false
982 for the basic blocks that contain code eliminated in the code
983 generation pass: i.e. induction variables and exit conditions. */
/* NOTE(review): this listing is elided -- the return type, braces,
   the switch cases and all return statements are missing from this
   view; do not edit this function from this fragment.  */
986 graphite_stmt_p (scop_p scop, basic_block bb,
987 VEC (data_reference_p, heap) *drs)
989 gimple_stmt_iterator gsi;
990 loop_p loop = bb->loop_father;
/* Any bb with data references is kept.  */
992 if (VEC_length (data_reference_p, drs) > 0)
995 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
997 gimple stmt = gsi_stmt (gsi);
999 switch (gimple_code (stmt))
1001 /* Control flow expressions can be ignored, as they are
1002 represented in the iteration domains and will be
1003 regenerated by graphite. */
/* Scalar assignments whose scev is fully determined are regenerated
   by graphite and need not be represented.  */
1011 tree var = gimple_assign_lhs (stmt);
1012 var = analyze_scalar_evolution (loop, var);
1013 var = instantiate_scev (block_before_scop (scop), loop, var);
1015 if (chrec_contains_undetermined (var))
1029 /* Store the GRAPHITE representation of BB. */
1032 new_graphite_bb (scop_p scop, basic_block bb)
1034 struct graphite_bb *gbb;
1035 VEC (data_reference_p, heap) *drs = VEC_alloc (data_reference_p, heap, 5);
1036 struct loop *nest = outermost_loop_in_scop (scop, bb);
1037 gimple_stmt_iterator gsi;
1039 bitmap_set_bit (SCOP_BBS_B (scop), bb->index);
1041 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1042 find_data_references_in_stmt (nest, gsi_stmt (gsi), &drs);
1044 if (!graphite_stmt_p (scop, bb, drs))
1046 free_data_refs (drs);
1050 gbb = XNEW (struct graphite_bb);
1053 GBB_SCOP (gbb) = scop;
1054 GBB_DATA_REFS (gbb) = drs;
1055 GBB_DOMAIN (gbb) = NULL;
1056 GBB_CONDITIONS (gbb) = NULL;
1057 GBB_CONDITION_CASES (gbb) = NULL;
1058 GBB_LOOPS (gbb) = NULL;
1059 VEC_safe_push (graphite_bb_p, heap, SCOP_BBS (scop), gbb);
1065 free_graphite_bb (struct graphite_bb *gbb)
1067 if (GBB_DOMAIN (gbb))
1068 cloog_matrix_free (GBB_DOMAIN (gbb));
1070 VEC_free (gimple, heap, GBB_CONDITIONS (gbb));
1071 VEC_free (gimple, heap, GBB_CONDITION_CASES (gbb));
1072 VEC_free (loop_p, heap, GBB_LOOPS (gbb));
1073 GBB_BB (gbb)->aux = 0;
1077 /* Creates a new scop starting with ENTRY. */
1080 new_scop (edge entry, edge exit)
1082 scop_p scop = XNEW (struct scop);
1084 gcc_assert (entry && exit);
1086 SCOP_REGION (scop) = XNEW (struct sese);
1087 SESE_ENTRY (SCOP_REGION (scop)) = entry;
1088 SESE_EXIT (SCOP_REGION (scop)) = exit;
1089 SCOP_BBS (scop) = VEC_alloc (graphite_bb_p, heap, 3);
1090 SCOP_OLDIVS (scop) = VEC_alloc (name_tree, heap, 3);
1091 SCOP_BBS_B (scop) = BITMAP_ALLOC (NULL);
1092 SCOP_LOOPS (scop) = BITMAP_ALLOC (NULL);
1093 SCOP_LOOP_NEST (scop) = VEC_alloc (loop_p, heap, 3);
1094 SCOP_ADD_PARAMS (scop) = true;
1095 SCOP_PARAMS (scop) = VEC_alloc (name_tree, heap, 3);
1096 SCOP_PROG (scop) = cloog_program_malloc ();
1097 cloog_program_set_names (SCOP_PROG (scop), cloog_names_malloc ());
1098 SCOP_LOOP2CLOOG_LOOP (scop) = htab_create (10, hash_loop_to_cloog_loop,
1099 eq_loop_to_cloog_loop,
1107 free_scop (scop_p scop)
1111 struct graphite_bb *gb;
1114 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
1115 free_graphite_bb (gb);
1117 VEC_free (graphite_bb_p, heap, SCOP_BBS (scop));
1118 BITMAP_FREE (SCOP_BBS_B (scop));
1119 BITMAP_FREE (SCOP_LOOPS (scop));
1120 VEC_free (loop_p, heap, SCOP_LOOP_NEST (scop));
1122 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, iv); i++)
1124 VEC_free (name_tree, heap, SCOP_OLDIVS (scop));
1126 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
1129 VEC_free (name_tree, heap, SCOP_PARAMS (scop));
1130 cloog_program_free (SCOP_PROG (scop));
1131 htab_delete (SCOP_LOOP2CLOOG_LOOP (scop));
1132 XDELETE (SCOP_REGION (scop));
1136 /* Deletes all scops in SCOPS. */
1139 free_scops (VEC (scop_p, heap) *scops)
1144 for (i = 0; VEC_iterate (scop_p, scops, i, scop); i++)
1147 VEC_free (scop_p, heap, scops);
/* Classification of a basic block during SCoP detection.  */
typedef enum gbb_type {
  GBB_UNKNOWN,
  GBB_LOOP_SING_EXIT_HEADER,
  GBB_LOOP_MULT_EXIT_HEADER,
  GBB_LOOP_EXIT,
  GBB_COND_HEADER,
  GBB_SIMPLE,
  GBB_LAST
} gbb_type;
1160 /* Detect the type of BB. Loop headers are only marked, if they are
1161 new. This means their loop_father is different to LAST_LOOP.
1162 Otherwise they are treated like any other bb and their type can be
1166 get_bb_type (basic_block bb, struct loop *last_loop)
1168 VEC (basic_block, heap) *dom;
1170 struct loop *loop = bb->loop_father;
1172 /* Check, if we entry into a new loop. */
1173 if (loop != last_loop)
1175 if (single_exit (loop) != NULL)
1176 return GBB_LOOP_SING_EXIT_HEADER;
1177 else if (loop->num != 0)
1178 return GBB_LOOP_MULT_EXIT_HEADER;
1180 return GBB_COND_HEADER;
1183 dom = get_dominated_by (CDI_DOMINATORS, bb);
1184 nb_dom = VEC_length (basic_block, dom);
1185 VEC_free (basic_block, heap, dom);
1190 nb_suc = VEC_length (edge, bb->succs);
1192 if (nb_dom == 1 && nb_suc == 1)
1195 return GBB_COND_HEADER;
1198 /* A SCoP detection region, defined using bbs as borders.
1199 All control flow touching this region, comes in passing basic_block ENTRY and
1200 leaves passing basic_block EXIT. By using bbs instead of edges for the
1201 borders we are able to represent also regions that do not have a single
1203 But as they have a single entry basic_block and a single exit basic_block, we
1204 are able to generate for every sd_region a single entry and exit edge.
1211 / \ This region contains: {3, 4, 5, 6, 7, 8}
1219 typedef struct sd_region_p
1221 /* The entry bb dominates all bbs in the sd_region. It is part of the
1225 /* The exit bb postdominates all bbs in the sd_region, but is not
1226 part of the region. */
1230 DEF_VEC_O(sd_region);
1231 DEF_VEC_ALLOC_O(sd_region, heap);
1234 /* Moves the scops from SOURCE to TARGET and clean up SOURCE. */
1237 move_sd_regions (VEC (sd_region, heap) **source, VEC (sd_region, heap) **target)
1242 for (i = 0; VEC_iterate (sd_region, *source, i, s); i++)
1243 VEC_safe_push (sd_region, heap, *target, s);
1245 VEC_free (sd_region, heap, *source);
1248 /* Store information needed by scopdet_* functions. */
/* NOTE(review): this listing is elided -- the struct keyword line and
   the field declarations themselves (last, next, exits, difficult) are
   missing from this view; only their doc comments remain.  */
1252 /* Where the last open scop would stop if the current BB is harmful. */
1255 /* Where the next scop would start if the current BB is harmful. */
1258 /* The bb or one of its children contains open loop exits. That means
1259 loop exit nodes that are not surrounded by a loop dominated by bb. */
1262 /* The bb or one of its children contains only structures we can handle. */
1267 static struct scopdet_info build_scops_1 (basic_block, VEC (sd_region, heap) **,
1270 /* Calculates BB infos. If bb is difficult we add valid SCoPs dominated by BB
1271 to SCOPS. TYPE is the gbb_type of BB. */
/* NOTE(review): this listing is elided -- the opening brace, several
   declarations, the switch statement head, and many case bodies are
   missing from this view; do not edit this function from this
   fragment.  The visible lines dispatch on the bb type: simple bbs,
   single-exit loop headers, multi-exit loop headers and condition
   headers each compute a scopdet_info result.  */
1274 scopdet_basic_block_info (basic_block bb, VEC (sd_region, heap) **scops,
1277 struct loop *loop = bb->loop_father;
1278 struct scopdet_info result;
1281 /* XXX: ENTRY_BLOCK_PTR could be optimized in later steps. */
1282 stmt = harmful_stmt_in_bb (ENTRY_BLOCK_PTR, bb);
1283 result.difficult = (stmt != NULL);
1290 result.exits = false;
1295 result.next = single_succ (bb);
1296 result.exits = false;
1300 case GBB_LOOP_SING_EXIT_HEADER:
1302 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap,3);
1303 struct scopdet_info sinfo;
1305 sinfo = build_scops_1 (bb, &tmp_scops, loop);
1307 result.last = single_exit (bb->loop_father)->src;
1308 result.next = single_exit (bb->loop_father)->dest;
1310 /* If we do not dominate result.next, remove it. It's either
1311 the EXIT_BLOCK_PTR, or another bb dominates it and will
1312 call the scop detection for this bb. */
1313 if (!dominated_by_p (CDI_DOMINATORS, result.next, bb))
1316 if (result.last->loop_father != loop)
1319 if (TREE_CODE (number_of_latch_executions (loop))
1321 result.difficult = true;
1323 if (sinfo.difficult)
1324 move_sd_regions (&tmp_scops, scops);
1326 VEC_free (sd_region, heap, tmp_scops);
1328 result.exits = false;
1329 result.difficult |= sinfo.difficult;
1333 case GBB_LOOP_MULT_EXIT_HEADER:
1335 /* XXX: For now we just do not join loops with multiple exits. If the
1336 exits lead to the same bb it may be possible to join the loop. */
1337 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1338 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1341 build_scops_1 (bb, &tmp_scops, loop);
1343 /* Scan the code dominated by this loop. This means all bbs, that are
1344 are dominated by a bb in this loop, but are not part of this loop.
1347 - The loop exit destination is dominated by the exit sources.
1349 TODO: We miss here the more complex cases:
1350 - The exit destinations are dominated by another bb inside the
1352 - The loop dominates bbs, that are not exit destinations. */
1353 for (i = 0; VEC_iterate (edge, exits, i, e); i++)
1354 if (e->src->loop_father == loop
1355 && dominated_by_p (CDI_DOMINATORS, e->dest, e->src))
1357 /* Pass loop_outer to recognize e->dest as loop header in
1359 if (e->dest->loop_father->header == e->dest)
1360 build_scops_1 (e->dest, &tmp_scops,
1361 loop_outer (e->dest->loop_father));
1363 build_scops_1 (e->dest, &tmp_scops, e->dest->loop_father);
/* Multiple-exit loops are always treated as difficult.  */
1368 result.difficult = true;
1369 result.exits = false;
1370 move_sd_regions (&tmp_scops, scops);
1371 VEC_free (edge, heap, exits);
1374 case GBB_COND_HEADER:
1376 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1377 struct scopdet_info sinfo;
1378 VEC (basic_block, heap) *dominated;
1381 basic_block last_bb = NULL;
1383 result.exits = false;
1385 /* First check the successors of BB, and check if it is possible to join
1386 the different branches. */
1387 for (i = 0; VEC_iterate (edge, bb->succs, i, e); i++)
1389 /* Ignore loop exits. They will be handled after the loop body. */
1390 if (is_loop_exit (loop, e->dest))
1392 result.exits = true;
1396 /* Do not follow edges that lead to the end of the
1397 conditions block. For example, in
1407 the edge from 0 => 6. Only check if all paths lead to
1410 if (!single_pred_p (e->dest))
1412 /* Check, if edge leads directly to the end of this
1419 if (e->dest != last_bb)
1420 result.difficult = true;
1425 if (!dominated_by_p (CDI_DOMINATORS, e->dest, bb))
1427 result.difficult = true;
1431 sinfo = build_scops_1 (e->dest, &tmp_scops, loop);
1433 result.exits |= sinfo.exits;
1434 result.last = sinfo.last;
1435 result.difficult |= sinfo.difficult;
1437 /* Checks, if all branches end at the same point.
1438 If that is true, the condition stays joinable.
1439 Have a look at the example above. */
1440 if (sinfo.last && single_succ_p (sinfo.last))
1442 basic_block next_tmp = single_succ (sinfo.last);
1447 if (next_tmp != last_bb)
1448 result.difficult = true;
1451 result.difficult = true;
1454 /* If the condition is joinable. */
1455 if (!result.exits && !result.difficult)
1457 /* Only return a next pointer if we dominate this pointer.
1458 Otherwise it will be handled by the bb dominating it. */
1459 if (dominated_by_p (CDI_DOMINATORS, last_bb, bb) && last_bb != bb)
1460 result.next = last_bb;
1464 VEC_free (sd_region, heap, tmp_scops);
1468 /* Scan remaining bbs dominated by BB. */
1469 dominated = get_dominated_by (CDI_DOMINATORS, bb);
1471 for (i = 0; VEC_iterate (basic_block, dominated, i, dom_bb); i++)
1473 /* Ignore loop exits: they will be handled after the loop body. */
1474 if (loop_depth (find_common_loop (loop, dom_bb->loop_father))
1475 < loop_depth (loop))
1477 result.exits = true;
1481 /* Ignore the bbs processed above. */
1482 if (single_pred_p (dom_bb) && single_pred (dom_bb) == bb)
1485 if (loop_depth (loop) > loop_depth (dom_bb->loop_father))
1486 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop_outer (loop))
1488 sinfo = build_scops_1 (dom_bb, &tmp_scops, loop);
1491 result.exits |= sinfo.exits;
1492 result.difficult = true;
1496 VEC_free (basic_block, heap, dominated);
1499 move_sd_regions (&tmp_scops, scops);
1511 /* Creates the SCoPs and writes entry and exit points for every SCoP. */
/* NOTE(review): this listing is elided -- braces, some declarations
   and parts of the closing logic are missing from this view; do not
   edit this function from this fragment.  It walks the dominance tree
   starting at CURRENT, opening an sd_region at the first simple bb and
   closing it when a difficult bb or a loop exit is met.  */
1513 static struct scopdet_info
1514 build_scops_1 (basic_block current, VEC (sd_region, heap) **scops, loop_p loop)
1517 bool in_scop = false;
1518 sd_region open_scop;
1519 struct scopdet_info sinfo;
1521 /* Initialize result. */
1522 struct scopdet_info result;
1523 result.exits = false;
1524 result.difficult = false;
1527 open_scop.entry = NULL;
1528 open_scop.exit = NULL;
1531 /* Loop over the dominance tree. If we meet a difficult bb, close
1532 the current SCoP. Loop and condition header start a new layer,
1533 and can only be added if all bbs in deeper layers are simple. */
1534 while (current != NULL)
1536 sinfo = scopdet_basic_block_info (current, scops, get_bb_type (current,
1539 if (!in_scop && !(sinfo.exits || sinfo.difficult))
1541 open_scop.entry = current;
1542 open_scop.exit = NULL;
1545 else if (in_scop && (sinfo.exits || sinfo.difficult))
1547 open_scop.exit = current;
1548 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1552 result.difficult |= sinfo.difficult;
1553 result.exits |= sinfo.exits;
1555 current = sinfo.next;
1558 /* Try to close open_scop, if we are still in an open SCoP. */
/* The exit is preferably a successor post-dominated by the last bb;
   otherwise the last bb itself is used.  */
1564 for (i = 0; VEC_iterate (edge, sinfo.last->succs, i, e); i++)
1565 if (dominated_by_p (CDI_POST_DOMINATORS, sinfo.last, e->dest))
1566 open_scop.exit = e->dest;
1568 if (!open_scop.exit && open_scop.entry != sinfo.last)
1569 open_scop.exit = sinfo.last;
1572 VEC_safe_push (sd_region, heap, *scops, &open_scop);
1576 result.last = sinfo.last;
1580 /* Checks if a bb is contained in REGION: BB must be dominated by the
     region's entry, but must not lie past the region's exit. */
1583 bb_in_sd_region (basic_block bb, sd_region *region)
1585 return dominated_by_p (CDI_DOMINATORS, bb, region->entry)
1586 && !(dominated_by_p (CDI_DOMINATORS, bb, region->exit)
1587 && !dominated_by_p (CDI_DOMINATORS, region->entry,
1591 /* Returns the single entry edge of REGION, or NULL if it does not exist. */
1594 find_single_entry_edge (sd_region *region)
     /* An entry edge is a predecessor edge whose source is outside the region.  */
1600 FOR_EACH_EDGE (e, ei, region->entry->preds)
1601 if (!bb_in_sd_region (e->src, region))
1616 /* Returns the single exit edge of REGION, or NULL if it does not exist. */
1619 find_single_exit_edge (sd_region *region)
     /* An exit edge is a predecessor edge of the exit bb whose source is
        still inside the region.  */
1625 FOR_EACH_EDGE (e, ei, region->exit->preds)
1626 if (bb_in_sd_region (e->src, region))
1641 /* Create a single entry edge for REGION. */
1644 create_single_entry_edge (sd_region *region)
1646 if (find_single_entry_edge (region))
1649 /* There are multiple predecessors for bb_3
1662 There are two edges (1->3, 2->3), that point from outside into the region,
1663 and another one (5->3), a loop latch, that leads to bb_3.
1671 | |\ (3.0 -> 3.1) = single entry edge
1680 If the loop is part of the SCoP, we have to redirect the loop latches.
1686 | | (3.0 -> 3.1) = entry edge
     /* Split the entry bb unless it is a loop header whose latch lies
        outside the region (that case would need latch redirection).  */
1695 if (region->entry->loop_father->header != region->entry
1696 || dominated_by_p (CDI_DOMINATORS,
1697 loop_latch_edge (region->entry->loop_father)->src,
1700 edge forwarder = split_block_after_labels (region->entry);
1701 region->entry = forwarder->dest;
1704 /* This case is never executed, as the loop headers seem always to have a
1705 single edge pointing from outside into the loop. */
1708 #ifdef ENABLE_CHECKING
1709 gcc_assert (find_single_entry_edge (region));
1713 /* Check if the sd_region, mentioned in EDGE, has no exit bb.
     Used as the predicate for make_forwarder_block in
     create_single_exit_edge; E->aux carries the owning region or NULL.  */
1716 sd_region_without_exit (edge e)
1718 sd_region *r = (sd_region *) e->aux;
1721 return r->exit == NULL;
1726 /* Create a single exit edge for REGION. */
1729 create_single_exit_edge (sd_region *region)
1733 edge forwarder = NULL;
1736 if (find_single_exit_edge (region))
1739 /* We create a forwarder bb (5) for all edges leaving this region
1740 (3->5, 4->5). All other edges leading to the same bb, are moved
1741 to a new bb (6). If these edges were part of another region (2->5)
1742 we update the region->exit pointer, of this region.
1744 To identify which edge belongs to which region we depend on the e->aux
1745 pointer in every edge. It points to the region of the edge or to NULL,
1746 if the edge is not part of any region.
1748 1 2 3 4 1->5 no region, 2->5 region->exit = 5,
1749 \| |/ 3->5 region->exit = NULL, 4->5 region->exit = NULL
1754 1 2 3 4 1->6 no region, 2->6 region->exit = 6,
1755 | | \/ 3->5 no region, 4->5 no region,
1757 \| / 5->6 region->exit = 6
1760 Now there is only a single exit edge (5->6). */
1761 exit = region->exit;
     /* Clear region->exit first so sd_region_without_exit selects exactly
        this region's edges for the forwarder.  */
1762 region->exit = NULL;
1763 forwarder = make_forwarder_block (exit, &sd_region_without_exit, NULL);
1765 /* Unmark the edges, that are no longer exit edges. */
1766 FOR_EACH_EDGE (e, ei, forwarder->src->preds)
1770 /* Mark the new exit edge. */
1771 single_succ_edge (forwarder->src)->aux = region;
1773 /* Update the exit bb of all regions, where exit edges lead to
1775 FOR_EACH_EDGE (e, ei, forwarder->dest->preds)
1777 ((sd_region *) e->aux)->exit = forwarder->dest;
1779 #ifdef ENABLE_CHECKING
1780 gcc_assert (find_single_exit_edge (region));
1784 /* Unmark the exit edges of all REGIONS (clear the e->aux region tag).
1785 See comment in "create_single_exit_edge". */
1788 unmark_exit_edges (VEC (sd_region, heap) *regions)
1795 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1796 FOR_EACH_EDGE (e, ei, s->exit->preds)
1801 /* Mark the exit edges of all REGIONS by tagging e->aux with the region
1802 owning each edge.  See comment in "create_single_exit_edge". */
1805 mark_exit_edges (VEC (sd_region, heap) *regions)
1812 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1813 FOR_EACH_EDGE (e, ei, s->exit->preds)
1814 if (bb_in_sd_region (e->src, s))
1818 /* Free and compute again all the dominators information
     (both dominator and post-dominator trees). */
1821 recompute_all_dominators (void)
1823 free_dominance_info (CDI_DOMINATORS);
1824 free_dominance_info (CDI_POST_DOMINATORS);
1825 calculate_dominance_info (CDI_DOMINATORS);
1826 calculate_dominance_info (CDI_POST_DOMINATORS);
1829 /* Verifies properties that GRAPHITE should maintain during translation:
     loop structure and both dominance trees.  Compiled to a no-op unless
     ENABLE_CHECKING is defined. */
1832 graphite_verify (void)
1834 #ifdef ENABLE_CHECKING
1835 verify_loop_structure ();
1836 verify_dominators (CDI_DOMINATORS);
1837 verify_dominators (CDI_POST_DOMINATORS);
1842 /* Create for all scop regions a single entry and a single exit edge. */
1845 create_sese_edges (VEC (sd_region, heap) *regions)
1850 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1851 create_single_entry_edge (s);
     /* Exit edges of all regions must be tagged before any region's exit
        is rewritten, because regions may share exit bbs.  */
1853 mark_exit_edges (regions);
1855 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++)
1856 create_single_exit_edge (s);
1858 unmark_exit_edges (regions);
1860 #ifdef ENABLE_CHECKING
1861 verify_loop_structure ();
1862 verify_dominators (CDI_DOMINATORS);
1867 /* Create graphite SCoPs from an array of scop detection regions and
     push them onto the global current_scops vector. */
1870 build_graphite_scops (VEC (sd_region, heap) *scop_regions)
1875 for (i = 0; VEC_iterate (sd_region, scop_regions, i, s); i++)
1877 edge entry = find_single_entry_edge (s);
1878 edge exit = find_single_exit_edge (s);
1879 scop_p scop = new_scop (entry, exit);
1880 VEC_safe_push (scop_p, heap, current_scops, scop);
1882 /* Are there overlapping SCoPs?  No region's entry may fall inside
     another region. */
1883 #ifdef ENABLE_CHECKING
1888 for (j = 0; VEC_iterate (sd_region, scop_regions, j, s2); j++)
1890 gcc_assert (!bb_in_sd_region (s->entry, s2));
1896 /* Find static control parts: detect regions starting from the function
     entry, give each a single entry/exit edge, then build the SCoPs.
     NOTE(review): the function signature row is elided in this listing.  */
1901 struct loop *loop = current_loops->tree_root;
1902 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
1904 build_scops_1 (single_succ (ENTRY_BLOCK_PTR), &tmp_scops, loop);
1905 create_sese_edges (tmp_scops);
1906 build_graphite_scops (tmp_scops);
1907 VEC_free (sd_region, heap, tmp_scops);
1910 /* Gather the basic blocks belonging to the SCOP using an explicit-stack
     DFS starting from the SCoP entry. */
1913 build_scop_bbs (scop_p scop)
1915 basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
1916 sbitmap visited = sbitmap_alloc (last_basic_block);
1919 sbitmap_zero (visited);
1920 stack[sp++] = SCOP_ENTRY (scop);
1924 basic_block bb = stack[--sp];
1925 int depth = loop_depth (bb->loop_father);
1926 int num = bb->loop_father->num;
1930 /* Scop's exit is not in the scop. Exclude also bbs, which are
1931 dominated by the SCoP exit. These are e.g. loop latches. */
1932 if (TEST_BIT (visited, bb->index)
1933 || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
1934 /* Every block in the scop is dominated by scop's entry. */
1935 || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop)))
1938 new_graphite_bb (scop, bb);
1939 SET_BIT (visited, bb->index);
1941 /* First push the blocks that have to be processed last. Note
1942 that this means that the order in which the code is organized
1943 below is important: do not reorder the following code. */
     /* 1. Successors that leave the current loop nest (smaller depth).  */
1944 FOR_EACH_EDGE (e, ei, bb->succs)
1945 if (! TEST_BIT (visited, e->dest->index)
1946 && (int) loop_depth (e->dest->loop_father) < depth)
1947 stack[sp++] = e->dest;
     /* 2. Same depth but a different loop.  */
1949 FOR_EACH_EDGE (e, ei, bb->succs)
1950 if (! TEST_BIT (visited, e->dest->index)
1951 && (int) loop_depth (e->dest->loop_father) == depth
1952 && e->dest->loop_father->num != num)
1953 stack[sp++] = e->dest;
     /* 3. Same loop, join points (more than one predecessor).  */
1955 FOR_EACH_EDGE (e, ei, bb->succs)
1956 if (! TEST_BIT (visited, e->dest->index)
1957 && (int) loop_depth (e->dest->loop_father) == depth
1958 && e->dest->loop_father->num == num
1959 && EDGE_COUNT (e->dest->preds) > 1)
1960 stack[sp++] = e->dest;
     /* 4. Same loop, straight-line successors (single predecessor).  */
1962 FOR_EACH_EDGE (e, ei, bb->succs)
1963 if (! TEST_BIT (visited, e->dest->index)
1964 && (int) loop_depth (e->dest->loop_father) == depth
1965 && e->dest->loop_father->num == num
1966 && EDGE_COUNT (e->dest->preds) == 1)
1967 stack[sp++] = e->dest;
     /* 5. Successors that enter a deeper loop: pushed last, so the LIFO
        stack processes them first.  */
1969 FOR_EACH_EDGE (e, ei, bb->succs)
1970 if (! TEST_BIT (visited, e->dest->index)
1971 && (int) loop_depth (e->dest->loop_father) > depth)
1972 stack[sp++] = e->dest;
1976 sbitmap_free (visited);
1979 /* Returns the number of reduction phi nodes in LOOP: scalar phis in
     the loop header whose scalar evolution cannot be determined. */
1982 nb_reductions_in_loop (loop_p loop)
1985 gimple_stmt_iterator gsi;
1987 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1989 gimple phi = gsi_stmt (gsi);
     /* Skip phis for virtual (memory) operands.  */
1992 if (!is_gimple_reg (PHI_RESULT (phi)))
1995 scev = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1996 scev = instantiate_parameters (loop, scev);
1997 if (chrec_contains_undetermined (scev))
2004 /* A LOOP is in normal form when it contains only one scalar phi node
2005 that defines the main induction variable of the loop, only one
2006 increment of the IV, and only one exit condition. */
2009 graphite_loop_normal_form (loop_p loop)
2011 struct tree_niter_desc niter;
2014 edge exit = single_dom_exit (loop);
2016 if (!number_of_iterations_exit (loop, exit, &niter, false))
     /* Gimplify the iteration count and emit the produced statements on
        the preheader edge, outside the loop body.  */
2019 nit = force_gimple_operand (unshare_expr (niter.niter), &stmts, true,
2022 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2024 /* One IV per loop: give up if the loop carries reductions. */
2025 if (nb_reductions_in_loop (loop) > 0)
2028 return canonicalize_loop_ivs (loop, NULL, nit);
2031 /* Record LOOP as occurring in SCOP. Returns true when the operation
2035 scop_record_loop (scop_p scop, loop_p loop)
     /* Already recorded: nothing to do.  */
2040 if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
2043 bitmap_set_bit (SCOP_LOOPS (scop), loop->num);
2044 VEC_safe_push (loop_p, heap, SCOP_LOOP_NEST (scop), loop);
2046 induction_var = graphite_loop_normal_form (loop);
     /* Remember the canonical IV as an "old induction variable" of the SCoP.  */
2050 oldiv = XNEW (struct name_tree);
2051 oldiv->t = induction_var;
2052 oldiv->name = get_name (SSA_NAME_VAR (oldiv->t));
2054 VEC_safe_push (name_tree, heap, SCOP_OLDIVS (scop), oldiv);
2058 /* Build the loop nests contained in SCOP. Returns true when the
2059 operation was successful. */
2062 build_scop_loop_nests (scop_p scop)
2066 struct loop *loop0, *loop1;
2069 if (bb_in_scop_p (bb, scop))
2071 struct loop *loop = bb->loop_father;
2073 /* Only add loops if they are completely contained in the SCoP,
     i.e. both the header and the latch lie inside it. */
2074 if (loop->header == bb
2075 && bb_in_scop_p (loop->latch, scop))
2077 if (!scop_record_loop (scop, loop))
2082 /* Make sure that the loops in the SCOP_LOOP_NEST are ordered. It
2083 can be the case that an inner loop is inserted before an outer
2084 loop. To avoid this, semi-sort once: swap adjacent out-of-order
2085 pairs (a single bubble pass over the vector). */
2085 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop0); i++)
2087 if (VEC_length (loop_p, SCOP_LOOP_NEST (scop)) == i + 1)
2090 loop1 = VEC_index (loop_p, SCOP_LOOP_NEST (scop), i + 1);
2091 if (loop0->num > loop1->num)
2093 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i, loop1);
2094 VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i + 1, loop0);
2101 /* Build dynamic schedules for all the BBs: for each graphite bb, an
     identity matrix of dimension "number of loops around the bb"
     (or NULL when no loop surrounds it). */
2104 build_scop_dynamic_schedules (scop_p scop)
2106 int i, dim, loop_num, row, col;
2109 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2111 loop_num = GBB_BB (gb)->loop_father->num;
2115 dim = nb_loops_around_gb (gb);
2116 GBB_DYNAMIC_SCHEDULE (gb) = cloog_matrix_alloc (dim, dim);
     /* Fill an identity matrix: 1 on the diagonal, 0 elsewhere.  */
2118 for (row = 0; row < GBB_DYNAMIC_SCHEDULE (gb)->NbRows; row++)
2119 for (col = 0; col < GBB_DYNAMIC_SCHEDULE (gb)->NbColumns; col++)
2121 value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 1);
2123 value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 0);
2126 GBB_DYNAMIC_SCHEDULE (gb) = NULL;
2130 /* Build for BB the static schedule.
2132 The STATIC_SCHEDULE is defined like this:
2151 Static schedules for A to F:
2164 build_scop_canonical_schedules (scop_p scop)
2168 int nb = scop_nb_loops (scop) + 1;
2170 SCOP_STATIC_SCHEDULE (scop) = lambda_vector_new (nb);
2172 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2174 int offset = nb_loops_around_gb (gb);
2176 /* After leaving a loop, it is possible that the schedule is not
2177 set at zero. This loop reinitializes components located
2180 for (j = offset + 1; j < nb; j++)
2181 if (SCOP_STATIC_SCHEDULE (scop)[j])
2183 memset (&(SCOP_STATIC_SCHEDULE (scop)[j]), 0,
2184 sizeof (int) * (nb - j));
2185 ++SCOP_STATIC_SCHEDULE (scop)[offset];
     /* The bb's static schedule is the first offset+1 components of the
        running SCoP schedule.  */
2189 GBB_STATIC_SCHEDULE (gb) = lambda_vector_new (offset + 1);
2190 lambda_vector_copy (SCOP_STATIC_SCHEDULE (scop),
2191 GBB_STATIC_SCHEDULE (gb), offset + 1);
2193 ++SCOP_STATIC_SCHEDULE (scop)[offset];
2197 /* Build the LOOPS vector for all bbs in SCOP: for each graphite bb,
     the surrounding loops contained in the SCoP, from outermost at
     index 0 to innermost. */
2200 build_bb_loops (scop_p scop)
2205 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2210 depth = nb_loops_around_gb (gb) - 1;
2212 GBB_LOOPS (gb) = VEC_alloc (loop_p, heap, 3);
2213 VEC_safe_grow_cleared (loop_p, heap, GBB_LOOPS (gb), depth + 1);
2215 loop = GBB_BB (gb)->loop_father;
     /* Walk outwards, filling the vector from the innermost slot down.  */
2217 while (scop_contains_loop (scop, loop))
2219 VEC_replace (loop_p, GBB_LOOPS (gb), depth, loop);
2220 loop = loop_outer (loop);
2226 /* Get the index for parameter VAR in SCOP.  If VAR is not yet recorded
     as a parameter, append it (only legal while SCOP_ADD_PARAMS is set)
     and return the new index. */
2229 param_index (tree var, scop_p scop)
2235 gcc_assert (TREE_CODE (var) == SSA_NAME);
2237 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2241 gcc_assert (SCOP_ADD_PARAMS (scop));
2243 nvar = XNEW (struct name_tree);
2246 VEC_safe_push (name_tree, heap, SCOP_PARAMS (scop), nvar);
2247 return VEC_length (name_tree, SCOP_PARAMS (scop)) - 1;
2250 /* Scan EXPR and translate it to an inequality vector INEQ that will
2251 be added, or subtracted, in the constraint domain matrix C at row
2252 R. K is the number of columns for loop iterators in C. */
2255 scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k,
2258 int cst_col, param_col;
2260 if (e == chrec_dont_know)
2263 switch (TREE_CODE (e))
2265 case POLYNOMIAL_CHREC:
2267 tree left = CHREC_LEFT (e);
2268 tree right = CHREC_RIGHT (e);
2269 int var = CHREC_VARIABLE (e);
2271 if (TREE_CODE (right) != INTEGER_CST)
     /* The iterator coefficient: add/subtract the chrec step into the
        column of the loop the chrec varies in.  */
2276 int loop_col = scop_gimple_loop_depth (s, get_loop (var)) + 1;
2279 value_sub_int (c->p[r][loop_col], c->p[r][loop_col],
2280 int_cst_value (right));
2282 value_add_int (c->p[r][loop_col], c->p[r][loop_col],
2283 int_cst_value (right));
2286 switch (TREE_CODE (left))
2288 case POLYNOMIAL_CHREC:
2289 scan_tree_for_params (s, left, c, r, k, subtract);
2293 /* Constant part. */
2296 int v = int_cst_value (left);
2297 cst_col = c->NbColumns - 1;
     /* A negative constant is folded in with the opposite sign.  */
2302 subtract = subtract ? false : true;
2306 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2308 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2313 scan_tree_for_params (s, left, c, r, k, subtract);
2320 if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
     /* Multiplication: fold the constant factor into the scale K, then
        recurse on the symbolic operand.  */
2325 gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
2327 value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
2328 value_multiply (k, k, val);
2331 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2338 gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
2340 value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
2341 value_multiply (k, k, val);
2344 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2349 case POINTER_PLUS_EXPR:
2350 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2351 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
     /* MINUS: the second operand is scanned with the scale negated.  */
2355 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2356 value_oppose (k, k);
2357 scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
2361 value_oppose (k, k);
2362 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
     /* SSA_NAME: a parameter of the SCoP.  */
2366 param_col = param_index (e, s);
2370 param_col += c->NbColumns - scop_nb_params (s) - 1;
2373 value_subtract (c->p[r][param_col], c->p[r][param_col], k);
2375 value_addto (c->p[r][param_col], c->p[r][param_col], k);
2382 int v = int_cst_value (e);
2383 cst_col = c->NbColumns - 1;
2388 subtract = subtract ? false : true;
2392 value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
2394 value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
2400 case NON_LVALUE_EXPR:
2401 scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
2410 /* Data structure for idx_record_params. */
2418 /* For a data reference with an ARRAY_REF as its BASE, record the
2419 parameters occurring in IDX. DTA is passed in as complementary
2420 information, and is used by the automatic walker function. This
2421 function is a callback for for_each_index. */
2424 idx_record_params (tree base, tree *idx, void *dta)
2426 struct irp_data *data = (struct irp_data *) dta;
2428 if (TREE_CODE (base) != ARRAY_REF)
2431 if (TREE_CODE (*idx) == SSA_NAME)
2434 scop_p scop = data->scop;
2435 struct loop *loop = data->loop;
2438 scev = analyze_scalar_evolution (loop, *idx);
2439 scev = instantiate_scev (block_before_scop (scop), loop, scev);
     /* Record parameters only: the NULL matrix means no constraint row
        is written, the scan just registers new SCoP parameters.  */
2442 value_set_si (one, 1);
2443 scan_tree_for_params (scop, scev, NULL, 0, one, false);
2450 /* Find parameters with respect to SCOP in BB. We are looking in memory
2451 access functions, conditions and loop bounds. */
2454 find_params_in_bb (scop_p scop, graphite_bb_p gb)
2457 data_reference_p dr;
2459 loop_p father = GBB_BB (gb)->loop_father;
     /* Parameters in the index expressions of every data reference.  */
2461 for (i = 0; VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), i, dr); i++)
2463 struct irp_data irp;
2467 for_each_index (&dr->ref, idx_record_params, &irp);
2471 /* Find parameters in conditional statements. */
2472 for (i = 0; VEC_iterate (gimple, GBB_CONDITIONS (gb), i, stmt); i++)
2475 loop_p loop = father;
2479 lhs = gimple_cond_lhs (stmt);
2480 lhs = analyze_scalar_evolution (loop, lhs);
2481 lhs = instantiate_scev (block_before_scop (scop), loop, lhs);
2483 rhs = gimple_cond_rhs (stmt);
2484 rhs = analyze_scalar_evolution (loop, rhs);
2485 rhs = instantiate_scev (block_before_scop (scop), loop, rhs);
2488 scan_tree_for_params (scop, lhs, NULL, 0, one, false);
2489 value_set_si (one, 1);
2490 scan_tree_for_params (scop, rhs, NULL, 0, one, false);
2495 /* Saves in NV the name of variable P->T: "<name>_<version>" when the
     SSA name has an identifier, "T_<version>" otherwise. */
2498 save_var_name (char **nv, int i, name_tree p)
2500 const char *name = get_name (SSA_NAME_VAR (p->t));
2504 int len = strlen (name) + 16;
2505 nv[i] = XNEWVEC (char, len);
2506 snprintf (nv[i], len, "%s_%d", name, SSA_NAME_VERSION (p->t));
     /* NOTE(review): the buffer below is allocated with 16 bytes but
        snprintf is told its size is 2 + 16 — potential overflow if the
        version number needs more than 13 digits' worth of space; the
        size argument should match the allocation.  */
2510 nv[i] = XNEWVEC (char, 16);
2511 snprintf (nv[i], 2 + 16, "T_%d", SSA_NAME_VERSION (p->t));
2517 /* Return the maximal loop depth in SCOP, i.e. the largest number of
     loops surrounding any of its graphite bbs. */
2520 scop_max_loop_depth (scop_p scop)
2524 int max_nb_loops = 0;
2526 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
2528 int nb_loops = gbb_nb_loops (gbb);
2529 if (max_nb_loops < nb_loops)
2530 max_nb_loops = nb_loops;
2533 return max_nb_loops;
2536 /* Initialize Cloog's parameter names from the names used in GIMPLE.
2537 Initialize Cloog's iterator names, using 'graphite_iterator_%d'
2538 from 0 to scop_nb_loops (scop). */
2541 initialize_cloog_names (scop_p scop)
2543 int i, nb_params = VEC_length (name_tree, SCOP_PARAMS (scop));
2544 char **params = XNEWVEC (char *, nb_params);
2545 int nb_iterators = scop_max_loop_depth (scop);
2546 int nb_scattering= cloog_program_nb_scattdims (SCOP_PROG (scop));
2547 char **iterators = XNEWVEC (char *, nb_iterators * 2);
2548 char **scattering = XNEWVEC (char *, nb_scattering);
     /* Parameter names come from the GIMPLE SSA names.  */
2551 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
2552 save_var_name (params, i, p);
2554 cloog_names_set_nb_parameters (cloog_program_names (SCOP_PROG (scop)),
2556 cloog_names_set_parameters (cloog_program_names (SCOP_PROG (scop)),
     /* Iterator names are synthesized: graphite_iterator_<i>.  */
2559 for (i = 0; i < nb_iterators; i++)
2562 iterators[i] = XNEWVEC (char, len);
2563 snprintf (iterators[i], len, "graphite_iterator_%d", i);
2566 cloog_names_set_nb_iterators (cloog_program_names (SCOP_PROG (scop)),
2568 cloog_names_set_iterators (cloog_program_names (SCOP_PROG (scop)),
     /* Scattering dimension names are synthesized: s_<i>.  */
2571 for (i = 0; i < nb_scattering; i++)
2574 scattering[i] = XNEWVEC (char, len);
2575 snprintf (scattering[i], len, "s_%d", i);
2578 cloog_names_set_nb_scattering (cloog_program_names (SCOP_PROG (scop)),
2580 cloog_names_set_scattering (cloog_program_names (SCOP_PROG (scop)),
2584 /* Record the parameters used in the SCOP. A variable is a parameter
2585 in a scop if it does not vary during the execution of that scop. */
2588 find_scop_parameters (scop_p scop)
2596 value_set_si (one, 1);
2598 /* Find the parameters used in the loop bounds. */
2599 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
2601 tree nb_iters = number_of_latch_executions (loop);
     /* A purely constant iteration count contributes no parameters.  */
2603 if (!chrec_contains_symbols (nb_iters))
2606 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2607 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2608 scan_tree_for_params (scop, nb_iters, NULL, 0, one, false);
2613 /* Find the parameters used in data accesses. */
2614 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2615 find_params_in_bb (scop, gb);
     /* From now on the parameter set is frozen.  */
2617 SCOP_ADD_PARAMS (scop) = false;
2620 /* Build the context constraints for SCOP: constraints and relations
2624 build_scop_context (scop_p scop)
2626 int nb_params = scop_nb_params (scop);
2627 CloogMatrix *matrix = cloog_matrix_alloc (1, nb_params + 2);
2629 /* Insert '0 >= 0' in the context matrix, as it is not allowed to be
     empty: first column marks an inequality, last column is the constant. */
2632 value_set_si (matrix->p[0][0], 1);
2634 value_set_si (matrix->p[0][nb_params + 1], 0);
2636 cloog_program_set_context (SCOP_PROG (scop),
2637 cloog_domain_matrix2domain (matrix));
2638 cloog_matrix_free (matrix);
2641 /* Returns the graphite_bb stored in BB's aux field. */
2643 static inline graphite_bb_p
2644 gbb_from_bb (basic_block bb)
2646 return (graphite_bb_p) bb->aux;
2649 /* Builds the constraint matrix for LOOP in SCOP. NB_OUTER_LOOPS is the
2650 number of loops surrounding LOOP in SCOP. OUTER_CSTR gives the
2651 constraints matrix for the surrounding loops. */
2654 build_loop_iteration_domains (scop_p scop, struct loop *loop,
2655 CloogMatrix *outer_cstr, int nb_outer_loops)
2661 int nb_rows = outer_cstr->NbRows + 1;
2662 int nb_cols = outer_cstr->NbColumns + 1;
2664 /* Last column of CSTR is the column of constants. */
2665 int cst_col = nb_cols - 1;
2667 /* The column for the current loop is just after the columns of
2668 other outer loops. */
2669 int loop_col = nb_outer_loops + 1;
2671 tree nb_iters = number_of_latch_executions (loop);
2673 /* When the number of iterations is a constant or a parameter, we
2674 add a constraint for the upper bound of the loop. So add a row
2675 to the constraint matrix before allocating it. */
2676 if (TREE_CODE (nb_iters) == INTEGER_CST
2677 || !chrec_contains_undetermined (nb_iters))
2680 cstr = cloog_matrix_alloc (nb_rows, nb_cols)
2682 /* Copy the outer constraints. */
2683 for (i = 0; i < outer_cstr->NbRows; i++)
2685 /* Copy the eq/ineq and loops columns. */
2686 for (j = 0; j < loop_col; j++)
2687 value_assign (cstr->p[i][j], outer_cstr->p[i][j]);
2689 /* Leave an empty column in CSTR for the current loop, and then
2690 copy the parameter columns. */
2691 for (j = loop_col; j < outer_cstr->NbColumns; j++)
2692 value_assign (cstr->p[i][j + 1], outer_cstr->p[i][j]);
     /* 0 <= loop_i.  */
2696 row = outer_cstr->NbRows;
2697 value_set_si (cstr->p[row][0], 1);
2698 value_set_si (cstr->p[row][loop_col], 1);
2700 /* loop_i <= nb_iters */
2701 if (TREE_CODE (nb_iters) == INTEGER_CST)
2704 value_set_si (cstr->p[row][0], 1);
2705 value_set_si (cstr->p[row][loop_col], -1);
2707 value_set_si (cstr->p[row][cst_col],
2708 int_cst_value (nb_iters));
2710 else if (!chrec_contains_undetermined (nb_iters))
2712 /* Otherwise nb_iters contains parameters: scan the nb_iters
2713 expression and build its matrix representation. */
2717 value_set_si (cstr->p[row][0], 1);
2718 value_set_si (cstr->p[row][loop_col], -1);
2720 nb_iters = analyze_scalar_evolution (loop, nb_iters);
2721 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);
2724 value_set_si (one, 1);
2725 scan_tree_for_params (scop, nb_iters, cstr, row, one, false);
     /* Recurse on inner loops of the SCoP with the extended matrix.  */
2731 if (loop->inner && loop_in_scop_p (loop->inner, scop))
2732 build_loop_iteration_domains (scop, loop->inner, cstr, nb_outer_loops + 1);
2734 /* Only go to the next loops, if we are not at the outermost layer. These
2735 have to be handled separately, as we can be sure that the chain at this
2736 layer will be connected. */
2737 if (nb_outer_loops != 0 && loop->next && loop_in_scop_p (loop->next, scop))
2738 build_loop_iteration_domains (scop, loop->next, outer_cstr, nb_outer_loops);
     /* Every graphite bb directly in this loop gets a copy of the matrix.  */
2740 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
2741 if (gbb_loop (gb) == loop)
2742 GBB_DOMAIN (gb) = cloog_matrix_copy (cstr);
2744 cloog_matrix_free (cstr);
2747 /* Add conditions to the domain of GB. */
2750 add_conditions_to_domain (graphite_bb_p gb)
2754 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gb);
2755 CloogMatrix *domain = GBB_DOMAIN (gb);
2756 scop_p scop = GBB_SCOP (gb);
2760 unsigned nb_new_rows = 0;
     /* Nothing to do when no conditions guard this bb.  */
2763 if (VEC_empty (gimple, conditions))
2768 nb_rows = domain->NbRows;
2769 nb_cols = domain->NbColumns;
2774 nb_cols = scop_nb_params (scop) + 2;
2777 /* Count number of necessary new rows to add the conditions to the
2779 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2781 switch (gimple_code (stmt))
2785 enum tree_code code = gimple_cond_code (stmt);
2791 /* NE and EQ statements are not supported right now. */
2807 /* Switch statements are not supported right now. */
2818 /* Enlarge the matrix. */
2820 CloogMatrix *new_domain;
2821 new_domain = cloog_matrix_alloc (nb_rows + nb_new_rows, nb_cols);
2823 for (i = 0; i < nb_rows; i++)
2824 for (j = 0; j < nb_cols; j++)
2825 value_assign (new_domain->p[i][j], domain->p[i][j]);
2827 cloog_matrix_free (domain);
2828 domain = new_domain;
2829 GBB_DOMAIN (gb) = new_domain;
2832 /* Add the conditions to the new enlarged domain matrix. */
2834 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++)
2836 switch (gimple_code (stmt))
2841 enum tree_code code;
2844 loop_p loop = GBB_BB (gb)->loop_father;
2846 left = gimple_cond_lhs (stmt);
2847 right = gimple_cond_rhs (stmt);
2849 left = analyze_scalar_evolution (loop, left);
2850 right = analyze_scalar_evolution (loop, right);
2852 left = instantiate_scev (block_before_scop (scop), loop, left);
2853 right = instantiate_scev (block_before_scop (scop), loop, right);
2855 code = gimple_cond_code (stmt);
2857 /* The conditions for ELSE-branches are inverted. */
2858 if (VEC_index (gimple, gb->condition_cases, i) == NULL)
2859 code = invert_tree_comparison (code, false);
2864 /* NE statements are not supported right now. */
     /* left <= right becomes right - left >= 0.  */
2868 value_set_si (domain->p[row][0], 1);
2870 value_set_si (one, 1);
2871 scan_tree_for_params (scop, left, domain, row, one, true);
2872 value_set_si (one, 1);
2873 scan_tree_for_params (scop, right, domain, row, one, false);
     /* left >= right becomes left - right >= 0.  */
2875 value_set_si (domain->p[row][0], 1);
2876 value_set_si (one, 1);
2877 scan_tree_for_params (scop, left, domain, row, one, false);
2878 value_set_si (one, 1);
2879 scan_tree_for_params (scop, right, domain, row, one, true);
     /* left < right: same as <= with the constant lowered by one.  */
2884 value_set_si (domain->p[row][0], 1);
2886 value_set_si (one, 1);
2887 scan_tree_for_params (scop, left, domain, row, one, true);
2888 value_set_si (one, 1);
2889 scan_tree_for_params (scop, right, domain, row, one, false);
2890 value_sub_int (domain->p[row][nb_cols - 1],
2891 domain->p[row][nb_cols - 1], 1);
     /* left > right: same as >= with the constant lowered by one.  */
2896 value_set_si (domain->p[row][0], 1);
2898 value_set_si (one, 1);
2899 scan_tree_for_params (scop, left, domain, row, one, false);
2900 value_set_si (one, 1);
2901 scan_tree_for_params (scop, right, domain, row, one, true);
2902 value_sub_int (domain->p[row][nb_cols - 1],
2903 domain->p[row][nb_cols - 1], 1);
2908 value_set_si (domain->p[row][0], 1);
2910 value_set_si (one, 1);
2911 scan_tree_for_params (scop, left, domain, row, one, true);
2912 value_set_si (one, 1);
2913 scan_tree_for_params (scop, right, domain, row, one, false);
2918 value_set_si (domain->p[row][0], 1);
2920 value_set_si (one, 1);
2921 scan_tree_for_params (scop, left, domain, row, one, false);
2922 value_set_si (one, 1);
2923 scan_tree_for_params (scop, right, domain, row, one, true);
2934 /* Switch statements are not supported right now. */
2945 /* Helper recursive function: walks the dominator tree under BB,
     maintaining the stack of guarding CONDITIONS (and for each, the
     matching CASES entry: the stmt for then-edges, NULL for else-edges),
     and records them into each graphite bb's domain. */
2948 build_scop_conditions_1 (VEC (gimple, heap) **conditions,
2949 VEC (gimple, heap) **cases, basic_block bb,
2954 gimple_stmt_iterator gsi;
2955 basic_block bb_child, bb_iter;
2956 VEC (basic_block, heap) *dom;
2958 /* Make sure we are in the SCoP. */
2959 if (!bb_in_scop_p (bb, scop))
2962 /* Record conditions in graphite_bb. */
2963 gbb = gbb_from_bb (bb);
2966 GBB_CONDITIONS (gbb) = VEC_copy (gimple, heap, *conditions);
2967 GBB_CONDITION_CASES (gbb) = VEC_copy (gimple, heap, *cases);
2968 add_conditions_to_domain (gbb);
2971 dom = get_dominated_by (CDI_DOMINATORS, bb);
2973 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2975 gimple stmt = gsi_stmt (gsi);
2976 VEC (edge, gc) *edges;
2979 switch (gimple_code (stmt))
2983 for (i = 0; VEC_iterate (edge, edges, i, e); i++)
2984 if ((dominated_by_p (CDI_DOMINATORS, e->dest, bb))
2985 && VEC_length (edge, e->dest->preds) == 1)
2987 /* Remove the scanned block from the dominator successors. */
2988 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
2989 if (bb_iter == e->dest)
2991 VEC_unordered_remove (basic_block, dom, j);
2995 /* Recursively scan the then or else part. */
2996 if (e->flags & EDGE_TRUE_VALUE)
2997 VEC_safe_push (gimple, heap, *cases, stmt);
2998 else if (e->flags & EDGE_FALSE_VALUE)
2999 VEC_safe_push (gimple, heap, *cases, NULL);
3003 VEC_safe_push (gimple, heap, *conditions, stmt);
3004 build_scop_conditions_1 (conditions, cases, e->dest, scop);
     /* Pop the condition/case pushed for this branch before the next edge.  */
3005 VEC_pop (gimple, *conditions);
3006 VEC_pop (gimple, *cases);
3013 gimple_stmt_iterator gsi_search_gimple_label;
3015 for (i = 0; i < gimple_switch_num_labels (stmt); ++i)
3017 basic_block bb_iter;
3019 size_t n_cases = VEC_length (gimple, *conditions);
3020 unsigned n = gimple_switch_num_labels (stmt);
3022 bb_child = label_to_block
3023 (CASE_LABEL (gimple_switch_label (stmt, i)));
3025 /* Do not handle multiple values for the same block. */
3026 for (k = 0; k < n; k++)
3029 (CASE_LABEL (gimple_switch_label (stmt, k))) == bb_child)
3035 /* Switch cases with more than one predecessor are not
3037 if (VEC_length (edge, bb_child->preds) != 1)
3040 /* Recursively scan the corresponding 'case' block. */
3042 for (gsi_search_gimple_label = gsi_start_bb (bb_child);
3043 !gsi_end_p (gsi_search_gimple_label);
3044 gsi_next (&gsi_search_gimple_label))
3046 gimple stmt_gimple_label
3047 = gsi_stmt (gsi_search_gimple_label);
3049 if (gimple_code (stmt_gimple_label) == GIMPLE_LABEL)
3051 tree t = gimple_label_label (stmt_gimple_label);
3053 if (t == gimple_switch_label (stmt, i))
3054 VEC_replace (gimple, *cases, n_cases,
3061 build_scop_conditions_1 (conditions, cases, bb_child, scop);
3063 /* Remove the scanned block from the dominator successors. */
3064 for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
3065 if (bb_iter == bb_child)
3067 VEC_unordered_remove (basic_block, dom, j);
3072 VEC_pop (gimple, *conditions);
3073 VEC_pop (gimple, *cases);
3081 /* Scan all immediate dominated successors. */
3082 for (i = 0; VEC_iterate (basic_block, dom, i, bb_child); i++)
3083 build_scop_conditions_1 (conditions, cases, bb_child, scop);
3085 VEC_free (basic_block, heap, dom);
3088 /* Record all 'if' and 'switch' conditions in each gbb of SCOP, starting
     the recursive dominator-tree walk at the SCoP entry. */
3091 build_scop_conditions (scop_p scop)
3093 VEC (gimple, heap) *conditions = NULL;
3094 VEC (gimple, heap) *cases = NULL;
3096 build_scop_conditions_1 (&conditions, &cases, SCOP_ENTRY (scop), scop);
3098 VEC_free (gimple, heap, conditions);
3099 VEC_free (gimple, heap, cases);
3102 /* Build the current domain matrix: the loops belonging to the current
3103    SCOP, and that vary for the execution of the current basic block.
3104    Returns false if there is no loop in SCOP.  */
3107 build_scop_iteration_domain (scop_p scop)
3110   CloogMatrix *outer_cstr;
3113   /* Build cloog loop for all loops, that are in the uppermost loop layer of
3115   for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++)
  /* Start the recursion only at loops whose parent is outside the
     SCoP, i.e. the outermost loops of the SCoP's loop nest.  */
3116     if (!loop_in_scop_p (loop_outer (loop), scop))
3118 	/* The outermost constraints is a matrix that has:
3119 	   -first column: eq/ineq boolean
3120 	   -last column: a constant
3121 	   -scop_nb_params columns for the parameters used in the scop.  */
3122 	outer_cstr = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
3123 	build_loop_iteration_domains (scop, loop, outer_cstr, 0);
3124 	cloog_matrix_free (outer_cstr);
3130 /* Initializes an equation CY of the access matrix using the
3131    information for a subscript from AF, relatively to the loop
3132    indexes from LOOP_NEST and parameter indexes from PARAMS.  NDIM is
3133    the dimension of the array access, i.e. the number of
3134    subscripts.  Returns true when the operation succeeds.  */
3137 build_access_matrix_with_af (tree af, lambda_vector cy,
3138 			     scop_p scop, int ndim)
3142   switch (TREE_CODE (af))
3144     case POLYNOMIAL_CHREC:
3146 	struct loop *outer_loop;
3147 	tree left = CHREC_LEFT (af);
3148 	tree right = CHREC_RIGHT (af);
  /* Only constant-step (affine) chrecs are handled.  */
3151 	if (TREE_CODE (right) != INTEGER_CST)
  /* The column for this step is the loop's depth relative to the
     SCoP, i.e. the number of SCoP loops around the chrec's loop.  */
3154 	outer_loop = get_loop (CHREC_VARIABLE (af));
3155 	var = nb_loops_around_loop_in_scop (outer_loop, scop);
3156 	cy[var] = int_cst_value (right);
3158 	switch (TREE_CODE (left))
3160 	  case POLYNOMIAL_CHREC:
3161 	    return build_access_matrix_with_af (left, cy, scop, ndim);
  /* A constant initial value goes into the last (constant) column.  */
3164 	    cy[ndim - 1] = int_cst_value (left);
3168 	    return build_access_matrix_with_af (left, cy, scop, ndim);
  /* NOTE(review): for the two binary cases below, both operands are
     folded into CY but the recursive return values are ignored --
     presumably failure is detected elsewhere.  TODO confirm.  */
3173       build_access_matrix_with_af (TREE_OPERAND (af, 0), cy, scop, ndim);
3174       build_access_matrix_with_af (TREE_OPERAND (af, 1), cy, scop, ndim);
3178       build_access_matrix_with_af (TREE_OPERAND (af, 0), cy, scop, ndim);
3179       build_access_matrix_with_af (TREE_OPERAND (af, 1), cy, scop, ndim);
3183       cy[ndim - 1] = int_cst_value (af);
  /* Parameters occupy the columns just before the constant column.  */
3187       param_col = param_index (af, scop);
3188       cy [ndim - scop_nb_params (scop) + param_col - 1] = 1;
3192       /* FIXME: access_fn can have parameters.  */
3197 /* Initialize the access matrix in the data reference REF with respect
3198    to the loop nesting LOOP_NEST.  Return true when the operation
3202 build_access_matrix (data_reference_p ref, graphite_bb_p gb)
3204   int i, ndim = DR_NUM_DIMENSIONS (ref);
3205   struct access_matrix *am = GGC_NEW (struct access_matrix);
3207   AM_MATRIX (am) = VEC_alloc (lambda_vector, heap, ndim);
3208   DR_SCOP (ref) = GBB_SCOP (gb);
  /* Build one matrix row per subscript of the access.  */
3210   for (i = 0; i < ndim; i++)
3212       lambda_vector v = lambda_vector_new (ref_nb_loops (ref));
3213       scop_p scop = GBB_SCOP (gb);
3214       tree af = DR_ACCESS_FN (ref, i);
  /* Bail out as soon as one subscript is not an affine function.  */
3216       if (!build_access_matrix_with_af (af, v, scop, ref_nb_loops (ref)))
3219       VEC_safe_push (lambda_vector, heap, AM_MATRIX (am), v);
3222   DR_ACCESS_MATRIX (ref) = am;
3226 /* Build the access matrices for the data references in the SCOP.  */
3229 build_scop_data_accesses (scop_p scop)
3234   /* FIXME: Construction of access matrix is disabled until some
3235      pass, like the data dependence analysis, is using it.  */
  /* For each basic block of the SCoP, visit all its data references.  */
3238   for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
3241       data_reference_p dr;
3243       /* Construct the access matrix for each data ref, with respect to
3244 	 the loop nest of the current BB in the considered SCOP.  */
3246 	   VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), j, dr);
3249 	  bool res = build_access_matrix (dr, gb);
3251 	  /* FIXME: At this point the DRs should always have an affine
3252 	     form.  For the moment this fails as build_access_matrix
3253 	     does not build matrices with parameters.  */
3259 /* Returns the tree variable from the name NAME that was given in
3260    Cloog representation.  All the parameters are stored in PARAMS, and
3261    all the loop induction variables are stored in IVSTACK.
3263    FIXME: This is a hack, and Cloog should be fixed to not work with
3264    variable names represented as "char *string", but with void
3265    pointers that could be casted back to a tree.  The only problem in
3266    doing that is that Cloog's pretty printer still assumes that
3267    variable names are char *strings.  The solution would be to have a
3268    function pointer for pretty-printing that can be redirected to be
3269    print_generic_stmt in our case, or fprintf by default.
3270    ??? Too ugly to live.  */
3273 clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params,
3274 		   loop_iv_stack ivstack)
  /* First try to match NAME against a SCoP parameter name...  */
3280   for (i = 0; VEC_iterate (name_tree, params, i, t); i++)
3281     if (!strcmp (name, t->name))
  /* ... otherwise NAME must be one of the new induction variables.  */
3284   iv = loop_iv_stack_get_iv_from_name (ivstack, name);
3291 /* A union needed to convert from CLAST expressions to GMP values. */
3294 struct clast_expr *c;
3298 /* Converts a Cloog AST expression E back to a GCC expression tree.  */
3301 clast_to_gcc_expression (struct clast_expr *e,
3302 			 VEC (name_tree, heap) *params,
3303 			 loop_iv_stack ivstack)
  /* NOTE(review): everything is built in integer_type_node; types of
     the original IVs/parameters are not preserved here.  */
3305   tree type = integer_type_node;
  /* Terms: VAL * VAR, or just VAL when there is no variable.  */
3313 	struct clast_term *t = (struct clast_term *) e;
  /* Avoid emitting a multiplication for coefficients 1 and -1.  */
3317 	    if (value_one_p (t->val))
3318 	      return clast_name_to_gcc (t->var, params, ivstack);
3320 	    else if (value_mone_p (t->val))
3321 	      return fold_build1 (NEGATE_EXPR, type,
3322 				  clast_name_to_gcc (t->var, params, ivstack));
3324 	      return fold_build2 (MULT_EXPR, type,
3325 				  gmp_cst_to_tree (t->val),
3326 				  clast_name_to_gcc (t->var, params, ivstack));
3329 	  return gmp_cst_to_tree (t->val);
  /* Reductions: sum, min, or max of the element list.  */
3334 	struct clast_reduction *r = (struct clast_reduction *) e;
  /* A one-element reduction is just its element.  */
3341 	      return clast_to_gcc_expression (r->elts[0], params, ivstack);
  /* Only binary reductions of terms are supported here.  */
3345 	      gcc_assert (r->n >= 1
3346 			  && r->elts[0]->type == expr_term
3347 			  && r->elts[1]->type == expr_term);
3349 	      left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3350 	      right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3351 	      return fold_build2 (PLUS_EXPR, type, left, right);
3358 	      return clast_to_gcc_expression (r->elts[0], params, ivstack);
3362 	      left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3363 	      right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3364 	      return fold_build2 (MIN_EXPR, type, left, right);
3374 	      return clast_to_gcc_expression (r->elts[0], params, ivstack);
3378 	      left = clast_to_gcc_expression (r->elts[0], params, ivstack);
3379 	      right = clast_to_gcc_expression (r->elts[1], params, ivstack);
3380 	      return fold_build2 (MAX_EXPR, type, left, right);
  /* Binary nodes: LHS is an expression, RHS is a GMP constant.  */
3396 	struct clast_binary *b = (struct clast_binary *) e;
3397 	struct clast_expr *lhs = (struct clast_expr *) b->LHS;
3398 	tree tl = clast_to_gcc_expression (lhs, params, ivstack);
  /* Convert the RHS constant through the expr/Value union.  */
3402 	r.c = (struct clast_expr *) b->RHS;
3403 	tr = gmp_cst_to_tree (r.v);
3407 	  case clast_bin_fdiv:
3408 	    return fold_build2 (FLOOR_DIV_EXPR, type, tl, tr);
3410 	  case clast_bin_cdiv:
3411 	    return fold_build2 (CEIL_DIV_EXPR, type, tl, tr);
3414 	    return fold_build2 (EXACT_DIV_EXPR, type, tl, tr);
3417 	    return fold_build2 (TRUNC_MOD_EXPR, type, tl, tr);
3431 /* Translates a clast equation CLEQ to a tree.  */
3434 graphite_translate_clast_equation (scop_p scop,
3435 				   struct clast_equation *cleq,
3436 				   loop_iv_stack ivstack)
3438   enum tree_code comp;
3439   tree lhs = clast_to_gcc_expression (cleq->LHS, SCOP_PARAMS (scop), ivstack);
3440   tree rhs = clast_to_gcc_expression (cleq->RHS, SCOP_PARAMS (scop), ivstack);
  /* CLEQ->sign selects the comparison: 0 for equality, the sign
     otherwise selecting the inequality direction (elided lines set
     COMP accordingly).  */
3442   if (cleq->sign == 0)
3445   else if (cleq->sign > 0)
3451   return fold_build2 (comp, integer_type_node, lhs, rhs);
3454 /* Creates the test for the condition in STMT.  */
3457 graphite_create_guard_cond_expr (scop_p scop, struct clast_guard *stmt,
3458 				 loop_iv_stack ivstack)
  /* Conjoin all the equations of the guard into one condition.  */
3463   for (i = 0; i < stmt->n; i++)
3465       tree eq = graphite_translate_clast_equation (scop, &stmt->eq[i], ivstack);
3468 	cond = fold_build2 (TRUTH_AND_EXPR, integer_type_node, cond, eq);
3476 /* Creates a new if region corresponding to Cloog's guard.  */
3479 graphite_create_new_guard (scop_p scop, edge entry_edge,
3480 			   struct clast_guard *stmt,
3481 			   loop_iv_stack ivstack)
  /* Build the guard condition, then materialize an empty if-region on
     ENTRY_EDGE guarded by it; the exit edge is returned to the caller.  */
3483   tree cond_expr = graphite_create_guard_cond_expr (scop, stmt, ivstack);
3484   edge exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr);
3489 /* Creates a new LOOP corresponding to Cloog's STMT.  Inserts an induction
3490    variable for the new LOOP.  New LOOP is attached to CFG starting at
3491    ENTRY_EDGE.  LOOP is inserted into the loop tree and becomes the child
3492    loop of the OUTER_LOOP.  */
3494 static struct loop *
3495 graphite_create_new_loop (scop_p scop, edge entry_edge,
3496 			  struct clast_for *stmt, loop_iv_stack ivstack,
3501   tree stride, lowb, upb;
3504   gcc_assert (stmt->LB
  /* Translate the CLAST bounds and stride into trees.  */
3507   stride = gmp_cst_to_tree (stmt->stride);
3508   lowb = clast_to_gcc_expression (stmt->LB, SCOP_PARAMS (scop), ivstack);
3509   ivvar = create_tmp_var (integer_type_node, "graphiteIV");
3510   add_referenced_var (ivvar);
3512   upb = clast_to_gcc_expression (stmt->UB, SCOP_PARAMS (scop), ivstack);
  /* When no outer loop is given, attach the new loop to the loop
     containing the source of ENTRY_EDGE.  */
3513   loop = create_empty_loop_on_edge (entry_edge, lowb, stride, upb, ivvar,
3514 				    &iv_before, outer ? outer
3515 				    : entry_edge->src->loop_father);
  /* Record the new IV under the CLAST iterator name; popped by the
     caller after translating the loop body.  */
3517   loop_iv_stack_push_iv (ivstack, iv_before, stmt->iterator);
3522 /* Structure containing the mapping between the old names and the new
3523    names used after block copy in the new loop context.  */
3524 typedef struct rename_map_elt
  /* OLD_NAME is the SSA name in the original region; NEW_NAME is its
     replacement in the generated code.  */
3526   tree old_name, new_name;
3530 /* Print to stderr the element ELT.  */
3533 debug_rename_elt (rename_map_elt elt)
  /* Output format: "(old_name, new_name)".  */
3535   fprintf (stderr, "(");
3536   print_generic_expr (stderr, elt->old_name, 0);
3537   fprintf (stderr, ", ");
3538   print_generic_expr (stderr, elt->new_name, 0);
3539   fprintf (stderr, ")\n");
3542 /* Helper function for debug_rename_map.  Callback for htab_traverse:
     prints the rename_map_elt stored in SLOT; S is unused.  */
3545 debug_rename_map_1 (void **slot, void *s ATTRIBUTE_UNUSED)
3547   struct rename_map_elt *entry = (struct rename_map_elt *) *slot;
3548   debug_rename_elt (entry);
3552 /* Print to stderr all the elements of MAP.  */
3555 debug_rename_map (htab_t map)
  /* Traversal order is the hash table's internal order.  */
3557   htab_traverse (map, debug_rename_map_1, NULL);
3560 /* Constructs a new rename_map_elt that maps OLD_NAME to NEW_NAME.
     The caller owns the returned element (the rename map frees it).  */
3562 static inline rename_map_elt
3563 new_rename_map_elt (tree old_name, tree new_name)
3567   res = XNEW (struct rename_map_elt);
3568   res->old_name = old_name;
3569   res->new_name = new_name;
3574 /* Computes a hash function for database element ELT. */
3577 rename_map_elt_info (const void *elt)
3579 return htab_hash_pointer (((const struct rename_map_elt *) elt)->old_name);
3582 /* Compares database elements E1 and E2. */
3585 eq_rename_map_elts (const void *e1, const void *e2)
3587 const struct rename_map_elt *elt1 = (const struct rename_map_elt *) e1;
3588 const struct rename_map_elt *elt2 = (const struct rename_map_elt *) e2;
3590 return (elt1->old_name == elt2->old_name);
3593 /* Returns the new name associated to OLD_NAME in MAP.  */
3596 get_new_name_from_old_name (htab_t map, tree old_name)
3598   struct rename_map_elt tmp;
  /* Probe without inserting; TMP only needs its key field set.  */
3601   tmp.old_name = old_name;
3602   slot = htab_find_slot (map, &tmp, NO_INSERT);
  /* NOTE(review): when OLD_NAME has no entry, the elided fallthrough
     presumably returns OLD_NAME unchanged -- confirm with callers.  */
3605     return ((rename_map_elt) *slot)->new_name;
3610 /* Rename the SSA_NAMEs used in STMT according to the mapping in MAP.  */
3613 rename_variables_in_stmt (gimple stmt, htab_t map)
3616   use_operand_p use_p;
  /* Only real (non-virtual) uses are renamed.  */
3618   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3620       tree use = USE_FROM_PTR (use_p);
3621       tree new_name = get_new_name_from_old_name (map, use);
3623       replace_exp (use_p, new_name);
3629 /* Returns true if SSA_NAME is a parameter of SCOP.  */
3632 is_parameter (scop_p scop, tree ssa_name)
3635   VEC (name_tree, heap) *params = SCOP_PARAMS (scop);
  /* Linear scan; the parameter list is expected to be small.  */
3638   for (i = 0; VEC_iterate (name_tree, params, i, param); i++)
3639     if (param->t == ssa_name)
3645 /* Returns true if NAME is an induction variable. */
3650 return gimple_code (SSA_NAME_DEF_STMT (name)) == GIMPLE_PHI;
3653 static void expand_scalar_variables_stmt (gimple, basic_block, scop_p,
3656 /* Constructs a tree which only contains old_ivs and parameters.  Any
3657    other variables that are defined outside BB will be eliminated by
3658    using their definitions in the constructed tree.  OLD_LOOP_FATHER
3659    is the original loop that contained BB.  */
3662 expand_scalar_variables_expr (tree type, tree op0, enum tree_code code,
3663 			      tree op1, basic_block bb, scop_p scop,
3664 			      loop_p old_loop_father, htab_t map)
  /* Integer constants and memory references need no expansion.  */
3666   if ((TREE_CODE_CLASS (code) == tcc_constant
3667        && code == INTEGER_CST)
3668       || TREE_CODE_CLASS (code) == tcc_reference)
  /* Unary expressions: expand the operand and rebuild the node.  */
3671   if (TREE_CODE_CLASS (code) == tcc_unary)
3673       tree op0_type = TREE_TYPE (op0);
3674       enum tree_code op0_code = TREE_CODE (op0);
3676 	expand_scalar_variables_expr (op0_type, op0, op0_code,
3677 				      NULL, bb, scop, old_loop_father, map);
3679       return fold_build1 (code, type, op0_expr);
  /* Binary expressions: expand both operands and rebuild the node.  */
3682   if (TREE_CODE_CLASS (code) == tcc_binary)
3684       tree op0_type = TREE_TYPE (op0);
3685       enum tree_code op0_code = TREE_CODE (op0);
3687 	expand_scalar_variables_expr (op0_type, op0, op0_code,
3688 				      NULL, bb, scop, old_loop_father, map);
3689       tree op1_type = TREE_TYPE (op1);
3690       enum tree_code op1_code = TREE_CODE (op1);
3692 	expand_scalar_variables_expr (op1_type, op1, op1_code,
3693 				      NULL, bb, scop, old_loop_father, map);
3695       return fold_build2 (code, type, op0_expr, op1_expr);
3698   if (code == SSA_NAME)
3702       enum tree_code subcode;
  /* SCoP parameters (and, per the elided condition, old IVs) are
     simply renamed through MAP.  */
3704       if (is_parameter (scop, op0)
3706 	return get_new_name_from_old_name (map, op0);
3708       def_stmt = SSA_NAME_DEF_STMT (op0);
3710       if (gimple_bb (def_stmt) == bb)
3712 	  /* If the defining statement is in the basic block already
3713 	     we do not need to create a new expression for it, we
3714 	     only need to ensure its operands are expanded.  */
3715 	  expand_scalar_variables_stmt (def_stmt, bb, scop,
3716 					old_loop_father, map);
3717 	  return get_new_name_from_old_name (map, op0);
  /* Definitions that are not assignments, or that come from outside
     the SCoP, cannot be re-expanded: just rename them.  */
3722 	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN
3723 	      || !bb_in_scop_p (gimple_bb (def_stmt), scop))
3724 	    return get_new_name_from_old_name (map, op0);
  /* Otherwise recursively inline the RHS of the defining assignment.  */
3726 	  var0 = gimple_assign_rhs1 (def_stmt);
3727 	  subcode = gimple_assign_rhs_code (def_stmt);
3728 	  var1 = gimple_assign_rhs2 (def_stmt);
3730 	  return expand_scalar_variables_expr (type, var0, subcode, var1,
3731 					       bb, scop, old_loop_father, map);
3739 /* Replicates any uses of non-parameter and non-old-iv variables that
3740    are defined outside BB with code that is inserted in BB.
3741    OLD_LOOP_FATHER is the original loop that contained STMT.  */
3744 expand_scalar_variables_stmt (gimple stmt, basic_block bb, scop_p scop,
3745 			      loop_p old_loop_father, htab_t map)
3748   use_operand_p use_p;
3750   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3752       tree use = USE_FROM_PTR (use_p);
3753       tree type = TREE_TYPE (use);
3754       enum tree_code code = TREE_CODE (use);
3755       tree use_expr = expand_scalar_variables_expr (type, use, code, NULL, bb,
3756 						    scop, old_loop_father, map);
  /* If expansion built a new expression, gimplify it at the head of
     BB and replace the use with the resulting SSA name.  */
3757       if (use_expr != use)
3759 	  gimple_stmt_iterator gsi = gsi_after_labels (bb);
3761 	    force_gimple_operand_gsi (&gsi, use_expr, true, NULL,
3762 				      true, GSI_NEW_STMT);
3763 	  replace_exp (use_p, new_use);
3770 /* Copies the definitions outside of BB of variables that are not
3771    induction variables nor parameters.  BB must only contain
3772    "external" references to these types of variables.  OLD_LOOP_FATHER
3773    is the original loop that contained BB.  */
3776 expand_scalar_variables (basic_block bb, scop_p scop,
3777 			 loop_p old_loop_father, htab_t map)
3779   gimple_stmt_iterator gsi;
  /* NOTE(review): the iterator is advanced inside the (elided) loop
     body, not in the for-header -- expansion may insert statements at
     the head of BB.  */
3781   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3783       gimple stmt = gsi_stmt (gsi);
3784       expand_scalar_variables_stmt (stmt, bb, scop, old_loop_father, map);
3789 /* Rename all the SSA_NAMEs used in block BB according to the rename
3790    map MAP.  */
3794 rename_variables (basic_block bb, htab_t map)
3796   gimple_stmt_iterator gsi;
  /* Labels carry no SSA uses, so start after them.  */
3798   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3799     rename_variables_in_stmt (gsi_stmt (gsi), map);
3802 /* Remove condition from BB.  */
3805 remove_condition (basic_block bb)
3807   gimple last = last_stmt (bb);
  /* Only a GIMPLE_COND ending BB is removed; anything else is kept.  */
3809   if (last && gimple_code (last) == GIMPLE_COND)
3811       gimple_stmt_iterator gsi = gsi_last_bb (bb);
3812       gsi_remove (&gsi, true);
3816 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag set.  */
3819 get_true_edge_from_guard_bb (basic_block bb)
  /* Assumes BB ends in a condition, so such an edge exists; the elided
     fallthrough presumably asserts unreachability.  TODO confirm.  */
3824   FOR_EACH_EDGE (e, ei, bb->succs)
3825     if (e->flags & EDGE_TRUE_VALUE)
3832 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag cleared.  */
3835 get_false_edge_from_guard_bb (basic_block bb)
  /* Mirror of get_true_edge_from_guard_bb with the test inverted.  */
3840   FOR_EACH_EDGE (e, ei, bb->succs)
3841     if (!(e->flags & EDGE_TRUE_VALUE))
3848 /* Inserts in MAP a tuple (OLD_NAME, NEW_NAME) for the induction
3849    variables of the loops around GBB in SCOP, i.e. GBB_LOOPS.
3850    NEW_NAME is obtained from IVSTACK.  IVSTACK has the same stack
3851    ordering as GBB_LOOPS.  */
3854 build_iv_mapping (loop_iv_stack ivstack, htab_t map, gbb_p gbb, scop_p scop)
3860   for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, iv); i++)
3862       struct rename_map_elt tmp;
  /* Skip old IVs whose loop does not contain GBB.  */
3864       if (!flow_bb_inside_loop_p (iv->loop, GBB_BB (gbb)))
3867       tmp.old_name = iv->t;
3868       slot = htab_find_slot (map, &tmp, INSERT);
  /* Map the old IV to the new IV at the same nesting depth.  */
3872 	  tree new_name = loop_iv_stack_get_iv (ivstack,
3873 						gbb_loop_index (gbb, iv->loop));
3874 	  *slot = new_rename_map_elt (iv->t, new_name);
3879 /* Register in MAP the tuple (old_name, new_name).  */
3882 register_old_new_names (htab_t map, tree old_name, tree new_name)
3884   struct rename_map_elt tmp;
3887   tmp.old_name = old_name;
3888   slot = htab_find_slot (map, &tmp, INSERT);
  /* NOTE(review): an existing entry for OLD_NAME appears to be
     overwritten without htab_clear_slot, leaking the old element
     (the table's del function is `free') -- TODO confirm.  */
3891     *slot = new_rename_map_elt (old_name, new_name);
3894 /* Create a duplicate of the basic block BB.  NOTE: This does not
3895    preserve SSA form.  */
3898 graphite_copy_stmts_from_block (basic_block bb, basic_block new_bb, htab_t map)
3900   gimple_stmt_iterator gsi, gsi_tgt;
3902   gsi_tgt = gsi_start_bb (new_bb);
3903   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3905       def_operand_p def_p;
3906       ssa_op_iter op_iter;
3908       gimple stmt = gsi_stmt (gsi);
  /* Labels are not copied; NEW_BB keeps its own labels.  */
3911       if (gimple_code (stmt) == GIMPLE_LABEL)
3914       /* Create a new copy of STMT and duplicate STMT's virtual
3916       copy = gimple_copy (stmt);
3917       gsi_insert_after (&gsi_tgt, copy, GSI_NEW_STMT);
3918       mark_symbols_for_renaming (copy);
  /* Keep the copy in the same EH region and carry over the value
     profile histograms.  */
3920       region = lookup_stmt_eh_region (stmt);
3922 	add_stmt_to_eh_region (copy, region);
3923       gimple_duplicate_stmt_histograms (cfun, copy, cfun, stmt);
3925       /* Create new names for all the definitions created by COPY and
3926 	 add replacement mappings for each new name.  */
3927       FOR_EACH_SSA_DEF_OPERAND (def_p, copy, op_iter, SSA_OP_DEF)
3929 	  tree old_name = DEF_FROM_PTR (def_p);
3930 	  tree new_name = create_new_def_for (old_name, copy, def_p);
3931 	  register_old_new_names (map, old_name, new_name);
3936 /* Copies BB and includes in the copied BB all the statements that can
3937    be reached following the use-def chains from the memory accesses,
3938    and returns the next edge following this new block.  */
3941 copy_bb_and_scalar_dependences (basic_block bb, scop_p scop,
3942 				loop_p context_loop,
3943 				edge next_e, htab_t map)
  /* Create an empty block on NEXT_E to receive the copied statements.  */
3945   basic_block new_bb = split_edge (next_e);
3947   next_e = single_succ_edge (new_bb);
3948   graphite_copy_stmts_from_block (bb, new_bb, map);
  /* Control flow is regenerated by CLooG: drop the copied condition
     and PHI nodes, then rename and expand the remaining scalars.  */
3949   remove_condition (new_bb);
3950   rename_variables (new_bb, map);
3951   remove_phi_nodes (new_bb);
3952   expand_scalar_variables (new_bb, scop, context_loop, map);
3957 /* Translates a CLAST statement STMT to GCC representation.  NEXT_E is
3958    the edge where new generated code should be attached.  BB_EXIT is the last
3959    basic block that defines the scope of code generation.  CONTEXT_LOOP is the
3960    loop in which the generated code will be placed (might be NULL).  */
3963 translate_clast (scop_p scop, struct loop *context_loop,
3964 		 struct clast_stmt *stmt, edge next_e, loop_iv_stack ivstack)
  /* The root carries no code of its own: translate its successors.  */
3969   if (CLAST_STMT_IS_A (stmt, stmt_root))
3970     return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
  /* User statement: copy the corresponding GIMPLE basic block.  */
3972   if (CLAST_STMT_IS_A (stmt, stmt_user))
3975       CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
3976       graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
3978       if (GBB_BB (gbb) == ENTRY_BLOCK_PTR)
  /* Build a fresh rename map per user statement; elements are freed by
     the table's `free' del function.  */
3981       map = htab_create (10, rename_map_elt_info, eq_rename_map_elts, free);
3982       loop_iv_stack_patch_for_consts (ivstack, (struct clast_user_stmt *) stmt);
3983       build_iv_mapping (ivstack, map, gbb, scop);
3984       next_e = copy_bb_and_scalar_dependences (GBB_BB (gbb), scop,
3985 					       context_loop, next_e, map);
3987       loop_iv_stack_remove_constants (ivstack);
  /* SSA form and dominators are repaired after each copied block.  */
3988       update_ssa (TODO_update_ssa);
3989       recompute_all_dominators ();
3991       return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
  /* For statement: create a new loop, translate its body into the
     loop, then continue after the loop exit.  */
3994   if (CLAST_STMT_IS_A (stmt, stmt_for))
3997 	= graphite_create_new_loop (scop, next_e, (struct clast_for *) stmt,
3998 				    ivstack, context_loop ? context_loop
4000       edge last_e = single_exit (loop);
4002       next_e = translate_clast (scop, loop, ((struct clast_for *) stmt)->body,
4003 				single_pred_edge (loop->latch), ivstack);
4004       redirect_edge_succ_nodup (next_e, loop->latch);
4006       set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
  /* Pop the IV pushed by graphite_create_new_loop.  */
4007       loop_iv_stack_pop (ivstack);
4008       last_e = single_succ_edge (split_edge (last_e));
4009       recompute_all_dominators ();
4011       return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
  /* Guard statement: emit an if-region and translate the THEN part on
     its true edge.  */
4014   if (CLAST_STMT_IS_A (stmt, stmt_guard))
4016       edge last_e = graphite_create_new_guard (scop, next_e,
4017 					       ((struct clast_guard *) stmt),
4019       edge true_e = get_true_edge_from_guard_bb (next_e->dest);
4020       next_e = translate_clast (scop, context_loop,
4021 				((struct clast_guard *) stmt)->then,
4024       return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
  /* Block statement: translate the body, then the successors.  */
4027   if (CLAST_STMT_IS_A (stmt, stmt_block))
4029       next_e = translate_clast (scop, context_loop,
4030 				((struct clast_block *) stmt)->body,
4033       return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
4039 /* Free the SCATTERING domain list.  */
4042 free_scattering (CloogDomainList *scattering)
  /* Free each domain; the list cells themselves are released in the
     elided loop epilogue.  TODO confirm.  */
4046       CloogDomain *dom = cloog_domain (scattering);
4047       CloogDomainList *next = cloog_next_domain (scattering);
4049       cloog_domain_free (dom);
4055 /* Build cloog program for SCoP.  */
4058 build_cloog_prog (scop_p scop)
4061   int max_nb_loops = scop_max_loop_depth (scop);
4063   CloogLoop *loop_list = NULL;
4064   CloogBlockList *block_list = NULL;
4065   CloogDomainList *scattering = NULL;
4066   CloogProgram *prog = SCOP_PROG (scop);
  /* 2 * depth + 1 scattering dimensions: one static dimension around
     each dynamic (loop) dimension.  */
4067   int nbs = 2 * max_nb_loops + 1;
4068   int *scaldims = (int *) xmalloc (nbs * (sizeof (int)));
4070   cloog_program_set_nb_scattdims (prog, nbs);
4071   initialize_cloog_names (scop);
4073   for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
4075       /* Build new block.  */
4076       CloogMatrix *domain = GBB_DOMAIN (gbb);
4077       CloogStatement *stmt = cloog_statement_alloc (GBB_BB (gbb)->index);
4078       CloogBlock *block = cloog_block_alloc (stmt, 0, NULL,
4079 					     nb_loops_around_gb (gbb));
  /* Remember the graphite_bb in the statement so translate_clast can
     recover it from the generated CLAST.  */
4080       cloog_statement_set_usr (stmt, gbb);
4082       /* Add empty domain to all bbs, which do not yet have a domain, as they
4083 	 are not part of any loop.  */
4086 	  domain = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
4087 	  GBB_DOMAIN (gbb) = domain;
4090       /* Build loop list.  */
4092 	CloogLoop *new_loop_list = cloog_loop_malloc ();
4093 	cloog_loop_set_next (new_loop_list, loop_list);
4094 	cloog_loop_set_domain (new_loop_list,
4095 			       cloog_domain_matrix2domain (domain));
4096 	cloog_loop_set_block (new_loop_list, block);
4097 	loop_list = new_loop_list;
4100       /* Build block list.  */
4102 	CloogBlockList *new_block_list = cloog_block_list_malloc ();
4104 	cloog_block_list_set_next (new_block_list, block_list);
4105 	cloog_block_list_set_block (new_block_list, block);
4106 	block_list = new_block_list;
4109       /* Build scattering list.  */
4111 	/* XXX: Replace with cloog_domain_list_alloc(), when available.  */
4112 	CloogDomainList *new_scattering
4113 	  = (CloogDomainList *) xmalloc (sizeof (CloogDomainList));
4114 	CloogMatrix *scat_mat = schedule_to_scattering (gbb, nbs);
4116 	cloog_set_next_domain (new_scattering, scattering);
4117 	cloog_set_domain (new_scattering,
4118 			  cloog_domain_matrix2domain (scat_mat));
4119 	scattering = new_scattering;
4120 	cloog_matrix_free (scat_mat);
4124   cloog_program_set_loop (prog, loop_list);
4125   cloog_program_set_blocklist (prog, block_list);
4127   for (i = 0; i < nbs; i++)
4130   cloog_program_set_scaldims (prog, scaldims);
4132   /* Extract scalar dimensions to simplify the code generation problem.  */
4133   cloog_program_extract_scalars (prog, scattering);
4135   /* Apply scattering.  */
4136   cloog_program_scatter (prog, scattering);
4137   free_scattering (scattering);
4139   /* Iterators corresponding to scalar dimensions have to be extracted.  */
4140   cloog_names_scalarize (cloog_program_names (prog), nbs,
4141 			 cloog_program_scaldims (prog));
4143   /* Free blocklist.  */
4145     CloogBlockList *next = cloog_program_blocklist (prog);
  /* Unlink each cell before freeing it, so the shared blocks are not
     released with the list.  */
4149 	CloogBlockList *toDelete = next;
4150 	next = cloog_block_list_next (next);
4151 	cloog_block_list_set_next (toDelete, NULL);
4152 	cloog_block_list_set_block (toDelete, NULL);
4153 	cloog_block_list_free (toDelete);
4155     cloog_program_set_blocklist (prog, NULL);
4159 /* Return the options that will be used in GLOOG.  The caller is
     responsible for freeing the result with cloog_options_free.  */
4161 static CloogOptions *
4162 set_cloog_options (void)
4164   CloogOptions *options = cloog_options_malloc ();
4166   /* Change cloog output language to C.  If we do use FORTRAN instead, cloog
4167      will stop e.g. with "ERROR: unbounded loops not allowed in FORTRAN.", if
4168      we pass an incomplete program to cloog.  */
4169   options->language = LANGUAGE_C;
4171   /* Enable complex equality spreading: removes dummy statements
4172      (assignments) in the generated code which repeats the
4173      substitution equations for statements.  This is useless for
4177   /* Enable C pretty-printing mode: normalizes the substitution
4178      equations for statements.  */
4181   /* Allow cloog to build strides with a stride width different to one.
4182      This example has stride = 4:
4184      for (i = 0; i < 20; i += 4)
4186   options->strides = 1;
4188   /* Disable optimizations and make cloog generate source code closer to the
4189      input.  This is useful for debugging, but later we want the optimized
4192      XXX: We can not disable optimizations, as loop blocking is not working
  /* Set the "last level to optimize" to the maximum.  */
4197   options->l = INT_MAX;
4203 /* Prints STMT to STDERR.  */
4206 debug_clast_stmt (struct clast_stmt *stmt)
4208   CloogOptions *options = set_cloog_options ();
  /* NOTE(review): OPTIONS does not appear to be freed here -- a small
     leak, tolerable in a debug-only helper.  TODO confirm.  */
4210   pprint (stderr, stmt, 0, options);
4213 /* Find the right transform for the SCOP, and return a Cloog AST
4214    representing the new form of the program.  */
4216 static struct clast_stmt *
4217 find_transform (scop_p scop)
4219   struct clast_stmt *stmt;
4220   CloogOptions *options = set_cloog_options ();
4222   /* Connect new cloog prog generation to graphite.  */
4223   build_cloog_prog (scop);
4225   if (dump_file && (dump_flags & TDF_DETAILS))
4227       fprintf (dump_file, "Cloog Input [\n");
4228       cloog_program_print (dump_file, SCOP_PROG(scop));
4229       fprintf (dump_file, "]\n");
  /* Run CLooG's code generation and build the CLAST from the result.  */
4232   SCOP_PROG (scop) = cloog_program_generate (SCOP_PROG (scop), options);
4233   stmt = cloog_clast_create (SCOP_PROG (scop), options);
4235   if (dump_file && (dump_flags & TDF_DETAILS))
4237       fprintf (dump_file, "Cloog Output[\n");
4238       pprint (dump_file, stmt, 0, options);
4239       cloog_program_dump_cloog (dump_file, SCOP_PROG (scop));
4240       fprintf (dump_file, "]\n");
4243   cloog_options_free (options);
4247 /* Returns true when it is possible to generate code for this STMT.
4248    For the moment we cannot generate code when Cloog decides to
4249    duplicate a statement, as we do not do a copy, but a move.
4250    USED_BASIC_BLOCKS records the blocks that have already been seen.
4251    We return false if we have to generate code twice for the same
4255 can_generate_code_stmt (struct clast_stmt *stmt,
4256 			struct pointer_set_t *used_basic_blocks)
4261   if (CLAST_STMT_IS_A (stmt, stmt_root))
4262     return can_generate_code_stmt (stmt->next, used_basic_blocks);
  /* A user statement is acceptable only the first time its basic
     block appears in the CLAST.  */
4264   if (CLAST_STMT_IS_A (stmt, stmt_user))
4266       CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
4267       graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);
4269       if (pointer_set_contains (used_basic_blocks, gbb))
4271       pointer_set_insert (used_basic_blocks, gbb);
4272       return can_generate_code_stmt (stmt->next, used_basic_blocks);
  /* Compound statements recurse into their bodies and successors.  */
4275   if (CLAST_STMT_IS_A (stmt, stmt_for))
4276     return can_generate_code_stmt (((struct clast_for *) stmt)->body,
4278       && can_generate_code_stmt (stmt->next, used_basic_blocks);
4280   if (CLAST_STMT_IS_A (stmt, stmt_guard))
4281     return can_generate_code_stmt (((struct clast_guard *) stmt)->then,
4284   if (CLAST_STMT_IS_A (stmt, stmt_block))
4285     return can_generate_code_stmt (((struct clast_block *) stmt)->body,
4287       && can_generate_code_stmt (stmt->next, used_basic_blocks);
4292 /* Returns true when it is possible to generate code for this STMT.  */
4295 can_generate_code (struct clast_stmt *stmt)
  /* The pointer set tracks basic blocks already emitted, so that a
     CLAST duplicating a block is rejected.  */
4298   struct pointer_set_t *used_basic_blocks = pointer_set_create ();
4300   result = can_generate_code_stmt (stmt, used_basic_blocks);
4301   pointer_set_destroy (used_basic_blocks);
4305 /* Remove from the CFG the REGION.  */
4308 remove_sese_region (sese region)
4310   VEC (basic_block, heap) *bbs = NULL;
4311   basic_block entry_bb = SESE_ENTRY (region)->dest;
4312   basic_block exit_bb = SESE_EXIT (region)->dest;
  /* Collect the blocks of the region (entry included, exit block
     excluded) and delete them one by one.  */
4316   VEC_safe_push (basic_block, heap, bbs, entry_bb);
4317   gather_blocks_in_sese_region (entry_bb, exit_bb, &bbs);
4319   for (i = 0; VEC_iterate (basic_block, bbs, i, bb); i++)
4320     delete_basic_block (bb);
4322   VEC_free (basic_block, heap, bbs);
4325 typedef struct ifsese {
  /* Returns the entry edge of the whole if-region.  */
4332 if_region_entry (ifsese if_region)
4334   return SESE_ENTRY (if_region->region);
  /* Returns the exit edge of the whole if-region.  */
4338 if_region_exit (ifsese if_region)
4340   return SESE_EXIT (if_region->region);
  /* Returns the basic block containing the condition of IF_REGION.  */
4343 static inline basic_block
4344 if_region_get_condition_block (ifsese if_region)
4346   return if_region_entry (if_region)->dest;
  /* Moves REGION under the false branch of IF_REGION by rewiring the
     CFG edges, and repairs the loop-exit hash table for the moved exit
     edge.  */
4350 if_region_set_false_region (ifsese if_region, sese region)
4352   basic_block condition = if_region_get_condition_block (if_region);
4353   edge false_edge = get_false_edge_from_guard_bb (condition);
4354   edge entry_region = SESE_ENTRY (region);
4355   edge exit_region = SESE_EXIT (region);
4356   basic_block before_region = entry_region->src;
4357   basic_block last_in_region = exit_region->src;
  /* Look up the recorded loop exit for the region's exit edge before
     the edge is redirected, so it can be re-keyed afterwards.  */
4358   void **slot = htab_find_slot_with_hash (current_loops->exits, exit_region,
4359 					  htab_hash_pointer (exit_region),
  /* Swap edge flags so the false branch enters the region and the old
     region exit takes over the false edge's role.  */
4362   entry_region->flags = false_edge->flags;
4363   false_edge->flags = exit_region->flags;
4365   redirect_edge_pred (entry_region, condition);
4366   redirect_edge_pred (exit_region, before_region);
4367   redirect_edge_pred (false_edge, last_in_region);
4369   exit_region->flags = EDGE_FALLTHRU;
4370   recompute_all_dominators ();
4372   SESE_EXIT (region) = single_succ_edge (false_edge->dest);
4373   if_region->false_region = region;
  /* Re-key the loop-exit entry from the old exit edge to FALSE_EDGE,
     which is now the edge leaving the enclosing loop.  */
4377       struct loop_exit *loop_exit = GGC_CNEW (struct loop_exit);
4379       memcpy (loop_exit, *((struct loop_exit **) slot), sizeof (struct loop_exit));
4380       htab_clear_slot (current_loops->exits, slot);
4382       slot = htab_find_slot_with_hash (current_loops->exits, false_edge,
4383 				       htab_hash_pointer (false_edge),
4385       loop_exit->e = false_edge;
4387       false_edge->src->loop_father->exits->next = loop_exit;
  /* Creates an if-region on edge ENTRY guarded by CONDITION, and
     returns the ifsese describing it (whole region plus its true and
     false sub-regions).  */
4392 create_if_region_on_edge (edge entry, tree condition)
4396   sese sese_region = GGC_NEW (struct sese);
4397   sese true_region = GGC_NEW (struct sese);
4398   sese false_region = GGC_NEW (struct sese);
4399   ifsese if_region = GGC_NEW (struct ifsese);
4400   edge exit = create_empty_if_region_on_edge (entry, condition);
4402   if_region->region = sese_region;
4403   if_region->region->entry = entry;
4404   if_region->region->exit = exit;
  /* Identify the true and false branches of the fresh condition block
     and record them as sub-regions.  */
4406   FOR_EACH_EDGE (e, ei, entry->dest->succs)
4408       if (e->flags & EDGE_TRUE_VALUE)
4410 	  true_region->entry = e;
4411 	  true_region->exit = single_succ_edge (e->dest);
4412 	  if_region->true_region = true_region;
4414       else if (e->flags & EDGE_FALSE_VALUE)
4416 	  false_region->entry = e;
4417 	  false_region->exit = single_succ_edge (e->dest);
4418 	  if_region->false_region = false_region;
4425 /* Moves REGION in a condition expression:
/* Splits REGION's entry edge, then guards the region with an
   always-true condition (integer_one_node) by building an if-region on
   the predecessor edge and installing REGION as its false branch.
   NOTE(review): truncated listing -- the rest of the header comment and
   the return of IF_REGION are missing from this extract.  */
4433 move_sese_in_condition (sese region)
4435 basic_block pred_block = split_edge (SESE_ENTRY (region));
4436 ifsese if_region = NULL;
4438 SESE_ENTRY (region) = single_succ_edge (pred_block);
4439 if_region = create_if_region_on_edge (single_pred_edge (pred_block), integer_one_node);
4440 if_region_set_false_region (if_region, region);
4445 /* Returns true when BB is in REGION. */
/* Membership is tested via dominance: BB must be dominated by the
   region's entry block and post-dominated by the region's exit block.  */
4448 bb_in_sese_p (basic_block bb, sese region)
4450 return (dominated_by_p (CDI_DOMINATORS, bb, SESE_ENTRY (region)->src)
4451 && dominated_by_p (CDI_POST_DOMINATORS, bb, SESE_EXIT (region)->dest));
4454 /* For USE in BB, if it is used outside of the REGION it is defined in,
4455 mark it for rewrite. Record basic block BB where it is used
4456 to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap. */
/* NOTE(review): truncated listing -- the declarations of VER/DEF_BB,
   the early returns, and the enclosing condition head are missing.  */
4459 sese_find_uses_to_rename_use (sese region, basic_block bb, tree use,
4460 bitmap *use_blocks, bitmap need_phis)
/* Only SSA names are of interest.  */
4465 if (TREE_CODE (use) != SSA_NAME)
4468 ver = SSA_NAME_VERSION (use);
4469 def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
/* Skip uses whose definition is outside REGION or that are used inside
   REGION (these are the remaining arms of a truncated condition).  */
4471 || !bb_in_sese_p (def_bb, region)
4472 || bb_in_sese_p (bb, region))
/* Lazily allocate the per-version bitmap of use blocks.  */
4475 if (!use_blocks[ver])
4476 use_blocks[ver] = BITMAP_ALLOC (NULL);
4477 bitmap_set_bit (use_blocks[ver], bb->index);
4479 bitmap_set_bit (need_phis, ver);
4482 /* Marks names that are used in BB and outside of the loop they are
4483 defined in for rewrite. Records the set of blocks in that the ssa
4484 names are defined to USE_BLOCKS. Record the SSA names that will
4485 need exit PHIs in NEED_PHIS. */
/* NOTE(review): truncated listing -- the declarations of E/EI/VAR/ITER
   are missing from this extract.  */
4488 sese_find_uses_to_rename_bb (sese region, basic_block bb,
4489 bitmap *use_blocks, bitmap need_phis)
4491 gimple_stmt_iterator bsi;
/* First scan PHI arguments flowing out of BB along each successor
   edge ...  */
4497 FOR_EACH_EDGE (e, ei, bb->succs)
4498 for (bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi); gsi_next (&bsi))
4499 sese_find_uses_to_rename_use (region, bb,
4500 PHI_ARG_DEF_FROM_EDGE (gsi_stmt (bsi), e),
4501 use_blocks, need_phis);
/* ... then every SSA use operand of the statements in BB itself.  */
4503 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4504 FOR_EACH_SSA_TREE_OPERAND (var, gsi_stmt (bsi), iter, SSA_OP_ALL_USES)
4505 sese_find_uses_to_rename_use (region, bb, var, use_blocks, need_phis);
4508 /* Add exit phis for the USE on EXIT. */
/* Creates a PHI node for USE in block EXIT, gives it a fresh SSA
   definition, and feeds USE along FALSE_E and zero along TRUE_E.  */
4511 sese_add_exit_phis_edge (basic_block exit, tree use, edge false_e, edge true_e)
4513 gimple phi = create_phi_node (use, exit);
4515 create_new_def_for (gimple_phi_result (phi), phi,
4516 gimple_phi_result_ptr (phi));
4517 add_phi_arg (phi, use, false_e);
/* NOTE(review): the true-edge argument is integer_zero_node -- presumably
   a placeholder later fixed by SSA update; confirm against callers.  */
4518 add_phi_arg (phi, integer_zero_node, true_e);
4521 /* Add phi nodes for VAR that is used in LIVEIN. Phi nodes are
4522 inserted in block WHERE. */
/* NOTE(review): truncated listing -- the declaration of DEF, the
   else-branch structure around lines 4531-4534, and the BITMAP_FREE
   calls are missing from this extract.  */
4525 sese_add_exit_phis_var (basic_block where, tree var, bitmap livein,
4526 edge false_e, edge true_e)
4529 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
/* The definition block itself is not live-in for registers.  */
4531 if (is_gimple_reg (var))
4532 bitmap_clear_bit (livein, def_bb->index);
4534 bitmap_set_bit (livein, def_bb->index);
/* Propagate liveness from the definition block over the CFG.  */
4536 def = BITMAP_ALLOC (NULL);
4537 bitmap_set_bit (def, def_bb->index);
4538 compute_global_livein (livein, def);
4541 sese_add_exit_phis_edge (where, var, false_e, true_e);
4544 /* Insert in the block WHERE phi nodes for variables defined in REGION
4545 and used outside the REGION. */
/* NOTE(review): truncated listing -- declarations of I/BB/BI, the
   FOR_EACH_BB loop head, and the free of USE_BLOCKS are missing.  */
4548 rewrite_into_sese_closed_ssa (sese region, basic_block where,
4549 edge false_e, edge true_e)
4554 bitmap names_to_rename = BITMAP_ALLOC (NULL);
4555 unsigned old_num_ssa_names = num_ssa_names;
/* One bitmap of use blocks per SSA name version, allocated lazily by
   sese_find_uses_to_rename_use.  */
4556 bitmap *use_blocks = XCNEWVEC (bitmap, old_num_ssa_names);
4558 update_ssa (TODO_update_ssa);
/* Collect every name defined in REGION but used outside it.  */
4561 sese_find_uses_to_rename_bb (region, bb, use_blocks, names_to_rename);
/* Insert the exit PHIs in WHERE for each collected name.  */
4563 EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
4564 sese_add_exit_phis_var (where, ssa_name (i), use_blocks[i],
4567 update_ssa (TODO_update_ssa);
4569 for (i = 0; i < old_num_ssa_names; i++)
4570 BITMAP_FREE (use_blocks[i]);
4573 BITMAP_FREE (names_to_rename);
4576 /* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
/* Takes ownership of STMT (the CLooG clast) and frees it on every path.
   Wraps the SCOP region in a guard condition, converts the region to
   closed SSA, then emits new GIMPLE loops from the clast.
   NOTE(review): truncated listing -- the rest of the header comment,
   the ivstack initial size, and the epilogue after translate_clast are
   missing from this extract.  */
4580 gloog (scop_p scop, struct clast_stmt *stmt)
4582 edge new_scop_exit_edge = NULL;
4583 VEC (iv_stack_entry_p, heap) *ivstack = VEC_alloc (iv_stack_entry_p, heap,
4585 loop_p context_loop;
4586 ifsese if_region = NULL;
/* Bail out (freeing STMT) if the clast cannot be turned into code.  */
4588 if (!can_generate_code (stmt))
4590 cloog_clast_free (stmt);
/* Guard the region so old and new versions can coexist during
   code generation.  */
4594 if_region = move_sese_in_condition (SCOP_REGION (scop));
4595 rewrite_into_sese_closed_ssa (SCOP_REGION (scop),
4596 if_region->region->exit->src,
4597 if_region->false_region->exit,
4598 if_region->true_region->exit);
4600 context_loop = SESE_ENTRY (SCOP_REGION (scop))->src->loop_father;
4601 new_scop_exit_edge = translate_clast (scop, context_loop,
4602 stmt, if_region->true_region->entry,
4605 cleanup_tree_cfg ();
4606 recompute_all_dominators ();
4608 free_loop_iv_stack (&ivstack);
4609 cloog_clast_free (stmt);
4612 /* Returns the number of data references in SCOP. */
/* Sums the lengths of the per-bb data-reference vectors.  */
4615 nb_data_refs_in_scop (scop_p scop)
4621 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
4622 res += VEC_length (data_reference_p, GBB_DATA_REFS (gbb));
4627 /* Move the loop at index LOOP and insert it before index NEW_LOOP_POS.
4628 This transformartion is only valid, if the loop nest between i and k is
4629 perfectly nested. Therefore we do not need to change the static schedule.
4633 for (i = 0; i < 50; i++)
4635 for (k = 5; k < 100; k++)
4638 To move k before i use:
4640 graphite_trans_bb_move_loop (A, 2, 0)
4642 for (k = 5; k < 100; k++)
4643 for (i = 0; i < 50; i++)
4649 graphite_trans_bb_move_loop (A, 0, 2)
4651 This function does not check the validity of interchanging loops.
4652 This should be checked before calling this function. */
/* NOTE(review): truncated listing -- declarations of ROW/J/TMP/
   TMP_LOOP_P, value_init/value_clear of TMP, and the else keyword are
   missing from this extract.  */
4655 graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
4658 CloogMatrix *domain = GBB_DOMAIN (gb);
4662 gcc_assert (loop < gbb_nb_loops (gb)
4663 && new_loop_pos < gbb_nb_loops (gb));
4665 /* Update LOOPS vector. */
4666 tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
4667 VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
4668 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);
/* Column 0 of a CloogMatrix row holds the eq/ineq marker, so loop L's
   column is L + 1; rotate the columns between the two positions.  */
4670 /* Move the domain columns. */
4671 if (loop < new_loop_pos)
4672 for (row = 0; row < domain->NbRows; row++)
4676 value_assign (tmp, domain->p[row][loop + 1]);
/* Shift the intervening columns left by one.  */
4678 for (j = loop ; j < new_loop_pos - 1; j++)
4679 value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);
4681 value_assign (domain->p[row][new_loop_pos], tmp);
/* Moving towards the front: shift the intervening columns right.  */
4685 for (row = 0; row < domain->NbRows; row++)
4689 value_assign (tmp, domain->p[row][loop + 1]);
4691 for (j = loop ; j > new_loop_pos; j--)
4692 value_assign (domain->p[row][j + 1], domain->p[row][j]);
4694 value_assign (domain->p[row][new_loop_pos + 1], tmp);
4699 /* Get the index of the column representing constants in the DOMAIN
/* The constant term is always the last column of a CloogMatrix.  */
4703 const_column_index (CloogMatrix *domain)
4705 return domain->NbColumns - 1;
4709 /* Get the first index that is positive or negative, determined
4710 following the value of POSITIVE, in matrix DOMAIN in COLUMN. */
/* NOTE(review): truncated listing -- the returns and the gcc_unreachable
   fallthrough are missing from this extract.  */
4713 get_first_matching_sign_row_index (CloogMatrix *domain, int column,
4718 for (row = 0; row < domain->NbRows; row++)
4720 int val = value_get_si (domain->p[row][column]);
/* A positive coefficient on the iterator marks a lower-bound row,
   a negative one an upper-bound row.  */
4722 if (val > 0 && positive)
4725 else if (val < 0 && !positive)
4732 /* Get the lower bound of COLUMN in matrix DOMAIN. */
4735 get_lower_bound_row (CloogMatrix *domain, int column)
4737 return get_first_matching_sign_row_index (domain, column, true);
4740 /* Get the upper bound of COLUMN in matrix DOMAIN. */
4743 get_upper_bound_row (CloogMatrix *domain, int column)
4745 return get_first_matching_sign_row_index (domain, column, false);
4748 /* Get the lower bound of LOOP. */
/* Copies the constant term of the lower-bound row into
   LOWER_BOUND_RESULT (caller must have value_init'ed it).  */
4751 get_lower_bound (CloogMatrix *domain, int loop, Value lower_bound_result)
4753 int lower_bound_row = get_lower_bound_row (domain, loop);
4754 value_assign (lower_bound_result,
4755 domain->p[lower_bound_row][const_column_index(domain)]);
4758 /* Get the upper bound of LOOP. */
/* Same as get_lower_bound, but for the upper-bound row.  */
4761 get_upper_bound (CloogMatrix *domain, int loop, Value upper_bound_result)
4763 int upper_bound_row = get_upper_bound_row (domain, loop);
4764 value_assign (upper_bound_result,
4765 domain->p[upper_bound_row][const_column_index(domain)]);
4768 /* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
4769 Always valid, but not always a performance improvement. */
/* Introduces a new outer "strip" loop iterating with step STRIDE and
   rewrites the domain matrix accordingly: the new domain has one extra
   column (the strip iterator) and three extra rows (its lower bound,
   its upper bound, and the linking constraints to the old iterator).
   NOTE(review): truncated listing -- declarations of ROW/COL, several
   value_set_si second operands, parts of the explanatory matrices, and
   the return are missing from this extract.  */
4772 graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
4776 CloogMatrix *domain = GBB_DOMAIN (gb);
4777 CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
4778 domain->NbColumns + 1);
/* Matrix columns: 0 is the eq/ineq marker, so iterator L lives in
   column L + 1.  The strip iterator is inserted before the old one.  */
4780 int col_loop_old = loop_depth + 2;
4781 int col_loop_strip = col_loop_old - 1;
4783 Value old_lower_bound;
4784 Value old_upper_bound;
4786 gcc_assert (loop_depth <= gbb_nb_loops (gb) - 1);
/* Reserve a slot in the loops vector for the new strip loop.  */
4788 VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop_depth, NULL);
4790 GBB_DOMAIN (gb) = new_domain;
4793 nrows = 4, ncols = 4
/* Copy the old domain, duplicating nothing: columns up to LOOP_DEPTH
   stay in place, later columns shift right by one to make room for the
   strip iterator's column.  */
4802 for (row = 0; row < domain->NbRows; row++)
4803 for (col = 0; col < domain->NbColumns; col++)
4804 if (col <= loop_depth)
4805 value_assign (new_domain->p[row][col], domain->p[row][col]);
4807 value_assign (new_domain->p[row][col + 1], domain->p[row][col]);
4811 nrows = 6, ncols = 5
/* Append the new rows after the copied ones.  */
4823 row = domain->NbRows;
4825 /* Add outer loop. */
4826 value_init (old_lower_bound);
4827 value_init (old_upper_bound);
4828 get_lower_bound (new_domain, col_loop_old, old_lower_bound);
4829 get_upper_bound (new_domain, col_loop_old, old_upper_bound);
4831 /* Set Lower Bound */
4832 value_set_si (new_domain->p[row][0], 1);
4833 value_set_si (new_domain->p[row][col_loop_strip], 1);
4834 value_assign (new_domain->p[row][const_column_index (new_domain)],
4836 value_clear (old_lower_bound);
4846 1 0 0 -1 99 | copy old lower bound
4853 Value new_upper_bound;
4854 Value strip_size_value;
4856 value_init (new_upper_bound);
4857 value_init (strip_size_value);
4858 value_set_si (strip_size_value, (int) stride);
/* Upper bound of the strip loop: ceil-ish division of the old upper
   bound by STRIDE, plus one.  */
4860 value_pdivision (new_upper_bound, old_upper_bound, strip_size_value);
4861 value_add_int (new_upper_bound, new_upper_bound, 1);
4863 /* Set Upper Bound */
4864 value_set_si (new_domain->p[row][0], 1);
4865 value_set_si (new_domain->p[row][col_loop_strip], -1);
4866 value_assign (new_domain->p[row][const_column_index (new_domain)],
4869 value_clear (strip_size_value);
4870 value_clear (old_upper_bound);
4871 value_clear (new_upper_bound);
4882 1 0 -1 0 25 (divide old upper bound with stride)
/* Rewrite the old loop's lower bound row to j >= stride * jj, linking
   the old iterator to the strip iterator.  */
4887 row = get_lower_bound_row (new_domain, col_loop_old);
4888 /* Add local variable to keep linear representation. */
4889 value_set_si (new_domain->p[row][0], 1);
4890 value_set_si (new_domain->p[row][const_column_index (new_domain)],0);
4891 value_set_si (new_domain->p[row][col_loop_old], 1);
4892 value_set_si (new_domain->p[row][col_loop_strip], -1*((int)stride));
4903 1 0 -1 0 25 (divide old upper bound with stride)
/* Last new row: stride * jj + (stride - 1) >= j, the strip-local upper
   bound of the old iterator.  */
4908 row = new_domain->NbRows-1;
4910 value_set_si (new_domain->p[row][0], 1);
4911 value_set_si (new_domain->p[row][col_loop_old], -1);
4912 value_set_si (new_domain->p[row][col_loop_strip], stride);
4913 value_set_si (new_domain->p[row][const_column_index (new_domain)],
4922 1 0 -4 1 0 j >= 4*jj
4925 1 0 -1 0 25 25 >= jj
4926 0 0 4 -1 3 jj+3 >= j
4929 cloog_matrix_free (domain);
4931 /* Update static schedule. */
4934 int nb_loops = gbb_nb_loops (gb);
4935 lambda_vector new_schedule = lambda_vector_new (nb_loops + 1);
/* Schedule entries up to LOOP_DEPTH are unchanged; later entries shift
   to make room for the strip loop's position.
   NOTE(review): the copy for i == loop_depth + 1 appears to be on a
   missing line of this extract -- confirm against the full source.  */
4937 for (i = 0; i <= loop_depth; i++)
4938 new_schedule[i] = GBB_STATIC_SCHEDULE (gb)[i];
4940 for (i = loop_depth + 1; i <= nb_loops - 2; i++)
4941 new_schedule[i + 2] = GBB_STATIC_SCHEDULE (gb)[i];
4943 GBB_STATIC_SCHEDULE (gb) = new_schedule;
4947 /* Returns true when the strip mining of LOOP_INDEX by STRIDE is
4948 profitable or undecidable. GB is the statement around which the
4949 loops will be strip mined. */
/* Profitable means the loop runs at least STRIDE iterations; when the
   iteration count is unknown (chrec_dont_know or non-constant) the
   answer is optimistically true.  NOTE(review): truncated listing --
   the local declarations and the return statements are missing.  */
4952 strip_mine_profitable_p (graphite_bb_p gb, int stride,
4961 loop = VEC_index (loop_p, GBB_LOOPS (gb), loop_index);
4962 exit = single_exit (loop);
4964 niter = find_loop_niter (loop, &exit);
/* Unknown trip count: treat as profitable (undecidable case).  */
4965 if (niter == chrec_dont_know
4966 || TREE_CODE (niter) != INTEGER_CST)
4969 niter_val = int_cst_value (niter);
/* Too few iterations to fill even one strip.  */
4971 if (niter_val < stride)
4974 if (dump_file && (dump_flags & TDF_DETAILS))
4976 fprintf (dump_file, "\nStrip Mining is not profitable for loop %d:",
4978 fprintf (dump_file, "number of iterations is too low.\n");
4985 /* Determines when the interchange of LOOP_A and LOOP_B belonging to
/* Uses the lambda framework: builds an identity transform of the
   perfect-nest depth, exchanges the rows for LOOP_A and LOOP_B, and
   asks lambda_transform_legal_p whether the dependences permit it.
   NOTE(review): truncated listing -- the RES declaration, the returns,
   and the second row exchange's surrounding logic are partly missing.  */
4989 is_interchange_valid (scop_p scop, int loop_a, int loop_b)
4992 VEC (ddr_p, heap) *dependence_relations;
4993 VEC (data_reference_p, heap) *datarefs;
4995 struct loop *nest = VEC_index (loop_p, SCOP_LOOP_NEST (scop), loop_a);
4996 int depth = perfect_loop_nest_depth (nest);
4997 lambda_trans_matrix trans;
4999 gcc_assert (loop_a < loop_b);
5001 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
5002 datarefs = VEC_alloc (data_reference_p, heap, 10);
/* If the dependence analysis fails, the interchange cannot be proven
   valid.  */
5004 if (!compute_data_dependences_for_loop (nest, true, &datarefs,
5005 &dependence_relations))
5008 if (dump_file && (dump_flags & TDF_DETAILS))
5009 dump_ddrs (dump_file, dependence_relations);
/* Identity transform with rows 0 and (loop_b - loop_a) exchanged models
   the interchange of the two loops.  */
5011 trans = lambda_trans_matrix_new (depth, depth);
5012 lambda_matrix_id (LTM_MATRIX (trans), depth);
5014 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
5016 if (!lambda_transform_legal_p (trans, depth, dependence_relations))
/* Undo the exchange (restore identity) when the transform is illegal.  */
5018 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a);
5024 free_dependence_relations (dependence_relations);
5025 free_data_refs (datarefs);
5029 /* Loop block the LOOPS innermost loops of GB with stride size STRIDE.
5033 for (i = 0; i <= 50; i++=4)
5034 for (k = 0; k <= 100; k++=4)
5035 for (l = 0; l <= 200; l++=4)
5038 To strip mine the two inner most loops with stride = 4 call:
5040 graphite_trans_bb_block (A, 4, 2)
5042 for (i = 0; i <= 50; i++)
5043 for (kk = 0; kk <= 100; kk+=4)
5044 for (ll = 0; ll <= 200; ll+=4)
5045 for (k = kk; k <= min (100, kk + 3); k++)
5046 for (l = ll; l <= min (200, ll + 3); l++)
/* NOTE(review): truncated listing -- the declarations of I/J and the
   return statements are missing from this extract.  */
5051 graphite_trans_bb_block (graphite_bb_p gb, int stride, int loops)
5054 int nb_loops = gbb_nb_loops (gb);
5055 int start = nb_loops - loops;
5056 scop_p scop = GBB_SCOP (gb);
5058 gcc_assert (scop_contains_loop (scop, gbb_loop (gb)));
/* All pairwise interchanges among the LOOPS innermost loops must be
   valid, otherwise blocking is abandoned.  */
5060 for (i = start ; i < nb_loops; i++)
5061 for (j = i + 1; j < nb_loops; j++)
5062 if (!is_interchange_valid (scop, i, j))
5064 if (dump_file && (dump_flags & TDF_DETAILS))
5066 "\nInterchange not valid for loops %d and %d:\n", i, j);
5069 else if (dump_file && (dump_flags & TDF_DETAILS))
5071 "\nInterchange valid for loops %d and %d:\n", i, j);
5073 /* Check if strip mining is profitable for every loop. */
5074 for (i = 0; i < nb_loops - start; i++)
5075 if (!strip_mine_profitable_p (gb, stride, start + i))
5078 /* Strip mine loops. */
/* Each strip mine inserts a loop, hence the 2*i index adjustment.  */
5079 for (i = 0; i < nb_loops - start; i++)
5080 graphite_trans_bb_strip_mine (gb, start + 2 * i, stride);
5082 /* Interchange loops. */
5083 for (i = 1; i < nb_loops - start; i++)
5084 graphite_trans_bb_move_loop (gb, start + 2 * i, start + i);
5089 /* Loop block LOOPS innermost loops of a loop nest. BBS represent the
5090 basic blocks that belong to the loop nest to be blocked. */
/* Applies graphite_trans_bb_block to every bb of the nest; returns true
   if any bb was transformed.  NOTE(review): the declarations of I/GB
   are on lines missing from this extract.  */
5093 graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops)
5097 bool transform_done = false;
5099 /* TODO: - Calculate the stride size automatically. */
5100 int stride_size = 64;
5102 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++)
5103 transform_done |= graphite_trans_bb_block (gb, stride_size, loops);
5105 return transform_done;
5108 /* Loop block all basic blocks of SCOP. Return false when the
5109 transform is not performed. */
/* Walks the bbs of SCOP in schedule order, grouping together bbs that
   share a perfect loop nest, and blocks each perfect nest found.
   NOTE(review): truncated listing -- local declarations (I, J, GB,
   NB_LOOPS, LAST_NB_LOOPS), `continue`/`break` statements, and the
   early return for the empty case are missing from this extract.  */
5112 graphite_trans_scop_block (scop_p scop)
5118 bool perfect = true;
5119 bool transform_done = false;
5121 VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3);
5122 int max_schedule = scop_max_loop_depth (scop) + 1;
5123 lambda_vector last_schedule = lambda_vector_new (max_schedule);
5125 if (VEC_length (graphite_bb_p, SCOP_BBS (scop)) == 0)
5128 /* Get the data of the first bb. */
5129 gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0);
5130 last_nb_loops = gbb_nb_loops (gb);
5131 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
5133 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5135 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
5137 /* We did the first bb before. */
5141 nb_loops = gbb_nb_loops (gb);
5143 /* If the number of loops is unchanged and only the last element of the
5144 schedule changes, we stay in the loop nest. */
5145 if (nb_loops == last_nb_loops
5146 && (last_schedule [nb_loops + 1]
5147 != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1]))
5149 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5153 /* Otherwise, we left the innermost loop. So check, if the last bb was in
5154 a perfect loop nest and how many loops are contained in this perfect
5157 Count the number of zeros from the end of the schedule. They are the
5158 number of surrounding loops.
5161 last_bb 2 3 2 0 0 0 0 3
5165 last_bb 2 3 2 0 0 0 0 3
5169 If there is no zero, there were other bbs in outer loops and the loop
5170 nest is not perfect. */
5171 for (j = last_nb_loops - 1; j >= 0; j--)
5173 if (last_schedule [j] != 0
5174 || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1))
5183 /* Found perfect loop nest. */
5184 if (perfect && last_nb_loops - j >= 2)
5185 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
5187 /* Check if we start with a new loop.
5191 last_bb 2 3 2 0 0 0 0 3
5194 Here we start with the loop "2 3 2 0 0 1"
5196 last_bb 2 3 2 0 0 0 0 3
5199 But here not, so the loop nest can never be perfect. */
5201 perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0);
5203 /* Update the last_bb infos. We do not do that for the bbs in the same
5204 loop, as the data we use is not changed. */
5205 last_nb_loops = nb_loops;
5206 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule,
5208 VEC_truncate (graphite_bb_p, bbs, 0);
5209 VEC_safe_push (graphite_bb_p, heap, bbs, gb);
5212 /* Check if the last loop nest was perfect. It is the same check as above,
5213 but the comparison with the next bb is missing. */
5214 for (j = last_nb_loops - 1; j >= 0; j--)
5215 if (last_schedule [j] != 0)
5223 /* Found perfect loop nest. */
5224 if (last_nb_loops - j > 0)
5225 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j);
5226 VEC_free (graphite_bb_p, heap, bbs);
5228 if (dump_file && (dump_flags & TDF_DETAILS))
5229 fprintf (dump_file, "\nLoop blocked.\n");
5231 return transform_done;
5234 /* Apply graphite transformations to all the basic blocks of SCOP. */
/* Returns true when code should be regenerated for SCOP, i.e. when a
   real transform was done or -fgraphite-identity forces regeneration.  */
5237 graphite_apply_transformations (scop_p scop)
5239 bool transform_done = false;
5241 /* Sort the list of bbs. Keep them always sorted. */
5242 graphite_sort_gbbs (scop);
5244 if (flag_loop_block)
5245 transform_done = graphite_trans_scop_block (scop);
5247 /* Generate code even if we did not apply any real transformation.
5248 This also allows to check the performance for the identity
5249 transformation: GIMPLE -> GRAPHITE -> GIMPLE
5250 Keep in mind that CLooG optimizes in control, so the loop structure
5251 may change, even if we only use -fgraphite-identity. */
5252 if (flag_graphite_identity)
5253 transform_done = true;
5255 return transform_done;
5258 /* We limit all SCoPs to SCoPs, that are completely surrounded by a loop.
5268 * SCoP frontier, as this line is not surrounded by any loop. *
5272 This is necessary as scalar evolution and parameter detection need a
5273 outermost loop to initialize parameters correctly.
5275 TODO: FIX scalar evolution and parameter detection to allow more flexible
/* NOTE(review): the function's signature line is missing from this
   extract; from the body this rebuilds CURRENT_SCOPS, restricting each
   SCoP to regions whose outermost loops lie fully inside it
   (presumably the `limit_scops' function -- confirm against the full
   source).  */
5281 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
5286 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
5290 build_scop_bbs (scop);
5292 if (!build_scop_loop_nests (scop))
/* For every outermost loop of the SCoP, record a region spanning from
   its header to the destination of its single exit.  */
5295 for (j = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), j, loop); j++)
5296 if (!loop_in_scop_p (loop_outer (loop), scop))
5298 sd_region open_scop;
5299 open_scop.entry = loop->header;
5300 open_scop.exit = single_exit (loop)->dest;
5301 VEC_safe_push (sd_region, heap, tmp_scops, &open_scop);
/* Replace the old SCoP list with the newly limited regions.  */
5305 free_scops (current_scops);
5306 current_scops = VEC_alloc (scop_p, heap, 3);
5308 create_sese_edges (tmp_scops);
5309 build_graphite_scops (tmp_scops);
5310 VEC_free (sd_region, heap, tmp_scops);
5313 /* Perform a set of linear transforms on the loops of the current
/* Driver of the pass: builds SCoPs, constructs their polyhedral
   representation, applies the transforms, and regenerates code through
   CLooG for each transformed SCoP.  NOTE(review): truncated listing --
   declarations of I/SCOP, the build_scops/limit_scops calls, the
   `continue` after build_scop_loop_nests, and cloog_finalize are on
   lines missing from this extract.  */
5317 graphite_transform_loops (void)
/* Nothing to do without at least one real loop.  */
5322 if (number_of_loops () <= 1)
5325 current_scops = VEC_alloc (scop_p, heap, 3);
5326 recompute_all_dominators ();
5328 if (dump_file && (dump_flags & TDF_DETAILS))
5329 fprintf (dump_file, "Graphite loop transformations \n");
5331 initialize_original_copy_tables ();
5332 cloog_initialize ();
5336 if (dump_file && (dump_flags & TDF_DETAILS))
5337 fprintf (dump_file, "\nnumber of SCoPs: %d\n",
5338 VEC_length (scop_p, current_scops));
5340 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
/* Build the polyhedral representation of this SCoP.  */
5342 build_scop_bbs (scop);
5343 if (!build_scop_loop_nests (scop))
5346 build_scop_canonical_schedules (scop);
5347 build_bb_loops (scop);
5348 build_scop_conditions (scop);
5349 find_scop_parameters (scop);
5350 build_scop_context (scop);
5352 if (dump_file && (dump_flags & TDF_DETAILS))
5354 fprintf (dump_file, "\n(In SCoP %d:\n", i);
5355 fprintf (dump_file, "\nnumber of bbs: %d\n",
5356 VEC_length (graphite_bb_p, SCOP_BBS (scop)));
5357 fprintf (dump_file, "\nnumber of loops: %d)\n",
5358 VEC_length (loop_p, SCOP_LOOP_NEST (scop)));
5361 if (!build_scop_iteration_domain (scop))
5364 build_scop_data_accesses (scop);
5365 build_scop_dynamic_schedules (scop);
5367 if (dump_file && (dump_flags & TDF_DETAILS))
5369 int nbrefs = nb_data_refs_in_scop (scop);
5370 fprintf (dump_file, "\nnumber of data refs: %d\n", nbrefs);
/* Regenerate code only when a transform was applied; under
   ENABLE_CHECKING still exercise find_transform for validation.  */
5373 if (graphite_apply_transformations (scop))
5374 gloog (scop, find_transform (scop));
5375 #ifdef ENABLE_CHECKING
5378 struct clast_stmt *stmt = find_transform (scop);
5379 cloog_clast_free (stmt);
5385 free_scops (current_scops);
5387 free_original_copy_tables ();
5390 #else /* If Cloog is not available: #ifndef HAVE_cloog. */
5393 graphite_transform_loops (void)
5395 sorry ("Graphite loop optimizations cannot be used");