1 /* Loop flattening for Graphite.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
33 #include "tree-dump.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-pass.h"
41 #include "value-prof.h"
42 #include "pointer-set.h"
49 #include "graphite-ppl.h"
51 #include "graphite-poly.h"
53 /* The loop flattening pass transforms loop nests into a single loop,
54 removing the loop nesting structure. The auto-vectorization can
55 then apply on the full loop body, without needing the outer-loop
58 The canonical example is as follows: suppose that we have a loop
59 nest with known iteration counts
61 | for (i = 1; i <= 6; i++)
62 | for (j = 1; j <= 6; j++)
65 The loop flattening is performed by linearizing the iteration space
66 using the function "f (x) = 6 * i + j". In this case, CLooG would
69 | for (c1=7;c1<=42;c1++) {
74 There are several limitations for loop flattening that are linked
75 to the expressivity of the polyhedral model. One has to take an
76 upper bound approximation to deal with the parametric case of loop
77 flattening. For example, in the loop nest:
79 | for (i = 1; i <= N; i++)
80 | for (j = 1; j <= M; j++)
83 One would like to flatten this loop using a linearization function
84 like this "f (x) = M * i + j". However CLooG's schedules are not
85 expressive enough to deal with this case, and so the parameter M
86 has to be replaced by an integer upper bound approximation. If we
87 further know in the context of the scop that "M <= 6", then it is
88 possible to linearize the loop with "f (x) = 6 * i + j". In this
89 case, CLooG would produce this code:
91 | for (c1=7;c1<=6*M+N;c1++) {
93 | if (i <= floord(c1-1,6)) {
98 For an arbitrarily complex loop nests the algorithm proceeds in two
99 steps. First, the LST is flattened by removing the loops structure
100 and by inserting the statements in the order they appear in
101 depth-first order. Then, the scattering of each statement is
102 transformed such that it
104 Supposing that the original program is represented by the following
111 | (loop_4 stmt_5 stmt_6)
117 Loop flattening traverses the LST in depth-first order, and
118 flattens pairs of loops successively by projecting the inner loops
119 in the iteration domain of the outer loops:
121 lst_project_loop (loop_2, loop_3, stride)
125 | (loop_2 stmt_3 stmt_4
126 | (loop_4 stmt_5 stmt_6)
132 lst_project_loop (loop_2, loop_4, stride)
136 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
140 lst_project_loop (loop_1, loop_2, stride)
143 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
146 At each step, the iteration domain of the outer loop is enlarged to
147 contain enough points to iterate over the inner loop domain. */
149 /* Initializes RES to the number of iterations of the linearized loop
150 LST. RES is the cardinal of the iteration domain of LST. */
153 lst_linearized_niter (lst_p lst, mpz_t res)
162 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
165 lst_linearized_niter (l, n);
166 mpz_add (res, res, n);
169 if (LST_LOOP_P (lst))
171 lst_niter_for_loop (lst, n);
173 if (mpz_cmp_si (res, 0) != 0)
174 mpz_mul (res, res, n);
182 /* Applies the translation "f (x) = x + OFFSET" to the loop containing
186 lst_offset (lst_p stmt, mpz_t offset)
188 lst_p inner = LST_LOOP_FATHER (stmt);
189 poly_bb_p pbb = LST_PBB (stmt);
190 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
191 int inner_depth = lst_depth (inner);
192 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
193 ppl_Linear_Expression_t expr;
194 ppl_dimension_type dim;
195 ppl_Coefficient_t one;
200 ppl_new_Coefficient (&one);
201 ppl_assign_Coefficient_from_mpz_t (one, x);
203 ppl_Polyhedron_space_dimension (poly, &dim);
204 ppl_new_Linear_Expression_with_dimension (&expr, dim);
206 ppl_set_coef (expr, inner_dim, 1);
207 ppl_set_inhomogeneous_gmp (expr, offset);
208 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
209 ppl_delete_Linear_Expression (expr);
210 ppl_delete_Coefficient (one);
213 /* Scale by FACTOR the loop LST containing STMT. */
216 lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
219 ppl_Coefficient_t one;
220 int outer_depth = lst_depth (lst);
221 poly_bb_p pbb = LST_PBB (stmt);
222 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
223 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
224 ppl_Linear_Expression_t expr;
225 ppl_dimension_type dim;
229 ppl_new_Coefficient (&one);
230 ppl_assign_Coefficient_from_mpz_t (one, x);
232 ppl_Polyhedron_space_dimension (poly, &dim);
233 ppl_new_Linear_Expression_with_dimension (&expr, dim);
235 /* outer_dim = factor * outer_dim. */
236 ppl_set_coef_gmp (expr, outer_dim, factor);
237 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
238 ppl_delete_Linear_Expression (expr);
241 ppl_delete_Coefficient (one);
244 /* Project the INNER loop into the iteration domain of the OUTER loop.
245 STRIDE is the number of iterations between two iterations of the
249 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
254 ppl_Coefficient_t one;
255 int outer_depth = lst_depth (outer);
256 int inner_depth = lst_depth (inner);
260 ppl_new_Coefficient (&one);
261 ppl_assign_Coefficient_from_mpz_t (one, x);
263 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
265 poly_bb_p pbb = LST_PBB (stmt);
266 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
267 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
268 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
269 ppl_Linear_Expression_t expr;
270 ppl_dimension_type dim;
271 ppl_dimension_type *ds;
273 /* There should be no loops under INNER. */
274 gcc_assert (!LST_LOOP_P (stmt));
275 ppl_Polyhedron_space_dimension (poly, &dim);
276 ppl_new_Linear_Expression_with_dimension (&expr, dim);
278 /* outer_dim = outer_dim * stride + inner_dim. */
279 ppl_set_coef (expr, inner_dim, 1);
280 ppl_set_coef_gmp (expr, outer_dim, stride);
281 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
282 ppl_delete_Linear_Expression (expr);
284 /* Project on inner_dim. */
285 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
286 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
287 ppl_delete_Linear_Expression (expr);
289 /* Remove inner loop and the static schedule of its body. */
290 ds = XNEWVEC (ppl_dimension_type, 2);
292 ds[1] = inner_dim + 1;
293 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
294 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
299 ppl_delete_Coefficient (one);
302 /* Flattens the loop nest LST. Return true when something changed.
303 OFFSET is used to compute the number of iterations of the outermost
304 loop before the current LST is executed. */
307 lst_flatten_loop (lst_p lst, mpz_t init_offset)
312 mpz_t n, one, offset, stride;
318 mpz_set (offset, init_offset);
321 lst_linearized_niter (lst, stride);
322 lst_niter_for_loop (lst, n);
323 mpz_tdiv_q (stride, stride, n);
325 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
330 lst_flatten_loop (l, offset);
331 lst_niter_for_loop (l, n);
333 lst_project_loop (lst, l, stride);
335 /* The offset is the number of iterations minus 1, as we want
336 to execute the next statements at the same iteration as the
337 last iteration of the loop. */
339 mpz_add (offset, offset, n);
343 lst_scale (lst, l, stride);
344 if (mpz_cmp_si (offset, 0) != 0)
345 lst_offset (l, offset);
348 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
350 lst_remove_loop_and_inline_stmts_in_loop_father (l);
359 /* Remove all but the first 3 dimensions of the scattering:
360 - dim0: the static schedule for the loop
361 - dim1: the dynamic schedule of the loop
362 - dim2: the static schedule for the loop body. */
365 remove_unused_scattering_dimensions (lst_p lst)
370 ppl_Coefficient_t one;
374 ppl_new_Coefficient (&one);
375 ppl_assign_Coefficient_from_mpz_t (one, x);
377 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
379 poly_bb_p pbb = LST_PBB (stmt);
380 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
381 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
382 ppl_dimension_type *ds;
384 /* There should be no loops inside LST after flattening. */
385 gcc_assert (!LST_LOOP_P (stmt));
387 if (!nb_dims_to_remove)
390 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
391 for (j = 0; j < nb_dims_to_remove; j++)
394 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
395 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
400 ppl_delete_Coefficient (one);
403 /* Flattens all the loop nests of LST. Return true when something
407 lst_do_flatten (lst_p lst)
415 || !LST_LOOP_P (lst))
419 mpz_set_si (zero, 0);
421 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
424 res |= lst_flatten_loop (l, zero);
425 remove_unused_scattering_dimensions (l);
428 lst_update_scattering (lst);
433 /* Flatten all the loop nests in SCOP. Returns true when something
437 flatten_all_loops (scop_p scop)
439 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));