1 /* Loop flattening for Graphite.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
35 #include "tree-chrec.h"
36 #include "tree-data-ref.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-pass.h"
40 #include "value-prof.h"
41 #include "pointer-set.h"
48 #include "graphite-ppl.h"
50 #include "graphite-poly.h"
52 /* The loop flattening pass transforms loop nests into a single loop,
53 removing the loop nesting structure. The auto-vectorization can
54 then apply on the full loop body, without needing the outer-loop
57 The loop flattening pass that has been described in a very Fortran
58 specific way in the 1992 paper by Reinhard von Hanxleden and Ken
59 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop
60 Transformations" available from
61 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
63 The canonical example is as follows: suppose that we have a loop
64 nest with known iteration counts
66 | for (i = 1; i <= 6; i++)
67 | for (j = 1; j <= 6; j++)
70 The loop flattening is performed by linearizing the iteration space
71 using the function "f (x) = 6 * i + j". In this case, CLooG would
74 | for (c1=7;c1<=42;c1++) {
79 There are several limitations for loop flattening that are linked
80 to the expressivity of the polyhedral model. One has to take an
81 upper bound approximation to deal with the parametric case of loop
82 flattening. For example, in the loop nest:
84 | for (i = 1; i <= N; i++)
85 | for (j = 1; j <= M; j++)
88 One would like to flatten this loop using a linearization function
89 like this "f (x) = M * i + j". However CLooG's schedules are not
90 expressive enough to deal with this case, and so the parameter M
91 has to be replaced by an integer upper bound approximation. If we
92 further know in the context of the scop that "M <= 6", then it is
93 possible to linearize the loop with "f (x) = 6 * i + j". In this
94 case, CLooG would produce this code:
96 | for (c1=7;c1<=6*M+N;c1++) {
98 | if (i <= floord(c1-1,6)) {
103 For an arbitrarily complex loop nest the algorithm proceeds in two
104 steps. First, the LST is flattened by removing the loops structure
105 and by inserting the statements in the order they appear in
106 depth-first order. Then, the scattering of each statement is
107 transformed accordingly.
109 Supposing that the original program is represented by the following
116 | (loop_4 stmt_5 stmt_6)
122 Loop flattening traverses the LST in depth-first order, and
123 flattens pairs of loops successively by projecting the inner loops
124 in the iteration domain of the outer loops:
126 lst_project_loop (loop_2, loop_3, stride)
130 | (loop_2 stmt_3 stmt_4
131 | (loop_4 stmt_5 stmt_6)
137 lst_project_loop (loop_2, loop_4, stride)
141 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
145 lst_project_loop (loop_1, loop_2, stride)
148 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
151 At each step, the iteration domain of the outer loop is enlarged to
152 contain enough points to iterate over the inner loop domain. */
154 /* Initializes RES to the number of iterations of the linearized loop
155 LST. RES is the cardinal of the iteration domain of LST. */
158 lst_linearized_niter (lst_p lst, mpz_t res)
167 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
170 lst_linearized_niter (l, n);
171 mpz_add (res, res, n);
174 if (LST_LOOP_P (lst))
176 lst_niter_for_loop (lst, n);
178 if (mpz_cmp_si (res, 0) != 0)
179 mpz_mul (res, res, n);
187 /* Applies the translation "f (x) = x + OFFSET" to the loop containing
191 lst_offset (lst_p stmt, mpz_t offset)
193 lst_p inner = LST_LOOP_FATHER (stmt);
194 poly_bb_p pbb = LST_PBB (stmt);
195 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
196 int inner_depth = lst_depth (inner);
197 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
198 ppl_Linear_Expression_t expr;
199 ppl_dimension_type dim;
200 ppl_Coefficient_t one;
205 ppl_new_Coefficient (&one);
206 ppl_assign_Coefficient_from_mpz_t (one, x);
208 ppl_Polyhedron_space_dimension (poly, &dim);
209 ppl_new_Linear_Expression_with_dimension (&expr, dim);
211 ppl_set_coef (expr, inner_dim, 1);
212 ppl_set_inhomogeneous_gmp (expr, offset);
213 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
214 ppl_delete_Linear_Expression (expr);
215 ppl_delete_Coefficient (one);
218 /* Scale by FACTOR the loop LST containing STMT. */
221 lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
224 ppl_Coefficient_t one;
225 int outer_depth = lst_depth (lst);
226 poly_bb_p pbb = LST_PBB (stmt);
227 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
228 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
229 ppl_Linear_Expression_t expr;
230 ppl_dimension_type dim;
234 ppl_new_Coefficient (&one);
235 ppl_assign_Coefficient_from_mpz_t (one, x);
237 ppl_Polyhedron_space_dimension (poly, &dim);
238 ppl_new_Linear_Expression_with_dimension (&expr, dim);
240 /* outer_dim = factor * outer_dim. */
241 ppl_set_coef_gmp (expr, outer_dim, factor);
242 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
243 ppl_delete_Linear_Expression (expr);
246 ppl_delete_Coefficient (one);
249 /* Project the INNER loop into the iteration domain of the OUTER loop.
250 STRIDE is the number of iterations between two iterations of the
254 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
259 ppl_Coefficient_t one;
260 int outer_depth = lst_depth (outer);
261 int inner_depth = lst_depth (inner);
265 ppl_new_Coefficient (&one);
266 ppl_assign_Coefficient_from_mpz_t (one, x);
268 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
270 poly_bb_p pbb = LST_PBB (stmt);
271 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
272 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
273 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
274 ppl_Linear_Expression_t expr;
275 ppl_dimension_type dim;
276 ppl_dimension_type *ds;
278 /* There should be no loops under INNER. */
279 gcc_assert (!LST_LOOP_P (stmt));
280 ppl_Polyhedron_space_dimension (poly, &dim);
281 ppl_new_Linear_Expression_with_dimension (&expr, dim);
283 /* outer_dim = outer_dim * stride + inner_dim. */
284 ppl_set_coef (expr, inner_dim, 1);
285 ppl_set_coef_gmp (expr, outer_dim, stride);
286 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
287 ppl_delete_Linear_Expression (expr);
289 /* Project on inner_dim. */
290 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
291 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
292 ppl_delete_Linear_Expression (expr);
294 /* Remove inner loop and the static schedule of its body. */
295 ds = XNEWVEC (ppl_dimension_type, 2);
297 ds[1] = inner_dim + 1;
298 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
299 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
304 ppl_delete_Coefficient (one);
307 /* Flattens the loop nest LST. Return true when something changed.
308 OFFSET is used to compute the number of iterations of the outermost
309 loop before the current LST is executed. */
312 lst_flatten_loop (lst_p lst, mpz_t init_offset)
317 mpz_t n, one, offset, stride;
323 mpz_set (offset, init_offset);
326 lst_linearized_niter (lst, stride);
327 lst_niter_for_loop (lst, n);
328 mpz_tdiv_q (stride, stride, n);
330 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
335 lst_flatten_loop (l, offset);
336 lst_niter_for_loop (l, n);
338 lst_project_loop (lst, l, stride);
340 /* The offset is the number of iterations minus 1, as we want
341 to execute the next statements at the same iteration as the
342 last iteration of the loop. */
344 mpz_add (offset, offset, n);
348 lst_scale (lst, l, stride);
349 if (mpz_cmp_si (offset, 0) != 0)
350 lst_offset (l, offset);
353 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
355 lst_remove_loop_and_inline_stmts_in_loop_father (l);
364 /* Remove all but the first 3 dimensions of the scattering:
365 - dim0: the static schedule for the loop
366 - dim1: the dynamic schedule of the loop
367 - dim2: the static schedule for the loop body. */
370 remove_unused_scattering_dimensions (lst_p lst)
375 ppl_Coefficient_t one;
379 ppl_new_Coefficient (&one);
380 ppl_assign_Coefficient_from_mpz_t (one, x);
382 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
384 poly_bb_p pbb = LST_PBB (stmt);
385 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
386 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
387 ppl_dimension_type *ds;
389 /* There should be no loops inside LST after flattening. */
390 gcc_assert (!LST_LOOP_P (stmt));
392 if (!nb_dims_to_remove)
395 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
396 for (j = 0; j < nb_dims_to_remove; j++)
399 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
400 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
405 ppl_delete_Coefficient (one);
408 /* Flattens all the loop nests of LST. Return true when something
412 lst_do_flatten (lst_p lst)
420 || !LST_LOOP_P (lst))
424 mpz_set_si (zero, 0);
426 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
429 res |= lst_flatten_loop (l, zero);
430 remove_unused_scattering_dimensions (l);
433 lst_update_scattering (lst);
438 /* Flatten all the loop nests in SCOP. Returns true when something
442 flatten_all_loops (scop_p scop)
444 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));