X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-loop-linear.c;h=7e5f298be16a662a1dea45076d772ac2165de013;hb=8d3b4bb34e09689588a318c65a0bc9e5e3281dbb;hp=bf353e59f5b4ba99e8f1ee1e35a2c59e4795ad8c;hpb=04bd1b812339656ba9b4d7926930d0e0e4afb508;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c index bf353e59f5b..7e5f298be16 100644 --- a/gcc/tree-loop-linear.c +++ b/gcc/tree-loop-linear.c @@ -1,12 +1,12 @@ /* Linear Loop transforms - Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. Contributed by Daniel Berlin . This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later +Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY @@ -15,9 +15,8 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ +along with GCC; see the file COPYING3. If not see +. */ #include "config.h" @@ -31,6 +30,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "rtl.h" #include "basic-block.h" #include "diagnostic.h" +#include "obstack.h" #include "tree-flow.h" #include "tree-dump.h" #include "timevar.h" @@ -89,13 +89,13 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA */ static void -gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations, - VEC (data_reference_p, heap) *datarefs, - struct loop *loop, - struct loop *first_loop, - unsigned int *dependence_steps, - unsigned int *nb_deps_not_carried_by_loop, - unsigned int *access_strides) +gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations ATTRIBUTE_UNUSED, + VEC (data_reference_p, heap) *datarefs ATTRIBUTE_UNUSED, + struct loop *loop ATTRIBUTE_UNUSED, + struct loop *first_loop ATTRIBUTE_UNUSED, + unsigned int *dependence_steps ATTRIBUTE_UNUSED, + unsigned int *nb_deps_not_carried_by_loop ATTRIBUTE_UNUSED, + double_int *access_strides ATTRIBUTE_UNUSED) { unsigned int i, j; struct data_dependence_relation *ddr; @@ -103,7 +103,7 @@ gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations, *dependence_steps = 0; *nb_deps_not_carried_by_loop = 0; - *access_strides = 0; + *access_strides = double_int_zero; for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++) { @@ -117,7 +117,7 @@ gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations, for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) { - int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth]; + int dist = DDR_DIST_VECT (ddr, j)[loop_depth (loop) - loop_depth (first_loop)]; if (dist == 0) (*nb_deps_not_carried_by_loop) += 1; @@ -134,24 +134,30 @@ gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations, for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) { unsigned int it; - tree stmt = DR_STMT (dr); + tree ref = DR_REF (dr); + gimple stmt = DR_STMT (dr); struct loop *stmt_loop = loop_containing_stmt (stmt); struct loop *inner_loop = first_loop->inner; - + if (inner_loop != stmt_loop && !flow_loop_nested_p (inner_loop, stmt_loop)) continue; - for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++) + + for (it = 0; it < DR_NUM_DIMENSIONS (dr); + it++, ref = TREE_OPERAND (ref, 0)) { - tree chrec = DR_ACCESS_FN (dr, it); - tree tstride = evolution_part_in_loop_num - (chrec, loop->num); - - if (tstride == NULL_TREE - || TREE_CODE (tstride) != INTEGER_CST) + int num = am_vector_index_for_loop (DR_ACCESS_MATRIX (dr), loop->num); + int istride = AM_GET_ACCESS_MATRIX_ELEMENT (DR_ACCESS_MATRIX (dr), it, num); + tree array_size = TYPE_SIZE (TREE_TYPE (ref)); + double_int dstride; + + if (array_size == NULL_TREE + || TREE_CODE (array_size) != INTEGER_CST) continue; - - (*access_strides) += int_cst_value (tstride); + + dstride = double_int_mul (tree_to_double_int (array_size), + shwi_to_double_int (istride)); + (*access_strides) = double_int_add (*access_strides, dstride); } } } @@ -171,25 +177,35 @@ try_interchange_loops (lambda_trans_matrix trans, VEC (data_reference_p, heap) *datarefs, struct loop *first_loop) { + bool res; struct loop *loop_i; struct loop *loop_j; unsigned int dependence_steps_i, dependence_steps_j; - unsigned int access_strides_i, access_strides_j; + double_int access_strides_i, access_strides_j; + double_int small, large, nb_iter; + double_int l1_cache_size, l2_cache_size; + int cmp; unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; struct data_dependence_relation *ddr; + if (VEC_length (ddr_p, dependence_relations) == 0) + return trans; + /* When there is an unknown relation in the dependence_relations, we know that it is no worth looking at this loop nest: give up. */ ddr = VEC_index (ddr_p, dependence_relations, 0); if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) return trans; - + + l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024); + l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024); + /* LOOP_I is always the outer loop. */ for (loop_j = first_loop->inner; loop_j; loop_j = loop_j->inner) for (loop_i = first_loop; - loop_i->depth < loop_j->depth; + loop_depth (loop_i) < loop_depth (loop_j); loop_i = loop_i->inner) { gather_interchange_stats (dependence_relations, datarefs, @@ -205,92 +221,138 @@ try_interchange_loops (lambda_trans_matrix trans, /* Heuristics for loop interchange profitability: + 0. Don't transform if the smallest stride is larger than + the L2 cache, or if the largest stride multiplied by the + number of iterations is smaller than the L1 cache. + 1. (spatial locality) Inner loops should have smallest dependence steps. 2. (spatial locality) Inner loops should contain more dependence relations not carried by the loop. - 3. (temporal locality) Inner loops should have smallest + 3. (temporal locality) Inner loops should have smallest array access strides. */ + + cmp = double_int_ucmp (access_strides_i, access_strides_j); + small = cmp < 0 ? access_strides_i : access_strides_j; + large = cmp < 0 ? access_strides_j : access_strides_i; + + if (double_int_ucmp (small, l2_cache_size) > 0) + continue; + + res = cmp < 0 ? + estimated_loop_iterations (loop_j, false, &nb_iter): + estimated_loop_iterations (loop_i, false, &nb_iter); + large = double_int_mul (large, nb_iter); + + if (res && double_int_ucmp (large, l1_cache_size) < 0) + continue; + if (dependence_steps_i < dependence_steps_j || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j - || access_strides_i < access_strides_j) + || cmp < 0) { lambda_matrix_row_exchange (LTM_MATRIX (trans), - loop_i->depth - first_loop->depth, - loop_j->depth - first_loop->depth); + loop_depth (loop_i) - loop_depth (first_loop), + loop_depth (loop_j) - loop_depth (first_loop)); /* Validate the resulting matrix. When the transformation is not valid, reverse to the previous transformation. */ if (!lambda_transform_legal_p (trans, depth, dependence_relations)) lambda_matrix_row_exchange (LTM_MATRIX (trans), - loop_i->depth - first_loop->depth, - loop_j->depth - first_loop->depth); + loop_depth (loop_i) - loop_depth (first_loop), + loop_depth (loop_j) - loop_depth (first_loop)); } } return trans; } -/* Perform a set of linear transforms on LOOPS. */ +/* Return the number of nested loops in LOOP_NEST, or 0 if the loops + are not perfectly nested. */ + +unsigned int +perfect_loop_nest_depth (struct loop *loop_nest) +{ + struct loop *temp; + unsigned int depth = 1; + + /* If it's not a loop nest, we don't want it. We also don't handle + sibling loops properly, which are loops of the following form: + + | for (i = 0; i < 50; i++) + | { + | for (j = 0; j < 50; j++) + | { + | ... + | } + | for (j = 0; j < 50; j++) + | { + | ... + | } + | } + */ + + if (!loop_nest->inner || !single_exit (loop_nest)) + return 0; + + for (temp = loop_nest->inner; temp; temp = temp->inner) + { + /* If we have a sibling loop or multiple exit edges, jump ship. */ + if (temp->next || !single_exit (temp)) + return 0; + + depth++; + } + + return depth; +} + +/* Perform a set of linear transforms on loops. */ void -linear_transform_loops (struct loops *loops) +linear_transform_loops (void) { - unsigned int i; + bool modified = false; + loop_iterator li; VEC(tree,heap) *oldivs = NULL; VEC(tree,heap) *invariants = NULL; - - for (i = 1; i < loops->num; i++) + VEC(tree,heap) *lambda_parameters = NULL; + VEC(gimple,heap) *remove_ivs = VEC_alloc (gimple, heap, 3); + struct loop *loop_nest; + gimple oldiv_stmt; + unsigned i; + + FOR_EACH_LOOP (li, loop_nest, 0) { unsigned int depth = 0; VEC (ddr_p, heap) *dependence_relations; VEC (data_reference_p, heap) *datarefs; - struct loop *loop_nest = loops->parray[i]; - struct loop *temp; + lambda_loopnest before, after; lambda_trans_matrix trans; - bool problem = false; - bool need_perfect_nest = false; - /* If it's not a loop nest, we don't want it. - We also don't handle sibling loops properly, - which are loops of the following form: - for (i = 0; i < 50; i++) - { - for (j = 0; j < 50; j++) - { - ... - } - for (j = 0; j < 50; j++) - { - ... - } - } */ - if (!loop_nest || !loop_nest->inner) + struct obstack lambda_obstack; + gcc_obstack_init (&lambda_obstack); + + depth = perfect_loop_nest_depth (loop_nest); + if (depth == 0) continue; + VEC_truncate (tree, oldivs, 0); VEC_truncate (tree, invariants, 0); - depth = 1; - for (temp = loop_nest->inner; temp; temp = temp->inner) - { - /* If we have a sibling loop or multiple exit edges, jump ship. */ - if (temp->next || !temp->single_exit) - { - problem = true; - break; - } - depth ++; - } - if (problem) - continue; + VEC_truncate (tree, lambda_parameters, 0); - /* Analyze data references and dependence relations using scev. */ - datarefs = VEC_alloc (data_reference_p, heap, 10); dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); - compute_data_dependences_for_loop (loop_nest, true, &datarefs, - &dependence_relations); + if (!compute_data_dependences_for_loop (loop_nest, true, &datarefs, + &dependence_relations)) + continue; + + lambda_collect_parameters (datarefs, &lambda_parameters); + if (!lambda_compute_access_matrices (datarefs, lambda_parameters, + loop_nest->num)) + continue; if (dump_file && (dump_flags & TDF_DETAILS)) dump_ddrs (dump_file, dependence_relations); @@ -305,7 +367,7 @@ linear_transform_loops (struct loops *loops) { if (dump_file) fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); - continue; + goto free_and_continue; } /* Check whether the transformation is legal. */ @@ -313,26 +375,22 @@ linear_transform_loops (struct loops *loops) { if (dump_file) fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); - continue; + goto free_and_continue; } - if (!perfect_nest_p (loop_nest)) - need_perfect_nest = true; + before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs, + &invariants, &lambda_obstack); - before = gcc_loopnest_to_lambda_loopnest (loops, - loop_nest, &oldivs, - &invariants, - need_perfect_nest); if (!before) - continue; - + goto free_and_continue; + if (dump_file) { fprintf (dump_file, "Before:\n"); print_lambda_loopnest (dump_file, before, 'i'); } - after = lambda_loopnest_transform (before, trans); + after = lambda_loopnest_transform (before, trans, &lambda_obstack); if (dump_file) { @@ -341,17 +399,27 @@ linear_transform_loops (struct loops *loops) } lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, - after, trans); + &remove_ivs, + after, trans, &lambda_obstack); + modified = true; if (dump_file) fprintf (dump_file, "Successfully transformed loop.\n"); + free_and_continue: + obstack_free (&lambda_obstack, NULL); free_dependence_relations (dependence_relations); free_data_refs (datarefs); } + for (i = 0; VEC_iterate (gimple, remove_ivs, i, oldiv_stmt); i++) + remove_iv (oldiv_stmt); + VEC_free (tree, heap, oldivs); VEC_free (tree, heap, invariants); + VEC_free (gimple, heap, remove_ivs); scev_reset (); - rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi); + + if (modified) + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi); }