X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-parloops.c;h=28c450d6b2d8372359a2b28db46087a78b5a0510;hb=44f861fca343148a1b0720105ec2b7f14bbcc849;hp=aba70c8f082fde66ab0510f94e076e840eb4d881;hpb=a8af2e868d6e2eb08b40066e4fc888d8ca2424ab;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index aba70c8f082..28c450d6b2d 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -1,5 +1,5 @@ /* Loop autoparallelization. - Copyright (C) 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. Contributed by Sebastian Pop and Zdenek Dvorak . @@ -23,16 +23,12 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" -#include "tree.h" #include "tree-flow.h" #include "cfgloop.h" #include "tree-data-ref.h" -#include "tree-pretty-print.h" +#include "tree-scalar-evolution.h" #include "gimple-pretty-print.h" #include "tree-pass.h" -#include "tree-scalar-evolution.h" -#include "hashtab.h" #include "langhooks.h" #include "tree-vectorizer.h" @@ -205,7 +201,7 @@ reduction_phi (htab_t reduction_list, gimple phi) { struct reduction_info tmpred, *red; - if (htab_elements (reduction_list) == 0) + if (htab_elements (reduction_list) == 0 || phi == NULL) return NULL; tmpred.reduc_phi = phi; @@ -244,6 +240,125 @@ name_to_copy_elt_hash (const void *aa) return (hashval_t) a->version; } +/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE + matrix. Rather than use floats, we simply keep a single DENOMINATOR that + represents the denominator for every element in the matrix. */ +typedef struct lambda_trans_matrix_s +{ + lambda_matrix matrix; + int rowsize; + int colsize; + int denominator; +} *lambda_trans_matrix; +#define LTM_MATRIX(T) ((T)->matrix) +#define LTM_ROWSIZE(T) ((T)->rowsize) +#define LTM_COLSIZE(T) ((T)->colsize) +#define LTM_DENOMINATOR(T) ((T)->denominator) + +/* Allocate a new transformation matrix. */ + +static lambda_trans_matrix +lambda_trans_matrix_new (int colsize, int rowsize, + struct obstack * lambda_obstack) +{ + lambda_trans_matrix ret; + + ret = (lambda_trans_matrix) + obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s)); + LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack); + LTM_ROWSIZE (ret) = rowsize; + LTM_COLSIZE (ret) = colsize; + LTM_DENOMINATOR (ret) = 1; + return ret; +} + +/* Multiply a vector VEC by a matrix MAT. + MAT is an M*N matrix, and VEC is a vector with length N. The result + is stored in DEST which must be a vector of length M. */ + +static void +lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n, + lambda_vector vec, lambda_vector dest) +{ + int i, j; + + lambda_vector_clear (dest, m); + for (i = 0; i < m; i++) + for (j = 0; j < n; j++) + dest[i] += matrix[i][j] * vec[j]; +} + +/* Return true if TRANS is a legal transformation matrix that respects + the dependence vectors in DISTS and DIRS. The conservative answer + is false. + + "Wolfe proves that a unimodular transformation represented by the + matrix T is legal when applied to a loop nest with a set of + lexicographically non-negative distance vectors RDG if and only if + for each vector d in RDG, (T.d >= 0) is lexicographically positive. + i.e.: if and only if it transforms the lexicographically positive + distance vectors to lexicographically positive vectors. Note that + a unimodular matrix must transform the zero vector (and only it) to + the zero vector." S.Muchnick. */ + +static bool +lambda_transform_legal_p (lambda_trans_matrix trans, + int nb_loops, + VEC (ddr_p, heap) *dependence_relations) +{ + unsigned int i, j; + lambda_vector distres; + struct data_dependence_relation *ddr; + + gcc_assert (LTM_COLSIZE (trans) == nb_loops + && LTM_ROWSIZE (trans) == nb_loops); + + /* When there are no dependences, the transformation is correct. */ + if (VEC_length (ddr_p, dependence_relations) == 0) + return true; + + ddr = VEC_index (ddr_p, dependence_relations, 0); + if (ddr == NULL) + return true; + + /* When there is an unknown relation in the dependence_relations, we + know that it is no worth looking at this loop nest: give up. */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + return false; + + distres = lambda_vector_new (nb_loops); + + /* For each distance vector in the dependence graph. */ + FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr) + { + /* Don't care about relations for which we know that there is no + dependence, nor about read-read (aka. output-dependences): + these data accesses can happen in any order. */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_known + || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))) + continue; + + /* Conservatively answer: "this transformation is not valid". */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + return false; + + /* If the dependence could not be captured by a distance vector, + conservatively answer that the transform is not valid. */ + if (DDR_NUM_DIST_VECTS (ddr) == 0) + return false; + + /* Compute trans.dist_vect */ + for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) + { + lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, + DDR_DIST_VECT (ddr, j), distres); + + if (!lambda_vector_lexico_pos (distres, nb_loops)) + return false; + } + } + return true; +} /* Data dependency analysis. Returns true if the iterations of LOOP are independent on each other (that is, if we can execute them @@ -272,8 +387,14 @@ loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack) datarefs = VEC_alloc (data_reference_p, heap, 10); dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); loop_nest = VEC_alloc (loop_p, heap, 3); - compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs, - &dependence_relations); + if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs, + &dependence_relations)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " FAILED: cannot analyze data dependencies\n"); + ret = false; + goto end; + } if (dump_file && (dump_flags & TDF_DETAILS)) dump_data_dependence_relations (dump_file, dependence_relations); @@ -290,6 +411,7 @@ loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack) fprintf (dump_file, " FAILED: data dependencies exist across iterations\n"); + end: VEC_free (loop_p, heap, loop_nest); free_dependence_relations (dependence_relations); free_data_refs (datarefs); @@ -601,8 +723,11 @@ eliminate_local_variables (edge entry, edge exit) FOR_EACH_VEC_ELT (basic_block, body, i, bb) if (bb != entry_bb && bb != exit_bb) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - if (gimple_debug_bind_p (gsi_stmt (gsi))) - has_debug_stmt = true; + if (is_gimple_debug (gsi_stmt (gsi))) + { + if (gimple_debug_bind_p (gsi_stmt (gsi))) + has_debug_stmt = true; + } else eliminate_local_variables_stmt (entry, &gsi, decl_address); @@ -768,8 +893,8 @@ separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt, replacement decls are stored in DECL_COPIES. */ static bool -separate_decls_in_region_debug_bind (gimple stmt, - htab_t name_copies, htab_t decl_copies) +separate_decls_in_region_debug (gimple stmt, htab_t name_copies, + htab_t decl_copies) { use_operand_p use; ssa_op_iter oi; @@ -778,7 +903,12 @@ separate_decls_in_region_debug_bind (gimple stmt, struct name_to_copy_elt elt; void **slot, **dslot; - var = gimple_debug_bind_get_var (stmt); + if (gimple_debug_bind_p (stmt)) + var = gimple_debug_bind_get_var (stmt); + else if (gimple_debug_source_bind_p (stmt)) + var = gimple_debug_source_bind_get_var (stmt); + else + return true; if (TREE_CODE (var) == DEBUG_EXPR_DECL) return true; gcc_assert (DECL_P (var) && SSA_VAR_P (var)); @@ -786,7 +916,10 @@ separate_decls_in_region_debug_bind (gimple stmt, dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT); if (!dslot) return true; - gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to); + if (gimple_debug_bind_p (stmt)) + gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to); + else if (gimple_debug_source_bind_p (stmt)) + gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to); FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE) { @@ -1180,11 +1313,10 @@ separate_decls_in_region (edge entry, edge exit, htab_t reduction_list, { gimple stmt = gsi_stmt (gsi); - if (gimple_debug_bind_p (stmt)) + if (is_gimple_debug (stmt)) { - if (separate_decls_in_region_debug_bind (stmt, - name_copies, - decl_copies)) + if (separate_decls_in_region_debug (stmt, name_copies, + decl_copies)) { gsi_remove (&gsi, true); continue; @@ -1349,6 +1481,8 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit gimple phi, nphi, cond_stmt, stmt, cond_nit; gimple_stmt_iterator gsi; tree nit_1; + edge exit_1; + tree new_rhs; split_block_after_labels (loop->header); orig_header = single_succ (loop->header); @@ -1377,6 +1511,38 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit control = t; } } + + /* Setting the condition towards peeling the last iteration: + If the block consisting of the exit condition has the latch as + successor, then the body of the loop is executed before + the exit condition is tested. In such case, moving the + condition to the entry, causes that the loop will iterate + one less iteration (which is the wanted outcome, since we + peel out the last iteration). If the body is executed after + the condition, moving the condition to the entry requires + decrementing one iteration. */ + exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit); + if (exit_1->dest == loop->latch) + new_rhs = gimple_cond_rhs (cond_stmt); + else + { + new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)), + gimple_cond_rhs (cond_stmt), + build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1)); + if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME) + { + basic_block preheader; + gimple_stmt_iterator gsi1; + + preheader = loop_preheader_edge(loop)->src; + gsi1 = gsi_after_labels (preheader); + new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true, + NULL_TREE,false,GSI_CONTINUE_LINKING); + } + } + gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs)); + gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt))); + bbs = get_loop_body_in_dom_order (loop); for (n = 0; bbs[n] != loop->latch; n++) @@ -2019,7 +2185,7 @@ parallelize_loops (void) /* FIXME: the check for vector phi nodes could be removed. */ || loop_has_vector_phi_nodes (loop)) continue; - estimated = estimated_loop_iterations_int (loop, false); + estimated = max_stmt_executions_int (loop, false); /* FIXME: Bypass this check as graphite doesn't update the count and frequency correctly now. */ if (!flag_loop_parallelize_all