/* Loop autoparallelization.
- Copyright (C) 2006, 2007, 2008, 2009, 2010
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
Zdenek Dvorak <dvorakz@suse.cz>.
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "tm.h"
-#include "tree.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
-#include "tree-pretty-print.h"
+#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
-#include "tree-scalar-evolution.h"
-#include "hashtab.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
{
struct reduction_info tmpred, *red;
- if (htab_elements (reduction_list) == 0)
+ if (htab_elements (reduction_list) == 0 || phi == NULL)
return NULL;
tmpred.reduc_phi = phi;
return (hashval_t) a->version;
}
+/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
+ matrix. Rather than use floats, we simply keep a single DENOMINATOR that
+ represents the denominator for every element in the matrix. */
+typedef struct lambda_trans_matrix_s
+{
+ lambda_matrix matrix;
+ int rowsize;
+ int colsize;
+ int denominator;
+} *lambda_trans_matrix;
+#define LTM_MATRIX(T) ((T)->matrix)
+#define LTM_ROWSIZE(T) ((T)->rowsize)
+#define LTM_COLSIZE(T) ((T)->colsize)
+#define LTM_DENOMINATOR(T) ((T)->denominator)
+
+/* Allocate a new transformation matrix. */
+
+static lambda_trans_matrix
+lambda_trans_matrix_new (int colsize, int rowsize,
+ struct obstack * lambda_obstack)
+{
+ lambda_trans_matrix ret;
+
+ ret = (lambda_trans_matrix)
+ obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
+ LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
+ LTM_ROWSIZE (ret) = rowsize;
+ LTM_COLSIZE (ret) = colsize;
+ LTM_DENOMINATOR (ret) = 1;
+ return ret;
+}
+
+/* Multiply a vector VEC by a matrix MAT.
+ MAT is an M*N matrix, and VEC is a vector with length N. The result
+ is stored in DEST which must be a vector of length M. */
+
+static void
+lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
+ lambda_vector vec, lambda_vector dest)
+{
+ int i, j;
+
+ lambda_vector_clear (dest, m);
+ for (i = 0; i < m; i++)
+ for (j = 0; j < n; j++)
+ dest[i] += matrix[i][j] * vec[j];
+}
+
+/* Return true if TRANS is a legal transformation matrix that respects
+ the dependence vectors in DISTS and DIRS. The conservative answer
+ is false.
+
+ "Wolfe proves that a unimodular transformation represented by the
+ matrix T is legal when applied to a loop nest with a set of
+ lexicographically non-negative distance vectors RDG if and only if
+ for each vector d in RDG, (T.d >= 0) is lexicographically positive.
+ i.e.: if and only if it transforms the lexicographically positive
+ distance vectors to lexicographically positive vectors. Note that
+ a unimodular matrix must transform the zero vector (and only it) to
+ the zero vector." S.Muchnick. */
+
+static bool
+lambda_transform_legal_p (lambda_trans_matrix trans,
+ int nb_loops,
+ VEC (ddr_p, heap) *dependence_relations)
+{
+ unsigned int i, j;
+ lambda_vector distres;
+ struct data_dependence_relation *ddr;
+
+ gcc_assert (LTM_COLSIZE (trans) == nb_loops
+ && LTM_ROWSIZE (trans) == nb_loops);
+
+ /* When there are no dependences, the transformation is correct. */
+ if (VEC_length (ddr_p, dependence_relations) == 0)
+ return true;
+
+ ddr = VEC_index (ddr_p, dependence_relations, 0);
+ if (ddr == NULL)
+ return true;
+
+ /* When there is an unknown relation in the dependence_relations, we
+ know that it is no worth looking at this loop nest: give up. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ return false;
+
+ distres = lambda_vector_new (nb_loops);
+
+ /* For each distance vector in the dependence graph. */
+ FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
+ {
+ /* Don't care about relations for which we know that there is no
+ dependence, nor about read-read (aka. output-dependences):
+ these data accesses can happen in any order. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known
+ || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
+ continue;
+
+ /* Conservatively answer: "this transformation is not valid". */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ return false;
+
+ /* If the dependence could not be captured by a distance vector,
+ conservatively answer that the transform is not valid. */
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
+ return false;
+
+ /* Compute trans.dist_vect */
+ for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
+ {
+ lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
+ DDR_DIST_VECT (ddr, j), distres);
+
+ if (!lambda_vector_lexico_pos (distres, nb_loops))
+ return false;
+ }
+ }
+ return true;
+}
/* Data dependency analysis. Returns true if the iterations of LOOP
are independent on each other (that is, if we can execute them
datarefs = VEC_alloc (data_reference_p, heap, 10);
dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
loop_nest = VEC_alloc (loop_p, heap, 3);
- compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
- &dependence_relations);
+ if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
+ &dependence_relations))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
+ ret = false;
+ goto end;
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
dump_data_dependence_relations (dump_file, dependence_relations);
fprintf (dump_file,
" FAILED: data dependencies exist across iterations\n");
+ end:
VEC_free (loop_p, heap, loop_nest);
free_dependence_relations (dependence_relations);
free_data_refs (datarefs);
FOR_EACH_VEC_ELT (basic_block, body, i, bb)
if (bb != entry_bb && bb != exit_bb)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- if (gimple_debug_bind_p (gsi_stmt (gsi)))
- has_debug_stmt = true;
+ if (is_gimple_debug (gsi_stmt (gsi)))
+ {
+ if (gimple_debug_bind_p (gsi_stmt (gsi)))
+ has_debug_stmt = true;
+ }
else
eliminate_local_variables_stmt (entry, &gsi, decl_address);
replacement decls are stored in DECL_COPIES. */
static bool
-separate_decls_in_region_debug_bind (gimple stmt,
- htab_t name_copies, htab_t decl_copies)
+separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
+ htab_t decl_copies)
{
use_operand_p use;
ssa_op_iter oi;
struct name_to_copy_elt elt;
void **slot, **dslot;
- var = gimple_debug_bind_get_var (stmt);
+ if (gimple_debug_bind_p (stmt))
+ var = gimple_debug_bind_get_var (stmt);
+ else if (gimple_debug_source_bind_p (stmt))
+ var = gimple_debug_source_bind_get_var (stmt);
+ else
+ return true;
if (TREE_CODE (var) == DEBUG_EXPR_DECL)
return true;
gcc_assert (DECL_P (var) && SSA_VAR_P (var));
dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
if (!dslot)
return true;
- gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
+ if (gimple_debug_bind_p (stmt))
+ gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
+ else if (gimple_debug_source_bind_p (stmt))
+ gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
{
{
gimple stmt = gsi_stmt (gsi);
- if (gimple_debug_bind_p (stmt))
+ if (is_gimple_debug (stmt))
{
- if (separate_decls_in_region_debug_bind (stmt,
- name_copies,
- decl_copies))
+ if (separate_decls_in_region_debug (stmt, name_copies,
+ decl_copies))
{
gsi_remove (&gsi, true);
continue;
gimple phi, nphi, cond_stmt, stmt, cond_nit;
gimple_stmt_iterator gsi;
tree nit_1;
+ edge exit_1;
+ tree new_rhs;
split_block_after_labels (loop->header);
orig_header = single_succ (loop->header);
control = t;
}
}
+
+ /* Setting the condition towards peeling the last iteration:
+ If the block consisting of the exit condition has the latch as
+ successor, then the body of the loop is executed before
+ the exit condition is tested. In such case, moving the
+ condition to the entry, causes that the loop will iterate
+ one less iteration (which is the wanted outcome, since we
+ peel out the last iteration). If the body is executed after
+ the condition, moving the condition to the entry requires
+ decrementing one iteration. */
+ exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
+ if (exit_1->dest == loop->latch)
+ new_rhs = gimple_cond_rhs (cond_stmt);
+ else
+ {
+ new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
+ gimple_cond_rhs (cond_stmt),
+ build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
+ if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
+ {
+ basic_block preheader;
+ gimple_stmt_iterator gsi1;
+
+ preheader = loop_preheader_edge(loop)->src;
+ gsi1 = gsi_after_labels (preheader);
+ new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
+ NULL_TREE,false,GSI_CONTINUE_LINKING);
+ }
+ }
+ gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
+ gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
+
bbs = get_loop_body_in_dom_order (loop);
for (n = 0; bbs[n] != loop->latch; n++)
/* FIXME: the check for vector phi nodes could be removed. */
|| loop_has_vector_phi_nodes (loop))
continue;
- estimated = estimated_loop_iterations_int (loop, false);
+ estimated = max_stmt_executions_int (loop, false);
/* FIXME: Bypass this check as graphite doesn't update the
count and frequency correctly now. */
if (!flag_loop_parallelize_all