PR c++/52035

[pf3gnuchains/gcc-fork.git] / gcc / tree-parloops.c
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c

index aba70c8..28c450d 100644 (file)
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1,5 +1,5 @@
  /* Loop autoparallelization.
-   Copyright (C) 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
     Free Software Foundation, Inc.
     Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
     Zdenek Dvorak <dvorakz@suse.cz>.
@@ -23,16 +23,12 @@ along with GCC; see the file COPYING3.  If not see
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
-#include "tm.h"
-#include "tree.h"
  #include "tree-flow.h"
  #include "cfgloop.h"
  #include "tree-data-ref.h"
-#include "tree-pretty-print.h"
+#include "tree-scalar-evolution.h"
  #include "gimple-pretty-print.h"
  #include "tree-pass.h"
-#include "tree-scalar-evolution.h"
-#include "hashtab.h"
  #include "langhooks.h"
  #include "tree-vectorizer.h"
  
@@ -205,7 +201,7 @@ reduction_phi (htab_t reduction_list, gimple phi)
  {
    struct reduction_info tmpred, *red;
  
-  if (htab_elements (reduction_list) == 0)
+  if (htab_elements (reduction_list) == 0 || phi == NULL)
      return NULL;
  
    tmpred.reduc_phi = phi;
@@ -244,6 +240,125 @@ name_to_copy_elt_hash (const void *aa)
    return (hashval_t) a->version;
  }
  
+/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
+   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
+   represents the denominator for every element in the matrix.  */
+typedef struct lambda_trans_matrix_s
+{
+  lambda_matrix matrix;
+  int rowsize;
+  int colsize;
+  int denominator;
+} *lambda_trans_matrix;
+#define LTM_MATRIX(T) ((T)->matrix)
+#define LTM_ROWSIZE(T) ((T)->rowsize)
+#define LTM_COLSIZE(T) ((T)->colsize)
+#define LTM_DENOMINATOR(T) ((T)->denominator)
+
+/* Allocate a new transformation matrix.  */
+
+static lambda_trans_matrix
+lambda_trans_matrix_new (int colsize, int rowsize,
+                        struct obstack * lambda_obstack)
+{
+  lambda_trans_matrix ret;
+
+  ret = (lambda_trans_matrix)
+    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
+  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
+  LTM_ROWSIZE (ret) = rowsize;
+  LTM_COLSIZE (ret) = colsize;
+  LTM_DENOMINATOR (ret) = 1;
+  return ret;
+}
+
+/* Multiply a vector VEC by a matrix MAT.
+   MAT is an M*N matrix, and VEC is a vector with length N.  The result
+   is stored in DEST which must be a vector of length M.  */
+
+static void
+lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
+                          lambda_vector vec, lambda_vector dest)
+{
+  int i, j;
+
+  lambda_vector_clear (dest, m);
+  for (i = 0; i < m; i++)
+    for (j = 0; j < n; j++)
+      dest[i] += matrix[i][j] * vec[j];
+}
+
+/* Return true if TRANS is a legal transformation matrix that respects
+   the dependence vectors in DISTS and DIRS.  The conservative answer
+   is false.
+
+   "Wolfe proves that a unimodular transformation represented by the
+   matrix T is legal when applied to a loop nest with a set of
+   lexicographically non-negative distance vectors RDG if and only if
+   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
+   i.e.: if and only if it transforms the lexicographically positive
+   distance vectors to lexicographically positive vectors.  Note that
+   a unimodular matrix must transform the zero vector (and only it) to
+   the zero vector." S.Muchnick.  */
+
+static bool
+lambda_transform_legal_p (lambda_trans_matrix trans,
+                         int nb_loops,
+                         VEC (ddr_p, heap) *dependence_relations)
+{
+  unsigned int i, j;
+  lambda_vector distres;
+  struct data_dependence_relation *ddr;
+
+  gcc_assert (LTM_COLSIZE (trans) == nb_loops
+             && LTM_ROWSIZE (trans) == nb_loops);
+
+  /* When there are no dependences, the transformation is correct.  */
+  if (VEC_length (ddr_p, dependence_relations) == 0)
+    return true;
+
+  ddr = VEC_index (ddr_p, dependence_relations, 0);
+  if (ddr == NULL)
+    return true;
+
+  /* When there is an unknown relation in the dependence_relations, we
+     know that it is no worth looking at this loop nest: give up.  */
+  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+    return false;
+
+  distres = lambda_vector_new (nb_loops);
+
+  /* For each distance vector in the dependence graph.  */
+  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
+    {
+      /* Don't care about relations for which we know that there is no
+        dependence, nor about read-read (aka. output-dependences):
+        these data accesses can happen in any order.  */
+      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
+         || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
+       continue;
+
+      /* Conservatively answer: "this transformation is not valid".  */
+      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+       return false;
+
+      /* If the dependence could not be captured by a distance vector,
+        conservatively answer that the transform is not valid.  */
+      if (DDR_NUM_DIST_VECTS (ddr) == 0)
+       return false;
+
+      /* Compute trans.dist_vect */
+      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
+       {
+         lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
+                                    DDR_DIST_VECT (ddr, j), distres);
+
+         if (!lambda_vector_lexico_pos (distres, nb_loops))
+           return false;
+       }
+    }
+  return true;
+}
  
  /* Data dependency analysis. Returns true if the iterations of LOOP
     are independent on each other (that is, if we can execute them
@@ -272,8 +387,14 @@ loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
    datarefs = VEC_alloc (data_reference_p, heap, 10);
    dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
    loop_nest = VEC_alloc (loop_p, heap, 3);
-  compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-                                    &dependence_relations);
+  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
+                                          &dependence_relations))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
+      ret = false;
+      goto end;
+    }
    if (dump_file && (dump_flags & TDF_DETAILS))
      dump_data_dependence_relations (dump_file, dependence_relations);
  
@@ -290,6 +411,7 @@ loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
      fprintf (dump_file,
              "  FAILED: data dependencies exist across iterations\n");
  
+ end:
    VEC_free (loop_p, heap, loop_nest);
    free_dependence_relations (dependence_relations);
    free_data_refs (datarefs);
@@ -601,8 +723,11 @@ eliminate_local_variables (edge entry, edge exit)
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-       if (gimple_debug_bind_p (gsi_stmt (gsi)))
-         has_debug_stmt = true;
+       if (is_gimple_debug (gsi_stmt (gsi)))
+         {
+           if (gimple_debug_bind_p (gsi_stmt (gsi)))
+             has_debug_stmt = true;
+         }
         else
           eliminate_local_variables_stmt (entry, &gsi, decl_address);
  
@@ -768,8 +893,8 @@ separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
     replacement decls are stored in DECL_COPIES.  */
  
  static bool
-separate_decls_in_region_debug_bind (gimple stmt,
-                                    htab_t name_copies, htab_t decl_copies)
+separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
+                               htab_t decl_copies)
  {
    use_operand_p use;
    ssa_op_iter oi;
@@ -778,7 +903,12 @@ separate_decls_in_region_debug_bind (gimple stmt,
    struct name_to_copy_elt elt;
    void **slot, **dslot;
  
-  var = gimple_debug_bind_get_var (stmt);
+  if (gimple_debug_bind_p (stmt))
+    var = gimple_debug_bind_get_var (stmt);
+  else if (gimple_debug_source_bind_p (stmt))
+    var = gimple_debug_source_bind_get_var (stmt);
+  else
+    return true;
    if (TREE_CODE (var) == DEBUG_EXPR_DECL)
      return true;
    gcc_assert (DECL_P (var) && SSA_VAR_P (var));
@@ -786,7 +916,10 @@ separate_decls_in_region_debug_bind (gimple stmt,
    dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
    if (!dslot)
      return true;
-  gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
+  if (gimple_debug_bind_p (stmt))
+    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
+  else if (gimple_debug_source_bind_p (stmt))
+    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  
    FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
@@ -1180,11 +1313,10 @@ separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
             {
               gimple stmt = gsi_stmt (gsi);
  
-             if (gimple_debug_bind_p (stmt))
+             if (is_gimple_debug (stmt))
                 {
-                 if (separate_decls_in_region_debug_bind (stmt,
-                                                          name_copies,
-                                                          decl_copies))
+                 if (separate_decls_in_region_debug (stmt, name_copies,
+                                                     decl_copies))
                     {
                       gsi_remove (&gsi, true);
                       continue;
@@ -1349,6 +1481,8 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
    gimple phi, nphi, cond_stmt, stmt, cond_nit;
    gimple_stmt_iterator gsi;
    tree nit_1;
+  edge exit_1;
+  tree new_rhs;
  
    split_block_after_labels (loop->header);
    orig_header = single_succ (loop->header);
@@ -1377,6 +1511,38 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
           control = t;
         }
      }
+
+ /* Setting the condition towards peeling the last iteration:
+    If the block consisting of the exit condition has the latch as
+    successor, then the body of the loop is executed before
+    the exit condition is tested.  In such case, moving the
+    condition to the entry, causes that the loop will iterate
+    one less iteration (which is the wanted outcome, since we
+    peel out the last iteration).  If the body is executed after
+    the condition, moving the condition to the entry requires
+    decrementing one iteration.  */
+  exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit); 
+  if (exit_1->dest == loop->latch)
+    new_rhs = gimple_cond_rhs (cond_stmt);
+  else
+  {
+    new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
+                          gimple_cond_rhs (cond_stmt),
+                          build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
+    if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
+      {
+       basic_block preheader;
+       gimple_stmt_iterator gsi1;
+
+       preheader = loop_preheader_edge(loop)->src;
+       gsi1 = gsi_after_labels (preheader);
+       new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
+                                           NULL_TREE,false,GSI_CONTINUE_LINKING);
+      }
+  }
+  gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
+  gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
+  
    bbs = get_loop_body_in_dom_order (loop);
  
    for (n = 0; bbs[n] != loop->latch; n++)
@@ -2019,7 +2185,7 @@ parallelize_loops (void)
           /* FIXME: the check for vector phi nodes could be removed.  */
           || loop_has_vector_phi_nodes (loop))
         continue;
-      estimated = estimated_loop_iterations_int (loop, false);
+      estimated = max_stmt_executions_int (loop, false);
        /* FIXME: Bypass this check as graphite doesn't update the
        count and frequency correctly now.  */
        if (!flag_loop_parallelize_all