2008-06-04 Junjie Gu <jgu@tensilica.com>

[pf3gnuchains/gcc-fork.git] / gcc / tree-loop-linear.c
diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c

index fcb93ea..f58bd11 100644 (file)
--- a/gcc/tree-loop-linear.c
+++ b/gcc/tree-loop-linear.c
@@ -1,12 +1,12 @@
  /* Linear Loop transforms
-   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
     Contributed by Daniel Berlin <dberlin@dberlin.org>.
  
  This file is part of GCC.
  
  GCC is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
+Software Foundation; either version 3, or (at your option) any later
  version.
  
  GCC is distributed in the hope that it will be useful, but WITHOUT ANY
@@ -15,16 +15,14 @@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.
  
  You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING.  If not, write to the Free
-Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.  */
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
  
  
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
  #include "tm.h"
-#include "errors.h"
  #include "ggc.h"
  #include "tree.h"
  #include "target.h"
@@ -32,6 +30,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "rtl.h"
  #include "basic-block.h"
  #include "diagnostic.h"
+#include "obstack.h"
  #include "tree-flow.h"
  #include "tree-dump.h"
  #include "timevar.h"
@@ -42,7 +41,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "tree-data-ref.h"
  #include "tree-scalar-evolution.h"
  #include "tree-pass.h"
-#include "varray.h"
  #include "lambda.h"
  
  /* Linear loop transforms include any composition of interchange,
@@ -55,19 +53,19 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
     transform matrix for locality purposes.
     TODO: Completion of partial transforms.  */
  
-/* Gather statistics for loop interchange.  LOOP_NUMBER is a relative
-   index in the considered loop nest.  The first loop in the
-   considered loop nest is FIRST_LOOP, and consequently the index of
-   the considered loop is obtained by FIRST_LOOP + LOOP_NUMBER.
+/* Gather statistics for loop interchange.  LOOP is the loop being
+   considered.  The first loop in the considered loop nest is
+   FIRST_LOOP, and consequently, the index of the considered loop is
+   obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH.
     
     Initializes:
     - DEPENDENCE_STEPS the sum of all the data dependence distances
-   carried by loop LOOP_NUMBER,
+   carried by loop LOOP,
  
     - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
-   for which the loop LOOP_NUMBER is not carrying any dependence,
+   for which the loop LOOP is not carrying any dependence,
  
-   - ACCESS_STRIDES the sum of all the strides in LOOP_NUMBER.
+   - ACCESS_STRIDES the sum of all the strides in LOOP.
  
     Example: for the following loop,
  
@@ -91,71 +89,75 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  */
  
  static void
-gather_interchange_stats (varray_type dependence_relations, 
-                         varray_type datarefs,
-                         unsigned int loop_number, 
-                         unsigned int first_loop,
+gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations,
+                         VEC (data_reference_p, heap) *datarefs,
+                         struct loop *loop,
+                         struct loop *first_loop,
                           unsigned int *dependence_steps, 
                           unsigned int *nb_deps_not_carried_by_loop, 
-                         unsigned int *access_strides)
+                         double_int *access_strides)
  {
-  unsigned int i;
+  unsigned int i, j;
+  struct data_dependence_relation *ddr;
+  struct data_reference *dr;
  
    *dependence_steps = 0;
    *nb_deps_not_carried_by_loop = 0;
-  *access_strides = 0;
+  *access_strides = double_int_zero;
  
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
+  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
      {
-      int dist;
-      struct data_dependence_relation *ddr = 
-       (struct data_dependence_relation *) 
-       VARRAY_GENERIC_PTR (dependence_relations, i);
-
        /* If we don't know anything about this dependence, or the distance
          vector is NULL, or there is no dependence, then there is no reuse of
          data.  */
-
-      if (DDR_DIST_VECT (ddr) == NULL
-         || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
-         || DDR_ARE_DEPENDENT (ddr) == chrec_known)
+      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
+         || DDR_ARE_DEPENDENT (ddr) == chrec_known
+         || DDR_NUM_DIST_VECTS (ddr) == 0)
         continue;
-      
  
-      
-      dist = DDR_DIST_VECT (ddr)[loop_number];
-      if (dist == 0)
-       (*nb_deps_not_carried_by_loop) += 1;
-      else if (dist < 0)
-       (*dependence_steps) += -dist;
-      else
-       (*dependence_steps) += dist;
+      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
+       {
+         int dist = DDR_DIST_VECT (ddr, j)[loop_depth (loop) - loop_depth (first_loop)];
+
+         if (dist == 0)
+           (*nb_deps_not_carried_by_loop) += 1;
+
+         else if (dist < 0)
+           (*dependence_steps) += -dist;
+
+         else
+           (*dependence_steps) += dist;
+       }
      }
  
    /* Compute the access strides.  */
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
+  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
      {
        unsigned int it;
-      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
+      tree ref = DR_REF (dr);
        tree stmt = DR_STMT (dr);
        struct loop *stmt_loop = loop_containing_stmt (stmt);
-      struct loop *inner_loop = current_loops->parray[first_loop + 1];
+      struct loop *inner_loop = first_loop->inner;
  
-      if (!flow_loop_nested_p (inner_loop, stmt_loop)
-         && inner_loop->num != stmt_loop->num)
+      if (inner_loop != stmt_loop 
+         && !flow_loop_nested_p (inner_loop, stmt_loop))
         continue;
  
-      for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
+      for (it = 0; it < DR_NUM_DIMENSIONS (dr); 
+          it++, ref = TREE_OPERAND (ref, 0))
         {
-         tree chrec = DR_ACCESS_FN (dr, it);
-         tree tstride = evolution_part_in_loop_num 
-           (chrec, first_loop + loop_number);
-         
-         if (tstride == NULL_TREE
-             || TREE_CODE (tstride) != INTEGER_CST)
+         int num = am_vector_index_for_loop (DR_ACCESS_MATRIX (dr), loop->num);
+         int istride = AM_GET_ACCESS_MATRIX_ELEMENT (DR_ACCESS_MATRIX (dr), it, num);
+         tree array_size = TYPE_SIZE (TREE_TYPE (ref));
+         double_int dstride;
+
+         if (array_size == NULL_TREE 
+             || TREE_CODE (array_size) != INTEGER_CST)
             continue;
-         
-         (*access_strides) += int_cst_value (tstride);
+
+         dstride = double_int_mul (tree_to_double_int (array_size), 
+                                   shwi_to_double_int (istride));
+         (*access_strides) = double_int_add (*access_strides, dstride);
         }
      }
  }
@@ -171,26 +173,40 @@ gather_interchange_stats (varray_type dependence_relations,
  static lambda_trans_matrix
  try_interchange_loops (lambda_trans_matrix trans, 
                        unsigned int depth,                     
-                      varray_type dependence_relations,
-                      varray_type datarefs, 
-                      unsigned int first_loop)
+                      VEC (ddr_p, heap) *dependence_relations,
+                      VEC (data_reference_p, heap) *datarefs,
+                      struct loop *first_loop)
  {
-  unsigned int loop_i, loop_j;
+  bool res;
+  struct loop *loop_i;
+  struct loop *loop_j;
    unsigned int dependence_steps_i, dependence_steps_j;
-  unsigned int access_strides_i, access_strides_j;
+  double_int access_strides_i, access_strides_j;
+  double_int small, large, nb_iter;
+  double_int l1_cache_size, l2_cache_size;
+  int cmp;
    unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
    struct data_dependence_relation *ddr;
  
+  if (VEC_length (ddr_p, dependence_relations) == 0)
+    return trans;
+
    /* When there is an unknown relation in the dependence_relations, we
       know that it is no worth looking at this loop nest: give up.  */
-  ddr = (struct data_dependence_relation *) 
-    VARRAY_GENERIC_PTR (dependence_relations, 0);
+  ddr = VEC_index (ddr_p, dependence_relations, 0);
    if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
      return trans;
-  
+
+  l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024);
+  l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024);
+
    /* LOOP_I is always the outer loop.  */
-  for (loop_j = 1; loop_j < depth; loop_j++)
-    for (loop_i = 0; loop_i < loop_j; loop_i++)
+  for (loop_j = first_loop->inner; 
+       loop_j; 
+       loop_j = loop_j->inner)
+    for (loop_i = first_loop; 
+        loop_depth (loop_i) < loop_depth (loop_j); 
+        loop_i = loop_i->inner)
        {
         gather_interchange_stats (dependence_relations, datarefs,
                                   loop_i, first_loop,
@@ -205,126 +221,153 @@ try_interchange_loops (lambda_trans_matrix trans,
         
         /* Heuristics for loop interchange profitability:
  
+          0. Don't transform if the smallest stride is larger than
+             the L2 cache, or if the largest stride multiplied by the
+             estimated number of iterations is smaller than the L1 cache.
+
            1. (spatial locality) Inner loops should have smallest
                dependence steps.
  
            2. (spatial locality) Inner loops should contain more
            dependence relations not carried by the loop.
  
-          3. (temporal locality) Inner loops should have smallest 
+          3. (temporal locality) Inner loops should have smallest
               array access strides.
         */
+
+       cmp = double_int_ucmp (access_strides_i, access_strides_j);
+       small = cmp < 0 ? access_strides_i : access_strides_j;
+       large = cmp < 0 ? access_strides_j : access_strides_i;
+
+       if (double_int_ucmp (small, l2_cache_size) > 0)
+         continue;
+
+       res = cmp < 0 ?
+         estimated_loop_iterations (loop_j, false, &nb_iter):
+         estimated_loop_iterations (loop_i, false, &nb_iter);
+       large = double_int_mul (large, nb_iter);
+
+       if (res && double_int_ucmp (large, l1_cache_size) < 0)
+         continue;
+
         if (dependence_steps_i < dependence_steps_j 
             || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
-           || access_strides_i < access_strides_j)
+           || cmp < 0)
           {
-           lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
+           lambda_matrix_row_exchange (LTM_MATRIX (trans),
+                                       loop_depth (loop_i) - loop_depth (first_loop),
+                                       loop_depth (loop_j) - loop_depth (first_loop));
             /* Validate the resulting matrix.  When the transformation
                is not valid, reverse to the previous transformation.  */
             if (!lambda_transform_legal_p (trans, depth, dependence_relations))
-             lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
+             lambda_matrix_row_exchange (LTM_MATRIX (trans), 
+                                         loop_depth (loop_i) - loop_depth (first_loop), 
+                                         loop_depth (loop_j) - loop_depth (first_loop));
           }
        }
  
    return trans;
  }
  
-/* Perform a set of linear transforms on LOOPS.  */
+/* Return the number of nested loops in LOOP_NEST, or 0 if the loops
+   are not perfectly nested.  */
+
+static unsigned int
+perfect_loop_nest_depth (struct loop *loop_nest)
+{
+  struct loop *temp;
+  unsigned int depth = 1;
+
+  /* If it's not a loop nest, we don't want it.  We also don't handle
+     sibling loops properly, which are loops of the following form:
+
+     | for (i = 0; i < 50; i++)
+     |   {
+     |     for (j = 0; j < 50; j++)
+     |       {
+     |        ...
+     |       }
+     |     for (j = 0; j < 50; j++)
+     |       {
+     |        ...
+     |       }
+     |   }
+  */
+
+  if (!loop_nest->inner || !single_exit (loop_nest))
+    return 0;
+
+  for (temp = loop_nest->inner; temp; temp = temp->inner)
+    {
+      /* If we have a sibling loop or multiple exit edges, jump ship.  */
+      if (temp->next || !single_exit (temp))
+       return 0;
+
+      depth++;
+    }
+
+  return depth;
+}
+
+/* Perform a set of linear transforms on loops.  */
  
  void
-linear_transform_loops (struct loops *loops)
+linear_transform_loops (void)
  {
-  unsigned int i;
-  
-  compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL);
-  for (i = 1; i < loops->num; i++)
+  bool modified = false;
+  loop_iterator li;
+  VEC(tree,heap) *oldivs = NULL;
+  VEC(tree,heap) *invariants = NULL;
+  VEC(tree,heap) *lambda_parameters = NULL;
+  VEC(tree,heap) *remove_ivs = VEC_alloc (tree, heap, 3);
+  struct loop *loop_nest;
+  tree oldiv_stmt;
+  unsigned i;
+
+  FOR_EACH_LOOP (li, loop_nest, 0)
      {
        unsigned int depth = 0;
-      varray_type datarefs;
-      varray_type dependence_relations;
-      struct loop *loop_nest = loops->parray[i];
-      struct loop *temp;
-      VEC (tree) *oldivs = NULL;
-      VEC (tree) *invariants = NULL;
+      VEC (ddr_p, heap) *dependence_relations;
+      VEC (data_reference_p, heap) *datarefs;
+      
        lambda_loopnest before, after;
        lambda_trans_matrix trans;
-      bool problem = false;
-      bool need_perfect_nest = false;
-      /* If it's not a loop nest, we don't want it.
-         We also don't handle sibling loops properly, 
-         which are loops of the following form:
-         for (i = 0; i < 50; i++)
-           {
-             for (j = 0; j < 50; j++)
-               {
-               ...
-               }
-           for (j = 0; j < 50; j++)
-               {
-                ...
-               }
-           } */
-      if (!loop_nest->inner)
-       continue;
-      depth = 1;
-      for (temp = loop_nest->inner; temp; temp = temp->inner)
-       {
-         flow_loop_scan (temp, LOOP_ALL);
-         /* If we have a sibling loop or multiple exit edges, jump ship.  */
-         if (temp->next || temp->num_exits != 1)
-           {
-             problem = true;
-             break;
-           }
-         depth ++;
-       }
-      if (problem)
+      struct obstack lambda_obstack;
+      gcc_obstack_init (&lambda_obstack);
+
+      depth = perfect_loop_nest_depth (loop_nest);
+      if (depth == 0)
         continue;
  
-      /* Analyze data references and dependence relations using scev.  */      
- 
-      VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
-      VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
-                              "dependence_relations");
+      VEC_truncate (tree, oldivs, 0);
+      VEC_truncate (tree, invariants, 0);
+      VEC_truncate (tree, lambda_parameters, 0);
+
+      datarefs = VEC_alloc (data_reference_p, heap, 10);
+      dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
+      if (!compute_data_dependences_for_loop (loop_nest, true, &datarefs,
+                                             &dependence_relations))
+       continue;
        
-  
-      compute_data_dependences_for_loop (depth, loop_nest,
-                                        &datarefs, &dependence_relations);
+      lambda_collect_parameters (datarefs, &lambda_parameters);
+      if (!lambda_compute_access_matrices (datarefs, lambda_parameters,
+                                          loop_nest->num))
+       continue;
+
        if (dump_file && (dump_flags & TDF_DETAILS))
-       {
-         unsigned int j;
-         for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
-           {
-             struct data_dependence_relation *ddr = 
-               (struct data_dependence_relation *) 
-               VARRAY_GENERIC_PTR (dependence_relations, j);
-
-             if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
-               {
-                 fprintf (dump_file, "DISTANCE_V (");
-                 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), 
-                                      DDR_SIZE_VECT (ddr));
-                 fprintf (dump_file, ")\n");
-                 fprintf (dump_file, "DIRECTION_V (");
-                 print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), 
-                                      DDR_SIZE_VECT (ddr));
-                 fprintf (dump_file, ")\n");
-               }
-           }
-         fprintf (dump_file, "\n\n");
-       }
+       dump_ddrs (dump_file, dependence_relations);
+
        /* Build the transformation matrix.  */
        trans = lambda_trans_matrix_new (depth, depth);
        lambda_matrix_id (LTM_MATRIX (trans), depth);
-
        trans = try_interchange_loops (trans, depth, dependence_relations,
-                                    datarefs, loop_nest->num);
+                                    datarefs, loop_nest);
  
        if (lambda_trans_matrix_id_p (trans))
         {
           if (dump_file)
            fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
-         continue;
+         goto free_and_continue;
         }
  
        /* Check whether the transformation is legal.  */
@@ -332,42 +375,51 @@ linear_transform_loops (struct loops *loops)
         {
           if (dump_file)
             fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
-         continue;
+         goto free_and_continue;
         }
-      if (!perfect_nest_p (loop_nest))
-       need_perfect_nest = true;
-      before = gcc_loopnest_to_lambda_loopnest (loops,
-                                               loop_nest, &oldivs, 
-                                               &invariants,
-                                               need_perfect_nest);
+
+      before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
+                                                &invariants, &lambda_obstack);
+
        if (!before)
-       continue;
-            
+       goto free_and_continue;
+
        if (dump_file)
         {
           fprintf (dump_file, "Before:\n");
           print_lambda_loopnest (dump_file, before, 'i');
         }
    
-      after = lambda_loopnest_transform (before, trans);
+      after = lambda_loopnest_transform (before, trans, &lambda_obstack);
+
        if (dump_file)
         {
           fprintf (dump_file, "After:\n");
           print_lambda_loopnest (dump_file, after, 'u');
         }
+
        lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
-                                      after, trans);
+                                      &remove_ivs,
+                                       after, trans, &lambda_obstack);
+      modified = true;
+
        if (dump_file)
         fprintf (dump_file, "Successfully transformed loop.\n");
-      oldivs = NULL;
-      invariants = NULL;
+
+    free_and_continue:
+      obstack_free (&lambda_obstack, NULL);
        free_dependence_relations (dependence_relations);
        free_data_refs (datarefs);
      }
-  free_df ();
+
+  for (i = 0; VEC_iterate (tree, remove_ivs, i, oldiv_stmt); i++)
+    remove_iv (oldiv_stmt);
+
+  VEC_free (tree, heap, oldivs);
+  VEC_free (tree, heap, invariants);
+  VEC_free (tree, heap, remove_ivs);
    scev_reset ();
-  rewrite_into_loop_closed_ssa ();
-#ifdef ENABLE_CHECKING
-  verify_loop_closed_ssa ();
-#endif
+
+  if (modified)
+    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
  }