Fix IA-64 problems with denorms getting clobbered by type conversions.

[pf3gnuchains/gcc-fork.git] / gcc / lambda-code.c
diff --git a/gcc/lambda-code.c b/gcc/lambda-code.c

index 43889f7..bf00c05 100644 (file)
--- a/gcc/lambda-code.c
+++ b/gcc/lambda-code.c
@@ -1,5 +1,5 @@
  /*  Loop transformation code generation
-    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
      Contributed by Daniel Berlin <dberlin@dberlin.org>
  
      This file is part of GCC.
@@ -41,6 +41,7 @@
  #include "tree-scalar-evolution.h"
  #include "vec.h"
  #include "lambda.h"
+#include "vecprim.h"
  
  /* This loop nest code generation is based on non-singular matrix
     math.
@@ -114,9 +115,6 @@
   Fourier-Motzkin elimination is used to compute the bounds of the base space
   of the lattice.  */
  
-DEF_VEC_I(int);
-DEF_VEC_ALLOC_I(int,heap);
-
  static bool perfect_nestify (struct loops *, 
                              struct loop *, VEC(tree,heap) *, 
                              VEC(tree,heap) *, VEC(int,heap) *,
@@ -441,45 +439,6 @@ lambda_lattice_compute_base (lambda_loopnest nest)
    return ret;
  }
  
-/* Compute the greatest common denominator of two numbers (A and B) using
-   Euclid's algorithm.  */
-
-static int
-gcd (int a, int b)
-{
-
-  int x, y, z;
-
-  x = abs (a);
-  y = abs (b);
-
-  while (x > 0)
-    {
-      z = y % x;
-      y = x;
-      x = z;
-    }
-
-  return (y);
-}
-
-/* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
-
-static int
-gcd_vector (lambda_vector vector, int size)
-{
-  int i;
-  int gcd1 = 0;
-
-  if (size > 0)
-    {
-      gcd1 = vector[0];
-      for (i = 1; i < size; i++)
-       gcd1 = gcd (gcd1, vector[i]);
-    }
-  return gcd1;
-}
-
  /* Compute the least common multiple of two numbers A and B .  */
  
  static int
@@ -848,7 +807,7 @@ lambda_compute_target_space (lambda_loopnest auxillary_nest,
        LN_LOOPS (target_nest)[i] = target_loop;
  
        /* Computes the gcd of the coefficients of the linear part.  */
-      gcd1 = gcd_vector (target[i], i);
+      gcd1 = lambda_vector_gcd (target[i], i);
  
        /* Include the denominator in the GCD.  */
        gcd1 = gcd (gcd1, determinant);
@@ -911,9 +870,9 @@ lambda_compute_target_space (lambda_loopnest auxillary_nest,
             }
           /* Find the gcd and divide by it here, rather than doing it
              at the tree level.  */
-         gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
-         gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
-                            invariants);
+         gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
+         gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
+                                   invariants);
           gcd1 = gcd (gcd1, gcd2);
           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
@@ -967,9 +926,9 @@ lambda_compute_target_space (lambda_loopnest auxillary_nest,
             }
           /* Find the gcd and divide by it here, instead of at the
              tree level.  */
-         gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
-         gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
-                            invariants);
+         gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
+         gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
+                                   invariants);
           gcd1 = gcd (gcd1, gcd2);
           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
@@ -2201,23 +2160,20 @@ exit_phi_for_loop_p (struct loop *loop, tree stmt)
    return true;
  }
  
-/* Return true if STMT can be put back into INNER, a loop by moving it to the 
-   beginning of that loop.  */
+/* Return true if STMT can be put back into the loop INNER, by
+   copying it to the beginning of that loop and changing the uses.  */
  
  static bool
  can_put_in_inner_loop (struct loop *inner, tree stmt)
  {
    imm_use_iterator imm_iter;
    use_operand_p use_p;
-  basic_block use_bb = NULL;
    
    gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
        || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
      return false;
    
-  /* We require that the basic block of all uses be the same, or the use be an
-     exit phi.  */
    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
      {
        if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
@@ -2226,17 +2182,39 @@ can_put_in_inner_loop (struct loop *inner, tree stmt)
  
           if (!flow_bb_inside_loop_p (inner, immbb))
             return false;
-         if (use_bb == NULL)
-           use_bb = immbb;
-         else if (immbb != use_bb)
+       }
+    }
+  return true;  
+}
+
+/* Return true if STMT can be put *after* the inner loop of LOOP.  */
+static bool
+can_put_after_inner_loop (struct loop *loop, tree stmt)
+{
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+
+  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
+    return false;
+  
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
+    {
+      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
+       {
+         basic_block immbb = bb_for_stmt (USE_STMT (use_p));
+         
+         if (!dominated_by_p (CDI_DOMINATORS,
+                              immbb,
+                              loop->inner->header)
+             && !can_put_in_inner_loop (loop->inner, stmt))
             return false;
         }
      }
    return true;
-  
  }
  
  
+
  /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
     one.  LOOPIVS is a vector of induction variables, one per loop.  
     ATM, we only handle imperfect nests of depth 2, where all of the statements
@@ -2277,18 +2255,17 @@ can_convert_to_perfect_nest (struct loop *loop,
                 if (stmt_uses_op (stmt, iv))
                   goto fail;
               
-             /* If this is a simple operation like a cast that is invariant
-                in the inner loop, only used there, and we can place it
-                there, then it's not going to hurt us.
-                This means that we will propagate casts and other cheap
-                invariant operations *back*
-                into the inner loop if we can interchange the loop, on the
-                theory that we are going to gain a lot more by interchanging
-                the loop than we are by leaving some invariant code there for
-                some other pass to clean up.  */
+             /* If this is a scalar operation that can be put back
+                into the inner loop, or after the inner loop, through
+                copying, then do so. This works on the theory that
+                any amount of scalar code we have to reduplicate
+                into or after the loops is less expensive that the
+                win we get from rearranging the memory walk
+                the loop is doing so that it has better
+                cache behavior.  */
               if (TREE_CODE (stmt) == MODIFY_EXPR
-                 && is_gimple_cast (TREE_OPERAND (stmt, 1))
-                 && can_put_in_inner_loop (loop->inner, stmt))
+                 && (can_put_in_inner_loop (loop->inner, stmt)
+                     || can_put_after_inner_loop (loop, stmt)))
                 continue;
  
               /* Otherwise, if the bb of a statement we care about isn't
@@ -2515,23 +2492,33 @@ perfect_nestify (struct loops *loops,
                         bsi_prev (&bsi);
                       continue;
                     }
-                 /* Move this statement back into the inner loop.
-                    This looks a bit confusing, but we are really just
-                    finding the first non-exit phi use and moving the
-                    statement to the beginning of that use's basic
-                    block.  */
+                 
+                 /* Make copies of this statement to put it back next
+                    to its uses. */
                   FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, 
                                          TREE_OPERAND (stmt, 0))
                     {
                       tree imm_stmt = USE_STMT (use_p);
                       if (!exit_phi_for_loop_p (loop->inner, imm_stmt))
                         {
-                         block_stmt_iterator tobsi = bsi_after_labels (bb_for_stmt (imm_stmt));
-                         bsi_move_after (&bsi, &tobsi);
-                         update_stmt (stmt);
-                         BREAK_FROM_SAFE_IMM_USE (imm_iter);
+                         block_stmt_iterator tobsi;
+                         tree newname;
+                         tree newstmt;
+                        
+                         newstmt  = unshare_expr (stmt);
+                         tobsi = bsi_after_labels (bb_for_stmt (imm_stmt));
+                         newname = TREE_OPERAND (newstmt, 0);
+                         newname = SSA_NAME_VAR (newname);
+                         newname = make_ssa_name (newname, newstmt);
+                         TREE_OPERAND (newstmt, 0) = newname;
+                         SET_USE (use_p, TREE_OPERAND (newstmt, 0));
+                         bsi_insert_before (&tobsi, newstmt, BSI_SAME_STMT);
+                         update_stmt (newstmt);
+                         update_stmt (imm_stmt);
                         } 
                     }
+                 if (!bsi_end_p (bsi))
+                   bsi_prev (&bsi);                      
                 }
             }
           else
@@ -2589,7 +2576,7 @@ perfect_nestify (struct loops *loops,
  bool
  lambda_transform_legal_p (lambda_trans_matrix trans, 
                           int nb_loops,
-                         varray_type dependence_relations)
+                         VEC (ddr_p, heap) *dependence_relations)
  {
    unsigned int i, j;
    lambda_vector distres;
@@ -2600,8 +2587,7 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
  
    /* When there is an unknown relation in the dependence_relations, we
       know that it is no worth looking at this loop nest: give up.  */
-  ddr = (struct data_dependence_relation *) 
-    VARRAY_GENERIC_PTR (dependence_relations, 0);
+  ddr = VEC_index (ddr_p, dependence_relations, 0);
    if (ddr == NULL)
      return true;
    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
@@ -2610,11 +2596,8 @@ lambda_transform_legal_p (lambda_trans_matrix trans,
    distres = lambda_vector_new (nb_loops);
  
    /* For each distance vector in the dependence graph.  */
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
+  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
      {
-      ddr = (struct data_dependence_relation *) 
-       VARRAY_GENERIC_PTR (dependence_relations, i);     
-
        /* Don't care about relations for which we know that there is no
          dependence, nor about read-read (aka. output-dependences):
          these data accesses can happen in any order.  */