OSDN Git Service

2007-10-31 Sebastian Pop <sebastian.pop@amd.com>
authorspop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 31 Oct 2007 13:53:03 +0000 (13:53 +0000)
committerspop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 31 Oct 2007 13:53:03 +0000 (13:53 +0000)
PR tree-optimization/32377
* tree-data-ref.c (compute_overlap_steps_for_affine_univar): Make it
work also for unknown number of iterations.
(analyze_subscript_affine_affine): Clean up.  Don't fail when the
number of iterations is not known.

* gfortran.dg/vect/pr32377.f90: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@129797 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gfortran.dg/vect/pr32377.f90 [new file with mode: 0644]
gcc/tree-data-ref.c

index 9e46938..e781bd4 100644 (file)
@@ -1,3 +1,11 @@
+2007-10-31  Sebastian Pop  <sebastian.pop@amd.com>
+
+       PR tree-optimization/32377
+       * tree-data-ref.c (compute_overlap_steps_for_affine_univar): Make it
+       work also for unknown number of iterations.
+       (analyze_subscript_affine_affine): Clean up.  Don't fail when the 
+       number of iterations is not known.
+
 2007-10-31  Richard Guenther  <rguenther@suse.de>
 
        PR middle-end/33779
 2007-10-31  Richard Guenther  <rguenther@suse.de>
 
        PR middle-end/33779
index 385b384..687dd19 100644 (file)
@@ -1,3 +1,8 @@
+2007-10-31  Sebastian Pop  <sebastian.pop@amd.com>
+
+       PR tree-optimization/32377
+       * gfortran.dg/vect/pr32377.f90: New.
+       
 2007-10-31  Richard Guenther  <rguenther@suse.de>
 
        PR middle-end/33779
 2007-10-31  Richard Guenther  <rguenther@suse.de>
 
        PR middle-end/33779
diff --git a/gcc/testsuite/gfortran.dg/vect/pr32377.f90 b/gcc/testsuite/gfortran.dg/vect/pr32377.f90
new file mode 100644 (file)
index 0000000..624a9ae
--- /dev/null
@@ -0,0 +1,15 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_float }
+
+subroutine s243(ntimes,ld,n,ctime,dtime,a,b,c,d,e,aa,bb,cc)
+
+  integer ntimes,ld,n,i,nl
+  real a(n),b(n),c(n),d(n),e(n),aa(ld,n),bb(ld,n),cc(ld,n)
+  real t1,t2,chksum,ctime,dtime,cs1d
+  b(:n-1)= b(:n-1)+(c(:n-1)+e(:n-1))*d(:n-1)
+  a(:n-1)= b(:n-1)+a(2:n)*d(:n-1)
+  return
+end subroutine s243
+
+! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
+! { dg-final { cleanup-tree-dump "vect" } }
index 720c94d..17851ee 100644 (file)
@@ -1819,9 +1819,15 @@ compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
       step_overlaps_a = step_b / gcd_steps_a_b;
       step_overlaps_b = step_a / gcd_steps_a_b;
 
       step_overlaps_a = step_b / gcd_steps_a_b;
       step_overlaps_b = step_a / gcd_steps_a_b;
 
-      tau2 = FLOOR_DIV (niter, step_overlaps_a);
-      tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
-      last_conflict = tau2;
+      if (niter > 0)
+       {
+         tau2 = FLOOR_DIV (niter, step_overlaps_a);
+         tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
+         last_conflict = tau2;
+         *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+       }
+      else
+       *last_conflicts = chrec_dont_know;
 
       *overlaps_a = affine_fn_univar (integer_zero_node, dim, 
                                      build_int_cst (NULL_TREE,
 
       *overlaps_a = affine_fn_univar (integer_zero_node, dim, 
                                      build_int_cst (NULL_TREE,
@@ -1829,7 +1835,6 @@ compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
       *overlaps_b = affine_fn_univar (integer_zero_node, dim, 
                                      build_int_cst (NULL_TREE, 
                                                     step_overlaps_b));
       *overlaps_b = affine_fn_univar (integer_zero_node, dim, 
                                      build_int_cst (NULL_TREE, 
                                                     step_overlaps_b));
-      *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
     }
 
   else
     }
 
   else
@@ -1985,7 +1990,6 @@ analyze_subscript_affine_affine (tree chrec_a,
 {
   unsigned nb_vars_a, nb_vars_b, dim;
   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
 {
   unsigned nb_vars_a, nb_vars_b, dim;
   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
-  HOST_WIDE_INT tau1, tau2;
   lambda_matrix A, U, S;
 
   if (eq_evolutions_p (chrec_a, chrec_b))
   lambda_matrix A, U, S;
 
   if (eq_evolutions_p (chrec_a, chrec_b))
@@ -2043,18 +2047,7 @@ analyze_subscript_affine_affine (tree chrec_a,
                                                   false);
          niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
                                                   false);
                                                   false);
          niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
                                                   false);
-         if (niter_a < 0 || niter_b < 0)
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
-             *overlaps_a = conflict_fn_not_known ();
-             *overlaps_b = conflict_fn_not_known ();
-             *last_conflicts = chrec_dont_know;
-             goto end_analyze_subs_aa;
-           }
-
          niter = MIN (niter_a, niter_b);
          niter = MIN (niter_a, niter_b);
-
          step_a = int_cst_value (CHREC_RIGHT (chrec_a));
          step_b = int_cst_value (CHREC_RIGHT (chrec_b));
 
          step_a = int_cst_value (CHREC_RIGHT (chrec_a));
          step_b = int_cst_value (CHREC_RIGHT (chrec_b));
 
@@ -2138,31 +2131,7 @@ analyze_subscript_affine_affine (tree chrec_a,
         
             | x0 = i0 + i1 * t, 
             | y0 = j0 + j1 * t.  */
         
             | x0 = i0 + i1 * t, 
             | y0 = j0 + j1 * t.  */
-      
-         HOST_WIDE_INT i0, j0, i1, j1;
-
-         /* X0 and Y0 are the first iterations for which there is a
-            dependence.  X0, Y0 are two solutions of the Diophantine
-            equation: chrec_a (X0) = chrec_b (Y0).  */
-         HOST_WIDE_INT x0, y0;
-         HOST_WIDE_INT niter, niter_a, niter_b;
-
-         niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
-                                                  false);
-         niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
-                                                  false);
-
-         if (niter_a < 0 || niter_b < 0)
-           {
-             if (dump_file && (dump_flags & TDF_DETAILS))
-               fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
-             *overlaps_a = conflict_fn_not_known ();
-             *overlaps_b = conflict_fn_not_known ();
-             *last_conflicts = chrec_dont_know;
-             goto end_analyze_subs_aa;
-           }
-
-         niter = MIN (niter_a, niter_b);
+         HOST_WIDE_INT i0, j0, i1, j1;
 
          i0 = U[0][0] * gamma / gcd_alpha_beta;
          j0 = U[0][1] * gamma / gcd_alpha_beta;
 
          i0 = U[0][0] * gamma / gcd_alpha_beta;
          j0 = U[0][1] * gamma / gcd_alpha_beta;
@@ -2179,80 +2148,72 @@ analyze_subscript_affine_affine (tree chrec_a,
              *overlaps_a = conflict_fn_no_dependence ();
              *overlaps_b = conflict_fn_no_dependence ();
              *last_conflicts = integer_zero_node;
              *overlaps_a = conflict_fn_no_dependence ();
              *overlaps_b = conflict_fn_no_dependence ();
              *last_conflicts = integer_zero_node;
+             goto end_analyze_subs_aa;
            }
 
            }
 
-         else 
+         if (i1 > 0 && j1 > 0)
            {
            {
-             if (i1 > 0)
+             HOST_WIDE_INT niter_a = estimated_loop_iterations_int
+               (get_chrec_loop (chrec_a), false);
+             HOST_WIDE_INT niter_b = estimated_loop_iterations_int
+               (get_chrec_loop (chrec_b), false);
+             HOST_WIDE_INT niter = MIN (niter_a, niter_b);
+
+             /* (X0, Y0) is a solution of the Diophantine equation:
+                "chrec_a (X0) = chrec_b (Y0)".  */
+             HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
+                                       CEIL (-j0, j1));
+             HOST_WIDE_INT x0 = i1 * tau1 + i0;
+             HOST_WIDE_INT y0 = j1 * tau1 + j0;
+
+             /* (X1, Y1) is the smallest positive solution of the eq
+                "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
+                first conflict occurs.  */
+             HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
+             HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
+             HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
+
+             if (niter > 0)
                {
                {
-                 tau1 = CEIL (-i0, i1);
-                 tau2 = FLOOR_DIV (niter - i0, i1);
+                 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
+                                           FLOOR_DIV (niter - j0, j1));
+                 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
 
 
-                 if (j1 > 0)
+                 /* If the overlap occurs outside of the bounds of the
+                    loop, there is no dependence.  */
+                 if (x1 > niter || y1 > niter)
                    {
                    {
-                     int last_conflict, min_multiple;
-                     tau1 = MAX (tau1, CEIL (-j0, j1));
-                     tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
-
-                     x0 = i1 * tau1 + i0;
-                     y0 = j1 * tau1 + j0;
-
-                     /* At this point (x0, y0) is one of the
-                        solutions to the Diophantine equation.  The
-                        next step has to compute the smallest
-                        positive solution: the first conflicts.  */
-                     min_multiple = MIN (x0 / i1, y0 / j1);
-                     x0 -= i1 * min_multiple;
-                     y0 -= j1 * min_multiple;
-
-                     tau1 = (x0 - i0)/i1;
-                     last_conflict = tau2 - tau1;
-
-                     /* If the overlap occurs outside of the bounds of the
-                        loop, there is no dependence.  */
-                     if (x0 > niter || y0  > niter)
-                       {
-                         *overlaps_a = conflict_fn_no_dependence ();
-                         *overlaps_b = conflict_fn_no_dependence ();
-                         *last_conflicts = integer_zero_node;
-                       }
-                     else
-                       {
-                         *overlaps_a
-                           = conflict_fn (1,
-                               affine_fn_univar (build_int_cst (NULL_TREE, x0),
-                                                 1,
-                                                 build_int_cst (NULL_TREE, i1)));
-                         *overlaps_b
-                           = conflict_fn (1,
-                               affine_fn_univar (build_int_cst (NULL_TREE, y0),
-                                                 1,
-                                                 build_int_cst (NULL_TREE, j1)));
-                         *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
-                       }
+                     *overlaps_a = conflict_fn_no_dependence ();
+                     *overlaps_b = conflict_fn_no_dependence ();
+                     *last_conflicts = integer_zero_node;
+                     goto end_analyze_subs_aa;
                    }
                  else
                    }
                  else
-                   {
-                     /* FIXME: For the moment, the upper bound of the
-                        iteration domain for j is not checked.  */
-                     if (dump_file && (dump_flags & TDF_DETAILS))
-                       fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
-                     *overlaps_a = conflict_fn_not_known ();
-                     *overlaps_b = conflict_fn_not_known ();
-                     *last_conflicts = chrec_dont_know;
-                   }
+                   *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
                }
                }
-         
              else
              else
-               {
-                 /* FIXME: For the moment, the upper bound of the
-                    iteration domain for i is not checked.  */
-                 if (dump_file && (dump_flags & TDF_DETAILS))
-                   fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
-                 *overlaps_a = conflict_fn_not_known ();
-                 *overlaps_b = conflict_fn_not_known ();
-                 *last_conflicts = chrec_dont_know;
-               }
+               *last_conflicts = chrec_dont_know;
+
+             *overlaps_a
+               = conflict_fn (1,
+                              affine_fn_univar (build_int_cst (NULL_TREE, x1),
+                                                1,
+                                                build_int_cst (NULL_TREE, i1)));
+             *overlaps_b
+               = conflict_fn (1,
+                              affine_fn_univar (build_int_cst (NULL_TREE, y1),
+                                                1,
+                                                build_int_cst (NULL_TREE, j1)));
+           }
+         else
+           {
+             /* FIXME: For the moment, the upper bound of the
+                iteration domain for i and j is not checked.  */
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
+             *overlaps_a = conflict_fn_not_known ();
+             *overlaps_b = conflict_fn_not_known ();
+             *last_conflicts = chrec_dont_know;
            }
        }
       else
            }
        }
       else
@@ -2264,7 +2225,6 @@ analyze_subscript_affine_affine (tree chrec_a,
          *last_conflicts = chrec_dont_know;
        }
     }
          *last_conflicts = chrec_dont_know;
        }
     }
-
   else
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
   else
     {
       if (dump_file && (dump_flags & TDF_DETAILS))