1 ! { dg-do compile { target i?86-*-* x86_64-*-* } }
2 ! { dg-require-effective-target vect_double }
3 ! { dg-require-effective-target sse2 }
4 ! { dg-options "-O3 -ffast-math -msse2 -fpredictive-commoning -ftree-vectorize -fdump-tree-optimized" }
7 ******* RESID COMPUTES THE RESIDUAL: R = V - AU
9 * THIS SIMPLE IMPLEMENTATION COSTS 27A + 4M PER RESULT, WHERE
10 * A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND
11 * MULTIPLICATION, RESPECTIVELY. BY USING SEVERAL TWO-DIMENSIONAL
12 * BUFFERS ONE CAN REDUCE THIS COST TO 13A + 4M IN THE GENERAL
13 * CASE, OR 10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO.
! RESID stores into R(I1,I2,I3) the value V(I1,I2,I3) minus a weighted
! sum of U taken at the same point and at its neighbouring points.
!
! Arguments, as declared below:
!   U, V, R - REAL*8 arrays dimensioned (N,N,N); U and V are read,
!             R is written.
!   N       - extent of each array dimension.
!   A       - REAL*8 coefficients indexed 0..3.
15 SUBROUTINE RESID(U,V,R,N,A)
17 REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
! NOTE(review): the loops that drive I1, I2 and I3 (terminating at
! label 600) and the end of the routine are not visible in this
! excerpt.  Presumably they keep every index within 2..N-1 so that the
! +/-1 accesses into U stay in bounds - confirm against the full file.
!
! Weights used in the statement below:
!   A(0) - the point itself.
!   A(1) - the 6 neighbours that differ by 1 in exactly one index.
!   A(2) - the 12 neighbours that differ by 1 in exactly two indices.
!   A(3) - the 8 neighbours that differ by 1 in all three indices.
23 600 R(I1,I2,I3)=V(I1,I2,I3)
24 > -A(0)*( U(I1, I2, I3 ) )
25 > -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 )
26 > + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 )
27 > + U(I1, I2, I3-1) + U(I1, I2, I3+1) )
28 > -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 )
29 > + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 )
30 > + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1)
31 > + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1)
32 > + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1)
33 > + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) )
34 > -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
35 > + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
36 > + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
37 > + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
41 ! we want to check that predictive commoning did something on the
42 ! vectorized loop, which means we have to have exactly 13 vector additions.
44 ! { dg-final { scan-tree-dump-times "vect_\[^\\n\]*\\+ " 13 "optimized" } }
45 ! { dg-final { cleanup-tree-dump "vect" } }
46 ! { dg-final { cleanup-tree-dump "optimized" } }