@@ -460,6 +460,438 @@ main (int argc, char **argv)
abort ();
}
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc kernels wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 11.0)
+ abort ();
+ }
+
+
+ r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream1);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort ();
+
+ if (b[i] != 49.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc kernels wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 17.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 16.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 25.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
acc_shutdown (acc_device_nvidia);
return 0;
@@ -609,5 +609,357 @@ main(int argc, char **argv)
abort ();
#endif
+ for (i = 0; i < N; i++)
+ a[i] = 4.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 5.0;
+#else
+ exp = 4.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc kernels if(0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 17.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 8.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 9.0;
+#else
+ exp = 8.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+#pragma acc kernels if(zero)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 23.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(true)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 76.0;
+
+#pragma acc kernels if(false)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 77.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+ n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 18.0;
+
+ n = 0;
+
+#pragma acc kernels if(n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 19.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 49.0;
+
+ n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 50.0;
+#else
+ exp = 49.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 38.0;
+
+ n = 0;
+
+#pragma acc kernels if(n + n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 39.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 91.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(-2)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 92.0;
+#else
+ exp = 91.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 43.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 44.0;
+#else
+ exp = 43.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 87.0;
+
+#pragma acc kernels if(one == 0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 88.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 9.0;
+ }
+
+#if ACC_MEM_SHARED
+ exp = 0.0;
+ exp2 = 0.0;
+#else
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
return 0;
}
@@ -132,4 +132,126 @@ program asyncwait
if (d(i) .ne. 1.0) call abort
if (e(i) .ne. 11.0) call abort
end do
+
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc data copy (a(1:N)) copy (b(1:N))
+
+ !$acc kernels async
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+
+ a(:) = 2.0
+ b(:) = 0.0
+
+ !$acc data copy (a(1:N)) copy (b(1:N))
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait (1)
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+
+ a(:) = 3.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+
+ !$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N))
+
+ !$acc kernels async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait (1)
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ end do
+
+ a(:) = 2.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+ e(:) = 0.0
+
+ !$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
+
+ !$acc kernels async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels wait (1) async (1)
+ !$acc loop
+ do i = 1, N
+ e(i) = a(i) + b(i) + c(i) + d(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait (1)
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ if (e(i) .ne. 11.0) call abort
+ end do
end program asyncwait
@@ -1,6 +1,6 @@
! { dg-do run }
-program parallel_wait
+program asyncwait
integer, parameter :: N = 64
real, allocatable :: a(:), b(:), c(:)
integer i
@@ -33,8 +33,33 @@ program parallel_wait
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
+
+ !$acc kernels async (0)
+ !$acc loop
+ do i = 1, N
+ a(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc kernels wait (0, 1)
+ !$acc loop
+ do i = 1, N
+ c(i) = a(i) + b(i)
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (c(i) .ne. 2.0) call abort
+ end do
deallocate (a)
deallocate (b)
deallocate (c)
-end program parallel_wait
+end program asyncwait
@@ -1,6 +1,6 @@
! { dg-do run }
-program parallel_wait
+program asyncwait
integer, parameter :: N = 64
real, allocatable :: a(:), b(:), c(:)
integer i
@@ -35,8 +35,35 @@ program parallel_wait
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
+
+ !$acc kernels async (0)
+ !$acc loop
+ do i = 1, N
+ a(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc wait (0, 1)
+
+ !$acc kernels
+ !$acc loop
+ do i = 1, N
+ c(i) = a(i) + b(i)
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (c(i) .ne. 2.0) call abort
+ end do
deallocate (a)
deallocate (b)
deallocate (c)
-end program parallel_wait
+end program asyncwait
@@ -450,4 +450,437 @@ program main
if (acc_is_present (b) .eqv. .TRUE.) call abort
#endif
+ a(:) = 4.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 5.0
+#else
+ exp = 4.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 16.0
+
+ !$acc kernels if (0 == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 17.0) call abort
+ end do
+
+ a(:) = 8.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 9.0
+#else
+ exp = 8.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 22.0
+
+ !$acc kernels if (zero == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 23.0) call abort
+ end do
+
+ a(:) = 16.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 76.0
+
+ !$acc kernels if (.FALSE.)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 77.0) call abort
+ end do
+
+ a(:) = 22.0
+
+ nn = 1
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 18.0
+
+ nn = 0
+
+ !$acc kernels if (nn == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 19.0) call abort
+ end do
+
+ a(:) = 49.0
+
+ nn = 1
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 50.0
+#else
+ exp = 49.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 38.0
+
+ nn = 0;
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 39.0) call abort
+ end do
+
+ a(:) = 91.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 92.0) call abort
+ end do
+
+ a(:) = 43.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 44.0
+#else
+ exp = 43.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 87.0
+
+ !$acc kernels if (one == 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 88.0) call abort
+ end do
+
+ a(:) = 3.0
+ b(:) = 9.0
+
+#if ACC_MEM_SHARED
+ exp = 0.0
+ exp2 = 0.0
+#else
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
+ !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. exp) call abort
+ if (b(i) .ne. exp2) call abort
+ end do
+
+ a(:) = 6.0
+ b(:) = 12.0
+
+ !$acc update device (a(1:N), b(1:N)) if (0 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. exp) call abort
+ if (b(i) .ne. exp2) call abort
+ end do
+
+ a(:) = 26.0
+ b(:) = 21.0
+
+ !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (0 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. 0.0) call abort
+ if (b(i) .ne. 0.0) call abort
+ end do
+
+#if !ACC_MEM_SHARED
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+#endif
+
+ a(:) = 4.0
+ b(:) = 0.0
+
+ !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+
+ !$acc kernels present (a(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ do i = 1, N
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ a(:) = 8.0
+ b(:) = 1.0
+
+ !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc end data
+
+ a(:) = 18.0
+ b(:) = 21.0
+
+ !$acc data copyin (a(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc data copyout (b(1:N)) if (0 == 1)
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+ !$acc data copyout (b(1:N)) if (1 == 1)
+
+ !$acc kernels present (a(1:N)) present (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+ !$acc end data
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+ !$acc end data
+ !$acc end data
+
+ do i = 1, N
+ if (b(1) .ne. 18.0) call abort
+ end do
+
+ !$acc enter data copyin (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (0 == 1)
+
+ !$acc enter data copyin (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc enter data copyin (b(1:N)) if (zero == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (zero == 1)
+
+ !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc enter data copyin (b(1:N)) if (one == 0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 0)
+
+ !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
end program main