diff mbox

[gomp4] Additional tests for kernels directive.

Message ID 55AEA42B.6040400@codesourcery.com
State New
Headers show

Commit Message

James Norris July 21, 2015, 7:57 p.m. UTC
Hi,

The attached patch adds tests for the if, async, and wait
clauses of the OpenACC kernels directive to the existing
libgomp C/C++ and Fortran test cases.

Jim
diff mbox

Patch

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
index 22cef6d..d478ce2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
@@ -460,6 +460,438 @@  main (int argc, char **argv)
             abort ();
     }
 
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc kernels wait (1) async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 11.0)
+            abort ();
+    }
+
+
+    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+        abort ();
+    }
+
+    acc_set_cuda_stream (1, stream1);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 7.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 7.0)
+            abort ();
+
+        if (b[i] != 49.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc kernels wait (1) async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 17.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 16.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async (1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
+
+#pragma acc wait (1)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 25.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
     acc_shutdown (acc_device_nvidia);
 
     return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
index 184b355..5478bb6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
@@ -609,5 +609,357 @@  main(int argc, char **argv)
 	abort ();
 #endif
 
+    for (i = 0; i < N; i++)
+        a[i] = 4.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 5.0;
+#else
+    exp = 4.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+
+#pragma acc kernels if(0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 17.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 8.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 9.0;
+#else
+    exp = 8.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+#pragma acc kernels if(zero)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 23.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(true)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 17.0;
+#else
+    exp = 16.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 76.0;
+
+#pragma acc kernels if(false)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 77.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+    n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 23.0;
+#else
+    exp = 22.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 18.0;
+
+    n = 0;
+
+#pragma acc kernels if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 19.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 49.0;
+
+    n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 50.0;
+#else
+    exp = 49.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 38.0;
+
+    n = 0;
+
+#pragma acc kernels if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 39.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 91.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(-2)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 92.0;
+#else
+    exp = 91.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 43.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 44.0;
+#else
+    exp = 43.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 87.0;
+
+#pragma acc kernels if(one == 0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 88.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 9.0;
+    }
+
+#if ACC_MEM_SHARED
+    exp = 0.0;
+    exp2 = 0.0;
+#else
+    acc_map_data (a, d_a, N * sizeof (float));
+    acc_map_data (b, d_b, N * sizeof (float));
+    exp = 3.0;
+    exp2 = 9.0;
+#endif
+
     return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
index b6e637b..01728bd 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
@@ -132,4 +132,126 @@  program asyncwait
      if (d(i) .ne. 1.0) call abort
      if (e(i) .ne. 11.0) call abort
   end do
+
+  a(:) = 3.0
+  b(:) = 0.0
+
+  !$acc data copy (a(1:N)) copy (b(1:N))
+
+  !$acc kernels async
+  !$acc loop
+  do i = 1, N
+     b(i) = a(i)
+  end do
+  !$acc end kernels
+
+  !$acc wait
+  !$acc end data
+
+  do i = 1, N
+     if (a(i) .ne. 3.0) call abort
+     if (b(i) .ne. 3.0) call abort
+  end do
+
+  a(:) = 2.0
+  b(:) = 0.0
+
+  !$acc data copy (a(1:N)) copy (b(1:N))
+
+  !$acc kernels async (1)
+  !$acc loop
+  do i = 1, N
+     b(i) = a(i)
+  end do
+  !$acc end kernels
+
+  !$acc wait (1)
+  !$acc end data
+
+  do i = 1, N
+     if (a(i) .ne. 2.0) call abort
+     if (b(i) .ne. 2.0) call abort
+  end do
+
+  a(:) = 3.0
+  b(:) = 0.0
+  c(:) = 0.0
+  d(:) = 0.0
+
+  !$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N))
+
+  !$acc kernels async (1)
+  do i = 1, N
+     b(i) = (a(i) * a(i) * a(i)) / a(i)
+  end do
+  !$acc end kernels
+
+  !$acc kernels async (1)
+  do i = 1, N
+     c(i) = (a(i) * 4) / a(i)
+  end do
+  !$acc end kernels
+
+  !$acc kernels async (1)
+  !$acc loop
+  do i = 1, N
+     d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+  end do
+  !$acc end kernels
+
+  !$acc wait (1)
+  !$acc end data
+
+  do i = 1, N
+     if (a(i) .ne. 3.0) call abort
+     if (b(i) .ne. 9.0) call abort
+     if (c(i) .ne. 4.0) call abort
+     if (d(i) .ne. 1.0) call abort
+  end do
+
+  a(:) = 2.0
+  b(:) = 0.0
+  c(:) = 0.0
+  d(:) = 0.0
+  e(:) = 0.0
+
+  !$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
+
+  !$acc kernels async (1)
+  do i = 1, N
+     b(i) = (a(i) * a(i) * a(i)) / a(i)
+  end do
+  !$acc end kernels
+
+  !$acc kernels async (1)
+  !$acc loop
+  do i = 1, N
+     c(i) = (a(i) * 4) / a(i)
+  end do
+  !$acc end kernels
+
+  !$acc kernels async (1)
+  !$acc loop
+  do i = 1, N
+     d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+  end do
+  !$acc end kernels
+
+  !$acc kernels wait (1) async (1)
+  !$acc loop
+  do i = 1, N
+     e(i) = a(i) + b(i) + c(i) + d(i)
+  end do
+  !$acc end kernels
+
+  !$acc wait (1)
+  !$acc end data
+
+  do i = 1, N
+     if (a(i) .ne. 2.0) call abort
+     if (b(i) .ne. 4.0) call abort
+     if (c(i) .ne. 4.0) call abort
+     if (d(i) .ne. 1.0) call abort
+     if (e(i) .ne. 11.0) call abort
+  end do
 end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
index bade52b..fe131b6 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
@@ -1,6 +1,6 @@ 
 ! { dg-do run }
 
-program parallel_wait
+program asyncwait
   integer, parameter :: N = 64
   real, allocatable :: a(:), b(:), c(:)
   integer i
@@ -33,8 +33,33 @@  program parallel_wait
   do i = 1, N
     if (c(i) .ne. 2.0) call abort
   end do
+
+  !$acc kernels async (0)
+  !$acc loop
+  do i = 1, N
+    a(i) = 1
+  end do
+  !$acc end kernels
+
+  !$acc kernels async (1)
+  !$acc loop
+  do i = 1, N
+    b(i) = 1
+  end do
+  !$acc end kernels
+
+  !$acc kernels wait (0, 1)
+  !$acc loop
+  do i = 1, N
+    c(i) = a(i) + b(i)
+  end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (c(i) .ne. 2.0) call abort
+  end do
   
   deallocate (a)
   deallocate (b)
   deallocate (c)
-end program parallel_wait
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
index d48dc11..fa96a01 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
@@ -1,6 +1,6 @@ 
 ! { dg-do run }
 
-program parallel_wait
+program asyncwait
   integer, parameter :: N = 64
   real, allocatable :: a(:), b(:), c(:)
   integer i
@@ -35,8 +35,35 @@  program parallel_wait
   do i = 1, N
     if (c(i) .ne. 2.0) call abort
   end do
+
+  !$acc kernels async (0)
+  !$acc loop
+  do i = 1, N
+    a(i) = 1
+  end do
+  !$acc end kernels
+
+  !$acc kernels async (1)
+  !$acc loop
+  do i = 1, N
+    b(i) = 1
+  end do
+  !$acc end kernels
+
+  !$acc wait (0, 1)
+
+  !$acc kernels
+  !$acc loop
+  do i = 1, N
+    c(i) = a(i) + b(i)
+  end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (c(i) .ne. 2.0) call abort
+  end do
   
   deallocate (a)
   deallocate (b)
   deallocate (c)
-end program parallel_wait
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
index 1729a3b..e54c1b2 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
@@ -450,4 +450,437 @@  program main
   if (acc_is_present (b) .eqv. .TRUE.) call abort
 #endif
 
+  a(:) = 4.0
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+     do i = 1, N
+        if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+          b(i) = a(i) + 1
+        else
+          b(i) = a(i)
+        end if
+     end do
+  !$acc end kernels
+
+#if ACC_MEM_SHARED
+  exp = 5.0
+#else
+  exp = 4.0
+#endif
+
+  do i = 1, N
+    if (b(i) .ne. exp) call abort
+  end do
+
+  a(:) = 16.0
+
+  !$acc kernels if (0 == 1)
+     do i = 1, N
+       if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+         b(i) = a(i) + 1
+       else
+         b(i) = a(i)
+       end if
+     end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 17.0) call abort
+  end do
+
+  a(:) = 8.0
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+#if ACC_MEM_SHARED
+  exp = 9.0
+#else
+  exp = 8.0
+#endif
+
+  do i = 1, N
+    if (b(i) .ne. exp) call abort
+  end do
+
+  a(:) = 22.0
+
+  !$acc kernels if (zero == 1)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 23.0) call abort
+  end do
+
+  a(:) = 16.0
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+#if ACC_MEM_SHARED
+  exp = 17.0;
+#else
+  exp = 16.0;
+#endif
+
+  do i = 1, N
+    if (b(i) .ne. exp) call abort
+  end do
+
+  a(:) = 76.0
+
+  !$acc kernels if (.FALSE.)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 77.0) call abort
+  end do
+
+  a(:) = 22.0
+
+  nn = 1
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+#if ACC_MEM_SHARED
+  exp = 23.0;
+#else
+  exp = 22.0;
+#endif
+
+  do i = 1, N
+    if (b(i) .ne. exp) call abort
+  end do
+
+  a(:) = 18.0
+
+  nn = 0
+
+  !$acc kernels if (nn == 1)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 19.0) call abort
+  end do
+
+  a(:) = 49.0
+
+  nn = 1
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+#if ACC_MEM_SHARED
+  exp = 50.0
+#else
+  exp = 49.0
+#endif
+
+  do i = 1, N
+    if (b(i) .ne. exp) call abort
+  end do
+
+  a(:) = 38.0
+
+  nn = 0;
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 39.0) call abort
+  end do
+
+  a(:) = 91.0
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 92.0) call abort
+  end do
+
+  a(:) = 43.0
+
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+#if ACC_MEM_SHARED
+  exp = 44.0
+#else
+  exp = 43.0
+#endif
+
+  do i = 1, N
+    if (b(i) .ne. exp) call abort
+  end do
+
+  a(:) = 87.0
+
+  !$acc kernels if (one == 0)
+    do i = 1, N
+      if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+        b(i) = a(i) + 1
+      else
+        b(i) = a(i)
+      end if
+    end do
+  !$acc end kernels
+
+  do i = 1, N
+    if (b(i) .ne. 88.0) call abort
+  end do
+
+  a(:) = 3.0
+  b(:) = 9.0
+
+#if ACC_MEM_SHARED
+  exp = 0.0
+  exp2 = 0.0
+#else
+  call acc_copyin (a, sizeof (a))
+  call acc_copyin (b, sizeof (b))
+  exp = 3.0;
+  exp2 = 9.0;
+#endif
+
+  !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+  a(:) = 0.0
+  b(:) = 0.0
+
+  !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+  do i = 1, N
+    if (a(i) .ne. exp) call abort
+    if (b(i) .ne. exp2) call abort
+  end do
+
+  a(:) = 6.0
+  b(:) = 12.0
+
+  !$acc update device (a(1:N), b(1:N)) if (0 == 1)
+
+  a(:) = 0.0
+  b(:) = 0.0
+
+  !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+  do i = 1, N
+    if (a(i) .ne. exp) call abort
+    if (b(i) .ne. exp2) call abort
+  end do
+
+  a(:) = 26.0
+  b(:) = 21.0
+
+  !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+  a(:) = 0.0
+  b(:) = 0.0
+
+  !$acc update host (a(1:N), b(1:N)) if (0 == 1)
+
+  do i = 1, N
+    if (a(i) .ne. 0.0) call abort
+    if (b(i) .ne. 0.0) call abort
+  end do
+
+#if !ACC_MEM_SHARED
+  call acc_copyout (a, sizeof (a))
+  call acc_copyout (b, sizeof (b))
+#endif
+
+  a(:) = 4.0
+  b(:) = 0.0
+
+  !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+
+    !$acc kernels present (a(1:N))
+       do i = 1, N
+           b(i) = a(i)
+       end do
+    !$acc end kernels
+  !$acc end data
+
+  do i = 1, N
+    if (b(i) .ne. 4.0) call abort
+  end do
+
+  a(:) = 8.0
+  b(:) = 1.0
+
+  !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+  if (acc_is_present (a) .eqv. .TRUE.) call abort
+  if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+  !$acc end data
+
+  a(:) = 18.0
+  b(:) = 21.0
+
+  !$acc data copyin (a(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (a) .eqv. .FALSE.) call abort
+#endif
+
+    !$acc data copyout (b(1:N)) if (0 == 1)
+#if !ACC_MEM_SHARED
+      if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+        !$acc data copyout (b(1:N)) if (1 == 1)
+
+        !$acc kernels present (a(1:N)) present (b(1:N))
+          do i = 1, N
+            b(i) = a(i)
+          end do
+      !$acc end kernels
+
+    !$acc end data
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+    !$acc end data
+  !$acc end data
+
+  do i = 1, N
+   if (b(i) .ne. 18.0) call abort  ! compare every element; b(1) alone left 2..N unchecked
+  end do
+
+  !$acc enter data copyin (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+  if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+  !$acc exit data delete (b(1:N)) if (0 == 1)
+
+  !$acc enter data copyin (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+  !$acc exit data delete (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+  if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+  !$acc enter data copyin (b(1:N)) if (zero == 1)
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+  !$acc exit data delete (b(1:N)) if (zero == 1)
+
+  !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+  !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+  if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+  !$acc enter data copyin (b(1:N)) if (one == 0)
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+  !$acc exit data delete (b(1:N)) if (one == 0)
+
+  !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+  !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+  if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
 end program main