[gomp4] Tests for private variables/state propagation
diff mbox

Message ID 20150617151515.087aa93e@octopus
State New
Headers show

Commit Message

Julian Brown June 17, 2015, 2:15 p.m. UTC
Hi,

This is a set of tests for OpenACC private variable/state propagation
support in GCC. The associated functionality is a work-in-progress: as
such, many of these tests do not pass yet (causing incorrect results,
ICEs or even bogus assembly output). I believe the tests to be valid
OpenACC, though it's possible I misinterpreted the spec at some points!

I will apply to the gomp4 branch shortly. (We will of course be working
on addressing the failures.)

Cheers,

Julian

ChangeLog

    libgomp/
    * testsuite/libgomp.oacc-c-c++-common/
    private-vars-par-gang-{1,2,3}.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/
    private-vars-local-gang-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/
    private-vars-loop-gang-{1,2,3,4,5,6}.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/
    private-vars-loop-worker-{1,2,3,4,5,6,7}.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/
    private-vars-local-worker-{1,2,3,4,5}.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/
    private-vars-loop-vector-{1,2}.c: New tests.

Patch
diff mbox

commit 40193f49480f0a0b750d15049d29fd427282c5f0
Author: Julian Brown <julian@codesourcery.com>
Date:   Tue Jun 16 03:50:55 2015 -0700

    New set of private variable/state propagation tests.

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-gang-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-gang-1.c
new file mode 100644
index 0000000..ada46d0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-gang-1.c
@@ -0,0 +1,38 @@ 
+#include <assert.h>
+
+/* Test of gang-private variables declared in local scope with parallel
+   directive.  */
+
+#if defined(ACC_DEVICE_TYPE_host) || defined(ACC_DEVICE_TYPE_host_nonshm)
+#define ACTUAL_GANGS 1
+#else
+#define ACTUAL_GANGS 32
+#endif
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[ACTUAL_GANGS];
+
+  for (i = 0; i < ACTUAL_GANGS; i++)
+    arr[i] = 3;
+
+  #pragma acc parallel copy(arr) num_gangs(ACTUAL_GANGS) num_workers(8) \
+		       vector_length(32)
+  {
+    int x;
+
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < ACTUAL_GANGS; i++)
+      x = i * 2;
+
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < ACTUAL_GANGS; i++)
+      arr[i] += x;
+  }
+
+  for (i = 0; i < ACTUAL_GANGS; i++)
+    assert (arr[i] == 3 + i * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-1.c
new file mode 100644
index 0000000..f8658e5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-1.c
@@ -0,0 +1,56 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Back-to-back worker loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+
+	#pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int x = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-2.c
new file mode 100644
index 0000000..925f9a0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-2.c
@@ -0,0 +1,51 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Successive vector loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	    
+	    x = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-3.c
new file mode 100644
index 0000000..6129523
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-3.c
@@ -0,0 +1,57 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Aggregate worker variable.  */
+
+typedef struct
+{
+  int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    vec2 pt;
+	    
+	    pt.x = i ^ j * 3;
+	    pt.y = i | j * 5;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.x * k;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.y * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-4.c
new file mode 100644
index 0000000..4cec00e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-4.c
@@ -0,0 +1,60 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Addressable worker variable.  */
+
+typedef struct
+{
+  int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    vec2 pt, *ptp;
+	    
+	    ptp = &pt;
+	    
+	    pt.x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += ptp->x * k;
+
+	    ptp->y = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.y * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-5.c
new file mode 100644
index 0000000..efc2206
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-local-worker-5.c
@@ -0,0 +1,53 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Array worker variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int pt[2];
+	    
+	    pt[0] = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+	    pt[1] = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[1] * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-1.c
new file mode 100644
index 0000000..504d11e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-1.c
@@ -0,0 +1,27 @@ 
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32];
+
+  for (i = 0; i < 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+	x = i * 2;
+	arr[i] += x;
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == i * 3);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-2.c
new file mode 100644
index 0000000..9debf83
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-2.c
@@ -0,0 +1,33 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+   to partitioned workers.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+	x = i * 2;
+
+	#pragma acc loop worker
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-3.c
new file mode 100644
index 0000000..0a77869
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-3.c
@@ -0,0 +1,31 @@ 
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+   to partitioned vectors.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+	x = i * 2;
+
+	#pragma acc loop vector
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-4.c
new file mode 100644
index 0000000..24b0059
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-4.c
@@ -0,0 +1,37 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private addressable variable declared on loop directive, with
+   broadcasting to partitioned workers.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        int *p = &x;
+
+	x = i * 2;
+
+	#pragma acc loop worker
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x;
+
+	(*p)--;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-5.c
new file mode 100644
index 0000000..b955303
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-5.c
@@ -0,0 +1,34 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private array variable declared on loop directive, with
+   broadcasting to partitioned workers.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x[8], i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(x)
+    for (i = 0; i < 32; i++)
+      {
+        for (int j = 0; j < 8; j++)
+	  x[j] = j * 2;
+
+	#pragma acc loop worker
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x[j % 8];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i % 8) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-6.c
new file mode 100644
index 0000000..0c17eaa
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-gang-6.c
@@ -0,0 +1,42 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private aggregate variable declared on loop directive, with
+   broadcasting to partitioned workers.  */
+
+typedef struct {
+  int x, y, z;
+  int attr[13];
+} vec3;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32];
+  vec3 pt;
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang private(pt)
+    for (i = 0; i < 32; i++)
+      {
+        pt.x = i;
+	pt.y = i * 2;
+	pt.z = i * 4;
+	pt.attr[5] = i * 6;
+
+	#pragma acc loop worker
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 13);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-vector-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-vector-1.c
new file mode 100644
index 0000000..b3c6ad3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-vector-1.c
@@ -0,0 +1,51 @@ 
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+
+	    #pragma acc loop vector private(x)
+	    for (k = 0; k < 32; k++)
+	      {
+		x = i ^ j * 3;
+		arr[i * 1024 + j * 32 + k] += x * k;
+	      }
+
+	    #pragma acc loop vector private(x)
+	    for (k = 0; k < 32; k++)
+	      {
+		x = i | j * 5;
+		arr[i * 1024 + j * 32 + k] += x * k;
+	      }
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-vector-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-vector-2.c
new file mode 100644
index 0000000..d4609e9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-vector-2.c
@@ -0,0 +1,46 @@ 
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive. Array type.  */
+
+int
+main (int argc, char* argv[])
+{
+  int pt[2], i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+
+	    #pragma acc loop vector private(pt)
+	    for (k = 0; k < 32; k++)
+	      {
+	        pt[0] = i ^ j * 3;
+		pt[1] = i | j * 5;
+		arr[i * 1024 + j * 32 + k] += pt[0] * k;
+		arr[i * 1024 + j * 32 + k] += pt[1] * k;
+	      }
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-1.c
new file mode 100644
index 0000000..6377f89
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-1.c
@@ -0,0 +1,36 @@ 
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    x = i ^ j * 3;
+	    /* Try to ensure 'x' accesses doesn't get optimized into a
+	       temporary.  */
+	    __asm__ __volatile__ ("");
+	    arr[i * 32 + j] += x;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-2.c
new file mode 100644
index 0000000..834b0f1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-2.c
@@ -0,0 +1,45 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-3.c
new file mode 100644
index 0000000..18d6229
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-3.c
@@ -0,0 +1,56 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Back-to-back worker loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+
+	#pragma acc loop worker private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-4.c
new file mode 100644
index 0000000..881bc7d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-4.c
@@ -0,0 +1,51 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Successive vector loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	    
+	    x = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-5.c
new file mode 100644
index 0000000..fc1535b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-5.c
@@ -0,0 +1,53 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Addressable worker variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int *p = &x;
+	    
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	    
+	    *p = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-6.c
new file mode 100644
index 0000000..feba09e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-6.c
@@ -0,0 +1,57 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Aggregate worker variable.  */
+
+typedef struct
+{
+  int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+  vec2 pt;
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker private(pt)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    
+	    pt.x = i ^ j * 3;
+	    pt.y = i | j * 5;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.x * k;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.y * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-7.c
new file mode 100644
index 0000000..5469c5d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-loop-worker-7.c
@@ -0,0 +1,56 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on loop directive, broadcasting
+   to vector-partitioned mode.  Array worker variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+  int pt[2];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  /* "pt" is treated as "present_or_copy" on the parallel directive because it
+     is an array variable.  */
+  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+  {
+    int j;
+
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        /* But here, it is made private per-worker.  */
+        #pragma acc loop worker private(pt)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    
+	    pt[0] = i ^ j * 3;
+
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+	    pt[1] = i | j * 5;
+	    
+	    #pragma acc loop vector
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[1] * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-1.c
new file mode 100644
index 0000000..831ba1e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-1.c
@@ -0,0 +1,25 @@ 
+#include <assert.h>
+
+/* Basic test of firstprivate variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32];
+
+  for (i = 0; i < 32; i++)
+    arr[i] = 3;
+
+  #pragma acc parallel firstprivate(x) copy(arr) num_gangs(32) num_workers(8) \
+		       vector_length(32)
+  {
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      arr[i] += x;
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == 8);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-2.c
new file mode 100644
index 0000000..8199186
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-2.c
@@ -0,0 +1,35 @@ 
+#include <assert.h>
+
+/* Test of gang-private variables declared on the parallel directive.  */
+
+#if defined(ACC_DEVICE_TYPE_host) || defined(ACC_DEVICE_TYPE_host_nonshm)
+#define ACTUAL_GANGS 1
+#else
+#define ACTUAL_GANGS 32
+#endif
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[ACTUAL_GANGS];
+
+  for (i = 0; i < ACTUAL_GANGS; i++)
+    arr[i] = 3;
+
+  #pragma acc parallel private(x) copy(arr) num_gangs(ACTUAL_GANGS) \
+		       num_workers(8) vector_length(32)
+  {
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < ACTUAL_GANGS; i++)
+      x = i * 2;
+
+    #pragma acc loop gang(static:1)
+    for (i = 0; i < ACTUAL_GANGS; i++)
+      arr[i] += x;
+  }
+
+  for (i = 0; i < ACTUAL_GANGS; i++)
+    assert (arr[i] == 3 + i * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-3.c
new file mode 100644
index 0000000..e5387c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-vars-par-gang-3.c
@@ -0,0 +1,35 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of gang-private array variable declared on the parallel directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x[32], i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) \
+		       vector_length(32)
+  {
+    #pragma acc loop gang
+    for (i = 0; i < 32; i++)
+      {
+        int j;
+	for (j = 0; j < 32; j++)
+	  x[j] = j * 2;
+	
+	#pragma acc loop worker
+	for (j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x[31 - j];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (31 - (i % 32)) * 2);
+
+  return 0;
+}