[gomp4] Some additional OpenACC reduction tests
diff mbox

Message ID 20150729182312.588bc7c1@octopus
State New
Headers show

Commit Message

Julian Brown July 29, 2015, 5:23 p.m. UTC
Hi,

This is a set of 19 new tests for OpenACC reductions, covering several
ways of performing reductions over the parallel and loop directives
using gang or worker/vector level parallelism. (The semantics are quite
subtle in some places, but I believe the tests follow the specification
to the letter at least, E&OE.)

Several of these do not pass yet, so have been marked with XFAILs.

I will apply to gomp4 branch shortly.

Cheers,

Julian

ChangeLog

    libgomp/
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-*.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/par-reduction-*.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-*.c:
    New tests.

Patch
diff mbox

commit d6cb22b11bbe6f536bd11110f6d5ce8349266040
Author: Julian Brown <julian@codesourcery.com>
Date:   Wed Jul 29 10:04:36 2015 -0700

    Some new OpenACC reduction tests.

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c
new file mode 100644
index 0000000..52f9a8f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c
@@ -0,0 +1,47 @@ 
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, non-private reduction
+   variable).  Both a "+" and a "*" reduction are checked against values
+   computed sequentially on the host.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  res = hres = 1;
+
+  /* Multiply arr[1] .. arr[12] and skip arr[0]: arr[0] is zero, which would
+     force the product to zero and hide errors in the other partial products.
+     12! (479001600) still fits in a 32-bit int.  */
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(*:res)
+    for (i = 1; i <= 12; i++)
+      res *= arr[i];
+  }
+
+  for (i = 1; i <= 12; i++)
+    hres *= arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c
new file mode 100644
index 0000000..b5e3b2f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c
@@ -0,0 +1,28 @@ 
+#include <assert.h>
+
+/* Test of a "+" reduction on a loop directive partitioned over gangs and
+   vectors, with a non-private reduction variable checked against the host.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang vector reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c
new file mode 100644
index 0000000..d724680
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c
@@ -0,0 +1,28 @@ 
+#include <assert.h>
+
+/* Test of a "+" reduction on a loop directive partitioned over gangs and
+   workers, with a non-private reduction variable checked against the host.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang worker reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c
new file mode 100644
index 0000000..d610373
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c
@@ -0,0 +1,28 @@ 
+#include <assert.h>
+
+/* Test of a "+" reduction on a single loop directive partitioned over gangs,
+   workers and vectors (non-private reduction variable, checked on the host).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang worker vector reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c
new file mode 100644
index 0000000..3e5c707
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c
@@ -0,0 +1,36 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of a "+" reduction on nested loop directives (int type): an outer gang
+   loop and an inner worker/vector loop share one non-private variable.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (j = 0; j < 32; j++)
+      {
+        #pragma acc loop worker vector reduction(+:res)
+        for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+      }
+    /* "res" is non-private, and is not available until after the parallel
+       region.  */
+  }
+
+  for (i = 0; i < 32768; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c
new file mode 100644
index 0000000..44d7f0f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c
@@ -0,0 +1,35 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of a "+" reduction on nested loop directives (double type): an outer
+   gang loop and an inner worker/vector loop share one non-private variable.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  double arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copyin(arr) copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (j = 0; j < 32; j++)
+      {
+        #pragma acc loop worker vector reduction(+:res)
+        for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+      }
+  }
+
+  for (i = 0; i < 32768; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c
new file mode 100644
index 0000000..8bc18f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c
@@ -0,0 +1,61 @@ 
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, multiple
+   non-private reduction variables, float type).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  float arr[32768];
+  float res = 0, mres = 0, hres = 0, hmres = 0;
+
+  /* Use small integral values so that every partial sum is an integer below
+     2^24 (the grand total is 16744448) and hence exactly representable in a
+     float.  The result is then independent of the order in which the
+     reduction combines its operands, and comparing with == is valid.  */
+  for (i = 0; i < 32768; i++)
+    arr[i] = i % (32768 / 64);
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res, mres)
+  {
+    #pragma acc loop gang reduction(+:res) reduction(max:mres)
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + i];
+	    if (arr[j * 1024 + i] > mres)
+	      mres = arr[j * 1024 + i];
+	  }
+
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + (1023 - i)];
+	    if (arr[j * 1024 + (1023 - i)] > mres)
+	      mres = arr[j * 1024 + (1023 - i)];
+	  }
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres += arr[j * 1024 + i];
+	hres += arr[j * 1024 + (1023 - i)];
+	if (arr[j * 1024 + i] > hmres)
+	  hmres = arr[j * 1024 + i];
+	if (arr[j * 1024 + (1023 - i)] > hmres)
+	  hmres = arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+  assert (mres == hmres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c
new file mode 100644
index 0000000..ce8cb38
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c
@@ -0,0 +1,41 @@ 
+#include <assert.h>
+
+/* Test of a "+" reduction on a vector loop nested in a gang loop (private
+   reduction variable); each out[j] is checked against a host-computed sum.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = 0;
+
+	#pragma acc loop vector reduction(+:res)
+	for (i = 0; i < 32; i++)
+	  res += arr[j * 32 + i];
+	
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = 0;
+      
+      for (i = 0; i < 32; i++)
+	hres += arr[j * 32 + i];
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c
new file mode 100644
index 0000000..63f3fef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c
@@ -0,0 +1,43 @@ 
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of a vector "+" reduction inside a gang/worker-partitioned loop nest
+   (private reduction variable): computes out = ina * inb for 32x32 doubles.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, k;
+  double ina[1024], inb[1024], out[1024], acc;
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 32; i++)
+      {
+        ina[j * 32 + i] = (i == j) ? 2.0 : 0.0;
+	inb[j * 32 + i] = (double) (i + j);
+      }
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(acc) copyin(ina, inb) copyout(out)
+  {
+    #pragma acc loop gang worker
+    for (k = 0; k < 32; k++)
+      for (j = 0; j < 32; j++)
+        {
+	  acc = 0;
+
+	  #pragma acc loop vector reduction(+:acc)
+	  for (i = 0; i < 32; i++)
+	    acc += ina[k * 32 + i] * inb[i * 32 + j];
+
+	  out[k * 32 + j] = acc;
+	}
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 32; i++)
+      assert (out[j * 32 + i] == (i + j) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c
new file mode 100644
index 0000000..78f6be0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c
@@ -0,0 +1,41 @@ 
+#include <assert.h>
+
+/* Test of a "+" reduction on a worker loop nested in a gang loop (private
+   reduction variable); each out[j] is checked against a host-computed sum.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = 0;
+
+	#pragma acc loop worker reduction(+:res)
+	for (i = 0; i < 32; i++)
+	  res += arr[j * 32 + i];
+	
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = 0;
+      
+      for (i = 0; i < 32; i++)
+	hres += arr[j * 32 + i];
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c
new file mode 100644
index 0000000..2765908
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c
@@ -0,0 +1,41 @@ 
+#include <assert.h>
+
+/* Test of a "+" reduction on one worker/vector loop nested in a gang loop
+   (private reduction variable); each out[j] is checked against the host.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = 0;
+
+	#pragma acc loop worker vector reduction(+:res)
+	for (i = 0; i < 32; i++)
+	  res += arr[j * 32 + i];
+	
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = 0;
+      
+      for (i = 0; i < 32; i++)
+	hres += arr[j * 32 + i];
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c
new file mode 100644
index 0000000..c30b0e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c
@@ -0,0 +1,45 @@ 
+#include <assert.h>
+
+/* Test of "+" reductions on loop directives (a worker loop followed by a
+   vector loop inside one gang loop, sharing a private reduction variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = j;
+
+	#pragma acc loop worker reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+
+	#pragma acc loop vector reduction(+:res)
+	for (i = 1023; i >= 0; i--)
+	  res += arr[j * 1024 + i];
+
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = j;
+      
+      for (i = 0; i < 1024; i++)
+	hres += arr[j * 1024 + i] * 2;
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c
new file mode 100644
index 0000000..ac96525
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c
@@ -0,0 +1,41 @@ 
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+   variable: gang-redundant mode).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i ^ 33;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyin(arr) copyout(out)
+  {
+    /* Private variables are not initialized on entry to the region, so give
+       the private copy of "res" a defined starting value.  */
+    res = 0;
+
+    /* "res" should be available at the end of the following loop (and should
+       have the same value redundantly in each gang).  */
+    #pragma acc loop worker vector reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+    
+    #pragma acc loop gang (static: 1)
+    for (i = 0; i < 32; i++)
+      out[i] = res;
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  for (i = 0; i < 32; i++)
+    assert (out[i] == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c
new file mode 100644
index 0000000..0e0ce96
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c
@@ -0,0 +1,38 @@ 
+#include <assert.h>
+
+/* Test of "+" reduction on both parallel and loop directives (a worker loop
+   and then a vector loop, each nested in the same gang-partitioned loop, int
+   type); every element is added twice, hence the "* 2" in the host check.  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(+:res)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+
+	#pragma acc loop vector reduction(+:res)
+	for (i = 1023; i >= 0; i--)
+	  res += arr[j * 1024 + i];
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      hres += arr[j * 1024 + i] * 2;
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
new file mode 100644
index 0000000..a7a75a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
@@ -0,0 +1,40 @@ 
+#include <assert.h>
+
+/* Test of "^" reduction on both parallel and loop directives (two successive
+   worker/vector loops nested in one gang-partitioned loop, int type).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(^:res)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(^:res)
+	for (i = 0; i < 1024; i++)
+	  res ^= arr[j * 1024 + i];
+
+	#pragma acc loop worker vector reduction(^:res)
+	for (i = 0; i < 1024; i++)
+	  res ^= arr[j * 1024 + (1023 - i)];
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres ^= arr[j * 1024 + i];
+	hres ^= arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c
new file mode 100644
index 0000000..860e56d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c
@@ -0,0 +1,48 @@ 
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+   together in gang-partitioned mode, float type).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  float arr[32768];
+  float res = 0, hres = 0;
+
+  /* Use small integral values so that every partial sum is an integer below
+     2^24 (the grand total is 16744448) and hence exactly representable in a
+     float.  The result is then independent of the order in which the
+     reduction combines its operands, and comparing with == is valid.  */
+  for (i = 0; i < 32768; i++)
+    arr[i] = i % (32768 / 64);
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(+:res)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+
+	#pragma acc loop worker vector reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + (1023 - i)];
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres += arr[j * 1024 + i];
+	hres += arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
new file mode 100644
index 0000000..41e0f71
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
@@ -0,0 +1,61 @@ 
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+   together in gang-partitioned mode, float type, multiple reductions).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  float arr[32768];
+  float res = 0, mres = 0, hres = 0, hmres = 0;
+
+  /* Use small integral values so that every partial sum is an integer below
+     2^24 (the grand total is 16744448) and hence exactly representable in a
+     float.  The result is then independent of the order in which the
+     reduction combines its operands, and comparing with == is valid.  */
+  for (i = 0; i < 32768; i++)
+    arr[i] = i % (32768 / 64);
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(+:res) reduction(max:mres)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + i];
+	    if (arr[j * 1024 + i] > mres)
+	      mres = arr[j * 1024 + i];
+	  }
+
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + (1023 - i)];
+	    if (arr[j * 1024 + (1023 - i)] > mres)
+	      mres = arr[j * 1024 + (1023 - i)];
+	  }
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres += arr[j * 1024 + i];
+	hres += arr[j * 1024 + (1023 - i)];
+	if (arr[j * 1024 + i] > hmres)
+	  hmres = arr[j * 1024 + i];
+	if (arr[j * 1024 + (1023 - i)] > hmres)
+	  hmres = arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+  assert (mres == hmres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
new file mode 100644
index 0000000..1172ca7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
@@ -0,0 +1,37 @@ 
+#include <assert.h>
+
+/* Test of "+" and "*" reductions on parallel directive (vs. atomic updates).  */
+
+#define ACTUAL_GANGS 256
+
+int
+main (int argc, char *argv[])
+{
+  int res = 0, res2 = 0;
+
+  #pragma acc parallel num_gangs(ACTUAL_GANGS) num_workers(32) \
+		       vector_length(32) reduction(+:res) copy(res2)
+  {
+    res += 5;
+
+    #pragma acc atomic
+    res2 += 5;
+  }
+
+  assert (res == res2);
+
+  res = res2 = 1;
+  
+  #pragma acc parallel num_gangs(8) num_workers(32)  vector_length(32) \
+		       reduction(*:res) copy(res2)
+  {
+    res *= 5;
+
+    #pragma acc atomic
+    res2 *= 5;
+  }
+  
+  assert (res == res2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
new file mode 100644
index 0000000..92451ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
@@ -0,0 +1,42 @@ 
+#include <assert.h>
+#include <openacc.h>
+
+/* Test of "+" and "*" reductions on parallel directive (with async/wait).  */
+
+#define ACTUAL_GANGS 256
+
+int
+main (int argc, char *argv[])
+{
+  int res = 0, res2 = 0;
+
+  #pragma acc parallel num_gangs(ACTUAL_GANGS) num_workers(32) \
+		       vector_length(32) reduction(+:res) copy(res2) async(1)
+  {
+    res += 5;
+
+    #pragma acc atomic
+    res2 += 5;
+  }
+
+  acc_wait (1);
+
+  assert (res == res2);
+
+  res = res2 = 1;
+
+  #pragma acc parallel num_gangs(8) num_workers(32) vector_length(32) \
+		       reduction(*:res) copy(res2) async(1)
+  {
+    res *= 5;
+
+    #pragma acc atomic
+    res2 *= 5;
+  }
+
+  acc_wait (1);
+
+  assert (res == res2);
+
+  return 0;
+}