diff mbox

[committed] Add oacc kernels tests in goacc

Message ID 569CE9AA.4000201@mentor.com
State New
Headers show

Commit Message

Tom de Vries Jan. 18, 2016, 1:33 p.m. UTC
[ was: Re: [PATCH, 13/16] Add c-c++-common/goacc/kernels-*.c ]

On 09/11/15 21:07, Tom de Vries wrote:
> On 09/11/15 16:35, Tom de Vries wrote:
>> Hi,
>>
>> this patch series for stage1 trunk adds support to:
>> - parallelize oacc kernels regions using parloops, and
>> - map the loops onto the oacc gang dimension.
>>
>> The patch series contains these patches:
>>
>>       1    Insert new exit block only when needed in
>>          transform_to_exit_first_loop_alt
>>       2    Make create_parallel_loop return void
>>       3    Ignore reduction clause on kernels directive
>>       4    Implement -foffload-alias
>>       5    Add in_oacc_kernels_region in struct loop
>>       6    Add pass_oacc_kernels
>>       7    Add pass_dominator_oacc_kernels
>>       8    Add pass_ch_oacc_kernels
>>       9    Add pass_parallelize_loops_oacc_kernels
>>      10    Add pass_oacc_kernels pass group in passes.def
>>      11    Update testcases after adding kernels pass group
>>      12    Handle acc loop directive
>>      13    Add c-c++-common/goacc/kernels-*.c
>>      14    Add gfortran.dg/goacc/kernels-*.f95
>>      15    Add libgomp.oacc-c-c++-common/kernels-*.c
>>      16    Add libgomp.oacc-fortran/kernels-*.f95
>>
>> The first 9 patches are more or less independent, but patches 10-16 are
>> intended to be committed at the same time.
>>
>> Bootstrapped and reg-tested on x86_64.
>>
>> Build and reg-tested with nvidia accelerator, in combination with a
>> patch that enables accelerator testing (which is submitted at
>> https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).
>>
>> I'll post the individual patches in reply to this message.
>
> This patch adds C/C++ oacc kernels compilation tests.
>

This reduced patch contains the test-cases that currently pass.

Bootstrapped and reg-tested on x86_64.

Build with nvidia accelerator and tested goacc.exp and libgomp.

Committed to trunk.

Thanks,
- Tom
diff mbox

Patch

Add oacc kernels tests in goacc

2015-11-09  Tom de Vries  <tom@codesourcery.com>

	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
	* c-c++-common/goacc/kernels-double-reduction.c: New test.
	* c-c++-common/goacc/kernels-empty.c: New test.
	* c-c++-common/goacc/kernels-eternal.c: New test.
	* c-c++-common/goacc/kernels-loop-2.c: New test.
	* c-c++-common/goacc/kernels-loop-3.c: New test.
	* c-c++-common/goacc/kernels-loop-data-2.c: New test.
	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test.
	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test.
	* c-c++-common/goacc/kernels-loop-data-update.c: New test.
	* c-c++-common/goacc/kernels-loop-data.c: New test.
	* c-c++-common/goacc/kernels-loop-g.c: New test.
	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: New test.
	* c-c++-common/goacc/kernels-loop-n.c: New test.
	* c-c++-common/goacc/kernels-loop-nest.c: New test.
	* c-c++-common/goacc/kernels-loop.c: New test.
	* c-c++-common/goacc/kernels-noreturn.c: New test.
	* c-c++-common/goacc/kernels-one-counter-var.c: New test.
	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New test.
	* c-c++-common/goacc/kernels-reduction.c: New test.

---
 .../goacc/kernels-counter-vars-function-scope.c    | 54 +++++++++++++++++
 .../goacc/kernels-double-reduction-n.c             | 37 ++++++++++++
 .../c-c++-common/goacc/kernels-double-reduction.c  | 37 ++++++++++++
 gcc/testsuite/c-c++-common/goacc/kernels-empty.c   |  6 ++
 gcc/testsuite/c-c++-common/goacc/kernels-eternal.c | 11 ++++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  | 70 ++++++++++++++++++++++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  | 49 +++++++++++++++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  | 17 ++++++
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c | 52 ++++++++++++++++
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  | 56 +++++++++++++++++
 .../c-c++-common/goacc/kernels-loop-nest.c         | 39 ++++++++++++
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c    | 56 +++++++++++++++++
 .../c-c++-common/goacc/kernels-noreturn.c          | 12 ++++
 .../c-c++-common/goacc/kernels-one-counter-var.c   | 54 +++++++++++++++++
 .../c-c++-common/goacc/kernels-reduction.c         | 36 +++++++++++
 15 files changed, 586 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
new file mode 100644
index 0000000..e8b5357
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -0,0 +1,54 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+  COUNTERTYPE i;
+  COUNTERTYPE ii;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  for (i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
new file mode 100644
index 0000000..c39d674
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
@@ -0,0 +1,37 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N 500
+
+unsigned int a[N][N];
+
+void  __attribute__((noinline,noclone))
+foo (unsigned int n)
+{
+  int i, j;
+  unsigned int sum = 1;
+
+#pragma acc kernels copyin (a[0:n]) copy (sum)
+  {
+    for (i = 0; i < n; ++i)
+      for (j = 0; j < n; ++j)
+	sum += a[i][j];
+  }
+
+  if (sum != 5001)
+    abort ();
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
new file mode 100644
index 0000000..3501d0d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -0,0 +1,37 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N 500
+
+unsigned int a[N][N];
+
+void  __attribute__((noinline,noclone))
+foo (void)
+{
+  int i, j;
+  unsigned int sum = 1;
+
+#pragma acc kernels copyin (a[0:N]) copy (sum)
+  {
+    for (i = 0; i < N; ++i)
+      for (j = 0; j < N; ++j)
+	sum += a[i][j];
+  }
+
+  if (sum != 5001)
+    abort ();
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-empty.c b/gcc/testsuite/c-c++-common/goacc/kernels-empty.c
new file mode 100644
index 0000000..e91b81c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-empty.c
@@ -0,0 +1,6 @@ 
+void
+foo (void)
+{
+#pragma acc kernels
+  ;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-eternal.c b/gcc/testsuite/c-c++-common/goacc/kernels-eternal.c
new file mode 100644
index 0000000..edc17d2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-eternal.c
@@ -0,0 +1,11 @@ 
+int
+main (void)
+{
+#pragma acc kernels
+  {
+    while (1)
+      ;
+  }
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
new file mode 100644
index 0000000..f97584d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -0,0 +1,70 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc kernels copyout (a[0:N])
+  {
+#ifdef ACC_LOOP
+    #pragma acc loop
+#endif
+    for (COUNTERTYPE i = 0; i < N; i++)
+      a[i] = i * 2;
+  }
+
+#pragma acc kernels copyout (b[0:N])
+  {
+#ifdef ACC_LOOP
+    #pragma acc loop
+#endif
+    for (COUNTERTYPE i = 0; i < N; i++)
+      b[i] = i * 4;
+  }
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+#ifdef ACC_LOOP
+    #pragma acc loop
+#endif
+    for (COUNTERTYPE ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only three loops are analyzed, and that all can be
+   parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops1" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
new file mode 100644
index 0000000..530d62a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
@@ -0,0 +1,49 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int i;
+
+  unsigned int *__restrict c;
+
+  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    c[i] = i * 2;
+
+#pragma acc kernels copy (c[0:N])
+  {
+#ifdef ACC_LOOP
+    #pragma acc loop
+#endif
+    for (COUNTERTYPE ii = 0; ii < N; ii++)
+      c[ii] = c[ii] + ii + 1;
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    if (c[i] != i * 2 + i + 1)
+      abort ();
+
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
new file mode 100644
index 0000000..4f1c2c5
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
@@ -0,0 +1,17 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-g" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include "kernels-loop.c"
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
new file mode 100644
index 0000000..151db51
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
@@ -0,0 +1,52 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N ((1024 * 512) + 1)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (COUNTERTYPE ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
new file mode 100644
index 0000000..bee5f5a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -0,0 +1,56 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N ((1024 * 512) + 1)
+#define COUNTERTYPE unsigned int
+
+int
+foo (COUNTERTYPE n)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
+  b = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
+  c = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
+
+  for (COUNTERTYPE i = 0; i < n; i++)
+    a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < n; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:n], b[0:n]) copyout (c[0:n])
+  {
+#ifdef ACC_LOOP
+    #pragma acc loop
+#endif
+    for (COUNTERTYPE ii = 0; ii < n; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < n; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
new file mode 100644
index 0000000..ea0e342
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -0,0 +1,39 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+/* Based on autopar/outer-1.c.  */
+
+#include <stdlib.h>
+
+#define N 1000
+
+int
+main (void)
+{
+  int x[N][N];
+
+#pragma acc kernels copyout (x)
+  {
+    for (int ii = 0; ii < N; ii++)
+      for (int jj = 0; jj < N; jj++)
+	x[ii][jj] = ii + jj + 3;
+  }
+
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      if (x[i][j] != i + j + 3)
+	abort ();
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop.c
new file mode 100644
index 0000000..ab5dfb9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop.c
@@ -0,0 +1,56 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+#ifdef ACC_LOOP
+    #pragma acc loop
+#endif
+    for (COUNTERTYPE ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c b/gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c
new file mode 100644
index 0000000..1a8cc67
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c
@@ -0,0 +1,12 @@ 
+int
+main (void)
+{
+
+#pragma acc kernels
+  {
+    __builtin_abort ();
+  }
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c b/gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
new file mode 100644
index 0000000..b16a8cd
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -0,0 +1,54 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+  COUNTERTYPE i;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  for (i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (i = 0; i < N; i++)
+      c[i] = a[i] + b[i];
+  }
+
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-reduction.c b/gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
new file mode 100644
index 0000000..61c5df3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
@@ -0,0 +1,36 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops1-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define n 10000
+
+unsigned int a[n];
+
+void  __attribute__((noinline,noclone))
+foo (void)
+{
+  int i;
+  unsigned int sum = 1;
+
+#pragma acc kernels copyin (a[0:n]) copy (sum)
+  {
+    for (i = 0; i < n; ++i)
+      sum += a[i];
+  }
+
+  if (sum != 5001)
+    abort ();
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
+