diff mbox

Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels

Message ID 87inl62zw1.fsf@hertz.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge May 12, 2017, 9:22 a.m. UTC
Hi!

On Wed, 10 May 2017 18:30:54 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, May 09, 2017 at 10:57:34PM +0200, Thomas Schwinge wrote:
> >     Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels

> Ok.

Thanks.  Committed to trunk in r247957:

commit 5dd0c4e81e7a79afccfc936407affbdda2e3b737
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 09:18:34 2017 +0000

    [PR middle-end/69921] Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
    
            gcc/
            PR middle-end/69921
            * tree-parloops.c (create_parallel_loop): Set "oacc kernels
            parallelized" attribute for parallelized OpenACC kernels.
            * omp-offload.c (execute_oacc_device_lower): Use it.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/kernels-counter-vars-function-scope.c:
            Likewise.
            * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
            * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3.c: Likewise.
            * c-c++-common/goacc/kernels-loop-g.c: Likewise.
            * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n.c: Likewise.
            * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
            * c-c++-common/goacc/kernels-loop.c: Likewise.
            * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
            * c-c++-common/goacc/kernels-reduction.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop.f95: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247957 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  5 +++++
 gcc/omp-offload.c                                  | 24 ++++++++++++++++----
 gcc/testsuite/ChangeLog                            | 26 ++++++++++++++++++++++
 .../goacc/classify-kernels-unparallelized.c        |  2 +-
 .../c-c++-common/goacc/classify-kernels.c          |  6 ++---
 .../goacc/kernels-counter-vars-function-scope.c    |  3 +--
 .../goacc/kernels-double-reduction-n.c             |  3 +--
 .../c-c++-common/goacc/kernels-double-reduction.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  |  3 +--
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  |  3 +--
 .../c-c++-common/goacc/kernels-loop-nest.c         |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c    |  3 +--
 .../c-c++-common/goacc/kernels-one-counter-var.c   |  3 +--
 .../c-c++-common/goacc/kernels-reduction.c         |  3 +--
 .../goacc/classify-kernels-unparallelized.f95      |  2 +-
 .../gfortran.dg/goacc/classify-kernels.f95         |  6 ++---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data-2.f95      |  3 +--
 .../goacc/kernels-loop-data-enter-exit-2.f95       |  3 +--
 .../goacc/kernels-loop-data-enter-exit.f95         |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data-update.f95 |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data.f95        |  3 +--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 |  5 ++---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   |  3 +--
 gcc/tree-parloops.c                                | 16 +++++++------
 28 files changed, 89 insertions(+), 60 deletions(-)



Grüße
 Thomas
diff mbox

Patch

diff --git gcc/ChangeLog gcc/ChangeLog
index aeb22df..580a3db 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,10 @@ 
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/69921
+	* tree-parloops.c (create_parallel_loop): Set "oacc kernels
+	parallelized" attribute for parallelized OpenACC kernels.
+	* omp-offload.c (execute_oacc_device_lower): Use it.
+
 	* omp-expand.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
 	Set "oacc kernels" attribute.
 	* omp-general.c (oacc_set_fn_attrib): Remove is_kernel formal
diff --git gcc/omp-offload.c gcc/omp-offload.c
index d24f131..9372f9e 100644
--- gcc/omp-offload.c
+++ gcc/omp-offload.c
@@ -1444,6 +1444,13 @@  execute_oacc_device_lower ()
       flag_openacc_dims = (char *)&flag_openacc_dims;
     }
 
+  bool is_oacc_kernels
+    = (lookup_attribute ("oacc kernels",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  bool is_oacc_kernels_parallelized
+    = (lookup_attribute ("oacc kernels parallelized",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
   int fn_level = oacc_fn_attrib_level (attrs);
@@ -1453,17 +1460,26 @@  execute_oacc_device_lower ()
       if (fn_level >= 0)
 	fprintf (dump_file, "Function is OpenACC routine level %d\n",
 		 fn_level);
-      else if (lookup_attribute ("oacc kernels",
-				 DECL_ATTRIBUTES (current_function_decl)))
-	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (is_oacc_kernels)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 (is_oacc_kernels_parallelized
+		  ? "parallelized" : "unparallelized"));
       else
 	fprintf (dump_file, "Function is OpenACC parallel offload\n");
     }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+  /* OpenACC kernels constructs are special: they currently don't use the
+     generic oacc_loop infrastructure and attribute/dimension processing.  */
+  if (is_oacc_kernels && is_oacc_kernels_parallelized)
+    {
+      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
+	 also tree-parloops.c:create_parallel_loop.  */
+      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
+    }
+
   int dims[GOMP_DIM_MAX];
-
   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
 
   if (dump_file)
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index 52865d3..e1e2641 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -2,6 +2,32 @@ 
 
 	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
 	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/kernels-counter-vars-function-scope.c:
+	Likewise.
+	* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
+	* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-3.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-g.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-n.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
+	* c-c++-common/goacc/kernels-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
+	* c-c++-common/goacc/kernels-reduction.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop.f95: Likewise.
+
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
 	* c-c++-common/goacc/classify-parallel.c: Likewise.
 	* c-c++-common/goacc/classify-routine.c: Likewise.
 	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index 70ff428..626f6b4 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -34,6 +34,6 @@  void KERNELS ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index c8b0fda..95037e6 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -25,11 +25,11 @@  void KERNELS ()
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
index 17f240e..c475333 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -45,9 +45,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
index 750f576..27ea2e9 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
@@ -27,10 +27,9 @@  foo (unsigned int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
index df60d6a..0841e90 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -27,10 +27,9 @@  foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
index 913d91f..acef6a1 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -59,11 +59,10 @@  main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
index 1822d2a..75e2bb7 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
@@ -39,9 +39,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
index e946319..73b469d 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
@@ -7,9 +7,8 @@ 
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
index 9b63b45..5592623 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
@@ -43,9 +43,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 279f797..e86be1b 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -46,9 +46,8 @@  foo (COUNTERTYPE n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
index db1071f..2b0e186 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -30,9 +30,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop.c
index abf7a3c..9619d53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop.c
@@ -46,9 +46,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
index 95f4817..69539b2 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -44,9 +44,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
index 6f5a418..4a18272 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
@@ -26,9 +26,8 @@  foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index 9887d35..4b282ca 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -36,6 +36,6 @@  end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 69c89a9..da025c1 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -27,11 +27,11 @@  end program main
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 865f7a6..516aede 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -34,11 +34,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index c9f3a62..ff3788a 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -40,11 +40,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 3361607..60a5c96 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -40,11 +40,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
index 5ba56fb..ce04749 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
@@ -38,11 +38,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
index a622a96..d2de138 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
@@ -38,10 +38,9 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
index 4ec2ac3..92872b2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
@@ -38,11 +38,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
index 409fe6f..079712f2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
@@ -32,10 +32,9 @@  end module test
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! TODO, PR70545.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function __test_MOD_foo._omp_fn.0 " 1 "optimized" } }
-
-! TODO, PR70545.
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" { xfail *-*-* } } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
index ae2cac6..cc9a3a9 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
@@ -30,9 +30,8 @@  end program main
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index 6ce9d84..f826154 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2040,19 +2040,20 @@  create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
   edge exit, nexit, guard, end, e;
 
-  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
       gcc_checking_assert (lookup_attribute ("oacc kernels",
 					     DECL_ATTRIBUTES (cfun->decl)));
-
-      tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
-      OMP_CLAUSE_NUM_GANGS_EXPR (clause)
-	= build_int_cst (integer_type_node, n_threads);
-      oacc_set_fn_attrib (cfun->decl, clause, NULL);
+      /* Indicate to later processing that this is a parallelized OpenACC
+	 kernels construct.  */
+      DECL_ATTRIBUTES (cfun->decl)
+	= tree_cons (get_identifier ("oacc kernels parallelized"),
+		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
     }
   else
     {
+      /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
+
       basic_block bb = loop_preheader_edge (loop)->src;
       basic_block paral_bb = single_pred (bb);
       gsi = gsi_last_bb (paral_bb);
@@ -2154,7 +2155,8 @@  create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 
   /* Emit GIMPLE_OMP_FOR.  */
   if (oacc_kernels_p)
-    /* In combination with the NUM_GANGS on the parallel.  */
+    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
+       omp-offload.c:execute_oacc_device_lower.  */
     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
   else
     {

Committed to gomp-4_0-branch in r247958:

commit 2630763958847ab6841dc1164d29ead4ac90fe00
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 09:20:35 2017 +0000

    [PR middle-end/69921] Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
    
            gcc/
            PR middle-end/69921
            * tree-parloops.c (create_parallel_loop): Set "oacc kernels
            parallelized" attribute for parallelized OpenACC kernels.
            * omp-low.c (execute_oacc_device_lower): Use it.
            * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Likewise.
            * omp-low.c (set_oacc_fn_attrib): Make it "static".
            * omp-low.h (set_oacc_fn_attrib): Remove prototype.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/kernels-acc-loop-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Likewise.
            * c-c++-common/goacc/kernels-counter-vars-function-scope.c:
            Likewise.
            * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
            * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3.c: Likewise.
            * c-c++-common/goacc/kernels-loop-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data.c: Likewise.
            * c-c++-common/goacc/kernels-loop-g.c: Likewise.
            * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n.c: Likewise.
            * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
            * c-c++-common/goacc/kernels-loop.c: Likewise.
            * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
            * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
            Likewise.
            * c-c++-common/goacc/kernels-reduction.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop.f95: Likewise.
            * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
            Likewise.
    
    trunk r247957
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@247958 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  8 +++++
 gcc/config/nvptx/nvptx.c                           | 31 ++++++----------
 gcc/omp-low.c                                      | 26 +++++++++++---
 gcc/omp-low.h                                      |  1 -
 gcc/testsuite/ChangeLog.gomp                       | 41 ++++++++++++++++++++++
 .../goacc/classify-kernels-unparallelized.c        |  2 +-
 .../c-c++-common/goacc/classify-kernels.c          |  6 ++--
 .../goacc/kernels-acc-loop-reduction.c             |  3 +-
 .../goacc/kernels-acc-loop-smaller-equal.c         |  3 +-
 .../goacc/kernels-counter-vars-function-scope.c    |  3 +-
 .../goacc/kernels-double-reduction-n.c             |  3 +-
 .../c-c++-common/goacc/kernels-double-reduction.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-2-acc-loop.c   |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-3-acc-loop.c   |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-acc-loop.c     |  3 +-
 .../c-c++-common/goacc/kernels-loop-data-2.c       |  3 +-
 .../goacc/kernels-loop-data-enter-exit-2.c         |  3 +-
 .../goacc/kernels-loop-data-enter-exit.c           |  3 +-
 .../c-c++-common/goacc/kernels-loop-data-update.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-data.c         |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c |  3 +-
 .../c-c++-common/goacc/kernels-loop-n-acc-loop.c   |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-nest.c         |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c    |  3 +-
 .../c-c++-common/goacc/kernels-one-counter-var.c   |  3 +-
 .../goacc/kernels-parallel-loop-data-enter-exit.c  |  3 +-
 .../c-c++-common/goacc/kernels-reduction.c         |  3 +-
 .../goacc/classify-kernels-unparallelized.f95      |  2 +-
 .../gfortran.dg/goacc/classify-kernels.f95         |  6 ++--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 |  3 +-
 .../gfortran.dg/goacc/kernels-loop-data-2.f95      |  3 +-
 .../goacc/kernels-loop-data-enter-exit-2.f95       |  3 +-
 .../goacc/kernels-loop-data-enter-exit.f95         |  3 +-
 .../gfortran.dg/goacc/kernels-loop-data-update.f95 |  3 +-
 .../gfortran.dg/goacc/kernels-loop-data.f95        |  3 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 |  7 ++--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   |  3 +-
 .../kernels-parallel-loop-data-enter-exit.f95      |  3 +-
 gcc/tree-parloops.c                                | 16 +++++----
 43 files changed, 132 insertions(+), 110 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 8cd46c3..b34e987 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,13 @@ 
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/69921
+	* tree-parloops.c (create_parallel_loop): Set "oacc kernels
+	parallelized" attribute for parallelized OpenACC kernels.
+	* omp-low.c (execute_oacc_device_lower): Use it.
+	* config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Likewise.
+	* omp-low.c (set_oacc_fn_attrib): Make it "static".
+	* omp-low.h (set_oacc_fn_attrib): Remove prototype.
+
 	* omp-low.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
 	Set "oacc kernels" attribute.
 	(set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
diff --git gcc/config/nvptx/nvptx.c gcc/config/nvptx/nvptx.c
index f3c1525..fd8da8d 100644
--- gcc/config/nvptx/nvptx.c
+++ gcc/config/nvptx/nvptx.c
@@ -4285,28 +4285,17 @@  nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
 	 higher, as otherwise we're not expecting any parallelization to
 	 happen.  */
       if (optimize >= 2
-	  && lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (decl)))
+	  && lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (decl))
+	  && !lookup_attribute ("oacc kernels parallelized",
+				DECL_ATTRIBUTES (decl)))
 	{
-	  bool avoid_offloading_p = true;
-	  for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
-	    {
-	      if (dims[ix] == 0 || dims[ix] > 1)
-		{
-		  avoid_offloading_p = false;
-		  break;
-		}
-	    }
-	  if (avoid_offloading_p)
-	    {
-	      warning_at (DECL_SOURCE_LOCATION (decl), 0,
-			  "OpenACC kernels construct will be executed"
-			  " sequentially; will by default avoid offloading to"
-			  " prevent data copy penalty");
-	      DECL_ATTRIBUTES (decl)
-		= tree_cons (get_identifier ("omp avoid offloading"),
-			     NULL_TREE, DECL_ATTRIBUTES (decl));
-
-	    }
+	  warning_at (DECL_SOURCE_LOCATION (decl), 0,
+		      "OpenACC kernels construct will be executed"
+		      " sequentially; will by default avoid offloading to"
+		      " prevent data copy penalty");
+	  DECL_ATTRIBUTES (decl)
+	    = tree_cons (get_identifier ("omp avoid offloading"),
+			 NULL_TREE, DECL_ATTRIBUTES (decl));
 	}
     }
 
diff --git gcc/omp-low.c gcc/omp-low.c
index 18872f9..0fbc3ff 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -13138,7 +13138,7 @@  replace_oacc_fn_attrib (tree fn, tree dims)
    function attribute.  Push any that are non-constant onto the ARGS
    list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
-void
+static void
 set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
@@ -21171,6 +21171,13 @@  execute_oacc_device_lower ()
       flag_openacc_dims = (char *)&flag_openacc_dims;
     } 
 
+  bool is_oacc_kernels
+    = (lookup_attribute ("oacc kernels",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  bool is_oacc_kernels_parallelized
+    = (lookup_attribute ("oacc kernels parallelized",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
   int fn_level = oacc_fn_attrib_level (attrs);
@@ -21180,17 +21187,26 @@  execute_oacc_device_lower ()
       if (fn_level >= 0)
 	fprintf (dump_file, "Function is OpenACC routine level %d\n",
 		 fn_level);
-      else if (lookup_attribute ("oacc kernels",
-				 DECL_ATTRIBUTES (current_function_decl)))
-	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (is_oacc_kernels)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 (is_oacc_kernels_parallelized
+		  ? "parallelized" : "unparallelized"));
       else
 	fprintf (dump_file, "Function is OpenACC parallel offload\n");
     }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+  /* OpenACC kernels constructs are special: they currently don't use the
+     generic oacc_loop infrastructure and attribute/dimension processing.  */
+  if (is_oacc_kernels && is_oacc_kernels_parallelized)
+    {
+      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
+	 also tree-parloops.c:create_parallel_loop.  */
+      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
+    }
+
   int dims[GOMP_DIM_MAX];
-
   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
 
   if (dump_file)
diff --git gcc/omp-low.h gcc/omp-low.h
index 0ea5308..65b2433 100644
--- gcc/omp-low.h
+++ gcc/omp-low.h
@@ -34,7 +34,6 @@  extern void replace_oacc_fn_attrib (tree, tree);
 extern int verify_oacc_routine_clauses (tree, tree *, location_t, const char *);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index 9f2b7be..dadff1a 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,46 @@ 
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/kernels-acc-loop-reduction.c: Likewise.
+	* c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Likewise.
+	* c-c++-common/goacc/kernels-counter-vars-function-scope.c:
+	Likewise.
+	* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
+	* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-2-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-3-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-3.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-g.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-n-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-n.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
+	* c-c++-common/goacc/kernels-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
+	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
+	Likewise.
+	* c-c++-common/goacc/kernels-reduction.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop.f95: Likewise.
+	* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
+	Likewise.
+
 	* c-c++-common/goacc/oaccdevlow-kernels.c: Adjust.
 	* c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
 	* c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index 70ff428..626f6b4 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -34,6 +34,6 @@  void KERNELS ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index c8b0fda..95037e6 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -25,11 +25,11 @@  void KERNELS ()
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c
index b52f280..4824e53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c
@@ -16,9 +16,8 @@  foo (int n, unsigned int *a)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c
index dd10c46..d70afb0 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c
@@ -16,9 +16,8 @@  foo (int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
index 17f240e..c475333 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -45,9 +45,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
index 750f576..27ea2e9 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
@@ -27,10 +27,9 @@  foo (unsigned int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
index df60d6a..0841e90 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -27,10 +27,9 @@  foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c
index 21b2a70..7b127cb 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c
@@ -9,11 +9,10 @@ 
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
index 913d91f..acef6a1 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -59,11 +59,10 @@  main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c
index d82debc..a040e09 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c
@@ -8,9 +8,8 @@ 
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
index 1822d2a..75e2bb7 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
@@ -39,9 +39,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c
index dc3bb43..070a5b5 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c
@@ -8,9 +8,8 @@ 
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
index 8046ae9..7180021 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
@@ -59,11 +59,10 @@  main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
index ac977d2..0c9f833 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
@@ -57,11 +57,10 @@  main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
index 72e18a7..0bd21b6 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
@@ -54,11 +54,10 @@  main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
index 94d5702..dd5a841 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
@@ -55,10 +55,9 @@  main (void)
 /* Check that only two loops are analyzed, and that both can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
index cc49699..a658182 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
@@ -53,11 +53,10 @@  main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
index e946319..73b469d 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
@@ -7,9 +7,8 @@ 
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
index 9b63b45..5592623 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
@@ -43,9 +43,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c
index 685cb7f..1f25e63 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c
@@ -8,9 +8,8 @@ 
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 279f797..e86be1b 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -46,9 +46,8 @@  foo (COUNTERTYPE n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
index db1071f..2b0e186 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -30,9 +30,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop.c
index abf7a3c..9619d53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop.c
@@ -46,9 +46,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
index 95f4817..69539b2 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -44,9 +44,8 @@  main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
index 70c5469..58c9416 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
@@ -55,11 +55,10 @@  main (void)
 /* Check that only two loops are analyzed, and that both can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" { xfail *-*-* } } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
index 6f5a418..4a18272 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
@@ -26,9 +26,8 @@  foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index 9887d35..4b282ca 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -36,6 +36,6 @@  end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 69c89a9..da025c1 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -27,11 +27,11 @@  end program main
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 865f7a6..516aede 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -34,11 +34,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index c9f3a62..ff3788a 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -40,11 +40,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 3361607..60a5c96 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -40,11 +40,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
index 5ba56fb..ce04749 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
@@ -38,11 +38,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
index a622a96..d2de138 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
@@ -38,10 +38,9 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
index 4ec2ac3..92872b2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
@@ -38,11 +38,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
index bdfebde..7c56897 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
@@ -32,11 +32,10 @@  end module test
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! This failure was introduced with the GOMP_MAP_POINTER ->
+! GOMP_MAP_FIRSTPRIVATE_POINTER conversion.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function __test_MOD_foo._omp_fn.0 " 1 "optimized" } }
-
-! This failure was introduced with the GOMP_MAP_POINTER ->
-! GOMP_MAP_FIRSTPRIVATE_POINTER conversion.
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" { xfail *-*-* } } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
index ae2cac6..cc9a3a9 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
@@ -30,9 +30,8 @@  end program main
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
index 49b767c..16c9b80 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
@@ -39,11 +39,10 @@  end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index a94f7a2..02736e0 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2038,19 +2038,20 @@  create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
   edge exit, nexit, guard, end, e;
 
-  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
       gcc_checking_assert (lookup_attribute ("oacc kernels",
 					     DECL_ATTRIBUTES (cfun->decl)));
-
-      tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
-      OMP_CLAUSE_NUM_GANGS_EXPR (clause)
-	= build_int_cst (integer_type_node, n_threads);
-      set_oacc_fn_attrib (cfun->decl, clause, NULL);
+      /* Indicate to later processing that this is a parallelized OpenACC
+	 kernels construct.  */
+      DECL_ATTRIBUTES (cfun->decl)
+	= tree_cons (get_identifier ("oacc kernels parallelized"),
+		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
     }
   else
     {
+      /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
+
       basic_block bb = loop_preheader_edge (loop)->src;
       basic_block paral_bb = single_pred (bb);
       gsi = gsi_last_bb (paral_bb);
@@ -2152,7 +2153,8 @@  create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 
   /* Emit GIMPLE_OMP_FOR.  */
   if (oacc_kernels_p)
-    /* In combination with the NUM_GANGS on the parallel.  */
+    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
+       omp-offload.c:execute_oacc_device_lower.  */
     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
   else
     {