@@ -1,5 +1,11 @@
2015-04-21 Tom de Vries <tom@codesourcery.com>
+ * builtin-attrs.def (DOT_DOT_r_r_r): Add DEF_ATTR_FOR_STRING.
+ (ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST): Add DEF_ATTR_TREE_LIST.
+ * omp-builtins.def (BUILT_IN_GOACC_DATA_START)
+ (BUILT_IN_GOACC_ENTER_EXIT_DATA, BUILT_IN_GOACC_UPDATE): Use
+ DEF_GOACC_BUILTIN_FNSPEC instead of DEF_GOACC_BUILTIN.
+
* passes.def: Add pass_fre after pass_ch_oacc_kernels.
* passes.def: Add pass_scev_cprop to pass_oacc_kernels.
@@ -65,6 +65,7 @@ DEF_ATTR_FOR_INT (6)
ATTR_##ENUM, ATTR_NULL)
DEF_ATTR_FOR_STRING (STR1, "1")
DEF_ATTR_FOR_STRING (DOT_DOT_DOT_r_r_r, "...rrr")
+DEF_ATTR_FOR_STRING (DOT_DOT_r_r_r, "..rrr")
#undef DEF_ATTR_FOR_STRING
/* Construct a tree for a list of two integers. */
@@ -131,6 +132,8 @@ DEF_ATTR_TREE_LIST (ATTR_PURE_NOTHROW_LEAF_LIST, ATTR_PURE, \
DEF_ATTR_TREE_LIST (ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST, \
ATTR_FNSPEC, ATTR_LIST_DOT_DOT_DOT_r_r_r, \
ATTR_NOTHROW_LIST)
+DEF_ATTR_TREE_LIST (ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST, \
+ ATTR_FNSPEC, ATTR_LIST_DOT_DOT_r_r_r, ATTR_NOTHROW_LIST)
DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LIST, ATTR_NORETURN, \
ATTR_NULL, ATTR_NOTHROW_LIST)
DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LEAF_LIST, ATTR_NORETURN,\
@@ -32,13 +32,17 @@ along with GCC; see the file COPYING3. If not see
DEF_GOACC_BUILTIN (BUILT_IN_ACC_GET_DEVICE_TYPE, "acc_get_device_type",
BT_FN_INT, ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_START, "GOACC_data_start",
- BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
+DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_DATA_START, "GOACC_data_start",
+ BT_FN_VOID_INT_SIZE_PTR_PTR_PTR,
+ ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST,
+ ATTR_NOTHROW_LIST, "..rrr")
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end",
BT_FN_VOID, ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data",
- BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
- ATTR_NOTHROW_LIST)
+DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_ENTER_EXIT_DATA,
+ "GOACC_enter_exit_data",
+ BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
+ ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST,
+ ATTR_NOTHROW_LIST, "..rrr")
DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_KERNELS_INTERNAL,
"GOACC_kernels_internal",
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR,
@@ -50,9 +54,10 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_KERNELS, "GOACC_kernels",
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel",
BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR,
ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
- BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
- ATTR_NOTHROW_LIST)
+DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_UPDATE, "GOACC_update",
+ BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
+ ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST,
+ ATTR_NOTHROW_LIST, "..rrr")
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
BT_FN_VOID_INT_INT_VAR,
ATTR_NOTHROW_LIST)
@@ -1,6 +1,21 @@
2015-04-21 Tom de Vries <tom@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
+ * c-c++-common/goacc/kernels-loop-data-2.c: New test.
+ * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test.
+ * c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test.
+ * c-c++-common/goacc/kernels-loop-data-update.c: New test.
+ * c-c++-common/goacc/kernels-loop-data.c: New test.
+ * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New
+ test.
+ * gfortran.dg/goacc/kernels-loop-data-2.f95: New test.
+ * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: New test.
+ * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: New test.
+ * gfortran.dg/goacc/kernels-loop-data-update.f95: New test.
+ * gfortran.dg/goacc/kernels-loop-data.f95: New test.
+ * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: New
+ test.
+
* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
* c-c++-common/goacc/kernels-one-counter-var.c: New test.
* g++.dg/ipa/devirt-37.C: Update for new pass_fre.
new file mode 100644
@@ -0,0 +1,71 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc data copyout (a[0:N])
+ {
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+ }
+
+#pragma acc data copyout (b[0:N])
+ {
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ {
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only three loops are analyzed, and that all can be
+ parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N])
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+#pragma acc exit data copyout (a[0:N])
+
+#pragma acc enter data create (b[0:N])
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+#pragma acc exit data copyout (b[0:N])
+
+
+#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N])
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+#pragma acc exit data copyout (c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only three loops are analyzed, and that all can be
+ parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only three loops are analyzed, and that all can be
+ parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc update device (b[0:N])
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only two loops are analyzed, and that both can be
+ parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,65 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc data copyout (a[0:N], b[0:N], c[0:N])
+ {
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only three loops are analyzed, and that all can be
+ parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc parallel present (b[0:N])
+ {
+#pragma acc loop
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only two loops are analyzed, and that both can be
+ parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,52 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc data copyout (a(0:n-1))
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ !$acc data copyout (b(0:n-1))
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ !$acc data copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
new file mode 100644
@@ -0,0 +1,52 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1))
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+ !$acc exit data copyout (a(0:n-1))
+
+ !$acc enter data create (b(0:n-1))
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+ !$acc exit data copyout (b(0:n-1))
+
+ !$acc enter data copyin (a(0:n-1), b(0:n-1)) create (c(0:n-1))
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+ !$acc exit data copyout (c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
new file mode 100644
@@ -0,0 +1,50 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
new file mode 100644
@@ -0,0 +1,49 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+
+ !$acc update device (b(0:n-1))
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc exit data copyout (a(0:n-1), c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
new file mode 100644
@@ -0,0 +1,50 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc end data
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
new file mode 100644
@@ -0,0 +1,51 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" }
+! { dg-additional-options "-fdump-tree-optimized" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc parallel present (b(0:n-1))
+ !$acc loop
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end parallel
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
+
+! Check that only three loops are analyzed, and that all can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } }
+
+! Check that the loop has been split off into a function.
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
+! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
+
+! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
@@ -1,6 +1,30 @@
2015-04-21 Tom de Vries <tom@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: New
+ test.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
+ New test.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
+ New test.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c:
+ New test.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: New
+ test.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
+ New test.
+ * testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95: New
+ test.
+ * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95:
+ New test.
+ * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95:
+ New test.
+ * testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95: New
+ test.
+ * testsuite/libgomp.oacc-fortran/kernels-loop-data.f95: New test.
+ * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95:
+ New test.
+
* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test.
* testsuite/libgomp.oacc-fortran/kernels-loop.f95: New test.
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-parallelize-loops=32 -O2" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc data copyout (a[0:N])
+ {
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+ }
+
+#pragma acc data copyout (b[0:N])
+ {
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ {
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-parallelize-loops=32 -O2" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N])
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+#pragma acc exit data copyout (a[0:N])
+
+#pragma acc enter data create (b[0:N])
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+#pragma acc exit data copyout (b[0:N])
+
+
+#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N])
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+#pragma acc exit data copyout (c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-parallelize-loops=32 -O2" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-parallelize-loops=32 -O2" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc update device (b[0:N])
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-parallelize-loops=32 -O2" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc data copyout (a[0:N], b[0:N], c[0:N])
+ {
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-ftree-parallelize-loops=32 -O2" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc parallel present (b[0:N])
+ {
+#pragma acc loop
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,38 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc data copyout (a(0:n-1))
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ !$acc data copyout (b(0:n-1))
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ !$acc data copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,38 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1))
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+ !$acc exit data copyout (a(0:n-1))
+
+ !$acc enter data create (b(0:n-1))
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+ !$acc exit data copyout (b(0:n-1))
+
+ !$acc enter data copyin (a(0:n-1), b(0:n-1)) create (c(0:n-1))
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+ !$acc exit data copyout (c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,36 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,36 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+
+ !$acc update device (b(0:n-1))
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc exit data copyout (a(0:n-1), c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,36 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (b(0:n-1))
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end kernels
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc end data
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,37 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i, ii
+
+ !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ !$acc kernels present (a(0:n-1))
+ do i = 0, n - 1
+ a(i) = i * 2
+ end do
+ !$acc end kernels
+
+ !$acc parallel present (b(0:n-1))
+ !$acc loop
+ do i = 0, n -1
+ b(i) = i * 4
+ end do
+ !$acc end parallel
+
+ !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+ do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+ end do
+ !$acc end kernels
+
+ !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+ do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+ end do
+
+end program main
Hi! On Sat, 15 Nov 2014 13:14:52 +0100, Tom de Vries <Tom_deVries@mentor.com> wrote: > I'm submitting a patch series with initial support for the oacc kernels directive. Committed to gomp-4_0-branch in r222288: commit 7109b39defb87bc839983339c9fb4cdcb3891238 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Apr 21 20:32:01 2015 +0000 Handle oacc kernels with other directives Mark directives with fn spec attributes to prevent them from acting as optimization barrier. gcc/ * builtin-attrs.def (DOT_DOT_r_r_r): Add DEF_ATTR_FOR_STRING. (ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST): Add DEF_ATTR_TREE_LIST. * omp-builtins.def (BUILT_IN_GOACC_DATA_START) (BUILT_IN_GOACC_ENTER_EXIT_DATA, BUILT_IN_GOACC_UPDATE): Use DEF_GOACC_BUILTIN_FNSPEC instead of DEF_GOACC_BUILTIN. gcc/testsuite/ * c-c++-common/goacc/kernels-loop-data-2.c: New test. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test. * c-c++-common/goacc/kernels-loop-data-update.c: New test. * c-c++-common/goacc/kernels-loop-data.c: New test. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New test. * gfortran.dg/goacc/kernels-loop-data-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: New test. * gfortran.dg/goacc/kernels-loop-data-update.f95: New test. * gfortran.dg/goacc/kernels-loop-data.f95: New test. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222288 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 6 ++ gcc/builtin-attrs.def | 3 + gcc/omp-builtins.def | 21 +++--- gcc/testsuite/ChangeLog.gomp | 15 +++++ .../c-c++-common/goacc/kernels-loop-data-2.c | 71 ++++++++++++++++++++ .../goacc/kernels-loop-data-enter-exit-2.c | 69 +++++++++++++++++++ .../goacc/kernels-loop-data-enter-exit.c | 66 ++++++++++++++++++ .../c-c++-common/goacc/kernels-loop-data-update.c | 66 ++++++++++++++++++ .../c-c++-common/goacc/kernels-loop-data.c | 65 ++++++++++++++++++ .../goacc/kernels-parallel-loop-data-enter-exit.c | 67 ++++++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data-2.f95 | 52 ++++++++++++++ .../goacc/kernels-loop-data-enter-exit-2.f95 | 52 ++++++++++++++ .../goacc/kernels-loop-data-enter-exit.f95 | 50 ++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data-update.f95 | 49 ++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data.f95 | 50 ++++++++++++++ .../kernels-parallel-loop-data-enter-exit.f95 | 51 ++++++++++++++ libgomp/ChangeLog.gomp | 24 +++++++ .../kernels-loop-data-2.c | 56 +++++++++++++++ .../kernels-loop-data-enter-exit-2.c | 54 +++++++++++++++ .../kernels-loop-data-enter-exit.c | 51 ++++++++++++++ .../kernels-loop-data-update.c | 53 +++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-data.c | 50 ++++++++++++++ .../kernels-parallel-loop-data-enter-exit.c | 52 ++++++++++++++ .../libgomp.oacc-fortran/kernels-loop-data-2.f95 | 38 +++++++++++ .../kernels-loop-data-enter-exit-2.f95 | 38 +++++++++++ .../kernels-loop-data-enter-exit.f95 | 36 ++++++++++ .../kernels-loop-data-update.f95 | 36 ++++++++++ .../libgomp.oacc-fortran/kernels-loop-data.f95 | 36 ++++++++++ .../kernels-parallel-loop-data-enter-exit.f95 | 37 ++++++++++ 29 files changed, 1306 insertions(+), 8 deletions(-) Grüße, Thomas