commit a297265c276f7882d33cfb05bb5ab71e05b6d7a1
Author: Julian Brown <julian@codesourcery.com>
Date: Mon Sep 22 03:27:53 2014 -0700
OpenACC tests.
@@ -239,3 +239,31 @@ proc libgomp_option_proc { option } {
return 0
}
}
+
+# Return 1 if at least one nvidia board is present.  The probe program below
+# exits with 0 only if acc_get_num_devices (acc_device_nvidia) exceeds zero.
+proc check_effective_target_openacc_nvidia_accel_present { } {
+ return [check_runtime openacc_nvidia_accel_present {
+ #include <openacc.h>
+ int main () {
+ return !(acc_get_num_devices (acc_device_nvidia) > 0);
+ }
+ } "" ]
+}
+
+# Return 1 if at least one nvidia board is present, and the nvidia device type
+# is selected by default by means of setting the environment variable
+# ACC_DEVICE_TYPE.
+
+proc check_effective_target_openacc_nvidia_accel_selected { } {
+    # Selection only counts when a board is actually there to be selected.
+    if { ![check_effective_target_openacc_nvidia_accel_present] } {
+        return 0
+    }
+    # An unset ACC_DEVICE_TYPE means the nvidia type was not requested.
+    if { ![info exists ::env(ACC_DEVICE_TYPE)] } {
+        return 0
+    }
+    # Yield 1 exactly when the requested device type is "nvidia", else 0.
+    return [expr { $::env(ACC_DEVICE_TYPE) == "nvidia" }]
+}
new file mode 100644
@@ -0,0 +1,101 @@
+# OpenACC C++ test driver; this whole file adapted from libgomp.c++/c++.exp.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+global shlib_ext
+
+set shlib_ext [get_shlib_extension]
+set lang_link_flags "-lstdc++"
+set lang_test_file_found 0
+set lang_library_path "../libstdc++-v3/src/.libs"
+if [info exists lang_include_flags] then {
+    unset lang_include_flags
+}
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+set blddir [lookfor_file [get_multilibs] libgomp]
+
+if { $blddir != "" } {
+    # Look for a static libstdc++ first.
+    if [file exists "${blddir}/${lang_library_path}/libstdc++.a"] {
+        set lang_test_file "${lang_library_path}/libstdc++.a"
+        set lang_test_file_found 1
+    # We may have a shared only build, so look for a shared libstdc++.
+    } elseif [file exists "${blddir}/${lang_library_path}/libstdc++.${shlib_ext}"] {
+        set lang_test_file "${lang_library_path}/libstdc++.${shlib_ext}"
+        set lang_test_file_found 1
+    } else {
+        puts "No libstdc++ library found, will not execute c++ tests"
+    }
+} elseif { [info exists GXX_UNDER_TEST] } {
+    set lang_test_file_found 1
+    # Needs to exist for libgomp.exp.
+    set lang_test_file ""
+} else {
+    puts "GXX_UNDER_TEST not defined, will not execute c++ tests"
+}
+
+if { $lang_test_file_found } {
+    # Gather a list of all tests.
+    set tests [lsort [glob -nocomplain $srcdir/$subdir/*.C]]
+
+    if { $blddir != "" } {
+        set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+    } else {
+        set ld_library_path "$always_ld_library_path"
+    }
+    append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+    set_ld_library_path_env_vars
+
+    set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags"
+    if { [file exists $flags_file] } {
+        set libstdcxx_includes [exec sh $flags_file --build-includes]
+    } else {
+        set libstdcxx_includes ""
+    }
+
+    # Todo: get list of accelerators from configure options --enable-accelerator.
+    set accels { "nonshm-host" "nvidia" }
+
+    # Run on host (or fallback) accelerator.
+    lappend accels "host"
+
+    # Test OpenACC with available accelerators.
+    foreach accel $accels {
+        set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+        # The nonshm-host case below replaces tagopt: '-' cannot be in a macro name.
+        # Todo: Determine shared memory or not using run-time test.
+        switch $accel {
+            host {
+                set acc_mem_shared 1
+            }
+            nonshm-host {
+                set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1"
+                set acc_mem_shared 0
+            }
+            nvidia {
+                set acc_mem_shared 0
+            }
+            default {
+                set acc_mem_shared 0
+            }
+        }
+        set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+        # Todo: Verify that this works for both local and remote testing.
+        setenv ACC_DEVICE_TYPE $accel
+        dg-runtest $tests "$tagopt" $libstdcxx_includes
+    }
+    # Do not leave the last device type exported for .exp files run after us.
+    unsetenv ACC_DEVICE_TYPE
+}
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+int
+main (int argc)
+{
+ /* The parallel region reads argc and aborts unless it equals 1.  */
+#pragma acc parallel
+ {
+ if (argc != 1)
+ abort ();
+ }
+
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */
+
+extern void abort ();
+
+int
+main (void)
+{
+ /* The region aborts unconditionally; this file is marked as should-fail.  */
+#pragma acc parallel
+ {
+ abort ();
+ }
+
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,63 @@
+/* Disable the acc_on_device builtin; we want to test the libgomp library
+ function. */
+/* { dg-additional-options "-fno-builtin-acc_on_device" } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ /* Host: only the none and host device types must match here.  */
+
+ {
+ if (!acc_on_device (acc_device_none))
+ abort ();
+ if (!acc_on_device (acc_device_host))
+ abort ();
+ if (acc_on_device (acc_device_not_host))
+ abort ();
+ if (acc_on_device (acc_device_nvidia))
+ abort ();
+ }
+
+
+ /* Host via offloading fallback mode (if(0)): same expectations as above.  */
+
+#pragma acc parallel if(0)
+ {
+ if (!acc_on_device (acc_device_none))
+ abort ();
+ if (!acc_on_device (acc_device_host))
+ abort ();
+ if (acc_on_device (acc_device_not_host))
+ abort ();
+ if (acc_on_device (acc_device_nvidia))
+ abort ();
+ }
+
+
+#if !ACC_DEVICE_TYPE_host
+
+ /* Offloaded: not_host must match; nvidia only if ACC_DEVICE_TYPE_nvidia.  */
+
+#pragma acc parallel
+ {
+ if (acc_on_device (acc_device_none))
+ abort ();
+ if (acc_on_device (acc_device_host))
+ abort ();
+ if (!acc_on_device (acc_device_not_host))
+ abort ();
+#if ACC_DEVICE_TYPE_nvidia
+ if (!acc_on_device (acc_device_nvidia))
+ abort ();
+#else
+ if (acc_on_device (acc_device_nvidia))
+ abort ();
+#endif
+ }
+#endif
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,295 @@
+/* { dg-do compile } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+ int N = 64;
+ float *a, *b;
+ int i;
+
+ acc_init (acc_device_nvidia);
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+ /* Malformed async clauses on a parallel construct.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,) /* { dg-error "error: expected '\\)' before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (,1) /* { dg-error "error: expected '\\)' before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,) /* { dg-error "error: expected '\\)' before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2 3) /* { dg-error "error: expected '\\)' before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1,2,,) /* { dg-error "error: expected '\\)' before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1 /* { dg-error "error: expected '\\)' before end of line" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (*) /* { dg-error "error: expected '\\)' before '\\*' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (a) /* { dg-error "error: expected '\\)' before 'a'" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (N) /* { dg-error "error: expected '\\)' before 'N'" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ /* The next two constructs carry no expected-error directive.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async ()
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) async
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ /* Malformed wait clauses on a parallel construct.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 2) /* { dg-error "error: expected ',' before numeric constant" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (,1) /* { dg-error "error: expected integer expression before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2 3) /* { dg-error "error: expected ',' before numeric constant" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,2,,) /* { dg-error "error: expected integer expression before ',' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1 /* { dg-error "error: expected ',' before end of line" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,*) /* { dg-error "error: expected integer expression before '\\*' token" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1,a) /* { dg-error "error: expected integer expression before 'a'" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (a) /* { dg-error "error: expected integer expression before 'a'" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (N) /* { dg-error "error: expected integer expression before 'N'" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ /* The next two constructs carry no expected-error directive.  */
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait ()
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc parallel copyin (a[0:N]) copy (b[0:N]) wait
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ /* Malformed argument lists on the wait directive.  */
+#pragma acc wait (1 2) /* { dg-error "error: expected ',' before numeric constant" } */
+
+#pragma acc wait (1,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+
+#pragma acc wait (,1) /* { dg-error "error: expected integer expression before ',' token" } */
+
+#pragma acc wait (1,2,) /* { dg-error "error: expected integer expression before '\\)' token" } */
+
+#pragma acc wait (1,2 3) /* { dg-error "error: expected ',' before numeric constant" } */
+
+#pragma acc wait (1,2,,) /* { dg-error "error: expected integer expression before ',' token" } */
+
+#pragma acc wait (1 /* { dg-error "error: expected ',' before end of line" } */
+
+#pragma acc wait (1,*) /* { dg-error "error: expected integer expression before '\\*' token" } */
+
+#pragma acc wait (1,a) /* { dg-error "error: expected integer expression before 'a'" } */
+
+#pragma acc wait (a) /* { dg-error "error: expected integer expression before 'a'" } */
+
+#pragma acc wait (N) /* { dg-error "error: expected integer expression before 'N'" } */
+
+#pragma acc wait (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+
+#pragma acc wait 1 /* { dg-error "error: expected clause before numeric constant" } */
+
+#pragma acc wait N /* { dg-error "error: expected clause before 'N'" } */
+ /* Malformed async clauses on the wait directive.  */
+#pragma acc wait async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */
+
+#pragma acc wait async (1 2) /* { dg-error "error: expected '\\)' before numeric constant" } */
+
+#pragma acc wait async (1,) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (,1) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1,2,) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1,2 3) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1,2,,) /* { dg-error "error: expected '\\)' before ',' token" } */
+
+#pragma acc wait async (1 /* { dg-error "error: expected '\\)' before end of line" } */
+
+#pragma acc wait async (*) /* { dg-error "error: expected '\\)' before '\\*' token" } */
+
+#pragma acc wait async (a) /* { dg-error "error: expected '\\)' before 'a'" } */
+
+#pragma acc wait async (N) /* { dg-error "error: expected '\\)' before 'N'" } */
+
+#pragma acc wait async (1.0) /* { dg-error "error: expected integer expression before numeric constant" } */
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
new file mode 100755
@@ -0,0 +1,466 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include "cuda.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+
+int
+main (int argc, char **argv)
+{
+ CUresult r;
+ CUstream stream1;
+ int N = 128; //1024 * 1024;
+ float *a, *b, *c, *d, *e;
+ int i;
+ int nbytes;
+
+ acc_init (acc_device_nvidia);
+
+ nbytes = N * sizeof (float);
+
+ a = (float *) malloc (nbytes);
+ b = (float *) malloc (nbytes);
+ c = (float *) malloc (nbytes);
+ d = (float *) malloc (nbytes);
+ e = (float *) malloc (nbytes);
+ /* Case 1: bare async clause followed by a bare wait directive.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+ /* Case 2: async (1) paired with wait (1).  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+ /* Case 3: three regions all using async (1), then a single wait (1).  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+ /* Case 4: as case 3, plus a region with both wait (1) and async (1).  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc parallel wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 11.0)
+ abort ();
+ }
+
+ /* Create a non-blocking CUDA stream and pass it to acc_set_cuda_stream.  */
+ r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream1);
+ /* Case 5: the async (1) copy again, after the acc_set_cuda_stream call.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+ /* Case 6: the three-region async (1) sequence again, with a = 7.0.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort ();
+
+ if (b[i] != 49.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+ /* Case 7: the four-region sequence with wait (1) async (1), a = 3.0.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc parallel wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 17.0)
+ abort ();
+ }
+ /* Case 8: data is copyin only; results come back via update host wait (1).  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 16.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
+ /* Case 9: as case 8, but update host uses async (1) and wait (1) follows.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 25.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,84 @@
+# OpenACC C test driver; this whole file adapted from libgomp.c/c.exp.
+
+if [info exists lang_library_path] then {
+    unset lang_library_path
+    unset lang_link_flags
+}
+if [info exists lang_test_file] then {
+    unset lang_test_file
+}
+if [info exists lang_include_flags] then {
+    unset lang_include_flags
+}
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS "-O2"
+}
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+# Gather a list of all tests.
+set tests [lsort [find $srcdir/$subdir *.c]]
+
+set ld_library_path $always_ld_library_path
+append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+append ld_library_path ":/opt/nvidia/cuda-5.5/lib64"
+set_ld_library_path_env_vars
+
+# Todo: get list of accelerators from configure options --enable-accelerator.
+set accels { "nonshm-host" "nvidia" }
+
+# Run on host (or fallback) accelerator.
+lappend accels "host"
+
+# Test OpenACC with available accelerators.
+set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+foreach accel $accels {
+    set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+    set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+    # The nonshm-host case below replaces tagopt: '-' cannot be in a macro name.
+    # Todo: Determine shared memory or not using run-time test.
+    switch $accel {
+        host {
+            set acc_mem_shared 1
+        }
+        nonshm-host {
+            set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1"
+            set acc_mem_shared 0
+        }
+        nvidia {
+            # Copy ptx file (TEMPORARY)
+            remote_download host $srcdir/libgomp.oacc-c/subr.ptx
+            # Where cuda.h lives
+            # Todo: get that from configure option --with-cuda-driver.
+            lappend ALWAYS_CFLAGS "additional_flags=-I/opt/nvidia/cuda-5.5/include"
+            lappend ALWAYS_CFLAGS "additional_flags=-L/opt/nvidia/cuda-5.5/lib64"
+
+            # Where timer.h lives
+            lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}"
+            set acc_mem_shared 0
+        }
+        default {
+            set acc_mem_shared 0
+        }
+    }
+    set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+    # Todo: Verify that this works for both local and remote testing.
+    setenv ACC_DEVICE_TYPE $accel
+    dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS
+}
+# Do not leave the last device type exported for .exp files run after us.
+unsetenv ACC_DEVICE_TYPE
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+#define N 2
+  int a[N], b[N];
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 3;
+      b[i] = 0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+{
+  int ii;
+
+  for (ii = 0; ii < N; ii++)
+    {
+      const int idx = ii;
+      int n = 1;
+      const int len = n;
+      /* Malformed cache directives, each tagged with its expected diagnostic.  */
+#pragma acc cache /* { dg-error "error: expected '\\(' before end of line" } */
+
+#pragma acc cache (a) /* { dg-error "error: expected '\\\[' before '\\)' token" } */
+
+#pragma acc cache (a[0:N]) copyin (a[0:N]) /* { dg-error "error: expected end of line before 'copyin'" } */
+
+#pragma acc cache () /* { dg-error "error: expected identifier before '\\)' token" } */
+
+#pragma acc cache (a[0:N] b[0:N]) /* { dg-error "error: expected '\\)' before 'b'" } */
+
+#pragma acc cache (a[0:N] /* { dg-error "error: expected '\\)' before end of line" } */
+
+#pragma acc cache (a[ii]) /* { dg-error "error: 'ii' is not a constant" } */
+
+#pragma acc cache (a[idx:n]) /* { dg-error "error: 'n' is not a constant" } */
+      /* The remaining cache directives carry no expected-error directive.  */
+#pragma acc cache (a[0:N])
+
+#pragma acc cache (a[0:N], b[0:N])
+
+#pragma acc cache (a[0])
+
+#pragma acc cache (a[0], a[1], b[0:N])
+
+#pragma acc cache (a[idx])
+
+#pragma acc cache (a[idx:len])
+
+      b[ii] = a[ii];
+    }
+}
+
+  /* b is expected to mirror a after the region.  */
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != b[i])
+        abort ();
+    }
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,623 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c, *d;
+ int i;
+
+ a = (float *) malloc(N * sizeof (float));
+ b = (float *) malloc(N * sizeof (float));
+ c = (float *) malloc(N * sizeof (float));
+ /* copyin (a) / copyout (b): neither array remains present afterwards.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* The same clauses once more, with different values.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copyin after acc_copyin: b gets the 6.0 that was copied in.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+ d = acc_copyin (&a[0], N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present_or_copyin(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ acc_free(d);
+ /* present_or_copyout (b) with b not present beforehand.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) present_or_copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copyout (b) with b copied in first: host b keeps 2.0.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 2.0;
+ }
+
+ d = acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) present_or_copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 2.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ acc_free (d);
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* copy (a) together with copyout (b).  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 4.0;
+ }
+
+#pragma acc parallel copy(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = a[ii] + 2;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copy on arrays that are not present: results come back.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc parallel present_or_copy(a[0:N]) present_or_copy(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = b[ii] + 2;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 9.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copy on arrays copied in first: host values stay as set.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 7.0;
+ }
+
+ d = acc_copyin (&a[0], N * sizeof (float));
+ d = acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc parallel present_or_copy(a[0:N]) present_or_copy(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = b[ii] + 2;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 7.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ d = acc_deviceptr(&a[0]);
+ acc_unmap_data (&a[0]);
+ acc_free (d);
+
+ d = acc_deviceptr(&b[0]);
+ acc_unmap_data (&b[0]);
+ acc_free (d);
+ /* create (c): c is written and read only inside the region.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) create(c[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort();
+ /* present_or_create (c) with c not mapped beforehand.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort();
+ /* present_or_create (c) with c mapped by acc_map_data beforehand.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 5.0;
+ }
+
+ d = acc_malloc (N * sizeof (float));
+ acc_map_data(c, d, N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort();
+
+ if (b[i] != 2.0)
+ abort();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ d = acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+ /* present (c) with c mapped by acc_map_data beforehand.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) present(c[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+ /* present on all three arrays; b is fetched with acc_copyout afterwards.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+ acc_copyin (a, N * sizeof (float));
+
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (b, d, N * sizeof (float));
+
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel present(a[0:N]) present(c[0:N]) present(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ if (!acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ acc_copyout (b, N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ d = acc_deviceptr(a);
+
+ acc_unmap_data (a);
+
+ acc_free (d);
+
+ d = acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+ /* deviceptr (d) with d obtained from acc_malloc.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 6.0;
+ }
+
+ d = acc_malloc(N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) deviceptr(d) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ d[ii] = a[ii];
+ b[ii] = d[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ acc_free (d);
+ /* pcopyin after acc_copyin; same expectations as present_or_copyin above.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+ d = acc_copyin (&a[0], N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel pcopyin(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ acc_free(d);
+ /* pcopyout (b) with b not present beforehand.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) pcopyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* pcreate (c): c must not be present once the region is over.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc parallel copyin(a[0:N]) pcreate(c[0:N]) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+    /* Expected-failure test: c is mapped for N floats, then a parallel construct names c[0:N+1].  */
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+    c = (float *) malloc(N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+    /* Associate host block c with the device allocation d, for exactly N floats.  */
+    d = acc_malloc (N * sizeof (float));
+    acc_map_data(c, d, N * sizeof (float));
+    /* The c[0:N+1] section below is one element longer than the mapping made above.  */
+#pragma acc parallel copyin(a[0:N]) present_or_create(c[0:N+1]) copyout(b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+    /* NOTE(review): presumably not reached when the expected failure occurs -- confirm.  */
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+        abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+        abort();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+        abort();
+    /* Teardown: look up the device address of c, drop the mapping, free the device block.  */
+    d = acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    return 0;
+}
+/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+  int i, j, k, l = 0;
+  int a[3][3][3];
+  /* Fill the [0..1]^3 corner of a in one parallel construct, then re-check it in a second one.  */
+  memset (a, '\0', sizeof (a));
+  #pragma acc parallel
+  #pragma acc loop collapse(4 - 1)
+  for (i = 0; i < 2; i++) /* the collapse argument above is the constant expression 4 - 1 */
+    for (j = 0; j < 2; j++)
+      for (k = 0; k < 2; k++)
+        a[i][j][k] = i + j * 4 + k * 16;
+  #pragma acc parallel
+    {
+      #pragma acc loop collapse(2) reduction(|:l)
+      for (i = 0; i < 2; i++) /* collapse(2) covers the i and j loops; k is a plain inner loop */
+        for (j = 0; j < 2; j++)
+          for (k = 0; k < 2; k++)
+            if (a[i][j][k] != i + j * 4 + k * 16)
+              l = 1; /* any mismatch sets the reduction variable */
+    }
+  if (l)
+    abort ();
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+  int i, j, k, l = 0, f = 0, x = 0;
+  int m1 = 4, m2 = -5, m3 = 17;
+  /* Run the same counting loop nest twice (with and without OpenACC) and require equal counts.  */
+  #pragma acc parallel
+  #pragma acc loop collapse(3) reduction(+:l)
+    for (i = -2; i < m1; i++)
+      for (j = m2; j < -2; j++)
+        {
+          for (k = 13; k < m3; k++)
+            {
+              if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++)
+                l++; /* accumulated through the reduction(+:l) clause */
+            }
+        }
+  /* Plain C version of the nest above, counting into x; f is not reset in between.  */
+    for (i = -2; i < m1; i++)
+      for (j = m2; j < -2; j++)
+        {
+          for (k = 13; k < m3; k++)
+            {
+              if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++)
+                x++;
+            }
+        }
+
+  if (l != x)
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=gnu99" } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+  int i2, l = 0, r = 0;
+  int a[3][3][3];
+  /* Like the first collapse test, but with loop variables declared in the for statements.  */
+  memset (a, '\0', sizeof (a));
+  #pragma acc parallel
+  #pragma acc loop collapse(4 - 1)
+    for (int i = 0; i < 2; i++)
+      for (int j = 0; j < 2; j++)
+        for (int k = 0; k < 2; k++)
+          a[i][j][k] = i + j * 4 + k * 16;
+#pragma acc parallel
+    {
+      #pragma acc loop collapse(2) reduction(|:l)
+      for (i2 = 0; i2 < 2; i2++) /* outer variable i2 is declared outside the loop */
+        for (int j = 0; j < 2; j++)
+          for (int k = 0; k < 2; k++)
+            if (a[i2][j][k] != i2 + j * 4 + k * 16)
+              l += 1;
+    }
+  /* Same check without OpenACC, counting mismatches into r.  */
+    for (i2 = 0; i2 < 2; i2++)
+      for (int j = 0; j < 2; j++)
+        for (int k = 0; k < 2; k++)
+          if (a[i2][j][k] != i2 + j * 4 + k * 16)
+            r += 1;
+
+  if (l != r) /* the OpenACC result must match the host count */
+    abort ();
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int k = 0;
+    while (k < n) /* host reference: y[k] = a * x[k] + y[k] for k in [0, n) */
+    {
+        y[k] = a * x[k] + y[k];
+        k++;
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    /* Exit with EXIT_FAILURE unless CTX1 is the current CUDA context, both
+       according to the driver API and according to the OpenACC runtime.  */
+    CUcontext driver_ctx, acc_ctx;
+    CUresult rc = cuCtxGetCurrent(&driver_ctx);
+
+    if (rc != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", rc);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!(driver_ctx == ctx1))
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* The OpenACC runtime's notion of the context must agree as well.  */
+    acc_ctx = (CUcontext) acc_get_current_cuda_context();
+
+    if (!(acc_ctx == ctx1))
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cudaError_t e;
+    cublasHandle_t h;
+    CUcontext pctx, ctx;
+    CUresult r;
+    int dev;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 1 - cuBLAS creates, OpenACC shares. */
+    /* pctx records the context current after cublasCreate; each context_check(pctx) compares against it.  */
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+    /* Select for OpenACC the device number that the CUDA runtime reports.  */
+    e = cudaGetDevice(&dev);
+    if (e != cudaSuccess)
+    {
+        fprintf(stderr, "cudaGetDevice failed: %d\n", e);
+        exit(EXIT_FAILURE);
+    }
+
+    acc_set_device_num(dev, acc_device_nvidia);
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (!h_X)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (!h_Y1)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (!h_Y2)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* h_Y1 (device path) and h_Y2 (host reference) start out with identical contents.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+    /* Copy both vectors in through OpenACC, checking the context after each call.  */
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* cuBLAS operates directly on the device pointers returned by acc_copyin.  */
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Host reference result, computed into h_Y2.  */
+    saxpy(N, alpha, h_X, h_Y2);
+    /* Compare the two results: ||h_Y1 - h_Y2|| relative to ||h_Y2||.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+    /* Even after acc_shutdown, the checks below require the original context to remain current.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&ctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!ctx)
+    {
+        fprintf(stderr, "Expected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx != ctx)
+    {
+        fprintf(stderr, "Unexpected new context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,223 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int k = 0;
+    while (k < n) /* host reference: y[k] = a * x[k] + y[k] for k in [0, n) */
+    {
+        y[k] = a * x[k] + y[k];
+        k++;
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    /* Exit with EXIT_FAILURE unless CTX1 is the current CUDA context, both
+       according to the driver API and according to the OpenACC runtime.  */
+    CUcontext driver_ctx, acc_ctx;
+    CUresult rc = cuCtxGetCurrent(&driver_ctx);
+
+    if (rc != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", rc);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!(driver_ctx == ctx1))
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* The OpenACC runtime's notion of the context must agree as well.  */
+    acc_ctx = (CUcontext) acc_get_current_cuda_context();
+
+    if (!(acc_ctx == ctx1))
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cudaError_t e;
+    cublasHandle_t h;
+    CUcontext pctx, ctx;
+    CUresult r;
+    int dev;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 2 - cuBLAS creates, OpenACC shares. */
+    /* Unlike test 1, the reference result comes from an OpenACC parallel construct, not saxpy().  */
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+    /* Select for OpenACC the device number that the CUDA runtime reports.  */
+    e = cudaGetDevice(&dev);
+    if (e != cudaSuccess)
+    {
+        fprintf(stderr, "cudaGetDevice failed: %d\n", e);
+        exit(EXIT_FAILURE);
+    }
+
+    acc_set_device_num(dev, acc_device_nvidia);
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* h_Y1 (cuBLAS path) and h_Y2 (OpenACC path) start out with identical contents.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* cuBLAS operates directly on the device pointers returned by acc_copyin.  */
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Same a*x + y computation on h_Y2, expressed as an OpenACC parallel construct.  */
+#pragma acc parallel copyin(h_X[0:N]), copy(h_Y2[0:N]) copyin(alpha)
+    {
+        int i;
+
+        for (i = 0; i < N; i++)
+        {
+            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+        }
+    }
+
+    context_check(pctx);
+    /* Compare the two results: ||h_Y1 - h_Y2|| relative to ||h_Y2||.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+    /* Even after acc_shutdown, the checks below require the original context to remain current.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&ctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!ctx)
+    {
+        fprintf(stderr, "Expected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx != ctx)
+    {
+        fprintf(stderr, "Unexpected new context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,200 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int k = 0;
+    while (k < n) /* host reference: y[k] = a * x[k] + y[k] for k in [0, n) */
+    {
+        y[k] = a * x[k] + y[k];
+        k++;
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    /* Exit with EXIT_FAILURE unless CTX1 is the current CUDA context, both
+       according to the driver API and according to the OpenACC runtime.  */
+    CUcontext driver_ctx, acc_ctx;
+    CUresult rc = cuCtxGetCurrent(&driver_ctx);
+
+    if (rc != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", rc);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!(driver_ctx == ctx1))
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* The OpenACC runtime's notion of the context must agree as well.  */
+    acc_ctx = (CUcontext) acc_get_current_cuda_context();
+
+    if (!(acc_ctx == ctx1))
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cublasHandle_t h;
+    CUcontext pctx;
+    CUresult r;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 3 - OpenACC creates, cuBLAS shares. */
+    /* pctx records the context current after acc_set_device_num, before any cuBLAS call.  */
+    acc_set_device_num(0, acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* h_Y1 (device path) and h_Y2 (host reference) start out with identical contents.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* The cuBLAS handle is created only now; the check after it requires the context to be unchanged.  */
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Host reference result, computed into h_Y2.  */
+    saxpy(N, alpha, h_X, h_Y2);
+    /* Compare the two results: ||h_Y1 - h_Y2|| relative to ||h_Y2||.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* In this test the checks below require that no context is current after acc_shutdown.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx)
+    {
+        fprintf(stderr, "Unexpected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy(int n, float a, float *x, float *y)
+{
+    int k = 0;
+    while (k < n) /* host reference: y[k] = a * x[k] + y[k] for k in [0, n) */
+    {
+        y[k] = a * x[k] + y[k];
+        k++;
+    }
+}
+
+void
+context_check(CUcontext ctx1)
+{
+    /* Exit with EXIT_FAILURE unless CTX1 is the current CUDA context, both
+       according to the driver API and according to the OpenACC runtime.  */
+    CUcontext driver_ctx, acc_ctx;
+    CUresult rc = cuCtxGetCurrent(&driver_ctx);
+
+    if (rc != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", rc);
+        exit(EXIT_FAILURE);
+    }
+
+    if (!(driver_ctx == ctx1))
+    {
+        fprintf(stderr, "new context established\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* The OpenACC runtime's notion of the context must agree as well.  */
+    acc_ctx = (CUcontext) acc_get_current_cuda_context();
+
+    if (!(acc_ctx == ctx1))
+    {
+        fprintf(stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit(EXIT_FAILURE);
+    }
+}
+
+int
+main(int argc, char **argv)
+{
+    cublasStatus_t s;
+    cublasHandle_t h;
+    CUcontext pctx;
+    CUresult r;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 4 - OpenACC creates, cuBLAS shares. */
+    /* Here an OpenACC parallel construct runs before any cuBLAS call is made.  */
+    acc_set_device_num(0, acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    h_X = (float *)malloc(N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_X\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *)malloc(N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *)malloc(N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf(stderr, "malloc failed: for h_Y2\n");
+        exit(EXIT_FAILURE);
+    }
+    /* h_Y1 (cuBLAS path) and h_Y2 (OpenACC path) start out with identical contents.  */
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand() / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand() / (float) RAND_MAX;
+    }
+    /* Reference a*x + y on h_Y2, expressed as an OpenACC parallel construct.  */
+#pragma acc parallel copyin(h_X[0:N]), copy(h_Y2[0:N]) copy(alpha)
+    {
+        int i;
+
+        for (i = 0; i < N; i++)
+        {
+            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+        }
+    }
+    /* Re-read the current context after the construct; later checks compare against this value.  */
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    d_X = acc_copyin(&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf(stderr, "copyin error h_X\n"); /* this diagnostic used to name h_Y1 by mistake */
+        exit(EXIT_FAILURE);
+    }
+
+    d_Y = acc_copyin(&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf(stderr, "copyin error h_Y1\n");
+        exit(EXIT_FAILURE);
+    }
+
+    s = cublasCreate(&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasCreate failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* cuBLAS operates directly on the device pointers returned by acc_copyin.  */
+    s = cublasSaxpy(h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasSaxpy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+
+    acc_memcpy_from_device(&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check(pctx);
+    /* Compare the two results: ||h_Y1 - h_Y2|| relative to ||h_Y2||.  */
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float)sqrt((double)error_norm);
+    ref_norm = (float)sqrt((double)ref_norm);
+
+    if ((fabs(ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf(stderr, "math error\n");
+        exit(EXIT_FAILURE);
+    }
+
+    free(h_X);
+    free(h_Y1);
+    free(h_Y2);
+
+    acc_free(d_X);
+    acc_free(d_Y);
+
+    context_check(pctx);
+
+    s = cublasDestroy(h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf(stderr, "cublasDestroy failed: %d\n", s);
+        exit(EXIT_FAILURE);
+    }
+
+    context_check(pctx);
+    /* In this test the checks below require that no context is current after acc_shutdown.  */
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent(&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf(stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit(EXIT_FAILURE);
+    }
+
+    if (pctx)
+    {
+        fprintf(stderr, "Unexpected context\n");
+        exit(EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,188 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int i;
+
+int
+is_mapped (void *p, size_t n)
+{
+#if !ACC_MEM_SHARED
+  return acc_is_present (p, n); /* defer to the OpenACC runtime's answer */
+#else
+  return 1; /* with ACC_MEM_SHARED set, every object counts as mapped */
+#endif
+}
+
+int main(void)
+{
+  int j;
+  /* Each section resets i/j to -1/-2, opens a data construct, and checks host-visible values.  */
+  i = -1;
+  j = -2;
+#pragma acc data copyin (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1) /* copyin: the host-side stores are expected to survive the region */
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data copyout (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    /* Store through the mapping from a compute construct; those values are expected after the region.  */
+#pragma acc parallel present (i, j)
+    {
+      i = 4;
+      j = 2;
+    }
+  }
+  if (i != 4 || j != 2)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data create (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_copyin (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_copyout (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+
+#pragma acc parallel present (i, j)
+    {
+      i = 4;
+      j = 2;
+    }
+  }
+  if (i != 4 || j != 2)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_copy (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
+    abort ();
+#else
+  if (i != -1 || j != -2) /* without shared memory, the values from region entry are expected back */
+    abort ();
+#endif
+
+  i = -1;
+  j = -2;
+#pragma acc data present_or_create (i, j)
+  {
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+
+  if (i != 2 || j != 1)
+    abort ();
+
+  i = -1;
+  j = -2;
+#pragma acc data copyin (i, j)
+  {
+#pragma acc data present (i, j)
+    {
+      if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+        abort ();
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+    }
+  }
+  if (i != 2 || j != 1)
+    abort ();
+  /* Final section: a data construct without clauses must not map i or j.  */
+  i = -1;
+  j = -2;
+#pragma acc data
+  {
+#if !ACC_MEM_SHARED
+    if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j)))
+      abort ();
+#endif
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+  }
+  if (i != 2 || j != 1)
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+int main(void)
+{
+  void *a, *a_1, *a_2;
+  /* a only carries a recognizable bit pattern; nothing in this function dereferences it.  */
+#define A (void *) 0x123
+  a = A;
+
+#pragma acc data copyout(a_1, a_2)
+#pragma acc kernels deviceptr(a)
+  {
+    a_1 = a; /* the pointer value as seen inside the kernels region */
+    a_2 = &a; /* the address of a as seen inside the kernels region */
+  }
+
+  if (a != A)
+    abort ();
+  if (a_1 != a)
+    abort ();
+#if ACC_MEM_SHARED
+  if (a_2 != &a)
+    abort ();
+#else
+  if (a_2 == &a) /* without shared memory, the address seen in the region must differ from the host's */
+    abort ();
+#endif
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */
+
+#include "libgomp_g.h"
+
+extern void abort ();
+
+volatile int i;
+
+void
+f (void *data) /* passed to GOACC_kernels by main; DATA is not used */
+{
+  int observed = i; /* exactly one read of the volatile flag */
+  if (observed != -1) abort ();
+  i = 42;
+}
+
+int main(void)
+{
+  i = -1; /* f aborts unless it observes this value */
+  GOACC_kernels (0, f, (const void *) 0, /* NOTE(review): argument meanings are defined in libgomp_g.h, not visible here */
+                 0, (void *) 0, (void *) 0, (void *) 0,
+                 1, 1, 1, -2, -1);
+  if (i != 42) /* f stores 42, so this verifies that f actually ran */
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */
+
+#include "libgomp_g.h"
+
+extern void abort ();
+
+volatile int i;
+
+void
+f (void *data) /* passed to GOACC_parallel by main; DATA is not used */
+{
+  int observed = i; /* exactly one read of the volatile flag */
+  if (observed != -1) abort ();
+  i = 42;
+}
+
+int main(void)
+{
+  i = -1; /* f aborts unless it observes this value */
+  GOACC_parallel (0, f, (const void *) 0, /* NOTE(review): argument meanings are defined in libgomp_g.h, not visible here */
+                  0, (void *) 0, (void *) 0, (void *) 0,
+                  1, 1, 1, -2, -1);
+  if (i != 42) /* f stores 42, so this verifies that f actually ran */
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,537 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define N 32
+
+int
+main(int argc, char **argv)
+{
+    float *a, *b, *d_a, *d_b, exp;
+    int i;
+    const int one = 1;
+    const int zero = 0;
+    int n;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    d_a = (float *) acc_malloc (N * sizeof (float));
+    d_b = (float *) acc_malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+        a[i] = 4.0;
+    /* Part 1: if clause on parallel.  The region stores a + 1 when acc_on_device reports the host, else a.  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+    /* True condition: a + 1 is expected only when ACC_MEM_SHARED is set.  */
+#if ACC_MEM_SHARED
+    exp = 5.0;
+#else
+    exp = 4.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+    /* False condition: the check after the region always expects a + 1.  */
+#pragma acc parallel if(0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 17.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 8.0;
+    /* Same pair of cases, with the condition given as a const int variable.  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 9.0;
+#else
+    exp = 8.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+#pragma acc parallel if(zero)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 23.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+    /* Condition given as the stdbool.h macros true and false.  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 17.0;
+#else
+    exp = 16.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 76.0;
+
+#pragma acc parallel if(false)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 77.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+    /* Condition given as a non-const variable assigned just before the construct.  */
+    n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 23.0;
+#else
+    exp = 22.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 18.0;
+
+    n = 0;
+
+#pragma acc parallel if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 19.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 49.0;
+    /* Condition given as an arithmetic expression (n + n evaluates to 2 here).  */
+    n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 50.0;
+#else
+    exp = 49.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 38.0;
+
+    n = 0;
+
+#pragma acc parallel if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 39.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 91.0;
+    /* A negative non-zero constant is expected to behave like a true condition.  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 92.0;
+#else
+    exp = 91.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 43.0;
+    /* Condition given as a comparison expression.  */
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 44.0;
+#else
+    exp = 43.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 87.0;
+
+#pragma acc parallel if(one == 0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 88.0)
+            abort();
+    }
+    /* Part 2: if clause on update, using explicit mappings of a and b onto d_a and d_b.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 9.0;
+    }
+
+    acc_map_data (a, d_a, N * sizeof (float));
+    acc_map_data (b, d_b, N * sizeof (float));
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+    /* Both updates were enabled, so the host must see 3.0 and 9.0 again.  */
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 12.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N]) if(0)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+    /* The device update was disabled, so the earlier 3.0 and 9.0 are expected, not 6.0 and 12.0.  */
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 26.0;
+        b[i] = 21.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(0)
+    /* The host update was disabled, so the zeros stored on the host must remain.  */
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 0.0)
+            abort();
+
+        if (b[i] != 0.0)
+            abort();
+    }
+
+    acc_unmap_data (a);
+    acc_unmap_data (b);
+
+    acc_free (d_a);
+    acc_free (d_b);
+    /* Part 3: if clause on data constructs.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1)
+{
+#pragma acc parallel present(a[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            b[ii] = a[ii];
+        }
+    }
+}
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 8.0;
+        b[i] = 1.0;
+    }
+    /* With a false condition, the data construct must not make a or b present.  */
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0)
+{
+#if !ACC_MEM_SHARED
+    if (acc_is_present (a, N * sizeof (float)))
+        abort ();
+#endif
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b, N * sizeof (float)))
+        abort ();
+#endif
+}
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 18.0;
+        b[i] = 21.0;
+    }
+    /* Nested data constructs: enabled outer, disabled middle, enabled inner.  */
+#pragma acc data copyin(a[0:N]) if(1)
+{
+#if !ACC_MEM_SHARED
+    if (!acc_is_present (a, N * sizeof (float)))
+        abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(0)
+    {
+#if !ACC_MEM_SHARED
+        if (acc_is_present (b, N * sizeof (float)))
+            abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(1)
+        {
+#pragma acc parallel present(a[0:N]) present(b[0:N])
+            {
+                int ii;
+
+                for (ii = 0; ii < N; ii++)
+                {
+                    b[ii] = a[ii];
+                }
+            }
+        }
+
+#if !ACC_MEM_SHARED
+        if (acc_is_present (b, N * sizeof (float)))
+            abort ();
+#endif
+    }
+}
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 18.0)
+            abort ();
+    }
+    /* The guarded block below is empty in this version of the test.  */
+#ifdef XXX_TODO_ENTER_END_DATA
+#endif
+
+    return 0;
+}
new file mode 100644
@@ -0,0 +1,184 @@
+/* { dg-do run } */
+
+extern void abort ();
+
+int i;
+
+int main(void)
+{
+ int j, v;
+ /* Run kernels regions with different data clauses on i and j; sections under "#if 0" are compiled out.  */
+#if 0
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+#endif
+ /* present_or_copyin: the region must see the host values; its writes reach the host only if ACC_MEM_SHARED.  */
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1)
+ abort ();
+#if ACC_MEM_SHARED
+ if (i != 2 || j != 1)
+ abort ();
+#else
+ if (i != -1 || j != -2)
+ abort ();
+#endif
+ /* present_or_copyout: values written in the region must be visible on the host afterwards.  */
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+ /* present_or_copy: the region sees the host values and its writes are visible on the host afterwards.  */
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+ /* present_or_create: the host copies stay unchanged unless ACC_MEM_SHARED.  */
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1)
+ abort ();
+#if ACC_MEM_SHARED
+ if (i != 2 || j != 1)
+ abort ();
+#else
+ if (i != -1 || j != -2)
+ abort ();
+#endif
+
+#if 0
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#endif
+
+#if 0
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#endif
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_device_t devtype = acc_device_host;
+ /* Use the host device type unless ACC_DEVICE_TYPE_nvidia is set.  */
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+ /* NOTE(review): returning 0 here exits successfully although dg-shouldfail expects a failure -- confirm intended.  */
+ if (acc_get_num_devices (devtype) == 0)
+ return 0;
+#endif
+ /* Initialize the same device type twice; the dg-shouldfail directive after main expects the run to fail.  */
+ acc_init (devtype);
+
+ acc_init (devtype);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device already active" } */
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ void *d;
+ acc_device_t devtype = acc_device_host;
+ /* Use the host device type unless ACC_DEVICE_TYPE_nvidia is set; skip the test if no nvidia device exists.  */
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+ /* A zero-sized acc_malloc must return NULL; acc_free (0) is then called as well.  */
+ d = acc_malloc (0);
+ if (d != NULL)
+ abort ();
+
+ acc_free (0);
+
+ acc_shutdown (devtype);
+ /* Repeat the zero-sized request after selecting the device with acc_set_device_type instead of acc_init.  */
+ acc_set_device_type (devtype);
+
+ d = acc_malloc (0);
+ if (d != NULL)
+ abort ();
+
+ acc_shutdown (devtype);
+ /* A 1024-byte request must return a non-NULL pointer, again via acc_init and then via acc_set_device_type.  */
+ acc_init (devtype);
+
+ d = acc_malloc (1024);
+ if (d == NULL)
+ abort ();
+
+ acc_free (d);
+
+ acc_shutdown (devtype);
+
+ acc_set_device_type (devtype);
+
+ d = acc_malloc (1024);
+ if (d == NULL)
+ abort ();
+
+ acc_free (d);
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 512;
+ void *d;
+ /* Allocate N bytes with acc_malloc, then hand acc_free an address inside that block; dg-shouldfail expects a failure.  */
+ d = acc_malloc (N);
+ if (d == NULL)
+ abort ();
+ /* d + (N >> 1) is arithmetic on a void pointer (a GNU C extension): N/2 bytes past the start of the block.  */
+ acc_free (d + (N >> 1));
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: mem free failed 1" } */
new file mode 100644
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Round trip: copy h in with acc_copyin, clear the host copy, acc_copyout and compare.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ memset (h, 0, N);
+ /* acc_copyout must overwrite the zeroed host buffer with the values copied in above.  */
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Map [h, h+N) with acc_copyin and probe acc_is_present with ranges inside, overlapping and outside it.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ /* Only (h, 1) is expected to be present; every range below that overruns, precedes or is empty must report 0.  */
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h, N + 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + 1, N) != 0)
+ abort ();
+
+ if (acc_is_present (h - 1, N) != 0)
+ abort ();
+
+ if (acc_is_present (h - 1, N - 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, 0) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, N) != 0)
+ abort ();
+
+ if (acc_is_present (0, N) != 0)
+ abort ();
+
+ if (acc_is_present (h, 0) != 0)
+ abort ();
+ /* After acc_free on the pointer returned by acc_copyin, h must no longer be reported present.  */
+ acc_free (d);
+
+ if (acc_is_present (h, 1) != 0)
+ abort ();
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Check acc_is_present for one-byte and tail sub-ranges of a buffer mapped with acc_copyin.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ /* First and last byte are present; the bytes just before and just after the buffer are not.  */
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h + N - 1, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h - 1, 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, 1) != 0)
+ abort ();
+ /* While mapped, every single byte and every tail [h+i, h+N) must be present.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, N - i) != 1)
+ abort ();
+ }
+ /* Once the device block has been freed, none of the tails may be reported present.  */
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, N - i) != 0)
+ abort ();
+ }
+
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* After an acc_copyin/acc_copyout pair, no byte of h may still be reported present.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Calls acc_copyin twice on the same range; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ (void) acc_copyin (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Calls acc_copyout twice after a single acc_copyin; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N);
+
+ acc_copyout (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Frees the device block returned by acc_copyin, then calls acc_copyout on h; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ acc_free (d);
+
+ acc_copyout (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+ /* Round-trip N separate host buffers through the device; buffer i is filled with the byte value i.  */
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ h[i] = (unsigned char *) malloc (N);
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = i;
+ }
+
+ (void) acc_copyin (p, N);
+ }
+ /* Clear all N bytes of every host buffer so the check below passes only if acc_copyout restored them.  */
+ for (i = 0; i < N; i++)
+ {
+ memset (h[i], 0, N);
+ }
+ /* Copy each buffer back and verify that every byte of buffer i holds the value i again.  */
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ acc_copyout (h[i], N);
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ free (h[i]);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_device_t devtype = acc_device_host;
+ /* Use the host device type unless ACC_DEVICE_TYPE_nvidia is set.  */
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+ /* NOTE(review): returning 0 here exits successfully although dg-shouldfail expects a failure -- confirm intended.  */
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+#endif
+ /* One acc_init followed by two acc_shutdown calls; the dg-shouldfail directive after main expects the run to fail.  */
+ acc_init (devtype);
+
+ acc_shutdown (devtype);
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: no device initialized" } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Copies in N bytes but asks acc_copyout for N + 1; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N + 1);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Copies in N bytes but calls acc_copyout with size 0; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, 0);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Copies in [h, h+N) but calls acc_copyout on the sub-range (h + 1, N - 1); dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h + 1, N - 1);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */
new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h1, *h2;
+ /* Copies in two separately allocated N-byte buffers, then issues one acc_copyout of 2 * N bytes starting at h1.  */
+ h1 = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h1[i] = 0xab;
+ }
+
+ (void) acc_copyin (h1, N);
+
+ h2 = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h2[i] = 0xde;
+ }
+
+ (void) acc_copyin (h2, N);
+ /* The dg-shouldfail directive after main expects this oversized copyout to make the run fail.  */
+ acc_copyout (h1, N + N);
+
+ free (h1);
+ free (h2);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */
new file mode 100644
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Runs acc_create/acc_delete on the same buffer twice, checking acc_is_present for every byte after each call.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+ /* After acc_create every byte must be present; after acc_delete none may be.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+ /* Second round: creating the mapping again after the delete must work the same way.  */
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_create twice on the same range; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_create with a size of 0; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, 0);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_create with a null host pointer; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (0, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Creates a mapping for h, then calls acc_delete with a null host pointer; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (0, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Creates a mapping for h, then calls acc_delete with size 0; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, 0);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_init (acc_device_host);
+ /* Shut down a different device type than the one initialized above; dg-shouldfail expects the run to fail.  */
+ acc_shutdown (acc_device_not_host);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Creates an N-byte mapping but deletes only N - 2 bytes; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N - 2);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* acc_present_or_create on an unmapped buffer must return non-NULL and leave the buffer present.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (h, N);
+ if (!d)
+ abort ();
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100755
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+ /* Repeated acc_present_or_create / acc_pcreate calls on a mapped range must return the first device pointer.  */
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+ /* Same check through the acc_pcreate entry point.  */
+ d2 = acc_pcreate (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100755
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+ /* A second acc_present_or_create for a shorter prefix (N - 2 bytes) must return the same device pointer.  */
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h, N - 2);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100755
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+ /* Maps [h, h+N), then requests (h + 2, N), which runs 2 bytes past the mapping; dg-shouldfail expects a failure.  */
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h + 2, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */
new file mode 100755
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_present_or_create with a null host pointer; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (0, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
new file mode 100755
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_present_or_create with a size of 0; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (h, 0);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Round trip through acc_present_or_copyin: copy in, clear the host copy, acc_copyout and compare.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+ /* acc_copyout must bring back the values that were on the host when the buffer was copied in.  */
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d1, *d2;
+ /* A second acc_present_or_copyin on a mapped buffer must return the same pointer and must not re-copy host data.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d1 = acc_present_or_copyin (h, N);
+ if (!d1)
+ abort ();
+ /* Change the host copy to 0xab before the second call; the copyout below still expects the original values.  */
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ d2 = acc_present_or_copyin (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+ /* After the copyout, acc_pcopyin is expected to hand back the same device pointer as the first mapping.  */
+ d2 = acc_pcopyin (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_present_or_copyin with a null host pointer; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (0, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_init (99);
+ /* The call above passes 99 as the device type; the dg-shouldfail directive after main expects the run to fail.  */
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 99 is out of range" } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_present_or_copyin with a size of 0; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (h, 0);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* acc_update_device must push changed host data to the device so that a later acc_copyout returns it.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+ /* Overwrite the host copy with 0xab, update the device, and expect 0xab back after the copyout.  */
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ /* Calls acc_update_device on a buffer that was never copied in or created; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ acc_update_device (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Copies h in, then calls acc_update_device with a null host pointer; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Copies h in, then calls acc_update_device with size 0; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, 0);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Partial acc_update_device: only the first N - 2 bytes of the mapped buffer are refreshed on the device.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, N - 2);
+ /* After the copyout the updated prefix must read 0xab and the last two bytes must hold the original values.  */
+ acc_copyout (h, N);
+
+ for (i = 0; i < N - 2; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ for (i = N - 2; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* acc_update_self must refresh the host buffer from the device copy while the mapping stays in place.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+ /* Clear the host copy; acc_update_self must then restore the values that were copied in.  */
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Copies h in, then calls acc_update_self with a null host pointer; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (0, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Copies h in, then calls acc_update_self with size 0; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, 0);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Partial acc_update_self: only the first N - 2 bytes of the host buffer are refreshed from the device.  */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, N - 2);
+ /* The updated prefix must hold the original values; the last two bytes must still be the zeros written above.  */
+ for (i = 0; i < N - 2; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ for (i = N - 2; i < N; i++)
+ {
+ if (h[i] != 0)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+ /* acc_get_device_type must not report acc_device_default, neither before nor after acc_init (acc_device_default).  */
+ acc_init (acc_device_default);
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ acc_shutdown (acc_device_default);
+ /* With an nvidia device available, acc_init and acc_set_device_type must each make nvidia the reported type.  */
+ if (acc_get_num_devices (acc_device_nvidia) != 0)
+ {
+ acc_init (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ acc_init (acc_device_default);
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+ }
+
+ return 0;
+
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Maps host buffer h onto device memory from acc_malloc with acc_map_data and checks the range is present.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ if (acc_is_present (h, N) != 1)
+ abort ();
+ /* Tear down in order: unmap the host pointer, free the device block, free the host block.  */
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+ void *d[N];
+ /* Creates N independent host/device pairs with acc_map_data and checks every one is present.  */
+ for (i = 0; i < N; i++)
+ {
+ h[i] = (unsigned char *) malloc (N);
+ d[i] = acc_malloc (N);
+
+ acc_map_data (h[i], d[i], N);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h[i], N) != 1)
+ abort ();
+ }
+ /* After acc_unmap_data a pair must no longer be present; device and host blocks are then released.  */
+ for (i = 0; i < N; i++)
+ {
+ acc_unmap_data (h[i]);
+
+ if (acc_is_present (h[i], N) != 0)
+ abort ();
+
+ acc_free (d[i]);
+ free (h[i]);
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_map_data with a null host pointer; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (0, d, N);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\]->\[\h+,\+256\] is a bad map" } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_map_data with a null device pointer; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, 0, N);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[\(nil\),\+256\] is a bad map" } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Calls acc_map_data with a size of 0; the dg-shouldfail directive after main expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, 0);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ int i;
+ void *d;
+ /* Maps every byte of h to the matching byte of d as its own one-byte mapping, then unmaps them one by one.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ acc_map_data (h + i, d + i, 1);
+ }
+ /* Every individual byte h + i must be present while its mapping exists.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ acc_unmap_data (h + i);
+ }
+ /* Once all mappings are removed, no byte h + i may still be reported present.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Maps only the first N / 2 bytes of h: the first byte must be present, the byte at offset N / 2 must not.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N >> 1);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h + (N >> 1), 1) != 0)
+ abort ();
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Passes the device pointer d (not the host pointer h) to acc_unmap_data; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ acc_unmap_data (d);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+ /* Passes a null pointer to acc_unmap_data while h is mapped; dg-shouldfail expects the run to fail.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ acc_unmap_data (0);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Checks that acc_hostptr and acc_deviceptr translate every byte address of a mapping made with acc_map_data.  */
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+ /* While mapped: device address d + i maps back to h + i, and host address h + i maps to d + i.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_hostptr (d + i) != h + i)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_deviceptr (h + i) != d + i)
+ abort ();
+ }
+
+ acc_unmap_data (h);
+ /* After the unmap both lookups must return a null pointer for every address.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_hostptr (d + i) != 0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_deviceptr (h + i) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ int devnum;
+ /* The reported device type must never be acc_device_default; the rest is skipped without an nvidia device.  */
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+ /* acc_set_device_type (acc_device_nvidia) must take effect, both initially and again after an acc_shutdown.  */
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+ /* Exactly one host device is expected to be reported.  */
+ devnum = acc_get_num_devices (acc_device_host);
+ if (devnum != 1)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Round-trip N bytes through device memory with acc_memcpy_to_device and
+   acc_memcpy_from_device.  The test also requires acc_is_present to
+   report 0 for every host byte, both after the upload and after the
+   download.  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int k;
+  unsigned char *host;
+  void *dev;
+
+  host = (unsigned char *) malloc (N);
+
+  /* Byte k holds the value k.  */
+  for (k = 0; k < N; k++)
+    host[k] = k;
+
+  dev = acc_malloc (N);
+
+  acc_memcpy_to_device (dev, host, N);
+
+  for (k = 0; k < N; k++)
+    if (acc_is_present (host + k, 1))
+      abort ();
+
+  /* Wipe the host copy so the download below is what restores it.  */
+  memset (host, 0, N);
+
+  acc_memcpy_from_device (host, dev, N);
+
+  for (k = 0; k < N; k++)
+    if (host[k] != k)
+      abort ();
+
+  for (k = 0; k < N; k++)
+    if (acc_is_present (host + k, 1))
+      abort ();
+
+  acc_free (dev);
+  free (host);
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Same round trip as a single-buffer memcpy test, but with N independent
+   host/device buffer pairs: every pair is uploaded in a first pass and
+   downloaded, verified and released in a second pass.  Buffer I is filled
+   with the byte value I.  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int buf;
+  unsigned char *h[N];
+  void *d[N];
+
+  /* Pass 1: allocate, fill and upload each buffer.  */
+  for (buf = 0; buf < N; buf++)
+    {
+      int off;
+
+      h[buf] = (unsigned char *) malloc (N);
+
+      for (off = 0; off < N; off++)
+        h[buf][off] = buf;
+
+      d[buf] = acc_malloc (N);
+
+      acc_memcpy_to_device (d[buf], h[buf], N);
+
+      /* The test requires the host bytes to be reported as not present.  */
+      for (off = 0; off < N; off++)
+        if (acc_is_present (h[buf] + off, 1))
+          abort ();
+    }
+
+  /* Pass 2: clear, download, verify and free each buffer.  */
+  for (buf = 0; buf < N; buf++)
+    {
+      int off;
+
+      memset (h[buf], 0, N);
+
+      acc_memcpy_from_device (h[buf], d[buf], N);
+
+      for (off = 0; off < N; off++)
+        if (h[buf][off] != buf)
+          abort ();
+
+      for (off = 0; off < N; off++)
+        if (acc_is_present (h[buf] + off, 1))
+          abort ();
+
+      acc_free (d[buf]);
+      free (h[buf]);
+    }
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Upload N bytes into an N-byte device allocation, then attempt a second
+   upload of N << 1 bytes.  The test is marked dg-shouldfail with
+   "libgomp: invalid size".  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int idx;
+  unsigned char *host;
+  void *dev;
+
+  acc_init (acc_device_nvidia);
+
+  host = (unsigned char *) malloc (N);
+
+  for (idx = 0; idx < N; idx++)
+    host[idx] = idx;
+
+  dev = acc_malloc (N);
+
+  acc_memcpy_to_device (dev, host, N);
+
+  memset (host, 0, N);
+
+  /* Oversized request: twice the size that was allocated.  */
+  acc_memcpy_to_device (dev, host, N << 1);
+
+  acc_memcpy_from_device (host, dev, N);
+
+  for (idx = 0; idx < N; idx++)
+    if (host[idx] != idx)
+      abort ();
+
+  acc_free (dev);
+  free (host);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid size" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Call acc_memcpy_to_device with a null device destination.  The test is
+   marked dg-shouldfail with "libgomp: invalid device address".  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int pos;
+  unsigned char *src;
+  void *dev;
+
+  src = (unsigned char *) malloc (N);
+
+  pos = 0;
+  while (pos < N)
+    {
+      src[pos] = pos;
+      pos++;
+    }
+
+  dev = acc_malloc (N);
+
+  /* Destination is deliberately 0 instead of DEV.  */
+  acc_memcpy_to_device (0, src, N);
+
+  memset (src, 0, N);
+
+  acc_memcpy_from_device (src, dev, N);
+
+  for (pos = 0; pos < N; pos++)
+    if (src[pos] != pos)
+      abort ();
+
+  acc_free (dev);
+  free (src);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Call acc_memcpy_to_device with a null host source.  The test is marked
+   dg-shouldfail with "libgomp: invalid host address".  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int n;
+  unsigned char *hbuf;
+  void *dbuf;
+
+  hbuf = (unsigned char *) malloc (N);
+
+  for (n = 0; n < N; n++)
+    hbuf[n] = n;
+
+  dbuf = acc_malloc (N);
+
+  /* Source is deliberately 0 instead of HBUF.  */
+  acc_memcpy_to_device (dbuf, 0, N);
+
+  memset (hbuf, 0, N);
+
+  acc_memcpy_from_device (hbuf, dbuf, N);
+
+  for (n = 0; n < N; n++)
+    if (hbuf[n] != n)
+      abort ();
+
+  acc_free (dbuf);
+  free (hbuf);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Call acc_memcpy_to_device with the same device pointer as both
+   destination and source.  The test is marked dg-shouldfail with
+   "libgomp: invalid host or device address".  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int at;
+  unsigned char *host_mem;
+  void *dev_mem;
+
+  host_mem = (unsigned char *) malloc (N);
+
+  for (at = 0; at < N; at++)
+    host_mem[at] = at;
+
+  dev_mem = acc_malloc (N);
+
+  /* DEV_MEM is deliberately passed where the host source belongs.  */
+  acc_memcpy_to_device (dev_mem, dev_mem, N);
+
+  memset (host_mem, 0, N);
+
+  acc_memcpy_from_device (host_mem, dev_mem, N);
+
+  for (at = 0; at < N; at++)
+    if (host_mem[at] != at)
+      abort ();
+
+  acc_free (dev_mem);
+  free (host_mem);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host or device address" } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Upload a pattern, clear the host copy, then issue a zero-length
+   acc_memcpy_to_device.  The download that follows must still return the
+   original pattern, so the zero-length call must not have changed the
+   device contents.  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int idx;
+  unsigned char *host;
+  void *dev;
+
+  acc_init (acc_device_nvidia);
+
+  host = (unsigned char *) malloc (N);
+
+  for (idx = 0; idx < N; idx++)
+    host[idx] = idx;
+
+  dev = acc_malloc (N);
+
+  acc_memcpy_to_device (dev, host, N);
+
+  memset (host, 0, N);
+
+  /* Zero bytes requested: the zeroed host buffer must not reach DEV.  */
+  acc_memcpy_to_device (dev, host, 0);
+
+  acc_memcpy_from_device (host, dev, N);
+
+  for (idx = 0; idx < N; idx++)
+    if (host[idx] != idx)
+      abort ();
+
+  acc_free (dev);
+  free (host);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Call acc_memcpy_from_device with a null host destination.  The test is
+   marked dg-shouldfail with "libgomp: invalid host address".  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int pos;
+  unsigned char *dst;
+  void *dev;
+
+  dst = (unsigned char *) malloc (N);
+
+  for (pos = 0; pos < N; pos++)
+    dst[pos] = pos;
+
+  dev = acc_malloc (N);
+
+  acc_memcpy_to_device (dev, dst, N);
+
+  memset (dst, 0, N);
+
+  /* Destination is deliberately 0 instead of DST.  */
+  acc_memcpy_from_device (0, dev, N);
+
+  for (pos = 0; pos < N; pos++)
+    if (dst[pos] != pos)
+      abort ();
+
+  acc_free (dev);
+  free (dst);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+/* Call acc_memcpy_from_device with a null device source.  The test is
+   marked dg-shouldfail with "libgomp: invalid device address".  */
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int n;
+  unsigned char *hbuf;
+  void *dbuf;
+
+  hbuf = (unsigned char *) malloc (N);
+
+  for (n = 0; n < N; n++)
+    hbuf[n] = n;
+
+  dbuf = acc_malloc (N);
+
+  acc_memcpy_to_device (dbuf, hbuf, N);
+
+  memset (hbuf, 0, N);
+
+  /* Source is deliberately 0 instead of DBUF.  */
+  acc_memcpy_from_device (hbuf, 0, N);
+
+  for (n = 0; n < N; n++)
+    if (hbuf[n] != n)
+      abort ();
+
+  acc_free (dbuf);
+  free (hbuf);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
new file mode 100644
@@ -0,0 +1,131 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+/* Register a user-created CUDA stream as OpenACC async queue 0, launch the
+   "delay" kernel from subr.ptx on that stream, and check acc_async_test (0):
+   it must return 0 right after the launch and 1 after the host has slept.
+   Any CUDA driver error is reported on stderr and aborts.  <stdlib.h> is
+   needed for abort, malloc, free and exit.  */
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&nprocs,
+                            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  /* "sum" is only looked up, never launched, in this test.  */
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  /* NOTE(review): dtime looks like a delay in milliseconds, converted to
+     device clock ticks via the reported clock rate -- confirm against
+     subr.ptx and the CUDA attribute documentation.  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  /* The test expects no stream to be associated with async 0 yet.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  /* Immediately after the launch the kernel must still be in flight.  */
+  if (acc_async_test (0) != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+
+  /* Same expression as the sibling tests; evaluates to 1 for dtime of
+     200.0, i.e. the value this test used before.  */
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test (0) != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+/* acc_get_num_devices must report no devices of type acc_device_none and
+   at least one of type acc_device_host; abort otherwise.  */
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_none))
+    abort ();
+
+  if (!acc_get_num_devices (acc_device_host))
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,143 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+/* Register N user-created CUDA streams as OpenACC async queues 0..N-1,
+   launch the "delay" kernel from subr.ptx on each of them, and check
+   acc_async_test for every queue: 0 right after its launch, 1 after the
+   host has slept.  CUDA driver errors are printed to stderr and abort.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ const int N = 10;
+ int i;
+ CUstream streams[N];
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ /* "sum" is only looked up, never launched, in this test.  */
+ r = cuModuleGetFunction (&sum, module, "sum");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ /* NOTE(review): dtime looks like a delay in milliseconds that is turned
+    into device clock ticks via the reported clock rate -- confirm.  */
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ /* Every launch below shares the same two kernel arguments.  */
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ /* Set-up pass: each async queue must start without a stream, then gets
+    a freshly created one.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ /* Launch pass: right after each launch its queue must test as busy.  */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test (i) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+ }
+
+ /* Evaluates to sleep (1) for dtime of 200.0.  */
+ sleep ((int) (dtime / 1000.0f) + 1);
+
+ /* After the sleep every queue must test as complete.  */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_async_test (i) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,126 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+/* Associate a CUDA stream with async queue 0, launch the "delay" kernel on
+   it, and then call acc_async_test on queue 1, which was never given a
+   stream.  The test is marked dg-shouldfail with
+   "libgomp: unknown async \d".  <stdlib.h> is needed for abort, malloc and
+   free.  */
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&nprocs,
+                            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  /* "sum" is only looked up, never launched, in this test.  */
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  /* Only async queue 0 gets a stream.  */
+  acc_set_cuda_stream (0, stream);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  /* Queue 1 is deliberately the wrong one: this is the call the
+     dg-shouldfail directive is about.  */
+  if (acc_async_test (1) != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test (1) != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
new file mode 100644
@@ -0,0 +1,128 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+/* Register a user-created CUDA stream as OpenACC async queue 0, launch the
+   "delay" kernel from subr.ptx on it, and check acc_async_test_all: it
+   must return 0 right after the launch and 1 after the host has slept.
+   CUDA driver errors are printed to stderr and abort.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ /* "sum" is only looked up, never launched, in this test.  */
+ r = cuModuleGetFunction (&sum, module, "sum");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ /* NOTE(review): dtime looks like a delay in milliseconds that is turned
+    into device clock ticks via the reported clock rate -- confirm.  */
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ /* Right after the launch, the "all queues" test must report 0.  */
+ if (acc_async_test_all () != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ /* Evaluates to sleep (1) for dtime of 200.0.  */
+ sleep ((int) (dtime / 1000.f) + 1);
+
+ /* After the sleep, the "all queues" test must report 1.  */
+ if (acc_async_test_all () != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,141 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+/* Register N user-created CUDA streams as OpenACC async queues 0..N-1,
+   launch the "delay" kernel from subr.ptx on each, and check
+   acc_async_test_all: it must return 0 right after the launches and 1
+   after the host has slept.  CUDA driver errors are printed to stderr and
+   abort.  */
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  const int N = 10;
+  int i;
+  CUstream streams[N];
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&nprocs,
+                            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  /* "sum" is only looked up, never launched, in this test.  */
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  /* Each async queue must start without a stream, then gets a fresh one.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+        abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+          abort ();
+        }
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+        abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+          abort ();
+        }
+    }
+
+  /* With kernels just launched, the "all queues" test must report 0.  */
+  if (acc_async_test_all () != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  /* A result other than 1 here means work is still pending, so say that
+     rather than repeating the "not running" message used above.  */
+  if (acc_async_test_all () != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,146 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+/* Register a user-created CUDA stream as OpenACC async queue 0, launch the
+   "delay" kernel from subr.ptx on it, and time two acc_wait (0) calls with
+   the timer.h helpers: the first must take at least dtime, the second
+   must not exceed 0.010.  CUDA driver errors are printed to stderr and
+   abort.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ /* "sum" is only looked up, never launched, in this test.  */
+ r = cuModuleGetFunction (&sum, module, "sum");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ /* NOTE(review): dtime is compared directly against stop_timer results
+    below, so both presumably share a unit (milliseconds?) -- confirm
+    against timer.h.  */
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ /* The test expects no stream to be associated with async 0 yet.  */
+ stream = acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ /* First measurement: launch plus the wait for the kernel to finish.  */
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ /* Second measurement: waiting again with nothing newly launched.  */
+ start_timer (0);
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,148 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+/* Launch the "delay" kernel from subr.ptx N times on one CUDA stream that
+   is registered as OpenACC async queue 0, calling acc_wait (0) after each
+   launch, where N is the device's multiprocessor count.  The total time
+   measured with the timer.h helpers must lie within 2% of dtime * N.
+   CUDA driver errors are printed to stderr and abort.  */
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&nprocs,
+                            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  /* "sum" is only looked up, never launched, in this test.  */
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  /* The test expects no stream to be associated with async 0 yet.  */
+  stream = acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+
+  /* Each launch is waited for before the next one is issued.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+          abort ();
+        }
+
+      acc_wait (0);
+    }
+
+  atime = stop_timer (0);
+
+  /* Accept a window of +/- 2% around N back-to-back delays.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+
+  /* This branch covers both too-long and too-short runs, so the message
+     must not claim only the latter.  */
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,154 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+/* Create one CUDA stream per multiprocessor, register stream I as OpenACC
+   async queue I, then launch the "delay" kernel from subr.ptx on each
+   stream in turn with an acc_wait (I) after every launch.  The total time
+   measured with the timer.h helpers must lie within 2% of dtime * N.
+   CUDA driver errors are printed to stderr and abort.  */
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream *streams;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&nprocs,
+                            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  /* "sum" is only looked up, never launched, in this test.  */
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  /* Size the array by its element type rather than by 'void *'.  */
+  streams = malloc (N * sizeof (*streams));
+
+  /* Each async queue must start without a stream, then gets a fresh one.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+        abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+          abort ();
+        }
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+        abort ();
+    }
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+
+  /* Each launch is waited for before the next one is issued.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+          abort ();
+        }
+
+      acc_wait (i);
+    }
+
+  atime = stop_timer (0);
+
+  /* Accept a window of +/- 2% around N back-to-back delays.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+
+  /* This branch covers both too-long and too-short runs, so the message
+     must not claim only the latter.  */
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (streams);
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,142 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+/* Associate a CUDA stream with async queue 0, launch the "delay" kernel on
+   it, and then call acc_wait on queue 1, which was never given a stream.
+   The test is marked dg-shouldfail with "libgomp: unknown async \d".  */
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay, sum;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&nprocs,
+                            CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  /* "sum" is only looked up, never launched, in this test.  */
+  r = cuModuleGetFunction (&sum, module, "sum");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = malloc (nbytes);
+  d_a = acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  /* Only async queue 0 gets a stream.  */
+  acc_set_cuda_stream (0, stream);
+
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  /* Queue 1 is deliberately the wrong one: this is the call the
+     dg-shouldfail directive is about.  */
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  /* This branch fires when the second wait took too long, so report
+     that instead of repeating the message from the check above.  */
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
new file mode 100644
@@ -0,0 +1,147 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Bind a user-created CUDA stream to async 0, then time acc_wait_all.  */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&sum, module, "sum"); /* Never launched here.  */
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* Delay handed to the kernel: dtime scaled by the device clock rate.  */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* Async 0 must have no CUDA stream until one is set below.  */
+ stream = acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ start_timer (0);
+ /* Launch one single-thread "delay" kernel directly on that stream.  */
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait_all ();
+
+ atime = stop_timer (0);
+ /* The timed launch + wait must not be shorter than dtime.  */
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+ /* Presumably nothing is pending now, so this wait should be quick.  */
+ acc_wait_all ();
+
+ atime = stop_timer (0);
+
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,174 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ /* Make async 1 wait on async 0 and check completion state and timing.  */
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&sum, module, "sum"); /* Never launched here.  */
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* Delay handed to each kernel: dtime scaled by the device clock rate.  */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+ /* First stream: bound to async 1, which only receives the wait.  */
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (1, stream))
+ abort ();
+ /* Second stream: bound to async 0, which receives the kernels.  */
+ stream = acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+ /* Queue N delay kernels back to back on the async-0 stream.  */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_async (0, 1);
+ /* Neither queue may report completion while the kernels still run.  */
+ if (acc_async_test (0) != 0)
+ abort ();
+
+ if (acc_async_test (1) != 0)
+ abort ();
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+ /* After waiting on async 1, both queues must report completion.  */
+ if (acc_async_test (0) != 1)
+ abort ();
+
+ if (acc_async_test (1) != 1)
+ abort ();
+ /* Accept a measured time within 2% of N serialized delays.  */
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time outside expected range\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,139 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ int N;
+ int i;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Expected-failure test: make async 1 wait on itself.  */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&sum, module, "sum"); /* Never launched here.  */
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* Delay handed to each kernel: dtime scaled by the device clock rate.  */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream); /* Return value is not checked here.  */
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+ /* Queue N delay kernels on the stream bound to async 1.  */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+ /* Both arguments are 1; the file's dg-shouldfail expects an error.  */
+ acc_wait_async (1, 1);
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: identical parameters" } */
new file mode 100644
@@ -0,0 +1,218 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams, stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Exercise acc_wait_all_async with one CUDA stream per async value.  */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&sum, module, "sum"); /* Never launched here.  */
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 500.0;
+ /* Delay handed to each kernel: dtime scaled by the device clock rate.  */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = malloc (nbytes);
+ d_a = acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ streams = malloc (N * sizeof (void *));
+ /* Create a stream for each async value 0..N-1 and register it.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* Async N gets its own stream; no kernel is launched on it.  */
+ stream = acc_get_cuda_stream (N);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (N, stream))
+ abort ();
+
+ start_timer (0);
+ /* One delay kernel per stream 0..N-1.  */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_all_async (N);
+ /* No async value 0..N may report completion yet.  */
+ for (i = 0; i <= N; i++)
+ {
+ if (acc_async_test (i) != 0)
+ abort ();
+ }
+
+ acc_wait (N);
+ /* After waiting on async N, every async value 0..N must be complete.  */
+ for (i = 0; i <= N; i++)
+ {
+ if (acc_async_test (i) != 1)
+ abort ();
+ }
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+ /* Repeat with a fresh async value N + 1 and no new kernels.  */
+ stream = acc_get_cuda_stream (N + 1);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (N + 1, stream))
+ abort ();
+
+ acc_wait_all_async (N + 1);
+
+ acc_wait (N + 1);
+
+ atime = stop_timer (0);
+
+ if (0.10 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ start_timer (0);
+ /* And once more on async N, again with no new kernels launched.  */
+ acc_wait_all_async (N);
+
+ acc_wait (N);
+
+ atime = stop_timer (0);
+
+ if (0.10 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (streams);
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,151 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay, sum;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams;
+ unsigned long **a, **d_a, *tid, ticks;
+ int nbytes;
+ void *kargs[3];
+ int clkrate;
+ int devnum, nprocs;
+ /* Launch "delay2" on N streams, then check each copied-out value.  */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay2");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&sum, module, "sum"); /* Never launched here.  */
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+ /* Size of one result slot; must match the unsigned long host object.  */
+ nbytes = sizeof (unsigned long);
+
+ ticks = (unsigned long) (200.0 * clkrate);
+
+ N = nprocs;
+
+ streams = malloc (N * sizeof (*streams));
+
+ a = malloc (N * sizeof (unsigned long *));
+ d_a = malloc (N * sizeof (unsigned long *));
+ tid = malloc (N * sizeof (unsigned long));
+ /* Per slot: host value preset to N, device buffer, and its own stream.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = malloc (sizeof (unsigned long));
+ *a[i] = N;
+ d_a[i] = acc_malloc (nbytes);
+ tid[i] = i;
+
+ acc_map_data (a[i], d_a[i], nbytes);
+
+ streams[i] = acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+ /* The first launch uses the 200.0 delay, all later ones 50.0.  */
+ for (i = 0; i < N; i++)
+ {
+ kargs[0] = (void *) &d_a[i];
+ kargs[1] = (void *) &ticks;
+ kargs[2] = (void *) &tid[i];
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ ticks = (unsigned long) (50.0 * clkrate);
+ }
+
+ acc_wait_all_async (0);
+ /* Each slot must now hold its own index instead of the preset N.  */
+ for (i = 0; i < N; i++)
+ {
+ acc_copyout (a[i], nbytes);
+ if (*a[i] != i)
+ abort ();
+ }
+
+ free (streams);
+
+ for (i = 0; i < N; i++)
+ {
+ free (a[i]);
+ }
+
+ free (a);
+ free (d_a);
+ free (tid);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ float atime;
+ CUstream stream;
+ CUresult r;
+ /* Time acc_wait_all_async + acc_wait when no kernel was launched.  */
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ init_timers (1);
+ /* Async 0 must have no CUDA stream until one is set below.  */
+ stream = acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ start_timer (0);
+
+ acc_wait_all_async (0);
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+ /* Abort if the two waits together took longer than 0.010.  */
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ fini_timers ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 100;
+ int i;
+ CUstream *streams;
+ CUstream s;
+ CUresult r;
+ /* Register N created streams and check the handles are all distinct.  */
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ streams = malloc (N * sizeof (void *));
+ /* Each async value starts without a stream, then gets a new one.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+ /* Every handle must occur exactly once in the array.  */
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ int cnt;
+
+ cnt = 0;
+
+ s = streams[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (s == streams[j])
+ cnt++;
+ }
+
+ if (cnt != 1)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <stdio.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 100;
+ int i;
+ CUstream *streams;
+ CUstream s;
+ CUresult r;
+ /* Check that acc_set_cuda_stream returns 0 for a NULL stream.  */
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ streams = malloc (N * sizeof (void *));
+ /* Each async value starts without a stream, then gets a new one.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ s = NULL;
+ /* Async N + 1 was never registered above; a NULL stream must fail.  */
+ if (acc_set_cuda_stream (N + 1, s) != 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0; /* Nothing to check without an nvidia device.  */
+ /* Before any acc_init the current CUDA device must be 0.  */
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_init (acc_device_host);
+ /* Still 0 while only the host device is initialized.  */
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_shutdown (acc_device_host);
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_init (acc_device_nvidia);
+ /* Non-zero once the nvidia device is initialized.  */
+ if (acc_get_current_cuda_device () == 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+ /* And 0 again after shutdown.  */
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0; /* Nothing to check without an nvidia device.  */
+ /* Before any acc_init the current CUDA context must be 0.  */
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_init (acc_device_host);
+ /* Still 0 while only the host device is initialized.  */
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_shutdown (acc_device_host);
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_init (acc_device_nvidia);
+ /* Non-zero once the nvidia device is initialized.  */
+ if (acc_get_current_cuda_context () == 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+ /* And 0 again after shutdown.  */
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char *x; /* Host buffer shared by main and the test thread.  */
+void *d_x; /* Value returned by the latest acc_copyin (x, N).  */
+const int N = 256; /* Size of x in bytes.  */
+
+static void *
+test (void *arg)
+{
+ int i;
+ /* Thread body: expects no CUDA context yet but x already present.  */
+ if (acc_get_current_cuda_context () != NULL)
+ abort ();
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+ /* Clear the host copy so the check below depends on acc_copyout.  */
+ memset (x, 0, N);
+
+ acc_copyout (x, N);
+ /* Verify the pattern set by main, then store the reversed pattern.  */
+ for (i = 0; i < N; i++)
+ {
+ if (x[i] != i)
+ abort ();
+
+ x[i] = N - i - 1;
+ }
+
+ d_x = acc_copyin (x, N);
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ const int nthreads = 1;
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0; /* Nothing to check without an nvidia device.  */
+
+ acc_init (acc_device_nvidia);
+
+ x = malloc (N);
+ /* Fill x with x[i] == i and copy it to the device.  */
+ for (i = 0; i < N; i++)
+ {
+ x[i] = i;
+ }
+
+ d_x = acc_copyin (x, N);
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t));
+ /* Run test () in nthreads threads; it does not use its argument.  */
+ for (i = 0; i < nthreads; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < nthreads; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+ /* test () copied x in again, so it must still be present.  */
+ if (acc_is_present (x, N) != 1)
+ abort ();
+
+ memset (x, 0, N);
+
+ acc_copyout (x, N);
+ /* The device copy must hold the reversed pattern stored by test ().  */
+ for (i = 0; i < N; i++)
+ {
+ if (x[i] != N - i - 1)
+ abort ();
+ }
+
+ if (acc_is_present (x, N) != 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,118 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x; /* x[tid]: host buffer allocated by thread tid.  */
+void **d_x; /* d_x[tid]: value acc_copyin returned for x[tid].  */
+const int N = 16; /* Size of each per-thread buffer in bytes.  */
+const int NTHREADS = 32; /* Number of threads main creates.  */
+
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+ /* Thread body; ARG carries the thread index as an integer.  */
+ tid = (int) (long) arg;
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+ /* After selecting the device this thread must have a CUDA context.  */
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ p = malloc (N);
+ /* Fill the buffer with this thread's index.  */
+ for (i = 0; i < N; i++)
+ {
+ p[i] = tid;
+ }
+
+ x[tid] = p;
+
+ d_x[tid] = acc_copyin (p, N);
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0; /* Nothing to check without an nvidia device.  */
+
+ acc_init (acc_device_nvidia);
+ /* One pointer slot per thread; the threads allocate the buffers.  */
+ x = malloc (NTHREADS * sizeof (*x));
+ d_x = malloc (NTHREADS * sizeof (*d_x));
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+ /* Pass each thread its index through the void * argument.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+ /* Data copied in by the threads must be visible from this thread.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 1)
+ abort ();
+ }
+ /* Clear each host copy so the check below depends on acc_copyout.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ memset (x[i], 0, N);
+ acc_copyout (x[i], N);
+ }
+ /* Buffer i must hold the value i throughout and no longer be present.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ unsigned char *p;
+ int j;
+
+ p = x[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ int num_devices;
+ int devnum;
+ acc_device_t devtype = acc_device_host;
+ /* Use the nvidia device type when ACC_DEVICE_TYPE_nvidia is non-zero.  */
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+#endif
+
+ num_devices = acc_get_num_devices (devtype);
+ if (num_devices == 0)
+ return 0;
+
+ acc_init (devtype);
+ /* Every device number that is set must be read back unchanged.  */
+ for (i = 0; i < num_devices; i++)
+ {
+ acc_set_device_num (i, devtype);
+ devnum = acc_get_device_num (devtype);
+ if (devnum != i)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ num_devices = acc_get_num_devices (devtype);
+ if (num_devices == 0)
+ abort ();
+ /* Same round trip again, this time without a preceding acc_init.  */
+ for (i = 0; i < num_devices; i++)
+ {
+ acc_set_device_num (i, devtype);
+ devnum = acc_get_device_num (devtype);
+ if (devnum != i)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ acc_init (devtype);
+
+ acc_set_device_num (0, devtype);
+
+ devnum = acc_get_device_num (devtype);
+ if (devnum != 0)
+ abort();
+
+ if (num_devices > 1)
+ {
+ acc_set_device_num (1, 0); /* NOTE(review): type 0, not devtype?  */
+
+ devnum = acc_get_device_num (devtype);
+ if (devnum != 1)
+ abort();
+ }
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,137 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+#include <cuda.h>
+
+unsigned char **x; /* x[tid]: host buffer allocated by thread tid.  */
+void **d_x; /* d_x[tid]: value acc_copyin returned for x[tid].  */
+const int N = 16; /* Size of each per-thread buffer in bytes.  */
+const int NTHREADS = 32; /* Number of threads main creates.  */
+
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+ /* Thread body; ARG carries the thread index as an integer.  */
+ tid = (int) (long) arg;
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+ /* After selecting the device this thread must have a CUDA context.  */
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ p = malloc (N);
+ /* Fill the buffer with this thread's index.  */
+ for (i = 0; i < N; i++)
+ {
+ p[i] = tid;
+ }
+
+ x[tid] = p;
+
+ d_x[tid] = acc_copyin (p, N);
+
+ acc_wait_all ();
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+ CUresult r;
+ CUstream s;
+
+ acc_init (acc_device_nvidia);
+ /* One pointer slot per thread; the threads allocate the buffers.  */
+ x = malloc (NTHREADS * sizeof (*x));
+ d_x = malloc (NTHREADS * sizeof (*d_x));
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+ /* Bind a user-created stream to async 0 before any thread starts.  */
+ r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, s))
+ abort ();
+ /* Pass each thread its index through the void * argument.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+
+ /* Data copied in by the threads must be visible from this thread.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 1)
+ abort ();
+ }
+
+ acc_get_cuda_stream (1); /* Result deliberately unused.  */
+ /* Clear each host copy so the check below depends on acc_copyout.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ memset (x[i], 0, N);
+ acc_copyout (x[i], N);
+ }
+
+ acc_wait_all ();
+ /* Buffer i must hold the value i throughout and no longer be present.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ unsigned char *p;
+ int j;
+
+ p = x[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,84 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 1024 * 1024;
+ int i;
+ unsigned char *h;
+ void *d;
+ float async, sync;
+ struct timeval start, stop;
+ CUresult r;
+ CUstream s;
+ /* Compare 100 synchronous updates against 100 updates on async 0.  */
+ acc_init (acc_device_nvidia);
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ gettimeofday (&start, NULL);
+ /* Synchronous variant.  */
+ for (i = 0; i < 100; i++)
+ {
+#pragma acc update device(h[0:N])
+ }
+
+ gettimeofday (&stop, NULL);
+ /* Elapsed wall-clock time in seconds.  */
+ sync = (float) (stop.tv_sec - start.tv_sec);
+ sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+
+ gettimeofday (&start, NULL);
+ /* Stream creation below falls inside the timed async interval.  */
+ r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, s))
+ abort ();
+
+ for (i = 0; i < 100; i++)
+ {
+#pragma acc update device(h[0:N]) async(0)
+ }
+
+ acc_wait_all ();
+
+ gettimeofday (&stop, NULL);
+
+ async = (float) (stop.tv_sec - start.tv_sec);
+ async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+ /* The async variant may take at most 1.5 times the synchronous time.  */
+ if (async > (sync * 1.5))
+ abort ();
+
+ acc_free (d);
+
+ free (h);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x; /* x[i]: host buffer allocated and filled by main.  */
+void **d_x; /* d_x[i]: value acc_copyin returned for x[i].  */
+const int N = 32; /* Size of each buffer in bytes.  */
+const int NTHREADS = 32; /* Number of threads main creates.  */
+
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+ /* Thread body; ARG carries the thread index as an integer.  */
+ tid = (int) (long) arg;
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+ /* After selecting the device this thread must have a CUDA context.  */
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+ /* Copy out the buffer that main copied in for this thread.  */
+ acc_copyout (x[tid], N);
+
+ p = x[tid];
+ /* The buffer must still hold the pattern p[i] == i set by main.  */
+ for (i = 0; i < N; i++)
+ {
+ if (p[i] != i)
+ abort ();
+ }
+
+ return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+ unsigned char *p;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0; /* Nothing to check without an nvidia device.  */
+
+ acc_init (acc_device_nvidia);
+ /* One pointer slot per thread.  */
+ x = malloc (NTHREADS * sizeof (*x));
+ d_x = malloc (NTHREADS * sizeof (*d_x));
+ /* Prepare one N-byte buffer per thread (not per byte) and copy it in.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ int j;
+
+ p = malloc (N);
+
+ x[i] = p;
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = j;
+ }
+
+ d_x[i] = acc_copyin (p, N);
+ }
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+ acc_get_cuda_stream (1); /* Result deliberately unused.  */
+ /* Pass each thread its index through the void * argument.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+ /* Every thread copied its buffer out, so none may still be present.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
new file mode 100644
@@ -0,0 +1,680 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c, *d;
+ int i;
+ /* Walks through the OpenACC data clauses one at a time, checking after each construct both the host values and which arrays acc_is_present still reports as mapped. */
+ a = (float *) malloc(N * sizeof (float));
+ b = (float *) malloc(N * sizeof (float));
+ c = (float *) malloc(N * sizeof (float));
+ /* copyin(a) copyout(b): b must come back equal to a, and neither array may stay mapped. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* Same clauses again with different values. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copyin with a already mapped: acc_copyin sends 6.0, the host is then changed to 9.0, and the test expects b to receive 6.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+ d = acc_copyin (&a[0], N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc data present_or_copyin(a[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ acc_free(d);
+ /* present_or_copyout with b not mapped beforehand: b is expected back as 6.0 and unmapped afterwards. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copyin(a[0:N]) present_or_copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copyout with b already mapped by acc_copyin: host b is expected to keep 2.0, and b to stay mapped until the acc_free below. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 2.0;
+ }
+
+ d = acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc data copyin(a[0:N]) present_or_copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 2.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ acc_free (d);
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* copy(a) copyout(b): a is incremented in the region, so a == 4.0 and b == 6.0 are expected afterwards. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 4.0;
+ }
+
+#pragma acc data copy(a[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = a[ii] + 2;
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copy with neither array mapped beforehand: both updates are expected back on the host. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc data present_or_copy(a[0:N]) present_or_copy(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = b[ii] + 2;
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 9.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* present_or_copy with both arrays already mapped by acc_copyin: host values are expected unchanged and both arrays still mapped. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 7.0;
+ }
+
+ d = acc_copyin (&a[0], N * sizeof (float));
+ d = acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc data present_or_copy(a[0:N]) present_or_copy(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = b[ii] + 2;
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 7.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ d = acc_deviceptr(&a[0]);
+ acc_unmap_data (&a[0]);
+ acc_free (d);
+
+ d = acc_deviceptr(&b[0]);
+ acc_unmap_data (&b[0]);
+ acc_free (d);
+
+ /* create(c): c is used as an intermediate inside the region and must not be mapped afterwards. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc data copyin(a[0:N]) create(c[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort();
+ /* present_or_create(c) with c not mapped beforehand: same expectations as create(c). */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+#pragma acc data copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort();
+ /* present_or_create(c) with c mapped beforehand through acc_malloc + acc_map_data: c must still be mapped after the region. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 5.0;
+ }
+
+ d = acc_malloc (N * sizeof (float));
+ acc_map_data(c, d, N * sizeof (float));
+
+#pragma acc data copyin(a[0:N]) present_or_create(c[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort();
+
+ if (b[i] != 2.0)
+ abort();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ d = acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+ /* present(c) with c mapped through acc_map_data: c stays mapped after the region and must be gone after acc_unmap_data. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc data copyin(a[0:N]) present(c[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ acc_unmap_data (c);
+
+ if (acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ acc_free (d);
+ /* Map c, b and a by hand with acc_malloc + acc_map_data, then name all three in present clauses. */
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (b, d, N * sizeof (float));
+
+ if (!acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ d = acc_malloc(N * sizeof (float));
+ acc_map_data (a, d, N * sizeof (float));
+
+ if (!acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+#pragma acc data present(a[0:N]) present(c[0:N]) present(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = 1.0;
+ c[ii] = 2.0;
+ b[ii] = 4.0;
+ }
+ }
+ }
+
+ if (!acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort();
+ /* Only b is copied out: host a is expected to keep the 4.0 stored before it was mapped, while b receives the 4.0 written in the region. */
+ acc_copyout (b, N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort();
+
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ d = acc_deviceptr(a);
+
+ acc_unmap_data (a);
+
+ acc_free (d);
+
+ d = acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+ /* deviceptr(d): d comes from acc_malloc and is indexed directly inside the region. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 6.0;
+ }
+
+ d = acc_malloc(N * sizeof (float));
+
+#pragma acc parallel copyin(a[0:N]) deviceptr(d) copyout(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ d[ii] = a[ii];
+ b[ii] = d[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort();
+
+ acc_free (d);
+ /* The remaining cases repeat earlier ones with the short clause spellings; first pcopyin with a already mapped. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+ d = acc_copyin (&a[0], N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc data pcopyin(a[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ acc_free(d);
+ /* pcopyout with b not mapped beforehand. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copyin(a[0:N]) pcopyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+ /* pcreate with c not mapped beforehand. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc data copyin(a[0:N]) pcreate(c[0:N]) copyout(b[0:N])
+ {
+#pragma acc parallel
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+extern void abort(void);
+
+/* Fill A inside a parallel region nested in a copyout data region and check the values copied back.  */
+int
+main(int argc, char *argv[])
+{
+#define N 10
+  char a[N];
+  int k;
+
+  /* Host-side initial contents.  */
+  for (k = 0; k < N; ++k)
+    a[k] = 0;
+
+  /* The enclosing data region names A in copyout; the inner construct only names it as present.  */
+#pragma acc data copyout(a)
+  {
+#pragma acc parallel /* will result in a "dummy frame" */ present(a)
+    {
+      int idx;
+
+      for (idx = 0; idx < N; ++idx)
+        a[idx] = idx;
+    }
+  }
+
+  /* Element K must now hold K on the host.  */
+  for (k = 0; k < N; ++k)
+    if (a[k] != k)
+      abort();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Exercise copyin/copyout on "acc parallel" with sub-array sections of A
+   and B.  */
+int
+main(int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b;
+  int i;
+
+  a = (float *) malloc(N * sizeof *a);
+  b = (float *) malloc(N * sizeof *b);
+
+  /* Case 1: section [2:4] of both arrays; only elements 2 and 3 are written and checked.  */
+  i = 0;
+  while (i < N)
+    {
+      a[i] = 2.0;
+      b[i] = 5.0;
+      i++;
+    }
+
+#pragma acc parallel copyin(a[2:4]) copyout(b[2:4])
+  {
+    b[2] = a[2];
+    b[3] = a[3];
+  }
+
+  for (i = 2; i < 4; i++)
+    {
+      if (a[i] != 2.0 || b[i] != 2.0)
+        abort();
+    }
+
+  /* Case 2: section [0:4] of both arrays.  */
+  i = 0;
+  while (i < N)
+    {
+      a[i] = 3.0;
+      b[i] = 1.0;
+      i++;
+    }
+
+#pragma acc parallel copyin(a[0:4]) copyout(b[0:4])
+  {
+    b[0] = a[0];
+    b[1] = a[1];
+    b[2] = a[2];
+    b[3] = a[3];
+  }
+
+  for (i = 0; i < 4; i++)
+    if (a[i] != 3.0 || b[i] != 3.0)
+      abort();
+
+  /* Case 3: different sections, A's [0:4] feeding B's [4:4].  */
+  i = 0;
+  while (i < N)
+    {
+      a[i] = 9.0;
+      b[i] = 6.0;
+      i++;
+    }
+
+#pragma acc parallel copyin(a[0:4]) copyout(b[4:4])
+  {
+    b[4] = a[0];
+    b[5] = a[1];
+    b[6] = a[2];
+    b[7] = a[3];
+  }
+
+  for (i = 0; i < 4; i++)
+    if (a[i] != 9.0)
+      abort();
+
+  for (i = 4; i < 8; i++)
+    if (b[i] != 9.0)
+      abort();
+
+  /* Neither array may be reported as mapped once the constructs have finished.  */
+  if (acc_is_present (a, (N * sizeof (float))))
+    abort();
+
+  if (acc_is_present (b, (N * sizeof (float))))
+    abort();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,206 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int i;
+
+/* Check the host-visible effect of each scalar data clause on "acc parallel"; expectations differ with ACC_MEM_SHARED.  */
+int main(void)
+{
+  int j, v;
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
+  {
+    if (!(i == -1 && j == -2))
+      abort ();
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+#if ACC_MEM_SHARED
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+#else
+  if (!(v == 1 && i == -1 && j == -2))
+    abort ();
+#endif
+  /* copyout (i, j): the stores made in the region must reach the host in either memory model.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
+  {
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+  /* copy (i, j): the values are visible inside the region and the stores come back.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
+  {
+    if (!(i == -1 && j == -2))
+      abort ();
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+  /* create (i, j): the host only sees the stores when memory is shared.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
+  {
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+#if ACC_MEM_SHARED
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+#else
+  if (!(v == 1 && i == -1 && j == -2))
+    abort ();
+#endif
+  /* present_or_copyin (i, j) with nothing mapped beforehand.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+  {
+    if (!(i == -1 && j == -2))
+      abort ();
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+  if (!(v == 1))
+    abort ();
+#if ACC_MEM_SHARED
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+#else
+  if (!(v == 1 && i == -1 && j == -2))
+    abort ();
+#endif
+  /* present_or_copyout (i, j) with nothing mapped beforehand.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+  {
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+  /* present_or_copy (i, j) with nothing mapped beforehand.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+  {
+    if (!(i == -1 && j == -2))
+      abort ();
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+  /* present_or_create (i, j) with nothing mapped beforehand.  */
+  i = -1;
+  j = -2;
+  v = 0;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
+  {
+    j = 1;
+    i = 2;
+    if (!(i == 2 && j == 1))
+      abort ();
+    v = 1;
+  }
+  if (!(v == 1))
+    abort ();
+#if ACC_MEM_SHARED
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+#else
+  if (!(v == 1 && i == -1 && j == -2))
+    abort ();
+#endif
+  /* present (i, j) inside a data region that names them in copyin.  */
+  i = -1;
+  j = -2;
+  v = 0;
+
+#pragma acc data copyin (i, j)
+  {
+#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
+    {
+      if (!(i == -1 && j == -2))
+        abort ();
+      j = 1;
+      i = 2;
+      if (!(i == 2 && j == 1))
+        abort ();
+      v = 1;
+    }
+  }
+#if ACC_MEM_SHARED
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+#else
+  if (!(v == 1 && i == -1 && j == -2))
+    abort ();
+#endif
+  /* Same data region, but the parallel construct names only v; i and j are still used in the region.  */
+  i = -1;
+  j = -2;
+  v = 0;
+
+#pragma acc data copyin(i, j)
+  {
+#pragma acc parallel /* copyout */ present_or_copyout (v)
+    {
+      if (!(i == -1 && j == -2))
+        abort ();
+      j = 1;
+      i = 2;
+      if (!(i == 2 && j == 1))
+        abort ();
+      v = 1;
+    }
+  }
+#if ACC_MEM_SHARED
+  if (!(v == 1 && i == 2 && j == 1))
+    abort ();
+#else
+  if (!(v == 1 && i == -1 && j == -2))
+    abort ();
+#endif
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Run one parallel region between explicit acc_init and acc_shutdown calls and check the data it copies back.  */
+int
+main(int argc, char **argv)
+{
+  int N = 5;
+  int a[N], i;
+
+  for (i = 0; i < N; i++)
+    a[i] = 10;
+
+  acc_init(acc_device_nvidia);
+#pragma acc parallel copy(a[0:N])
+  {
+    a[0] = 5;
+    a[1] = 5;
+    a[2] = 5;
+    a[3] = 5;
+    a[4] = 5;
+  }
+  acc_shutdown(acc_device_nvidia);
+
+  /* copy(a[0:N]) must bring the region's stores back, so a leftover 10 means the transfer was lost.  */
+  for (i = 0; i < N; i++)
+    if (a[i] != 5)
+      abort();
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Only C is mapped before the data region, so the present clauses for A and B are expected to fail at run time (see the dg-shouldfail directive after this function).  */
+int
+main(int argc, char **argv)
+{
+  int N = 8, i;
+  float *a, *b, *c, *d;
+  const size_t bytes = N * sizeof (float);
+
+  a = (float *) malloc(bytes);
+  b = (float *) malloc(bytes);
+  c = (float *) malloc(bytes);
+
+  /* Give C a device mapping by hand; A and B are left unmapped.  */
+  d = acc_malloc(bytes);
+  acc_map_data (c, d, bytes);
+
+#pragma acc data present(a[0:N]) present(c[0:N]) present(b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  d = acc_deviceptr (c);
+  acc_unmap_data (c);
+  acc_free (d);
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+/* An inner "parallel present(a)" must find A mapped by the enclosing data
+   region; B comes back through that region's copyout clause.  */
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b;
+  int i;
+
+  a = (float *) malloc (N * sizeof *a);
+  b = (float *) malloc (N * sizeof *b);
+
+  i = 0;
+  while (i < N)
+    {
+      a[i] = 4.0;
+      b[i] = 0.0;
+      i++;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+  {
+
+#pragma acc parallel present(a[0:N])
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+
+  }
+
+  /* A is not written anywhere after initialisation; B must now equal it.  */
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != 4.0 || b[i] != 4.0)
+        abort ();
+    }
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,186 @@
+/* { dg-do run } */
+/* TODO:
+ <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+ { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* Integer reductions. */
+
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  int vresult, result, array[n];
+  bool lvresult, lresult;
+  /* Integer reductions over array[i] = i: each result from the parallel loop is compared with the same computation redone sequentially on the host.  */
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (result != vresult)
+    abort ();
+
+// result = 0;
+// vresult = 0;
+//
+// /* 'max' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult > array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+//
+// result = 0;
+// vresult = 0;
+//
+// /* 'min' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult < array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&:result)
+  for (i = 0; i < n; i++)
+    result &= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult &= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '|' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (|:result)
+  for (i = 0; i < n; i++)
+    result |= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult |= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '^' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (^:result)
+  for (i = 0; i < n; i++)
+    result ^= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult ^= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,135 @@
+/* { dg-do run } */
+/* TODO:
+ <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+ { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* float reductions. */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  float vresult, result, array[n];
+  bool lvresult, lresult;
+  /* float reductions over array[i] = i: each result from the parallel loop is compared with the same computation redone sequentially on the host.  */
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (fabs(result - vresult) > .0001)
+    abort ();
+// result = 0;
+// vresult = 0;
+//
+// /* 'max' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult > array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+//
+// result = 0;
+// vresult = 0;
+//
+// /* 'min' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult < array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,135 @@
+/* { dg-do run } */
+/* TODO:
+ <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+ { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* double reductions. */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  double vresult, result, array[n];
+  bool lvresult, lresult;
+  /* double reductions over array[i] = i: each result from the parallel loop is compared with the same computation redone sequentially on the host.  */
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (fabs(result - vresult) > .0001)
+    abort ();
+// result = 0;
+// vresult = 0;
+//
+// /* 'max' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult > array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+//
+// result = 0;
+// vresult = 0;
+//
+// /* 'min' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult < array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,138 @@
+/* { dg-do run } */
+/* TODO:
+ <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+ { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } } */
+
+/* complex reductions. */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+#include <complex.h>
+
+#define vl 32
+
+int
+main(void)
+{
+  const int n = 1000;
+  int i;
+  double complex vresult, result, array[n];
+  bool lvresult, lresult;
+  /* double complex reductions over array[i] = i: each result from the parallel loop is compared with the same computation redone sequentially on the host.  */
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+#pragma acc end parallel
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* Needs support for complex multiplication.  */
+
+// /* '*' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (*:result)
+// for (i = 0; i < n; i++)
+// result *= array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult *= array[i];
+//
+// if (fabs(result - vresult) > .0001)
+// abort ();
+// result = 0;
+// vresult = 0;
+
+// /* 'max' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result > array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult > array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+//
+// result = 0;
+// vresult = 0;
+//
+// /* 'min' reductions. */
+// #pragma acc parallel vector_length (vl)
+// #pragma acc loop reduction (+:result)
+// for (i = 0; i < n; i++)
+// result = result < array[i] ? result : array[i];
+// #pragma acc end parallel
+//
+// /* Verify the reduction. */
+// for (i = 0; i < n; i++)
+// vresult = vresult < array[i] ? vresult : array[i];
+//
+// printf("%d != %d\n", result, vresult);
+// if (result != vresult)
+// abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (creal(result) > creal(array[i]));
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (creal(result) > creal(array[i]));
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (creal(result) > creal(array[i]));
+#pragma acc end parallel
+
+  /* Verify the reduction: accumulate into lvresult from its own initial value, independently of lresult.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (creal(result) > creal(array[i]));
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
new file mode 100644
@@ -0,0 +1,64 @@
+
+/* Spin until clock () has advanced by at least DELAY ticks since entry.
+   D_O is not accessed.  */
+extern "C" __global__ void
+delay (clock_t * d_o, clock_t delay)
+{
+  const clock_t begin = clock ();
+  clock_t elapsed;
+
+  /* Falls straight through when DELAY is not greater than zero.  */
+  for (elapsed = 0; elapsed < delay; elapsed = clock () - begin)
+    ;
+}
+
+/* Spin until clock () has advanced by at least DELAY ticks, then store TID
+   into d_o[0].  */
+extern "C" __global__ void
+delay2 (unsigned long *d_o, clock_t delay, unsigned long tid)
+{
+  const clock_t begin = clock ();
+  clock_t elapsed;
+
+  for (elapsed = 0; elapsed < delay; elapsed = clock () - begin)
+    ;
+
+  /* Written only after the wait completes.  */
+  d_o[0] = tid;
+}
+
+/* Add up d_o[0 .. N-1] using the threads of a block and store the total in d_o[0].  */
+extern "C" __global__ void
+sum (clock_t * d_o, int N)
+{
+  int i;
+  clock_t partial = 0;
+  /* NOTE(review): 32 slots and a first stride of 16 assume blockDim.x == 32 -- confirm against the launch.  */
+  __shared__ clock_t ticks[32];
+
+  for (i = threadIdx.x; i < N; i += blockDim.x)
+    partial += d_o[i];
+  ticks[threadIdx.x] = partial;
+
+  /* Barrier via the documented __syncthreads () intrinsic rather than the unprefixed legacy alias.  */
+  __syncthreads ();
+
+  /* Pairwise reduction in shared memory, halving the stride each round.  */
+  for (i = 16; i >= 1; i >>= 1)
+    {
+      if (threadIdx.x < i)
+        ticks[threadIdx.x] += ticks[threadIdx.x + i];
+      __syncthreads ();
+    }
+
+  d_o[0] = ticks[0];
+}
+
+/* y[k] = x[k] * x[k] for every k in [0, n).  Each thread walks the whole range; NOTE(review): a per-thread global index was computed here but immediately overwritten by the loop -- confirm that is intended.  */
+extern "C" __global__ void
+mult (int n, float *x, float *y)
+{
+  int k;
+  for (k = 0; k < n; k++)
+    y[k] = x[k] * x[k];
+}
new file mode 100644
@@ -0,0 +1,251 @@
+ .version 1.4
+ .target sm_10, map_f64_to_f32
+ // compiled with /sj/opt/nvidia/cuda-5.5/open64/lib//be
+ // nvopencc 4.1 built on 2013-07-17
+
+ //-----------------------------------------------------------
+ // Compiling /tmp/tmpxft_00007716_00000000-9_subr.cpp3.i (/tmp/ccBI#.6sfiTI)
+ //-----------------------------------------------------------
+
+ //-----------------------------------------------------------
+ // Options:
+ //-----------------------------------------------------------
+ // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:64
+ // -O3 (Optimization level)
+ // -g0 (Debug level)
+ // -m2 (Report advisories)
+ //-----------------------------------------------------------
+
+ .file 1 "<command-line>"
+ .file 2 "/tmp/tmpxft_00007716_00000000-8_subr.cudafe2.gpu"
+ .file 3 "/usr/lib/gcc/x86_64-redhat-linux/4.4.5/include/stddef.h"
+ .file 4 "/opt/nvidia/cuda-5.5/bin/..//include/crt/device_runtime.h"
+ .file 5 "/opt/nvidia/cuda-5.5/bin/..//include/host_defines.h"
+ .file 6 "/opt/nvidia/cuda-5.5/bin/..//include/builtin_types.h"
+ .file 7 "/opt/nvidia/cuda-5.5/bin/..//include/device_types.h"
+ .file 8 "/opt/nvidia/cuda-5.5/bin/..//include/driver_types.h"
+ .file 9 "/opt/nvidia/cuda-5.5/bin/..//include/surface_types.h"
+ .file 10 "/opt/nvidia/cuda-5.5/bin/..//include/texture_types.h"
+ .file 11 "/opt/nvidia/cuda-5.5/bin/..//include/vector_types.h"
+ .file 12 "/opt/nvidia/cuda-5.5/bin/..//include/device_launch_parameters.h"
+ .file 13 "/opt/nvidia/cuda-5.5/bin/..//include/crt/storage_class.h"
+ .file 14 "/usr/include/bits/types.h"
+ .file 15 "/usr/include/time.h"
+ .file 16 "subr.cu"
+ .file 17 "/opt/nvidia/cuda-5.5/bin/..//include/common_functions.h"
+ .file 18 "/opt/nvidia/cuda-5.5/bin/..//include/math_functions.h"
+ .file 19 "/opt/nvidia/cuda-5.5/bin/..//include/math_constants.h"
+ .file 20 "/opt/nvidia/cuda-5.5/bin/..//include/device_functions.h"
+ .file 21 "/opt/nvidia/cuda-5.5/bin/..//include/sm_11_atomic_functions.h"
+ .file 22 "/opt/nvidia/cuda-5.5/bin/..//include/sm_12_atomic_functions.h"
+ .file 23 "/opt/nvidia/cuda-5.5/bin/..//include/sm_13_double_functions.h"
+ .file 24 "/opt/nvidia/cuda-5.5/bin/..//include/sm_20_atomic_functions.h"
+ .file 25 "/opt/nvidia/cuda-5.5/bin/..//include/sm_32_atomic_functions.h"
+ .file 26 "/opt/nvidia/cuda-5.5/bin/..//include/sm_35_atomic_functions.h"
+ .file 27 "/opt/nvidia/cuda-5.5/bin/..//include/sm_20_intrinsics.h"
+ .file 28 "/opt/nvidia/cuda-5.5/bin/..//include/sm_30_intrinsics.h"
+ .file 29 "/opt/nvidia/cuda-5.5/bin/..//include/sm_32_intrinsics.h"
+ .file 30 "/opt/nvidia/cuda-5.5/bin/..//include/sm_35_intrinsics.h"
+ .file 31 "/opt/nvidia/cuda-5.5/bin/..//include/surface_functions.h"
+ .file 32 "/opt/nvidia/cuda-5.5/bin/..//include/texture_fetch_functions.h"
+ .file 33 "/opt/nvidia/cuda-5.5/bin/..//include/texture_indirect_functions.h"
+ .file 34 "/opt/nvidia/cuda-5.5/bin/..//include/surface_indirect_functions.h"
+ .file 35 "/opt/nvidia/cuda-5.5/bin/..//include/math_functions_dbl_ptx1.h"
+
+
+ // Kernel delay: spin until the delay parameter's worth of %clock ticks has
+ // elapsed.  The d_o parameter is declared but never loaded in this body.
+ .entry delay (
+ .param .u64 __cudaparm_delay_d_o,
+ .param .s64 __cudaparm_delay_delay)
+ {
+ .reg .u32 %rv1;
+ .reg .u32 %r<6>;
+ .reg .u64 %rd<7>;
+ .reg .pred %p<4>;
+ .loc 16 3 0
+$LDWbegin_delay:
+ .loc 16 7 0
+ mov.u32 %r1, %clock; // start = clock ()
+ mov.s32 %r2, %r1;
+ ld.param.s64 %rd1, [__cudaparm_delay_delay];
+ mov.u64 %rd2, 0;
+ setp.le.s64 %p1, %rd1, %rd2; // delay <= 0: the loop body never runs
+ @%p1 bra $Lt_0_1282;
+ cvt.s64.s32 %rd3, %r2; // 32-bit start value sign-extended to 64 bits
+$Lt_0_1794:
+ .loc 16 12 0
+ mov.u32 %r3, %clock;
+ mov.s32 %r4, %r3;
+ cvt.s64.s32 %rd4, %r4;
+ sub.s64 %rd5, %rd4, %rd3; // ticks = clock () - start
+ .loc 16 7 0
+ ld.param.s64 %rd1, [__cudaparm_delay_delay];
+ .loc 16 12 0
+ setp.gt.s64 %p2, %rd1, %rd5; // keep spinning while delay > ticks
+ @%p2 bra $Lt_0_1794;
+$Lt_0_1282:
+ .loc 16 13 0
+ exit;
+$LDWend_delay:
+ } // delay
+
+ // Kernel delay2: same spin loop as delay, followed by a store of the tid
+ // parameter to the first 64-bit word addressed by d_o.
+ .entry delay2 (
+ .param .u64 __cudaparm_delay2_d_o,
+ .param .s64 __cudaparm_delay2_delay,
+ .param .u64 __cudaparm_delay2_tid)
+ {
+ .reg .u32 %rv1;
+ .reg .u32 %r<6>;
+ .reg .u64 %rd<9>;
+ .reg .pred %p<4>;
+ .loc 16 16 0
+$LDWbegin_delay2:
+ .loc 16 20 0
+ mov.u32 %r1, %clock; // start = clock ()
+ mov.s32 %r2, %r1;
+ ld.param.s64 %rd1, [__cudaparm_delay2_delay];
+ mov.u64 %rd2, 0;
+ setp.le.s64 %p1, %rd1, %rd2; // delay <= 0: skip the spin loop
+ @%p1 bra $Lt_1_1282;
+ cvt.s64.s32 %rd3, %r2; // 32-bit start value sign-extended to 64 bits
+$Lt_1_1794:
+ .loc 16 25 0
+ mov.u32 %r3, %clock;
+ mov.s32 %r4, %r3;
+ cvt.s64.s32 %rd4, %r4;
+ sub.s64 %rd5, %rd4, %rd3; // ticks = clock () - start
+ .loc 16 20 0
+ ld.param.s64 %rd1, [__cudaparm_delay2_delay];
+ .loc 16 25 0
+ setp.gt.s64 %p2, %rd1, %rd5; // keep spinning while delay > ticks
+ @%p2 bra $Lt_1_1794;
+$Lt_1_1282:
+ .loc 16 27 0
+ ld.param.u64 %rd6, [__cudaparm_delay2_tid];
+ ld.param.u64 %rd7, [__cudaparm_delay2_d_o];
+ st.global.u64 [%rd7+0], %rd6; // d_o[0] = tid
+ .loc 16 28 0
+ exit;
+$LDWend_delay2:
+ } // delay2
+
+ // Kernel sum: each thread adds up a strided slice of the 64-bit values at
+ // d_o, the per-thread totals are combined through shared memory, and the
+ // result is stored back to d_o[0].
+ .entry sum (
+ .param .u64 __cudaparm_sum_d_o,
+ .param .s32 __cudaparm_sum_N)
+ {
+ .reg .u32 %r<9>;
+ .reg .u64 %rd<21>;
+ .reg .pred %p<6>;
+ .shared .align 8 .b8 __cuda___cuda_local_var_14805_37_non_const_ticks56[256]; // 256 bytes of shared storage, accessed below as 8-byte slots
+ .loc 16 31 0
+$LDWbegin_sum:
+ .loc 16 39 0
+ cvt.s32.u16 %r1, %tid.x; // i = thread index
+ mov.s32 %r2, %r1;
+ ld.param.u64 %rd1, [__cudaparm_sum_d_o];
+ ld.param.s32 %r3, [__cudaparm_sum_N];
+ setp.le.s32 %p1, %r3, %r1; // N <= i: this thread has no elements to add
+ @%p1 bra $Lt_2_5634;
+ cvt.u32.u16 %r4, %ntid.x; // step = number of threads in the block
+ cvt.s64.u32 %rd2, %r4;
+ mul.wide.u32 %rd3, %r4, 8; // step in bytes
+ cvt.s64.s32 %rd4, %r1;
+ mul.wide.s32 %rd5, %r1, 8;
+ ld.param.u64 %rd1, [__cudaparm_sum_d_o];
+ add.u64 %rd6, %rd1, %rd5; // address of d_o[i]
+ mov.s64 %rd7, 0; // running total = 0
+$Lt_2_3586:
+ //<loop> Loop body line 39, nesting depth: 1, estimated iterations: unknown
+ .loc 16 40 0
+ ld.global.s64 %rd8, [%rd6+0];
+ add.s64 %rd7, %rd8, %rd7; // total += d_o[i]
+ add.u32 %r2, %r2, %r4; // i += step
+ add.u64 %rd6, %rd3, %rd6;
+ .loc 16 39 0
+ ld.param.s32 %r3, [__cudaparm_sum_N];
+ .loc 16 40 0
+ setp.gt.s32 %p2, %r3, %r2; // loop while N > i
+ @%p2 bra $Lt_2_3586;
+ bra.uni $Lt_2_3074;
+$Lt_2_5634:
+ mov.s64 %rd7, 0; // no elements for this thread: total = 0
+$Lt_2_3074:
+ mov.u64 %rd9, __cuda___cuda_local_var_14805_37_non_const_ticks56;
+ .loc 16 42 0
+ cvt.u32.u16 %r5, %tid.x;
+ cvt.u64.u32 %rd10, %r5;
+ mul.wide.u32 %rd11, %r5, 8;
+ add.u64 %rd12, %rd9, %rd11; // address of this thread's shared slot
+ st.shared.s64 [%rd12+0], %rd7; // publish the per-thread total
+ .loc 16 44 0
+ bar.sync 0;
+ mov.s32 %r2, 16; // reduction stride starts at 16
+$Lt_2_4610:
+ //<loop> Loop body line 44, nesting depth: 1, estimated iterations: unknown
+ setp.le.u32 %p3, %r2, %r5; // stride <= thread index: skip the add
+ @%p3 bra $Lt_2_4866;
+ .loc 16 49 0
+ ld.shared.s64 %rd13, [%rd12+0];
+ add.u32 %r6, %r2, %r5;
+ cvt.u64.u32 %rd14, %r6;
+ mul.wide.u32 %rd15, %r6, 8;
+ add.u64 %rd16, %rd9, %rd15;
+ ld.shared.s64 %rd17, [%rd16+0];
+ add.s64 %rd18, %rd13, %rd17;
+ st.shared.s64 [%rd12+0], %rd18; // slot[tid] += slot[tid + stride]
+$Lt_2_4866:
+ .loc 16 51 0
+ bar.sync 0;
+ .loc 16 46 0
+ shr.s32 %r2, %r2, 1; // halve the stride
+ mov.u32 %r7, 0;
+ setp.gt.s32 %p4, %r2, %r7; // loop while stride > 0
+ @%p4 bra $Lt_2_4610;
+ .loc 16 54 0
+ ld.shared.s64 %rd19, [__cuda___cuda_local_var_14805_37_non_const_ticks56+0];
+ st.global.s64 [%rd1+0], %rd19; // d_o[0] = slot[0]
+ .loc 16 55 0
+ exit;
+$LDWend_sum:
+ } // sum
+
+ // Kernel mult: y[i] = x[i] * x[i] for i = 0 .. n-1.  No thread or block
+ // index register is read in this body; the loop always starts at element 0.
+ .entry mult (
+ .param .s32 __cudaparm_mult_n,
+ .param .u64 __cudaparm_mult_x,
+ .param .u64 __cudaparm_mult_y)
+ {
+ .reg .u32 %r<7>;
+ .reg .u64 %rd<4>;
+ .reg .f32 %f<4>;
+ .reg .pred %p<4>;
+ .loc 16 58 0
+$LDWbegin_mult:
+ ld.param.s32 %r1, [__cudaparm_mult_n];
+ mov.u32 %r2, 0;
+ setp.le.s32 %p1, %r1, %r2; // n <= 0: nothing to do
+ @%p1 bra $Lt_3_1282;
+ ld.param.s32 %r1, [__cudaparm_mult_n];
+ mov.s32 %r3, %r1;
+ ld.param.u64 %rd1, [__cudaparm_mult_x]; // running pointer into x
+ ld.param.u64 %rd2, [__cudaparm_mult_y]; // running pointer into y
+ mov.s32 %r4, 0; // i = 0
+ mov.s32 %r5, %r3;
+$Lt_3_1794:
+ //<loop> Loop body line 58, nesting depth: 1, estimated iterations: unknown
+ .loc 16 63 0
+ ld.global.f32 %f1, [%rd1+0];
+ mul.f32 %f2, %f1, %f1;
+ st.global.f32 [%rd2+0], %f2; // *y = *x * *x
+ add.s32 %r4, %r4, 1; // i++
+ add.u64 %rd2, %rd2, 4; // advance both pointers by one float
+ add.u64 %rd1, %rd1, 4;
+ .loc 16 58 0
+ ld.param.s32 %r1, [__cudaparm_mult_n];
+ .loc 16 63 0
+ setp.ne.s32 %p2, %r1, %r4; // loop until i == n
+ @%p2 bra $Lt_3_1794;
+$Lt_3_1282:
+ .loc 16 64 0
+ exit;
+$LDWend_mult:
+ } // mult
+
new file mode 100644
@@ -0,0 +1,103 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+
+static int _Tnum_timers;
+static CUevent *_Tstart_events, *_Tstop_events;
+static CUstream _Tstream;
+
+/* Set up NTIMERS timers: allocate one start and one stop event per timer
+   and create the stream the events are recorded on.  Any allocation or
+   CUDA driver failure is reported on stderr and aborts the program.  */
+void
+init_timers (int ntimers)
+{
+  int i;
+  CUresult r;
+
+  _Tnum_timers = ntimers;
+
+  _Tstart_events = malloc (_Tnum_timers * sizeof (CUevent));
+  _Tstop_events = malloc (_Tnum_timers * sizeof (CUevent));
+
+  /* A null result is only an error when at least one timer was requested;
+     malloc (0) is allowed to return a null pointer.  */
+  if (_Tnum_timers > 0 && (!_Tstart_events || !_Tstop_events))
+    {
+      fprintf (stderr, "malloc failed\n");
+      abort ();
+    }
+
+  r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  for (i = 0; i < _Tnum_timers; i++)
+    {
+      r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuEventCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuEventCreate failed: %d\n", r);
+	  abort ();
+	}
+    }
+}
+
+/* Tear down what init_timers set up: destroy every start/stop event, then
+   the stream, then release the two event arrays.  Results of the destroy
+   calls are not examined.  */
+void
+fini_timers (void)
+{
+  int t = 0;
+
+  while (t < _Tnum_timers)
+    {
+      cuEventDestroy (_Tstart_events[t]);
+      cuEventDestroy (_Tstop_events[t]);
+      t++;
+    }
+
+  cuStreamDestroy (_Tstream);
+
+  free (_Tstart_events);
+  free (_Tstop_events);
+}
+
+/* Record the start event of timer TIMER on the timer stream.  Aborts with
+   a message on stderr if the driver call fails.  */
+void
+start_timer (int timer)
+{
+  CUresult rc = cuEventRecord (_Tstart_events[timer], _Tstream);
+
+  if (rc == CUDA_SUCCESS)
+    return;
+
+  fprintf (stderr, "cuEventRecord failed: %d\n", rc);
+  abort ();
+}
+
+/* Record the stop event of timer TIMER, wait for that event to complete and
+   return the elapsed time between the timer's start and stop events as
+   reported by cuEventElapsedTime.  Any driver failure prints a message on
+   stderr and aborts.  */
+float
+stop_timer (int timer)
+{
+  float elapsed;
+  CUresult rc;
+
+  rc = cuEventRecord (_Tstop_events[timer], _Tstream);
+  if (rc != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventRecord failed: %d\n", rc);
+      abort ();
+    }
+
+  /* The stop event must have completed before the interval is queried.  */
+  rc = cuEventSynchronize (_Tstop_events[timer]);
+  if (rc != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventSynchronize failed: %d\n", rc);
+      abort ();
+    }
+
+  rc = cuEventElapsedTime (&elapsed, _Tstart_events[timer],
+			   _Tstop_events[timer]);
+  if (rc != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventElapsedTime failed: %d\n", rc);
+      abort ();
+    }
+
+  return elapsed;
+}
new file mode 100644
@@ -0,0 +1,280 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Exercise '#pragma acc update' in both directions on host arrays that were
+   mapped onto acc_malloc'ed device memory with acc_map_data.  Each stage
+   stores known values on the host, pushes them to the device, copies a into
+   b inside a parallel region, pulls both arrays back and checks them.  */
+int
+main(int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c;
+ float *d_a, *d_b, *d_c;
+ int i;
+
+ a = (float *) malloc(N * sizeof (float));
+ b = (float *) malloc(N * sizeof (float));
+ c = (float *) malloc(N * sizeof (float));
+
+ d_a = acc_malloc(N * sizeof (float));
+ d_b = acc_malloc(N * sizeof (float));
+ d_c = acc_malloc(N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+ /* Associate each host array with its device buffer.  c is mapped here but
+ not referenced again in this function.  */
+ acc_map_data(a, d_a, N * sizeof (float));
+ acc_map_data(b, d_b, N * sizeof (float));
+ acc_map_data(c, d_c, N * sizeof (float));
+
+ /* Stage 1: plain round trip with a = 3.0.  */
+#pragma acc update device(a[0:N], b[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort();
+
+ if (b[i] != 3.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ /* Stage 2: same round trip with new values (a = 5.0, b = 1.0).  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ /* Stage 3: as stage 2, but the copy back uses 'update self' instead of
+ 'update host'.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ /* Stage 4: the host copy of a is changed to 9.0 after the update to the
+ device.  The checks below expect the device value 6.0 in both arrays, so
+ the later host stores must not reach the device and 'update host' must
+ overwrite them.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 6.0)
+ abort();
+
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ /* Stage 5: same scenario as stage 4 with 7.0 as the device value; this
+ stage has no acc_is_present checks.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 2.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort();
+
+ if (b[i] != 7.0)
+ abort();
+ }
+
+ /* Stage 6: only a is pushed to the device (as 9.0); b is then produced on
+ the device from it.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc update device(a[0:N])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 9.0)
+ abort();
+
+ if (b[i] != 9.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ /* Stage 7: partial update.  The whole of a goes to the device as 5.0, then
+ only the first N / 2 elements are refreshed with 6.0.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ }
+
+#pragma acc update device(a[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ }
+
+#pragma acc update device(a[0:N >> 1])
+
+#pragma acc parallel present(a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host(a[0:N], b[0:N])
+
+ /* First half carries the refreshed value ...  */
+ for (i = 0; i < (N >> 1); i++)
+ {
+ if (a[i] != 6.0)
+ abort();
+
+ if (b[i] != 6.0)
+ abort();
+ }
+
+ /* ... and the second half still carries the earlier one.  */
+ for (i = (N >> 1); i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort();
+
+ if (b[i] != 5.0)
+ abort();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,10 @@
+! { dg-shouldfail "" { *-*-* } { "*" } { "" } }
+
+! Call abort from inside an OpenACC parallel region.  The dg-shouldfail
+! directive at the top of this file marks the run as expected to fail.
+program main
+ implicit none
+
+ !$acc parallel
+ call abort
+ !$acc end parallel
+
+end program main
new file mode 100644
@@ -0,0 +1,13 @@
+! Copy a host scalar into an OpenACC parallel region with copyin and abort
+! inside the region if it is nonzero.
+program main
+ implicit none
+
+ integer :: argc
+ ! Number of command-line arguments; the check below expects none.
+ argc = command_argument_count ()
+
+ !$acc parallel copyin(argc)
+ if (argc .ne. 0) then
+ call abort
+ end if
+ !$acc end parallel
+
+end program main
new file mode 100644
@@ -0,0 +1,45 @@
+! { dg-additional-options "-cpp" }
+! TODO: Have to disable the acc_on_device builtin for we want to test the
+! libgomp library function? The command line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for
+! Fortran.
+
+! Check acc_on_device for each device type in three situations: plain host
+! code, a parallel region forced onto the host with if(.false.), and, unless
+! ACC_DEVICE_TYPE_host is defined, an offloaded parallel region.
+
+use openacc
+implicit none
+
+! Host.
+
+if (.not. acc_on_device (acc_device_none)) call abort
+if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
+
+
+! Host via offloading fallback mode.
+
+!$acc parallel if(.false.)
+if (.not. acc_on_device (acc_device_none)) call abort
+if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
+!$acc end parallel
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+! The expectations flip here: none and host must be false, not_host true.
+
+!$acc parallel
+if (acc_on_device (acc_device_none)) call abort
+if (acc_on_device (acc_device_host)) call abort
+if (.not. acc_on_device (acc_device_not_host)) call abort
+#if ACC_DEVICE_TYPE_nvidia
+if (.not. acc_on_device (acc_device_nvidia)) call abort
+#else
+if (acc_on_device (acc_device_nvidia)) call abort
+#endif
+!$acc end parallel
+
+#endif
+
+end
new file mode 100644
@@ -0,0 +1,45 @@
+! { dg-additional-options "-cpp" }
+! TODO: Have to disable the acc_on_device builtin for we want to test
+! the libgomp library function? The command line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not
+! for Fortran.
+
+! Upper-case variant of the acc_on_device checks, taking the interface
+! from the OPENACC module.  Covers plain host code, a parallel region
+! forced onto the host with IF(.FALSE.), and, unless
+! ACC_DEVICE_TYPE_host is defined, an offloaded parallel region.
+
+ USE OPENACC
+ IMPLICIT NONE
+
+!Host.
+
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+
+
+!Host via offloading fallback mode.
+
+!$ACC PARALLEL IF(.FALSE.)
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+!$ACC END PARALLEL
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+! The expectations flip here: NONE and HOST must be false, NOT_HOST true.
+
+!$ACC PARALLEL
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
+!$ACC END PARALLEL
+
+#endif
+
+ END
new file mode 100644
@@ -0,0 +1,45 @@
+! { dg-additional-options "-cpp" }
+! TODO: Have to disable the acc_on_device builtin for we want to test
+! the libgomp library function? The command line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not
+! for Fortran.
+
+! Upper-case variant of the acc_on_device checks, taking the interface
+! from the openacc_lib.h include file instead of the module.  Covers
+! plain host code, a parallel region forced onto the host with
+! IF(.FALSE.), and, unless ACC_DEVICE_TYPE_host is defined, an
+! offloaded parallel region.
+
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+!Host.
+
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+
+
+!Host via offloading fallback mode.
+
+!$ACC PARALLEL IF(.FALSE.)
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+!$ACC END PARALLEL
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+! The expectations flip here: NONE and HOST must be false, NOT_HOST true.
+
+!$ACC PARALLEL
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
+!$ACC END PARALLEL
+
+#endif
+
+ END
new file mode 100644
@@ -0,0 +1,27 @@
+! { dg-do run }
+
+! Fill a 3-d array inside a loop nest whose collapse argument is the
+! constant expression 4 - 1, then verify it in a second nest that collapses
+! only the two outer loops and uses an .or. reduction on the error flag.
+program collapse1
+ integer :: i, j, k, a(1:3, 4:6, 5:7)
+ logical :: l
+ l = .false.
+ a(:, :, :) = 0
+ !$acc parallel
+ !$acc loop collapse(4 - 1)
+ do i = 1, 3
+ do j = 4, 6
+ do k = 5, 7
+ a(i, j, k) = i + j + k
+ end do
+ end do
+ end do
+ ! l is set if any element differs from the value stored above.
+ !$acc loop collapse(2) reduction(.or.:l)
+ do i = 1, 3
+ do j = 4, 6
+ do k = 5, 7
+ if (a(i, j, k) .ne. (i + j + k)) l = .true.
+ end do
+ end do
+ end do
+ !$acc end parallel
+ if (l) call abort
+end program collapse1
new file mode 100644
@@ -0,0 +1,25 @@
+! { dg-do run }
+
+! Same computation as a plain collapse test, but the first nest is written
+! as three DO loops sharing the terminating label 164 and the second nest
+! has a named outer loop (firstdo).
+program collapse2
+ integer :: i, j, k, a(1:3, 4:6, 5:7)
+ logical :: l
+ l = .false.
+ a(:, :, :) = 0
+ !$acc parallel
+ !$acc loop collapse(4 - 1)
+ do 164 i = 1, 3
+ do 164 j = 4, 6
+ do 164 k = 5, 7
+ a(i, j, k) = i + j + k
+164 end do
+ ! l is set if any element differs from the value stored above.
+ !$acc loop collapse(2) reduction(.or.:l)
+firstdo: do i = 1, 3
+ do j = 4, 6
+ do k = 5, 7
+ if (a(i, j, k) .ne. (i + j + k)) l = .true.
+ end do
+ end do
+ end do firstdo
+ !$acc end parallel
+ if (l) call abort
+end program collapse2
new file mode 100644
@@ -0,0 +1,28 @@
+! { dg-do run }
+
+! collapse(3) over loop nests that mix labelled DO loops, named DO
+! constructs and both CONTINUE and END DO terminators.
+program collapse3
+ integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+ ! First nest: outer loop ends on the labelled CONTINUE statement 115.
+ !$acc parallel
+ !$acc loop collapse(3)
+ do 115 k=1,3
+dokk: do kk=1,3
+ do kkk=1,3
+ a(k,kk,kkk) = 1
+ enddo
+ enddo dokk
+115 continue
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.1)) call abort
+
+ ! Second nest: outer loop is both named (dol) and labelled (120).
+ !$acc parallel
+ !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+ do lll=1,3
+ a(l,ll,lll) = 2
+ enddo
+ enddo doll
+120 end do dol
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.2)) call abort
+end program collapse3
new file mode 100644
@@ -0,0 +1,40 @@
+! { dg-do run }
+
+! collapse3.f90:test1
+! Run a collapse(3) loop nest with constant bounds and an .or. reduction in
+! an OpenACC parallel region, then repeat the same nest sequentially on the
+! host and compare both the reduction flag and the array contents.
+program collapse4
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        ! l becomes true only if an index lies outside its loop bounds.
+        l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        l = l.or.k.lt.13.or.k.gt.18
+        if (.not.l) a(i, j, k) = a(i, j, k) + 1
+      end do
+    end do
+  end do
+  !$acc end parallel
+  ! Host reference run: r and b mirror l and a.  The guard uses r, so the
+  ! reference does not depend on the result it is compared against.
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse4
new file mode 100644
@@ -0,0 +1,48 @@
+! { dg-do run }
+
+! collapse3.f90:test2
+! Run a collapse(3) loop nest whose bounds are variables, with an .or.
+! reduction, in an OpenACC parallel region; then repeat the same nest
+! sequentially on the host and compare the flag and the array contents.
+program collapse5
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  integer :: v1, v2, v3, v4, v5, v6
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  v1 = 3
+  v2 = 6
+  v3 = -2
+  v4 = 4
+  v5 = 13
+  v6 = 18
+  !$acc parallel
+  !$acc loop collapse (3) reduction (.or.:l)
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+        ! l becomes true only if an index lies outside the expected range.
+        l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        l = l.or.k.lt.13.or.k.gt.18
+        if (.not.l) a(i, j, k) = a(i, j, k) + 1
+        ! NOTE(review): m is implicitly typed and never read in this program.
+        m = i * 100 + j * 10 + k
+      end do
+    end do
+  end do
+  !$acc end parallel
+  ! Host reference run: r and b mirror l and a.  The guard uses r, so the
+  ! reference does not depend on the result it is compared against.
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = v1, v2
+    do j = v3, v4
+      do k = v5, v6
+        if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse5
new file mode 100644
@@ -0,0 +1,50 @@
+! { dg-do run }
+
+! collapse3.f90:test3
+! collapse(3) loop nest whose lower bounds, upper bounds and strides are
+! all variables, with an .or. reduction; the result is compared against the
+! same nest run sequentially on the host.
+program collapse6
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = 3
+ v2 = 6
+ v3 = -2
+ v4 = 4
+ v5 = 13
+ v6 = 18
+ ! v7, v8 and v9 are the loop strides.
+ v7 = 1
+ v8 = 1
+ v9 = 1
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Host reference run: r and b mirror l and a.
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse6
new file mode 100644
@@ -0,0 +1,40 @@
+! { dg-do run }
+
+! collapse3.f90:test4
+! collapse(3) loop nest with constant bounds and an .or. reduction, compared
+! against a sequential host run.  The final comparison walks the full
+! declared extent of the arrays, not just the range the loops touched.
+program collapse7
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Host reference run: r and b mirror l and a.
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ ! Compare every element, including those outside the loop ranges.
+ do i = 1, 7
+ do j = -3, 5
+ do k = 12, 19
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse7
new file mode 100644
@@ -0,0 +1,47 @@
+! { dg-do run }
+
+! collapse3.f90:test5
+! collapse(3) loop nest whose bounds are variables, with an .or. reduction,
+! compared against the same nest run sequentially on the host.
+program collapse8
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = 3
+ v2 = 6
+ v3 = -2
+ v4 = 4
+ v5 = 13
+ v6 = 18
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Host reference run: r and b mirror l and a.
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse8
new file mode 100644
@@ -0,0 +1,108 @@
+# This whole file adapted from libgomp.fortran/fortran.exp.
+# It locates libgfortran (and libquadmath when built), then runs every
+# Fortran test in this directory once per accelerator type listed in
+# $accels below.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+load_gcc_lib gfortran-dg.exp
+
+global shlib_ext
+global ALWAYS_CFLAGS
+
+set shlib_ext [get_shlib_extension]
+set lang_library_path "../libgfortran/.libs"
+set lang_link_flags "-lgfortran"
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+set lang_test_file_found 0
+set quadmath_library_path "../libquadmath/.libs"
+
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
+
+# $blddir is not set in this file; it must already be defined when this
+# script runs.
+if { $blddir != "" } {
+ set lang_source_re {^.*\.[fF](|90|95|03|08)$}
+ set lang_include_flags "-fintrinsic-modules-path=${blddir}"
+ # Look for a static libgfortran first.
+ if [file exists "${blddir}/${lang_library_path}/libgfortran.a"] {
+ set lang_test_file "${lang_library_path}/libgfortran.a"
+ set lang_test_file_found 1
+ # We may have a shared only build, so look for a shared libgfortran.
+ } elseif [file exists "${blddir}/${lang_library_path}/libgfortran.${shlib_ext}"] {
+ set lang_test_file "${lang_library_path}/libgfortran.${shlib_ext}"
+ set lang_test_file_found 1
+ } else {
+ puts "No libgfortran library found, will not execute fortran tests"
+ }
+} elseif [info exists GFORTRAN_UNDER_TEST] {
+ set lang_test_file_found 1
+ # Needs to exist for libgomp.exp.
+ set lang_test_file ""
+} else {
+ puts "GFORTRAN_UNDER_TEST not defined, will not execute fortran tests"
+}
+
+if { $lang_test_file_found } {
+ # Gather a list of all tests.
+ set tests [lsort [find $srcdir/$subdir *.\[fF\]{,90,95,03,08}]]
+
+ # Build the library search path: libgfortran always, libquadmath only
+ # when a static or shared build of it exists in the build tree.
+ if { $blddir != "" } {
+ if { [file exists "${blddir}/${quadmath_library_path}/libquadmath.a"]
+ || [file exists "${blddir}/${quadmath_library_path}/libquadmath.${shlib_ext}"] } {
+ lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/${quadmath_library_path}/"
+ # Allow for spec substitution.
+ lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/${quadmath_library_path}/"
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}:${blddir}/${quadmath_library_path}"
+ } else {
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+ }
+ } else {
+ set ld_library_path "$always_ld_library_path"
+ }
+ append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+ set_ld_library_path_env_vars
+
+ # Todo: get list of accelerators from configure options --enable-accelerator.
+ set accels { "nonshm-host" "nvidia" }
+
+ # Run on host (or fallback) accelerator.
+ lappend accels "host"
+
+ # Test OpenACC with available accelerators.
+ foreach accel $accels {
+ # Each run defines ACC_DEVICE_TYPE_<accel> and ACC_MEM_SHARED for the
+ # tests to key on.
+ set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+
+ # Todo: Determine shared memory or not using run-time test.
+ switch $accel {
+ host {
+ set acc_mem_shared 1
+ }
+ nonshm-host {
+ # Replace the default tag: the hyphen in the accelerator name is
+ # written as an underscore in the macro name.
+ set tagopt "-DACC_DEVICE_TYPE_nonshm_host=1"
+ set acc_mem_shared 0
+ }
+ nvidia {
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+ # Todo: Verify that this works for both local and remote testing.
+ setenv ACC_DEVICE_TYPE $accel
+
+ # For Fortran we do torture testing: Fortran has far more tests
+ # involving arrays etc., so testing just -O0 or -O2 is insufficient.
+ # That is typically not the case for C/C++.
+ gfortran-dg-runtest $tests "$tagopt" ""
+ }
+}
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,13 @@
+! Smoke test of the OpenACC runtime routines on the host device, using the
+! openacc module: query, select, shut down, then init and shut down again.
+use openacc
+
+if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+call acc_set_device_type (acc_device_host)
+if (acc_get_device_type () .ne. acc_device_host) call abort
+call acc_set_device_num (0, acc_device_host)
+if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+call acc_shutdown (acc_device_host)
+
+! A fresh init after the shutdown above.
+call acc_init (acc_device_host)
+call acc_shutdown (acc_device_host)
+
+end
new file mode 100644
@@ -0,0 +1,82 @@
+! { dg-do run }
+
+! Copy integer, complex and real 3-d arrays to an nvidia device with
+! acc_copyin, then check with acc_is_present that each whole array and each
+! individual element is present.  Exits quietly when no nvidia device is
+! available.
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  ! Byte size of one element of each array type.
+  integer, parameter :: i_size = sizeof (i)
+  integer, parameter :: c_size = sizeof (c)
+  integer, parameter :: r_size = sizeof (r)
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+  ! Element-wise check; each query passes the size of that array's own
+  ! element type so the whole element is covered.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+
+  ! Fill the three arrays element by element: with zeros when CLEAR is
+  ! true, otherwise with values derived from the loop indices.  Loop bounds
+  ! for all three arrays are taken from A_I.
+  subroutine set3d (clear, a_i, a_c, a_r)
+    logical clear
+    integer, dimension (:,:,:), intent (inout) :: a_i
+    complex, dimension (:,:,:), intent (inout) :: a_c
+    real, dimension (:,:,:), intent (inout) :: a_r
+
+    integer i, j, k
+    integer lb1, ub1, lb2, ub2, lb3, ub3
+
+    lb1 = lbound (a_i, 1)
+    ub1 = ubound (a_i, 1)
+
+    lb2 = lbound (a_i, 2)
+    ub2 = ubound (a_i, 2)
+
+    lb3 = lbound (a_i, 3)
+    ub3 = ubound (a_i, 3)
+
+    do i = lb1, ub1
+      do j = lb2, ub2
+        do k = lb3, ub3
+          if (clear) then
+            a_i(i, j, k) = 0
+            a_c(i, j, k) = cmplx (0.0, 0.0)
+            a_r(i, j, k) = 0.0
+          else
+            a_i(i, j, k) = i
+            a_c(i, j, k) = cmplx (i, j)
+            a_r(i, j, k) = i
+          end if
+        end do
+      end do
+    end do
+
+  end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,82 @@
+! { dg-do run }
+
+! Copy integer, complex and real 3-d arrays to an nvidia device with
+! acc_copyin, then check with acc_is_present that each whole array and each
+! individual element is present.  Exits quietly when no nvidia device is
+! available.
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  ! Byte size of one element of each array type.
+  integer, parameter :: i_size = sizeof (i)
+  integer, parameter :: c_size = sizeof (c)
+  integer, parameter :: r_size = sizeof (r)
+
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+  ! Element-wise check; each query passes the size of that array's own
+  ! element type so the whole element is covered.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+
+  ! Fill the three arrays element by element: with zeros when CLEAR is
+  ! true, otherwise with values derived from the loop indices.  Loop bounds
+  ! for all three arrays are taken from A_I.
+  subroutine set3d (clear, a_i, a_c, a_r)
+    logical clear
+    integer, dimension (:,:,:), intent (inout) :: a_i
+    complex, dimension (:,:,:), intent (inout) :: a_c
+    real, dimension (:,:,:), intent (inout) :: a_r
+
+    integer i, j, k
+    integer lb1, ub1, lb2, ub2, lb3, ub3
+
+    lb1 = lbound (a_i, 1)
+    ub1 = ubound (a_i, 1)
+
+    lb2 = lbound (a_i, 2)
+    ub2 = ubound (a_i, 2)
+
+    lb3 = lbound (a_i, 3)
+    ub3 = ubound (a_i, 3)
+
+    do i = lb1, ub1
+      do j = lb2, ub2
+        do k = lb3, ub3
+          if (clear) then
+            a_i(i, j, k) = 0
+            a_c(i, j, k) = cmplx (0.0, 0.0)
+            a_r(i, j, k) = 0.0
+          else
+            a_i(i, j, k) = i
+            a_c(i, j, k) = cmplx (i, j)
+            a_r(i, j, k) = i
+          end if
+        end do
+      end do
+    end do
+
+  end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,13 @@
+ ! Exercise the device query/selection and init/shutdown entry points on
+ ! the host device type, with the API brought in through the OPENACC module.
+ USE OPENACC
+
+ ! Exactly one host device is expected.
+ IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+ CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+ CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ ! A second init/shutdown cycle after the first shutdown.
+ CALL ACC_INIT (ACC_DEVICE_HOST)
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ END
new file mode 100644
@@ -0,0 +1,13 @@
+ ! Exercise the device query/selection and init/shutdown entry points on
+ ! the host device type, with the API brought in through openacc_lib.h.
+ INCLUDE "openacc_lib.h"
+
+ ! Exactly one host device is expected.
+ IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+ CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+ CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ ! A second init/shutdown cycle after the first shutdown.
+ CALL ACC_INIT (ACC_DEVICE_HOST)
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ END
new file mode 100644
@@ -0,0 +1,35 @@
+! { dg-do run }
+
+program main
+ ! Walk through the device-management and async/wait routines on the host
+ ! device type, using the OPENACC module.  Any unexpected answer aborts.
+ use openacc
+ implicit none
+
+ integer n
+
+ ! One host device, and no devices of type "none".
+ if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+ if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+ call acc_init (acc_device_host)
+
+ ! The device type must read back as host both right after init and
+ ! after setting it explicitly.
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ call acc_set_device_type (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_host)
+
+ if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+
+ ! No asynchronous work was queued, so the test on n must report true.
+ if (.NOT. acc_async_test (n) ) call abort
+
+ call acc_wait (n)
+
+ call acc_wait_all ()
+
+ call acc_shutdown (acc_device_host)
+
+end program
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-do run }
+
+program main
+ ! Check that acc_set_device_num / acc_get_device_num round-trip on the
+ ! NVIDIA device type, using the OPENACC module.
+ use openacc
+ implicit none
+
+ integer n
+
+ ! Leave quietly when no NVIDIA device is available.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+ ! Device 1 is only exercised when more than one device is reported.
+ if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+ n = 1
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+ end if
+
+ call acc_shutdown (acc_device_nvidia)
+
+end program
new file mode 100644
@@ -0,0 +1,35 @@
+! { dg-do run }
+
+program main
+ ! Walk through the device-management and async/wait routines on the host
+ ! device type, using openacc_lib.h.  Any unexpected answer aborts.
+ implicit none
+ include "openacc_lib.h"
+
+ integer n
+
+ ! One host device, and no devices of type "none".
+ if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+ if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+ call acc_init (acc_device_host)
+
+ ! The device type must read back as host both right after init and
+ ! after setting it explicitly.
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ call acc_set_device_type (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_host)
+
+ if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+
+ ! No asynchronous work was queued, so the test on n must report true.
+ if (.NOT. acc_async_test (n) ) call abort
+
+ call acc_wait (n)
+
+ call acc_wait_all ()
+
+ call acc_shutdown (acc_device_host)
+
+end program
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-do run }
+
+program main
+ ! Check that acc_set_device_num / acc_get_device_num round-trip on the
+ ! NVIDIA device type, using openacc_lib.h.
+ implicit none
+ include "openacc_lib.h"
+
+ integer n
+
+ ! Leave quietly when no NVIDIA device is available.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+ ! Device 1 is only exercised when more than one device is reported.
+ if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+ n = 1
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+ end if
+
+ call acc_shutdown (acc_device_nvidia)
+
+end program
new file mode 100644
@@ -0,0 +1,83 @@
+! { dg-do run }
+
+program main
+ ! Check that data copied to the device with acc_copyin is reported as
+ ! present by acc_is_present, both for whole arrays and for every
+ ! individual element (probed with that element's own byte size).
+ use openacc
+ use iso_c_binding
+ implicit none
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ ! Byte size of one element of each array type.
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+
+ ! Leave quietly when no NVIDIA device is available.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ ! Whole-array presence.
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+ ! Per-element presence; the length argument must match the element type
+ ! so that the full extent of each complex/real element is probed.
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+
+ ! Fill the three arrays element by element.  When CLEAR is true every
+ ! element is zeroed; otherwise values are derived from the loop indices.
+ ! Loop bounds are taken from A_I and applied to all three arrays.
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,83 @@
+! { dg-do run }
+
+program main
+ ! Check that data copied to the device with acc_copyin is reported as
+ ! present by acc_is_present, both for whole arrays and for every
+ ! individual element (probed with that element's own byte size).
+ use openacc
+ use iso_c_binding
+ implicit none
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ ! Byte size of one element of each array type.
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+
+ ! Leave quietly when no NVIDIA device is available.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ ! Whole-array presence.
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+
+ ! Per-element presence; the length argument must match the element type
+ ! so that the full extent of each complex/real element is probed.
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+
+ ! Fill the three arrays element by element.  When CLEAR is true every
+ ! element is zeroed; otherwise values are derived from the loop indices.
+ ! Loop bounds are taken from A_I and applied to all three arrays.
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
new file mode 100644
@@ -0,0 +1,97 @@
+program map
+ ! Exercise each data-movement clause of "!$acc parallel" on whole arrays.
+ ! After every region A must equal the reference array B (B(i) = i).
+ ! A is reset to zero before each region so that a pass cannot come from
+ ! values left behind by the previous one, and the copyin regions read B
+ ! on the device so that the copyin clause is actually exercised.
+ integer, parameter :: n = 20, c = 10
+ integer :: i, a(n), b(n)
+
+ a(:) = 0
+ b(:) = 0
+
+ ! COPY
+
+ !$acc parallel copy (a)
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ ! Build the reference values outside any parallel region.
+ do i = 1, n
+ b(i) = i
+ end do
+
+ call check (a, b, n)
+
+ ! COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel copyout (a)
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ do i = 1, n
+ if (a(i) .ne. b(i)) call abort
+ end do
+ call check (a, b, n)
+
+ ! COPYIN
+
+ a(:) = 0
+
+ !$acc parallel copyout (a) copyin (b)
+ !$acc loop
+ do i = 1, n
+ a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPY
+
+ a(:) = 0
+
+ !$acc parallel pcopy (a)
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a)
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYIN
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a) pcopyin (b)
+ !$acc loop
+ do i = 1, n
+ a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+end program map
+
+subroutine check (a, b, n)
+ ! Abort the program unless the first N elements of A and B agree.
+ integer :: n, a(n), b(n)
+
+ if (any (a .ne. b)) call abort
+end subroutine check
new file mode 100644
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+ program main
+ ! Check the openacc_version constant supplied by openacc_lib.h.
+ implicit none
+ include "openacc_lib.h"
+
+ ! 201306 is the version value this test expects.
+ if (openacc_version .ne. 201306) call abort;
+
+ end program main
new file mode 100644
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+program main
+ ! Check the openacc_version constant supplied by the OPENACC module.
+ use openacc
+ implicit none
+
+ ! 201306 is the version value this test expects.
+ if (openacc_version .ne. 201306) call abort;
+
+end program main
new file mode 100644
@@ -0,0 +1,15 @@
+program test
+ ! Copy an explicit-shape array into a parallel region with an array
+ ! section in the copy clause, overwrite every element there, and verify
+ ! that the new values are visible after the region.
+ implicit none
+
+ integer a(5)
+
+ a = 10
+
+ !$acc parallel copy(a(1:5))
+ a(1) = 5
+ a(2) = 5
+ a(3) = 5
+ a(4) = 5
+ a(5) = 5
+ !$acc end parallel
+
+ ! Every element must have been updated from 10 to 5.
+ if (any (a .ne. 5)) call abort
+end program test
new file mode 100644
@@ -0,0 +1,229 @@
+! { dg-do run }
+
+program test
+ ! Data clauses applied to sections of allocatable arrays.  Each stanza
+ ! allocates an array (with lower bound 1 or 0), fills it, updates selected
+ ! elements inside a parallel region and checks the values afterwards.
+ implicit none
+ integer, allocatable :: a1(:)
+ integer, allocatable :: b1(:)
+ integer, allocatable :: c1(:)
+ integer, allocatable :: b2(:,:)
+ integer, allocatable :: c3(:,:,:)
+
+ ! Rank 1, bounds 1:5, copy clause.
+ allocate (a1(5))
+ if (.not.allocated (a1)) call abort()
+
+ a1 = 10
+
+ !$acc parallel copy(a1(1:5))
+ a1(1) = 1
+ a1(2) = 2
+ a1(3) = 3
+ a1(4) = 4
+ a1(5) = 5
+ !$acc end parallel
+
+ if (a1(1) .ne. 1) call abort
+ if (a1(2) .ne. 2) call abort
+ if (a1(3) .ne. 3) call abort
+ if (a1(4) .ne. 4) call abort
+ if (a1(5) .ne. 5) call abort
+
+ deallocate(a1)
+
+ ! Rank 1, bounds 0:4, copy clause.
+ allocate (a1(0:4))
+ if (.not.allocated (a1)) call abort()
+
+ a1 = 10
+
+ !$acc parallel copy(a1(0:4))
+ a1(0) = 1
+ a1(1) = 2
+ a1(2) = 3
+ a1(3) = 4
+ a1(4) = 5
+ !$acc end parallel
+
+ if (a1(0) .ne. 1) call abort
+ if (a1(1) .ne. 2) call abort
+ if (a1(2) .ne. 3) call abort
+ if (a1(3) .ne. 4) call abort
+ if (a1(4) .ne. 5) call abort
+
+ deallocate(a1)
+
+ ! Rank 2, bounds 1:5 in each dimension; only the diagonal is written.
+ allocate (b2(5,5))
+ if (.not.allocated (b2)) call abort()
+
+ b2 = 11
+
+ !$acc parallel copy(b2(1:5,1:5))
+ b2(1,1) = 1
+ b2(2,2) = 2
+ b2(3,3) = 3
+ b2(4,4) = 4
+ b2(5,5) = 5
+ !$acc end parallel
+
+ if (b2(1,1) .ne. 1) call abort
+ if (b2(2,2) .ne. 2) call abort
+ if (b2(3,3) .ne. 3) call abort
+ if (b2(4,4) .ne. 4) call abort
+ if (b2(5,5) .ne. 5) call abort
+
+ deallocate(b2)
+
+ ! Rank 2, bounds 0:4 in each dimension.
+ allocate (b2(0:4,0:4))
+ if (.not.allocated (b2)) call abort()
+
+ b2 = 11
+
+ !$acc parallel copy(b2(0:4,0:4))
+ b2(0,0) = 1
+ b2(1,1) = 2
+ b2(2,2) = 3
+ b2(3,3) = 4
+ b2(4,4) = 5
+ !$acc end parallel
+
+ if (b2(0,0) .ne. 1) call abort
+ if (b2(1,1) .ne. 2) call abort
+ if (b2(2,2) .ne. 3) call abort
+ if (b2(3,3) .ne. 4) call abort
+ if (b2(4,4) .ne. 5) call abort
+
+ deallocate(b2)
+
+ ! Rank 3, bounds 1:5 in each dimension; only the diagonal is written.
+ allocate (c3(5,5,5))
+ if (.not.allocated (c3)) call abort()
+
+ c3 = 12
+
+ !$acc parallel copy(c3(1:5,1:5,1:5))
+ c3(1,1,1) = 1
+ c3(2,2,2) = 2
+ c3(3,3,3) = 3
+ c3(4,4,4) = 4
+ c3(5,5,5) = 5
+ !$acc end parallel
+
+ if (c3(1,1,1) .ne. 1) call abort
+ if (c3(2,2,2) .ne. 2) call abort
+ if (c3(3,3,3) .ne. 3) call abort
+ if (c3(4,4,4) .ne. 4) call abort
+ if (c3(5,5,5) .ne. 5) call abort
+
+ deallocate(c3)
+
+ ! Rank 3, bounds 0:4 in each dimension.
+ allocate (c3(0:4,0:4,0:4))
+ if (.not.allocated (c3)) call abort()
+
+ c3 = 12
+
+ !$acc parallel copy(c3(0:4,0:4,0:4))
+ c3(0,0,0) = 1
+ c3(1,1,1) = 2
+ c3(2,2,2) = 3
+ c3(3,3,3) = 4
+ c3(4,4,4) = 5
+ !$acc end parallel
+
+ if (c3(0,0,0) .ne. 1) call abort
+ if (c3(1,1,1) .ne. 2) call abort
+ if (c3(2,2,2) .ne. 3) call abort
+ if (c3(3,3,3) .ne. 4) call abort
+ if (c3(4,4,4) .ne. 5) call abort
+
+ deallocate(c3)
+
+ ! copyin / create / copyout together, bounds 1:5: values travel
+ ! a1 -> c1 -> b1 inside the region, and only b1 is checked afterwards.
+ allocate (a1(5))
+ if (.not.allocated (a1)) call abort()
+
+ allocate (b1(5))
+ if (.not.allocated (b1)) call abort()
+
+ allocate (c1(5))
+ if (.not.allocated (c1)) call abort()
+
+ a1 = 10
+ b1 = 3
+ c1 = 7
+
+ !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5))
+ c1(1) = a1(1)
+ c1(2) = a1(2)
+ c1(3) = a1(3)
+ c1(4) = a1(4)
+ c1(5) = a1(5)
+
+ b1(1) = c1(1)
+ b1(2) = c1(2)
+ b1(3) = c1(3)
+ b1(4) = c1(4)
+ b1(5) = c1(5)
+ !$acc end parallel
+
+ if (b1(1) .ne. 10) call abort
+ if (b1(2) .ne. 10) call abort
+ if (b1(3) .ne. 10) call abort
+ if (b1(4) .ne. 10) call abort
+ if (b1(5) .ne. 10) call abort
+
+ deallocate(a1)
+ deallocate(b1)
+ deallocate(c1)
+
+ ! Same copyin / create / copyout chain with bounds 0:4.
+ allocate (a1(0:4))
+ if (.not.allocated (a1)) call abort()
+
+ allocate (b1(0:4))
+ if (.not.allocated (b1)) call abort()
+
+ allocate (c1(0:4))
+ if (.not.allocated (c1)) call abort()
+
+ a1 = 10
+ b1 = 3
+ c1 = 7
+
+ !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4))
+ c1(0) = a1(0)
+ c1(1) = a1(1)
+ c1(2) = a1(2)
+ c1(3) = a1(3)
+ c1(4) = a1(4)
+
+ b1(0) = c1(0)
+ b1(1) = c1(1)
+ b1(2) = c1(2)
+ b1(3) = c1(3)
+ b1(4) = c1(4)
+ !$acc end parallel
+
+ if (b1(0) .ne. 10) call abort
+ if (b1(1) .ne. 10) call abort
+ if (b1(2) .ne. 10) call abort
+ if (b1(3) .ne. 10) call abort
+ if (b1(4) .ne. 10) call abort
+
+ deallocate(a1)
+ deallocate(b1)
+ deallocate(c1)
+
+ ! Partial section: only a1(2:3) is named in the copy clause, and the
+ ! elements outside it must keep their original value of 10.
+ allocate (a1(5))
+ if (.not.allocated (a1)) call abort()
+
+ a1 = 10
+
+ !$acc parallel copy(a1(2:3))
+ a1(2) = 2
+ a1(3) = 3
+ !$acc end parallel
+
+ if (a1(1) .ne. 10) call abort
+ if (a1(2) .ne. 2) call abort
+ if (a1(3) .ne. 3) call abort
+ if (a1(4) .ne. 10) call abort
+ if (a1(5) .ne. 10) call abort
+
+ deallocate(a1)
+
+end program test
new file mode 100644
@@ -0,0 +1,228 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! Integer reductions
+
+program reduction_1
+ ! Integer and logical reductions on an "!$acc loop".  Every reduction is
+ ! computed once inside a parallel region and once by a plain loop outside
+ ! it, and the two answers must agree.  Integer reductions accumulate into
+ ! result/vresult, logical ones into lresult/lvresult.
+ implicit none
+
+ integer, parameter :: n = 1000, vl = 32
+ integer :: i, vresult, result
+ logical :: lresult, lvresult
+ integer, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ ! NOTE(review): both accumulators start at 0, so the sequential product
+ ! below is 0 whatever the array holds.
+ result = 0
+ vresult = 0
+
+ ! '*' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(*:result)
+ do i = 1, n
+ result = result * array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult * array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'max' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(max:result)
+ do i = 1, n
+ result = max (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = max (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'min' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(min:result)
+ do i = 1, n
+ result = min (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = min (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'iand' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(iand:result)
+ do i = 1, n
+ result = iand (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = iand (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'ior' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(ior:result)
+ do i = 1, n
+ result = ior (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = ior (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'ieor' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(ieor:result)
+ do i = 1, n
+ result = ieor (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = ieor (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.and.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.and.:lresult)
+ do i = 1, n
+ lresult = lresult .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .and. (array(i) .ge. 5)
+ end do
+
+ ! Compare the logical accumulators, not the integer ones left over from
+ ! the previous section.
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.or.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.or.:lresult)
+ do i = 1, n
+ lresult = lresult .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .or. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.eqv.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.eqv.:lresult)
+ do i = 1, n
+ lresult = lresult .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .eqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.neqv.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.neqv.:lresult)
+ do i = 1, n
+ lresult = lresult .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .neqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+end program reduction_1
new file mode 100644
@@ -0,0 +1,173 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! real reductions
+
+program reduction_2
+ ! Real and logical reductions on an "!$acc loop".  Every reduction is
+ ! computed once inside a parallel region and once by a plain loop outside
+ ! it, and the two answers must agree.  Real reductions accumulate into
+ ! result/vresult, logical ones into lresult/lvresult.
+ implicit none
+
+ integer, parameter :: n = 1000, vl = 32
+ integer :: i
+ ! Tolerance used when comparing the two '+' results.
+ real, parameter :: e = .001
+ real :: vresult, result
+ logical :: lresult, lvresult
+ real, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (abs (result - vresult) .ge. e) call abort
+
+ result = 1
+ vresult = 1
+
+ ! '*' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(*:result)
+ do i = 1, n
+ result = result * array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult * array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'max' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(max:result)
+ do i = 1, n
+ result = max (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = max (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'min' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(min:result)
+ do i = 1, n
+ result = min (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = min (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ ! The logical accumulators must be defined before their first use.
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.and.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.and.:lresult)
+ do i = 1, n
+ lresult = lresult .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .and. (array(i) .ge. 5)
+ end do
+
+ ! Compare the logical accumulators, not the real ones left over from
+ ! the previous section.
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.or.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.or.:lresult)
+ do i = 1, n
+ lresult = lresult .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .or. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.eqv.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.eqv.:lresult)
+ do i = 1, n
+ lresult = lresult .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .eqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.neqv.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.neqv.:lresult)
+ do i = 1, n
+ lresult = lresult .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .neqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+end program reduction_2
new file mode 100644
@@ -0,0 +1,173 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! double precision reductions
+
+program reduction_3
+ ! Double precision and logical reductions on an "!$acc loop".  Every
+ ! reduction is computed once inside a parallel region and once by a plain
+ ! loop outside it, and the two answers must agree.  Numeric reductions
+ ! accumulate into result/vresult, logical ones into lresult/lvresult.
+ implicit none
+
+ integer, parameter :: n = 1000, vl = 32
+ integer :: i
+ ! Tolerance used when comparing the two '+' results.
+ double precision, parameter :: e = .001
+ double precision :: vresult, result
+ logical :: lresult, lvresult
+ double precision, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (abs (result - vresult) .ge. e) call abort
+
+ result = 1
+ vresult = 1
+
+ ! '*' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(*:result)
+ do i = 1, n
+ result = result * array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult * array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'max' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(max:result)
+ do i = 1, n
+ result = max (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = max (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'min' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(min:result)
+ do i = 1, n
+ result = min (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = min (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ ! The logical accumulators must be defined before their first use.
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.and.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.and.:lresult)
+ do i = 1, n
+ lresult = lresult .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .and. (array(i) .ge. 5)
+ end do
+
+ ! Compare the logical accumulators, not the numeric ones left over from
+ ! the previous section.
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.or.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.or.:lresult)
+ do i = 1, n
+ lresult = lresult .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .or. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.eqv.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.eqv.:lresult)
+ do i = 1, n
+ lresult = lresult .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .eqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.neqv.' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(.neqv.:lresult)
+ do i = 1, n
+ lresult = lresult .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ lvresult = lvresult .neqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+end program reduction_3
new file mode 100644
@@ -0,0 +1,57 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! complex reductions
+
+program reduction_4
+ ! Complex '+' reduction on an "!$acc loop": the sum is computed once
+ ! inside a parallel region and once by a plain loop outside it, and the
+ ! two answers must be equal.  The '*' variant is present but commented out.
+ implicit none
+
+ integer, parameter :: n = 1000, vl = 32
+ integer :: i
+ complex :: vresult, result
+ complex, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (result .ne. vresult) call abort
+
+ ! These values are only read by the disabled '*' section below.
+ result = 1
+ vresult = 1
+
+! ! '*' reductions
+!
+! !$acc parallel vector_length(vl)
+! !$acc loop reduction(*:result)
+! do i = 1, n
+! result = result * array(i)
+! end do
+! !$acc end parallel
+!
+! ! Verify the results
+! do i = 1, n
+! vresult = vresult * array(i)
+! end do
+!
+! if (result.ne.vresult) call abort
+end program reduction_4
new file mode 100644
@@ -0,0 +1,35 @@
+! { dg-do run }
+! TODO:
+! <http://sourcery.sje.mentorg.com/pipermail/gnu-nvidia-openacc/2014-July/001162.html>.
+! { dg-xfail-run-if "" { *-*-* } { "-DACC_DEVICE_TYPE_host=1" } { "" } }
+
+! subroutine reduction
+
+program reduction
+ ! Driver: have redsub compute a '+' reduction of the constant c over n
+ ! iterations, then recompute the same sum with a plain loop and compare.
+ integer, parameter :: n = 40, c = 10
+ integer :: i, vsum, sum
+
+ call redsub (sum, n, c)
+
+ vsum = 0
+
+ ! Verify the results
+ do i = 1, n
+ vsum = vsum + c
+ end do
+
+ if (sum.ne.vsum) call abort ()
+end program reduction
+
+! Add C to SUM once per iteration, N times, using a '+' reduction on an
+! "!$acc loop" inside a parallel region.  SUM is reset to 0 on entry.
+! The loop index i is implicitly typed (no "implicit none" here).
+subroutine redsub(sum, n, c)
+ integer :: sum, n, c
+
+ sum = 0
+
+ ! N doubles as the vector length and the trip count.
+ !$acc parallel vector_length(n) copyin (n, c)
+ !$acc loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + c
+ end do
+ !$acc end parallel
+end subroutine redsub
new file mode 100644
@@ -0,0 +1,32 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ ! Call a recursive "!$acc routine" function from inside a parallel loop.
+ ! The function is made known through an explicit interface block that
+ ! itself carries the routine directive.
+ interface
+ recursive function fact (x)
+ !$acc routine
+ integer, intent(in) :: x
+ integer :: fact
+ end function fact
+ end interface
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+ a(i) = fact (i)
+ end do
+ !$acc end parallel
+ ! Compare with the same calls made outside the parallel region.
+ do i = 1, n
+ if (a(i) .ne. fact(i)) call abort
+ end do
+end
+! Recursive factorial marked as an OpenACC routine.
+! Returns 1 for any x below 1, otherwise x * fact (x - 1).
+recursive function fact (x) result (res)
+ !$acc routine
+ integer, intent(in) :: x
+ integer :: res
+ if (x < 1) then
+ res = 1
+ else
+ res = x * fact (x - 1)
+ end if
+end function fact
new file mode 100644
@@ -0,0 +1,29 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ ! Module holding the recursive factorial used by the main program below.
+ module m1
+ contains
+ ! Recursive factorial marked as an OpenACC routine.
+ ! Returns 1 for any x below 1, otherwise x * fact (x - 1).
+ recursive function fact (x) result (res)
+ !$acc routine
+ integer, intent(in) :: x
+ integer :: res
+ if (x < 1) then
+ res = 1
+ else
+ res = x * fact (x - 1)
+ end if
+ end function fact
+ end module m1
+ ! Call the module function fact from inside a parallel loop and compare
+ ! each value with the same call made outside the parallel region.
+ use m1
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+ a(i) = fact (i)
+ end do
+ !$acc end parallel
+ do i = 1, n
+ if (a(i) .ne. fact(i)) call abort
+ end do
+end
new file mode 100644
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ ! Call an external function from inside a parallel loop.  The function is
+ ! declared with "external" and named in a "!$acc routine (fact)" directive
+ ! here, rather than being described by an interface block.
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ integer, external :: fact
+ !$acc routine (fact)
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+ a(i) = fact (i)
+ end do
+ !$acc end parallel
+ ! Compare with the same calls made outside the parallel region.
+ do i = 1, n
+ if (a(i) .ne. fact(i)) call abort
+ end do
+end
+! Recursive factorial marked as an OpenACC routine.
+! Returns 1 for any x below 1, otherwise x * fact (x - 1).
+recursive function fact (x) result (res)
+ !$acc routine
+ integer, intent(in) :: x
+ integer :: res
+ if (x < 1) then
+ res = 1
+ else
+ res = x * fact (x - 1)
+ end if
+end function fact
new file mode 100644
@@ -0,0 +1,23 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ ! Call the subroutine incr on every array element from inside a parallel
+ ! loop and check that each element ends up one larger than it started.
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+ call incr(a(i))
+ end do
+ !$acc end parallel
+ do i = 1, n
+ if (a(i) .ne. (i + 1)) call abort
+ end do
+end
+! Add one to X in place; marked as an OpenACC routine.
+subroutine incr (x)
+ !$acc routine
+ integer, intent(inout) :: x
+ x = x + 1
+end subroutine incr
new file mode 100644
@@ -0,0 +1,97 @@
+program subarrays
+ ! Exercise each data-movement clause of "!$acc parallel" with full-range
+ ! array sections (1:n).  After every region A must equal the reference
+ ! array B (B(i) = i).  A is reset to zero before each region so that a
+ ! pass cannot come from values left behind by the previous one, and the
+ ! copyin regions read B on the device so that the copyin clause is
+ ! actually exercised.
+ integer, parameter :: n = 20, c = 10
+ integer :: i, a(n), b(n)
+
+ a(:) = 0
+ b(:) = 0
+
+ ! COPY
+
+ !$acc parallel copy (a(1:n))
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ ! Build the reference values outside any parallel region.
+ do i = 1, n
+ b(i) = i
+ end do
+
+ call check (a, b, n)
+
+ ! COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel copyout (a(1:n))
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ do i = 1, n
+ if (a(i) .ne. b(i)) call abort
+ end do
+ call check (a, b, n)
+
+ ! COPYIN
+
+ a(:) = 0
+
+ !$acc parallel copyout (a(1:n)) copyin (b(1:n))
+ !$acc loop
+ do i = 1, n
+ a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPY
+
+ a(:) = 0
+
+ !$acc parallel pcopy (a(1:n))
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a(1:n))
+ !$acc loop
+ do i = 1, n
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYIN
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n))
+ !$acc loop
+ do i = 1, n
+ a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+end program subarrays
+
+subroutine check (a, b, n)
+ ! Abort the program unless the first N elements of A and B agree.
+ integer :: n, a(n), b(n)
+
+ if (any (a .ne. b)) call abort
+end subroutine check
new file mode 100644
@@ -0,0 +1,100 @@
+program subarrays
+ ! Exercise each data-movement clause of "!$acc parallel" with a partial
+ ! array section (low:high).  Only that section is written in each region;
+ ! the whole of A is then compared with B, so elements outside the section
+ ! must still be zero.
+ integer, parameter :: n = 20, c = 10, low = 5, high = 10
+ integer :: i, a(n), b(n)
+
+ a(:) = 0
+ b(:) = 0
+
+ ! COPY
+
+ !$acc parallel copy (a(low:high))
+ !$acc loop
+ do i = low, high
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ ! Build the reference values outside any parallel region; elements
+ ! outside low:high stay zero.
+ do i = low, high
+ b(i) = i
+ end do
+
+ call check (a, b, n)
+
+ ! COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel copyout (a(low:high))
+ !$acc loop
+ do i = low, high
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ do i = low, high
+ if (a(i) .ne. b(i)) call abort
+ end do
+ call check (a, b, n)
+
+ ! COPYIN
+
+ a(:) = 0
+
+ ! The region reads B, so the copyin clause is what supplies the values.
+ !$acc parallel copyout (a(low:high)) copyin (b(low:high))
+ !$acc loop
+ do i = low, high
+ a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPY
+
+ a(:) = 0
+
+ !$acc parallel pcopy (a(low:high))
+ !$acc loop
+ do i = low, high
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a(low:high))
+ !$acc loop
+ do i = low, high
+ a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYIN
+
+ a(:) = 0
+
+ ! The directive is split over two lines with a continuation.
+ !$acc parallel pcopyout (a(low:high)) &
+ !$acc & pcopyin (b(low:high))
+ !$acc loop
+ do i = low, high
+ a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+end program subarrays
+
+subroutine check (a, b, n)
+ ! Abort the program unless the first N elements of A and B agree.
+ integer :: n, a(n), b(n)
+
+ if (any (a .ne. b)) call abort
+end subroutine check