diff mbox

[gomp4,2/3] OpenACC 2.0 support for libgomp - new tests

Message ID 20141014171142.14aa7de4@octopus
State New
Headers show

Commit Message

Julian Brown Oct. 14, 2014, 4:11 p.m. UTC
This is an updated version of the patch:

https://gcc.gnu.org/ml/gcc-patches/2014-09/msg02025.html

but against the gomp4 branch instead of mainline. Some tests have been
updated slightly since the last patch.

OK to apply (to the gomp4 branch)?

Thanks,

Julian

ChangeLog

xxxx-xx-xx  James Norris  <jnorris@codesourcery.com>
	    Thomas Schwinge  <thomas@codesourcery.com>
	    Tom de Vries  <tom@codesourcery.com>
	    Cesar Philippidis  <cesar@codesourcery.com>

    libgomp/
    * testsuite/Makefile.in: Regenerated.
    * testsuite/lib/libgomp.exp (libgomp_init): Add the top-level
    include directory to ALWAYS_CFLAGS, for libgomp-constants.h.
    (check_effective_target_openacc_nvidia_accel_present)
    (check_effective_target_openacc_nvidia_accel_selected): New
    functions.
    * testsuite/libgomp.oacc-fortran/fortran.exp: New exp file.
    * testsuite/libgomp.oacc-fortran/*.f: New tests.
    * testsuite/libgomp.oacc-fortran/*.f90: Likewise.
    * testsuite/libgomp.oacc-c/c.exp: New exp file.
    * testsuite/libgomp.oacc-c/subr.ptx: New file.
    * testsuite/libgomp.oacc-c/subr.cu: New file.
    * testsuite/libgomp.oacc-c/timer.h: New file.
    * testsuite/libgomp.oacc-c/*.c: New tests.
    * testsuite/libgomp.oacc-c++/c++.exp: New exp file.
diff mbox

Patch

From 18c107c58d42314128e485bb79892672a8feaa6b Mon Sep 17 00:00:00 2001
From: Julian Brown <julian@codesourcery.com>
Date: Mon, 13 Oct 2014 04:40:51 -0700
Subject: [PATCH 1/3] Tests for libgomp OpenACC support.

---
 libgomp/testsuite/Makefile.in                      |    4 +
 libgomp/testsuite/lib/libgomp.exp                  |   30 +
 libgomp/testsuite/libgomp.oacc-c++/c++.exp         |   37 +-
 libgomp/testsuite/libgomp.oacc-c/abort-2.c         |   17 +
 libgomp/testsuite/libgomp.oacc-c/abort.c           |   17 +
 libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c |   25 +-
 libgomp/testsuite/libgomp.oacc-c/c.exp             |   50 +-
 libgomp/testsuite/libgomp.oacc-c/clauses-1.c       |  623 ++++++++++++++++++
 libgomp/testsuite/libgomp.oacc-c/clauses-2.c       |   67 ++
 libgomp/testsuite/libgomp.oacc-c/context-1.c       |  213 ++++++
 libgomp/testsuite/libgomp.oacc-c/context-2.c       |  223 +++++++
 libgomp/testsuite/libgomp.oacc-c/context-3.c       |  200 ++++++
 libgomp/testsuite/libgomp.oacc-c/context-4.c       |  213 ++++++
 libgomp/testsuite/libgomp.oacc-c/data-1.c          |  112 ++--
 libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c     |   32 +
 libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c   |    3 +-
 libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c  |    3 +-
 libgomp/testsuite/libgomp.oacc-c/if-1.c            |  547 ++++++++++++++++
 libgomp/testsuite/libgomp.oacc-c/kernels-1.c       |   22 +-
 libgomp/testsuite/libgomp.oacc-c/lib-1.c           |   19 +-
 libgomp/testsuite/libgomp.oacc-c/lib-10.c          |   58 ++
 libgomp/testsuite/libgomp.oacc-c/lib-11.c          |   22 +
 libgomp/testsuite/libgomp.oacc-c/lib-12.c          |   37 ++
 libgomp/testsuite/libgomp.oacc-c/lib-13.c          |   60 ++
 libgomp/testsuite/libgomp.oacc-c/lib-14.c          |   61 ++
 libgomp/testsuite/libgomp.oacc-c/lib-15.c          |   33 +
 libgomp/testsuite/libgomp.oacc-c/lib-16.c          |   29 +
 libgomp/testsuite/libgomp.oacc-c/lib-17.c          |   31 +
 libgomp/testsuite/libgomp.oacc-c/lib-18.c          |   34 +
 libgomp/testsuite/libgomp.oacc-c/lib-19.c          |   60 ++
 libgomp/testsuite/libgomp.oacc-c/lib-2.c           |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-20.c          |   29 +
 libgomp/testsuite/libgomp.oacc-c/lib-21.c          |   29 +
 libgomp/testsuite/libgomp.oacc-c/lib-22.c          |   29 +
 libgomp/testsuite/libgomp.oacc-c/lib-23.c          |   39 ++
 libgomp/testsuite/libgomp.oacc-c/lib-24.c          |   55 ++
 libgomp/testsuite/libgomp.oacc-c/lib-25.c          |   30 +
 libgomp/testsuite/libgomp.oacc-c/lib-26.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-27.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-28.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-29.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-3.c           |   15 +
 libgomp/testsuite/libgomp.oacc-c/lib-30.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-31.c          |   27 +
 libgomp/testsuite/libgomp.oacc-c/lib-32.c          |   38 ++
 libgomp/testsuite/libgomp.oacc-c/lib-33.c          |   31 +
 libgomp/testsuite/libgomp.oacc-c/lib-34.c          |   33 +
 libgomp/testsuite/libgomp.oacc-c/lib-35.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-36.c          |   26 +
 libgomp/testsuite/libgomp.oacc-c/lib-37.c          |   40 ++
 libgomp/testsuite/libgomp.oacc-c/lib-38.c          |   67 ++
 libgomp/testsuite/libgomp.oacc-c/lib-39.c          |   41 ++
 libgomp/testsuite/libgomp.oacc-c/lib-4.c           |   13 +
 libgomp/testsuite/libgomp.oacc-c/lib-40.c          |   42 ++
 libgomp/testsuite/libgomp.oacc-c/lib-41.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-42.c          |   35 +
 libgomp/testsuite/libgomp.oacc-c/lib-43.c          |   45 ++
 libgomp/testsuite/libgomp.oacc-c/lib-44.c          |   45 ++
 libgomp/testsuite/libgomp.oacc-c/lib-45.c          |   50 ++
 libgomp/testsuite/libgomp.oacc-c/lib-46.c          |   42 ++
 libgomp/testsuite/libgomp.oacc-c/lib-47.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-48.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-49.c          |   48 ++
 libgomp/testsuite/libgomp.oacc-c/lib-5.c           |   40 ++
 libgomp/testsuite/libgomp.oacc-c/lib-50.c          |   30 +
 libgomp/testsuite/libgomp.oacc-c/lib-51.c          |   41 ++
 libgomp/testsuite/libgomp.oacc-c/lib-52.c          |   28 +
 libgomp/testsuite/libgomp.oacc-c/lib-53.c          |   28 +
 libgomp/testsuite/libgomp.oacc-c/lib-54.c          |   28 +
 libgomp/testsuite/libgomp.oacc-c/lib-55.c          |   48 ++
 libgomp/testsuite/libgomp.oacc-c/lib-56.c          |   33 +
 libgomp/testsuite/libgomp.oacc-c/lib-57.c          |   28 +
 libgomp/testsuite/libgomp.oacc-c/lib-58.c          |   28 +
 libgomp/testsuite/libgomp.oacc-c/lib-59.c          |   55 ++
 libgomp/testsuite/libgomp.oacc-c/lib-6.c           |   39 ++
 libgomp/testsuite/libgomp.oacc-c/lib-60.c          |   54 ++
 libgomp/testsuite/libgomp.oacc-c/lib-61.c          |   70 ++
 libgomp/testsuite/libgomp.oacc-c/lib-62.c          |   49 ++
 libgomp/testsuite/libgomp.oacc-c/lib-63.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-64.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-65.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-66.c          |   47 ++
 libgomp/testsuite/libgomp.oacc-c/lib-67.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-68.c          |   43 ++
 libgomp/testsuite/libgomp.oacc-c/lib-69.c          |  124 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-7.c           |   18 +
 libgomp/testsuite/libgomp.oacc-c/lib-70.c          |  136 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-71.c          |  119 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-72.c          |  121 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-73.c          |  134 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-74.c          |  139 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-75.c          |  141 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-76.c          |  147 +++++
 libgomp/testsuite/libgomp.oacc-c/lib-77.c          |  135 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-78.c          |  140 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-79.c          |  167 +++++
 libgomp/testsuite/libgomp.oacc-c/lib-80.c          |  132 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-81.c          |  211 ++++++
 libgomp/testsuite/libgomp.oacc-c/lib-82.c          |  144 +++++
 libgomp/testsuite/libgomp.oacc-c/lib-83.c          |   58 ++
 libgomp/testsuite/libgomp.oacc-c/lib-84.c          |   66 ++
 libgomp/testsuite/libgomp.oacc-c/lib-85.c          |   52 ++
 libgomp/testsuite/libgomp.oacc-c/lib-86.c          |   42 ++
 libgomp/testsuite/libgomp.oacc-c/lib-87.c          |   42 ++
 libgomp/testsuite/libgomp.oacc-c/lib-88.c          |  111 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-89.c          |  118 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-9.c           |   70 ++
 libgomp/testsuite/libgomp.oacc-c/lib-90.c          |  137 ++++
 libgomp/testsuite/libgomp.oacc-c/lib-91.c          |   84 +++
 libgomp/testsuite/libgomp.oacc-c/lib-92.c          |  112 ++++
 libgomp/testsuite/libgomp.oacc-c/nested-1.c        |  680 ++++++++++++++++++++
 libgomp/testsuite/libgomp.oacc-c/nested-2.c        |   35 +
 libgomp/testsuite/libgomp.oacc-c/offset-1.c        |   97 +++
 libgomp/testsuite/libgomp.oacc-c/parallel-1.c      |   76 ++-
 libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c |   35 +
 libgomp/testsuite/libgomp.oacc-c/present-1.c       |   48 ++
 libgomp/testsuite/libgomp.oacc-c/present-2.c       |   48 ++
 libgomp/testsuite/libgomp.oacc-c/subr.cu           |   64 ++
 libgomp/testsuite/libgomp.oacc-c/subr.ptx          |  148 +++++
 libgomp/testsuite/libgomp.oacc-c/timer.h           |  103 +++
 libgomp/testsuite/libgomp.oacc-c/update-1.c        |  280 ++++++++
 libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 |   10 +
 libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 |   13 +
 .../libgomp.oacc-fortran/acc_on_device-1-1.f90     |   17 +-
 .../libgomp.oacc-fortran/acc_on_device-1-2.f       |   17 +-
 .../libgomp.oacc-fortran/acc_on_device-1-3.f       |   17 +-
 libgomp/testsuite/libgomp.oacc-fortran/fortran.exp |   42 +-
 libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90   |   10 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90  |   82 +++
 libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90  |   82 +++
 libgomp/testsuite/libgomp.oacc-fortran/lib-2.f     |   10 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-3.f     |   10 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90   |   35 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90   |   31 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90   |   35 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90   |   31 +
 libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90   |   83 +++
 libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90   |   83 +++
 libgomp/testsuite/libgomp.oacc-fortran/map-1.f90   |   97 +++
 .../libgomp.oacc-fortran/pointer-align-1.f90       |   21 +
 libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90  |  229 +++++++
 .../testsuite/libgomp.oacc-fortran/subarrays-1.f90 |   97 +++
 .../testsuite/libgomp.oacc-fortran/subarrays-2.f90 |  100 +++
 143 files changed, 10476 insertions(+), 93 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/abort-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/abort.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/clauses-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/clauses-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-4.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/if-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-10.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-11.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-12.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-13.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-14.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-15.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-16.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-17.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-18.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-19.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-20.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-21.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-22.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-23.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-24.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-25.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-26.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-27.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-28.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-29.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-30.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-31.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-32.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-33.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-34.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-35.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-36.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-37.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-38.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-39.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-4.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-40.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-41.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-42.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-43.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-44.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-45.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-46.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-47.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-48.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-49.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-50.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-51.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-52.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-53.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-54.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-55.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-56.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-57.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-58.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-59.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-6.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-60.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-61.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-62.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-63.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-64.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-65.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-66.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-67.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-68.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-69.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-7.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-70.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-71.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-72.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-73.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-74.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-75.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-76.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-77.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-78.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-79.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-80.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-81.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-82.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-83.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-84.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-85.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-86.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-87.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-88.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-89.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-9.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-90.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-91.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-92.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/offset-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/present-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/present-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/subr.cu
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/subr.ptx
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/timer.h
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/update-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90

diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in
index 5273eaa..77b365e 100644
--- a/libgomp/testsuite/Makefile.in
+++ b/libgomp/testsuite/Makefile.in
@@ -129,6 +129,10 @@  PACKAGE_URL = @PACKAGE_URL@
 PACKAGE_VERSION = @PACKAGE_VERSION@
 PATH_SEPARATOR = @PATH_SEPARATOR@
 PERL = @PERL@
+PLUGIN_NVPTX = @PLUGIN_NVPTX@
+PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
+PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
+PLUGIN_NVPTX_LIBS = @PLUGIN_NVPTX_LIBS@
 RANLIB = @RANLIB@
 SECTION_LDFLAGS = @SECTION_LDFLAGS@
 SED = @SED@
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 094e5ed..78a14cb 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -139,6 +139,8 @@  proc libgomp_init { args } {
         lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs"
         lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}"
         lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs"
+	# The top-level include directory, for libgomp-constants.h.
+	lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
     }
     lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.."
 
@@ -239,3 +241,31 @@  proc libgomp_option_proc { option } {
 	return 0
     }
 }
+
+# Return 1 if at least one nvidia board is present.
+
+proc check_effective_target_openacc_nvidia_accel_present { } {
+    return [check_runtime openacc_nvidia_accel_present {
+	#include <openacc.h>
+	int main () {
+	return !(acc_get_num_devices (acc_device_nvidia) > 0);
+	}
+    } "" ]
+}
+
+# Return 1 if at least one nvidia board is present, and the nvidia device type
+# is selected by default, i.e. the environment variable ACC_DEVICE_TYPE is set
+# to "nvidia".
+
+proc check_effective_target_openacc_nvidia_accel_selected { } {
+    if { ![check_effective_target_openacc_nvidia_accel_present] } {
+	return 0;
+    }
+    if { ![info exists ::env(ACC_DEVICE_TYPE)] } {
+	return 0;
+    }
+    if { $::env(ACC_DEVICE_TYPE) == "nvidia" } {
+        return 1;
+    }
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
index ae8a1d5..164d7d2 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/c++.exp
+++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -17,7 +17,8 @@  if [info exists lang_include_flags] then {
 dg-init
 
 # Turn on OpenACC.
-lappend ALWAYS_CFLAGS "additional_flags=-fopenacc"
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
 
 set blddir [lookfor_file [get_multilibs] libgomp]
 
@@ -61,8 +62,38 @@  if { $lang_test_file_found } {
 	set libstdcxx_includes ""
     }
 
-    # Main loop.
-    dg-runtest $tests "" $libstdcxx_includes
+    # Todo: get list of accelerators from configure options --enable-accelerator.
+    set accels { "nvidia" "host_nonshm" }
+
+    # Run on host (or fallback) accelerator.
+    lappend accels "host"
+
+    # Test OpenACC with available accelerators.
+    foreach accel $accels {
+	set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+
+	# Todo: Determine shared memory or not using run-time test.
+	switch $accel {
+	    host {
+		set acc_mem_shared 1
+	    }
+	    host_nonshm {
+		set acc_mem_shared 0
+	    }
+	    nvidia {
+		set acc_mem_shared 0
+	    }
+	    default {
+		set acc_mem_shared 0
+	    }
+	}
+	set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+	# Todo: Verify that this works for both local and remote testing.
+	setenv ACC_DEVICE_TYPE $accel
+
+	dg-runtest $tests "$tagopt" $libstdcxx_includes
+    }
 }
 
 # All done.
diff --git a/libgomp/testsuite/libgomp.oacc-c/abort-2.c b/libgomp/testsuite/libgomp.oacc-c/abort-2.c
new file mode 100644
index 0000000..debb81e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/abort-2.c
@@ -0,0 +1,17 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+
+#pragma acc parallel
+  {
+    if (argc != 1)
+      abort ();
+  }
+
+  return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c/abort.c b/libgomp/testsuite/libgomp.oacc-c/abort.c
new file mode 100644
index 0000000..f88b9e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/abort.c
@@ -0,0 +1,17 @@ 
+/* { dg-do run } */
+/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+
+#pragma acc parallel
+  {
+    abort ();
+  }
+
+  return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c
index f216587..81ea476 100644
--- a/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c
@@ -1,7 +1,6 @@ 
 /* Disable the acc_on_device builtin; we want to test the libgomp library
    function.  */
-/* TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness.  */
-/* { dg-additional-options "-fno-builtin-acc_on_device -DACC_DEVICE_TYPE_host" } */
+/* { dg-additional-options "-fno-builtin-acc_on_device" } */
 
 #include <stdlib.h>
 #include <openacc.h>
@@ -16,8 +15,12 @@  main (int argc, char *argv[])
       abort ();
     if (!acc_on_device (acc_device_host))
       abort ();
+    if (acc_on_device (acc_device_host_nonshm))
+      abort ();
     if (acc_on_device (acc_device_not_host))
       abort ();
+    if (acc_on_device (acc_device_nvidia))
+      abort ();
   }
 
 
@@ -29,8 +32,12 @@  main (int argc, char *argv[])
       abort ();
     if (!acc_on_device (acc_device_host))
       abort ();
+    if (acc_on_device (acc_device_host_nonshm))
+      abort ();
     if (acc_on_device (acc_device_not_host))
       abort ();
+    if (acc_on_device (acc_device_nvidia))
+      abort ();
   }
 
 
@@ -44,8 +51,22 @@  main (int argc, char *argv[])
       abort ();
     if (acc_on_device (acc_device_host))
       abort ();
+#if ACC_DEVICE_TYPE_host_nonshm
+    if (!acc_on_device (acc_device_host_nonshm))
+      abort ();
+#else
+    if (acc_on_device (acc_device_host_nonshm))
+      abort ();
+#endif
     if (!acc_on_device (acc_device_not_host))
       abort ();
+#if ACC_DEVICE_TYPE_nvidia
+    if (!acc_on_device (acc_device_nvidia))
+      abort ();
+#else
+    if (acc_on_device (acc_device_nvidia))
+      abort ();
+#endif
   }
 
 #endif
diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp
index 13a478e..553c225 100644
--- a/libgomp/testsuite/libgomp.oacc-c/c.exp
+++ b/libgomp/testsuite/libgomp.oacc-c/c.exp
@@ -23,17 +23,61 @@  if ![info exists DEFAULT_CFLAGS] then {
 dg-init
 
 # Turn on OpenACC.
-lappend ALWAYS_CFLAGS "additional_flags=-fopenacc"
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
 
 # Gather a list of all tests.
 set tests [lsort [find $srcdir/$subdir *.c]]
 
 set ld_library_path $always_ld_library_path
 append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+append ld_library_path ":/opt/nvidia/cuda-5.5/lib64"
 set_ld_library_path_env_vars
 
-# Main loop.
-dg-runtest $tests "" $DEFAULT_CFLAGS
+# Todo: get list of accelerators from configure options --enable-accelerator.
+set accels { "nvidia" "host_nonshm" }
+
+# Run on host (or fallback) accelerator.
+lappend accels "host"
+
+# Test OpenACC with available accelerators.
+set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+foreach accel $accels {
+    set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+    set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+
+    # Todo: Determine shared memory or not using run-time test.
+    switch $accel {
+	host {
+	    set acc_mem_shared 1
+	}
+	host_nonshm {
+	    set acc_mem_shared 0
+	}
+	nvidia {
+	    # Copy ptx file (TEMPORARY)
+	    remote_download host $srcdir/libgomp.oacc-c/subr.ptx
+
+	    # Where cuda.h lives
+	    # Todo: get that from configure option --with-cuda-driver.
+	    lappend ALWAYS_CFLAGS "additional_flags=-I/opt/nvidia/cuda-5.5/include"
+	    lappend ALWAYS_CFLAGS "additional_flags=-L/opt/nvidia/cuda-5.5/lib64"
+
+	    # Where timer.h lives
+	    lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}"
+	    set acc_mem_shared 0
+	}
+	default {
+	    set acc_mem_shared 0
+	}
+    }
+    set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+    # Todo: Verify that this works for both local and remote testing.
+    setenv ACC_DEVICE_TYPE $accel
+
+    dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS
+}
 
 # All done.
 dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c
new file mode 100644
index 0000000..51c0cf5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c
@@ -0,0 +1,623 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    c = (float *) malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+    d = (float *) acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc parallel present_or_copyin (a[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 2.0;
+    }
+
+    d = (float *) acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 4.0;
+    }
+
+#pragma acc parallel copy (a[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            a[ii] = a[ii] + 1;
+            b[ii] = a[ii] + 2;
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            a[ii] = a[ii] + 1;
+            b[ii] = b[ii] + 2;
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+
+    d = (float *) acc_copyin (&a[0], N * sizeof (float));
+    d = (float *) acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            a[ii] = a[ii] + 1;
+            b[ii] = b[ii] + 2;
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 7.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_deviceptr (&a[0]);
+    acc_unmap_data (&a[0]);
+    acc_free (d);
+
+    d = (float *) acc_deviceptr (&b[0]);
+    acc_unmap_data (&b[0]);
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) create (c[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present (c[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+    acc_copyin (a, N * sizeof (float));
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (b, d, N * sizeof (float));
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel present (a[0:N]) present (c[0:N]) present (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    if (!acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    acc_copyout (b, N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+    }
+
+    d = (float *) acc_deviceptr (a);
+
+    acc_unmap_data (a);
+
+    acc_free (d);
+
+    d = (float *) acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 6.0;
+    }
+
+    d = (float *) acc_malloc (N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            d[ii] = a[ii];
+            b[ii] = d[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+    d = (float *) acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc parallel pcopyin (a[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) pcopyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc parallel copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort ();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-2.c b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c
new file mode 100644
index 0000000..8dc45cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c
@@ -0,0 +1,67 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+    /* Expected-failure test: c is mapped for N floats, used as c[0:N+1].  */
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    c = (float *) malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+    /* Map host array c onto N floats of newly allocated device memory.  */
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+    /* c[0:N+1] is larger than that mapping; see dg-shouldfail at end of file.  */
+#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N+1]) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            c[ii] = a[ii];
+            b[ii] = c[ii];
+        }
+    }
+    /* NOTE(review): presumably not reached when the run fails as expected.  */
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    return 0;
+}
+/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-1.c b/libgomp/testsuite/libgomp.oacc-c/context-1.c
new file mode 100644
index 0000000..dabc706
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-1.c
@@ -0,0 +1,213 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+    int i;
+    /* Plain C reference: y[i] = a * x[i] + y[i] for i in [0, n).  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless CTX1 is still the CUDA context in use.  */
+    r = cuCtxGetCurrent (&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+    /* The context returned by cuCtxGetCurrent must be CTX1...  */
+    if (ctx1 != ctx2)
+    {
+        fprintf (stderr, "new context established\n");
+        exit (EXIT_FAILURE);
+    }
+    /* ...and so must the one returned by acc_get_current_cuda_context.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main (int argc, char **argv)
+{
+    cublasStatus_t s;
+    cudaError_t e;
+    cublasHandle_t h;
+    CUcontext pctx, ctx;
+    CUresult r;
+    int dev;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 1 - cuBLAS creates, OpenACC shares.  */
+
+    s = cublasCreate (&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasCreate failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    e = cudaGetDevice (&dev);
+    if (e != cudaSuccess)
+    {
+        fprintf (stderr, "cudaGetDevice failed: %d\n", e);
+        exit (EXIT_FAILURE);
+    }
+
+    acc_set_device_num (dev, acc_device_nvidia);
+
+    h_X = (float *) malloc (N * sizeof (float));
+    if (!h_X)
+    {
+        fprintf (stderr, "malloc failed: for h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *) malloc (N * sizeof (float));
+    if (!h_Y1)
+    {
+        fprintf (stderr, "malloc failed: for h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *) malloc (N * sizeof (float));
+    if (!h_Y2)
+    {
+        fprintf (stderr, "malloc failed: for h_Y2\n");
+        exit (EXIT_FAILURE);
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand () / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+    }
+
+    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf (stderr, "copyin error h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf (stderr, "copyin error h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check (pctx);
+
+    saxpy (N, alpha, h_X, h_Y2);
+
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float) sqrt ((double) error_norm);
+    ref_norm = (float) sqrt ((double) ref_norm);
+
+    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf (stderr, "math error\n");
+        exit (EXIT_FAILURE);
+    }
+
+    free (h_X);
+    free (h_Y1);
+    free (h_Y2);
+
+    acc_free (d_X);
+    acc_free (d_Y);
+
+    context_check (pctx);
+
+    s = cublasDestroy (h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasDestroy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent (&ctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    if (!ctx)
+    {
+        fprintf (stderr, "Expected context\n");
+        exit (EXIT_FAILURE);
+    }
+
+    if (pctx != ctx)
+    {
+        fprintf (stderr, "Unexpected new context\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-2.c b/libgomp/testsuite/libgomp.oacc-c/context-2.c
new file mode 100644
index 0000000..16464d5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-2.c
@@ -0,0 +1,223 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+    int i;
+    /* Plain C y = a*x + y.  NOTE(review): no call to it is visible in this file.  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless CTX1 is still the CUDA context in use.  */
+    r = cuCtxGetCurrent (&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+    /* The context returned by cuCtxGetCurrent must be CTX1...  */
+    if (ctx1 != ctx2)
+    {
+        fprintf (stderr, "new context established\n");
+        exit (EXIT_FAILURE);
+    }
+    /* ...and so must the one returned by acc_get_current_cuda_context.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main (int argc, char **argv)
+{
+    cublasStatus_t s;
+    cudaError_t e;
+    cublasHandle_t h;
+    CUcontext pctx, ctx;
+    CUresult r;
+    int dev;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 2 - cuBLAS creates, OpenACC shares.  */
+
+    s = cublasCreate (&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasCreate failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    e = cudaGetDevice (&dev);
+    if (e != cudaSuccess)
+    {
+        fprintf (stderr, "cudaGetDevice failed: %d\n", e);
+        exit (EXIT_FAILURE);
+    }
+
+    acc_set_device_num (dev, acc_device_nvidia);
+
+    h_X = (float *) malloc (N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *) malloc (N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *) malloc (N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_Y2\n");
+        exit (EXIT_FAILURE);
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand () / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+    }
+
+    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf (stderr, "copyin error h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf (stderr, "copyin error h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check (pctx);
+
+#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha)
+    {
+        int i;
+
+        for (i = 0; i < N; i++)
+        {
+            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+        }
+    }
+
+    context_check (pctx);
+
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float) sqrt ((double) error_norm);
+    ref_norm = (float) sqrt ((double) ref_norm);
+
+    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf (stderr, "math error\n");
+        exit (EXIT_FAILURE);
+    }
+
+    free (h_X);
+    free (h_Y1);
+    free (h_Y2);
+
+    acc_free (d_X);
+    acc_free (d_Y);
+
+    context_check (pctx);
+
+    s = cublasDestroy (h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasDestroy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent (&ctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    if (!ctx)
+    {
+        fprintf (stderr, "Expected context\n");
+        exit (EXIT_FAILURE);
+    }
+
+    if (pctx != ctx)
+    {
+        fprintf (stderr, "Unexpected new context\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-3.c b/libgomp/testsuite/libgomp.oacc-c/context-3.c
new file mode 100644
index 0000000..ccd276c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-3.c
@@ -0,0 +1,200 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+    int i;
+    /* Plain C reference: y[i] = a * x[i] + y[i] for i in [0, n).  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless CTX1 is still the CUDA context in use.  */
+    r = cuCtxGetCurrent (&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+    /* The context returned by cuCtxGetCurrent must be CTX1...  */
+    if (ctx1 != ctx2)
+    {
+        fprintf (stderr, "new context established\n");
+        exit (EXIT_FAILURE);
+    }
+    /* ...and so must the one returned by acc_get_current_cuda_context.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main (int argc, char **argv)
+{
+    cublasStatus_t s;
+    cublasHandle_t h;
+    CUcontext pctx;
+    CUresult r;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 3 - OpenACC creates, cuBLAS shares.  */
+
+    acc_set_device_num (0, acc_device_nvidia);
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    h_X = (float *) malloc (N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *) malloc (N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *) malloc (N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_Y2\n");
+        exit (EXIT_FAILURE);
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand () / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+    }
+
+    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf (stderr, "copyin error h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf (stderr, "copyin error h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    s = cublasCreate (&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasCreate failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check (pctx);
+
+    saxpy (N, alpha, h_X, h_Y2);
+
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float) sqrt ((double) error_norm);
+    ref_norm = (float) sqrt ((double) ref_norm);
+
+    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf (stderr, "math error\n");
+        exit (EXIT_FAILURE);
+    }
+
+    free (h_X);
+    free (h_Y1);
+    free (h_Y2);
+
+    acc_free (d_X);
+    acc_free (d_Y);
+
+    context_check (pctx);
+
+    s = cublasDestroy (h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasDestroy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    if (pctx)
+    {
+        fprintf (stderr, "Unexpected context\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/context-4.c b/libgomp/testsuite/libgomp.oacc-c/context-4.c
new file mode 100644
index 0000000..71365e8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/context-4.c
@@ -0,0 +1,213 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+    int i;
+    /* Plain C y = a*x + y.  NOTE(review): no call to it is visible in this file.  */
+    for (i = 0; i < n; i++)
+    {
+        y[i] = a * x[i] + y[i];
+    }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+    CUcontext ctx2, ctx3;
+    CUresult r;
+    /* Exit with failure unless CTX1 is still the CUDA context in use.  */
+    r = cuCtxGetCurrent (&ctx2);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+    /* The context returned by cuCtxGetCurrent must be CTX1...  */
+    if (ctx1 != ctx2)
+    {
+        fprintf (stderr, "new context established\n");
+        exit (EXIT_FAILURE);
+    }
+    /* ...and so must the one returned by acc_get_current_cuda_context.  */
+    ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+    if (ctx1 != ctx3)
+    {
+        fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return;
+}
+
+int
+main (int argc, char **argv)
+{
+    cublasStatus_t s;
+    cublasHandle_t h;
+    CUcontext pctx;
+    CUresult r;
+    int i;
+    const int N = 256;
+    float *h_X, *h_Y1, *h_Y2;
+    float *d_X,*d_Y;
+    float alpha = 2.0f;
+    float error_norm;
+    float ref_norm;
+
+    /* Test 4 - OpenACC creates, cuBLAS shares.  */
+
+    acc_set_device_num (0, acc_device_nvidia);
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    h_X = (float *) malloc (N * sizeof (float));
+    if (h_X == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y1 = (float *) malloc (N * sizeof (float));
+    if (h_Y1 == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    h_Y2 = (float *) malloc (N * sizeof (float));
+    if (h_Y2 == 0)
+    {
+        fprintf (stderr, "malloc failed: for h_Y2\n");
+        exit (EXIT_FAILURE);
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        h_X[i] = rand () / (float) RAND_MAX;
+        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+    }
+
+#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
+    {
+        int i;
+
+        for (i = 0; i < N; i++)
+        {
+            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+        }
+    }
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+    if (d_X == NULL)
+    {
+        fprintf (stderr, "copyin error h_X\n");
+        exit (EXIT_FAILURE);
+    }
+
+    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+    if (d_Y == NULL)
+    {
+        fprintf (stderr, "copyin error h_Y1\n");
+        exit (EXIT_FAILURE);
+    }
+
+    s = cublasCreate (&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasCreate failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+    context_check (pctx);
+
+    error_norm = 0;
+    ref_norm = 0;
+
+    for (i = 0; i < N; ++i)
+    {
+        float diff;
+
+        diff = h_Y1[i] - h_Y2[i];
+        error_norm += diff * diff;
+        ref_norm += h_Y2[i] * h_Y2[i];
+    }
+
+    error_norm = (float) sqrt ((double) error_norm);
+    ref_norm = (float) sqrt ((double) ref_norm);
+
+    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+    {
+        fprintf (stderr, "math error\n");
+        exit (EXIT_FAILURE);
+    }
+
+    free (h_X);
+    free (h_Y1);
+    free (h_Y2);
+
+    acc_free (d_X);
+    acc_free (d_Y);
+
+    context_check (pctx);
+
+    s = cublasDestroy (h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+    {
+        fprintf (stderr, "cublasDestroy failed: %d\n", s);
+        exit (EXIT_FAILURE);
+    }
+
+    context_check (pctx);
+
+    acc_shutdown (acc_device_nvidia);
+
+    r = cuCtxGetCurrent (&pctx);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+        exit (EXIT_FAILURE);
+    }
+
+    if (pctx)
+    {
+        fprintf (stderr, "Unexpected context\n");
+        exit (EXIT_FAILURE);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/data-1.c b/libgomp/testsuite/libgomp.oacc-c/data-1.c
index 8f9a17a..e7564cc 100644
--- a/libgomp/testsuite/libgomp.oacc-c/data-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c/data-1.c
@@ -1,19 +1,30 @@ 
 /* { dg-do run } */
 
-extern void abort ();
+#include <stdlib.h>
+#include <openacc.h>
 
 int i;
 
+int
+is_mapped (void *p, size_t n)
+{
+#if ACC_MEM_SHARED
+  return 1;
+#else
+  return acc_is_present (p, n);
+#endif
+}
+
 int main(void)
 {
   int j;
 
-#if 0
   i = -1;
   j = -2;
 #pragma acc data copyin (i, j)
   {
-    // TODO: check that variables have been mapped.
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
     if (i != -1 || j != -2)
       abort ();
     i = 2;
@@ -28,37 +39,30 @@  int main(void)
   j = -2;
 #pragma acc data copyout (i, j)
   {
-    // TODO: check that variables have been mapped.
-    if (i != -1 || j != -2)
-      abort ();
-    i = 2;
-    j = 1;
-    if (i != 2 || j != 1)
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
       abort ();
-  }
-  if (i != -1 || j != -2)
-    abort ();
-
-  i = -1;
-  j = -2;
-#pragma acc data copy (i, j)
-  {
-    // TODO: check that variables have been mapped.
     if (i != -1 || j != -2)
       abort ();
     i = 2;
     j = 1;
     if (i != 2 || j != 1)
       abort ();
+
+#pragma acc parallel present (i, j)
+    {
+      i = 4;
+      j = 2;
+    }
   }
-  if (i != -1 || j != -2)
+  if (i != 4 || j != 2)
     abort ();
 
   i = -1;
   j = -2;
 #pragma acc data create (i, j)
   {
-    // TODO: check that variables have been mapped.
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
     if (i != -1 || j != -2)
       abort ();
     i = 2;
@@ -66,15 +70,15 @@  int main(void)
     if (i != 2 || j != 1)
       abort ();
   }
-  if (i != -1 || j != -2)
+  if (i != 2 || j != 1)
     abort ();
-#endif
 
   i = -1;
   j = -2;
 #pragma acc data present_or_copyin (i, j)
   {
-    // TODO: check that variables have been mapped.
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
     if (i != -1 || j != -2)
       abort ();
     i = 2;
@@ -85,28 +89,34 @@  int main(void)
   if (i != 2 || j != 1)
     abort ();
 
-#if 0
   i = -1;
   j = -2;
 #pragma acc data present_or_copyout (i, j)
   {
-    // TODO: check that variables have been mapped.
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
     if (i != -1 || j != -2)
       abort ();
     i = 2;
     j = 1;
     if (i != 2 || j != 1)
       abort ();
+
+#pragma acc parallel present (i, j)
+    {
+      i = 4;
+      j = 2;
+    }
   }
-  if (i != -1 || j != -2)
+  if (i != 4 || j != 2)
     abort ();
-#endif
 
   i = -1;
   j = -2;
 #pragma acc data present_or_copy (i, j)
   {
-    // TODO: check that variables have been mapped.
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
     if (i != -1 || j != -2)
       abort ();
     i = 2;
@@ -114,47 +124,56 @@  int main(void)
     if (i != 2 || j != 1)
       abort ();
   }
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
+    abort ();
+#else
   if (i != -1 || j != -2)
     abort ();
+#endif
 
-#if 0
   i = -1;
   j = -2;
 #pragma acc data present_or_create (i, j)
   {
-    // TODO: check that variables have been mapped.
+    if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+      abort ();
     i = 2;
     j = 1;
     if (i != 2 || j != 1)
       abort ();
   }
-  if (i != -1 || j != -2)
+
+  if (i != 2 || j != 1)
     abort ();
-#endif
 
-#if 0
   i = -1;
   j = -2;
-#pragma acc data present (i, j)
+#pragma acc data copyin (i, j)
   {
-    // TODO: check that variables have been mapped.
-    if (i != -1 || j != -2)
-      abort ();
-    i = 2;
-    j = 1;
-    if (i != 2 || j != 1)
-      abort ();
+#pragma acc data present (i, j)
+    {
+      if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+        abort ();
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+    }
   }
-  if (i != -1 || j != -2)
+  if (i != 2 || j != 1)
     abort ();
-#endif
 
-#if 0
   i = -1;
   j = -2;
 #pragma acc data
   {
-    // TODO: check that variables have been mapped.
+#if !ACC_MEM_SHARED
+    if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j)))
+      abort ();
+#endif
     if (i != -1 || j != -2)
       abort ();
     i = 2;
@@ -162,9 +181,8 @@  int main(void)
     if (i != 2 || j != 1)
       abort ();
   }
-  if (i != -1 || j != -2)
+  if (i != 2 || j != 1)
     abort ();
-#endif
 
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c
new file mode 100644
index 0000000..e271a37
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c
@@ -0,0 +1,32 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int main (void)
+{
+  void *a, *a_1, *a_2;
+
+#define A (void *) 0x123
+  a = A;
+
+#pragma acc data copyout (a_1, a_2)
+#pragma acc kernels deviceptr (a)
+  {
+    a_1 = a;
+    a_2 = &a;
+  }
+
+  if (a != A)
+    abort ();
+  if (a_1 != a)
+    abort ();
+#if ACC_MEM_SHARED
+  if (a_2 != &a)
+    abort ();
+#else
+  if (a_2 == &a)
+    abort ();
+#endif
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c
index b41e558..683fefa 100644
--- a/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c
+++ b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c
@@ -1,4 +1,5 @@ 
 /* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */
 
 #include "libgomp_g.h"
 
@@ -19,7 +20,7 @@  int main(void)
   i = -1;
   GOACC_kernels (0, f, (const void *) 0,
 		 0, (void *) 0, (void *) 0, (void *) 0,
-		 1, 1, 1);
+		 1, 1, 1, -2, -1);
   if (i != 42)
     abort ();
 
diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
index 4ab1e9b..232ce8a 100644
--- a/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
+++ b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
@@ -1,4 +1,5 @@ 
 /* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */
 
 #include "libgomp_g.h"
 
@@ -19,7 +20,7 @@  int main(void)
   i = -1;
   GOACC_parallel (0, f, (const void *) 0,
 		  0, (void *) 0, (void *) 0, (void *) 0,
-		  1, 1, 1);
+		  1, 1, 1, -2, -1);
   if (i != 42)
     abort ();
 
diff --git a/libgomp/testsuite/libgomp.oacc-c/if-1.c b/libgomp/testsuite/libgomp.oacc-c/if-1.c
new file mode 100644
index 0000000..e289f40
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/if-1.c
@@ -0,0 +1,547 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-fno-builtin-acc_on_device" } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define N   32
+
+int
+main(int argc, char **argv)
+{
+    float *a, *b, *d_a, *d_b, exp, exp2;
+    int i;
+    const int one = 1;
+    const int zero = 0;
+    int n;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    d_a = (float *) acc_malloc (N * sizeof (float));
+    d_b = (float *) acc_malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+        a[i] = 4.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 5.0;
+#else
+    exp = 4.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+
+#pragma acc parallel if(0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 17.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 8.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 9.0;
+#else
+    exp = 8.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+#pragma acc parallel if(zero)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 23.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 16.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 17.0;
+#else
+    exp = 16.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 76.0;
+
+#pragma acc parallel if(false)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 77.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 22.0;
+
+    n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 23.0;
+#else
+    exp = 22.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 18.0;
+
+    n = 0;
+
+#pragma acc parallel if(n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 19.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 49.0;
+
+    n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 50.0;
+#else
+    exp = 49.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 38.0;
+
+    n = 0;
+
+#pragma acc parallel if(n + n)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 39.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 91.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 92.0;
+#else
+    exp = 91.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 43.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+#if ACC_MEM_SHARED
+    exp = 44.0;
+#else
+    exp = 43.0;
+#endif
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != exp)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+        a[i] = 87.0;
+
+#pragma acc parallel if(one == 0)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            if (acc_on_device (acc_device_host))
+                b[ii] = a[ii] + 1;
+            else
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 88.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 9.0;
+    }
+
+#if ACC_MEM_SHARED
+    exp = 0.0;
+    exp2 = 0.0;
+#else
+    acc_map_data (a, d_a, N * sizeof (float));
+    acc_map_data (b, d_b, N * sizeof (float));
+    exp = 3.0;
+    exp2 = 9.0;
+#endif
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != exp)
+            abort();
+
+        if (b[i] != exp2)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 12.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N]) if(0)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != exp)
+            abort();
+
+        if (b[i] != exp2)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 26.0;
+        b[i] = 21.0;
+    }
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 0.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update host(a[0:N], b[0:N]) if(0)
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 0.0)
+            abort();
+
+        if (b[i] != 0.0)
+            abort();
+    }
+
+#if !ACC_MEM_SHARED
+    acc_unmap_data (a);
+    acc_unmap_data (b);
+#endif
+
+    acc_free (d_a);
+    acc_free (d_b);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1)
+{
+#pragma acc parallel present(a[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            b[ii] = a[ii];
+        }
+    }
+}
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 4.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 8.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0)
+{
+#if !ACC_MEM_SHARED
+    if (acc_is_present (a, N * sizeof (float)))
+        abort ();
+#endif
+
+#if !ACC_MEM_SHARED
+    if (acc_is_present (b, N * sizeof (float)))
+        abort ();
+#endif
+}
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 18.0;
+        b[i] = 21.0;
+    }
+
+#pragma acc data copyin(a[0:N]) if(1)
+{
+#if !ACC_MEM_SHARED
+    if (!acc_is_present (a, N * sizeof (float)))
+        abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(0)
+    {
+#if !ACC_MEM_SHARED
+        if (acc_is_present (b, N * sizeof (float)))
+            abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(1)
+        {
+#pragma acc parallel present(a[0:N]) present(b[0:N])
+            {
+                int ii;
+
+                for (ii = 0; ii < N; ii++)
+                {
+                    b[ii] = a[ii];
+                }
+            }
+        }
+
+#if !ACC_MEM_SHARED
+        if (acc_is_present (b, N * sizeof (float)))
+            abort ();
+#endif
+    }
+}
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 18.0)
+            abort ();
+	}
+
+#ifdef XXX_TODO_ENTER_END_DATA
+#endif
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c
index 8550662..3acfdf5 100644
--- a/libgomp/testsuite/libgomp.oacc-c/kernels-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c
@@ -1,10 +1,10 @@ 
 /* { dg-do run } */
 
-extern void abort ();
+#include <stdlib.h>
 
 int i;
 
-int main(void)
+int main (void)
 {
   int j, v;
 
@@ -83,8 +83,15 @@  int main(void)
       abort ();
     v = 1;
   }
-  if (v != 1 || i != -1 || j != -2)
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
     abort ();
+#else
+  if (i != -1 || j != -2)
+    abort ();
+#endif
 
   i = -1;
   j = -2;
@@ -127,8 +134,15 @@  int main(void)
       abort ();
     v = 1;
   }
-  if (v != 1 || i != -1 || j != -2)
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (i != 2 || j != 1)
     abort ();
+#else
+  if (i != -1 || j != -2)
+    abort ();
+#endif
 
 #if 0
   i = -1;
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-1.c b/libgomp/testsuite/libgomp.oacc-c/lib-1.c
index 8ad1b19..17129d8 100644
--- a/libgomp/testsuite/libgomp.oacc-c/lib-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-1.c
@@ -1,7 +1,24 @@ 
+/* { dg-do run } */
+
 #include <openacc.h>
 
 int
-main (void)
+main (int argc, char **argv)
 {
+  acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+  devtype = acc_device_nvidia;
+
+  if (acc_get_num_devices (devtype) == 0)
+    return 0;
+#endif
+
+  acc_init (devtype);
+
+  acc_init (devtype);
+
   return 0;
 }
+
+/* { dg-shouldfail "libgomp: device already active" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-10.c b/libgomp/testsuite/libgomp.oacc-c/lib-10.c
new file mode 100644
index 0000000..cf1af8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-10.c
@@ -0,0 +1,58 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  void *d;
+  acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+  devtype = acc_device_nvidia;
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+#endif
+
+  acc_init (devtype);
+
+  d = acc_malloc (0);
+  if (d != NULL)
+    abort ();
+
+  acc_free (0);
+
+  acc_shutdown (devtype);
+
+  acc_set_device_type (devtype);
+
+  d = acc_malloc (0);
+  if (d != NULL)
+    abort ();
+
+  acc_shutdown (devtype);
+
+  acc_init (devtype);
+
+  d = acc_malloc (1024);
+  if (d == NULL)
+    abort ();
+
+  acc_free (d);
+
+  acc_shutdown (devtype);
+
+  acc_set_device_type (devtype);
+
+  d = acc_malloc (1024);
+  if (d == NULL)
+    abort ();
+
+  acc_free (d);
+
+  acc_shutdown (devtype);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-11.c b/libgomp/testsuite/libgomp.oacc-c/lib-11.c
new file mode 100644
index 0000000..b4583ae
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-11.c
@@ -0,0 +1,22 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 512;
+  void *d;
+
+  d = acc_malloc (N);
+  if (d == NULL)
+    abort ();
+
+  acc_free ((void *)((uintptr_t) d + (uintptr_t) (N >> 1)));
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: mem free failed 1" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-12.c b/libgomp/testsuite/libgomp.oacc-c/lib-12.c
new file mode 100644
index 0000000..b46f590
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-12.c
@@ -0,0 +1,37 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  memset (h, 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-13.c b/libgomp/testsuite/libgomp.oacc-c/lib-13.c
new file mode 100644
index 0000000..7098ef3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-13.c
@@ -0,0 +1,60 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+
+  if (acc_is_present (h, N + 1) != 0)
+    abort ();
+
+  if (acc_is_present (h + 1, N) != 0)
+    abort ();
+
+  if (acc_is_present (h - 1, N) != 0)
+    abort ();
+
+  if (acc_is_present (h - 1, N - 1) != 0)
+    abort ();
+
+  if (acc_is_present (h + N, 0) != 0)
+    abort ();
+
+  if (acc_is_present (h + N, N) != 0)
+    abort ();
+
+  if (acc_is_present (0, N) != 0)
+    abort ();
+   
+  if (acc_is_present (h, 0) != 0)
+    abort ();
+
+  acc_free (d);
+
+  if (acc_is_present (h, 1) != 0)
+    abort ();
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-14.c b/libgomp/testsuite/libgomp.oacc-c/lib-14.c
new file mode 100644
index 0000000..a9632f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-14.c
@@ -0,0 +1,61 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+
+  if (acc_is_present (h + N - 1, 1) != 1)
+    abort ();
+
+  if (acc_is_present (h - 1, 1) != 0)
+    abort ();
+
+  if (acc_is_present (h + N, 1) != 0)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, N - i) != 1)
+	abort ();
+    }
+
+  acc_free (d);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, N - i) != 0)
+	abort ();
+    }
+
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-15.c b/libgomp/testsuite/libgomp.oacc-c/lib-15.c
new file mode 100644
index 0000000..4f6a731
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-15.c
@@ -0,0 +1,33 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-16.c b/libgomp/testsuite/libgomp.oacc-c/lib-16.c
new file mode 100644
index 0000000..9d277ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-16.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  (void) acc_copyin (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-17.c b/libgomp/testsuite/libgomp.oacc-c/lib-17.c
new file mode 100644
index 0000000..5ff894c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-17.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h, N);
+
+  acc_copyout (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-18.c b/libgomp/testsuite/libgomp.oacc-c/lib-18.c
new file mode 100644
index 0000000..2bc3263
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-18.c
@@ -0,0 +1,34 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+
+  acc_free (d);
+
+  acc_copyout (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-19.c b/libgomp/testsuite/libgomp.oacc-c/lib-19.c
new file mode 100644
index 0000000..3581616
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-19.c
@@ -0,0 +1,60 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h[N];
+
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+
+      h[i] = (unsigned char *) malloc (N);
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  p[j] = i;
+	}
+
+      (void) acc_copyin (p, N);
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      memset (h[i], 0, i);
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+
+      acc_copyout (h[i], N);
+
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)
+	    abort ();
+	}
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      free (h[i]);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-2.c b/libgomp/testsuite/libgomp.oacc-c/lib-2.c
new file mode 100644
index 0000000..9a4501f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-2.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+  devtype = acc_device_nvidia;
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+#endif
+
+  acc_init (devtype);
+
+  acc_shutdown (devtype);
+
+  acc_shutdown (devtype);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: no device initialized" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-20.c b/libgomp/testsuite/libgomp.oacc-c/lib-20.c
new file mode 100644
index 0000000..b379a8f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-20.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h, N + 1);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-21.c b/libgomp/testsuite/libgomp.oacc-c/lib-21.c
new file mode 100644
index 0000000..3a67400
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-21.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h, 0);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-22.c b/libgomp/testsuite/libgomp.oacc-c/lib-22.c
new file mode 100644
index 0000000..2b86da8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-22.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  (void) acc_copyin (h, N);
+
+  acc_copyout (h + 1, N - 1);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-23.c b/libgomp/testsuite/libgomp.oacc-c/lib-23.c
new file mode 100644
index 0000000..38f236d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-23.c
@@ -0,0 +1,39 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h1, *h2;
+
+  h1 = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h1[i] = 0xab;
+    }
+
+  (void) acc_copyin (h1, N);
+
+  h2 = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h2[i] = 0xde;
+    }
+
+  (void) acc_copyin (h2, N);
+
+  acc_copyout (h1, N + N);
+
+  free (h1);
+  free (h2);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-24.c b/libgomp/testsuite/libgomp.oacc-c/lib-24.c
new file mode 100644
index 0000000..d7de8e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-24.c
@@ -0,0 +1,55 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-25.c b/libgomp/testsuite/libgomp.oacc-c/lib-25.c
new file mode 100644
index 0000000..1145828
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-25.c
@@ -0,0 +1,30 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-26.c b/libgomp/testsuite/libgomp.oacc-c/lib-26.c
new file mode 100644
index 0000000..a23f56e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-26.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, 0);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-27.c b/libgomp/testsuite/libgomp.oacc-c/lib-27.c
new file mode 100644
index 0000000..074fddb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-27.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (0, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\)\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-28.c b/libgomp/testsuite/libgomp.oacc-c/lib-28.c
new file mode 100644
index 0000000..027f7cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-28.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  acc_delete (0, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-29.c b/libgomp/testsuite/libgomp.oacc-c/lib-29.c
new file mode 100644
index 0000000..a66de0f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-29.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, 0);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-3.c b/libgomp/testsuite/libgomp.oacc-c/lib-3.c
new file mode 100644
index 0000000..e823a41
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-3.c
@@ -0,0 +1,15 @@ 
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  acc_init (acc_device_host);
+
+  acc_shutdown (acc_device_not_host);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-30.c b/libgomp/testsuite/libgomp.oacc-c/lib-30.c
new file mode 100644
index 0000000..ce2bdb4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-30.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_create (h, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N - 2);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-31.c b/libgomp/testsuite/libgomp.oacc-c/lib-31.c
new file mode 100644
index 0000000..25ce5a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-31.c
@@ -0,0 +1,27 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_present_or_create (h, N);
+  if (!d)
+    abort ();
+
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-32.c b/libgomp/testsuite/libgomp.oacc-c/lib-32.c
new file mode 100644
index 0000000..e3f87a8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-32.c
@@ -0,0 +1,38 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d1, *d2;
+  /* Repeated present-or-create calls must return one device address.  */
+  h = (unsigned char *) malloc (N);
+
+  d1 = acc_present_or_create (h, N);
+  if (!d1)
+    abort ();
+  /* Same range again: the test expects the address already held in d1.  */
+  d2 = acc_present_or_create (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+  /* Same check again, this time through the acc_pcreate entry point.  */
+  d2 = acc_pcreate (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-33.c b/libgomp/testsuite/libgomp.oacc-c/lib-33.c
new file mode 100644
index 0000000..4abaa02
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-33.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d1, *d2;
+
+  h = (unsigned char *) malloc (N);
+
+  d1 = acc_present_or_create (h, N);
+  if (!d1)
+    abort ();
+
+  d2 = acc_present_or_create (h, N - 2);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-34.c b/libgomp/testsuite/libgomp.oacc-c/lib-34.c
new file mode 100644
index 0000000..32d5d51
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-34.c
@@ -0,0 +1,33 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d1, *d2;
+
+  h = (unsigned char *) malloc (N);
+
+  d1 = acc_present_or_create (h, N);
+  if (!d1)
+    abort ();
+
+  d2 = acc_present_or_create (h + 2, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-35.c b/libgomp/testsuite/libgomp.oacc-c/lib-35.c
new file mode 100644
index 0000000..ca8edab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-35.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_present_or_create (0, N);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-36.c b/libgomp/testsuite/libgomp.oacc-c/lib-36.c
new file mode 100644
index 0000000..cb29397
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-36.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_present_or_create (h, 0);
+  if (!d)
+    abort ();
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-37.c b/libgomp/testsuite/libgomp.oacc-c/lib-37.c
new file mode 100644
index 0000000..5a7d533
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-37.c
@@ -0,0 +1,40 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_present_or_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-38.c b/libgomp/testsuite/libgomp.oacc-c/lib-38.c
new file mode 100644
index 0000000..1e16a1d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-38.c
@@ -0,0 +1,67 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d1, *d2;
+  /* Exercise acc_present_or_copyin / acc_pcopyin on one host buffer.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d1 = acc_present_or_copyin (h, N);
+  if (!d1)
+    abort ();
+  /* Overwrite the host copy after the first copyin.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+  /* Second call on the same range: must hand back the same address.  */
+  d2 = acc_present_or_copyin (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+  /* Clear the host copy; the copyout must restore 0..N-1, not 0xab.  */
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+  /* NOTE(review): assumes the address equals d1 even after copyout.  */
+  d2 = acc_pcopyin (h, N);
+  if (!d2)
+    abort ();
+
+  if (d1 != d2)
+    abort ();
+
+  acc_copyout (h, N);
+  /* Host data was unchanged since the last copyout, so still 0..N-1.  */
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-39.c b/libgomp/testsuite/libgomp.oacc-c/lib-39.c
new file mode 100644
index 0000000..db1e0b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-39.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_present_or_copyin (0, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-4.c b/libgomp/testsuite/libgomp.oacc-c/lib-4.c
new file mode 100644
index 0000000..060275b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-4.c
@@ -0,0 +1,13 @@ 
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  acc_init ((acc_device_t) 99);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 99 is out of range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-40.c b/libgomp/testsuite/libgomp.oacc-c/lib-40.c
new file mode 100644
index 0000000..cb6c422
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-40.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_present_or_copyin (h, 0);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-41.c b/libgomp/testsuite/libgomp.oacc-c/lib-41.c
new file mode 100644
index 0000000..01c5f3c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-41.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+
+  acc_update_device (h, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-42.c b/libgomp/testsuite/libgomp.oacc-c/lib-42.c
new file mode 100644
index 0000000..d577fe3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-42.c
@@ -0,0 +1,35 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  acc_update_device (h, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-43.c b/libgomp/testsuite/libgomp.oacc-c/lib-43.c
new file mode 100644
index 0000000..ceeb155
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-43.c
@@ -0,0 +1,45 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+
+  acc_update_device (0, N);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-44.c b/libgomp/testsuite/libgomp.oacc-c/lib-44.c
new file mode 100644
index 0000000..0cabb0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-44.c
@@ -0,0 +1,45 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+
+  acc_update_device (h, 0);
+
+  acc_copyout (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-45.c b/libgomp/testsuite/libgomp.oacc-c/lib-45.c
new file mode 100644
index 0000000..f9a6294
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-45.c
@@ -0,0 +1,50 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Partial acc_update_device: only the first N - 2 bytes are pushed.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+  /* Host now differs from what was copied in (0xab instead of i).  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = 0xab;
+    }
+
+  acc_update_device (h, N - 2);
+
+  acc_copyout (h, N);
+  /* The updated prefix must read back as 0xab ...  */
+  for (i = 0; i < N - 2; i++)
+    {
+      if (h[i] != 0xab)
+	abort ();
+    }
+  /* ... while the last two bytes keep the values from the copyin.  */
+  for (i = N - 2; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-46.c b/libgomp/testsuite/libgomp.oacc-c/lib-46.c
new file mode 100644
index 0000000..b195725
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-46.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_update_self (h, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-47.c b/libgomp/testsuite/libgomp.oacc-c/lib-47.c
new file mode 100644
index 0000000..a7ff904
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-47.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_update_self (0, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-48.c b/libgomp/testsuite/libgomp.oacc-c/lib-48.c
new file mode 100644
index 0000000..01d3c6c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-48.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+
+  memset (&h[0], 0, N);
+
+  acc_update_self (h, 0);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-49.c b/libgomp/testsuite/libgomp.oacc-c/lib-49.c
new file mode 100644
index 0000000..a33324c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-49.c
@@ -0,0 +1,48 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Partial acc_update_self: only the first N - 2 bytes are fetched.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_copyin (h, N);
+  if (!d)
+    abort ();
+  /* Zero the host copy so fetched bytes can be told from untouched ones.  */
+  memset (&h[0], 0, N);
+
+  acc_update_self (h, N - 2);
+  /* The fetched prefix must hold the values that were copied in ...  */
+  for (i = 0; i < N - 2; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+  /* ... and the last two host bytes must still be the memset zeros.  */
+  for (i = N - 2; i < N; i++)
+    {
+      if (h[i] != 0)
+	abort ();
+    }
+
+  acc_delete (h, N);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-5.c b/libgomp/testsuite/libgomp.oacc-c/lib-5.c
new file mode 100644
index 0000000..961a62c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-5.c
@@ -0,0 +1,40 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+
+  acc_init (acc_device_default);
+
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+
+  acc_shutdown (acc_device_default);
+
+  if (acc_get_num_devices (acc_device_nvidia) != 0)
+    {
+      acc_init (acc_device_nvidia);
+
+      if (acc_get_device_type () != acc_device_nvidia)
+        abort ();
+
+      acc_shutdown (acc_device_nvidia);
+
+      acc_init (acc_device_default);
+
+      acc_set_device_type (acc_device_nvidia);
+
+      if (acc_get_device_type () != acc_device_nvidia)
+        abort ();
+
+      acc_shutdown (acc_device_nvidia);
+    }
+
+  return 0;
+
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-50.c b/libgomp/testsuite/libgomp.oacc-c/lib-50.c
new file mode 100644
index 0000000..e8294e1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-50.c
@@ -0,0 +1,30 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  if (acc_is_present (h, N) != 1)
+    abort ();
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-51.c b/libgomp/testsuite/libgomp.oacc-c/lib-51.c
new file mode 100644
index 0000000..29d28f2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-51.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h[N];
+  void *d[N];
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = (unsigned char *) malloc (N);
+      d[i] = acc_malloc (N);
+
+      acc_map_data (h[i], d[i], N);
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h[i], N) != 1)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      acc_unmap_data (h[i]);
+
+      if (acc_is_present (h[i], N) != 0)
+	abort ();
+
+      acc_free (d[i]);
+      free (h[i]);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-52.c b/libgomp/testsuite/libgomp.oacc-c/lib-52.c
new file mode 100644
index 0000000..780db31
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-52.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (0, d, N);
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[(nil),+256\]->\[\h+,\+256\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-53.c b/libgomp/testsuite/libgomp.oacc-c/lib-53.c
new file mode 100644
index 0000000..657adde
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-53.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, 0, N);
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[(nil),\+256\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-54.c b/libgomp/testsuite/libgomp.oacc-c/lib-54.c
new file mode 100644
index 0000000..1f3df80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-54.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, 0);
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-55.c b/libgomp/testsuite/libgomp.oacc-c/lib-55.c
new file mode 100644
index 0000000..286653f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-55.c
@@ -0,0 +1,48 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  int i;
+  void *d;
+  /* Map a buffer as N separate one-byte mappings, then unmap them all.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+  /* One acc_map_data call per byte: host byte i <-> device byte i.  */
+  for (i = 0; i < N; i++)
+    {
+      acc_map_data ((void *)((uintptr_t) h + (uintptr_t) i),
+		    (void *)((uintptr_t) d + (uintptr_t) i), 1);
+    }
+  /* Every individual byte (h + i, not just h + 1) must be present.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 1)
+	abort ();
+    }
+  /* Remove the mappings one by one, using the same host addresses.  */
+  for (i = 0; i < N; i++)
+    {
+      acc_unmap_data (h + i);
+    }
+  /* After unmapping, no byte of the buffer may still be present.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-56.c b/libgomp/testsuite/libgomp.oacc-c/lib-56.c
new file mode 100644
index 0000000..e3f5a80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-56.c
@@ -0,0 +1,33 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+  /* Map only the first half of the host buffer and probe both halves.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N >> 1);
+  /* The first byte lies inside the mapped half.  */
+  if (acc_is_present (h, 1) != 1)
+    abort ();
+  /* The first byte of the second half was never mapped.  */
+  if (acc_is_present (h + (N >> 1), 1) != 0)
+    abort ();
+
+  acc_unmap_data (h);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-57.c b/libgomp/testsuite/libgomp.oacc-c/lib-57.c
new file mode 100644
index 0000000..f9043a4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-57.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_unmap_data (d);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-58.c b/libgomp/testsuite/libgomp.oacc-c/lib-58.c
new file mode 100644
index 0000000..9d6e27d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-58.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_unmap_data (0);
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-59.c b/libgomp/testsuite/libgomp.oacc-c/lib-59.c
new file mode 100644
index 0000000..2f087ae
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-59.c
@@ -0,0 +1,55 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Check acc_hostptr/acc_deviceptr for every byte of one mapping.  */
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+  /* Device address d + i must translate back to host address h + i.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) !=
+                            (void *)((uintptr_t) h + (uintptr_t) i))
+	abort ();
+    }
+  /* ... and host address h + i forward to device address d + i.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_deviceptr ((void *)((uintptr_t) h + (uintptr_t) i)) !=
+                            (void *)((uintptr_t) d + (uintptr_t) i))
+	abort ();
+    }
+
+  acc_unmap_data (h);
+  /* Once unmapped, both lookups are expected to return a null pointer.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != 0)
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_deviceptr (h + i) != 0)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-6.c b/libgomp/testsuite/libgomp.oacc-c/lib-6.c
new file mode 100644
index 0000000..afdd480
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-6.c
@@ -0,0 +1,39 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  int devnum;
+
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  acc_set_device_type (acc_device_nvidia);
+
+  if (acc_get_device_type () != acc_device_nvidia)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  acc_set_device_type (acc_device_nvidia);
+
+  if (acc_get_device_type () != acc_device_nvidia)
+    abort ();
+
+  devnum = acc_get_num_devices (acc_device_host);
+  if (devnum != 1)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  if (acc_get_device_type () == acc_device_default)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-60.c b/libgomp/testsuite/libgomp.oacc-c/lib-60.c
new file mode 100644
index 0000000..ccae728
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-60.c
@@ -0,0 +1,54 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Round-trip a buffer through acc_memcpy_to_device/_from_device.  */
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+  /* The raw copy must not have made any host byte "present".  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+  /* Clear the host copy so the values checked below come from d.  */
+  memset (&h[0], 0, N);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+  /* Copying back must not have created a mapping either.  */
+  for (i = 0; i < N; i++)
+    {
+      if (acc_is_present (h + i, 1) != 0)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-61.c b/libgomp/testsuite/libgomp.oacc-c/lib-61.c
new file mode 100644
index 0000000..ce66ced
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-61.c
@@ -0,0 +1,70 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h[N];
+  void *d[N];
+
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+
+      h[i] = (unsigned char *) malloc (N);
+
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  p[j] = i;
+	}
+
+      d[i] = acc_malloc (N);
+
+      acc_memcpy_to_device (d[i], h[i], N);
+
+      for (j = 0; j < N; j++)
+	{
+	  if (acc_is_present (h[i] + j, 1) != 0)
+	    abort ();
+	}
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      int j;
+      unsigned char *p;
+
+      memset (h[i], 0, N);
+
+      acc_memcpy_from_device (h[i], d[i], N);
+
+      p = h[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)
+	    abort ();
+	}
+
+      for (j = 0; j < N; j++)
+	{
+	  if (acc_is_present (h[i] + j, 1) != 0)
+	    abort ();
+	}
+
+      acc_free (d[i]);
+
+      free (h[i]);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-62.c b/libgomp/testsuite/libgomp.oacc-c/lib-62.c
new file mode 100644
index 0000000..e6178e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-62.c
@@ -0,0 +1,49 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  acc_init (acc_device_nvidia);
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+
+  acc_memcpy_to_device (d, h, N << 1);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid size" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-63.c b/libgomp/testsuite/libgomp.oacc-c/lib-63.c
new file mode 100644
index 0000000..ca237ec
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-63.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+
+  acc_memcpy_to_device (0, h, N);
+
+  memset (&h[0], 0, N);
+
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-64.c b/libgomp/testsuite/libgomp.oacc-c/lib-64.c
new file mode 100644
index 0000000..850fd2e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-64.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: host-to-device copy with a null host address.  */
+  h = (unsigned char *) malloc (N);
+  /* Fill the host buffer with the byte pattern 0 .. N-1.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* The host source is a null pointer instead of h.  */
+  acc_memcpy_to_device (d, 0, N);
+
+  memset (&h[0], 0, N);
+  /* dg-shouldfail after main expects "libgomp: invalid host address".  */
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-65.c b/libgomp/testsuite/libgomp.oacc-c/lib-65.c
new file mode 100644
index 0000000..26c8cef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-65.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: the same pointer is used as source and destination.  */
+  h = (unsigned char *) malloc (N);
+  /* Fill the host buffer with the byte pattern 0 .. N-1.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* d is passed both as the device destination and as the host source.  */
+  acc_memcpy_to_device (d, d, N);
+
+  memset (&h[0], 0, N);
+  /* dg-shouldfail after main expects "invalid host or device address".  */
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host or device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-66.c b/libgomp/testsuite/libgomp.oacc-c/lib-66.c
new file mode 100644
index 0000000..360c05b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-66.c
@@ -0,0 +1,47 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Exercise a zero-length host-to-device copy.  */
+  acc_init (acc_device_nvidia);
+
+  h = (unsigned char *) malloc (N);
+  /* Fill the host buffer with the byte pattern 0 .. N-1.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* Put the pattern on the device, then wipe the host copy.  */
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* Zero-length copy from the now zeroed host buffer.  */
+  acc_memcpy_to_device (d, h, 0);
+  /* The check below needs the device to still hold the original pattern.  */
+  acc_memcpy_from_device (h, d, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-67.c b/libgomp/testsuite/libgomp.oacc-c/lib-67.c
new file mode 100644
index 0000000..01b8b2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-67.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: device-to-host copy with a null host address.  */
+  h = (unsigned char *) malloc (N);
+  /* Fill the host buffer with the byte pattern 0 .. N-1.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* Put the pattern on the device, then wipe the host copy.  */
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* The host destination is a null pointer instead of h.  */
+  acc_memcpy_from_device (0, d, N);
+  /* dg-shouldfail after main expects "libgomp: invalid host address".  */
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-68.c b/libgomp/testsuite/libgomp.oacc-c/lib-68.c
new file mode 100644
index 0000000..3ff5bd7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-68.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  unsigned char *h;
+  void *d;
+  /* Negative test: device-to-host copy with a null device address.  */
+  h = (unsigned char *) malloc (N);
+  /* Fill the host buffer with the byte pattern 0 .. N-1.  */
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  d = acc_malloc (N);
+  /* Put the pattern on the device, then wipe the host copy.  */
+  acc_memcpy_to_device (d, h, N);
+
+  memset (&h[0], 0, N);
+  /* The device source is a null pointer instead of d.  */
+  acc_memcpy_from_device (h, 0, N);
+  /* dg-shouldfail after main expects "libgomp: invalid device address".  */
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-69.c b/libgomp/testsuite/libgomp.oacc-c/lib-69.c
new file mode 100644
index 0000000..5462f12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-69.c
@@ -0,0 +1,125 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+/* Check acc_async_test on a CUDA stream bound to async value 0.  */
+#include <stdio.h>
+#include <stdlib.h>	/* abort, malloc, free, exit.  */
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): dtime is presumably milliseconds -- confirm in subr.cu.  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* No stream is expected for async value 0 before one is set.  */
+  stream = (CUstream) acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Bind the new stream to async value 0; a zero return is a failure.  */
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* Right after the launch acc_async_test is expected to return 0.  */
+  if (acc_async_test (0) != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+  /* After sleeping, acc_async_test is expected to return 1.  */
+  sleep (1);
+
+  if (acc_async_test (0) != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-7.c b/libgomp/testsuite/libgomp.oacc-c/lib-7.c
new file mode 100644
index 0000000..e78734b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-7.c
@@ -0,0 +1,18 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_none) != 0)
+    abort ();	/* acc_device_none has to report zero devices.  */
+  /* acc_device_host has to report at least one device.  */
+  if (acc_get_num_devices (acc_device_host) == 0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-70.c b/libgomp/testsuite/libgomp.oacc-c/lib-70.c
new file mode 100644
index 0000000..912b266
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-70.c
@@ -0,0 +1,136 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  const int N = 10;
+  int i;
+  CUstream streams[N];
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test on N streams bound to async values 0 .. N-1.  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): dtime is presumably milliseconds (see sleep below).  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* Give each async value 0 .. N-1 its own newly created stream.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+        if (!acc_set_cuda_stream (i, streams[i]))
+	  abort ();
+    }
+  /* Launch one kernel per stream; acc_async_test (i) should then be 0.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      if (acc_async_test (i) != 0)
+	{
+	  fprintf (stderr, "asynchronous operation not running\n");
+	  abort ();
+	}
+    }
+  /* After sleeping, every async value is expected to test as 1.  */
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  for (i = 0; i < N; i++)
+    {
+      if (acc_async_test (i) != 1)
+	{
+	  fprintf (stderr, "found asynchronous operation still running\n");
+	  abort ();
+	}
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-71.c b/libgomp/testsuite/libgomp.oacc-c/lib-71.c
new file mode 100644
index 0000000..a045379
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-71.c
@@ -0,0 +1,120 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+/* Negative test: acc_async_test on an async value that has no stream.  */
+#include <stdio.h>
+#include <stdlib.h>	/* abort, malloc, free.  */
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): dtime is presumably milliseconds (see sleep below).  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Only async value 0 gets a stream; value 1 is queried below.  */
+  acc_set_cuda_stream (0, stream);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* The dg-shouldfail directive at the end names "unknown async".  */
+  if (acc_async_test (1) != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test (1) != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-72.c b/libgomp/testsuite/libgomp.oacc-c/lib-72.c
new file mode 100644
index 0000000..e383ba0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-72.c
@@ -0,0 +1,121 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test_all with one stream bound to async value 0.  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): dtime is presumably milliseconds (see sleep below).  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Bind the new stream to async value 0; a zero return is a failure.  */
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+  /* Launch the delay kernel on that stream.  */
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* Right after the launch acc_async_test_all is expected to return 0.  */
+  if (acc_async_test_all () != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+  /* After sleeping, acc_async_test_all is expected to return 1.  */
+  sleep ((int) (dtime / 1000.f) + 1);
+
+  if (acc_async_test_all () != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-73.c b/libgomp/testsuite/libgomp.oacc-c/lib-73.c
new file mode 100644
index 0000000..43a8b7e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-73.c
@@ -0,0 +1,134 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  const int N = 10;
+  int i;
+  CUstream streams[N];
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Check acc_async_test_all with N streams on async values 0 .. N-1.  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): dtime is presumably milliseconds (see sleep below).  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* Give each async value 0 .. N-1 its own newly created stream.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+  /* Launch one delay kernel on every stream.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+    }
+  /* Right after the launches acc_async_test_all is expected to return 0.  */
+  if (acc_async_test_all () != 0)
+    {
+      fprintf (stderr, "asynchronous operation not running\n");
+      abort ();
+    }
+  /* After sleeping, acc_async_test_all is expected to return 1.  */
+  sleep ((int) (dtime / 1000.0f) + 1);
+
+  if (acc_async_test_all () != 1)
+    {
+      fprintf (stderr, "found asynchronous operation still running\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-74.c b/libgomp/testsuite/libgomp.oacc-c/lib-74.c
new file mode 100644
index 0000000..0726ee4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-74.c
@@ -0,0 +1,139 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Time acc_wait (0) around a delay kernel launched on async value 0.  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): unit of dtime is not visible here -- confirm in subr.cu.  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+  /* No stream is expected for async value 0 before one is set.  */
+  stream = (CUstream) acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Bind the new stream to async value 0; a zero return is a failure.  */
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+  /* The timer helpers come from timer.h, which is not shown here.  */
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  acc_wait (0);
+
+  atime = stop_timer (0);
+  /* Launch plus wait must have taken at least dtime.  */
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+  /* A second wait on the same async value must take at most 0.010.  */
+  start_timer (0);
+
+  acc_wait (0);
+
+  atime = stop_timer (0);
+
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-75.c b/libgomp/testsuite/libgomp.oacc-c/lib-75.c
new file mode 100644
index 0000000..1942211
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-75.c
@@ -0,0 +1,141 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Time N launch + acc_wait (0) rounds on one stream (async value 0).  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): unit of dtime is not visible here -- confirm in subr.cu.  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+  /* One launch/wait round per multiprocessor.  */
+  N = nprocs;
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* No stream is expected for async value 0 before one is set.  */
+  stream = (CUstream) acc_get_cuda_stream (0);
+  if (stream != NULL)
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Bind the new stream to async value 0; a zero return is a failure.  */
+  if (!acc_set_cuda_stream (0, stream))
+    abort ();
+  /* The timer helpers come from timer.h, which is not shown here.  */
+  init_timers (1);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+  /* Each kernel is waited for before the next one is launched.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      acc_wait (0);
+    }
+
+  atime = stop_timer (0);
+  /* Accept a total within 2% either side of N * dtime.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+  /* The message covers both bounds, not only the lower one.  */
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-76.c b/libgomp/testsuite/libgomp.oacc-c/lib-76.c
new file mode 100644
index 0000000..11d9d62
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-76.c
@@ -0,0 +1,147 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream *streams;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Time N launch + acc_wait (i) rounds, one stream per async value.  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): unit of dtime is not visible here -- confirm in subr.cu.  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+  /* One stream and one launch/wait round per multiprocessor.  */
+  N = nprocs;
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Size the array by its element type rather than by void *.  */
+  streams = (CUstream *) malloc (N * sizeof (CUstream));
+  /* Give each async value 0 .. N-1 its own newly created stream.  */
+  for (i = 0; i < N; i++)
+    {
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+  /* The timer helpers come from timer.h, which is not shown here.  */
+  init_timers (1);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+  /* Each kernel is waited for before the next one is launched.  */
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      acc_wait (i);
+    }
+
+  atime = stop_timer (0);
+  /* Accept a total within 2% either side of N * dtime.  */
+  hitime = dtime * N;
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+  /* The message covers both bounds, not only the lower one.  */
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (streams);
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-77.c b/libgomp/testsuite/libgomp.oacc-c/lib-77.c
new file mode 100644
index 0000000..e47212b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-77.c
@@ -0,0 +1,135 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+  /* Negative test: acc_wait on an async value that has no stream.  */
+  acc_init (acc_device_nvidia);
+  /* Use the OpenACC device number to obtain the CUDA device handle.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+  /* Fetch the "delay" kernel out of the subr.ptx module.  */
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+  /* NOTE(review): unit of dtime is not visible here -- confirm in subr.cu.  */
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+  /* Kernel arguments: the device buffer and the tick count.  */
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+  /* Only async value 0 gets a stream; value 1 is waited on below.  */
+  acc_set_cuda_stream (0, stream);
+  /* The timer helpers come from timer.h, which is not shown here.  */
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+  /* The dg-shouldfail directive after main names "unknown async".  */
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+  /* This branch reports an upper-bound violation, so say so.  */
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-78.c b/libgomp/testsuite/libgomp.oacc-c/lib-78.c
new file mode 100644
index 0000000..4f58fb2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-78.c
@@ -0,0 +1,140 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;  /* Requested delay; same unit as stop_timer's result.  */
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  stream = (CUstream) acc_get_cuda_stream (0);
+  if (stream != NULL)  /* Async 0 must not have a stream bound yet.  */
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))  /* Bind the new stream to async 0.  */
+    abort ();
+
+  init_timers (1);
+
+  start_timer (0);
+
+  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+      abort ();
+    }
+
+  acc_wait_all ();  /* Must not return before the delay kernel is done.  */
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  acc_wait_all ();  /* Second wait; the check below requires it be quick.  */
+
+  atime = stop_timer (0);
+
+  if (0.010 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-79.c b/libgomp/testsuite/libgomp.oacc-c/lib-79.c
new file mode 100644
index 0000000..ef3df13
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-79.c
@@ -0,0 +1,167 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime, hitime, lotime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  /* Queue N delay kernels on async 0, then make async 1 wait on them.  */
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;  /* Requested delay; same unit as stop_timer's result.  */
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;  /* One kernel launch per multiprocessor reported above.  */
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (1, stream))  /* First stream becomes async 1.  */
+    abort ();
+
+  stream = (CUstream) acc_get_cuda_stream (0);
+  if (stream != NULL)  /* Async 0 must not have a stream bound yet.  */
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))  /* Second stream becomes async 0.  */
+    abort ();
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+    }
+
+  acc_wait_async (0, 1);  /* Async 1 now depends on the work on async 0.  */
+
+  if (acc_async_test (0) != 0)  /* Kernels are still expected to be running.  */
+    abort ();
+
+  if (acc_async_test (1) != 0)
+    abort ();
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (acc_async_test (0) != 1)  /* Both queues must be drained by now.  */
+    abort ();
+
+  if (acc_async_test (1) != 1)
+    abort ();
+
+  hitime = dtime * N;  /* Accept +/- 2% around N back-to-back delays.  */
+  hitime += hitime * 0.02;
+
+  lotime = dtime * N;
+  lotime -= lotime * 0.02;
+
+  if (atime > hitime || atime < lotime)
+    {
+      fprintf (stderr, "actual time outside expected range\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-80.c b/libgomp/testsuite/libgomp.oacc-c/lib-80.c
new file mode 100644
index 0000000..0b5ec24
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-80.c
@@ -0,0 +1,132 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  CUstream stream;
+  int N;
+  int i;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 200.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;  /* One kernel launch per multiprocessor reported above.  */
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+  acc_set_cuda_stream (1, stream);  /* Result is deliberately not checked.  */
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  start_timer (0);
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+    }
+
+  acc_wait_async (1, 1);  /* Same async twice; the test is dg-shouldfail.  */
+
+  acc_wait (1);
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-shouldfail "libgomp: identical parameters" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-81.c b/libgomp/testsuite/libgomp.oacc-c/lib-81.c
new file mode 100644
index 0000000..d5f18f0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-81.c
@@ -0,0 +1,211 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream *streams, stream;
+  unsigned long *a, *d_a, dticks;
+  int nbytes;
+  float atime, dtime;
+  void *kargs[2];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay, module, "delay");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = nprocs * sizeof (unsigned long);
+
+  dtime = 500.0;
+
+  dticks = (unsigned long) (dtime * clkrate);
+
+  N = nprocs;  /* One stream and one kernel launch per multiprocessor.  */
+
+  a = (unsigned long *) malloc (nbytes);
+  d_a = (unsigned long *) acc_malloc (nbytes);
+
+  acc_map_data (a, d_a, nbytes);
+
+  streams = (CUstream *) malloc (N * sizeof (void *));
+
+  for (i = 0; i < N; i++)  /* Bind a fresh stream to each async 0..N-1.  */
+    {
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+        if (!acc_set_cuda_stream (i, streams[i]))
+	  abort ();
+    }
+
+  init_timers (1);
+
+  kargs[0] = (void *) &d_a;
+  kargs[1] = (void *) &dticks;
+
+  stream = (CUstream) acc_get_cuda_stream (N);
+  if (stream != NULL)  /* Async N must not have a stream bound yet.  */
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (N, stream))
+    abort ();
+
+  start_timer (0);
+
+  for (i = 0; i < N; i++)
+    {
+      r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+    }
+
+  acc_wait_all_async (N);  /* Async N waits on everything queued so far.  */
+
+  for (i = 0; i <= N; i++)  /* Nothing may report completion yet.  */
+    {
+      if (acc_async_test (i) != 0)
+	abort ();
+    }
+
+  acc_wait (N);
+
+  for (i = 0; i <= N; i++)  /* After the wait every queue must be done.  */
+    {
+      if (acc_async_test (i) != 1)
+	abort ();
+    }
+
+  atime = stop_timer (0);
+
+  if (atime < dtime)
+    {
+      fprintf (stderr, "actual time < delay time\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  stream = (CUstream) acc_get_cuda_stream (N + 1);
+  if (stream != NULL)  /* Async N + 1 must not have a stream bound yet.  */
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (N + 1, stream))
+    abort ();
+
+  acc_wait_all_async (N + 1);  /* No work pending; should be quick.  */
+
+  acc_wait (N + 1);
+
+  atime = stop_timer (0);
+
+  if (0.10 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  start_timer (0);
+
+  acc_wait_all_async (N);  /* Reuse async N with nothing pending.  */
+
+  acc_wait (N);
+
+  atime = stop_timer (0);
+
+  if (0.10 < atime)
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  acc_unmap_data (a);
+
+  fini_timers ();
+
+  free (streams);
+  free (a);
+  acc_free (d_a);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-82.c b/libgomp/testsuite/libgomp.oacc-c/lib-82.c
new file mode 100644
index 0000000..be30a7f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-82.c
@@ -0,0 +1,144 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  CUdevice dev;
+  CUfunction delay2;
+  CUmodule module;
+  CUresult r;
+  int N;
+  int i;
+  CUstream *streams;
+  unsigned long **a, **d_a, *tid, ticks;
+  int nbytes;
+  void *kargs[3];
+  int clkrate;
+  int devnum, nprocs;
+
+  acc_init (acc_device_nvidia);
+
+  devnum = acc_get_device_num (acc_device_nvidia);
+
+  r = cuDeviceGet (&dev, devnum);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+      abort ();
+    }
+
+  r =
+    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+			  dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleLoad (&module, "subr.ptx");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuModuleGetFunction (&delay2, module, "delay2");
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+      abort ();
+    }
+
+  nbytes = sizeof (unsigned long);  /* Size of each a[i]/d_a[i] object.  */
+
+  ticks = (unsigned long) (200.0 * clkrate);
+
+  N = nprocs;  /* One stream and one kernel launch per multiprocessor.  */
+
+  streams = (CUstream *) malloc (N * sizeof (void *));
+
+  a = (unsigned long **) malloc (N * sizeof (unsigned long *));
+  d_a = (unsigned long **) malloc (N * sizeof (unsigned long *));
+  tid = (unsigned long *) malloc (N * sizeof (unsigned long));
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = (unsigned long *) malloc (sizeof (unsigned long));
+      *a[i] = N;  /* Sentinel: never equal to any index i < N.  */
+      d_a[i] = (unsigned long *) acc_malloc (nbytes);
+      tid[i] = i;
+
+      acc_map_data (a[i], d_a[i], nbytes);
+
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+        abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+          abort ();
+        }
+
+      if (!acc_set_cuda_stream (i, streams[i]))
+	abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      kargs[0] = (void *) &d_a[i];
+      kargs[1] = (void *) &ticks;
+      kargs[2] = (void *) &tid[i];
+
+      r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+	  abort ();
+	}
+
+      ticks = (unsigned long) (50.0 * clkrate);  /* Shorter after the first.  */
+    }
+
+  acc_wait_all_async (0);
+
+  for (i = 0; i < N; i++)
+    {
+      acc_copyout (a[i], nbytes);
+      if (*a[i] != i)  /* Expect the value passed as tid[i] to come back.  */
+	abort ();
+    }
+
+  free (streams);
+
+  for (i = 0; i < N; i++)
+    {
+      free (a[i]);
+    }
+
+  free (a);
+  free (d_a);
+  free (tid);
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-83.c b/libgomp/testsuite/libgomp.oacc-c/lib-83.c
new file mode 100644
index 0000000..1c2e52b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-83.c
@@ -0,0 +1,58 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+  float atime;
+  CUstream stream;
+  CUresult r;
+
+  acc_init (acc_device_nvidia);
+
+  (void) acc_get_device_num (acc_device_nvidia);
+
+  init_timers (1);
+
+  stream = (CUstream) acc_get_cuda_stream (0);
+  if (stream != NULL)  /* Async 0 must not have a stream bound yet.  */
+    abort ();
+
+  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  if (!acc_set_cuda_stream (0, stream))  /* Bind the new stream to async 0.  */
+    abort ();
+
+  start_timer (0);
+
+  acc_wait_all_async (0);  /* No work was queued before this call.  */
+
+  acc_wait (0);
+
+  atime = stop_timer (0);
+
+  if (0.010 < atime)  /* With nothing pending the waits must be quick.  */
+    {
+      fprintf (stderr, "actual time too long\n");
+      abort ();
+    }
+
+  fini_timers ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-84.c b/libgomp/testsuite/libgomp.oacc-c/lib-84.c
new file mode 100644
index 0000000..786b908
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-84.c
@@ -0,0 +1,66 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 100;
+  int i;
+  CUstream *streams;
+  CUstream s;
+  CUresult r;
+
+  acc_init (acc_device_nvidia);
+
+  (void) acc_get_device_num (acc_device_nvidia);
+
+  streams = (CUstream *) malloc (N * sizeof (void *));
+
+  for (i = 0; i < N; i++)  /* Bind a fresh stream to each async 0..N-1.  */
+    {
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+        if (!acc_set_cuda_stream (i, streams[i]))
+	  abort ();
+    }
+
+  for (i = 0; i < N; i++)  /* Every stream handle must be unique.  */
+    {
+      int j;
+      int cnt;
+
+      cnt = 0;
+
+      s = streams[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (s == streams[j])
+	    cnt++;
+	}
+
+      if (cnt != 1)  /* Exactly one match: the entry itself.  */
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-85.c b/libgomp/testsuite/libgomp.oacc-c/lib-85.c
new file mode 100644
index 0000000..cf925a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-85.c
@@ -0,0 +1,52 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <stdio.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 100;
+  int i;
+  CUstream *streams;
+  CUstream s;
+  CUresult r;
+
+  acc_init (acc_device_nvidia);
+
+  (void) acc_get_device_num (acc_device_nvidia);
+
+  streams = (CUstream *) malloc (N * sizeof (void *));
+
+  for (i = 0; i < N; i++)  /* Bind a fresh stream to each async 0..N-1.  */
+    {
+      streams[i] = (CUstream) acc_get_cuda_stream (i);
+      if (streams[i] != NULL)
+	abort ();
+
+      r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+        if (!acc_set_cuda_stream (i, streams[i]))
+	  abort ();
+    }
+
+  s = NULL;
+
+  if (acc_set_cuda_stream (N + 1, s) != 0)  /* NULL stream must be refused.  */
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-86.c b/libgomp/testsuite/libgomp.oacc-c/lib-86.c
new file mode 100644
index 0000000..b8a8ee9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-86.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_nvidia) == 0)  /* Nothing to test.  */
+    return 0;
+
+  if (acc_get_current_cuda_device () != 0)  /* Before any acc_init.  */
+    abort ();
+
+  acc_init (acc_device_host);
+
+  if (acc_get_current_cuda_device () != 0)  /* Host device is active.  */
+    abort ();
+
+  acc_shutdown (acc_device_host);
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  if (acc_get_current_cuda_device () != 0)  /* After host shutdown.  */
+    abort ();
+
+  acc_init (acc_device_nvidia);
+
+  if (acc_get_current_cuda_device () == 0)  /* Must be non-zero now.  */
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  if (acc_get_current_cuda_device () != 0)  /* Zero again after shutdown.  */
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-87.c b/libgomp/testsuite/libgomp.oacc-c/lib-87.c
new file mode 100644
index 0000000..147d443
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-87.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  if (acc_get_num_devices (acc_device_nvidia) == 0)  /* Nothing to test.  */
+    return 0;
+
+  if (acc_get_current_cuda_context () != 0)  /* Before any acc_init.  */
+    abort ();
+
+  acc_init (acc_device_host);
+
+  if (acc_get_current_cuda_context () != 0)  /* Host device is active.  */
+    abort ();
+
+  acc_shutdown (acc_device_host);
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  if (acc_get_current_cuda_context () != 0)  /* After host shutdown.  */
+    abort ();
+
+  acc_init (acc_device_nvidia);
+
+  if (acc_get_current_cuda_context () == 0)  /* Must be non-zero now.  */
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  if (acc_get_current_cuda_context () != 0)  /* Zero again after shutdown.  */
+    abort ();
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-88.c b/libgomp/testsuite/libgomp.oacc-c/lib-88.c
new file mode 100644
index 0000000..10f4ad8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-88.c
@@ -0,0 +1,111 @@ 
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char *x;
+void *d_x;
+const int N = 256;
+
+static void *
+test (void *arg)
+{
+  int k = 0;
+
+  /* Abort if this thread already reports a CUDA context.  */
+  if (acc_get_current_cuda_context ())
+    abort ();
+  if (!(acc_is_present (x, N) == 1))
+    abort ();
+
+  /* Wipe the host copy, then fetch the device copy back.  */
+  memset (x, 0, N);
+  acc_copyout (x, N);
+
+  /* Verify x[k] == k, then store the reversed pattern.  */
+  while (k < N)
+    {
+      if (x[k] != k)
+	abort ();
+      x[k] = N - k - 1;
+      k++;
+    }
+
+  d_x = acc_copyin (x, N);
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  const int nthreads = 1;
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)  /* Nothing to test.  */
+    return 0;
+
+  acc_init (acc_device_nvidia);
+
+  x = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      x[i] = i;
+    }
+
+  d_x = acc_copyin (x, N);
+
+  if (acc_is_present (x, N) != 1)
+    abort ();
+
+  /* NOTE(review): pthread_* return the error code; perror reads errno.  */
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t));
+
+  for (i = 0; i < nthreads; i++)
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+
+  for (i = 0; i < nthreads; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+
+  if (acc_is_present (x, N) != 1)  /* The thread copied x in again.  */
+    abort ();
+
+  memset (x, 0, N);
+
+  acc_copyout (x, N);
+
+  for (i = 0; i < N; i++)
+    {
+      if (x[i] != N - i - 1)  /* Pattern written by the test thread.  */
+	abort ();
+    }
+
+  if (acc_is_present (x, N) != 0)
+    abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-89.c b/libgomp/testsuite/libgomp.oacc-c/lib-89.c
new file mode 100644
index 0000000..061c409
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-89.c
@@ -0,0 +1,118 @@ 
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+  /* The thread index arrives packed into the pointer argument.  */
+  int tid = (int) (long) arg;
+  int devnum;
+  unsigned char *buf;
+  int k;
+
+  /* Re-select the current NVIDIA device from within this thread.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+  acc_set_device_num (devnum, acc_device_nvidia);
+
+  if (!acc_get_current_cuda_context ())
+    abort ();
+
+  buf = (unsigned char *) malloc (N);
+
+  /* Fill the buffer with this thread's index.  */
+  k = 0;
+  while (k < N)
+    buf[k++] = tid;
+
+  x[tid] = buf;
+
+  d_x[tid] = acc_copyin (buf, N);
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)  /* Nothing to test.  */
+    return 0;
+
+  acc_init (acc_device_nvidia);
+
+  x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *));
+  d_x = (void **) malloc (NTHREADS * sizeof (void *));
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+  for (i = 0; i < NTHREADS; i++)  /* Thread i receives i as its argument.  */
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+
+  for (i = 0; i < NTHREADS; i++)  /* Each thread's buffer must be mapped.  */
+    {
+      if (acc_is_present (x[i], N) != 1)
+	abort ();
+    }
+
+  for (i = 0; i < NTHREADS; i++)  /* Wipe host copy, fetch device copy.  */
+    {
+      memset (x[i], 0, N);
+      acc_copyout (x[i], N);
+    }
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      unsigned char *p;
+      int j;
+
+      p = x[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)  /* Thread i filled its buffer with i.  */
+	    abort ();
+	}
+
+      if (acc_is_present (x[i], N) != 0)
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-9.c b/libgomp/testsuite/libgomp.oacc-c/lib-9.c
new file mode 100644
index 0000000..a4cf7f2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-9.c
@@ -0,0 +1,70 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int num_devices;
+  int devnum;
+  acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+  devtype = acc_device_nvidia;
+#endif
+
+  num_devices = acc_get_num_devices (devtype);
+  if (num_devices == 0)  /* Nothing to test.  */
+    return 0;
+
+  acc_init (devtype);
+
+  for (i = 0; i < num_devices; i++)  /* Set then read back each number.  */
+    {
+      acc_set_device_num (i, devtype);
+      devnum = acc_get_device_num (devtype);
+      if (devnum != i)
+	abort ();
+    }
+
+  acc_shutdown (devtype);
+
+  num_devices = acc_get_num_devices (devtype);
+  if (num_devices == 0)
+    abort ();
+
+  for (i = 0; i < num_devices; i++)  /* Same check with no acc_init call.  */
+    {
+      acc_set_device_num (i, devtype);
+      devnum = acc_get_device_num (devtype);
+      if (devnum != i)
+	abort ();
+    }
+
+  acc_shutdown (devtype);
+
+  acc_init (devtype);
+
+  acc_set_device_num (0, devtype);
+
+  devnum = acc_get_device_num (devtype);
+  if (devnum != 0)
+    abort ();
+
+  if (num_devices > 1)
+    {
+      acc_set_device_num (1, (acc_device_t) 0);  /* Device type given as 0.  */
+
+      devnum = acc_get_device_num (devtype);
+      if (devnum != 1)
+	abort ();
+  }
+
+  acc_shutdown (devtype);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-90.c b/libgomp/testsuite/libgomp.oacc-c/lib-90.c
new file mode 100644
index 0000000..d17755b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-90.c
@@ -0,0 +1,137 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+#include <cuda.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+
+static void *
+test (void *arg)
+{
+  /* The thread index arrives packed into the pointer argument.  */
+  int tid = (int) (long) arg;
+  int devnum;
+  unsigned char *buf;
+  int k;
+
+  /* Re-select the current NVIDIA device from within this thread.  */
+  devnum = acc_get_device_num (acc_device_nvidia);
+  acc_set_device_num (devnum, acc_device_nvidia);
+
+  if (!acc_get_current_cuda_context ())
+    abort ();
+
+  buf = (unsigned char *) malloc (N);
+
+  /* Fill the buffer with this thread's index.  */
+  k = 0;
+  while (k < N)
+    buf[k++] = tid;
+
+  x[tid] = buf;
+
+  d_x[tid] = acc_copyin (buf, N);
+
+  acc_wait_all ();
+
+  return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+  CUresult r;
+  CUstream s;
+
+  acc_init (acc_device_nvidia);
+
+  x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *));
+  d_x = (void **) malloc (NTHREADS * sizeof (void *));
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+  r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+  if (!acc_set_cuda_stream (0, s))  /* Bind the new stream to async 0.  */
+	  abort ();
+
+  for (i = 0; i < NTHREADS; i++)  /* Thread i receives i as its argument.  */
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+
+
+  for (i = 0; i < NTHREADS; i++)  /* Each thread's buffer must be mapped.  */
+    {
+      if (acc_is_present (x[i], N) != 1)
+	abort ();
+    }
+
+  acc_get_cuda_stream (1);  /* Result unused; only the call is exercised.  */
+
+  for (i = 0; i < NTHREADS; i++)  /* Wipe host copy, fetch device copy.  */
+    {
+      memset (x[i], 0, N);
+      acc_copyout (x[i], N);
+    }
+
+  acc_wait_all ();
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      unsigned char *p;
+      int j;
+
+      p = x[i];
+
+      for (j = 0; j < N; j++)
+	{
+	  if (p[j] != i)  /* Thread i filled its buffer with i.  */
+	    abort ();
+	}
+
+      if (acc_is_present (x[i], N) != 0)
+	abort ();
+    }
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-91.c b/libgomp/testsuite/libgomp.oacc-c/lib-91.c
new file mode 100644
index 0000000..e00ef4f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-91.c
@@ -0,0 +1,84 @@ 
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <cuda.h>
+
+/* Time 100 synchronous "update device" transfers against 100 async ones on
+   a user-created CUDA stream; abort if async takes over 1.5x as long.  */
+int
+main (int argc, char **argv)
+{
+  const int N = 1024 * 1024;
+  int i;
+  unsigned char *h;
+  void *d;
+  float async, sync;
+  struct timeval start, stop;
+  CUresult r;
+  CUstream s;
+
+  acc_init (acc_device_nvidia);
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    h[i] = i;
+
+  d = acc_malloc (N);
+  acc_map_data (h, d, N);
+
+  gettimeofday (&start, NULL);
+
+  for (i = 0; i < 100; i++)
+    {
+#pragma acc update device(h[0:N])
+    }
+
+  gettimeofday (&stop, NULL);
+
+  sync = (float) (stop.tv_sec - start.tv_sec);
+  sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+
+  r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+	  abort ();
+	}
+
+  if (!acc_set_cuda_stream (0, s))
+	  abort ();
+
+  /* Start the clock only now: stream setup is not part of the transfers.  */
+  gettimeofday (&start, NULL);
+
+  for (i = 0; i < 100; i++)
+    {
+#pragma acc update device(h[0:N]) async(0)
+    }
+
+  acc_wait_all ();
+
+  gettimeofday (&stop, NULL);
+
+  async = (float) (stop.tv_sec - start.tv_sec);
+  async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+
+  if (async > (sync * 1.5))
+    abort ();
+
+  acc_unmap_data (h);
+  acc_free (d);
+  free (h);
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-92.c b/libgomp/testsuite/libgomp.oacc-c/lib-92.c
new file mode 100644
index 0000000..18193e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-92.c
@@ -0,0 +1,112 @@ 
+/* { dg-do run } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 32;
+const int NTHREADS = 32;
+
+/* Thread body: select the nvidia device number the runtime reports, copy
+   this thread's buffer back with acc_copyout and check that byte K still
+   holds K.  ARG carries the thread index.  */
+static void *
+test (void *arg)
+{
+  const int me = (int) (long) arg;
+  const int dev = acc_get_device_num (acc_device_nvidia);
+  unsigned char *buf = x[me];
+  int k;
+
+  acc_set_device_num (dev, acc_device_nvidia);
+
+  /* Give up if the runtime reports no current CUDA context.  */
+  if (acc_get_current_cuda_context () == NULL)
+    abort ();
+
+  /* main mapped this buffer with acc_copyin; fetch it back.  */
+  acc_copyout (buf, N);
+
+  /* main stored K in byte K before the copyin.  */
+  for (k = 0; k < N; k++)
+    if (buf[k] != k)
+      abort ();
+
+  return 0;
+}
+
+
+/* Map one N-byte buffer per worker with acc_copyin, let each worker thread
+   acc_copyout its own buffer, then check that nothing is left mapped.  */
+int
+main (int argc, char **argv)
+{
+  int i;
+  pthread_attr_t attr;
+  pthread_t *tid;
+  unsigned char *p;
+
+  if (acc_get_num_devices (acc_device_nvidia) == 0)
+    return 0;
+
+  acc_init (acc_device_nvidia);
+
+  /* The tables hold one pointer per thread, not N bytes per thread.  */
+  x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *));
+  d_x = (void **) malloc (NTHREADS * sizeof (void *));
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      int j;
+
+      p = (unsigned char *) malloc (N);
+
+      x[i] = p;
+
+      for (j = 0; j < N; j++)
+	p[j] = j;
+
+      d_x[i] = acc_copyin (p, N);
+    }
+
+  if (pthread_attr_init (&attr) != 0)
+    perror ("pthread_attr_init failed");
+
+  tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+  acc_get_cuda_stream (1);
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+	  != 0)
+	perror ("pthread_create failed");
+    }
+
+  if (pthread_attr_destroy (&attr) != 0)
+    perror ("pthread_attr_destroy failed");
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      void *res;
+
+      if (pthread_join (tid[i], &res) != 0)
+	perror ("pthread join failed");
+    }
+
+  /* Each worker's acc_copyout must have removed its buffer's mapping.  */
+  for (i = 0; i < NTHREADS; i++)
+    if (acc_is_present (x[i], N) != 0)
+      abort ();
+
+  acc_shutdown (acc_device_nvidia);
+
+  return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-1.c b/libgomp/testsuite/libgomp.oacc-c/nested-1.c
new file mode 100644
index 0000000..ededf2b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/nested-1.c
@@ -0,0 +1,680 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    c = (float *) malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc data copyin (a[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+    d = (float *) acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc data present_or_copyin (a[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 2.0;
+    }
+
+    d = (float *) acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 4.0;
+    }
+
+#pragma acc data copy (a[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = a[ii] + 1;
+                b[ii] = a[ii] + 2;
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = a[ii] + 1;
+                b[ii] = b[ii] + 2;
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+
+    d = (float *) acc_copyin (&a[0], N * sizeof (float));
+    d = (float *) acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = a[ii] + 1;
+                b[ii] = b[ii] + 2;
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 7.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_deviceptr (&a[0]);
+    acc_unmap_data (&a[0]);
+    acc_free (d);
+
+    d = (float *) acc_deviceptr (&b[0]);
+    acc_unmap_data (&b[0]);
+    acc_free (d);
+
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc data copyin (a[0:N]) create (c[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 8.0;
+    }
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc data copyin (a[0:N]) present (c[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    acc_unmap_data (c);
+
+    if (acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (b, d, N * sizeof (float));
+
+    if (!acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (a, d, N * sizeof (float));
+
+    if (!acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                a[ii] = 1.0;
+                c[ii] = 2.0;
+                b[ii] = 4.0;
+            }
+        }
+    }
+
+    if (!acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (c, (N * sizeof (float))))
+      abort ();
+
+    acc_copyout (b, N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+    }
+
+    d = (float *) acc_deviceptr (a);
+
+    acc_unmap_data (a);
+
+    acc_free (d);
+
+    d = (float *) acc_deviceptr (c);
+
+    acc_unmap_data (c);
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 6.0;
+    }
+
+    d = (float *) acc_malloc (N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+        {
+            d[ii] = a[ii];
+            b[ii] = d[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+    d = (float *) acc_copyin (&a[0], N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc data pcopyin (a[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    acc_free (d);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N]) pcopyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+                b[ii] = a[ii];
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 7.0;
+    }
+
+#pragma acc data copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    if (acc_is_present (&c[0], (N * sizeof (float))))
+      abort ();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-2.c b/libgomp/testsuite/libgomp.oacc-c/nested-2.c
new file mode 100644
index 0000000..0579185
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/nested-2.c
@@ -0,0 +1,35 @@ 
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+/* Check that a parallel region with a present clause works when nested in
+   a data region that maps the same array.  */
+int
+main (int argc, char *argv[])
+{
+#define N 10
+  char a[N];
+  int k;
+
+  /* Zero the host array before the device fills it.  */
+  for (k = 0; k < N; ++k)
+    a[k] = 0;
+
+  /* The enclosing data region maps A; the parallel region only expects it.  */
+#pragma acc data copyout (a)
+  {
+#pragma acc parallel /* will result in a "dummy frame" */ present (a)
+    {
+      int i;
+      for (i = 0; i < N; ++i)
+	a[i] = i;
+    }
+  }
+
+  /* After the copyout, element K must hold K.  */
+  for (k = 0; k < N; ++k)
+    if (a[k] != k)
+      abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/offset-1.c b/libgomp/testsuite/libgomp.oacc-c/offset-1.c
new file mode 100644
index 0000000..0bae23a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/offset-1.c
@@ -0,0 +1,97 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main(int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b;
+    int i;
+
+    a = (float *) malloc(N * sizeof (float));
+    b = (float *) malloc(N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 5.0;
+    }
+
+#pragma acc parallel copyin(a[2:4]) copyout(b[2:4])
+    {
+        b[2] = a[2];
+        b[3] = a[3];
+    }
+
+    for (i = 2; i < 4; i++)
+    {
+        if (a[i] != 2.0)
+            abort();
+
+        if (b[i] != 2.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc parallel copyin(a[0:4]) copyout(b[0:4])
+    {
+        b[0] = a[0];
+        b[1] = a[1];
+        b[2] = a[2];
+        b[3] = a[3];
+    }
+
+    for (i = 0; i < 4; i++)
+    {
+        if (a[i] != 3.0)
+            abort();
+
+        if (b[i] != 3.0)
+            abort();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+        b[i] = 6.0;
+    }
+
+#pragma acc parallel copyin(a[0:4]) copyout(b[4:4])
+    {
+        b[4] = a[0];
+        b[5] = a[1];
+        b[6] = a[2];
+        b[7] = a[3];
+    }
+
+    for (i = 0; i < 4; i++)
+    {
+        if (a[i] != 9.0)
+            abort();
+    }
+
+    for (i = 4; i < 8; i++)
+    {
+        if (b[i] != 9.0)
+            abort();
+    }
+
+    if (acc_is_present (a, (N * sizeof (float))))
+      abort();
+
+    if (acc_is_present (b, (N * sizeof (float))))
+      abort();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c
index 68f7de5..fd9df33 100644
--- a/libgomp/testsuite/libgomp.oacc-c/parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c
@@ -1,6 +1,6 @@ 
 /* { dg-do run } */
 
-extern void abort ();
+#include <stdlib.h>
 
 int i;
 
@@ -8,7 +8,6 @@  int main(void)
 {
   int j, v;
 
-#if 0
   i = -1;
   j = -2;
   v = 0;
@@ -22,8 +21,13 @@  int main(void)
       abort ();
     v = 1;
   }
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
   if (v != 1 || i != -1 || j != -2)
     abort ();
+#endif
 
   i = -1;
   j = -2;
@@ -66,6 +70,10 @@  int main(void)
       abort ();
     v = 1;
   }
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
   if (v != 1 || i != -1 || j != -2)
     abort ();
 #endif
@@ -83,8 +91,15 @@  int main(void)
       abort ();
     v = 1;
   }
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
   if (v != 1 || i != -1 || j != -2)
     abort ();
+#endif
 
   i = -1;
   j = -2;
@@ -127,43 +142,64 @@  int main(void)
       abort ();
     v = 1;
   }
+  if (v != 1)
+    abort ();
+#if ACC_MEM_SHARED
+  if (v != 1 || i != 2 || j != 1)
+    abort ();
+#else
   if (v != 1 || i != -1 || j != -2)
     abort ();
+#endif
 
-#if 0
   i = -1;
   j = -2;
   v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
+
+#pragma acc data copyin (i, j)
   {
-    if (i != -1 || j != -2)
-      abort ();
-    i = 2;
-    j = 1;
-    if (i != 2 || j != 1)
-      abort ();
-    v = 1;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
+    {
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+      v = 1;
+    }
   }
+#if ACC_MEM_SHARED
   if (v != 1 || i != 2 || j != 1)
     abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
 #endif
 
-#if 0
   i = -1;
   j = -2;
   v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v)
+
+#pragma acc data copyin(i, j)
   {
-    if (i != -1 || j != -2)
-      abort ();
-    i = 2;
-    j = 1;
-    if (i != 2 || j != 1)
-      abort ();
-    v = 1;
+#pragma acc parallel /* copyout */ present_or_copyout (v)
+    {
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+      v = 1;
+    }
   }
+#if ACC_MEM_SHARED
   if (v != 1 || i != 2 || j != 1)
     abort ();
+#else
+  if (v != 1 || i != -1 || j != -2)
+    abort ();
 #endif
 
   return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c
new file mode 100644
index 0000000..f7d5b9b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c
@@ -0,0 +1,35 @@ 
+/* { dg-do run } */
+
+/* PR middle-end/63247 */
+
+#include <stdlib.h>
+
+/* Copy a sub-array that starts at a[1] to the device and back, then check
+   that exactly the copied elements changed (PR middle-end/63247).  */
+int
+main(int argc, char **argv)
+{
+#define N 4
+    /* Expected host contents after the region.  */
+    static const short expected[N] = { 10, 51, 52, 53 };
+    short a[N];
+    int k;
+
+    for (k = 0; k < N; k++)
+      a[k] = 10;
+
+    /* a[0] lies outside the mapped range.  */
+#pragma acc parallel copy(a[1:N-1])
+    {
+      a[1] = 51;
+      a[2] = 52;
+      a[3] = 53;
+    }
+
+    /* a[0] must be untouched; a[1..3] carry the device-side values.  */
+    for (k = 0; k < N; k++)
+      if (a[k] != expected[k])
+        abort ();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/present-1.c b/libgomp/testsuite/libgomp.oacc-c/present-1.c
new file mode 100644
index 0000000..f331f1f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/present-1.c
@@ -0,0 +1,48 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+    int N = 8;
+    float *a, *b, *c, *d;
+    int i;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    c = (float *) malloc (N * sizeof (float));
+
+    d = (float *) acc_malloc (N * sizeof (float));
+    acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N])
+    {
+#pragma acc parallel
+        {
+            int ii;
+
+            for (ii = 0; ii < N; ii++)
+            {
+                c[ii] = a[ii];
+                b[ii] = c[ii];
+            }
+        }
+    }
+
+    d = (float *) acc_deviceptr (c);
+    acc_unmap_data (c);
+    acc_free (d);
+
+    free (a);
+    free (b);
+    free (c);
+
+    return 0;
+}
+/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/present-2.c b/libgomp/testsuite/libgomp.oacc-c/present-2.c
new file mode 100644
index 0000000..41efa70
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/present-2.c
@@ -0,0 +1,48 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b;
+  int i;
+
+  a = (float *) malloc (N * sizeof (float));
+  b = (float *) malloc (N * sizeof (float));
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 4.0;
+      b[i] = 0.0;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+  {
+
+#pragma acc parallel present(a[0:N])
+    {
+      int ii;
+
+      for (ii = 0; ii < N; ii++)
+	{
+	  b[ii] = a[ii];
+	}
+    }
+
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != 4.0)
+	abort ();
+
+      if (b[i] != 4.0)
+	abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.cu b/libgomp/testsuite/libgomp.oacc-c/subr.cu
new file mode 100644
index 0000000..e86e0fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/subr.cu
@@ -0,0 +1,64 @@ 
+
+extern "C" __global__ void
+delay (clock_t * d_o, clock_t delay)
+{
+  clock_t start, ticks;
+
+  start = clock ();
+
+  ticks = 0;
+
+  while (ticks < delay)
+    ticks = clock () - start;
+}
+
+extern "C" __global__ void
+delay2 (unsigned long *d_o, clock_t delay, unsigned long tid)
+{
+  clock_t start, ticks;
+
+  start = clock ();
+
+  ticks = 0;
+
+  while (ticks < delay)
+    ticks = clock () - start;
+
+  d_o[0] = tid;
+}
+
+/* Sum the N entries of D_O into D_O[0] with a shared-memory tree reduction.
+   NOTE(review): ticks[] and the i = 16 start assume a 32-thread block.  */
+extern "C" __global__ void
+sum (clock_t * d_o, int N)
+{
+  int i;
+  clock_t sum = 0;
+  __shared__ clock_t ticks[32];
+
+  for (i = threadIdx.x; i < N; i += blockDim.x)
+    sum += d_o[i];
+
+  ticks[threadIdx.x] = sum;
+
+  __syncthreads ();
+
+  for (i = 16; i >= 1; i >>= 1)
+    {
+      if (threadIdx.x < i)
+	ticks[threadIdx.x] += ticks[threadIdx.x + i];
+
+      __syncthreads ();
+    }
+
+  d_o[0] = ticks[0];
+}
+
+/* y[i] = x[i] * x[i] for i < n; the launched threads share the range.  */
+extern "C" __global__ void
+mult (int n, float *x, float *y)
+{
+  int i, stride = blockDim.x * gridDim.x;
+  for (i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+    y[i] = x[i] * x[i];
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.ptx b/libgomp/testsuite/libgomp.oacc-c/subr.ptx
new file mode 100644
index 0000000..6f748fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/subr.ptx
@@ -0,0 +1,148 @@ 
+// BEGIN PREAMBLE
+	.version	3.1
+	.target	sm_30
+	.address_size 64
+// END PREAMBLE
+
+// BEGIN FUNCTION DEF: clock
+.func (.param.u32 %out_retval)clock
+{
+.reg.u32 %retval;
+	.reg.u64 %hr10;
+	.reg.u32 %r22;
+	.reg.u32 %r23;
+	.reg.u32 %r24;
+	.local.align 8 .b8 %frame[8];
+	// #APP 
+// 7 "subr.c" 1
+	mov.u32 %r24, %clock;
+// 0 "" 2
+	// #NO_APP 
+		st.local.u32	[%frame], %r24;
+		ld.local.u32	%r22, [%frame];
+		mov.u32	%r23, %r22;
+		mov.u32	%retval, %r23;
+	st.param.u32	[%out_retval], %retval;
+	ret;
+	}
+// END FUNCTION DEF
+// BEGIN GLOBAL FUNCTION DEF: delay
+.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2)
+{
+	.reg.u64 %ar1;
+	.reg.u64 %ar2;
+	.reg.u64 %hr10;
+	.reg.u64 %r22;
+	.reg.u32 %r23;
+	.reg.u64 %r24;
+	.reg.u64 %r25;
+	.reg.u32 %r26;
+	.reg.u32 %r27;
+	.reg.u32 %r28;
+	.reg.u32 %r29;
+	.reg.u32 %r30;
+	.reg.u64 %r31;
+	.reg.pred %r32;
+	.local.align 8 .b8 %frame[24];
+	ld.param.u64 %ar1, [%in_ar1];
+	ld.param.u64 %ar2, [%in_ar2];
+		mov.u64	%r24, %ar1;
+		st.local.u64	[%frame+8], %r24;	// %frame is .local, like every other frame access
+		mov.u64	%r25, %ar2;
+		st.local.u64	[%frame+16], %r25;
+	{
+		.param.u32 %retval_in;
+	{
+		call (%retval_in), clock;
+	}
+		ld.param.u32	%r26, [%retval_in];
+}
+		st.local.u32	[%frame+4], %r26;
+		mov.u32	%r27, 0;
+		st.local.u32	[%frame], %r27;
+		bra	$L4;
+$L5:
+	{
+		.param.u32 %retval_in;
+	{
+		call (%retval_in), clock;
+	}
+		ld.param.u32	%r28, [%retval_in];
+}
+		mov.u32	%r23, %r28;
+		ld.local.u32	%r30, [%frame+4];
+		sub.u32	%r29, %r23, %r30;
+		st.local.u32	[%frame], %r29;
+$L4:
+		ld.local.s32	%r22, [%frame];
+		ld.local.u64	%r31, [%frame+16];
+		setp.lo.u64 %r32,%r22,%r31;
+	@%r32	bra	$L5;
+	ret;
+	}
+// END FUNCTION DEF
+// BEGIN GLOBAL FUNCTION DEF: delay2
+.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3)
+{
+	.reg.u64 %ar1;
+	.reg.u64 %ar2;
+	.reg.u64 %ar3;
+	.reg.u64 %hr10;
+	.reg.u64 %r22;
+	.reg.u32 %r23;
+	.reg.u64 %r24;
+	.reg.u64 %r25;
+	.reg.u64 %r26;
+	.reg.u32 %r27;
+	.reg.u32 %r28;
+	.reg.u32 %r29;
+	.reg.u32 %r30;
+	.reg.u32 %r31;
+	.reg.u64 %r32;
+	.reg.pred %r33;
+	.reg.u64 %r34;
+	.reg.u64 %r35;
+	.local.align 8 .b8 %frame[32];
+	ld.param.u64 %ar1, [%in_ar1];
+	ld.param.u64 %ar2, [%in_ar2];
+	ld.param.u64 %ar3, [%in_ar3];
+		mov.u64	%r24, %ar1;
+		st.local.u64	[%frame+8], %r24;
+		mov.u64	%r25, %ar2;
+		st.local.u64	[%frame+16], %r25;
+		mov.u64	%r26, %ar3;
+		st.local.u64	[%frame+24], %r26;
+	{
+		.param.u32 %retval_in;
+	{
+		call (%retval_in), clock;
+	}
+		ld.param.u32	%r27, [%retval_in];
+}
+		st.local.u32	[%frame+4], %r27;
+		mov.u32	%r28, 0;
+		st.local.u32	[%frame], %r28;
+		bra	$L8;
+$L9:
+	{
+		.param.u32 %retval_in;
+	{
+		call (%retval_in), clock;
+	}
+		ld.param.u32	%r29, [%retval_in];
+}
+		mov.u32	%r23, %r29;
+		ld.local.u32	%r31, [%frame+4];
+		sub.u32	%r30, %r23, %r31;
+		st.local.u32	[%frame], %r30;
+$L8:
+		ld.local.s32	%r22, [%frame];
+		ld.local.u64	%r32, [%frame+16];
+		setp.lo.u64 %r33,%r22,%r32;
+	@%r33	bra	$L9;
+		ld.local.u64	%r34, [%frame+8];
+		ld.local.u64	%r35, [%frame+24];
+		st.u64	[%r34], %r35;
+	ret;
+	}
+// END FUNCTION DEF
diff --git a/libgomp/testsuite/libgomp.oacc-c/timer.h b/libgomp/testsuite/libgomp.oacc-c/timer.h
new file mode 100644
index 0000000..53749da
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/timer.h
@@ -0,0 +1,103 @@ 
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+
+static int _Tnum_timers;
+static CUevent *_Tstart_events, *_Tstop_events;
+static CUstream _Tstream;
+
+void
+init_timers (int ntimers)
+{
+  int i;
+  CUresult r;
+
+  _Tnum_timers = ntimers;
+
+  _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
+  _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
+
+  r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  for (i = 0; i < _Tnum_timers; i++)
+    {
+      r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuEventCreate failed: %d\n", r);
+	  abort ();
+	}
+
+      r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+	{
+	  fprintf (stderr, "cuEventCreate failed: %d\n", r);
+	  abort ();
+	}
+    }
+}
+
+void
+fini_timers (void)
+{
+  int i;
+
+  for (i = 0; i < _Tnum_timers; i++)
+    {
+      cuEventDestroy (_Tstart_events[i]);
+      cuEventDestroy (_Tstop_events[i]);
+    }
+
+  cuStreamDestroy (_Tstream);
+
+  free (_Tstart_events);
+  free (_Tstop_events);
+}
+
+void
+start_timer (int timer)
+{
+  CUresult r;
+
+  r = cuEventRecord (_Tstart_events[timer], _Tstream);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventRecord failed: %d\n", r);
+      abort ();
+    }
+}
+
+float
+stop_timer (int timer)
+{
+  CUresult r;
+  float etime;
+
+  r = cuEventRecord (_Tstop_events[timer], _Tstream);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventRecord failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuEventSynchronize (_Tstop_events[timer]);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventSynchronize failed: %d\n", r);
+      abort ();
+    }
+
+  r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventElapsedTime failed: %d\n", r);
+      abort ();
+    }
+
+  return etime;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/update-1.c b/libgomp/testsuite/libgomp.oacc-c/update-1.c
new file mode 100644
index 0000000..dff139f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/update-1.c
@@ -0,0 +1,280 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+    int N = 8;  /* Element count of each test array.  */
+    float *a, *b, *c;  /* Host buffers; c is only allocated and mapped.  */
+    float *d_a, *d_b, *d_c;  /* Device buffers obtained from acc_malloc.  */
+    int i;
+
+    a = (float *) malloc (N * sizeof (float));
+    b = (float *) malloc (N * sizeof (float));
+    c = (float *) malloc (N * sizeof (float));
+
+    d_a = (float *) acc_malloc (N * sizeof (float));
+    d_b = (float *) acc_malloc (N * sizeof (float));
+    d_c = (float *) acc_malloc (N * sizeof (float));
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+    /* Associate each host buffer with its device buffer.  */
+    acc_map_data (a, d_a, N * sizeof (float));
+    acc_map_data (b, d_b, N * sizeof (float));
+    acc_map_data (c, d_c, N * sizeof (float));
+    /* Round 1: upload a and b, copy a to b on the device, read both back.  */
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+    /* Round 2: same sequence with new host values (5.0 and 1.0).  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+    /* Round 3: as round 2, but the read-back uses "update self".  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 1.0;
+    }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+    /* Round 4: device copy (6.0) must survive a later host write (9.0).  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 6.0)
+            abort ();
+
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+    /* Round 5: as round 4, with 7.0 uploaded before the host write.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 7.0;
+        b[i] = 2.0;
+    }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 7.0)
+            abort ();
+
+        if (b[i] != 7.0)
+            abort ();
+    }
+    /* Round 6: upload only a (9.0); b is produced on the device.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 9.0;
+    }
+
+#pragma acc update device (a[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 9.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+    /* Round 7: upload 5.0 everywhere, then re-upload the first half as 6.0.  */
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+    }
+
+#pragma acc update device (a[0:N])
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 6.0;
+    }
+
+#pragma acc update device (a[0:N >> 1])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+    for (i = 0; i < (N >> 1); i++)
+    {
+        if (a[i] != 6.0)
+            abort ();
+
+        if (b[i] != 6.0)
+            abort ();
+    }
+
+    for (i = (N >> 1); i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    if (!acc_is_present (&a[0], (N * sizeof (float))))
+      abort ();
+
+    if (!acc_is_present (&b[0], (N * sizeof (float))))
+      abort ();
+
+    return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
new file mode 100644
index 0000000..52b030b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
@@ -0,0 +1,10 @@ 
+! { dg-shouldfail "" { *-*-* } { "*" } { "" } }
+
+program main  ! Calls abort unconditionally inside an OpenACC parallel region.
+  implicit none
+
+  !$acc parallel
+  call abort
+  !$acc end parallel
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
new file mode 100644
index 0000000..2ba2bcb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
@@ -0,0 +1,13 @@ 
+program main  ! Aborts inside the parallel region only if arguments were given.
+  implicit none
+
+  integer :: argc
+  argc = command_argument_count ()  ! Number of command-line arguments.
+
+  !$acc parallel copyin(argc)
+  if (argc .ne. 0) then
+     call abort
+  end if
+  !$acc end parallel
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
index c4597a6..4488818 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
@@ -1,5 +1,4 @@ 
-! TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness.
-! { dg-additional-options "-cpp -DACC_DEVICE_TYPE_host" }
+! { dg-additional-options "-cpp" }
 ! TODO: Have to disable the acc_on_device builtin for we want to test the
 ! libgomp library function?  The command line option
 ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for
@@ -12,7 +11,9 @@  implicit none
 
 if (.not. acc_on_device (acc_device_none)) call abort
 if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_host_nonshm)) call abort
 if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
 
 
 ! Host via offloading fallback mode.
@@ -20,7 +21,9 @@  if (acc_on_device (acc_device_not_host)) call abort
 !$acc parallel if(.false.)
 if (.not. acc_on_device (acc_device_none)) call abort
 if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_host_nonshm)) call abort
 if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
 !$acc end parallel
 
 
@@ -31,7 +34,17 @@  if (acc_on_device (acc_device_not_host)) call abort
 !$acc parallel
 if (acc_on_device (acc_device_none)) call abort
 if (acc_on_device (acc_device_host)) call abort
+#if ACC_DEVICE_TYPE_host_nonshm
+if (.not. acc_on_device (acc_device_host_nonshm)) call abort
+#else
+if (acc_on_device (acc_device_host_nonshm)) call abort
+#endif
 if (.not. acc_on_device (acc_device_not_host)) call abort
+#if ACC_DEVICE_TYPE_nvidia
+if (.not. acc_on_device (acc_device_nvidia)) call abort
+#else
+if (acc_on_device (acc_device_nvidia)) call abort
+#endif
 !$acc end parallel
 
 #endif
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
index 3787e1e..0047a19 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
@@ -1,5 +1,4 @@ 
-! TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness.
-! { dg-additional-options "-cpp -DACC_DEVICE_TYPE_host" }
+! { dg-additional-options "-cpp" }
 ! TODO: Have to disable the acc_on_device builtin for we want to test
 ! the libgomp library function?  The command line option
 ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not
@@ -12,7 +11,9 @@ 
 
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
       IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
 
 
 !Host via offloading fallback mode.
@@ -20,7 +21,9 @@ 
 !$ACC PARALLEL IF(.FALSE.)
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
       IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
 !$ACC END PARALLEL
 
 
@@ -31,7 +34,17 @@ 
 !$ACC PARALLEL
       IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
       IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_host_nonshm
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#else
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#endif
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
 !$ACC END PARALLEL
 
 #endif
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
index 1ee5926..49d7a72 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
@@ -1,5 +1,4 @@ 
-! TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness.
-! { dg-additional-options "-cpp -DACC_DEVICE_TYPE_host" }
+! { dg-additional-options "-cpp" }
 ! TODO: Have to disable the acc_on_device builtin for we want to test
 ! the libgomp library function?  The command line option
 ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not
@@ -12,7 +11,9 @@ 
 
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
       IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
 
 
 !Host via offloading fallback mode.
@@ -20,7 +21,9 @@ 
 !$ACC PARALLEL IF(.FALSE.)
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
       IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
 !$ACC END PARALLEL
 
 
@@ -31,7 +34,17 @@ 
 !$ACC PARALLEL
       IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
       IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_host_nonshm
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#else
+      IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#endif
       IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+      IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+      IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
 !$ACC END PARALLEL
 
 #endif
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
index cd0ab26..312f947 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
+++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
@@ -21,7 +21,8 @@  set quadmath_library_path "../libquadmath/.libs"
 dg-init
 
 # Turn on OpenACC.
-lappend ALWAYS_CFLAGS "additional_flags=-fopenacc"
+# XXX (TEMPORARY): Remove the -flto once that's properly integrated.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto"
 
 if { $blddir != "" } {
     set lang_source_re {^.*\.[fF](|90|95|03|08)$}
@@ -65,10 +66,41 @@  if { $lang_test_file_found } {
     append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
     set_ld_library_path_env_vars
 
-    # For Fortran we're doing torture testing, as Fortran has far more tests
-    # with arrays etc. that testing just -O0 or -O2 is insufficient, that is
-    # typically not the case for C/C++.
-    gfortran-dg-runtest $tests "" ""
+    # Todo: get list of accelerators from configure options --enable-accelerator.
+    set accels { "nvidia" "host_nonshm" }
+
+    # Run on host (or fallback) accelerator.
+    lappend accels "host"
+
+    # Test OpenACC with available accelerators.
+    foreach accel $accels {
+	set tagopt "-DACC_DEVICE_TYPE_$accel=1"
+
+	# Todo: Determine shared memory or not using run-time test.
+	switch $accel {
+	    host {
+		set acc_mem_shared 1
+	    }
+	    host_nonshm {
+		set acc_mem_shared 0
+	    }
+	    nvidia {
+		set acc_mem_shared 0
+	    }
+	    default {
+		set acc_mem_shared 0
+	    }
+	}
+	set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+	# Todo: Verify that this works for both local and remote testing.
+	setenv ACC_DEVICE_TYPE $accel
+
+	# For Fortran we're doing torture testing, as Fortran has far more tests
+	# with arrays etc. that testing just -O0 or -O2 is insufficient, that is
+	# typically not the case for C/C++.
+	gfortran-dg-runtest $tests "$tagopt" ""
+    }
 }
 
 # All done.
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
index 124aa87..51dc452 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
@@ -1,3 +1,13 @@ 
 use openacc
 
+if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+call acc_set_device_type (acc_device_host)
+if (acc_get_device_type () .ne. acc_device_host) call abort
+call acc_set_device_num (0, acc_device_host)
+if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+call acc_shutdown (acc_device_host)
+
+call acc_init (acc_device_host)
+call acc_shutdown (acc_device_host)
+
 end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
new file mode 100644
index 0000000..a54d6a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
@@ -0,0 +1,82 @@ 
+! { dg-do run }
+
+program main  ! acc_copyin / acc_is_present on 3-D arrays of three types.
+  implicit none
+  include "openacc_lib.h"
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)  ! Bytes in one default integer.
+  integer, parameter :: c_size = sizeof (c)  ! Bytes in one default complex.
+  integer, parameter :: r_size = sizeof (r)  ! Bytes in one default real.
+  ! Nothing to test when no NVIDIA device is reported.
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+  ! Each element is queried with the byte length of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+  ! Zero all three arrays when CLEAR is true, else fill them from the indices.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical, intent (in) :: clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+  ! Loop bounds are taken from a_i and applied to all three arrays.
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90
new file mode 100644
index 0000000..a54d6a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90
@@ -0,0 +1,82 @@ 
+! { dg-do run }
+
+program main  ! acc_copyin / acc_is_present on 3-D arrays of three types.
+  implicit none
+  include "openacc_lib.h"
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)  ! Bytes in one default integer.
+  integer, parameter :: c_size = sizeof (c)  ! Bytes in one default complex.
+  integer, parameter :: r_size = sizeof (r)  ! Bytes in one default real.
+  ! Nothing to test when no NVIDIA device is reported.
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+  ! Each element is queried with the byte length of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+  ! Zero all three arrays when CLEAR is true, else fill them from the indices.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical, intent (in) :: clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+  ! Loop bounds are taken from a_i and applied to all three arrays.
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
index 64beb9e..a9d70b2 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
@@ -1,3 +1,13 @@ 
       USE OPENACC
 
+      IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+      CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+      CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+      CALL ACC_INIT (ACC_DEVICE_HOST)
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
       END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
index 3f9940b..56d2cd2 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
@@ -1,3 +1,13 @@ 
       INCLUDE "openacc_lib.h"
 
+      IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+      CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+      CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+      IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+      CALL ACC_INIT (ACC_DEVICE_HOST)
+      CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
       END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
new file mode 100644
index 0000000..3a2b661
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
@@ -0,0 +1,35 @@ 
+! { dg-do run }
+
+program main
+  use openacc
+  implicit none
+
+  integer :: id
+
+  ! Exactly one host device is expected, and no device of type "none".
+  if (acc_get_num_devices (acc_device_host) /= 1) call abort ()
+  if (acc_get_num_devices (acc_device_none) /= 0) call abort ()
+
+  ! After initialising the host device, host must be the reported type.
+  call acc_init (acc_device_host)
+  if (acc_get_device_type () /= acc_device_host) call abort ()
+
+  ! Selecting the host type explicitly must report host as well.
+  call acc_set_device_type (acc_device_host)
+  if (acc_get_device_type () /= acc_device_host) call abort ()
+
+  ! Device number 0 must be selectable and reported back.
+  id = 0
+  call acc_set_device_num (id, acc_device_host)
+  if (acc_get_device_num (acc_device_host) /= 0) call abort ()
+
+  ! This program launched no asynchronous work, so the test must succeed.
+  if (.not. acc_async_test (id)) call abort ()
+
+  ! Exercise both wait entry points.
+  call acc_wait (id)
+  call acc_wait_all ()
+
+  ! Shut the host device down again.
+  call acc_shutdown (acc_device_host)
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
new file mode 100644
index 0000000..e68eb89
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
@@ -0,0 +1,31 @@ 
+! { dg-do run }
+
+program main
+  use openacc
+  implicit none
+
+  integer :: dev
+
+  ! Without any NVIDIA device there is nothing to check, so leave
+  ! quietly via the exit intrinsic.
+  if (acc_get_num_devices (acc_device_nvidia) == 0) call exit
+
+  ! Bring the NVIDIA device type up.
+  call acc_init (acc_device_nvidia)
+
+  ! Device 0 must be selectable and reported back.
+  dev = 0
+  call acc_set_device_num (dev, acc_device_nvidia)
+  if (acc_get_device_num (acc_device_nvidia) /= 0) call abort ()
+
+  ! Repeat with device 1 when more than one device is reported.
+  if (acc_get_num_devices (acc_device_nvidia) > 1) then
+    dev = 1
+    call acc_set_device_num (dev, acc_device_nvidia)
+    if (acc_get_device_num (acc_device_nvidia) /= 1) call abort ()
+  end if
+
+  ! Release the device again.
+  call acc_shutdown (acc_device_nvidia)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
new file mode 100644
index 0000000..401ad66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
@@ -0,0 +1,35 @@ 
+! { dg-do run }
+
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer :: id
+
+  ! Exactly one host device is expected, and no device of type "none".
+  if (acc_get_num_devices (acc_device_host) /= 1) call abort ()
+  if (acc_get_num_devices (acc_device_none) /= 0) call abort ()
+
+  ! After initialising the host device, host must be the reported type.
+  call acc_init (acc_device_host)
+  if (acc_get_device_type () /= acc_device_host) call abort ()
+
+  ! Selecting the host type explicitly must report host as well.
+  call acc_set_device_type (acc_device_host)
+  if (acc_get_device_type () /= acc_device_host) call abort ()
+
+  ! Device number 0 must be selectable and reported back.
+  id = 0
+  call acc_set_device_num (id, acc_device_host)
+  if (acc_get_device_num (acc_device_host) /= 0) call abort ()
+
+  ! This program launched no asynchronous work, so the test must succeed.
+  if (.not. acc_async_test (id)) call abort ()
+
+  ! Exercise both wait entry points.
+  call acc_wait (id)
+  call acc_wait_all ()
+
+  ! Shut the host device down again.
+  call acc_shutdown (acc_device_host)
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
new file mode 100644
index 0000000..422df53
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
@@ -0,0 +1,31 @@ 
+! { dg-do run }
+
+program main
+  implicit none
+  include "openacc_lib.h"
+
+  integer :: dev
+
+  ! Without any NVIDIA device there is nothing to check, so leave
+  ! quietly via the exit intrinsic.
+  if (acc_get_num_devices (acc_device_nvidia) == 0) call exit
+
+  ! Bring the NVIDIA device type up.
+  call acc_init (acc_device_nvidia)
+
+  ! Device 0 must be selectable and reported back.
+  dev = 0
+  call acc_set_device_num (dev, acc_device_nvidia)
+  if (acc_get_device_num (acc_device_nvidia) /= 0) call abort ()
+
+  ! Repeat with device 1 when more than one device is reported.
+  if (acc_get_num_devices (acc_device_nvidia) > 1) then
+    dev = 1
+    call acc_set_device_num (dev, acc_device_nvidia)
+    if (acc_get_device_num (acc_device_nvidia) /= 1) call abort ()
+  end if
+
+  ! Release the device again.
+  call acc_shutdown (acc_device_nvidia)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
new file mode 100644
index 0000000..ad758b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
@@ -0,0 +1,83 @@ 
+! { dg-do run }
+
+program main  ! acc_copyin / acc_is_present on 3-D arrays of three types.
+  use openacc
+  use iso_c_binding
+  implicit none
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)  ! Bytes in one default integer.
+  integer, parameter :: c_size = sizeof (c)  ! Bytes in one default complex.
+  integer, parameter :: r_size = sizeof (r)  ! Bytes in one default real.
+  ! Nothing to test when no NVIDIA device is reported.
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+  ! Each element is queried with the byte length of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+  ! Zero all three arrays when CLEAR is true, else fill them from the indices.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical, intent (in) :: clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+  ! Loop bounds are taken from a_i and applied to all three arrays.
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90
new file mode 100644
index 0000000..ad758b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90
@@ -0,0 +1,83 @@ 
+! { dg-do run }
+
+program main  ! acc_copyin / acc_is_present on 3-D arrays of three types.
+  use openacc
+  use iso_c_binding
+  implicit none
+
+  integer, target :: a_3d_i(10, 10, 10)
+  complex a_3d_c(10, 10, 10)
+  real a_3d_r(10, 10, 10)
+
+  integer i, j, k
+  complex c
+  real r
+  integer, parameter :: i_size = sizeof (i)  ! Bytes in one default integer.
+  integer, parameter :: c_size = sizeof (c)  ! Bytes in one default complex.
+  integer, parameter :: r_size = sizeof (r)  ! Bytes in one default real.
+  ! Nothing to test when no NVIDIA device is reported.
+  if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+  call acc_init (acc_device_nvidia)
+
+  call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+  call acc_copyin (a_3d_i)
+  call acc_copyin (a_3d_c)
+  call acc_copyin (a_3d_r)
+
+  if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+  if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+  ! Each element is queried with the byte length of its own type.
+  do i = 1, 10
+    do j = 1, 10
+      do k = 1, 10
+        if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+        if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+      end do
+    end do
+  end do
+
+  call acc_shutdown (acc_device_nvidia)
+
+contains
+  ! Zero all three arrays when CLEAR is true, else fill them from the indices.
+  subroutine set3d (clear, a_i, a_c, a_r)
+  logical, intent (in) :: clear
+  integer, dimension (:,:,:), intent (inout) :: a_i
+  complex, dimension (:,:,:), intent (inout) :: a_c
+  real, dimension (:,:,:), intent (inout) :: a_r
+
+  integer i, j, k
+  integer lb1, ub1, lb2, ub2, lb3, ub3
+  ! Loop bounds are taken from a_i and applied to all three arrays.
+  lb1 = lbound (a_i, 1)
+  ub1 = ubound (a_i, 1)
+
+  lb2 = lbound (a_i, 2)
+  ub2 = ubound (a_i, 2)
+
+  lb3 = lbound (a_i, 3)
+  ub3 = ubound (a_i, 3)
+
+  do i = lb1, ub1
+    do j = lb2, ub2
+      do k = lb3, ub3
+        if (clear) then
+          a_i(i, j, k) = 0
+          a_c(i, j, k) = cmplx (0.0, 0.0)
+          a_r(i, j, k) = 0.0
+        else
+          a_i(i, j, k) = i
+          a_c(i, j, k) = cmplx (i, j)
+          a_r(i, j, k) = i
+        end if
+      end do
+    end do
+  end do
+
+  end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
new file mode 100644
index 0000000..082dd8a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
@@ -0,0 +1,97 @@ 
+program map  ! Runs the same fill loop under several OpenACC data clauses.
+  integer, parameter     :: n = 20, c = 10  ! c is not referenced below.
+  integer                :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY: a is filled inside the parallel region; b is the host reference.
+
+  !$acc parallel copy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT: a is zeroed on the host first, then refilled in the region.
+
+  a(:) = 0
+
+  !$acc parallel copyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n  ! Inline element comparison, followed by the check call.
+     if (a(i) .ne. b(i)) call abort
+  end do
+  call check (a, b, n)
+
+  ! COPYIN: b is passed with copyin, although the loop only writes a.
+
+  a(:) = 0
+
+  !$acc parallel copyout (a) copyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY (a is not re-zeroed before this region).
+
+  !$acc parallel pcopy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT: same as COPYOUT, using the pcopyout spelling.
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN: same as COPYIN, using pcopyout / pcopyin.
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a) pcopyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+end program map
+
+subroutine check (a, b, n)
+  ! Abort unless the N elements of A and B agree pairwise.
+  integer :: n
+  integer :: a(n), b(n)
+
+  ! A whole-array comparison stands in for an explicit element loop.
+  if (any (a /= b)) call abort
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
new file mode 100644
index 0000000..a5e1fcb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
@@ -0,0 +1,21 @@ 
+! PR middle-end/63247
+
+program test  ! Copies the section a(2:4) of an integer(kind=2) array.
+  implicit none
+
+  integer(kind=2) a(4)
+
+  a = 10;
+
+  !$acc parallel copy(a(2:4))
+  a(2) = 52
+  a(3) = 53
+  a(4) = 54
+  !$acc end parallel
+
+  if (a(1) .ne. 10) call abort  ! a(1) lies outside the copied section.
+  if (a(2) .ne. 52) call abort
+  if (a(3) .ne. 53) call abort
+  if (a(4) .ne. 54) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
new file mode 100644
index 0000000..1a1d4c7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
@@ -0,0 +1,229 @@ 
+! { dg-do run }
+
+program test
+  implicit none
+  ! Rank-1, rank-2 and rank-3 allocatables; every test below maps one of
+  ! them through explicit bounds in its data clause.
+  integer, allocatable :: a1(:), b1(:), c1(:)
+  integer, allocatable :: b2(:,:)
+  integer, allocatable :: c3(:,:,:)
+
+  ! Rank 1, bounds 1:5, whole array in a copy clause.
+  allocate (a1(5))
+  if (.not. allocated (a1)) call abort ()
+  a1 = 10
+
+  !$acc parallel copy(a1(1:5))
+  a1(1) = 1
+  a1(2) = 2
+  a1(3) = 3
+  a1(4) = 4
+  a1(5) = 5
+  !$acc end parallel
+
+  if (a1(1) /= 1) call abort ()
+  if (a1(2) /= 2) call abort ()
+  if (a1(3) /= 3) call abort ()
+  if (a1(4) /= 4) call abort ()
+  if (a1(5) /= 5) call abort ()
+
+  deallocate (a1)
+
+  ! Rank 1 again, now allocated and mapped with bounds 0:4.
+  allocate (a1(0:4))
+  if (.not. allocated (a1)) call abort ()
+  a1 = 10
+
+  !$acc parallel copy(a1(0:4))
+  a1(0) = 1
+  a1(1) = 2
+  a1(2) = 3
+  a1(3) = 4
+  a1(4) = 5
+  !$acc end parallel
+
+  if (a1(0) /= 1) call abort ()
+  if (a1(1) /= 2) call abort ()
+  if (a1(2) /= 3) call abort ()
+  if (a1(3) /= 4) call abort ()
+  if (a1(4) /= 5) call abort ()
+
+  deallocate (a1)
+
+  ! Rank 2, bounds 1:5 in both dimensions; only the diagonal is written.
+  allocate (b2(5,5))
+  if (.not. allocated (b2)) call abort ()
+  b2 = 11
+
+  !$acc parallel copy(b2(1:5,1:5))
+  b2(1,1) = 1
+  b2(2,2) = 2
+  b2(3,3) = 3
+  b2(4,4) = 4
+  b2(5,5) = 5
+  !$acc end parallel
+
+  if (b2(1,1) /= 1) call abort ()
+  if (b2(2,2) /= 2) call abort ()
+  if (b2(3,3) /= 3) call abort ()
+  if (b2(4,4) /= 4) call abort ()
+  if (b2(5,5) /= 5) call abort ()
+
+  deallocate (b2)
+
+  ! Rank 2 with bounds 0:4 in both dimensions.
+  allocate (b2(0:4,0:4))
+  if (.not. allocated (b2)) call abort ()
+  b2 = 11
+
+  !$acc parallel copy(b2(0:4,0:4))
+  b2(0,0) = 1
+  b2(1,1) = 2
+  b2(2,2) = 3
+  b2(3,3) = 4
+  b2(4,4) = 5
+  !$acc end parallel
+
+  if (b2(0,0) /= 1) call abort ()
+  if (b2(1,1) /= 2) call abort ()
+  if (b2(2,2) /= 3) call abort ()
+  if (b2(3,3) /= 4) call abort ()
+  if (b2(4,4) /= 5) call abort ()
+
+  deallocate (b2)
+
+  ! Rank 3, bounds 1:5 in every dimension; only the diagonal is written.
+  allocate (c3(5,5,5))
+  if (.not. allocated (c3)) call abort ()
+  c3 = 12
+
+  !$acc parallel copy(c3(1:5,1:5,1:5))
+  c3(1,1,1) = 1
+  c3(2,2,2) = 2
+  c3(3,3,3) = 3
+  c3(4,4,4) = 4
+  c3(5,5,5) = 5
+  !$acc end parallel
+
+  if (c3(1,1,1) /= 1) call abort ()
+  if (c3(2,2,2) /= 2) call abort ()
+  if (c3(3,3,3) /= 3) call abort ()
+  if (c3(4,4,4) /= 4) call abort ()
+  if (c3(5,5,5) /= 5) call abort ()
+
+  deallocate (c3)
+
+  ! Rank 3 with bounds 0:4 in every dimension.
+  allocate (c3(0:4,0:4,0:4))
+  if (.not. allocated (c3)) call abort ()
+  c3 = 12
+
+  !$acc parallel copy(c3(0:4,0:4,0:4))
+  c3(0,0,0) = 1
+  c3(1,1,1) = 2
+  c3(2,2,2) = 3
+  c3(3,3,3) = 4
+  c3(4,4,4) = 5
+  !$acc end parallel
+
+  if (c3(0,0,0) /= 1) call abort ()
+  if (c3(1,1,1) /= 2) call abort ()
+  if (c3(2,2,2) /= 3) call abort ()
+  if (c3(3,3,3) /= 4) call abort ()
+  if (c3(4,4,4) /= 5) call abort ()
+
+  deallocate (c3)
+
+  ! Three rank-1 arrays with bounds 1:5: a1 is copyin, c1 is create and b1
+  ! is copyout; the region copies a1 into c1 and then c1 into b1.
+  allocate (a1(5))
+  if (.not. allocated (a1)) call abort ()
+  allocate (b1(5))
+  if (.not. allocated (b1)) call abort ()
+  allocate (c1(5))
+  if (.not. allocated (c1)) call abort ()
+
+  a1 = 10
+  b1 = 3
+  c1 = 7
+
+  !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5))
+  c1(1) = a1(1)
+  c1(2) = a1(2)
+  c1(3) = a1(3)
+  c1(4) = a1(4)
+  c1(5) = a1(5)
+
+  b1(1) = c1(1)
+  b1(2) = c1(2)
+  b1(3) = c1(3)
+  b1(4) = c1(4)
+  b1(5) = c1(5)
+  !$acc end parallel
+
+  if (b1(1) /= 10) call abort ()
+  if (b1(2) /= 10) call abort ()
+  if (b1(3) /= 10) call abort ()
+  if (b1(4) /= 10) call abort ()
+  if (b1(5) /= 10) call abort ()
+
+  deallocate (a1)
+  deallocate (b1)
+  deallocate (c1)
+
+  ! The same copyin/create/copyout chain, this time with bounds 0:4 on all
+  ! three arrays.
+  allocate (a1(0:4))
+  if (.not. allocated (a1)) call abort ()
+  allocate (b1(0:4))
+  if (.not. allocated (b1)) call abort ()
+  allocate (c1(0:4))
+  if (.not. allocated (c1)) call abort ()
+
+  a1 = 10
+  b1 = 3
+  c1 = 7
+
+  !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4))
+  c1(0) = a1(0)
+  c1(1) = a1(1)
+  c1(2) = a1(2)
+  c1(3) = a1(3)
+  c1(4) = a1(4)
+
+  b1(0) = c1(0)
+  b1(1) = c1(1)
+  b1(2) = c1(2)
+  b1(3) = c1(3)
+  b1(4) = c1(4)
+  !$acc end parallel
+
+  if (b1(0) /= 10) call abort ()
+  if (b1(1) /= 10) call abort ()
+  if (b1(2) /= 10) call abort ()
+  if (b1(3) /= 10) call abort ()
+  if (b1(4) /= 10) call abort ()
+
+  deallocate (a1)
+  deallocate (b1)
+  deallocate (c1)
+
+  ! Rank 1, bounds 1:5, but only a1(2:3) is mapped; the rest must stay 10.
+  allocate (a1(5))
+  if (.not. allocated (a1)) call abort ()
+  a1 = 10
+
+  !$acc parallel copy(a1(2:3))
+  a1(2) = 2
+  a1(3) = 3
+  !$acc end parallel
+
+  if (a1(1) /= 10) call abort ()
+  if (a1(2) /= 2) call abort ()
+  if (a1(3) /= 3) call abort ()
+  if (a1(4) /= 10) call abort ()
+  if (a1(5) /= 10) call abort ()
+
+  deallocate (a1)
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
new file mode 100644
index 0000000..b39414f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
@@ -0,0 +1,97 @@ 
+program subarrays
+  ! Run each OpenACC data clause over the full-extent sections a(1:n), b(1:n).
+  implicit none
+  integer, parameter :: n = 20
+  integer            :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc parallel copy (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  ! Host-side reference values; every later check compares a against b.
+  b(:) = (/ (i, i = 1, n) /)
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel copyout (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! COPYIN: read b on the device so that the copyin transfer is really used.
+
+  a(:) = 0
+
+  !$acc parallel copyout (a(1:n)) copyin (b(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY (a is reset so a stale result cannot pass the check)
+
+  a(:) = 0
+
+  !$acc parallel pcopy (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN: as for COPYIN, the device loop reads b.
+
+  a(:) = 0
+
+  !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n))
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+end program subarrays
+
+! Verification helper: returns normally only if a(1:n) equals b(1:n).
+subroutine check (a, b, n)
+  integer :: n
+  integer :: a(n), b(n)
+
+  ! all () over the element-wise equality replaces the explicit index loop.
+  if (.not. all (a == b)) call abort
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90
new file mode 100644
index 0000000..81799f6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90
@@ -0,0 +1,100 @@ 
+program subarrays
+  ! Arrays hold n elements, but every data clause maps only low:high.
+  integer, parameter :: n = 20, c = 10
+  integer, parameter :: low = 5, high = 10
+  integer :: i, a(n), b(n)
+
+  a = 0
+  b = 0
+
+  ! COPY: the device fills a(low:high); b then gets the expected values.
+
+  !$acc parallel copy (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  b(low:high) = (/ (i, i = low, high) /)
+
+  call check (a, b, n)
+
+  ! COPYOUT: a(low:high) is written on the device and transferred back.
+
+  a = 0
+
+  !$acc parallel copyout (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  ! Compare the mapped section on its own, then all n elements (outside
+  ! low:high both arrays were only ever set to zero on the host).
+  if (any (a(low:high) /= b(low:high))) call abort
+  call check (a, b, n)
+
+  ! COPYIN: the device loop reads b(low:high) and stores it into a.
+
+  a = 0
+
+  !$acc parallel copyout (a(low:high)) copyin (b(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY: same computation as COPY, using the pcopy spelling.
+
+  a = 0
+
+  !$acc parallel pcopy (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT: same computation as COPYOUT, using pcopyout.
+
+  a = 0
+
+  !$acc parallel pcopyout (a(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN: same computation as COPYIN, using pcopyin for b.
+
+  a = 0
+
+  !$acc parallel pcopyout (a(low:high)) &
+  !$acc & pcopyin (b(low:high))
+  !$acc loop
+  do i = low, high
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+end program subarrays
+
+! Abort if any of the n element pairs a(k), b(k) differ.
+subroutine check (a, b, n)
+  integer :: n
+  integer :: a(n), b(n)
+
+  ! count () yields the number of mismatching positions; zero means success.
+  if (count (a /= b) > 0) call abort
+end subroutine check
-- 
1.7.10.4