diff mbox

[HSA] Load an HSA runtime via dlopen mechanism

Message ID 5715EB44.1030903@suse.cz
State New
Headers show

Commit Message

Martin Liška April 19, 2016, 8:24 a.m. UTC
Hello.

After brief discussions about packaging of an HSA runtime, we've decided to load
an HSA runtime via dlopen mechanism. Following patch introduces necessary header
files and all functions within the HSA plug-in are loaded via dlsym.

Patch survives HSA regression tests, installed to the HSA branch as r235189.

Thanks,
Martin
diff mbox

Patch

From c93babc050cc31e1d370240568414dfa0f02f5d8 Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Thu, 14 Apr 2016 14:25:58 +0200
Subject: [PATCH] Load an HSA runtime via dlopen mechanism

gcc/ChangeLog:

2016-04-19  Martin Liska  <mliska@suse.cz>

	* doc/install.texi: Remove entry about --with-hsa-kmt-lib.

libgomp/ChangeLog:

2016-04-19  Martin Liska  <mliska@suse.cz>

	* config.h.in: Introduce HSA_RUNTIME_LIB.
	* configure: Regerenated.
	* hsa.h: New file.
	* hsa_ext_finalize.h: New file.
	* plugin/configfrag.ac: Remove hsa-kmt-lib test.
	* plugin/plugin-hsa.c (struct hsa_runtime_fn_info): New
	structure.
	(init_enviroment_variables): Load newly introduced ENV
	variables.
	(hsa_warn): Call a function via hsa_fns data structure.
	(hsa_fatal): Likewise.
	(init_hsa_runtime_functions): Likewise.
	(suitable_hsa_agent_p): Likewise.
	(init_hsa_context): Likewise.
	(get_kernarg_memory_region): Likewise.
	(GOMP_OFFLOAD_init_device): Likewise.
	(destroy_hsa_program): Likewise.
	(create_and_finalize_hsa_program): Likewise.
	(create_single_kernel_dispatch): Likewise.
	(release_kernel_dispatch): Likewise.
	(init_single_kernel): Likewise.
	(GOMP_OFFLOAD_run): Likewise.
	(GOMP_OFFLOAD_fini_device): Likewise.
	* testsuite/lib/libgomp.exp: Remove hsa_kmt_lib support.
	* testsuite/libgomp-test-support.exp.in: Likewise.
---
 gcc/doc/install.texi                          |   6 -
 libgomp/config.h.in                           |   3 +
 libgomp/configure                             |  52 +--
 libgomp/hsa.h                                 | 630 ++++++++++++++++++++++++++
 libgomp/hsa_ext_finalize.h                    | 265 +++++++++++
 libgomp/plugin/configfrag.ac                  |  28 +-
 libgomp/plugin/plugin-hsa.c                   | 312 ++++++++++---
 libgomp/testsuite/lib/libgomp.exp             |   4 -
 libgomp/testsuite/libgomp-test-support.exp.in |   1 -
 9 files changed, 1161 insertions(+), 140 deletions(-)
 create mode 100644 libgomp/hsa.h
 create mode 100644 libgomp/hsa_ext_finalize.h

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 4268036..644f9dd 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -2012,12 +2012,6 @@  explicitly specify the directory where they are installed.  The
 shorthand for
 @option{--with-hsa-runtime-lib=@/@var{hsainstalldir}/lib} and
 @option{--with-hsa-runtime-include=@/@var{hsainstalldir}/include}.
-
-@item --with-hsa-kmt-lib=@var{pathname}
-
-If you configure GCC with HSA offloading but do not have the HSA
-KMT library installed in a standard location then you can
-explicitly specify the directory where it resides.
 @end table
 
 @subheading Cross-Compiler-Specific Options
diff --git a/libgomp/config.h.in b/libgomp/config.h.in
index 226ac53..4483a84 100644
--- a/libgomp/config.h.in
+++ b/libgomp/config.h.in
@@ -125,6 +125,9 @@ 
 /* Define to 1 if the HSA plugin is built, 0 if not. */
 #undef PLUGIN_HSA
 
+/* Define path to HSA runtime.  */
+#undef HSA_RUNTIME_LIB
+
 /* Define to 1 if the NVIDIA plugin is built, 0 if not. */
 #undef PLUGIN_NVPTX
 
diff --git a/libgomp/configure b/libgomp/configure
index 8d03eb6..9a09369 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -637,7 +637,6 @@  PLUGIN_HSA_LIBS
 PLUGIN_HSA_LDFLAGS
 PLUGIN_HSA_CPPFLAGS
 PLUGIN_HSA
-HSA_KMT_LIB
 HSA_RUNTIME_LIB
 HSA_RUNTIME_INCLUDE
 PLUGIN_NVPTX_LIBS
@@ -794,7 +793,6 @@  with_cuda_driver_lib
 with_hsa_runtime
 with_hsa_runtime_include
 with_hsa_runtime_lib
-with_hsa_kmt_lib
 enable_linux_futex
 enable_tls
 enable_symvers
@@ -1476,7 +1474,6 @@  Optional Packages:
   --with-hsa-runtime-lib=PATH
                           specify directory for the installed HSA run-time
                           library
-  --with-hsa-kmt-lib=PATH specify directory for installed HSA KMT library.
 
 Some influential environment variables:
   CC          C compiler command
@@ -11145,7 +11142,7 @@  else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11148 "configure"
+#line 11145 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11251,7 +11248,7 @@  else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11254 "configure"
+#line 11251 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -15293,22 +15290,6 @@  if test "x$HSA_RUNTIME_LIB" != x; then
   HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
 fi
 
-HSA_KMT_LIB=
-
-HSA_KMT_LDFLAGS=
-
-# Check whether --with-hsa-kmt-lib was given.
-if test "${with_hsa_kmt_lib+set}" = set; then :
-  withval=$with_hsa_kmt_lib;
-fi
-
-if test "x$with_hsa_kmt_lib" != x; then
-  HSA_KMT_LIB=$with_hsa_kmt_lib
-fi
-if test "x$HSA_KMT_LIB" != x; then
-  HSA_KMT_LDFLAGS=-L$HSA_KMT_LIB
-fi
-
 PLUGIN_HSA=0
 PLUGIN_HSA_CPPFLAGS=
 PLUGIN_HSA_LDFLAGS=
@@ -15318,8 +15299,6 @@  PLUGIN_HSA_LIBS=
 
 
 
-
-
 # Get offload targets and path to install tree of offloading compiler.
 offload_additional_options=
 offload_additional_lib_paths=
@@ -15384,8 +15363,8 @@  rm -f core conftest.err conftest.$ac_objext \
 	        tgt_name=hsa
 	        PLUGIN_HSA=$tgt
 	        PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
-	        PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS $HSA_KMT_LDFLAGS"
-	        PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt"
+	        PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
+	        PLUGIN_HSA_LIBS="-ldl"
 
 	        PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
 	        CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
@@ -15394,22 +15373,7 @@  rm -f core conftest.err conftest.$ac_objext \
 	        PLUGIN_HSA_save_LIBS=$LIBS
 	        LIBS="$PLUGIN_HSA_LIBS $LIBS"
 
-	        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include "hsa.h"
-int
-main ()
-{
-hsa_status_t status = hsa_init ()
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  PLUGIN_HSA=1
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
+	        PLUGIN_HSA=1
 	        CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
 	        LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
 	        LIBS=$PLUGIN_HSA_save_LIBS
@@ -15485,6 +15449,12 @@  _ACEOF
 
 
 
+cat >>confdefs.h <<_ACEOF
+#define HSA_RUNTIME_LIB "$HSA_RUNTIME_LIB/"
+_ACEOF
+
+
+
 # Check for functions needed.
 for ac_func in getloadavg clock_gettime strtoull
 do :
diff --git a/libgomp/hsa.h b/libgomp/hsa.h
new file mode 100644
index 0000000..6765751
--- /dev/null
+++ b/libgomp/hsa.h
@@ -0,0 +1,630 @@ 
+/* HSA runtime API 1.0.1 representation description.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.
+
+The contents of the file was created by extracting data structures, enum,
+typedef and other definitions from HSA Runtime Programmer’s Reference Manual
+Version 1.0 (http://www.hsafoundation.com/standards/).
+
+HTML version is provided on the following link:
+http://www.hsafoundation.com/html/Content/Runtime/Topics/Runtime_title_page.htm
+*/
+
+#ifndef _HSA_H
+#define _HSA_H 1
+
+#define HSA_LARGE_MODEL 1
+
+typedef struct hsa_signal_s { uint64_t handle; } hsa_signal_t;
+typedef enum {
+  HSA_QUEUE_TYPE_MULTI = 0,
+  HSA_QUEUE_TYPE_SINGLE = 1
+} hsa_queue_type_t;
+
+typedef enum { HSA_PROFILE_BASE = 0, HSA_PROFILE_FULL = 1 } hsa_profile_t;
+typedef struct hsa_region_s { uint64_t handle; } hsa_region_t;
+typedef enum {
+  HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0,
+  HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1,
+  HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2,
+  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
+  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4,
+  HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20,
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21,
+  HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5,
+  HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17,
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9,
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22,
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
+  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23,
+  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
+} hsa_executable_symbol_info_t;
+typedef enum {
+  HSA_REGION_GLOBAL_FLAG_KERNARG = 1,
+  HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2,
+  HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4
+} hsa_region_global_flag_t;
+typedef struct hsa_code_object_s { uint64_t handle; } hsa_code_object_t;
+typedef enum {
+  HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2
+} hsa_kernel_dispatch_packet_setup_width_t;
+typedef enum {
+  HSA_DEVICE_TYPE_CPU = 0,
+  HSA_DEVICE_TYPE_GPU = 1,
+  HSA_DEVICE_TYPE_DSP = 2
+} hsa_device_type_t;
+typedef enum {
+  HSA_STATUS_SUCCESS = 0x0,
+  HSA_STATUS_INFO_BREAK = 0x1,
+  HSA_STATUS_ERROR = 0x1000,
+  HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001,
+  HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002,
+  HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003,
+  HSA_STATUS_ERROR_INVALID_AGENT = 0x1004,
+  HSA_STATUS_ERROR_INVALID_REGION = 0x1005,
+  HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006,
+  HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007,
+  HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008,
+  HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009,
+  HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A,
+  HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
+  HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C,
+  HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D,
+  HSA_STATUS_ERROR_INVALID_INDEX = 0x100E,
+  HSA_STATUS_ERROR_INVALID_ISA = 0x100F,
+  HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017,
+  HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
+  HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011,
+  HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012,
+  HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
+  HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014,
+  HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015,
+  HSA_STATUS_ERROR_EXCEPTION = 0x1016
+} hsa_status_t;
+typedef enum {
+  HSA_EXTENSION_FINALIZER = 0,
+  HSA_EXTENSION_IMAGES = 1
+} hsa_extension_t;
+typedef struct hsa_queue_s {
+  hsa_queue_type_t type;
+  uint32_t features;
+
+#ifdef HSA_LARGE_MODEL
+  void *base_address;
+#elif defined HSA_LITTLE_ENDIAN
+  void *base_address;
+  uint32_t reserved0;
+#else
+  uint32_t reserved0;
+  void *base_address;
+#endif
+
+  hsa_signal_t doorbell_signal;
+  uint32_t size;
+  uint32_t reserved1;
+  uint64_t id;
+} hsa_queue_t;
+typedef struct hsa_agent_dispatch_packet_s {
+  uint16_t header;
+  uint16_t type;
+  uint32_t reserved0;
+
+#ifdef HSA_LARGE_MODEL
+  void *return_address;
+#elif defined HSA_LITTLE_ENDIAN
+  void *return_address;
+  uint32_t reserved1;
+#else
+  uint32_t reserved1;
+  void *return_address;
+#endif
+  uint64_t arg[4];
+  uint64_t reserved2;
+  hsa_signal_t completion_signal;
+} hsa_agent_dispatch_packet_t;
+typedef enum {
+  HSA_CODE_SYMBOL_INFO_TYPE = 0,
+  HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1,
+  HSA_CODE_SYMBOL_INFO_NAME = 2,
+  HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
+  HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4,
+  HSA_CODE_SYMBOL_INFO_LINKAGE = 5,
+  HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17,
+  HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
+  HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
+  HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
+  HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9,
+  HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
+  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
+  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
+  HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
+  HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
+  HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
+  HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
+} hsa_code_symbol_info_t;
+typedef enum {
+  HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1,
+  HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2
+} hsa_queue_feature_t;
+typedef enum {
+  HSA_VARIABLE_ALLOCATION_AGENT = 0,
+  HSA_VARIABLE_ALLOCATION_PROGRAM = 1
+} hsa_variable_allocation_t;
+typedef enum {
+  HSA_FENCE_SCOPE_NONE = 0,
+  HSA_FENCE_SCOPE_AGENT = 1,
+  HSA_FENCE_SCOPE_SYSTEM = 2
+} hsa_fence_scope_t;
+typedef struct hsa_agent_s { uint64_t handle; } hsa_agent_t;
+typedef enum { HSA_CODE_OBJECT_TYPE_PROGRAM = 0 } hsa_code_object_type_t;
+typedef enum {
+  HSA_SIGNAL_CONDITION_EQ = 0,
+  HSA_SIGNAL_CONDITION_NE = 1,
+  HSA_SIGNAL_CONDITION_LT = 2,
+  HSA_SIGNAL_CONDITION_GTE = 3
+} hsa_signal_condition_t;
+typedef enum {
+  HSA_EXECUTABLE_STATE_UNFROZEN = 0,
+  HSA_EXECUTABLE_STATE_FROZEN = 1
+} hsa_executable_state_t;
+typedef enum {
+  HSA_ENDIANNESS_LITTLE = 0,
+  HSA_ENDIANNESS_BIG = 1
+} hsa_endianness_t;
+typedef enum {
+  HSA_MACHINE_MODEL_SMALL = 0,
+  HSA_MACHINE_MODEL_LARGE = 1
+} hsa_machine_model_t;
+typedef enum {
+  HSA_AGENT_INFO_NAME = 0,
+  HSA_AGENT_INFO_VENDOR_NAME = 1,
+  HSA_AGENT_INFO_FEATURE = 2,
+  HSA_AGENT_INFO_MACHINE_MODEL = 3,
+  HSA_AGENT_INFO_PROFILE = 4,
+  HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5,
+  HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23,
+  HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
+  HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
+  HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
+  HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
+  HSA_AGENT_INFO_GRID_MAX_DIM = 9,
+  HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
+  HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
+  HSA_AGENT_INFO_QUEUES_MAX = 12,
+  HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
+  HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
+  HSA_AGENT_INFO_QUEUE_TYPE = 15,
+  HSA_AGENT_INFO_NODE = 16,
+  HSA_AGENT_INFO_DEVICE = 17,
+  HSA_AGENT_INFO_CACHE_SIZE = 18,
+  HSA_AGENT_INFO_ISA = 19,
+  HSA_AGENT_INFO_EXTENSIONS = 20,
+  HSA_AGENT_INFO_VERSION_MAJOR = 21,
+  HSA_AGENT_INFO_VERSION_MINOR = 22
+} hsa_agent_info_t;
+typedef struct hsa_barrier_and_packet_s {
+  uint16_t header;
+  uint16_t reserved0;
+  uint32_t reserved1;
+  hsa_signal_t dep_signal[5];
+  uint64_t reserved2;
+  hsa_signal_t completion_signal;
+} hsa_barrier_and_packet_t;
+typedef struct hsa_dim3_s {
+  uint32_t x;
+  uint32_t y;
+  uint32_t z;
+} hsa_dim3_t;
+typedef enum {
+  HSA_ACCESS_PERMISSION_RO = 1,
+  HSA_ACCESS_PERMISSION_WO = 2,
+  HSA_ACCESS_PERMISSION_RW = 3
+} hsa_access_permission_t;
+typedef enum {
+  HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
+  HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
+} hsa_agent_feature_t;
+typedef enum {
+  HSA_WAIT_STATE_BLOCKED = 0,
+  HSA_WAIT_STATE_ACTIVE = 1
+} hsa_wait_state_t;
+typedef struct hsa_executable_s { uint64_t handle; } hsa_executable_t;
+typedef enum {
+  HSA_REGION_SEGMENT_GLOBAL = 0,
+  HSA_REGION_SEGMENT_READONLY = 1,
+  HSA_REGION_SEGMENT_PRIVATE = 2,
+  HSA_REGION_SEGMENT_GROUP = 3
+} hsa_region_segment_t;
+typedef enum {
+  HSA_REGION_INFO_SEGMENT = 0,
+  HSA_REGION_INFO_GLOBAL_FLAGS = 1,
+  HSA_REGION_INFO_SIZE = 2,
+  HSA_REGION_INFO_ALLOC_MAX_SIZE = 4,
+  HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5,
+  HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6,
+  HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7
+} hsa_region_info_t;
+typedef enum {
+  HSA_ISA_INFO_NAME_LENGTH = 0,
+  HSA_ISA_INFO_NAME = 1,
+  HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2,
+  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3,
+  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4
+} hsa_isa_info_t;
+typedef enum {
+  HSA_VARIABLE_SEGMENT_GLOBAL = 0,
+  HSA_VARIABLE_SEGMENT_READONLY = 1
+} hsa_variable_segment_t;
+typedef struct hsa_callback_data_s { uint64_t handle; } hsa_callback_data_t;
+typedef enum {
+  HSA_SYMBOL_KIND_VARIABLE = 0,
+  HSA_SYMBOL_KIND_KERNEL = 1,
+  HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2
+} hsa_symbol_kind_t;
+typedef struct hsa_kernel_dispatch_packet_s {
+  uint16_t header;
+  uint16_t setup;
+  uint16_t workgroup_size_x;
+  uint16_t workgroup_size_y;
+  uint16_t workgroup_size_z;
+  uint16_t reserved0;
+  uint32_t grid_size_x;
+  uint32_t grid_size_y;
+  uint32_t grid_size_z;
+  uint32_t private_segment_size;
+  uint32_t group_segment_size;
+  uint64_t kernel_object;
+
+#ifdef HSA_LARGE_MODEL
+  void *kernarg_address;
+#elif defined HSA_LITTLE_ENDIAN
+  void *kernarg_address;
+  uint32_t reserved1;
+#else
+  uint32_t reserved1;
+  void *kernarg_address;
+#endif
+  uint64_t reserved2;
+  hsa_signal_t completion_signal;
+} hsa_kernel_dispatch_packet_t;
+typedef enum {
+  HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0,
+  HSA_PACKET_TYPE_INVALID = 1,
+  HSA_PACKET_TYPE_KERNEL_DISPATCH = 2,
+  HSA_PACKET_TYPE_BARRIER_AND = 3,
+  HSA_PACKET_TYPE_AGENT_DISPATCH = 4,
+  HSA_PACKET_TYPE_BARRIER_OR = 5
+} hsa_packet_type_t;
+typedef enum {
+  HSA_PACKET_HEADER_TYPE = 0,
+  HSA_PACKET_HEADER_BARRIER = 8,
+  HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9,
+  HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11
+} hsa_packet_header_t;
+typedef struct hsa_isa_s { uint64_t handle; } hsa_isa_t;
+typedef enum {
+  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0,
+  HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1,
+  HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2
+} hsa_default_float_rounding_mode_t;
+typedef struct hsa_code_symbol_s { uint64_t handle; } hsa_code_symbol_t;
+typedef struct hsa_executable_symbol_s {
+  uint64_t handle;
+} hsa_executable_symbol_t;
+#ifdef HSA_LARGE_MODEL
+typedef int64_t hsa_signal_value_t;
+#else
+typedef int32_t hsa_signal_value_t;
+#endif
+typedef enum {
+  HSA_EXCEPTION_POLICY_BREAK = 1,
+  HSA_EXCEPTION_POLICY_DETECT = 2
+} hsa_exception_policy_t;
+typedef enum {
+  HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
+  HSA_SYSTEM_INFO_VERSION_MINOR = 1,
+  HSA_SYSTEM_INFO_TIMESTAMP = 2,
+  HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3,
+  HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4,
+  HSA_SYSTEM_INFO_ENDIANNESS = 5,
+  HSA_SYSTEM_INFO_MACHINE_MODEL = 6,
+  HSA_SYSTEM_INFO_EXTENSIONS = 7
+} hsa_system_info_t;
+typedef enum {
+  HSA_EXECUTABLE_INFO_PROFILE = 1,
+  HSA_EXECUTABLE_INFO_STATE = 2
+} hsa_executable_info_t;
+typedef enum {
+  HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0
+} hsa_kernel_dispatch_packet_setup_t;
+typedef enum {
+  HSA_PACKET_HEADER_WIDTH_TYPE = 8,
+  HSA_PACKET_HEADER_WIDTH_BARRIER = 1,
+  HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2,
+  HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2
+} hsa_packet_header_width_t;
+typedef enum {
+  HSA_CODE_OBJECT_INFO_VERSION = 0,
+  HSA_CODE_OBJECT_INFO_TYPE = 1,
+  HSA_CODE_OBJECT_INFO_ISA = 2,
+  HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3,
+  HSA_CODE_OBJECT_INFO_PROFILE = 4,
+  HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5
+} hsa_code_object_info_t;
+typedef struct hsa_barrier_or_packet_s {
+  uint16_t header;
+  uint16_t reserved0;
+  uint32_t reserved1;
+  hsa_signal_t dep_signal[5];
+  uint64_t reserved2;
+  hsa_signal_t completion_signal;
+} hsa_barrier_or_packet_t;
+typedef enum {
+  HSA_SYMBOL_KIND_LINKAGE_MODULE = 0,
+  HSA_SYMBOL_KIND_LINKAGE_PROGRAM = 1,
+} hsa_symbol_kind_linkage_t;
+hsa_status_t hsa_executable_validate(hsa_executable_t executable,
+                                     uint32_t *result);
+uint64_t hsa_queue_add_write_index_acq_rel(const hsa_queue_t *queue,
+                                           uint64_t value);
+
+uint64_t hsa_queue_add_write_index_acquire(const hsa_queue_t *queue,
+                                           uint64_t value);
+
+uint64_t hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue,
+                                           uint64_t value);
+
+uint64_t hsa_queue_add_write_index_release(const hsa_queue_t *queue,
+                                           uint64_t value);
+hsa_status_t hsa_shut_down();
+void hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value);
+hsa_status_t hsa_executable_readonly_variable_define(
+    hsa_executable_t executable, hsa_agent_t agent, const char *variable_name,
+    void *address);
+hsa_status_t hsa_agent_extension_supported(uint16_t extension,
+                                           hsa_agent_t agent,
+                                           uint16_t version_major,
+                                           uint16_t version_minor,
+                                           bool *result);
+hsa_signal_value_t hsa_signal_load_acquire(hsa_signal_t signal);
+
+hsa_signal_value_t hsa_signal_load_relaxed(hsa_signal_t signal);
+hsa_status_t hsa_executable_get_info(hsa_executable_t executable,
+                                     hsa_executable_info_t attribute,
+                                     void *value);
+hsa_status_t hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent,
+                                                         void *data),
+                                void *data);
+void hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value);
+hsa_status_t
+hsa_executable_symbol_get_info(hsa_executable_symbol_t executable_symbol,
+                               hsa_executable_symbol_info_t attribute,
+                               void *value);
+void hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value);
+hsa_status_t hsa_code_object_get_info(hsa_code_object_t code_object,
+                                      hsa_code_object_info_t attribute,
+                                      void *value);
+hsa_status_t hsa_code_object_deserialize(void *serialized_code_object,
+                                         size_t serialized_code_object_size,
+                                         const char *options,
+                                         hsa_code_object_t *code_object);
+hsa_status_t hsa_status_string(hsa_status_t status, const char **status_string);
+hsa_status_t hsa_code_object_get_symbol(hsa_code_object_t code_object,
+                                        const char *symbol_name,
+                                        hsa_code_symbol_t *symbol);
+void hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value);
+hsa_status_t hsa_signal_destroy(hsa_signal_t signal);
+hsa_status_t hsa_system_get_extension_table(uint16_t extension,
+                                            uint16_t version_major,
+                                            uint16_t version_minor,
+                                            void *table);
+hsa_status_t hsa_agent_iterate_regions(
+    hsa_agent_t agent,
+    hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
+hsa_status_t hsa_executable_agent_global_variable_define(
+    hsa_executable_t executable, hsa_agent_t agent, const char *variable_name,
+    void *address);
+hsa_status_t hsa_queue_create(hsa_agent_t agent, uint32_t size,
+                              hsa_queue_type_t type,
+                              void (*callback)(hsa_status_t status,
+                                               hsa_queue_t *source, void *data),
+                              void *data, uint32_t private_segment_size,
+                              uint32_t group_segment_size, hsa_queue_t **queue);
+hsa_status_t hsa_isa_compatible(hsa_isa_t code_object_isa, hsa_isa_t agent_isa,
+                                bool *result);
+hsa_status_t hsa_code_object_serialize(
+    hsa_code_object_t code_object,
+    hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data,
+                                   void **address),
+    hsa_callback_data_t callback_data, const char *options,
+    void **serialized_code_object, size_t *serialized_code_object_size);
+hsa_status_t hsa_region_get_info(hsa_region_t region,
+                                 hsa_region_info_t attribute, void *value);
+hsa_status_t hsa_executable_freeze(hsa_extension_t executable,
+                                   const char *options);
+hsa_status_t hsa_system_extension_supported(uint16_t extension,
+                                            uint16_t version_major,
+                                            uint16_t version_minor,
+                                            bool *result);
+hsa_signal_value_t hsa_signal_wait_acquire(hsa_signal_t signal,
+                                           hsa_signal_condition_t condition,
+                                           hsa_signal_value_t compare_value,
+                                           uint64_t timeout_hint,
+                                           hsa_wait_state_t wait_state_hint);
+
+hsa_signal_value_t hsa_signal_wait_relaxed(hsa_signal_t signal,
+                                           hsa_signal_condition_t condition,
+                                           hsa_signal_value_t compare_value,
+                                           uint64_t timeout_hint,
+                                           hsa_wait_state_t wait_state_hint);
+hsa_status_t hsa_memory_copy(void *dst, const void *src, size_t size);
+hsa_status_t hsa_memory_free(void *ptr);
+hsa_status_t hsa_queue_destroy(hsa_queue_t *queue);
+hsa_status_t hsa_isa_from_name(const char *name, hsa_isa_t *isa);
+hsa_status_t hsa_isa_get_info(hsa_isa_t isa, hsa_isa_info_t attribute,
+                              uint32_t index, void *value);
+hsa_status_t hsa_signal_create(hsa_signal_value_t initial_value,
+                               uint32_t num_consumers,
+                               const hsa_agent_t *consumers,
+                               hsa_signal_t *signal);
+hsa_status_t hsa_code_symbol_get_info(hsa_code_symbol_t code_symbol,
+                                      hsa_code_symbol_info_t attribute,
+                                      void *value);
+hsa_signal_value_t hsa_signal_cas_acq_rel(hsa_signal_t signal,
+                                          hsa_signal_value_t expected,
+                                          hsa_signal_value_t value);
+
+hsa_signal_value_t hsa_signal_cas_acquire(hsa_signal_t signal,
+                                          hsa_signal_value_t expected,
+                                          hsa_signal_value_t value);
+
+hsa_signal_value_t hsa_signal_cas_relaxed(hsa_signal_t signal,
+                                          hsa_signal_value_t expected,
+                                          hsa_signal_value_t value);
+
+hsa_signal_value_t hsa_signal_cas_release(hsa_signal_t signal,
+                                          hsa_signal_value_t expected,
+                                          hsa_signal_value_t value);
+hsa_status_t hsa_code_object_iterate_symbols(
+    hsa_code_object_t code_object,
+    hsa_status_t (*callback)(hsa_code_object_t code_object,
+                             hsa_code_symbol_t symbol, void *data),
+    void *data);
+void hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue,
+                                        uint64_t value);
+
+void hsa_queue_store_read_index_release(const hsa_queue_t *queue,
+                                        uint64_t value);
+hsa_status_t hsa_memory_assign_agent(void *ptr, hsa_agent_t agent,
+                                     hsa_access_permission_t access);
+hsa_status_t hsa_queue_inactivate(hsa_queue_t *queue);
+hsa_status_t hsa_executable_get_symbol(hsa_executable_t executable,
+                                       const char *module_name,
+                                       const char *symbol_name,
+                                       hsa_agent_t agent,
+                                       int32_t call_convention,
+                                       hsa_executable_symbol_t *symbol);
+uint64_t hsa_queue_cas_write_index_acq_rel(const hsa_queue_t *queue,
+                                           uint64_t expected, uint64_t value);
+
+uint64_t hsa_queue_cas_write_index_acquire(const hsa_queue_t *queue,
+                                           uint64_t expected, uint64_t value);
+
+uint64_t hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue,
+                                           uint64_t expected, uint64_t value);
+
+uint64_t hsa_queue_cas_write_index_release(const hsa_queue_t *queue,
+                                           uint64_t expected, uint64_t value);
+void hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value);
+uint64_t hsa_queue_load_read_index_acquire(const hsa_queue_t *queue);
+
+uint64_t hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue);
+hsa_status_t hsa_executable_load_code_object(hsa_executable_t executable,
+                                             hsa_agent_t agent,
+                                             hsa_code_object_t code_object,
+                                             const char *options);
+uint64_t hsa_queue_load_write_index_acquire(const hsa_queue_t *queue);
+
+uint64_t hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue);
+hsa_status_t hsa_agent_get_exception_policies(hsa_agent_t agent,
+                                              hsa_profile_t profile,
+                                              uint16_t *mask);
+hsa_status_t hsa_memory_deregister(void *ptr, size_t size);
+void hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+void hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value);
+hsa_status_t hsa_soft_queue_create(hsa_region_t region, uint32_t size,
+                                   hsa_queue_type_t type, uint32_t features,
+                                   hsa_signal_t doorbell_signal,
+                                   hsa_queue_t **queue);
+hsa_status_t hsa_executable_iterate_symbols(
+    hsa_executable_t executable,
+    hsa_status_t (*callback)(hsa_executable_t executable,
+                             hsa_executable_symbol_t symbol, void *data),
+    void *data);
+hsa_status_t hsa_memory_register(void *ptr, size_t size);
+void hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue,
+                                         uint64_t value);
+
+void hsa_queue_store_write_index_release(const hsa_queue_t *queue,
+                                         uint64_t value);
+hsa_status_t hsa_executable_global_variable_define(hsa_executable_t executable,
+                                                   const char *variable_name,
+                                                   void *address);
+hsa_status_t hsa_executable_destroy(hsa_executable_t executable);
+hsa_status_t hsa_code_object_destroy(hsa_code_object_t code_object);
+hsa_status_t hsa_memory_allocate(hsa_region_t region, size_t size, void **ptr);
+hsa_signal_value_t hsa_signal_exchange_acq_rel(hsa_signal_t signal,
+                                               hsa_signal_value_t value);
+
+hsa_signal_value_t hsa_signal_exchange_acquire(hsa_signal_t signal,
+                                               hsa_signal_value_t value);
+
+hsa_signal_value_t hsa_signal_exchange_relaxed(hsa_signal_t signal,
+                                               hsa_signal_value_t value);
+
+hsa_signal_value_t hsa_signal_exchange_release(hsa_signal_t signal,
+                                               hsa_signal_value_t value);
+hsa_status_t hsa_agent_get_info(hsa_agent_t agent, hsa_agent_info_t attribute,
+                                void *value);
+hsa_status_t hsa_init();
+hsa_status_t hsa_system_get_info(hsa_system_info_t attribute, void *value);
+hsa_status_t hsa_executable_create(hsa_profile_t profile,
+                                   hsa_executable_state_t executable_state,
+                                   const char *options,
+                                   hsa_executable_t *executable);
+
+#endif /* _HSA_H */
diff --git a/libgomp/hsa_ext_finalize.h b/libgomp/hsa_ext_finalize.h
new file mode 100644
index 0000000..f159add
--- /dev/null
+++ b/libgomp/hsa_ext_finalize.h
@@ -0,0 +1,265 @@ 
+/* HSA Extensions API 1.0.1 representation description.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.
+
+The contents of the file was created by extracting data structures, enum,
+typedef and other definitions from HSA Runtime Programmer’s Reference Manual
+Version 1.0 (http://www.hsafoundation.com/standards/).
+
+HTML version is provided on the following link:
+http://www.hsafoundation.com/html/Content/Runtime/Topics/Runtime_title_page.htm
+*/
+
+
+#ifndef _HSA_EXT_FINALIZE_H
+#define _HSA_EXT_FINALIZE_H 1
+
+struct BrigModuleHeader;
+typedef struct BrigModuleHeader *BrigModule_t;
+
+typedef enum {
+  HSA_EXT_IMAGE_GEOMETRY_1D = 0,
+  HSA_EXT_IMAGE_GEOMETRY_2D = 1,
+  HSA_EXT_IMAGE_GEOMETRY_3D = 2,
+  HSA_EXT_IMAGE_GEOMETRY_1DA = 3,
+  HSA_EXT_IMAGE_GEOMETRY_2DA = 4,
+  HSA_EXT_IMAGE_GEOMETRY_1DB = 5,
+  HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6,
+  HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7
+} hsa_ext_image_geometry_t;
+
+typedef enum {
+  HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
+  HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
+} hsa_ext_image_channel_type_t;
+
+typedef enum {
+  HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
+  HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
+} hsa_ext_image_channel_order_t;
+
+typedef struct hsa_ext_image_format_s
+{
+  hsa_ext_image_channel_type_t channel_type;
+  hsa_ext_image_channel_order_t channel_order;
+} hsa_ext_image_format_t;
+
+typedef struct hsa_ext_sampler_s
+{
+  uint64_t handle;
+} hsa_ext_sampler_t;
+typedef struct hsa_ext_image_data_info_s
+{
+  size_t size;
+  size_t alignment;
+} hsa_ext_image_data_info_t;
+typedef enum {
+  HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0,
+  HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1,
+  HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2,
+  HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3,
+  HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4
+} hsa_ext_sampler_addressing_mode_t;
+typedef struct hsa_ext_image_s
+{
+  uint64_t handle;
+} hsa_ext_image_t;
+typedef enum {
+  HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0,
+  HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1,
+  HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2,
+  HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4,
+  HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8,
+  HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10
+} hsa_ext_image_capability_t;
+typedef struct hsa_ext_control_directives_s
+{
+  uint64_t control_directives_mask;
+  uint16_t break_exceptions_mask;
+  uint16_t detect_exceptions_mask;
+  uint32_t max_dynamic_group_size;
+  uint64_t max_flat_grid_size;
+  uint32_t max_flat_workgroup_size;
+  uint32_t reserved1;
+  uint64_t required_grid_size[3];
+  hsa_dim3_t required_workgroup_size;
+  uint8_t required_dim;
+  uint8_t reserved2[75];
+} hsa_ext_control_directives_t;
+typedef enum {
+  HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0,
+  HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1
+} hsa_ext_sampler_filter_mode_t;
+
+typedef enum {
+  HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0,
+  HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1
+} hsa_ext_sampler_coordinate_mode_t;
+typedef enum {
+  HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
+} hsa_ext_finalizer_call_convention_t;
+typedef struct hsa_ext_program_s
+{
+  uint64_t handle;
+} hsa_ext_program_t;
+typedef struct hsa_ext_image_descriptor_s
+{
+  hsa_ext_image_geometry_t geometry;
+  size_t width;
+  size_t height;
+  size_t depth;
+  size_t array_size;
+  hsa_ext_image_format_t format;
+} hsa_ext_image_descriptor_t;
+typedef enum {
+  HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
+  HSA_EXT_PROGRAM_INFO_PROFILE = 1,
+  HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
+} hsa_ext_program_info_t;
+typedef BrigModule_t hsa_ext_module_t;
+typedef struct hsa_ext_sampler_descriptor_s
+{
+  hsa_ext_sampler_coordinate_mode_t coordinate_mode;
+  hsa_ext_sampler_filter_mode_t filter_mode;
+  hsa_ext_sampler_addressing_mode_t address_mode;
+} hsa_ext_sampler_descriptor_t;
+
+typedef struct hsa_ext_image_region_s
+{
+  hsa_dim3_t offset;
+  hsa_dim3_t range;
+} hsa_ext_image_region_t;
+hsa_status_t hsa_ext_image_export (hsa_agent_t agent, hsa_ext_image_t src_image,
+				   void *dst_memory, size_t dst_row_pitch,
+				   size_t dst_slice_pitch,
+				   const hsa_ext_image_region_t *image_region);
+hsa_status_t hsa_ext_program_add_module (hsa_ext_program_t program,
+					 hsa_ext_module_t module);
+hsa_status_t hsa_ext_program_iterate_modules (
+  hsa_ext_program_t program,
+  hsa_status_t (*callback) (hsa_ext_program_t program, hsa_ext_module_t module,
+			    void *data),
+  void *data);
+hsa_status_t hsa_ext_program_create (
+  hsa_machine_model_t machine_model, hsa_profile_t profile,
+  hsa_default_float_rounding_mode_t default_float_rounding_mode,
+  const char *options, hsa_ext_program_t *program);
+hsa_status_t
+hsa_ext_image_data_get_info (hsa_agent_t agent,
+			     const hsa_ext_image_descriptor_t *image_descriptor,
+			     hsa_access_permission_t access_permission,
+			     hsa_ext_image_data_info_t *image_data_info);
+
+hsa_status_t hsa_ext_image_import (hsa_agent_t agent, const void *src_memory,
+				   size_t src_row_pitch, size_t src_slice_pitch,
+				   hsa_ext_image_t dst_image,
+				   const hsa_ext_image_region_t *image_region);
+hsa_status_t hsa_ext_program_get_info (hsa_ext_program_t program,
+				       hsa_ext_program_info_t attribute,
+				       void *value);
+enum
+{
+  HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000,
+  HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001
+};
+hsa_status_t hsa_ext_image_destroy (hsa_agent_t agent, hsa_ext_image_t image);
+hsa_status_t hsa_ext_image_get_capability (
+  hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
+  const hsa_ext_image_format_t *image_format, uint32_t *capability_mask);
+enum
+{
+  HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
+  HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
+  HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
+  HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
+  HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
+  HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
+  HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
+};
+hsa_status_t hsa_ext_sampler_destroy (hsa_agent_t agent,
+				      hsa_ext_sampler_t sampler);
+hsa_status_t hsa_ext_program_finalize (
+  hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
+  hsa_ext_control_directives_t control_directives, const char *options,
+  hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
+hsa_status_t hsa_ext_image_create (
+  hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
+  const void *image_data, hsa_access_permission_t access_permission,
+  hsa_ext_image_t *image);
+hsa_status_t hsa_ext_program_destroy (hsa_ext_program_t program);
+hsa_status_t hsa_ext_image_copy (hsa_agent_t agent, hsa_ext_image_t src_image,
+				 const hsa_dim3_t *src_offset,
+				 hsa_ext_image_t dst_image,
+				 const hsa_dim3_t *dst_offset,
+				 const hsa_dim3_t *range);
+hsa_status_t hsa_ext_image_clear (hsa_agent_t agent, hsa_ext_image_t image,
+				  const void *data,
+				  const hsa_ext_image_region_t *image_region);
+enum
+{
+  HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000,
+  HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001,
+  HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002,
+  HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003,
+  HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004,
+  HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005,
+  HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006,
+  HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007,
+  HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008,
+  HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009,
+  HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A,
+  HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B
+};
+hsa_status_t
+hsa_ext_sampler_create (hsa_agent_t agent,
+			const hsa_ext_sampler_descriptor_t *sampler_descriptor,
+			hsa_ext_sampler_t *sampler);
+
+#endif /* _HSA_EXT_FINALIZE_H */
diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac
index 88b4156..e5e554b 100644
--- a/libgomp/plugin/configfrag.ac
+++ b/libgomp/plugin/configfrag.ac
@@ -118,19 +118,6 @@  if test "x$HSA_RUNTIME_LIB" != x; then
   HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
 fi
 
-HSA_KMT_LIB=
-AC_SUBST(HSA_KMT_LIB)
-HSA_KMT_LDFLAGS=
-AC_ARG_WITH(hsa-kmt-lib,
-	[AS_HELP_STRING([--with-hsa-kmt-lib=PATH],
-		[specify directory for installed HSA KMT library.])])
-if test "x$with_hsa_kmt_lib" != x; then
-  HSA_KMT_LIB=$with_hsa_kmt_lib
-fi
-if test "x$HSA_KMT_LIB" != x; then
-  HSA_KMT_LDFLAGS=-L$HSA_KMT_LIB
-fi
-
 PLUGIN_HSA=0
 PLUGIN_HSA_CPPFLAGS=
 PLUGIN_HSA_LDFLAGS=
@@ -140,8 +127,6 @@  AC_SUBST(PLUGIN_HSA_CPPFLAGS)
 AC_SUBST(PLUGIN_HSA_LDFLAGS)
 AC_SUBST(PLUGIN_HSA_LIBS)
 
-
-
 # Get offload targets and path to install tree of offloading compiler.
 offload_additional_options=
 offload_additional_lib_paths=
@@ -195,8 +180,8 @@  if test x"$enable_offload_targets" != x; then
 	        tgt_name=hsa
 	        PLUGIN_HSA=$tgt
 	        PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
-	        PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS $HSA_KMT_LDFLAGS"
-	        PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt"
+	        PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
+	        PLUGIN_HSA_LIBS="-ldl"
 
 	        PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
 	        CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
@@ -205,11 +190,7 @@  if test x"$enable_offload_targets" != x; then
 	        PLUGIN_HSA_save_LIBS=$LIBS
 	        LIBS="$PLUGIN_HSA_LIBS $LIBS"
 
-	        AC_LINK_IFELSE(
-	          [AC_LANG_PROGRAM(
-	            [#include "hsa.h"],
-	              [hsa_status_t status = hsa_init ()])],
-	          [PLUGIN_HSA=1])
+	        PLUGIN_HSA=1
 	        CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
 	        LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
 	        LIBS=$PLUGIN_HSA_save_LIBS
@@ -260,3 +241,6 @@  AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
 AM_CONDITIONAL([PLUGIN_HSA], [test $PLUGIN_HSA = 1])
 AC_DEFINE_UNQUOTED([PLUGIN_HSA], [$PLUGIN_HSA],
   [Define to 1 if the HSA plugin is built, 0 if not.])
+
+AC_DEFINE_UNQUOTED([HSA_RUNTIME_LIB], ["$HSA_RUNTIME_LIB/"],
+  [Define path to HSA runtime.])
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index 0b3b67a..0fd0b10 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -27,16 +27,103 @@ 
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <pthread.h>
+#include <inttypes.h>
+#include <stdbool.h>
 #include <hsa.h>
 #include <hsa_ext_finalize.h>
 #include <dlfcn.h>
 #include "libgomp-plugin.h"
 #include "gomp-constants.h"
 
+/* As an HSA runtime is dlopened, following structure defines function
+   pointers utilized by the HSA plug-in.  */
+
+struct hsa_runtime_fn_info
+{
+  /* HSA runtime.  */
+  hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
+					const char **status_string);
+  hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
+					 hsa_agent_info_t attribute,
+					 void *value);
+  hsa_status_t (*hsa_init_fn) (void);
+  hsa_status_t (*hsa_iterate_agents_fn)
+    (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
+  hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
+					  hsa_region_info_t attribute,
+					  void *value);
+  hsa_status_t (*hsa_queue_create_fn)
+    (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
+     void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
+     void *data, uint32_t private_segment_size,
+     uint32_t group_segment_size, hsa_queue_t **queue);
+  hsa_status_t (*hsa_agent_iterate_regions_fn)
+    (hsa_agent_t agent,
+     hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
+  hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
+  hsa_status_t (*hsa_executable_create_fn)
+    (hsa_profile_t profile, hsa_executable_state_t executable_state,
+     const char *options, hsa_executable_t *executable);
+  hsa_status_t (*hsa_executable_global_variable_define_fn)
+    (hsa_executable_t executable, const char *variable_name, void *address);
+  hsa_status_t (*hsa_executable_load_code_object_fn)
+    (hsa_executable_t executable, hsa_agent_t agent,
+     hsa_code_object_t code_object, const char *options);
+  hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
+					   const char *options);
+  hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
+					uint32_t num_consumers,
+					const hsa_agent_t *consumers,
+					hsa_signal_t *signal);
+  hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
+					  void **ptr);
+  hsa_status_t (*hsa_memory_free_fn) (void *ptr);
+  hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
+  hsa_status_t (*hsa_executable_get_symbol_fn)
+    (hsa_executable_t executable, const char *module_name,
+     const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
+     hsa_executable_symbol_t *symbol);
+  hsa_status_t (*hsa_executable_symbol_get_info_fn)
+    (hsa_executable_symbol_t executable_symbol,
+     hsa_executable_symbol_info_t attribute, void *value);
+  uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
+						    uint64_t value);
+  uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
+  void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
+				       hsa_signal_value_t value);
+  void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
+				       hsa_signal_value_t value);
+  hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
+    (hsa_signal_t signal, hsa_signal_condition_t condition,
+     hsa_signal_value_t compare_value, uint64_t timeout_hint,
+     hsa_wait_state_t wait_state_hint);
+  hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
+  hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
+
+  /* HSA finalizer.  */
+  hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
+						 hsa_ext_module_t module);
+  hsa_status_t (*hsa_ext_program_create_fn)
+    (hsa_machine_model_t machine_model, hsa_profile_t profile,
+     hsa_default_float_rounding_mode_t default_float_rounding_mode,
+     const char *options, hsa_ext_program_t *program);
+  hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
+  hsa_status_t (*hsa_ext_program_finalize_fn)
+    (hsa_ext_program_t program,hsa_isa_t isa,
+     int32_t call_convention, hsa_ext_control_directives_t control_directives,
+     const char *options, hsa_code_object_type_t code_object_type,
+     hsa_code_object_t *code_object);
+};
+
+/* HSA runtime functions that are initialized in init_hsa_context.  */
+
+static struct hsa_runtime_fn_info hsa_fns;
+
 /* Keep the following GOMP prefixed structures in sync with respective parts of
    the compiler.  */
 
@@ -129,6 +216,16 @@  static bool debug;
 
 static bool suppress_host_fallback;
 
+/* Flag to locate HSA runtime shared library that is dlopened
+   by this plug-in.  */
+
+static const char *hsa_runtime_lib;
+
+/* Flag to decide if the runtime should support also CPU devices (can be
+   a simulator).  */
+
+static bool support_cpu_devices;
+
 /* Initialize debug and suppress_host_fallback according to the environment.  */
 
 static void
@@ -143,6 +240,12 @@  init_enviroment_variables (void)
     suppress_host_fallback = true;
   else
     suppress_host_fallback = false;
+
+  hsa_runtime_lib = getenv ("HSA_RUNTIME_LIB");
+  if (hsa_runtime_lib == NULL)
+    hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
+
+  support_cpu_devices = getenv ("HSA_SUPPORT_CPU_DEVICES");
 }
 
 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
@@ -176,7 +279,7 @@  hsa_warn (const char *str, hsa_status_t status)
     return;
 
   const char *hsa_error;
-  hsa_status_string (status, &hsa_error);
+  hsa_fns.hsa_status_string_fn (status, &hsa_error);
 
   fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
 }
@@ -188,7 +291,7 @@  static void
 hsa_fatal (const char *str, hsa_status_t status)
 {
   const char *hsa_error;
-  hsa_status_string (status, &hsa_error);
+  hsa_fns.hsa_status_string_fn (status, &hsa_error);
   GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
 		     hsa_error);
 }
@@ -346,6 +449,50 @@  struct hsa_context_info
 
 static struct hsa_context_info hsa_context;
 
+#define DLSYM_FN(function) \
+  hsa_fns.function##_fn = dlsym (handle, #function); \
+  if (hsa_fns.function##_fn == NULL) \
+    return false;
+
+static bool
+init_hsa_runtime_functions (void)
+{
+  void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
+  if (handle == NULL)
+    return false;
+
+  DLSYM_FN (hsa_status_string)
+  DLSYM_FN (hsa_agent_get_info)
+  DLSYM_FN (hsa_init)
+  DLSYM_FN (hsa_iterate_agents)
+  DLSYM_FN (hsa_region_get_info)
+  DLSYM_FN (hsa_queue_create)
+  DLSYM_FN (hsa_agent_iterate_regions)
+  DLSYM_FN (hsa_executable_destroy)
+  DLSYM_FN (hsa_executable_create)
+  DLSYM_FN (hsa_executable_global_variable_define)
+  DLSYM_FN (hsa_executable_load_code_object)
+  DLSYM_FN (hsa_executable_freeze)
+  DLSYM_FN (hsa_signal_create)
+  DLSYM_FN (hsa_memory_allocate)
+  DLSYM_FN (hsa_memory_free)
+  DLSYM_FN (hsa_signal_destroy)
+  DLSYM_FN (hsa_executable_get_symbol)
+  DLSYM_FN (hsa_executable_symbol_get_info)
+  DLSYM_FN (hsa_queue_add_write_index_release)
+  DLSYM_FN (hsa_queue_load_read_index_acquire)
+  DLSYM_FN (hsa_signal_wait_acquire)
+  DLSYM_FN (hsa_signal_store_relaxed)
+  DLSYM_FN (hsa_signal_store_release)
+  DLSYM_FN (hsa_signal_load_acquire)
+  DLSYM_FN (hsa_queue_destroy)
+  DLSYM_FN (hsa_ext_program_add_module)
+  DLSYM_FN (hsa_ext_program_create)
+  DLSYM_FN (hsa_ext_program_destroy)
+  DLSYM_FN (hsa_ext_program_finalize)
+  return true;
+}
+
 /* Find kernel for an AGENT by name provided in KERNEL_NAME.  */
 
 static struct kernel_info *
@@ -373,17 +520,32 @@  suitable_hsa_agent_p (hsa_agent_t agent)
 {
   hsa_device_type_t device_type;
   hsa_status_t status
-    = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type);
-  if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
+    = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
+				     &device_type);
+  if (status != HSA_STATUS_SUCCESS)
     return false;
 
+  switch (device_type)
+    {
+    case HSA_DEVICE_TYPE_GPU:
+      break;
+    case HSA_DEVICE_TYPE_CPU:
+      if (!support_cpu_devices)
+	return false;
+      break;
+    default:
+      return false;
+    }
+
   uint32_t features = 0;
-  status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
+  status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
+					  &features);
   if (status != HSA_STATUS_SUCCESS
       || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
     return false;
   hsa_queue_type_t queue_type;
-  status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+  status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
+					  &queue_type);
   if (status != HSA_STATUS_SUCCESS
       || (queue_type != HSA_QUEUE_TYPE_MULTI))
     return false;
@@ -429,11 +591,16 @@  init_hsa_context (void)
   if (hsa_context.initialized)
     return;
   init_enviroment_variables ();
-  status = hsa_init ();
+  if (!init_hsa_runtime_functions ())
+    {
+      HSA_DEBUG ("Run-time could not be dynamically opened\n");
+      return;
+    }
+  status = hsa_fns.hsa_init_fn ();
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Run-time could not be initialized", status);
   HSA_DEBUG ("HSA run-time initialized\n");
-  status = hsa_iterate_agents (count_gpu_agents, NULL);
+  status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("HSA GPU devices could not be enumerated", status);
   HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
@@ -441,7 +608,7 @@  init_hsa_context (void)
   hsa_context.agents
     = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
 				  * sizeof (struct agent_info));
-  status = hsa_iterate_agents (assign_agent_ids, &agent_index);
+  status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
   if (agent_index != hsa_context.agent_count)
     GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
   hsa_context.initialized = true;
@@ -467,14 +634,16 @@  get_kernarg_memory_region (hsa_region_t region, void *data)
   hsa_status_t status;
   hsa_region_segment_t segment;
 
-  status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
+  status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
+					   &segment);
   if (status != HSA_STATUS_SUCCESS)
     return status;
   if (segment != HSA_REGION_SEGMENT_GLOBAL)
     return HSA_STATUS_SUCCESS;
 
   uint32_t flags;
-  status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
+  status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
+					   &flags);
   if (status != HSA_STATUS_SUCCESS)
     return status;
   if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
@@ -517,28 +686,35 @@  GOMP_OFFLOAD_init_device (int n)
 
   uint32_t queue_size;
   hsa_status_t status;
-  status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
-			       &queue_size);
+  status = hsa_fns.hsa_agent_get_info_fn (agent->id,
+					  HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+					  &queue_size);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
-  status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
+  status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
+					  &agent->isa);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error querying the ISA of the agent", status);
-  status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
-			     queue_callback, NULL, UINT32_MAX, UINT32_MAX,
-			     &agent->command_q);
+  status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
+					HSA_QUEUE_TYPE_MULTI,
+					queue_callback, NULL, UINT32_MAX,
+					UINT32_MAX,
+					&agent->command_q);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error creating command queue", status);
 
-  status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
-			     queue_callback, NULL, UINT32_MAX, UINT32_MAX,
-			     &agent->kernel_dispatch_command_q);
+  status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
+					HSA_QUEUE_TYPE_MULTI,
+					queue_callback, NULL, UINT32_MAX,
+					UINT32_MAX,
+					&agent->kernel_dispatch_command_q);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error creating kernel dispatch command queue", status);
 
   agent->kernarg_region.handle = (uint64_t) -1;
-  status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
-				      &agent->kernarg_region);
+  status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
+						 get_kernarg_memory_region,
+						 &agent->kernarg_region);
   if (agent->kernarg_region.handle == (uint64_t) -1)
     GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
 		       "arguments");
@@ -602,7 +778,7 @@  destroy_hsa_program (struct agent_info *agent)
 
   HSA_DEBUG ("Destroying the current HSA program.\n");
 
-  status = hsa_executable_destroy (agent->executable);
+  status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not destroy HSA executable", status);
 
@@ -738,9 +914,10 @@  create_and_finalize_hsa_program (struct agent_info *agent)
   if (agent->prog_finalized)
     goto final;
 
-  status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
-				   HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
-				   NULL, &prog_handle);
+  status = hsa_fns.hsa_ext_program_create_fn
+    (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+     HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+     NULL, &prog_handle);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not create an HSA program", status);
 
@@ -749,8 +926,8 @@  create_and_finalize_hsa_program (struct agent_info *agent)
   struct module_info *module = agent->first_module;
   while (module)
     {
-      status = hsa_ext_program_add_module (prog_handle,
-					   module->image_desc->brig_module);
+      status = hsa_fns.hsa_ext_program_add_module_fn
+	(prog_handle, module->image_desc->brig_module);
       if (status != HSA_STATUS_SUCCESS)
 	hsa_fatal ("Could not add a module to the HSA program", status);
       module = module->next;
@@ -776,7 +953,8 @@  create_and_finalize_hsa_program (struct agent_info *agent)
 	  continue;
 	}
 
-      status = hsa_ext_program_add_module (prog_handle, library->image);
+      status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
+						      library->image);
       if (status != HSA_STATUS_SUCCESS)
 	hsa_warn ("Could not add a shared BRIG library the HSA program",
 		  status);
@@ -788,11 +966,9 @@  create_and_finalize_hsa_program (struct agent_info *agent)
   hsa_ext_control_directives_t control_directives;
   memset (&control_directives, 0, sizeof (control_directives));
   hsa_code_object_t code_object;
-  status = hsa_ext_program_finalize (prog_handle, agent->isa,
-				     HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
-				     control_directives, "",
-				     HSA_CODE_OBJECT_TYPE_PROGRAM,
-				     &code_object);
+  status = hsa_fns.hsa_ext_program_finalize_fn
+    (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+     control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
   if (status != HSA_STATUS_SUCCESS)
     {
       hsa_warn ("Finalization of the HSA program failed", status);
@@ -800,11 +976,12 @@  create_and_finalize_hsa_program (struct agent_info *agent)
     }
 
   HSA_DEBUG ("Finalization done\n");
-  hsa_ext_program_destroy (prog_handle);
+  hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
 
   status
-    = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
-			     "", &agent->executable);
+    = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
+					HSA_EXECUTABLE_STATE_UNFROZEN,
+					"", &agent->executable);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not create HSA executable", status);
 
@@ -816,9 +993,8 @@  create_and_finalize_hsa_program (struct agent_info *agent)
 	{
 	  struct global_var_info *var;
 	  var = &module->image_desc->global_variables[i];
-	  status
-	    = hsa_executable_global_variable_define (agent->executable,
-						     var->name, var->address);
+	  status = hsa_fns.hsa_executable_global_variable_define_fn
+	    (agent->executable, var->name, var->address);
 
 	  HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
 		     var->address);
@@ -831,11 +1007,12 @@  create_and_finalize_hsa_program (struct agent_info *agent)
       module = module->next;
     }
 
-  status = hsa_executable_load_code_object (agent->executable, agent->id,
-					    code_object, "");
+  status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
+						       agent->id,
+						       code_object, "");
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not add a code object to the HSA executable", status);
-  status = hsa_executable_freeze (agent->executable, "");
+  status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not freeze the HSA executable", status);
 
@@ -876,7 +1053,7 @@  create_single_kernel_dispatch (struct kernel_info *kernel,
   shadow->object = kernel->object;
 
   hsa_signal_t sync_signal;
-  hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal);
+  hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error creating the HSA sync signal", status);
 
@@ -885,8 +1062,9 @@  create_single_kernel_dispatch (struct kernel_info *kernel,
   shadow->group_segment_size = kernel->group_segment_size;
 
   status
-    = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size,
-			   &shadow->kernarg_address);
+    = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
+				      kernel->kernarg_segment_size,
+				      &shadow->kernarg_address);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
 
@@ -901,11 +1079,11 @@  release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
   HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
 	     shadow->debug, (void *) shadow->debug);
 
-  hsa_memory_free (shadow->kernarg_address);
+  hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
 
   hsa_signal_t s;
   s.handle = shadow->signal;
-  hsa_signal_destroy (s);
+  hsa_fns.hsa_signal_destroy_fn (s);
 
   free (shadow->omp_data_memory);
 
@@ -925,31 +1103,30 @@  init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
   hsa_status_t status;
   struct agent_info *agent = kernel->agent;
   hsa_executable_symbol_t kernel_symbol;
-  status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
-				      agent->id, 0, &kernel_symbol);
+  status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+						 kernel->name, agent->id,
+						 0, &kernel_symbol);
   if (status != HSA_STATUS_SUCCESS)
     {
       hsa_warn ("Could not find symbol for kernel in the code object", status);
       goto failure;
     }
   HSA_DEBUG ("Located kernel %s\n", kernel->name);
-  status
-    = hsa_executable_symbol_get_info (kernel_symbol,
-				      HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
-				      &kernel->object);
+  status = hsa_fns.hsa_executable_symbol_get_info_fn
+    (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not extract a kernel object from its symbol", status);
-  status = hsa_executable_symbol_get_info
+  status = hsa_fns.hsa_executable_symbol_get_info_fn
     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
      &kernel->kernarg_segment_size);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not get info about kernel argument size", status);
-  status = hsa_executable_symbol_get_info
+  status = hsa_fns.hsa_executable_symbol_get_info_fn
     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
      &kernel->group_segment_size);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Could not get info about kernel group segment size", status);
-  status = hsa_executable_symbol_get_info
+  status = hsa_fns.hsa_executable_symbol_get_info_fn
     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
      &kernel->private_segment_size);
   if (status != HSA_STATUS_SUCCESS)
@@ -1227,11 +1404,12 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
       print_kernel_dispatch (shadow, 2);
     }
 
-  uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
+  uint64_t index
+    = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
   HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
 
   /* Wait until the queue is not full before writing the packet.   */
-  while (index - hsa_queue_load_read_index_acquire (agent->command_q)
+  while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
 	 >= agent->command_q->size)
     ;
 
@@ -1259,7 +1437,7 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
   hsa_signal_t s;
   s.handle = shadow->signal;
   packet->completion_signal = s;
-  hsa_signal_store_relaxed (s, 1);
+  hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
   memcpy (shadow->kernarg_address, &vars, sizeof (vars));
 
   /* PR hsa/70337.  */
@@ -1285,7 +1463,8 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
   packet_store_release ((uint32_t *) packet, header,
 			1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
 
-  hsa_signal_store_release (agent->command_q->doorbell_signal, index);
+  hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
+				       index);
 
   /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
      signal wait and signal load operations on their own and we need to
@@ -1296,8 +1475,9 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
   HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
 
   /* Root signal waits with 1ms timeout.  */
-  while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000,
-				  HSA_WAIT_STATE_BLOCKED) != 0)
+  while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
+					     1000 * 1000,
+					     HSA_WAIT_STATE_BLOCKED) != 0)
     for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
       {
 	hsa_signal_t child_s;
@@ -1305,7 +1485,7 @@  GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
 
 	HSA_DEBUG ("Waiting for children completion signal: %lu\n",
 		   shadow->children_dispatches[i]->signal);
-	hsa_signal_load_acquire (child_s);
+	hsa_fns.hsa_signal_load_acquire_fn (child_s);
       }
 
   release_kernel_dispatch (shadow);
@@ -1446,10 +1626,10 @@  GOMP_OFFLOAD_fini_device (int n)
 
   release_agent_shared_libraries (agent);
 
-  hsa_status_t status = hsa_queue_destroy (agent->command_q);
+  hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error destroying command queue", status);
-  status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
+  status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error destroying kernel dispatch command queue", status);
   if (pthread_mutex_destroy (&agent->prog_mutex))
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 1cb4991..50ec8a7 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -205,13 +205,9 @@  proc libgomp_init { args } {
 	    append always_ld_library_path ":$cuda_driver_lib"
 	}
 	global hsa_runtime_lib
-	global hsa_kmt_lib
 	if { $hsa_runtime_lib != "" } {
 	    append always_ld_library_path ":$hsa_runtime_lib"
 	}
-	if { $hsa_kmt_lib != "" } {
-	    append always_ld_library_path ":$hsa_kmt_lib"
-	}
     }
 
     # We use atomic operations in the testcases to validate results.
diff --git a/libgomp/testsuite/libgomp-test-support.exp.in b/libgomp/testsuite/libgomp-test-support.exp.in
index 5a724fb..a5250a8 100644
--- a/libgomp/testsuite/libgomp-test-support.exp.in
+++ b/libgomp/testsuite/libgomp-test-support.exp.in
@@ -1,6 +1,5 @@ 
 set cuda_driver_include "@CUDA_DRIVER_INCLUDE@"
 set cuda_driver_lib "@CUDA_DRIVER_LIB@"
 set hsa_runtime_lib "@HSA_RUNTIME_LIB@"
-set hsa_kmt_lib "@HSA_KMT_LIB@"
 
 set offload_targets "@offload_targets@"
-- 
2.8.1