diff mbox

[hsa] Upgrade to the final version of HSA 1.0

Message ID 20150430121313.GW2305@virgil.suse
State New
Headers show

Commit Message

Martin Jambor April 30, 2015, 12:13 p.m. UTC
Hi,

HSA Foundation has recently published the final version of HSA 1.0
(as opposed to the provisional version to which we adhered before this
patch and which was in some ways quite different).  This patch makes
the transition to this version also in the HSA development branch of
GCC.

Apart from the changes required by the changes in the standard, this
patch also removes the use of OKRA layer to dispatch HSA kernels and
uses directly the official HSA run-time library to do that.  Changes
apparent for a curious user are described in the updated README.hsa
file (all in all, things are simpler now, the changes were mostly
deletions).

The next step on my agenda is a long overdue merge from trunk and then
gradual integration with the libgomp plugin mechanism and transition
to requiring OpenMP 4.0.  This patch, which I committed to the branch
yesterday, should make it easier both directly and because there
should be no more disruptions from HSA foundation for quite some time.

Thanks,

Martin


2015-04-29  Martin Jambor  <mjambor@suse.cz>

gcc/
	* hsa-brig-format.h: Updated to 1.0F.
	* hsa-brig.c: Added includes of tree-iterator.h, tree-ssa-alias.h,
	gimple-expr.h, gimple.h, stringpool.h, ipa-ref.h, lto-streamer.h and
	cgraph.h.
	(BRIG_ELF_SECTION_NAME): New define.
	(BRIG_LABEL_STRING): Likewise.
	(hsa_brig_section): New field header_byte_delta.
	(hsa_brig_section::init): Calculate initial size based on what the new
	disassembler expects.
	(hsa_brig_section::output): Remove assembly section switching, add new
	BRIG section stuff.
	(get_ptr_by_offset): Use the new delta for calculations.
	(brig_init): Remove initializations of 1.0p structures and add
	initializations of 1.0F structures.
	(emit_directive_variable): Adjust to 1.0F.
	(emit_function_directives): Likewise.
	(enqueue_op): Likewise.
	(emit_immediate_operand): Likewise.
	(emit_register_operand): Likewise.
	(emit_memory_insn): Likewise.
	(emit_segment_insn): Likewise.
	(emit_cmp_insn): Likewise.
	(emit_cvt_insn): Likewise.
	(emit_arg_block): Likewise.
	(emit_basic_insn): Do not turn mov types into bit-types.  Adjust to
	1.0F.
	(hsa_ctor_statements): New variable.
	(hsa_output_kernel_mapping): New function.
	(HSA_SECTION_ALIGNMENT): New define.
	(hsa_output_brig): Emit HSAIL BRIG 1.0F.  Call
	hsa_output_kernel_mapping.
	* hsa-dump.c (hsa_opcode_name): Adjust to 1.0F.
	(hsa_memscope_name): Likewise.
	* hsa-gen.c (hsa_decl_kernel_map_element): New type.
	(hsa_decl_kernel_mapping): New variable.
	(hsa_deinit_data_for_cfun): Do not free names of kernels.
	(hsa_alloc_immed_op): Adjust to HSA 1.0F.
	(hsa_alloc_reg_op): Likewise.
	(hsa_add_kern_decl_mapping): New function.
	(hsa_get_number_decl_kernel_mappings): Likewise.
	(hsa_get_decl_kernel_mapping_decl): Likewise.
	(hsa_get_decl_kernel_mapping_name): Likewise.
	(hsa_free_decl_kernel_mapping): Likewise.
	(generate_hsa): Fix detecting kernels.  Also call
	hsa_add_kern_decl_mapping.
	(hsa_kernel_desc_type, hsa_range_dimnum_decl): Removed.
	(hsa_range_grid_decl, hsa_range_group_decl): Likewise.
	(hsa_launch_range_type): Likewise.
	(hsa_lattrs_dimnum_decl, hsa_lattrs_grid_decl): New.
	(hsa_lattrs_group_decl, hsa_lattrs_nargs_decl): Likewise.
	(hsa_launch_attributes_type): Likewise.
	(init_hsa_functions): Build the types and decls necessary for HSA 1.0F.
	(wrap_hsa_kernel_call): New function.
	(wrap_all_hsa_calls): Simplify.
	* hsa-regalloc.c (hsa_num_def_ops): Adjust to HSA 1.0F.
	* hsa.h (hsa_op_base): Adjust to HSA 1.0F.
	(is_a_helper <hsa_op_immed *>::test): Likewise.
	(is_a_helper <hsa_op_reg *>::test): Likewise.
	(hsa_get_number_decl_kernel_mappings): Declare.
	(hsa_get_decl_kernel_mapping_decl): Likewise.
	(hsa_get_decl_kernel_mapping_name): Likewise.
	(hsa_free_decl_kernel_mapping): Likewise.

libgomp/
	* Makefile.am (libgomp_la_SOURCES): Use hsa.c instead of hsaokra.c.
	* Makefile.in: Regenerated.
	* hsa.c: New file.
	* hsa.h: Likewise.
	* hsa_ext_finalize.h: Likewise.
	* hsaokra.c: Removed.
	* okra.h: Likewise.
	* libgomp.map (HSA_1.0): Added __hsa_register_image.
diff mbox

Patch

diff --git a/gcc/hsa-brig-format.h b/gcc/hsa-brig-format.h
index 179ea4c..8508896 100644
--- a/gcc/hsa-brig-format.h
+++ b/gcc/hsa-brig-format.h
@@ -23,962 +23,1236 @@ 
 #include "config.h"
 #include "system.h"
 
-typedef uint32_t BrigDataOffset32_t;
+typedef uint32_t BrigVersion32_t;
+
+enum BrigVersion {
+
+    BRIG_VERSION_HSAIL_MAJOR = 1,
+    BRIG_VERSION_HSAIL_MINOR = 0,
+    BRIG_VERSION_BRIG_MAJOR  = 1,
+    BRIG_VERSION_BRIG_MINOR  = 0
+};
+
+typedef uint8_t BrigAlignment8_t;
+
+typedef uint8_t BrigAllocation8_t;
+
+typedef uint8_t BrigAluModifier8_t;
+
+typedef uint8_t BrigAtomicOperation8_t;
+
 typedef uint32_t BrigCodeOffset32_t;
-typedef uint32_t BrigOperandOffset32_t;
 
-typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+typedef uint8_t BrigCompareOperation8_t;
+
+typedef uint16_t BrigControlDirective16_t;
+
+typedef uint32_t BrigDataOffset32_t;
+
 typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
+
 typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
 
-typedef uint8_t BrigAlignment8_t;
+typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+
+typedef uint8_t BrigExecutableModifier8_t;
+
+typedef uint8_t BrigImageChannelOrder8_t;
+
+typedef uint8_t BrigImageChannelType8_t;
+
+typedef uint8_t BrigImageGeometry8_t;
+
+typedef uint8_t BrigImageQuery8_t;
+
+typedef uint16_t BrigKind16_t;
+
+typedef uint8_t BrigLinkage8_t;
+
+typedef uint8_t BrigMachineModel8_t;
+
+typedef uint8_t BrigMemoryModifier8_t;
+
+typedef uint8_t BrigMemoryOrder8_t;
+
+typedef uint8_t BrigMemoryScope8_t;
+
+typedef uint16_t BrigOpcode16_t;
+
+typedef uint32_t BrigOperandOffset32_t;
+
+typedef uint8_t BrigPack8_t;
+
+typedef uint8_t BrigProfile8_t;
+
+typedef uint16_t BrigRegisterKind16_t;
+
+typedef uint8_t BrigRound8_t;
+
+typedef uint8_t BrigSamplerAddressing8_t;
+
+typedef uint8_t BrigSamplerCoordNormalization8_t;
+
+typedef uint8_t BrigSamplerFilter8_t;
+
+typedef uint8_t BrigSamplerQuery8_t;
+
+typedef uint32_t BrigSectionIndex32_t;
+
+typedef uint8_t BrigSegCvtModifier8_t;
+
+typedef uint8_t BrigSegment8_t;
+
+typedef uint32_t BrigStringOffset32_t;
+
+typedef uint16_t BrigType16_t;
+
+typedef uint8_t BrigVariableModifier8_t;
+
+typedef uint8_t BrigWidth8_t;
+
+typedef uint32_t BrigExceptions32_t;
+
+enum BrigKind {
+
+    BRIG_KIND_NONE = 0x0000,
+
+    BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
+    BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
+    BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
+    BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
+    BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
+    BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
+    BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
+    BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
+    BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
+    BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
+    BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
+    BRIG_KIND_DIRECTIVE_LOC = 0x100a,
+    BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
+    BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
+    BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
+    BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
+    BRIG_KIND_DIRECTIVE_END = 0x100f,
+
+    BRIG_KIND_INST_BEGIN = 0x2000,
+    BRIG_KIND_INST_ADDR = 0x2000,
+    BRIG_KIND_INST_ATOMIC = 0x2001,
+    BRIG_KIND_INST_BASIC = 0x2002,
+    BRIG_KIND_INST_BR = 0x2003,
+    BRIG_KIND_INST_CMP = 0x2004,
+    BRIG_KIND_INST_CVT = 0x2005,
+    BRIG_KIND_INST_IMAGE = 0x2006,
+    BRIG_KIND_INST_LANE = 0x2007,
+    BRIG_KIND_INST_MEM = 0x2008,
+    BRIG_KIND_INST_MEM_FENCE = 0x2009,
+    BRIG_KIND_INST_MOD = 0x200a,
+    BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
+    BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
+    BRIG_KIND_INST_QUEUE = 0x200d,
+    BRIG_KIND_INST_SEG = 0x200e,
+    BRIG_KIND_INST_SEG_CVT = 0x200f,
+    BRIG_KIND_INST_SIGNAL = 0x2010,
+    BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
+    BRIG_KIND_INST_END = 0x2012,
+
+    BRIG_KIND_OPERAND_BEGIN = 0x3000,
+    BRIG_KIND_OPERAND_ADDRESS = 0x3000,
+    BRIG_KIND_OPERAND_ALIGN = 0x3001,
+    BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
+    BRIG_KIND_OPERAND_CODE_REF = 0x3003,
+    BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
+    BRIG_KIND_OPERAND_RESERVED = 0x3005,
+    BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
+    BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
+    BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
+    BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
+    BRIG_KIND_OPERAND_REGISTER = 0x300a,
+    BRIG_KIND_OPERAND_STRING = 0x300b,
+    BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
+    BRIG_KIND_OPERAND_END = 0x300d
+};
+
 enum BrigAlignment {
-  BRIG_ALIGNMENT_NONE = 0,
-  BRIG_ALIGNMENT_1 = 1,
-  BRIG_ALIGNMENT_2 = 2,
-  BRIG_ALIGNMENT_4 = 3,
-  BRIG_ALIGNMENT_8 = 4,
-  BRIG_ALIGNMENT_16 = 5,
-  BRIG_ALIGNMENT_32 = 6,
-  BRIG_ALIGNMENT_64 = 7,
-  BRIG_ALIGNMENT_128 = 8,
-  BRIG_ALIGNMENT_256 = 9
+
+    BRIG_ALIGNMENT_NONE = 0,
+    BRIG_ALIGNMENT_1 = 1,
+    BRIG_ALIGNMENT_2 = 2,
+    BRIG_ALIGNMENT_4 = 3,
+    BRIG_ALIGNMENT_8 = 4,
+    BRIG_ALIGNMENT_16 = 5,
+    BRIG_ALIGNMENT_32 = 6,
+    BRIG_ALIGNMENT_64 = 7,
+    BRIG_ALIGNMENT_128 = 8,
+    BRIG_ALIGNMENT_256 = 9,
+
+    BRIG_ALIGNMENT_LAST,
+    BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1
 };
 
-typedef uint8_t BrigAllocation8_t;
 enum BrigAllocation {
-  BRIG_ALLOCATION_NONE = 0,
-  BRIG_ALLOCATION_PROGRAM = 1,
-  BRIG_ALLOCATION_AGENT = 2,
-  BRIG_ALLOCATION_AUTOMATIC = 3
+
+    BRIG_ALLOCATION_NONE = 0,
+    BRIG_ALLOCATION_PROGRAM = 1,
+    BRIG_ALLOCATION_AGENT = 2,
+    BRIG_ALLOCATION_AUTOMATIC = 3
 };
 
-typedef uint16_t BrigAluModifier16_t;
 enum BrigAluModifierMask {
-  BRIG_ALU_ROUND = 15,
-  BRIG_ALU_FTZ = 16
+    BRIG_ALU_FTZ = 1
 };
 
-typedef uint8_t BrigAtomicOperation8_t;
 enum BrigAtomicOperation {
-  BRIG_ATOMIC_ADD = 0,
-  BRIG_ATOMIC_AND = 1,
-  BRIG_ATOMIC_CAS = 2,
-  BRIG_ATOMIC_EXCH = 3,
-  BRIG_ATOMIC_LD = 4,
-  BRIG_ATOMIC_MAX = 5,
-  BRIG_ATOMIC_MIN = 6,
-  BRIG_ATOMIC_OR = 7,
-  BRIG_ATOMIC_ST = 8,
-  BRIG_ATOMIC_SUB = 9,
-  BRIG_ATOMIC_WRAPDEC = 10,
-  BRIG_ATOMIC_WRAPINC = 11,
-  BRIG_ATOMIC_XOR = 12,
-  BRIG_ATOMIC_WAIT_EQ = 13,
-  BRIG_ATOMIC_WAIT_NE = 14,
-  BRIG_ATOMIC_WAIT_LT = 15,
-  BRIG_ATOMIC_WAIT_GTE = 16,
-  BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
-  BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
-  BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
-  BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-typedef uint16_t BrigKinds16_t;
-enum BrigKind {
-  BRIG_KIND_NONE = 0x0000,
-  BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
-  BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
-  BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
-  BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
-  BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
-  BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
-  BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
-  BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
-  BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
-  BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
-  BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
-  BRIG_KIND_DIRECTIVE_LOC = 0x100a,
-  BRIG_KIND_DIRECTIVE_PRAGMA = 0x100b,
-  BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100c,
-  BRIG_KIND_DIRECTIVE_VARIABLE = 0x100d,
-  BRIG_KIND_DIRECTIVE_VERSION = 0x100e,
-  BRIG_KIND_DIRECTIVE_END = 0x100f,
-  BRIG_KIND_INST_BEGIN = 0x2000,
-  BRIG_KIND_INST_ADDR = 0x2000,
-  BRIG_KIND_INST_ATOMIC = 0x2001,
-  BRIG_KIND_INST_BASIC = 0x2002,
-  BRIG_KIND_INST_BR = 0x2003,
-  BRIG_KIND_INST_CMP = 0x2004,
-  BRIG_KIND_INST_CVT = 0x2005,
-  BRIG_KIND_INST_IMAGE = 0x2006,
-  BRIG_KIND_INST_LANE = 0x2007,
-  BRIG_KIND_INST_MEM = 0x2008,
-  BRIG_KIND_INST_MEM_FENCE = 0x2009,
-  BRIG_KIND_INST_MOD = 0x200a,
-  BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
-  BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
-  BRIG_KIND_INST_QUEUE = 0x200d,
-  BRIG_KIND_INST_SEG = 0x200e,
-  BRIG_KIND_INST_SEG_CVT = 0x200f,
-  BRIG_KIND_INST_SIGNAL = 0x2010,
-  BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
-  BRIG_KIND_INST_END = 0x2012,
-  BRIG_KIND_OPERAND_BEGIN = 0x3000,
-  BRIG_KIND_OPERAND_ADDRESS = 0x3000,
-  BRIG_KIND_OPERAND_DATA = 0x3001,
-  BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
-  BRIG_KIND_OPERAND_CODE_REF = 0x3003,
-  BRIG_KIND_OPERAND_IMAGE_PROPERTIES = 0x3004,
-  BRIG_KIND_OPERAND_OPERAND_LIST = 0x3005,
-  BRIG_KIND_OPERAND_REG = 0x3006,
-  BRIG_KIND_OPERAND_SAMPLER_PROPERTIES = 0x3007,
-  BRIG_KIND_OPERAND_STRING = 0x3008,
-  BRIG_KIND_OPERAND_WAVESIZE = 0x3009,
-  BRIG_KIND_OPERAND_END = 0x300a
+
+    BRIG_ATOMIC_ADD = 0,
+    BRIG_ATOMIC_AND = 1,
+    BRIG_ATOMIC_CAS = 2,
+    BRIG_ATOMIC_EXCH = 3,
+    BRIG_ATOMIC_LD = 4,
+    BRIG_ATOMIC_MAX = 5,
+    BRIG_ATOMIC_MIN = 6,
+    BRIG_ATOMIC_OR = 7,
+    BRIG_ATOMIC_ST = 8,
+    BRIG_ATOMIC_SUB = 9,
+    BRIG_ATOMIC_WRAPDEC = 10,
+    BRIG_ATOMIC_WRAPINC = 11,
+    BRIG_ATOMIC_XOR = 12,
+    BRIG_ATOMIC_WAIT_EQ = 13,
+    BRIG_ATOMIC_WAIT_NE = 14,
+    BRIG_ATOMIC_WAIT_LT = 15,
+    BRIG_ATOMIC_WAIT_GTE = 16,
+    BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
+    BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
+    BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
+    BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
 };
 
-typedef uint8_t BrigCompareOperation8_t;
 enum BrigCompareOperation {
-  BRIG_COMPARE_EQ = 0,
-  BRIG_COMPARE_NE = 1,
-  BRIG_COMPARE_LT = 2,
-  BRIG_COMPARE_LE = 3,
-  BRIG_COMPARE_GT = 4,
-  BRIG_COMPARE_GE = 5,
-  BRIG_COMPARE_EQU = 6,
-  BRIG_COMPARE_NEU = 7,
-  BRIG_COMPARE_LTU = 8,
-  BRIG_COMPARE_LEU = 9,
-  BRIG_COMPARE_GTU = 10,
-  BRIG_COMPARE_GEU = 11,
-  BRIG_COMPARE_NUM = 12,
-  BRIG_COMPARE_NAN = 13,
-  BRIG_COMPARE_SEQ = 14,
-  BRIG_COMPARE_SNE = 15,
-  BRIG_COMPARE_SLT = 16,
-  BRIG_COMPARE_SLE = 17,
-  BRIG_COMPARE_SGT = 18,
-  BRIG_COMPARE_SGE = 19,
-  BRIG_COMPARE_SGEU = 20,
-  BRIG_COMPARE_SEQU = 21,
-  BRIG_COMPARE_SNEU = 22,
-  BRIG_COMPARE_SLTU = 23,
-  BRIG_COMPARE_SLEU = 24,
-  BRIG_COMPARE_SNUM = 25,
-  BRIG_COMPARE_SNAN = 26,
-  BRIG_COMPARE_SGTU = 27
+
+    BRIG_COMPARE_EQ = 0,
+    BRIG_COMPARE_NE = 1,
+    BRIG_COMPARE_LT = 2,
+    BRIG_COMPARE_LE = 3,
+    BRIG_COMPARE_GT = 4,
+    BRIG_COMPARE_GE = 5,
+    BRIG_COMPARE_EQU = 6,
+    BRIG_COMPARE_NEU = 7,
+    BRIG_COMPARE_LTU = 8,
+    BRIG_COMPARE_LEU = 9,
+    BRIG_COMPARE_GTU = 10,
+    BRIG_COMPARE_GEU = 11,
+    BRIG_COMPARE_NUM = 12,
+    BRIG_COMPARE_NAN = 13,
+    BRIG_COMPARE_SEQ = 14,
+    BRIG_COMPARE_SNE = 15,
+    BRIG_COMPARE_SLT = 16,
+    BRIG_COMPARE_SLE = 17,
+    BRIG_COMPARE_SGT = 18,
+    BRIG_COMPARE_SGE = 19,
+    BRIG_COMPARE_SGEU = 20,
+    BRIG_COMPARE_SEQU = 21,
+    BRIG_COMPARE_SNEU = 22,
+    BRIG_COMPARE_SLTU = 23,
+    BRIG_COMPARE_SLEU = 24,
+    BRIG_COMPARE_SNUM = 25,
+    BRIG_COMPARE_SNAN = 26,
+    BRIG_COMPARE_SGTU = 27
 };
 
-typedef uint16_t BrigControlDirective16_t;
 enum BrigControlDirective {
-  BRIG_CONTROL_NONE = 0,
-  BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
-  BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
-  BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
-  BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
-  BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
-  BRIG_CONTROL_REQUESTEDWORKGROUPSPERCU = 6,
-  BRIG_CONTROL_REQUIREDDIM = 7,
-  BRIG_CONTROL_REQUIREDGRIDSIZE = 8,
-  BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 9,
-  BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 10
+
+    BRIG_CONTROL_NONE = 0,
+    BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
+    BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
+    BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
+    BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
+    BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
+    BRIG_CONTROL_REQUIREDDIM = 6,
+    BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
+    BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
+    BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
 };
 
-typedef uint8_t BrigExecutableModifier8_t;
-enum BrigExecuteableModifierMask {
-  BRIG_EXECUTABLE_DEFINITION = 1
+enum BrigExecutableModifierMask {
+
+    BRIG_EXECUTABLE_DEFINITION = 1
 };
 
-typedef uint8_t BrigImageChannelOrder8_t;
 enum BrigImageChannelOrder {
-  BRIG_CHANNEL_ORDER_A = 0,
-  BRIG_CHANNEL_ORDER_R = 1,
-  BRIG_CHANNEL_ORDER_RX = 2,
-  BRIG_CHANNEL_ORDER_RG = 3,
-  BRIG_CHANNEL_ORDER_RGX = 4,
-  BRIG_CHANNEL_ORDER_RA = 5,
-  BRIG_CHANNEL_ORDER_RGB = 6,
-  BRIG_CHANNEL_ORDER_RGBX = 7,
-  BRIG_CHANNEL_ORDER_RGBA = 8,
-  BRIG_CHANNEL_ORDER_BGRA = 9,
-  BRIG_CHANNEL_ORDER_ARGB = 10,
-  BRIG_CHANNEL_ORDER_ABGR = 11,
-  BRIG_CHANNEL_ORDER_SRGB = 12,
-  BRIG_CHANNEL_ORDER_SRGBX = 13,
-  BRIG_CHANNEL_ORDER_SRGBA = 14,
-  BRIG_CHANNEL_ORDER_SBGRA = 15,
-  BRIG_CHANNEL_ORDER_INTENSITY = 16,
-  BRIG_CHANNEL_ORDER_LUMINANCE = 17,
-  BRIG_CHANNEL_ORDER_DEPTH = 18,
-  BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19
+
+    BRIG_CHANNEL_ORDER_A = 0,
+    BRIG_CHANNEL_ORDER_R = 1,
+    BRIG_CHANNEL_ORDER_RX = 2,
+    BRIG_CHANNEL_ORDER_RG = 3,
+    BRIG_CHANNEL_ORDER_RGX = 4,
+    BRIG_CHANNEL_ORDER_RA = 5,
+    BRIG_CHANNEL_ORDER_RGB = 6,
+    BRIG_CHANNEL_ORDER_RGBX = 7,
+    BRIG_CHANNEL_ORDER_RGBA = 8,
+    BRIG_CHANNEL_ORDER_BGRA = 9,
+    BRIG_CHANNEL_ORDER_ARGB = 10,
+    BRIG_CHANNEL_ORDER_ABGR = 11,
+    BRIG_CHANNEL_ORDER_SRGB = 12,
+    BRIG_CHANNEL_ORDER_SRGBX = 13,
+    BRIG_CHANNEL_ORDER_SRGBA = 14,
+    BRIG_CHANNEL_ORDER_SBGRA = 15,
+    BRIG_CHANNEL_ORDER_INTENSITY = 16,
+    BRIG_CHANNEL_ORDER_LUMINANCE = 17,
+    BRIG_CHANNEL_ORDER_DEPTH = 18,
+    BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
+
+    BRIG_CHANNEL_ORDER_UNKNOWN,
+
+    BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
+
 };
 
-typedef uint8_t BrigImageChannelType8_t;
 enum BrigImageChannelType {
-  BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
-  BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
-  BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
-  BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
-  BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
-  BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
-  BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
-  BRIG_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
-  BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
-  BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
-  BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
-  BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
-  BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
-  BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
-  BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
-  BRIG_CHANNEL_TYPE_FLOAT = 15
+
+    BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
+    BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
+    BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
+    BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
+    BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
+    BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
+    BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
+    BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
+    BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
+    BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
+    BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
+    BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
+    BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
+    BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
+    BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
+    BRIG_CHANNEL_TYPE_FLOAT = 15,
+
+    BRIG_CHANNEL_TYPE_UNKNOWN,
+
+    BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
 };
 
-typedef uint8_t BrigImageGeometry8_t;
 enum BrigImageGeometry {
-  BRIG_GEOMETRY_1D = 0,
-  BRIG_GEOMETRY_2D = 1,
-  BRIG_GEOMETRY_3D = 2,
-  BRIG_GEOMETRY_1DA = 3,
-  BRIG_GEOMETRY_2DA = 4,
-  BRIG_GEOMETRY_1DB = 5,
-  BRIG_GEOMETRY_2DDEPTH = 6,
-  BRIG_GEOMETRY_2DADEPTH = 7
+
+    BRIG_GEOMETRY_1D = 0,
+    BRIG_GEOMETRY_2D = 1,
+    BRIG_GEOMETRY_3D = 2,
+    BRIG_GEOMETRY_1DA = 3,
+    BRIG_GEOMETRY_2DA = 4,
+    BRIG_GEOMETRY_1DB = 5,
+    BRIG_GEOMETRY_2DDEPTH = 6,
+    BRIG_GEOMETRY_2DADEPTH = 7,
+
+    BRIG_GEOMETRY_UNKNOWN,
+
+    BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
 };
 
-typedef uint8_t BrigImageQuery8_t;
 enum BrigImageQuery {
-  BRIG_IMAGE_QUERY_WIDTH = 0,
-  BRIG_IMAGE_QUERY_HEIGHT = 1,
-  BRIG_IMAGE_QUERY_DEPTH = 2,
-  BRIG_IMAGE_QUERY_ARRAY = 3,
-  BRIG_IMAGE_QUERY_CHANNELORDER = 4,
-  BRIG_IMAGE_QUERY_CHANNELTYPE = 5
+
+    BRIG_IMAGE_QUERY_WIDTH = 0,
+    BRIG_IMAGE_QUERY_HEIGHT = 1,
+    BRIG_IMAGE_QUERY_DEPTH = 2,
+    BRIG_IMAGE_QUERY_ARRAY = 3,
+    BRIG_IMAGE_QUERY_CHANNELORDER = 4,
+    BRIG_IMAGE_QUERY_CHANNELTYPE = 5
 };
 
-typedef uint8_t BrigLinkage8_t;
 enum BrigLinkage {
-  BRIG_LINKAGE_NONE = 0,
-  BRIG_LINKAGE_PROGRAM = 1,
-  BRIG_LINKAGE_MODULE = 2,
-  BRIG_LINKAGE_FUNCTION = 3,
-  BRIG_LINKAGE_ARG = 4
+
+    BRIG_LINKAGE_NONE = 0,
+    BRIG_LINKAGE_PROGRAM = 1,
+    BRIG_LINKAGE_MODULE = 2,
+    BRIG_LINKAGE_FUNCTION = 3,
+    BRIG_LINKAGE_ARG = 4
 };
 
-typedef uint8_t BrigMachineModel8_t;
 enum BrigMachineModel {
-  BRIG_MACHINE_SMALL = 0,
-  BRIG_MACHINE_LARGE = 1
+
+    BRIG_MACHINE_SMALL = 0,
+    BRIG_MACHINE_LARGE = 1,
+
+    BRIG_MACHINE_UNDEF = 2
 };
 
-typedef uint8_t BrigMemoryModifier8_t;
 enum BrigMemoryModifierMask {
-  BRIG_MEMORY_CONST = 1
+    BRIG_MEMORY_CONST = 1
 };
 
-typedef uint8_t BrigMemoryOrder8_t;
 enum BrigMemoryOrder {
-  BRIG_MEMORY_ORDER_NONE = 0,
-  BRIG_MEMORY_ORDER_RELAXED = 1,
-  BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
-  BRIG_MEMORY_ORDER_SC_RELEASE = 3,
-  BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4
+
+    BRIG_MEMORY_ORDER_NONE = 0,
+    BRIG_MEMORY_ORDER_RELAXED = 1,
+    BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
+    BRIG_MEMORY_ORDER_SC_RELEASE = 3,
+    BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4,
+
+    BRIG_MEMORY_ORDER_LAST = 5
 };
 
-typedef uint8_t BrigMemoryScope8_t;
 enum BrigMemoryScope {
-  BRIG_MEMORY_SCOPE_NONE = 0,
-  BRIG_MEMORY_SCOPE_WORKITEM = 1,
-  BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
-  BRIG_MEMORY_SCOPE_WORKGROUP = 3,
-  BRIG_MEMORY_SCOPE_COMPONENT = 4,
-  BRIG_MEMORY_SCOPE_SYSTEM = 5
+
+    BRIG_MEMORY_SCOPE_NONE = 0,
+    BRIG_MEMORY_SCOPE_WORKITEM = 1,
+    BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
+    BRIG_MEMORY_SCOPE_WORKGROUP = 3,
+    BRIG_MEMORY_SCOPE_AGENT = 4,
+    BRIG_MEMORY_SCOPE_SYSTEM = 5,
+
+    BRIG_MEMORY_SCOPE_LAST = 6
 };
 
-typedef uint16_t BrigOpcode16_t;
 enum BrigOpcode {
-  BRIG_OPCODE_NOP = 0,
-  BRIG_OPCODE_ABS = 1,
-  BRIG_OPCODE_ADD = 2,
-  BRIG_OPCODE_BORROW = 3,
-  BRIG_OPCODE_CARRY = 4,
-  BRIG_OPCODE_CEIL = 5,
-  BRIG_OPCODE_COPYSIGN = 6,
-  BRIG_OPCODE_DIV = 7,
-  BRIG_OPCODE_FLOOR = 8,
-  BRIG_OPCODE_FMA = 9,
-  BRIG_OPCODE_FRACT = 10,
-  BRIG_OPCODE_MAD = 11,
-  BRIG_OPCODE_MAX = 12,
-  BRIG_OPCODE_MIN = 13,
-  BRIG_OPCODE_MUL = 14,
-  BRIG_OPCODE_MULHI = 15,
-  BRIG_OPCODE_NEG = 16,
-  BRIG_OPCODE_REM = 17,
-  BRIG_OPCODE_RINT = 18,
-  BRIG_OPCODE_SQRT = 19,
-  BRIG_OPCODE_SUB = 20,
-  BRIG_OPCODE_TRUNC = 21,
-  BRIG_OPCODE_MAD24 = 22,
-  BRIG_OPCODE_MAD24HI = 23,
-  BRIG_OPCODE_MUL24 = 24,
-  BRIG_OPCODE_MUL24HI = 25,
-  BRIG_OPCODE_SHL = 26,
-  BRIG_OPCODE_SHR = 27,
-  BRIG_OPCODE_AND = 28,
-  BRIG_OPCODE_NOT = 29,
-  BRIG_OPCODE_OR = 30,
-  BRIG_OPCODE_POPCOUNT = 31,
-  BRIG_OPCODE_XOR = 32,
-  BRIG_OPCODE_BITEXTRACT = 33,
-  BRIG_OPCODE_BITINSERT = 34,
-  BRIG_OPCODE_BITMASK = 35,
-  BRIG_OPCODE_BITREV = 36,
-  BRIG_OPCODE_BITSELECT = 37,
-  BRIG_OPCODE_FIRSTBIT = 38,
-  BRIG_OPCODE_LASTBIT = 39,
-  BRIG_OPCODE_COMBINE = 40,
-  BRIG_OPCODE_EXPAND = 41,
-  BRIG_OPCODE_LDA = 42,
-  BRIG_OPCODE_MOV = 43,
-  BRIG_OPCODE_SHUFFLE = 44,
-  BRIG_OPCODE_UNPACKHI = 45,
-  BRIG_OPCODE_UNPACKLO = 46,
-  BRIG_OPCODE_PACK = 47,
-  BRIG_OPCODE_UNPACK = 48,
-  BRIG_OPCODE_CMOV = 49,
-  BRIG_OPCODE_CLASS = 50,
-  BRIG_OPCODE_NCOS = 51,
-  BRIG_OPCODE_NEXP2 = 52,
-  BRIG_OPCODE_NFMA = 53,
-  BRIG_OPCODE_NLOG2 = 54,
-  BRIG_OPCODE_NRCP = 55,
-  BRIG_OPCODE_NRSQRT = 56,
-  BRIG_OPCODE_NSIN = 57,
-  BRIG_OPCODE_NSQRT = 58,
-  BRIG_OPCODE_BITALIGN = 59,
-  BRIG_OPCODE_BYTEALIGN = 60,
-  BRIG_OPCODE_PACKCVT = 61,
-  BRIG_OPCODE_UNPACKCVT = 62,
-  BRIG_OPCODE_LERP = 63,
-  BRIG_OPCODE_SAD = 64,
-  BRIG_OPCODE_SADHI = 65,
-  BRIG_OPCODE_SEGMENTP = 66,
-  BRIG_OPCODE_FTOS = 67,
-  BRIG_OPCODE_STOF = 68,
-  BRIG_OPCODE_CMP = 69,
-  BRIG_OPCODE_CVT = 70,
-  BRIG_OPCODE_LD = 71,
-  BRIG_OPCODE_ST = 72,
-  BRIG_OPCODE_ATOMIC = 73,
-  BRIG_OPCODE_ATOMICNORET = 74,
-  BRIG_OPCODE_SIGNAL = 75,
-  BRIG_OPCODE_SIGNALNORET = 76,
-  BRIG_OPCODE_MEMFENCE = 77,
-  BRIG_OPCODE_RDIMAGE = 78,
-  BRIG_OPCODE_LDIMAGE = 79,
-  BRIG_OPCODE_STIMAGE = 80,
-  BRIG_OPCODE_QUERYIMAGE = 81,
-  BRIG_OPCODE_QUERYSAMPLER = 82,
-  BRIG_OPCODE_CBR = 83,
-  BRIG_OPCODE_BR = 84,
-  BRIG_OPCODE_SBR = 85,
-  BRIG_OPCODE_BARRIER = 86,
-  BRIG_OPCODE_WAVEBARRIER = 87,
-  BRIG_OPCODE_ARRIVEFBAR = 88,
-  BRIG_OPCODE_INITFBAR = 89,
-  BRIG_OPCODE_JOINFBAR = 90,
-  BRIG_OPCODE_LEAVEFBAR = 91,
-  BRIG_OPCODE_RELEASEFBAR = 92,
-  BRIG_OPCODE_WAITFBAR = 93,
-  BRIG_OPCODE_LDF = 94,
-  BRIG_OPCODE_ACTIVELANECOUNT = 95,
-  BRIG_OPCODE_ACTIVELANEID = 96,
-  BRIG_OPCODE_ACTIVELANEMASK = 97,
-  BRIG_OPCODE_ACTIVELANESHUFFLE = 98,
-  BRIG_OPCODE_CALL = 99,
-  BRIG_OPCODE_SCALL = 100,
-  BRIG_OPCODE_ICALL = 101,
-  BRIG_OPCODE_LDI = 102,
-  BRIG_OPCODE_RET = 103,
-  BRIG_OPCODE_ALLOCA = 104,
-  BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
-  BRIG_OPCODE_DIM = 106,
-  BRIG_OPCODE_GRIDGROUPS = 107,
-  BRIG_OPCODE_GRIDSIZE = 108,
-  BRIG_OPCODE_PACKETCOMPLETIONSIG = 109,
-  BRIG_OPCODE_PACKETID = 110,
-  BRIG_OPCODE_WORKGROUPID = 111,
-  BRIG_OPCODE_WORKGROUPSIZE = 112,
-  BRIG_OPCODE_WORKITEMABSID = 113,
-  BRIG_OPCODE_WORKITEMFLATABSID = 114,
-  BRIG_OPCODE_WORKITEMFLATID = 115,
-  BRIG_OPCODE_WORKITEMID = 116,
-  BRIG_OPCODE_CLEARDETECTEXCEPT = 117,
-  BRIG_OPCODE_GETDETECTEXCEPT = 118,
-  BRIG_OPCODE_SETDETECTEXCEPT = 119,
-  BRIG_OPCODE_ADDQUEUEWRITEINDEX = 120,
-  BRIG_OPCODE_AGENTCOUNT = 121,
-  BRIG_OPCODE_AGENTID = 122,
-  BRIG_OPCODE_CASQUEUEWRITEINDEX = 123,
-  BRIG_OPCODE_LDK = 124,
-  BRIG_OPCODE_LDQUEUEREADINDEX = 125,
-  BRIG_OPCODE_LDQUEUEWRITEINDEX = 126,
-  BRIG_OPCODE_QUEUEID = 127,
-  BRIG_OPCODE_QUEUEPTR = 128,
-  BRIG_OPCODE_STQUEUEREADINDEX = 129,
-  BRIG_OPCODE_STQUEUEWRITEINDEX = 130,
-  BRIG_OPCODE_CLOCK = 131,
-  BRIG_OPCODE_CUID = 132,
-  BRIG_OPCODE_DEBUGTRAP = 133,
-  BRIG_OPCODE_GROUPBASEPTR = 134,
-  BRIG_OPCODE_KERNARGBASEPTR = 135,
-  BRIG_OPCODE_LANEID = 136,
-  BRIG_OPCODE_MAXCUID = 137,
-  BRIG_OPCODE_MAXWAVEID = 138,
-  BRIG_OPCODE_NULLPTR = 139,
-  BRIG_OPCODE_WAVEID = 140
+
+    BRIG_OPCODE_NOP = 0,
+    BRIG_OPCODE_ABS = 1,
+    BRIG_OPCODE_ADD = 2,
+    BRIG_OPCODE_BORROW = 3,
+    BRIG_OPCODE_CARRY = 4,
+    BRIG_OPCODE_CEIL = 5,
+    BRIG_OPCODE_COPYSIGN = 6,
+    BRIG_OPCODE_DIV = 7,
+    BRIG_OPCODE_FLOOR = 8,
+    BRIG_OPCODE_FMA = 9,
+    BRIG_OPCODE_FRACT = 10,
+    BRIG_OPCODE_MAD = 11,
+    BRIG_OPCODE_MAX = 12,
+    BRIG_OPCODE_MIN = 13,
+    BRIG_OPCODE_MUL = 14,
+    BRIG_OPCODE_MULHI = 15,
+    BRIG_OPCODE_NEG = 16,
+    BRIG_OPCODE_REM = 17,
+    BRIG_OPCODE_RINT = 18,
+    BRIG_OPCODE_SQRT = 19,
+    BRIG_OPCODE_SUB = 20,
+    BRIG_OPCODE_TRUNC = 21,
+    BRIG_OPCODE_MAD24 = 22,
+    BRIG_OPCODE_MAD24HI = 23,
+    BRIG_OPCODE_MUL24 = 24,
+    BRIG_OPCODE_MUL24HI = 25,
+    BRIG_OPCODE_SHL = 26,
+    BRIG_OPCODE_SHR = 27,
+    BRIG_OPCODE_AND = 28,
+    BRIG_OPCODE_NOT = 29,
+    BRIG_OPCODE_OR = 30,
+    BRIG_OPCODE_POPCOUNT = 31,
+    BRIG_OPCODE_XOR = 32,
+    BRIG_OPCODE_BITEXTRACT = 33,
+    BRIG_OPCODE_BITINSERT = 34,
+    BRIG_OPCODE_BITMASK = 35,
+    BRIG_OPCODE_BITREV = 36,
+    BRIG_OPCODE_BITSELECT = 37,
+    BRIG_OPCODE_FIRSTBIT = 38,
+    BRIG_OPCODE_LASTBIT = 39,
+    BRIG_OPCODE_COMBINE = 40,
+    BRIG_OPCODE_EXPAND = 41,
+    BRIG_OPCODE_LDA = 42,
+    BRIG_OPCODE_MOV = 43,
+    BRIG_OPCODE_SHUFFLE = 44,
+    BRIG_OPCODE_UNPACKHI = 45,
+    BRIG_OPCODE_UNPACKLO = 46,
+    BRIG_OPCODE_PACK = 47,
+    BRIG_OPCODE_UNPACK = 48,
+    BRIG_OPCODE_CMOV = 49,
+    BRIG_OPCODE_CLASS = 50,
+    BRIG_OPCODE_NCOS = 51,
+    BRIG_OPCODE_NEXP2 = 52,
+    BRIG_OPCODE_NFMA = 53,
+    BRIG_OPCODE_NLOG2 = 54,
+    BRIG_OPCODE_NRCP = 55,
+    BRIG_OPCODE_NRSQRT = 56,
+    BRIG_OPCODE_NSIN = 57,
+    BRIG_OPCODE_NSQRT = 58,
+    BRIG_OPCODE_BITALIGN = 59,
+    BRIG_OPCODE_BYTEALIGN = 60,
+    BRIG_OPCODE_PACKCVT = 61,
+    BRIG_OPCODE_UNPACKCVT = 62,
+    BRIG_OPCODE_LERP = 63,
+    BRIG_OPCODE_SAD = 64,
+    BRIG_OPCODE_SADHI = 65,
+    BRIG_OPCODE_SEGMENTP = 66,
+    BRIG_OPCODE_FTOS = 67,
+    BRIG_OPCODE_STOF = 68,
+    BRIG_OPCODE_CMP = 69,
+    BRIG_OPCODE_CVT = 70,
+    BRIG_OPCODE_LD = 71,
+    BRIG_OPCODE_ST = 72,
+    BRIG_OPCODE_ATOMIC = 73,
+    BRIG_OPCODE_ATOMICNORET = 74,
+    BRIG_OPCODE_SIGNAL = 75,
+    BRIG_OPCODE_SIGNALNORET = 76,
+    BRIG_OPCODE_MEMFENCE = 77,
+    BRIG_OPCODE_RDIMAGE = 78,
+    BRIG_OPCODE_LDIMAGE = 79,
+    BRIG_OPCODE_STIMAGE = 80,
+    BRIG_OPCODE_IMAGEFENCE = 81,
+    BRIG_OPCODE_QUERYIMAGE = 82,
+    BRIG_OPCODE_QUERYSAMPLER = 83,
+    BRIG_OPCODE_CBR = 84,
+    BRIG_OPCODE_BR = 85,
+    BRIG_OPCODE_SBR = 86,
+    BRIG_OPCODE_BARRIER = 87,
+    BRIG_OPCODE_WAVEBARRIER = 88,
+    BRIG_OPCODE_ARRIVEFBAR = 89,
+    BRIG_OPCODE_INITFBAR = 90,
+    BRIG_OPCODE_JOINFBAR = 91,
+    BRIG_OPCODE_LEAVEFBAR = 92,
+    BRIG_OPCODE_RELEASEFBAR = 93,
+    BRIG_OPCODE_WAITFBAR = 94,
+    BRIG_OPCODE_LDF = 95,
+    BRIG_OPCODE_ACTIVELANECOUNT = 96,
+    BRIG_OPCODE_ACTIVELANEID = 97,
+    BRIG_OPCODE_ACTIVELANEMASK = 98,
+    BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
+    BRIG_OPCODE_CALL = 100,
+    BRIG_OPCODE_SCALL = 101,
+    BRIG_OPCODE_ICALL = 102,
+    BRIG_OPCODE_RET = 103,
+    BRIG_OPCODE_ALLOCA = 104,
+    BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
+    BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
+    BRIG_OPCODE_DIM = 107,
+    BRIG_OPCODE_GRIDGROUPS = 108,
+    BRIG_OPCODE_GRIDSIZE = 109,
+    BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
+    BRIG_OPCODE_PACKETID = 111,
+    BRIG_OPCODE_WORKGROUPID = 112,
+    BRIG_OPCODE_WORKGROUPSIZE = 113,
+    BRIG_OPCODE_WORKITEMABSID = 114,
+    BRIG_OPCODE_WORKITEMFLATABSID = 115,
+    BRIG_OPCODE_WORKITEMFLATID = 116,
+    BRIG_OPCODE_WORKITEMID = 117,
+    BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
+    BRIG_OPCODE_GETDETECTEXCEPT = 119,
+    BRIG_OPCODE_SETDETECTEXCEPT = 120,
+    BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
+    BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
+    BRIG_OPCODE_LDQUEUEREADINDEX = 123,
+    BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
+    BRIG_OPCODE_STQUEUEREADINDEX = 125,
+    BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
+    BRIG_OPCODE_CLOCK = 127,
+    BRIG_OPCODE_CUID = 128,
+    BRIG_OPCODE_DEBUGTRAP = 129,
+    BRIG_OPCODE_GROUPBASEPTR = 130,
+    BRIG_OPCODE_KERNARGBASEPTR = 131,
+    BRIG_OPCODE_LANEID = 132,
+    BRIG_OPCODE_MAXCUID = 133,
+    BRIG_OPCODE_MAXWAVEID = 134,
+    BRIG_OPCODE_NULLPTR = 135,
+    BRIG_OPCODE_WAVEID = 136,
+    BRIG_OPCODE_FIRST_USER_DEFINED = 32768,
+
+    BRIG_OPCODE_GCNMADU = (1u << 15) | 0,
+    BRIG_OPCODE_GCNMADS = (1u << 15) | 1,
+    BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2,
+    BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3,
+    BRIG_OPCODE_GCNMED3 = (1u << 15) | 4,
+    BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5,
+    BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6,
+    BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7,
+    BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8,
+    BRIG_OPCODE_GCNBFM = (1u << 15) | 9,
+    BRIG_OPCODE_GCNLD = (1u << 15) | 10,
+    BRIG_OPCODE_GCNST = (1u << 15) | 11,
+    BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12,
+    BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13,
+    BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14,
+    BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15,
+    BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16,
+    BRIG_OPCODE_GCNMSAD = (1u << 15) | 17,
+    BRIG_OPCODE_GCNQSAD = (1u << 15) | 18,
+    BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19,
+    BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20,
+    BRIG_OPCODE_GCNSADW = (1u << 15) | 21,
+    BRIG_OPCODE_GCNSADD = (1u << 15) | 22,
+    BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23,
+    BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24,
+    BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25,
+    BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26,
+    BRIG_OPCODE_GCNMAX = (1u << 15) | 27,
+    BRIG_OPCODE_GCNMIN = (1u << 15) | 28,
+    BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29,
+    BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30
 };
 
-typedef uint8_t BrigPack8_t;
 enum BrigPack {
-  BRIG_PACK_NONE = 0,
-  BRIG_PACK_PP = 1,
-  BRIG_PACK_PS = 2,
-  BRIG_PACK_SP = 3,
-  BRIG_PACK_SS = 4,
-  BRIG_PACK_S = 5,
-  BRIG_PACK_P = 6,
-  BRIG_PACK_PPSAT = 7,
-  BRIG_PACK_PSSAT = 8,
-  BRIG_PACK_SPSAT = 9,
-  BRIG_PACK_SSSAT = 10,
-  BRIG_PACK_SSAT = 11,
-  BRIG_PACK_PSAT = 12
+
+    BRIG_PACK_NONE = 0,
+    BRIG_PACK_PP = 1,
+    BRIG_PACK_PS = 2,
+    BRIG_PACK_SP = 3,
+    BRIG_PACK_SS = 4,
+    BRIG_PACK_S = 5,
+    BRIG_PACK_P = 6,
+    BRIG_PACK_PPSAT = 7,
+    BRIG_PACK_PSSAT = 8,
+    BRIG_PACK_SPSAT = 9,
+    BRIG_PACK_SSSAT = 10,
+    BRIG_PACK_SSAT = 11,
+    BRIG_PACK_PSAT = 12
 };
 
-typedef uint8_t BrigProfile8_t;
 enum BrigProfile {
-  BRIG_PROFILE_BASE = 0,
-  BRIG_PROFILE_FULL = 1
+
+    BRIG_PROFILE_BASE = 0,
+    BRIG_PROFILE_FULL = 1,
+
+    BRIG_PROFILE_UNDEF = 2
 };
 
-typedef uint16_t BrigRegisterKind16_t;
 enum BrigRegisterKind {
-  BRIG_REGISTER_CONTROL = 0,
-  BRIG_REGISTER_SINGLE = 1,
-  BRIG_REGISTER_DOUBLE = 2,
-  BRIG_REGISTER_QUAD = 3
+
+    BRIG_REGISTER_KIND_CONTROL = 0,
+    BRIG_REGISTER_KIND_SINGLE = 1,
+    BRIG_REGISTER_KIND_DOUBLE = 2,
+    BRIG_REGISTER_KIND_QUAD = 3
 };
 
-typedef uint8_t BrigRound8_t;
 enum BrigRound {
-  BRIG_ROUND_NONE = 0,
-  BRIG_ROUND_FLOAT_NEAR_EVEN = 1,
-  BRIG_ROUND_FLOAT_ZERO = 2,
-  BRIG_ROUND_FLOAT_PLUS_INFINITY = 3,
-  BRIG_ROUND_FLOAT_MINUS_INFINITY = 4,
-  BRIG_ROUND_INTEGER_NEAR_EVEN = 5,
-  BRIG_ROUND_INTEGER_ZERO = 6,
-  BRIG_ROUND_INTEGER_PLUS_INFINITY = 7,
-  BRIG_ROUND_INTEGER_MINUS_INFINITY = 8,
-  BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 9,
-  BRIG_ROUND_INTEGER_ZERO_SAT = 10,
-  BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 11,
-  BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 12,
-  BRIG_ROUND_INTEGER_SIGNALLING_NEAR_EVEN = 13,
-  BRIG_ROUND_INTEGER_SIGNALLING_ZERO = 14,
-  BRIG_ROUND_INTEGER_SIGNALLING_PLUS_INFINITY = 15,
-  BRIG_ROUND_INTEGER_SIGNALLING_MINUS_INFINITY = 16,
-  BRIG_ROUND_INTEGER_SIGNALLING_NEAR_EVEN_SAT = 17,
-  BRIG_ROUND_INTEGER_SIGNALLING_ZERO_SAT = 18,
-  BRIG_ROUND_INTEGER_SIGNALLING_PLUS_INFINITY_SAT = 19,
-  BRIG_ROUND_INTEGER_SIGNALLING_MINUS_INFINITY_SAT = 20
+    /* Rounding-mode codes: 1 is the float default, 2-5 are explicit float modes, 6-21 are integer modes.  */
+    BRIG_ROUND_NONE = 0,
+    BRIG_ROUND_FLOAT_DEFAULT = 1,
+    BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
+    BRIG_ROUND_FLOAT_ZERO = 3,
+    BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
+    BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
+    BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
+    BRIG_ROUND_INTEGER_ZERO = 7,
+    BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
+    BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
+    BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
+    BRIG_ROUND_INTEGER_ZERO_SAT = 11,
+    BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
+    BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
+    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
+    BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
+    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
+    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
+    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
+    BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
+    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
+    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
 };
 
-typedef uint8_t BrigSamplerAddressing8_t;
 enum BrigSamplerAddressing {
-  BRIG_ADDRESSING_UNDEFINED = 0,
-  BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
-  BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
-  BRIG_ADDRESSING_REPEAT = 3,
-  BRIG_ADDRESSING_MIRRORED_REPEAT = 4
+
+    BRIG_ADDRESSING_UNDEFINED = 0,
+    BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
+    BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
+    BRIG_ADDRESSING_REPEAT = 3,
+    BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
+
+    BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
 };
 
-typedef uint8_t BrigSamplerCoordNormalization8_t;
 enum BrigSamplerCoordNormalization {
-  BRIG_COORD_UNNORMALIZED = 0,
-  BRIG_COORD_NORMALIZED = 1
+
+    BRIG_COORD_UNNORMALIZED = 0,
+    BRIG_COORD_NORMALIZED = 1
 };
 
-typedef uint8_t BrigSamplerFilter8_t;
 enum BrigSamplerFilter {
-  BRIG_FILTER_NEAREST = 0,
-  BRIG_FILTER_LINEAR = 1
+
+    BRIG_FILTER_NEAREST = 0,
+    BRIG_FILTER_LINEAR = 1,
+
+    BRIG_FILTER_FIRST_USER_DEFINED = 128
 };
 
-typedef uint8_t BrigSamplerQuery8_t;
 enum BrigSamplerQuery {
-  BRIG_SAMPLER_QUERY_ADDRESSING = 0,
-  BRIG_SAMPLER_QUERY_COORD = 1,
-  BRIG_SAMPLER_QUERY_FILTER = 2
+
+    BRIG_SAMPLER_QUERY_ADDRESSING = 0,
+    BRIG_SAMPLER_QUERY_COORD = 1,
+    BRIG_SAMPLER_QUERY_FILTER = 2
 };
 
-typedef uint32_t BrigSectionIndex32_t;
 enum BrigSectionIndex {
-  BRIG_SECTION_INDEX_DATA = 0,
-  BRIG_SECTION_INDEX_CODE = 1,
-  BRIG_SECTION_INDEX_OPERAND = 2,
-  BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3
-};
 
-struct BrigSectionHeader {
-  uint32_t byteCount;
-  uint32_t headerByteCount;
-  uint32_t nameLength;
-  uint8_t name[1];
+    BRIG_SECTION_INDEX_DATA = 0,
+    BRIG_SECTION_INDEX_CODE = 1,
+    BRIG_SECTION_INDEX_OPERAND = 2,
+    BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
+
+    BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED
 };
 
-typedef uint8_t BrigSegCvtModifier8_t;
 enum BrigSegCvtModifierMask {
-  BRIG_SEG_CVT_NONULL = 1
+    BRIG_SEG_CVT_NONULL = 1
 };
 
-typedef uint8_t BrigSegment8_t;
 enum BrigSegment {
-  BRIG_SEGMENT_NONE = 0,
-  BRIG_SEGMENT_FLAT = 1,
-  BRIG_SEGMENT_GLOBAL = 2,
-  BRIG_SEGMENT_READONLY = 3,
-  BRIG_SEGMENT_KERNARG = 4,
-  BRIG_SEGMENT_GROUP = 5,
-  BRIG_SEGMENT_PRIVATE = 6,
-  BRIG_SEGMENT_SPILL = 7,
-  BRIG_SEGMENT_ARG = 8
-};
-
-enum {
-  BRIG_TYPE_PACK_SHIFT = 5,
-  BRIG_TYPE_BASE_MASK = (1 << BRIG_TYPE_PACK_SHIFT) - 1,
-  BRIG_TYPE_PACK_MASK = 3 << BRIG_TYPE_PACK_SHIFT,
-  BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
-  BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
-  BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
-  BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT
+    /* Segment codes; 0-8 are the named segments, 128 is the first user-defined code.  */
+    BRIG_SEGMENT_NONE = 0,
+    BRIG_SEGMENT_FLAT = 1,
+    BRIG_SEGMENT_GLOBAL = 2,
+    BRIG_SEGMENT_READONLY = 3,
+    BRIG_SEGMENT_KERNARG = 4,
+    BRIG_SEGMENT_GROUP = 5,
+    BRIG_SEGMENT_PRIVATE = 6,
+    BRIG_SEGMENT_SPILL = 7,
+    BRIG_SEGMENT_ARG = 8,
+
+    BRIG_SEGMENT_FIRST_USER_DEFINED = 128,
+
+    BRIG_SEGMENT_AMD_GCN = 9 /* NOTE(review): listed after FIRST_USER_DEFINED but numbered 9, below 128 - confirm intended.  */
 };
 
-typedef uint16_t BrigType16_t;
-enum BrigType {
-  BRIG_TYPE_NONE = 0,
-  BRIG_TYPE_U8 = 1,
-  BRIG_TYPE_U16 = 2,
-  BRIG_TYPE_U32 = 3,
-  BRIG_TYPE_U64 = 4,
-  BRIG_TYPE_S8 = 5,
-  BRIG_TYPE_S16 = 6,
-  BRIG_TYPE_S32 = 7,
-  BRIG_TYPE_S64 = 8,
-  BRIG_TYPE_F16 = 9,
-  BRIG_TYPE_F32 = 10,
-  BRIG_TYPE_F64 = 11,
-  BRIG_TYPE_B1 = 12,
-  BRIG_TYPE_B8 = 13,
-  BRIG_TYPE_B16 = 14,
-  BRIG_TYPE_B32 = 15,
-  BRIG_TYPE_B64 = 16,
-  BRIG_TYPE_B128 = 17,
-  BRIG_TYPE_SAMP = 18,
-  BRIG_TYPE_ROIMG = 19,
-  BRIG_TYPE_WOIMG = 20,
-  BRIG_TYPE_RWIMG = 21,
-  BRIG_TYPE_SIG32 = 22,
-  BRIG_TYPE_SIG64 = 23,
-  BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32,
-  BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
-  BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32,
-  BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
-  BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
-  BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
-  BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
-  BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128
+enum BrigPackedTypeBits {
+    /* Bit layout of a type code: base type in bits 0-4, pack width in bits 5-6, array flag in bit 7.  */
+    BRIG_TYPE_BASE_SIZE  = 5,
+    BRIG_TYPE_PACK_SIZE  = 2,
+    BRIG_TYPE_ARRAY_SIZE = 1,
+
+    BRIG_TYPE_BASE_SHIFT  = 0,
+    BRIG_TYPE_PACK_SHIFT  = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, /* 5 */
+    BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, /* 7 */
+
+    BRIG_TYPE_BASE_MASK  = ((1 << BRIG_TYPE_BASE_SIZE)  - 1) << BRIG_TYPE_BASE_SHIFT, /* 0x1f */
+    BRIG_TYPE_PACK_MASK  = ((1 << BRIG_TYPE_PACK_SIZE)  - 1) << BRIG_TYPE_PACK_SHIFT, /* 0x60 */
+    BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT, /* 0x80 */
+
+    BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, /* 0x00 */
+    BRIG_TYPE_PACK_32   = 1 << BRIG_TYPE_PACK_SHIFT, /* 0x20 */
+    BRIG_TYPE_PACK_64   = 2 << BRIG_TYPE_PACK_SHIFT, /* 0x40 */
+    BRIG_TYPE_PACK_128  = 3 << BRIG_TYPE_PACK_SHIFT, /* 0x60 */
+
+    BRIG_TYPE_ARRAY     = 1 << BRIG_TYPE_ARRAY_SHIFT /* 0x80 */
 };
 
-struct BrigUInt64 {
-  uint32_t lo;
-  uint32_t hi;
+enum BrigType {
+    /* Base type codes; each fits in the low five bits (BRIG_TYPE_BASE_MASK).  */
+    BRIG_TYPE_NONE  = 0,
+    BRIG_TYPE_U8    = 1,
+    BRIG_TYPE_U16   = 2,
+    BRIG_TYPE_U32   = 3,
+    BRIG_TYPE_U64   = 4,
+    BRIG_TYPE_S8    = 5,
+    BRIG_TYPE_S16   = 6,
+    BRIG_TYPE_S32   = 7,
+    BRIG_TYPE_S64   = 8,
+    BRIG_TYPE_F16   = 9,
+    BRIG_TYPE_F32   = 10,
+    BRIG_TYPE_F64   = 11,
+    BRIG_TYPE_B1    = 12,
+    BRIG_TYPE_B8    = 13,
+    BRIG_TYPE_B16   = 14,
+    BRIG_TYPE_B32   = 15,
+    BRIG_TYPE_B64   = 16,
+    BRIG_TYPE_B128  = 17,
+    BRIG_TYPE_SAMP  = 18,
+    BRIG_TYPE_ROIMG = 19,
+    BRIG_TYPE_WOIMG = 20,
+    BRIG_TYPE_RWIMG = 21,
+    BRIG_TYPE_SIG32 = 22,
+    BRIG_TYPE_SIG64 = 23,
+    /* Packed types: an integer or float base type ORed with a BRIG_TYPE_PACK_* total-width code.  */
+    BRIG_TYPE_U8X4  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_32,
+    BRIG_TYPE_U8X8  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_U8X16 = BRIG_TYPE_U8  | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
+    BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_S8X4  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_32,
+    BRIG_TYPE_S8X8  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_S8X16 = BRIG_TYPE_S8  | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
+    BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
+    BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
+    BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
+    BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,
+    /* Array variants set BRIG_TYPE_ARRAY on a base or packed code; BRIG_TYPE_NONE and BRIG_TYPE_B1 have none.  */
+    BRIG_TYPE_U8_ARRAY    = BRIG_TYPE_U8    | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U16_ARRAY   = BRIG_TYPE_U16   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U32_ARRAY   = BRIG_TYPE_U32   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U64_ARRAY   = BRIG_TYPE_U64   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S8_ARRAY    = BRIG_TYPE_S8    | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S16_ARRAY   = BRIG_TYPE_S16   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S32_ARRAY   = BRIG_TYPE_S32   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S64_ARRAY   = BRIG_TYPE_S64   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F16_ARRAY   = BRIG_TYPE_F16   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F32_ARRAY   = BRIG_TYPE_F32   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F64_ARRAY   = BRIG_TYPE_F64   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_B8_ARRAY    = BRIG_TYPE_B8    | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_B16_ARRAY   = BRIG_TYPE_B16   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_B32_ARRAY   = BRIG_TYPE_B32   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_B64_ARRAY   = BRIG_TYPE_B64   | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_B128_ARRAY  = BRIG_TYPE_B128  | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_SAMP_ARRAY  = BRIG_TYPE_SAMP  | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U8X4_ARRAY  = BRIG_TYPE_U8X4  | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U8X8_ARRAY  = BRIG_TYPE_U8X8  | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S8X4_ARRAY  = BRIG_TYPE_S8X4  | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S8X8_ARRAY  = BRIG_TYPE_S8X8  | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
+    BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY,
+
+    BRIG_TYPE_INVALID = (unsigned) -1 /* NOTE(review): does not fit in int, so this needs C++ or a GNU C extension - confirm the header is never built as strict ISO C.  */
 };
 
-typedef uint8_t BrigVariableModifier8_t;
 enum BrigVariableModifierMask {
-  BRIG_SYMBOL_DECLARATION = 0,
-  BRIG_SYMBOL_DEFINITION = 1,
-  BRIG_SYMBOL_CONST = 2,
-  BRIG_SYMBOL_ARRAY = 4,
-  BRIG_SYMBOL_FLEX_ARRAY = 8
-};
 
-typedef uint32_t BrigVersion32_t;
-enum BrigVersion {
-  BRIG_VERSION_HSAIL_MAJOR = 0,
-  BRIG_VERSION_HSAIL_MINOR = 99,
-  BRIG_VERSION_BRIG_MAJOR = 0,
-  BRIG_VERSION_BRIG_MINOR = 99
+    BRIG_VARIABLE_DEFINITION = 1,
+    BRIG_VARIABLE_CONST = 2
 };
 
-typedef uint8_t BrigWidth8_t;
 enum BrigWidth {
-  BRIG_WIDTH_NONE = 0,
-  BRIG_WIDTH_1 = 1,
-  BRIG_WIDTH_2 = 2,
-  BRIG_WIDTH_4 = 3,
-  BRIG_WIDTH_8 = 4,
-  BRIG_WIDTH_16 = 5,
-  BRIG_WIDTH_32 = 6,
-  BRIG_WIDTH_64 = 7,
-  BRIG_WIDTH_128 = 8,
-  BRIG_WIDTH_256 = 9,
-  BRIG_WIDTH_512 = 10,
-  BRIG_WIDTH_1024 = 11,
-  BRIG_WIDTH_2048 = 12,
-  BRIG_WIDTH_4096 = 13,
-  BRIG_WIDTH_8192 = 14,
-  BRIG_WIDTH_16384 = 15,
-  BRIG_WIDTH_32768 = 16,
-  BRIG_WIDTH_65536 = 17,
-  BRIG_WIDTH_131072 = 18,
-  BRIG_WIDTH_262144 = 19,
-  BRIG_WIDTH_524288 = 20,
-  BRIG_WIDTH_1048576 = 21,
-  BRIG_WIDTH_2097152 = 22,
-  BRIG_WIDTH_4194304 = 23,
-  BRIG_WIDTH_8388608 = 24,
-  BRIG_WIDTH_16777216 = 25,
-  BRIG_WIDTH_33554432 = 26,
-  BRIG_WIDTH_67108864 = 27,
-  BRIG_WIDTH_134217728 = 28,
-  BRIG_WIDTH_268435456 = 29,
-  BRIG_WIDTH_536870912 = 30,
-  BRIG_WIDTH_1073741824 = 31,
-  BRIG_WIDTH_2147483648 = 32,
-  BRIG_WIDTH_WAVESIZE = 33,
-  BRIG_WIDTH_ALL = 34
+    /* Apart from NONE, WAVESIZE and ALL, BRIG_WIDTH_<n> is encoded as log2 (n) + 1.  */
+    BRIG_WIDTH_NONE = 0,
+    BRIG_WIDTH_1 = 1,
+    BRIG_WIDTH_2 = 2,
+    BRIG_WIDTH_4 = 3,
+    BRIG_WIDTH_8 = 4,
+    BRIG_WIDTH_16 = 5,
+    BRIG_WIDTH_32 = 6,
+    BRIG_WIDTH_64 = 7,
+    BRIG_WIDTH_128 = 8,
+    BRIG_WIDTH_256 = 9,
+    BRIG_WIDTH_512 = 10,
+    BRIG_WIDTH_1024 = 11,
+    BRIG_WIDTH_2048 = 12,
+    BRIG_WIDTH_4096 = 13,
+    BRIG_WIDTH_8192 = 14,
+    BRIG_WIDTH_16384 = 15,
+    BRIG_WIDTH_32768 = 16,
+    BRIG_WIDTH_65536 = 17,
+    BRIG_WIDTH_131072 = 18,
+    BRIG_WIDTH_262144 = 19,
+    BRIG_WIDTH_524288 = 20,
+    BRIG_WIDTH_1048576 = 21,
+    BRIG_WIDTH_2097152 = 22,
+    BRIG_WIDTH_4194304 = 23,
+    BRIG_WIDTH_8388608 = 24,
+    BRIG_WIDTH_16777216 = 25,
+    BRIG_WIDTH_33554432 = 26,
+    BRIG_WIDTH_67108864 = 27,
+    BRIG_WIDTH_134217728 = 28,
+    BRIG_WIDTH_268435456 = 29,
+    BRIG_WIDTH_536870912 = 30,
+    BRIG_WIDTH_1073741824 = 31,
+    BRIG_WIDTH_2147483648 = 32,
+    BRIG_WIDTH_WAVESIZE = 33,
+    BRIG_WIDTH_ALL = 34,
+
+    BRIG_WIDTH_LAST /* Implicitly 35, one past BRIG_WIDTH_ALL.  */
 };
 
-struct BrigData {
-  uint32_t byteCount;
-  uint8_t bytes[1];
+struct BrigUInt64 {
+    uint32_t lo;
+    uint32_t hi;
+
+};
+
+struct BrigAluModifier {
+    BrigAluModifier8_t allBits;
+
 };
 
 struct BrigBase {
-  uint16_t byteCount;
-  BrigKinds16_t kind;
+    uint16_t byteCount;
+    BrigKind16_t kind;
+};
+
+struct BrigData {
+
+    uint32_t byteCount;
+    uint8_t bytes[1];
+};
+
+struct BrigExecutableModifier {
+    BrigExecutableModifier8_t allBits;
+
+};
+
+struct BrigMemoryModifier {
+    BrigMemoryModifier8_t allBits;
+
 };
 
-struct BrigDirectiveArgBlock {
-  BrigBase base;
+struct BrigSegCvtModifier {
+    BrigSegCvtModifier8_t allBits;
+
+};
+
+struct BrigVariableModifier {
+    BrigVariableModifier8_t allBits;
+
+};
+
+struct BrigDirectiveArgBlockEnd {
+    BrigBase base;
+};
+
+struct BrigDirectiveArgBlockStart {
+    BrigBase base;
 };
 
 struct BrigDirectiveComment {
-  BrigBase base;
-  BrigDataOffsetString32_t name;
+    BrigBase base;
+    BrigDataOffsetString32_t name;
 };
 
 struct BrigDirectiveControl {
-  BrigBase base;
-  BrigControlDirective16_t control;
-  uint16_t reserved;
-  BrigDataOffsetOperandList32_t operands;
+    BrigBase base;
+    BrigControlDirective16_t control;
+    uint16_t reserved;
+    BrigDataOffsetOperandList32_t operands;
 };
 
 struct BrigDirectiveExecutable {
-  BrigBase base;
-  BrigDataOffsetString32_t name;
-  uint16_t outArgCount;
-  uint16_t inArgCount;
-  BrigCodeOffset32_t firstInArg;
-  BrigCodeOffset32_t firstCodeBlockEntry;
-  BrigCodeOffset32_t nextModuleEntry;
-  uint32_t codeBlockEntryCount;
-  BrigExecutableModifier8_t modifier;
-  BrigLinkage8_t linkage;
-  uint16_t reserved;
+    BrigBase base;
+    BrigDataOffsetString32_t name;
+    uint16_t outArgCount;
+    uint16_t inArgCount;
+    BrigCodeOffset32_t firstInArg;
+    BrigCodeOffset32_t firstCodeBlockEntry;
+    BrigCodeOffset32_t nextModuleEntry;
+    BrigExecutableModifier modifier;
+    BrigLinkage8_t linkage;
+    uint16_t reserved;
 };
 
 struct BrigDirectiveExtension {
-  BrigBase base;
-  BrigDataOffsetString32_t name;
+    BrigBase base;
+    BrigDataOffsetString32_t name;
 };
 
 struct BrigDirectiveFbarrier {
-  BrigBase base;
-  BrigDataOffsetString32_t name;
-  BrigExecutableModifier8_t modifier;
-  BrigLinkage8_t linkage;
-  uint16_t reserved;
+    BrigBase base;
+    BrigDataOffsetString32_t name;
+    BrigVariableModifier modifier;
+    BrigLinkage8_t linkage;
+    uint16_t reserved;
 };
 
 struct BrigDirectiveLabel {
-  BrigBase base;
-  BrigDataOffsetString32_t name;
+    BrigBase base;
+    BrigDataOffsetString32_t name;
 };
 
 struct BrigDirectiveLoc {
-  BrigBase base;
-  BrigDataOffsetString32_t filename;
-  uint32_t line;
-  uint32_t column;
+    BrigBase base;
+    BrigDataOffsetString32_t filename;
+    uint32_t line;
+    uint32_t column;
 };
 
 struct BrigDirectiveNone {
-  BrigBase base;
+    BrigBase base;
 };
 
 struct BrigDirectivePragma {
-  BrigBase base;
-  BrigDataOffsetOperandList32_t operands;
+    BrigBase base;
+    BrigDataOffsetOperandList32_t operands;
 };
 
 struct BrigDirectiveVariable {
-  BrigBase base;
-  BrigDataOffsetString32_t name;
-  BrigOperandOffset32_t init;
-  BrigType16_t type;
-  BrigSegment8_t segment;
-  BrigAlignment8_t align;
-  BrigUInt64 dim;
-  BrigVariableModifier8_t modifier;
-  BrigLinkage8_t linkage;
-  BrigAllocation8_t allocation;
-  uint8_t reserved;
-};
-
-struct BrigDirectiveVersion {
-  BrigBase base;
-  BrigVersion32_t hsailMajor;
-  BrigVersion32_t hsailMinor;
-  BrigVersion32_t brigMajor;
-  BrigVersion32_t brigMinor;
-  BrigProfile8_t profile;
-  BrigMachineModel8_t machineModel;
-  uint16_t reserved;
+    BrigBase base;
+    BrigDataOffsetString32_t name;
+    BrigOperandOffset32_t init;
+    BrigType16_t type;
+
+    BrigSegment8_t segment;
+    BrigAlignment8_t align;
+    BrigUInt64 dim;
+    BrigVariableModifier modifier;
+    BrigLinkage8_t linkage;
+    BrigAllocation8_t allocation;
+    uint8_t reserved;
+};
+
+struct BrigDirectiveModule {
+    BrigBase base;
+    BrigDataOffsetString32_t name;
+    BrigVersion32_t hsailMajor;
+    BrigVersion32_t hsailMinor;
+    BrigProfile8_t profile;
+    BrigMachineModel8_t machineModel;
+    BrigRound8_t defaultFloatRound;
+    uint8_t reserved;
 };
 
 struct BrigInstBase {
-  BrigBase base;
-  BrigOpcode16_t opcode;
-  BrigType16_t type;
-  BrigDataOffsetOperandList32_t operands;
+    BrigBase base;
+    BrigOpcode16_t opcode;
+    BrigType16_t type;
+    BrigDataOffsetOperandList32_t operands;
+
 };
 
 struct BrigInstAddr {
-  BrigInstBase base;
-  BrigSegment8_t segment;
-  uint8_t reserved[3];
+    BrigInstBase base;
+    BrigSegment8_t segment;
+    uint8_t reserved[3];
 };
 
 struct BrigInstAtomic {
-  BrigInstBase base;
-  BrigSegment8_t segment;
-  BrigMemoryOrder8_t memoryOrder;
-  BrigMemoryScope8_t memoryScope;
-  BrigAtomicOperation8_t atomicOperation;
-  uint8_t equivClass;
-  uint8_t reserved[3];
+    BrigInstBase base;
+    BrigSegment8_t segment;
+    BrigMemoryOrder8_t memoryOrder;
+    BrigMemoryScope8_t memoryScope;
+    BrigAtomicOperation8_t atomicOperation;
+    uint8_t equivClass;
+    uint8_t reserved[3];
 };
 
 struct BrigInstBasic {
-  BrigInstBase base;
+    BrigInstBase base;
 };
 
 struct BrigInstBr {
-  BrigInstBase base;
-  BrigWidth8_t width;
-  uint8_t reserved[3];
+    BrigInstBase base;
+    BrigWidth8_t width;
+    uint8_t reserved[3];
 };
 
 struct BrigInstCmp {
-  BrigInstBase base;
-  BrigType16_t sourceType;
-  BrigAluModifier16_t modifier;
-  BrigCompareOperation8_t compare;
-  BrigPack8_t pack;
-  uint16_t reserved;
+    BrigInstBase base;
+    BrigType16_t sourceType;
+    BrigAluModifier modifier;
+    BrigCompareOperation8_t compare;
+    BrigPack8_t pack;
+    uint8_t reserved[3];
 };
 
 struct BrigInstCvt {
-  BrigInstBase base;
-  BrigType16_t sourceType;
-  BrigAluModifier16_t modifier;
+    BrigInstBase base;
+    BrigType16_t sourceType;
+    BrigAluModifier modifier;
+    BrigRound8_t round;
 };
 
 struct BrigInstImage {
-  BrigInstBase base;
-  BrigType16_t imageType;
-  BrigType16_t coordType;
-  BrigImageGeometry8_t geometry;
-  uint8_t equivClass;
-  uint16_t reserved;
+    BrigInstBase base;
+    BrigType16_t imageType;
+    BrigType16_t coordType;
+    BrigImageGeometry8_t geometry;
+    uint8_t equivClass;
+    uint16_t reserved;
 };
 
 struct BrigInstLane {
-  BrigInstBase base;
-  BrigType16_t sourceType;
-  BrigWidth8_t width;
-  uint8_t reserved;
+    BrigInstBase base;
+    BrigType16_t sourceType;
+    BrigWidth8_t width;
+    uint8_t reserved;
 };
 
 struct BrigInstMem {
-  BrigInstBase base;
-  BrigSegment8_t segment;
-  BrigAlignment8_t align;
-  uint8_t equivClass;
-  BrigWidth8_t width;
-  BrigMemoryModifier8_t modifier;
-  uint8_t reserved[3];
+    BrigInstBase base;
+    BrigSegment8_t segment;
+    BrigAlignment8_t align;
+    uint8_t equivClass;
+    BrigWidth8_t width;
+    BrigMemoryModifier modifier;
+    uint8_t reserved[3];
 };
 
 struct BrigInstMemFence {
-  BrigInstBase base;
-  BrigMemoryOrder8_t memoryOrder;
-  BrigMemoryScope8_t globalSegmentMemoryScope;
-  BrigMemoryScope8_t groupSegmentMemoryScope;
-  BrigMemoryScope8_t imageSegmentMemoryScope;
+    BrigInstBase base;
+    BrigMemoryOrder8_t memoryOrder;
+    BrigMemoryScope8_t globalSegmentMemoryScope;
+    BrigMemoryScope8_t groupSegmentMemoryScope;
+    BrigMemoryScope8_t imageSegmentMemoryScope;
 };
 
 struct BrigInstMod {
-  BrigInstBase base;
-  BrigAluModifier16_t modifier;
-  BrigPack8_t pack;
-  uint8_t reserved;
+    BrigInstBase base;
+    BrigAluModifier modifier;
+    BrigRound8_t round;
+    BrigPack8_t pack;
+    uint8_t reserved;
 };
 
 struct BrigInstQueryImage {
-  BrigInstBase base;
-  BrigType16_t imageType;
-  BrigImageGeometry8_t geometry;
-  BrigImageQuery8_t query;
+    BrigInstBase base;
+    BrigType16_t imageType;
+    BrigImageGeometry8_t geometry;
+    BrigImageQuery8_t imageQuery;
 };
 
 struct BrigInstQuerySampler {
-  BrigInstBase base;
-  BrigSamplerQuery8_t query;
-  uint8_t reserved[3];
+    BrigInstBase base;
+    BrigSamplerQuery8_t samplerQuery;
+    uint8_t reserved[3];
 };
 
 struct BrigInstQueue {
-  BrigInstBase base;
-  BrigSegment8_t segment;
-  BrigMemoryOrder8_t memoryOrder;
-  uint16_t reserved;
+    BrigInstBase base;
+    BrigSegment8_t segment;
+    BrigMemoryOrder8_t memoryOrder;
+    uint16_t reserved;
 };
 
 struct BrigInstSeg {
-  BrigInstBase base;
-  BrigSegment8_t segment;
-  uint8_t reserved[3];
+    BrigInstBase base;
+    BrigSegment8_t segment;
+    uint8_t reserved[3];
 };
 
 struct BrigInstSegCvt {
-  BrigInstBase base;
-  BrigType16_t sourceType;
-  BrigSegment8_t segment;
-  BrigSegCvtModifier8_t modifier;
+    BrigInstBase base;
+    BrigType16_t sourceType;
+    BrigSegment8_t segment;
+    BrigSegCvtModifier modifier;
 };
 
 struct BrigInstSignal {
-  BrigInstBase base;
-  BrigType16_t signalType;
-  BrigMemoryOrder8_t memoryOrder;
-  BrigAtomicOperation8_t signalOperation;
+    BrigInstBase base;
+    BrigType16_t signalType;
+    BrigMemoryOrder8_t memoryOrder;
+    BrigAtomicOperation8_t signalOperation;
 };
 
 struct BrigInstSourceType {
-  BrigInstBase base;
-  BrigType16_t sourceType;
-  uint16_t reserved;
+    BrigInstBase base;
+    BrigType16_t sourceType;
+    uint16_t reserved;
 };
 
 struct BrigOperandAddress {
-  BrigBase base;
-  BrigCodeOffset32_t symbol;
-  BrigOperandOffset32_t reg;
-  BrigUInt64 offset;
+    BrigBase base;
+    BrigCodeOffset32_t symbol;
+    BrigOperandOffset32_t reg;
+    BrigUInt64 offset;
+};
+
+struct BrigOperandAlign {
+    BrigBase base;
+    BrigAlignment8_t align;
+    uint8_t reserved[3];
 };
 
 struct BrigOperandCodeList {
-  BrigBase base;
-  BrigDataOffsetCodeList32_t elements;
+    BrigBase base;
+    BrigDataOffsetCodeList32_t elements;
+
 };
 
 struct BrigOperandCodeRef {
-  BrigBase base;
-  BrigCodeOffset32_t ref;
+    BrigBase base;
+    BrigCodeOffset32_t ref;
+};
+
+struct BrigOperandConstantBytes {
+    BrigBase base;
+    BrigType16_t type;
+    uint16_t reserved;
+    BrigDataOffsetString32_t bytes;
 };
 
-struct BrigOperandData {
-  BrigBase base;
-  BrigDataOffset32_t data;
+struct BrigOperandConstantOperandList {
+    BrigBase base;
+    BrigType16_t type;
+    uint16_t reserved;
+    BrigDataOffsetOperandList32_t elements;
+
 };
 
-struct BrigOperandImageProperties {
-  BrigBase base;
-  uint64_t width;
-  uint64_t height;
-  uint64_t depth;
-  uint64_t array;
-  BrigImageGeometry8_t geometry;
-  BrigImageChannelOrder8_t channelOrder;
-  BrigImageChannelType8_t channelType;
-  uint8_t reserved;
+struct BrigOperandConstantImage {
+    BrigBase base;
+    BrigType16_t type;
+    BrigImageGeometry8_t geometry;
+    BrigImageChannelOrder8_t channelOrder;
+    BrigImageChannelType8_t channelType;
+    uint8_t reserved[3];
+    BrigUInt64 width;
+    BrigUInt64 height;
+    BrigUInt64 depth;
+    BrigUInt64 array;
 };
 
 struct BrigOperandOperandList {
-  BrigBase base;
-  BrigDataOffsetOperandList32_t elements;
+    BrigBase base;
+    BrigDataOffsetOperandList32_t elements;
+
 };
 
-struct BrigOperandReg {
-  BrigBase base;
-  BrigRegisterKind16_t regKind;
-  uint16_t regNum;
+struct BrigOperandRegister {
+    BrigBase base;
+    BrigRegisterKind16_t regKind;
+    uint16_t regNum;
 };
 
-struct BrigOperandSamplerProperties {
-  BrigBase base;
-  BrigSamplerCoordNormalization8_t coord;
-  BrigSamplerFilter8_t filter;
-  BrigSamplerAddressing8_t addressing;
-  uint8_t reserved;
+struct BrigOperandConstantSampler {
+    BrigBase base;
+    BrigType16_t type;
+    BrigSamplerCoordNormalization8_t coord;
+    BrigSamplerFilter8_t filter;
+    BrigSamplerAddressing8_t addressing;
+    uint8_t reserved[3];
 };
 
 struct BrigOperandString {
-  BrigBase base;
-  BrigDataOffsetString32_t string;
+    BrigBase base;
+    BrigDataOffsetString32_t string;
 };
 
 struct BrigOperandWavesize {
-  BrigBase base;
+    BrigBase base;
 };
 
-struct BrigModule {
-  uint32_t sectionCount;
-  BrigSectionHeader* section[1];
+enum BrigExceptionsMask {
+    BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, /* 0x01 */
+    BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, /* 0x02 */
+    BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, /* 0x04 */
+    BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, /* 0x08 */
+    BRIG_EXCEPTIONS_INEXACT = 1 << 4, /* 0x10 */
+    /* Bits 5-15 are not assigned here; user-defined flags start at bit 16 (0x10000).  */
+    BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
 };
 
+struct BrigSectionHeader {
+    uint64_t byteCount; /* hsa_brig_section::output stores the section's total_size here.  */
+    uint32_t headerByteCount; /* hsa_brig_section::output stores header_byte_count here.  */
+    uint32_t nameLength; /* strlen of the section name; the three fixed fields occupy 8 + 4 + 4 = 16 bytes.  */
+    uint8_t name[1]; /* First byte of the name; the rest follows the struct.  sizeof therefore counts one name byte plus tail padding.  */
+};
+
+#define MODULE_IDENTIFICATION_LENGTH (8)
+
+struct BrigModuleHeader {
+    char identification[MODULE_IDENTIFICATION_LENGTH]; /* 8 bytes; no room is reserved for a NUL beyond them.  */
+    BrigVersion32_t brigMajor;
+    BrigVersion32_t brigMinor;
+    uint64_t byteCount; /* NOTE(review): presumably the size of the whole module in bytes - confirm against the HSA PRM.  */
+    uint8_t hash[64]; /* 64 raw bytes; the hash algorithm is not visible in this header.  */
+    uint32_t reserved;
+    uint32_t sectionCount;
+    uint64_t sectionIndex; /* NOTE(review): presumably a byte offset to a table of section offsets - confirm against the HSA PRM.  */
+};
+
+typedef BrigModuleHeader* BrigModule_t;
+
 #endif /* HSA_BRIG_FORMAT_H */
diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index 0de9aab..30426c7 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -25,19 +25,29 @@  along with GCC; see the file COPYING3.  If not see
 #include "hard-reg-set.h"
 #include "hsa.h"
 #include "tree.h"
+#include "tree-iterator.h"
 #include "stor-layout.h"
 #include "tree-cfg.h"
+#include "tree-ssa-alias.h"
 #include "machmode.h"
 #include "output.h"
+#include "gimple-expr.h"
 #include "dominance.h"
 #include "cfg.h"
 #include "function.h"
+#include "gimple.h"
 #include "basic-block.h"
 #include "vec.h"
+#include "stringpool.h"
 #include "gimple-pretty-print.h"
 #include "diagnostic-core.h"
 #include "hash-map.h"
+#include "ipa-ref.h"
+#include "lto-streamer.h"
+#include "cgraph.h"
 
+#define BRIG_ELF_SECTION_NAME ".brig"
+#define BRIG_LABEL_STRING "hsa_brig"
 #define BRIG_SECTION_DATA_NAME    "hsa_data"
 #define BRIG_SECTION_CODE_NAME    "hsa_code"
 #define BRIG_SECTION_OPERAND_NAME "hsa_operand"
@@ -67,7 +77,11 @@  public:
   /* Section name that will be output to the BRIG.  */
   const char *section_name;
   /* Size in bytes of all data stored in the section.  */
-  unsigned total_size, header_byte_count;
+  unsigned total_size;
+  /* The size of the header of the section including the padding added by round_size_up (4).  */
+  unsigned header_byte_count;
+  /* The size of the header of the section before round_size_up (4) pads it.  */
+  unsigned header_byte_delta;
 
   /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes.  */
   vec <struct hsa_brig_data_chunk> chunks;
@@ -123,15 +137,15 @@  hsa_brig_section::allocate_new_chunk ()
 void
 hsa_brig_section::init (const char *name)
 {
-  struct BrigSectionHeader sample;
-
   section_name = name;
-  total_size = sizeof(sample.byteCount) + sizeof(sample.headerByteCount)
-        + sizeof(sample.nameLength);
-  /* Add strlen + null termination to the section size*/
-  total_size = total_size + strlen(section_name) + 1;
+  /* sizeof (BrigSectionHeader) already counts name[0] and the struct's tail
+     padding, so adding strlen over-counts the header.  That is not what the
+     format intends, but it is the size the disassembler expects, and
+     hsa_brig_section::output emits zero bytes to match it.  */
+  total_size = sizeof (BrigSectionHeader) + strlen(section_name);
   chunks.create (1);
   allocate_new_chunk ();
+  header_byte_delta = total_size; /* Header size before round_size_up (4).  */
   round_size_up (4);
   header_byte_count = total_size;
 }
@@ -154,14 +168,17 @@  void
 hsa_brig_section::output ()
 {
   struct BrigSectionHeader section_header;
+  char padding[8] = { 0 };
 
-  switch_to_section (get_section (section_name, SECTION_NOTYPE, NULL));
-
-  section_header.byteCount = htole32 (total_size);
-  section_header.nameLength = htole32 (strlen(section_name));
+  section_header.byteCount = htole64 (total_size);
   section_header.headerByteCount = htole32 (header_byte_count);
-  assemble_string ((const char*) &section_header, 12);
-  assemble_string (section_name, (section_header.nameLength + 1));
+  section_header.nameLength = htole32 (strlen (section_name));
+  assemble_string ((const char*) &section_header, 16); /* The three fixed fields.  */
+  /* nameLength is stored little-endian, so it must not be reused as a host
+     byte count; take the length from the string itself.  */
+  assemble_string (section_name, strlen (section_name));
+  /* Zero filler matching the header over-count in hsa_brig_section::init.  */
+  assemble_string (padding, sizeof (padding));
   for (unsigned i = 0; i < chunks.length (); i++)
     assemble_string (chunks[i].data, chunks[i].size);
 }
@@ -214,7 +231,7 @@  hsa_brig_section::get_ptr_by_offset (unsigned int offset)
 {
   gcc_assert (offset < total_size);
 
-  offset -= header_byte_count;
+  offset -= header_byte_delta;
   unsigned int i;
 
   for (i = 0; offset >= chunks[i].size; i++)
@@ -223,7 +240,6 @@  hsa_brig_section::get_ptr_by_offset (unsigned int offset)
   return chunks[i].data + offset;
 }
 
-
 /* BRIG string data hashing.  */
 
 struct brig_string_slot
@@ -356,7 +372,6 @@  static struct operand_queue
 static void
 brig_init (void)
 {
-  struct BrigDirectiveVersion verdir;
   brig_insn_count = 0;
 
   if (brig_initialized)
@@ -366,21 +381,37 @@  brig_init (void)
   brig_data.init (BRIG_SECTION_DATA_NAME);
   brig_code.init (BRIG_SECTION_CODE_NAME);
   brig_operand.init (BRIG_SECTION_OPERAND_NAME);
+  brig_initialized = true;
 
-  verdir.base.byteCount = htole16 (sizeof (verdir));
-  verdir.base.kind = htole16 (BRIG_KIND_DIRECTIVE_VERSION);
-  verdir.hsailMajor = htole32 (BRIG_VERSION_HSAIL_MAJOR) ;
-  verdir.hsailMinor =  htole32 (BRIG_VERSION_HSAIL_MINOR);
-  verdir.brigMajor = htole32 (BRIG_VERSION_BRIG_MAJOR);
-  verdir.brigMinor = htole32 (BRIG_VERSION_BRIG_MINOR);
-  verdir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
+  struct BrigDirectiveModule moddir;
+  memset (&moddir, 0, sizeof (moddir));
+  moddir.base.byteCount = htole16 (sizeof (moddir));
+
+  char *modname;
+  if (!in_lto_p && main_input_filename)
+    {
+      const char *part = strrchr (main_input_filename, '/');
+      /* Skip the '/' itself; without one, use the whole file name.  */
+      part = part ? part + 1 : main_input_filename;
+      modname = concat ("&", part, NULL);
+      char* extension = strchr (modname, '.');
+      if (extension)
+	*extension = '\0';
+      moddir.name = brig_emit_string (modname);
+      free (modname);
+    }
+  else
+    moddir.name = brig_emit_string (main_input_filename);
+  moddir.base.kind = htole16 (BRIG_KIND_DIRECTIVE_MODULE);
+  moddir.hsailMajor = htole32 (BRIG_VERSION_HSAIL_MAJOR) ;
+  moddir.hsailMinor = htole32 (BRIG_VERSION_HSAIL_MINOR);
+  moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
   if (hsa_machine_large_p ())
-    verdir.machineModel = BRIG_MACHINE_LARGE;
+    moddir.machineModel = BRIG_MACHINE_LARGE;
   else
-    verdir.machineModel = BRIG_MACHINE_SMALL;
-  verdir.reserved = 0;
-  brig_code.add (&verdir, sizeof (verdir));
-  brig_initialized = true;
+    moddir.machineModel = BRIG_MACHINE_SMALL;
+  moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
+  brig_code.add (&moddir, sizeof (moddir));
 }
 
 /* Free all BRIG data.  */
@@ -433,6 +464,7 @@  emit_directive_variable (struct hsa_symbol *symbol)
   if (symbol->directive_offset)
     return symbol->directive_offset;
 
+  memset (&dirvar, 0, sizeof (dirvar));
   dirvar.base.byteCount = htole16 (sizeof (dirvar));
   dirvar.base.kind = htole16 (BRIG_KIND_DIRECTIVE_VARIABLE);
   dirvar.allocation = BRIG_ALLOCATION_AUTOMATIC;
@@ -472,7 +504,7 @@  emit_directive_variable (struct hsa_symbol *symbol)
   dirvar.linkage = symbol->linkage;
   dirvar.dim.lo = htole32 (symbol->dimLo);
   dirvar.dim.hi = htole32 (symbol->dimHi);
-  dirvar.modifier = BRIG_SYMBOL_DEFINITION;
+  dirvar.modifier.allBits |= BRIG_VARIABLE_DEFINITION;
   dirvar.reserved = 0;
 
   symbol->directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
@@ -506,8 +538,10 @@  emit_function_directives (void)
 
   next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
 
+  memset (&fndir, 0, sizeof (fndir));
   fndir.base.byteCount = htole16 (sizeof (fndir));
-  fndir.base.kind = htole16 (hsa_cfun.kern_p ? BRIG_KIND_DIRECTIVE_KERNEL : BRIG_KIND_DIRECTIVE_FUNCTION);
+  fndir.base.kind = htole16 (hsa_cfun.kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
+			     : BRIG_KIND_DIRECTIVE_FUNCTION);
   fndir.name = htole32 (name_offset);
   fndir.inArgCount = htole16 (hsa_cfun.input_args_count);
   fndir.outArgCount = htole16 (hsa_cfun.output_arg ? 1 : 0);
@@ -515,8 +549,7 @@  emit_function_directives (void)
   fndir.firstCodeBlockEntry = htole32 (scoped_off);
   fndir.nextModuleEntry = htole32 (next_toplev_off);
   fndir.linkage = BRIG_LINKAGE_PROGRAM;
-  fndir.codeBlockEntryCount = htole32 (0);
-  fndir.modifier = BRIG_EXECUTABLE_DEFINITION;
+  fndir.modifier.allBits |= BRIG_EXECUTABLE_DEFINITION;
   memset (&fndir.reserved, 0, sizeof (fndir.reserved));
 
   function_offsets.put (cfun->decl, brig_code.total_size);
@@ -721,9 +754,9 @@  enqueue_op (hsa_op_base *op)
   op_queue.last_op = op;
 
   if (is_a <hsa_op_immed *> (op))
-    op_queue.projected_size += sizeof (struct BrigOperandData);
+    op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
   else if (is_a <hsa_op_reg *> (op))
-    op_queue.projected_size += sizeof (struct BrigOperandReg);
+    op_queue.projected_size += sizeof (struct BrigOperandRegister);
   else if (is_a <hsa_op_address *> (op))
     {
     op_queue.projected_size += sizeof (struct BrigOperandAddress);
@@ -742,7 +775,7 @@  enqueue_op (hsa_op_base *op)
 static void
 emit_immediate_operand (hsa_op_immed *imm)
 {
-  struct BrigOperandData out;
+  struct BrigOperandConstantBytes out;
   uint32_t byteCount;
 
   union
@@ -754,6 +787,7 @@  emit_immediate_operand (hsa_op_immed *imm)
   } bytes;
   unsigned len;
 
+  memset (&out, 0, sizeof (out));
   switch (imm->type)
     {
     case BRIG_TYPE_U8:
@@ -841,10 +875,10 @@  emit_immediate_operand (hsa_op_immed *imm)
     }
 
   out.base.byteCount = htole16 (sizeof (out));
-  out.base.kind = htole16 (BRIG_KIND_OPERAND_DATA);
-  byteCount = len ;
-
-  out.data = brig_data.add (&byteCount, sizeof (byteCount));
+  out.base.kind = htole16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
+  byteCount = htole32 (len);
+  out.type = htole16 (imm->type);
+  out.bytes = brig_data.add (&byteCount, sizeof (byteCount));
   brig_data.add (&bytes, len);
 
   brig_operand.add (&out, sizeof(out));
@@ -856,20 +890,20 @@  emit_immediate_operand (hsa_op_immed *imm)
 static void
 emit_register_operand (hsa_op_reg *reg)
 {
-  struct BrigOperandReg out;
+  struct BrigOperandRegister out;
 
   out.base.byteCount = htole16 (sizeof (out));
-  out.base.kind = htole16 (BRIG_KIND_OPERAND_REG);
+  out.base.kind = htole16 (BRIG_KIND_OPERAND_REGISTER);
   out.regNum = htole32 (reg->hard_num);
 
   if (BRIG_TYPE_B32 == regtype_for_type (reg->type))
-    out.regKind = BRIG_REGISTER_SINGLE;
+    out.regKind = BRIG_REGISTER_KIND_SINGLE;
   else if (BRIG_TYPE_B64 == regtype_for_type (reg->type))
-    out.regKind = BRIG_REGISTER_DOUBLE;
+    out.regKind = BRIG_REGISTER_KIND_DOUBLE;
   else if (BRIG_TYPE_B128 == regtype_for_type (reg->type))
-    out.regKind = BRIG_REGISTER_QUAD;
+    out.regKind = BRIG_REGISTER_KIND_QUAD;
   else if (BRIG_TYPE_B1 == regtype_for_type (reg->type))
-    out.regKind = BRIG_REGISTER_CONTROL;
+    out.regKind = BRIG_REGISTER_KIND_CONTROL;
   else
     gcc_unreachable ();
 
@@ -997,7 +1031,7 @@  emit_memory_insn (hsa_insn_mem *mem)
     repr.segment = addr->symbol->segment;
   else
     repr.segment = BRIG_SEGMENT_FLAT;
-  repr.modifier = 0 ;
+  repr.modifier.allBits = 0 ;
   repr.equivClass = mem->equiv_class;
   repr.align = BRIG_ALIGNMENT_1;
   if (mem->opcode == BRIG_OPCODE_LD)
@@ -1119,7 +1153,7 @@  emit_segment_insn (hsa_insn_seg *seg)
 
   repr.sourceType = htole16 (as_a <hsa_op_reg *> (seg->operands[1])->type);
   repr.segment = seg->segment;
-  repr.modifier = 0;
+  repr.modifier.allBits = 0;
 
   brig_code.add (&repr, sizeof (repr));
 
@@ -1136,6 +1170,7 @@  emit_cmp_insn (hsa_insn_cmp *cmp)
   BrigOperandOffset32_t operand_offsets[3];
   uint32_t byteCount;
 
+  memset (&repr, 0, sizeof (repr));
   repr.base.base.byteCount = htole16 (sizeof (repr));
   repr.base.base.kind = htole16 (BRIG_KIND_INST_CMP);
   repr.base.opcode = htole16 (cmp->opcode);
@@ -1155,10 +1190,9 @@  emit_cmp_insn (hsa_insn_cmp *cmp)
     repr.sourceType = htole16 (as_a <hsa_op_reg *> (cmp->operands[1])->type);
   else
     repr.sourceType = htole16 (as_a <hsa_op_immed *> (cmp->operands[1])->type);
-  repr.modifier = 0;
+  repr.modifier.allBits = 0;
   repr.compare = cmp->compare;
   repr.pack = 0;
-  repr.reserved = 0;
 
   brig_code.add (&repr, sizeof (repr));
   brig_insn_count++;
@@ -1261,17 +1295,16 @@  emit_cvt_insn (hsa_insn_basic *insn)
   else
     srctype = as_a <hsa_op_immed *> (insn->operands[1])->type;
   repr.sourceType = htole16 (srctype);
-
+  repr.modifier.allBits = 0;
   /* float to smaller float requires a rounding setting (we default
      to 'near'.  */
   if (float_type_p (insn->type)
       && (!float_type_p (srctype)
          || ((insn->type & BRIG_TYPE_BASE_MASK)
              < (srctype & BRIG_TYPE_BASE_MASK))))
-    repr.modifier = BRIG_ROUND_FLOAT_NEAR_EVEN;
+    repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
   else
-    repr.modifier = 0;
-
+    repr.round = BRIG_ROUND_NONE;
   brig_code.add (&repr, sizeof (repr));
   brig_insn_count++;
 }
@@ -1281,15 +1314,22 @@  emit_cvt_insn (hsa_insn_basic *insn)
 static void
 emit_arg_block (bool is_start)
 {
-  struct BrigDirectiveArgBlock repr;
-  repr.base.byteCount = htole16 (sizeof (repr));
-
-  BrigKinds16_t kind = is_start ? BRIG_KIND_DIRECTIVE_ARG_BLOCK_START
-    : BRIG_KIND_DIRECTIVE_ARG_BLOCK_END;
-  repr.base.kind = htole16 (kind);
-
-  brig_code.add (&repr, sizeof (repr));
-  brig_insn_count++;
+  if (is_start)
+    {
+      struct BrigDirectiveArgBlockStart repr;
+      repr.base.byteCount = htole16 (sizeof (repr));
+      repr.base.kind = htole16 (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START);
+      brig_code.add (&repr, sizeof (repr));
+      brig_insn_count++;
+    }
+  else
+    {
+      struct BrigDirectiveArgBlockEnd repr;
+      repr.base.byteCount = htole16 (sizeof (repr));
+      repr.base.kind = htole16 (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END);
+      brig_code.add (&repr, sizeof (repr));
+      brig_insn_count++;
+    }
 }
 
 /* Emit call instruction INSN, where this instruction must be closed
@@ -1390,14 +1430,12 @@  emit_basic_insn (hsa_insn_basic *insn)
       return;
     }
 
+  memset (&repr, 0, sizeof (repr));
   repr.base.base.byteCount = htole16 (sizeof (BrigInstBasic));
   repr.base.base.kind = htole16 (BRIG_KIND_INST_BASIC);
   repr.base.opcode = htole16 (insn->opcode);
   switch (insn->opcode)
     {
-      /* XXX The spec says mov can take all types.  But the LLVM based
-	 simulator cries about "Mov_s32" not being defined.  */
-      case BRIG_OPCODE_MOV:
       /* And the bit-logical operations need bit types and whine about
          arithmetic types :-/  */
       case BRIG_OPCODE_AND:
@@ -1430,9 +1468,9 @@  emit_basic_insn (hsa_insn_basic *insn)
   if ((type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE)
     {
       if (float_type_p (type))
-	repr.modifier = BRIG_ROUND_FLOAT_NEAR_EVEN;
+	repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
       else
-	repr.modifier = 0;
+	repr.round = 0;
       /* We assume that destination and sources agree in packing
          layout.  */
       if (insn->operands[2])
@@ -1578,12 +1616,173 @@  hsa_brig_emit_function (void)
       prev_bb = bb;
     }
   perhaps_emit_branch (prev_bb, NULL);
-  ptr_to_fndir->codeBlockEntryCount = brig_insn_count ;
   ptr_to_fndir->nextModuleEntry = brig_code.total_size;
 
   emit_queued_operands ();
 }
 
+static GTY(()) tree hsa_ctor_statements;
+
+/* Create a static initializer that will register our BRIG stuff with
+   libgomp.  */
+
+static void
+hsa_output_kernel_mapping (tree brig_decl)
+{
+  unsigned map_count = hsa_get_number_decl_kernel_mappings ();
+  gcc_assert (map_count > 0);
+
+  tree int_num_of_kernels;
+  int_num_of_kernels = build_int_cst (integer_type_node, (int) map_count);
+  tree kernel_num_index_type = build_index_type (int_num_of_kernels);
+  tree host_functions_array_type = build_array_type (ptr_type_node,
+						     kernel_num_index_type);
+
+  vec<constructor_elt, va_gc> *host_functions_vec = NULL;
+  for (unsigned i = 0; i < map_count; ++i)
+    {
+      tree decl = hsa_get_decl_kernel_mapping_decl (i);
+      CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE,
+			      build_fold_addr_expr (decl));
+    }
+  tree host_functions_ctor = build_constructor (host_functions_array_type,
+						host_functions_vec);
+  char tmp_name[64];
+  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "hsa_host_functions", 1);
+  tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+					 get_identifier (tmp_name),
+					 host_functions_array_type);
+  TREE_STATIC (hsa_host_func_table) = 1;
+  TREE_READONLY (hsa_host_func_table) = 1;
+  TREE_PUBLIC (hsa_host_func_table) = 0;
+  DECL_ARTIFICIAL (hsa_host_func_table) = 1;
+  DECL_IGNORED_P (hsa_host_func_table) = 1;
+  DECL_EXTERNAL (hsa_host_func_table) = 0;
+  TREE_CONSTANT (hsa_host_func_table) = 1;
+  DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
+  varpool_node::finalize_decl (hsa_host_func_table);
+
+  int len = 0;
+  for (unsigned i = 0; i < map_count; ++i)
+    {
+      char *name = hsa_get_decl_kernel_mapping_name (i);
+      /* We add 1 for the terminating zero and 1 for an ampersand prefix.  */
+      len = len + strlen (name) + 2;
+    }
+  len++;
+
+  char *buf = XNEWVEC (char, len);
+  char *p = buf;
+  for (unsigned i = 0; i < map_count; ++i)
+    {
+      char *name = hsa_get_decl_kernel_mapping_name (i);
+      int ll = strlen (name);
+      gcc_assert (ll > 0);
+      *p = '&';
+      p++;
+      memcpy (p, name, ll);
+      p += ll;
+      *p = '\0';
+      p++;
+    }
+  *p = '\0';
+  tree kern_names = build_string (len, buf);
+  TREE_TYPE (kern_names) = build_array_type (char_type_node,
+					     build_index_type (size_int (len)));
+  free (buf);
+
+  tree hsa_image_desc_type = make_node (RECORD_TYPE);
+  tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+			   get_identifier ("brig_module"), ptr_type_node);
+  DECL_CHAIN (id_f1) = NULL_TREE;
+  tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+			   get_identifier ("kern_names"), ptr_type_node);
+  DECL_CHAIN (id_f2) = id_f1;
+  finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f2,
+			 NULL_TREE);
+
+  vec<constructor_elt, va_gc> *img_desc_vec = NULL;
+  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
+			  build_fold_addr_expr (brig_decl));
+  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
+			  build1 (ADDR_EXPR,
+				  build_pointer_type (TREE_TYPE (kern_names)),
+				  kern_names));
+
+  tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
+
+  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "hsa_img_descriptor", 1);
+  tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+					get_identifier (tmp_name),
+					hsa_image_desc_type);
+  TREE_STATIC (hsa_img_descriptor) = 1;
+  TREE_READONLY (hsa_img_descriptor) = 1;
+  TREE_PUBLIC (hsa_img_descriptor) = 0;
+  DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
+  DECL_IGNORED_P (hsa_img_descriptor) = 1;
+  DECL_EXTERNAL (hsa_img_descriptor) = 0;
+  TREE_CONSTANT (hsa_img_descriptor) = 1;
+  DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
+  varpool_node::finalize_decl (hsa_img_descriptor);
+
+  /* Construct the "host_table" libgomp expects. */
+  tree libgomp_host_table_type = build_array_type (ptr_type_node,
+						   build_index_type
+						   (build_int_cst
+						    (integer_type_node, 4)));
+  vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
+  tree host_func_table_addr = build_fold_addr_expr (hsa_host_func_table);
+  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
+			  host_func_table_addr);
+  offset_int func_table_size = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node))
+    * map_count;
+  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
+			  fold_build2 (POINTER_PLUS_EXPR,
+				       TREE_TYPE (host_func_table_addr),
+				       host_func_table_addr,
+				       build_int_cst (size_type_node,
+						      func_table_size.to_uhwi
+						      ())));
+  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
+  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
+  tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
+						    libgomp_host_table_vec);
+  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "hsa_libgomp_host_table", 1);
+  tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+					    get_identifier (tmp_name),
+					    libgomp_host_table_type);
+
+  TREE_STATIC (hsa_libgomp_host_table) = 1;
+  TREE_READONLY (hsa_libgomp_host_table) = 1;
+  TREE_PUBLIC (hsa_libgomp_host_table) = 0;
+  DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
+  DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
+  DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
+  TREE_CONSTANT (hsa_libgomp_host_table) = 1;
+  DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
+  varpool_node::finalize_decl (hsa_libgomp_host_table);
+
+  /* Generate an initializer with a call to the registration routine.  */
+
+  /* FIXME: gomp_offload_register has one more enum parameter omitted here.  */
+  tree reg_fn_type = build_function_type_list (void_type_node, ptr_type_node,
+					       ptr_type_node, NULL_TREE);
+  tree reg_fn = build_fn_decl ("__hsa_register_image", reg_fn_type);
+   append_to_statement_list
+    (build_call_expr (reg_fn, 2,
+		      build_fold_addr_expr (hsa_libgomp_host_table),
+		      build_fold_addr_expr (hsa_img_descriptor)),
+     &hsa_ctor_statements);
+  cgraph_build_static_cdtor ('I', hsa_ctor_statements, DEFAULT_INIT_PRIORITY);
+}
+
+
+#define HSA_SECTION_ALIGNMENT 16
+
+/* Emit the brig module we have compiled to a section in the final assembly and
+   also create a compile unit static constructor that will register the brig
+   module with libgomp.  */
+
 void
 hsa_output_brig (void)
 {
@@ -1613,13 +1812,74 @@  hsa_output_brig (void)
 
   saved_section = in_section;
 
+  switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
+  char tmp_name[64];
+  ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
+  ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
+  tree brig_id = get_identifier (tmp_name);
+  tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
+			       char_type_node);
+  SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
+  TREE_ADDRESSABLE (brig_decl) = 1;
+  TREE_READONLY (brig_decl) = 1;
+  DECL_ARTIFICIAL (brig_decl) = 1;
+  DECL_IGNORED_P (brig_decl) = 1;
+  TREE_STATIC (brig_decl) = 1;
+  TREE_PUBLIC (brig_decl) = 0;
+  TREE_USED (brig_decl) = 1;
+  DECL_INITIAL (brig_decl) = brig_decl;
+  TREE_ASM_WRITTEN (brig_decl) = 1;
+
+  BrigModuleHeader module_header;
+  memcpy (&module_header.identification, "HSA BRIG",
+	  sizeof(module_header.identification));
+  module_header.brigMajor = htole32 (BRIG_VERSION_BRIG_MAJOR);
+  module_header.brigMinor = htole32 (BRIG_VERSION_BRIG_MINOR);
+  uint64_t section_index[3];
+
+  int data_padding, code_padding, operand_padding;
+  data_padding = HSA_SECTION_ALIGNMENT
+    - brig_data.total_size % HSA_SECTION_ALIGNMENT;
+  code_padding = HSA_SECTION_ALIGNMENT
+    - brig_code.total_size % HSA_SECTION_ALIGNMENT;
+  operand_padding = HSA_SECTION_ALIGNMENT
+    - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
+
+  uint64_t module_size = sizeof (module_header) + sizeof (section_index)
+    + brig_data.total_size + data_padding
+    + brig_code.total_size + code_padding
+    + brig_operand.total_size + operand_padding;
+  gcc_assert ((module_size % 16) == 0);
+  module_header.byteCount = htole64 (module_size);
+  memset (&module_header.hash, 0, sizeof (module_header.hash));
+  module_header.reserved = 0;
+  module_header.sectionCount = htole32 (3);
+  module_header.sectionIndex = htole64 (sizeof (module_header));
+  assemble_string ((const char *) &module_header, sizeof(module_header));
+  uint64_t off = sizeof (module_header) + sizeof (section_index);
+  section_index[0] = htole64 (off);
+  off += brig_data.total_size + data_padding;
+  section_index[1] = htole64 (off);
+  off += brig_code.total_size + code_padding;
+  section_index[2] = htole64 (off);
+  assemble_string ((const char *) &section_index, sizeof (section_index));
+
+  char padding[HSA_SECTION_ALIGNMENT];
+  memset (padding, 0, sizeof(padding));
+
   brig_data.output ();
+  assemble_string (padding, data_padding);
   brig_code.output ();
+  assemble_string (padding, code_padding);
   brig_operand.output ();
+  assemble_string (padding, operand_padding);
 
   if (saved_section)
     switch_to_section (saved_section);
 
+  hsa_output_kernel_mapping (brig_decl);
+
+  hsa_free_decl_kernel_mapping ();
   brig_release_data ();
   hsa_deinit_compilation_unit_data ();
 }
diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c
index 07db392..62d3ed0 100644
--- a/gcc/hsa-dump.c
+++ b/gcc/hsa-dump.c
@@ -347,16 +347,12 @@  hsa_opcode_name (BrigOpcode16_t opcode)
       return "activelaneid";
     case BRIG_OPCODE_ACTIVELANEMASK:
       return "activelanemask";
-    case BRIG_OPCODE_ACTIVELANESHUFFLE:
-      return "activelaneshuffle";
     case BRIG_OPCODE_CALL:
       return "call";
     case BRIG_OPCODE_SCALL:
       return "scall";
     case BRIG_OPCODE_ICALL:
       return "icall";
-    case BRIG_OPCODE_LDI:
-      return "ldi";
     case BRIG_OPCODE_RET:
       return "ret";
     case BRIG_OPCODE_ALLOCA:
@@ -393,22 +389,12 @@  hsa_opcode_name (BrigOpcode16_t opcode)
       return "setdetectexcept";
     case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
       return "addqueuewriteindex";
-    case BRIG_OPCODE_AGENTCOUNT:
-      return "agentcount";
-    case BRIG_OPCODE_AGENTID:
-      return "agentid";
     case BRIG_OPCODE_CASQUEUEWRITEINDEX:
       return "casqueuewriteindex";
-    case BRIG_OPCODE_LDK:
-      return "ldk";
     case BRIG_OPCODE_LDQUEUEREADINDEX:
       return "ldqueuereadindex";
     case BRIG_OPCODE_LDQUEUEWRITEINDEX:
       return "ldqueuewriteindex";
-    case BRIG_OPCODE_QUEUEID:
-      return "queueid";
-    case BRIG_OPCODE_QUEUEPTR:
-      return "queueptr";
     case BRIG_OPCODE_STQUEUEREADINDEX:
       return "stqueuereadindex";
     case BRIG_OPCODE_STQUEUEWRITEINDEX:
@@ -570,11 +556,11 @@  hsa_memscope_name (enum BrigMemoryScope scope)
     case BRIG_MEMORY_SCOPE_WORKITEM:
       return "wi";
     case BRIG_MEMORY_SCOPE_WAVEFRONT:
-      return "wv";
+      return "wave";
     case BRIG_MEMORY_SCOPE_WORKGROUP:
       return "wg";
-    case BRIG_MEMORY_SCOPE_COMPONENT:
-      return "cmp";
+    case BRIG_MEMORY_SCOPE_AGENT:
+      return "agent";
     case BRIG_MEMORY_SCOPE_SYSTEM:
       return "sys";
     default:
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index f338556..99b3019 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -66,6 +66,20 @@  along with GCC; see the file COPYING3.  If not see
    function. */
 struct hsa_function_representation hsa_cfun;
 
+/* Element of the mapping vector between a host decl and an HSA kernel.  */
+
+struct GTY(()) hsa_decl_kernel_map_element
+{
+  /* The decl of the host function.  */
+  tree decl;
+  /* Name of the HSA kernel in BRIG.  */
+  char * GTY((skip)) name;
+};
+
+/* Mapping between decls and corresponding HSA kernels in this compilation
+   unit.  */
+static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> *hsa_decl_kernel_mapping;
+
 /* Alloc pools for allocating basic hsa structures such as operands,
    instructions and other basic entitie.s */
 static alloc_pool hsa_allocp_operand_address;
@@ -244,7 +258,10 @@  hsa_deinit_data_for_cfun (void)
   delete hsa_cfun.local_symbols;
   free (hsa_cfun.input_args);
   free (hsa_cfun.output_arg);
-  free (hsa_cfun.name);
+  /* Kernel names are deallocated at the end of BRIG output when deallocating
+     hsa_decl_kernel_mapping.  */
+  if (!hsa_cfun.kern_p)
+    free (hsa_cfun.name);
   hsa_cfun.spill_symbols.release();
 }
 
@@ -580,7 +597,7 @@  hsa_alloc_immed_op (tree tree_val)
 		       && !POINTER_TYPE_P (TREE_TYPE (tree_val)));
 
   memset (imm, 0 , sizeof (hsa_op_immed));
-  imm->kind = BRIG_KIND_OPERAND_DATA;
+  imm->kind = BRIG_KIND_OPERAND_CONSTANT_BYTES;
   imm->type = hsa_type_for_scalar_tree_type (TREE_TYPE (tree_val), true);
   imm->value = tree_val;
 
@@ -612,7 +629,7 @@  hsa_alloc_reg_op (void)
   hreg = (hsa_op_reg *) pool_alloc (hsa_allocp_operand_reg);
   hsa_list_operand_reg.safe_push (hreg);
   memset (hreg, 0, sizeof (hsa_op_reg));
-  hreg->kind = BRIG_KIND_OPERAND_REG;
+  hreg->kind = BRIG_KIND_OPERAND_REGISTER;
   /* TODO: Try removing later on.  I suppose this is not necessary but I'd
      rather avoid surprises.  */
   hreg->order = hsa_cfun.reg_count++;
@@ -2225,6 +2242,50 @@  gen_function_parameters (vec <hsa_op_reg_p> ssa_map)
     }
 }
 
+/* Create a mapping between the original function DECL and kernel name NAME.  */
+
+static void
+hsa_add_kern_decl_mapping (tree decl, char *name)
+{
+  hsa_decl_kernel_map_element dkm;
+  dkm.decl = decl;
+  dkm.name = name;
+  vec_safe_push (hsa_decl_kernel_mapping, dkm);
+}
+
+/* Return the number of kernel decl name mappings.  */
+
+unsigned
+hsa_get_number_decl_kernel_mappings (void)
+{
+  return vec_safe_length (hsa_decl_kernel_mapping);
+}
+
+/* Return the decl in the Ith kernel decl name mapping.  */
+
+tree
+hsa_get_decl_kernel_mapping_decl (unsigned i)
+{
+  return (*hsa_decl_kernel_mapping)[i].decl;
+}
+
+/* Return the name in the Ith kernel decl name mapping.  */
+
+char *
+hsa_get_decl_kernel_mapping_name (unsigned i)
+{
+  return (*hsa_decl_kernel_mapping)[i].name;
+}
+
+/* Free the mapping between original decls and kernel names.  */
+
+void
+hsa_free_decl_kernel_mapping (void)
+{
+  for (unsigned i = 0; i < vec_safe_length (hsa_decl_kernel_mapping); ++i)
+    free ((*hsa_decl_kernel_mapping)[i].name);
+  vec_free (hsa_decl_kernel_mapping);  /* Also resets the GC root to NULL.  */
+}
 
 static void
 sanitize_hsa_name (char *p)
@@ -2244,14 +2305,19 @@  generate_hsa (void)
 
   hsa_init_data_for_cfun ();
 
-  bool kern_p = lookup_attribute ("hsakernel",
-    DECL_ATTRIBUTES (current_function_decl));
+  bool kern_p = lookup_attribute ("hsa",
+				  DECL_ATTRIBUTES (current_function_decl))
+    || lookup_attribute ("hsakernel", DECL_ATTRIBUTES (current_function_decl));
   hsa_cfun.kern_p = kern_p;
 
   ssa_map.safe_grow_cleared (SSANAMES (cfun)->length ());
   hsa_cfun.name
     = xstrdup (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
   sanitize_hsa_name (hsa_cfun.name);
+
+  if (kern_p)
+    hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun.name);
+
   gen_function_parameters (ssa_map);
   if (seen_error ())
     goto fail;
@@ -2277,93 +2343,51 @@  generate_hsa (void)
 }
 
 static GTY(()) tree hsa_launch_fn;
-static GTY(()) tree hsa_kernel_desc_type;
 static GTY(()) tree hsa_dim_array_type;
-static GTY(()) tree hsa_range_dimnum_decl;
-static GTY(()) tree hsa_range_grid_decl;
-static GTY(()) tree hsa_range_group_decl;
-static GTY(()) tree hsa_launch_range_type;
+static GTY(()) tree hsa_lattrs_dimnum_decl;
+static GTY(()) tree hsa_lattrs_grid_decl;
+static GTY(()) tree hsa_lattrs_group_decl;
+static GTY(()) tree hsa_lattrs_nargs_decl;
+static GTY(()) tree hsa_launch_attributes_type;
 
 static void
 init_hsa_functions (void)
 {
-  tree launch_fn_type;
-  tree fields, f;
-  tree constcharptr;
   if (hsa_launch_fn)
     return;
 
-  constcharptr = build_pointer_type (build_qualified_type
-				     (char_type_node, TYPE_QUAL_CONST));
-
-  hsa_kernel_desc_type = make_node (RECORD_TYPE);
-  fields = NULL_TREE;
-  f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-		  get_identifier ("filename"), constcharptr);
-  DECL_CHAIN (f) = fields;
-  fields = f;
-  f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-		  get_identifier ("name"), constcharptr);
-  DECL_CHAIN (f) = fields;
-  fields = f;
-  f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-		  get_identifier ("nargs"), uint64_type_node);
-  DECL_CHAIN (f) = fields;
-  fields = f;
-  f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-		  get_identifier ("kernel"), ptr_type_node);
-  DECL_CHAIN (f) = fields;
-  fields = f;
-  f = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-                  get_identifier ("context"), ptr_type_node);
-  DECL_CHAIN (f) = fields;
-  fields = f;
-
-  finish_builtin_struct (hsa_kernel_desc_type, "__hsa_kernel_desc",
-			 fields, NULL_TREE);
-
-
   tree dim_arr_index_type;
   dim_arr_index_type = build_index_type (build_int_cst (integer_type_node, 2));
   hsa_dim_array_type = build_array_type (uint32_type_node, dim_arr_index_type);
 
-  hsa_launch_range_type = make_node (RECORD_TYPE);
-  fields = NULL_TREE;
-  hsa_range_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-				      get_identifier ("dimension"),
-				      uint32_type_node);
-  DECL_CHAIN (hsa_range_dimnum_decl) = NULL_TREE;
+  hsa_launch_attributes_type = make_node (RECORD_TYPE);
+  hsa_lattrs_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+				       get_identifier ("ndim"),
+				       uint32_type_node);
+  DECL_CHAIN (hsa_lattrs_dimnum_decl) = NULL_TREE;
 
-  hsa_range_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+  hsa_lattrs_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
 				    get_identifier ("global_size"),
 				    hsa_dim_array_type);
-  DECL_CHAIN (hsa_range_grid_decl) = hsa_range_dimnum_decl;
-  hsa_range_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+  DECL_CHAIN (hsa_lattrs_grid_decl) = hsa_lattrs_dimnum_decl;
+  hsa_lattrs_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
 				     get_identifier ("group_size"),
 				     hsa_dim_array_type);
-  DECL_CHAIN (hsa_range_group_decl) = hsa_range_grid_decl;
-  tree reserved = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-			      get_identifier ("reserved"), uint32_type_node);
-  DECL_CHAIN (reserved) = hsa_range_group_decl;
-
-  /* This is in fact okra_range_s, but let's call everything HSA, at least for
-     now.  */
-  finish_builtin_struct (hsa_launch_range_type, "__hsa_launch_range",
-			 reserved, NULL_TREE);
-
-  /* __hsa_launch_kernel (__hsa_kernel_desc * kd, __hsa_launch_range* range,
-     uint64_t *args) */
-
+  DECL_CHAIN (hsa_lattrs_group_decl) = hsa_lattrs_grid_decl;
+  hsa_lattrs_nargs_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+				      get_identifier ("nargs"),
+				      uint32_type_node);
+  DECL_CHAIN (hsa_lattrs_nargs_decl) = hsa_lattrs_group_decl;
+  finish_builtin_struct (hsa_launch_attributes_type, "__hsa_launch_attributes",
+			 hsa_lattrs_nargs_decl, NULL_TREE);
+  tree launch_fn_type;
   launch_fn_type
-    = build_function_type_list (void_type_node,
-				build_pointer_type (hsa_kernel_desc_type),
-				build_pointer_type (hsa_launch_range_type),
+    = build_function_type_list (void_type_node, ptr_type_node,
+				build_pointer_type (hsa_launch_attributes_type),
 				build_pointer_type (uint64_type_node),
 				NULL_TREE);
 
-  hsa_launch_fn
-    = build_fn_decl ("__hsa_launch_kernel",
-		     launch_fn_type);
+  hsa_launch_fn = build_fn_decl ("__hsa_launch_kernel", launch_fn_type);
 }
 
 /* Insert before the current statement in GSI a store of VALUE to INDEX of
@@ -2382,12 +2406,110 @@  insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
 }
 
+/* Replace the call to FNDECL at GSI, a function with the "hsa" or "hsakernel"
+   attribute, with a call to __hsa_launch_kernel.  Calls to "hsakernel"
+   functions pass the grid size and the work-group size as their last two
+   arguments, other calls use a grid of 64 and work-groups of 16.  */
+
+static void
+wrap_hsa_kernel_call (gimple_stmt_iterator *gsi, tree fndecl)
+{
+  init_hsa_functions ();
+
+  bool real_kern_p = lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl));
+  tree grid_size_1, group_size_1;
+  tree u32_one = build_int_cst (uint32_type_node, 1);
+  gimple call_stmt = gsi_stmt (*gsi);
+  unsigned discard_arguents, num_args = gimple_call_num_args (call_stmt);
+  if (real_kern_p)
+    {
+      if (num_args < 2)
+	{
+	  error ("Calls to functions with hsakernel attribute must "
+		 "have at least two arguments.");
+	  /* Carry on to replace the call, but make sure that the unsigned
+	     NUM_ARGS - DISCARD_ARGUENTS below cannot wrap around.  */
+	  discard_arguents = num_args;
+	  grid_size_1 = group_size_1 = u32_one;
+	}
+      else
+	{
+	  discard_arguents = 2;
+	  grid_size_1 = fold_convert (uint32_type_node,
+				      gimple_call_arg (call_stmt, num_args - 2));
+	  grid_size_1 = force_gimple_operand_gsi (gsi, grid_size_1, true,
+						  NULL_TREE, true,
+						  GSI_SAME_STMT);
+	  group_size_1 = fold_convert (uint32_type_node,
+				       gimple_call_arg (call_stmt,
+							num_args - 1));
+	  group_size_1 = force_gimple_operand_gsi (gsi, group_size_1, true,
+						   NULL_TREE, true,
+						   GSI_SAME_STMT);
+	}
+    }
+  else
+    {
+      discard_arguents = 0;
+      grid_size_1 = build_int_cst (uint32_type_node, 64);
+      group_size_1 = build_int_cst (uint32_type_node, 16);
+    }
+  gcc_assert (num_args >= discard_arguents);
+
+  tree lattrs = create_tmp_var (hsa_launch_attributes_type,
+				"__hsa_launch_attrs");
+  tree dimref = build3 (COMPONENT_REF, uint32_type_node,
+			lattrs, hsa_lattrs_dimnum_decl, NULL_TREE);
+  gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT);
+  insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 0, grid_size_1);
+  insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 1, u32_one);
+  insert_store_range_dim (gsi, lattrs, hsa_lattrs_grid_decl, 2, u32_one);
+  insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 0, group_size_1);
+  insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 1, u32_one);
+  insert_store_range_dim (gsi, lattrs, hsa_lattrs_group_decl, 2, u32_one);
+  tree nargsref = build3 (COMPONENT_REF, uint32_type_node,
+			 lattrs, hsa_lattrs_nargs_decl, NULL_TREE);
+  tree nargsval = build_int_cst (uint32_type_node, num_args - discard_arguents);
+  gsi_insert_before (gsi, gimple_build_assign (nargsref, nargsval),
+		     GSI_SAME_STMT);
+  lattrs = build_fold_addr_expr (lattrs);
+
+  /* Marshal every remaining argument into an array of 64-bit slots.  */
+  tree args;
+  args = create_tmp_var (build_array_type_nelts (uint64_type_node,
+						 num_args - discard_arguents),
+			 NULL);
+
+  for (unsigned i = 0; i < (num_args - discard_arguents); i++)
+    {
+      tree arg = gimple_call_arg (call_stmt, i);
+
+      tree r = build4 (ARRAY_REF, uint64_type_node, args,
+		       size_int (i), NULL_TREE, NULL_TREE);
+
+      arg = force_gimple_operand_gsi (gsi, fold_convert (uint64_type_node, arg),
+				      true, NULL_TREE, true, GSI_SAME_STMT);
+      gimple g = gimple_build_assign (r, arg);
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+    }
+
+  args = build_fold_addr_expr (args);
+
+  /* XXX doesn't handle calls with lhs, doesn't remove EH edges.  */
+  gimple launch = gimple_build_call (hsa_launch_fn, 3,
+				     build_fold_addr_expr (fndecl),
+				     lattrs, args);
+  gsi_insert_before (gsi, launch, GSI_SAME_STMT);
+  unlink_stmt_vdef (call_stmt);
+  gsi_remove (gsi, true);
+}
+
+
 static unsigned int
-wrap_hsa (void)
+wrap_all_hsa_calls (void)
 {
   bool changed = false;
   basic_block bb;
-  init_hsa_functions ();
   FOR_EACH_BB_FN (bb, cfun)
     {
       gimple_stmt_iterator gsi;
@@ -2398,169 +2520,7 @@  wrap_hsa (void)
 	    && (lookup_attribute ("hsa", DECL_ATTRIBUTES (fndecl))
 		|| lookup_attribute ("hsakernel", DECL_ATTRIBUTES (fndecl))))
 	  {
-	    char *tmpname;
-	    gimple launch, call_stmt = gsi_stmt (gsi);
-	    vec<constructor_elt, va_gc> *v = NULL;
-	    tree str;
-	    str = build_string_literal (1, "");
-	    bool kern_p = lookup_attribute ("hsakernel",
-					    DECL_ATTRIBUTES (fndecl));
-	    hsa_cfun.kern_p = kern_p;
-	    if (!in_lto_p && main_input_filename)
-	      {
-		char *filename;
-		const char *part = strrchr (main_input_filename, '/');
-		if (!part)
-		  part = main_input_filename;
-		asprintf (&filename, "%s", part);
-		char* extension = strchr (filename, '.');
-		if (extension)
-		  {
-		    strcpy (extension, "\0");
-		    asprintf (&extension, "%s", ".o\0");
-		    strcat (filename, extension);
-		    free (extension);
-		    str = build_string_literal (strlen(filename)+1,filename);
-		    free (filename);
-		  }
-	      }
-	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, str);
-
-
-	    int slen = IDENTIFIER_LENGTH (DECL_ASSEMBLER_NAME (fndecl));
-	    if (asprintf (&tmpname, "&%s",
-			  IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fndecl))) < 0)
-	      gcc_unreachable ();
-	    sanitize_hsa_name (tmpname + 1);
-
-	    str = build_string_literal (slen + 2, tmpname);
-	    free (tmpname);
-	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, str);
-	    int discard_arguents;
-	    int num_args = gimple_call_num_args (call_stmt);
-	    if (kern_p)
-	      discard_arguents = 2;
-	    else
-	      discard_arguents = 0;
-	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-				    size_int (num_args - discard_arguents));
-	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, null_pointer_node);
-	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, null_pointer_node);
-
-	    tree desc_initval = build_constructor (hsa_kernel_desc_type, v);
-
-	    /* Create a new VAR_DECL of type descriptor.  */
-	    char tmp_name[32];
-	    static unsigned int var_id;
-	    ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kd", var_id++);
-	    tree desc = build_decl (gimple_location (call_stmt), VAR_DECL,
-				    get_identifier (tmp_name),
-				    hsa_kernel_desc_type);
-	    TREE_STATIC (desc) = 1;
-	    TREE_PUBLIC (desc) = 0;
-	    DECL_ARTIFICIAL (desc) = 1;
-	    DECL_IGNORED_P (desc) = 1;
-	    DECL_EXTERNAL (desc) = 0;
-
-	    TREE_CONSTANT (desc_initval) = 1;
-	    TREE_STATIC (desc_initval) = 1;
-	    DECL_INITIAL (desc) = desc_initval;
-	    varpool_node::finalize_decl (desc);
-	    desc = build_fold_addr_expr (desc);
-
-	    tree grid_size_1, group_size_1;
-	    tree u32_one = build_int_cst (uint32_type_node, 1);
-	    if (kern_p)
-	      {
-		discard_arguents = 2;
-		int num_args = gimple_call_num_args (call_stmt);
-		if (num_args < 2)
-		  {
-		    error ("Calls to functions with hsakernel attribute must "
-			   "have at least two arguments.");
-		    grid_size_1 = group_size_1 = u32_one;
-		  }
-		else
-		  {
-		    grid_size_1 = fold_convert (uint32_type_node,
-						gimple_call_arg (call_stmt,
-								 num_args - 2));
-		    grid_size_1 = force_gimple_operand_gsi (&gsi, grid_size_1,
-							    true, NULL_TREE,
-							    true,
-							    GSI_SAME_STMT);
-		    group_size_1 = fold_convert (uint32_type_node,
-						 gimple_call_arg (call_stmt,
-								  num_args
-								  - 1));
-		    group_size_1 = force_gimple_operand_gsi (&gsi, group_size_1,
-							     true, NULL_TREE,
-							     true,
-							     GSI_SAME_STMT);
-		  }
-	      }
-	    else
-	      {
-		discard_arguents = 0;
-		grid_size_1 = build_int_cst (uint32_type_node, 256);
-		group_size_1 = build_int_cst (uint32_type_node, 16);
-	      }
-
-
-	    /* We fill in range dynamically because later on we'd like to
-	       decide about the values at run time.  */
-	    tree range = create_tmp_var (hsa_launch_range_type, "__hsa_range");
-	    tree dimref = build3 (COMPONENT_REF, uint32_type_node,
-				  range, hsa_range_dimnum_decl, NULL_TREE);
-	    gsi_insert_before (&gsi,
-			       gimple_build_assign (dimref, u32_one),
-			       GSI_SAME_STMT);
-	    insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 0,
-				    grid_size_1);
-	    insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 1,
-				    u32_one);
-	    insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 2,
-				    u32_one);
-	    insert_store_range_dim (&gsi, range, hsa_range_group_decl, 0,
-				    group_size_1);
-	    insert_store_range_dim (&gsi, range, hsa_range_group_decl, 1,
-				    u32_one);
-	    insert_store_range_dim (&gsi, range, hsa_range_group_decl, 2,
-				    u32_one);
-	    range = build_fold_addr_expr (range);
-
-	    tree args = create_tmp_var
-	      (build_array_type_nelts (uint64_type_node,
-				       gimple_call_num_args (call_stmt)),
-	       NULL);
-
-	    for (unsigned i = 0;
-		 i < gimple_call_num_args (call_stmt) - discard_arguents;
-		 i++)
-	      {
-		tree arg = gimple_call_arg (call_stmt, i);
-		gimple g;
-
-		tree r = build4 (ARRAY_REF, uint64_type_node, args,
-				 size_int (i), NULL_TREE, NULL_TREE);
-
-		arg = force_gimple_operand_gsi (&gsi,
-						fold_convert (uint64_type_node,
-							      arg),
-						true, NULL_TREE,
-						true, GSI_SAME_STMT);
-		g = gimple_build_assign (r, arg);
-		gsi_insert_before (&gsi, g, GSI_SAME_STMT);
-	      }
-
-	    args = build_fold_addr_expr (args);
-
-	    /* XXX doesn't handle calls with lhs, doesn't remove EH
-	       edges.  */
-	    launch = gimple_build_call (hsa_launch_fn, 3, desc, range, args);
-	    gsi_insert_before (&gsi, launch, GSI_SAME_STMT);
-	    unlink_stmt_vdef (call_stmt);
-	    gsi_remove (&gsi, true);
+	    wrap_hsa_kernel_call (&gsi, fndecl);
 	    changed = true;
 	  }
 	else
@@ -2615,7 +2575,7 @@  pass_gen_hsail::execute (function *)
 			   DECL_ATTRIBUTES (current_function_decl)))
     return generate_hsa ();
   else
-    return wrap_hsa ();
+    return wrap_all_hsa_calls ();
 }
 
 } // anon namespace
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
index 79376a5..1d5a7fc 100644
--- a/gcc/hsa-regalloc.c
+++ b/gcc/hsa-regalloc.c
@@ -292,7 +292,7 @@  hsa_num_def_ops (hsa_insn_basic *insn)
       case BRIG_OPCODE_ACTIVELANECOUNT:
       case BRIG_OPCODE_ACTIVELANEID:
       case BRIG_OPCODE_ACTIVELANEMASK:
-      case BRIG_OPCODE_ACTIVELANESHUFFLE:
+      case BRIG_OPCODE_ACTIVELANEPERMUTE:
 	return 1; /* ??? */
 
       case BRIG_OPCODE_CALL:
@@ -300,9 +300,6 @@  hsa_num_def_ops (hsa_insn_basic *insn)
       case BRIG_OPCODE_ICALL:
 	return 1; /* ??? */
 
-      case BRIG_OPCODE_LDI:
-	return 1; /* ??? */
-
       case BRIG_OPCODE_RET:
 	return 0;
 
@@ -318,14 +315,9 @@  hsa_num_def_ops (hsa_insn_basic *insn)
       case BRIG_OPCODE_PACKETCOMPLETIONSIG:
       case BRIG_OPCODE_PACKETID:
       case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
-      case BRIG_OPCODE_AGENTCOUNT:
-      case BRIG_OPCODE_AGENTID:
       case BRIG_OPCODE_CASQUEUEWRITEINDEX:
-      case BRIG_OPCODE_LDK:
       case BRIG_OPCODE_LDQUEUEREADINDEX:
       case BRIG_OPCODE_LDQUEUEWRITEINDEX:
-      case BRIG_OPCODE_QUEUEID:
-      case BRIG_OPCODE_QUEUEPTR:
       case BRIG_OPCODE_STQUEUEREADINDEX:
       case BRIG_OPCODE_STQUEUEWRITEINDEX:
 	return 1; /* ??? */
diff --git a/gcc/hsa.h b/gcc/hsa.h
index 837c79b..3e200cb 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -77,7 +77,7 @@  struct hsa_op_base
   unsigned brig_op_offset;
 
   /* The type of a particular operand.  */
-  BrigKinds16_t kind;
+  BrigKind16_t kind;
 };
 
 /* Common abstract ancestor for operands which have a type.  */
@@ -103,7 +103,7 @@  template <>
 inline bool
 is_a_helper <hsa_op_immed *>::test (hsa_op_base *p)
 {
-  return p->kind == BRIG_KIND_OPERAND_DATA;
+  return p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES;
 }
 
 /* HSA register operand.  */
@@ -152,7 +152,7 @@  template <>
 inline bool
 is_a_helper <hsa_op_reg *>::test (hsa_op_base *p)
 {
-  return p->kind == BRIG_KIND_OPERAND_REG;
+  return p->kind == BRIG_KIND_OPERAND_REGISTER;
 }
 
 /* An address HSA operand.  */
@@ -617,6 +617,10 @@  hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
 hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
 hsa_bb *hsa_init_new_bb (basic_block);
 void hsa_deinit_compilation_unit_data (void);
+unsigned hsa_get_number_decl_kernel_mappings (void);
+tree hsa_get_decl_kernel_mapping_decl (unsigned i);
+char *hsa_get_decl_kernel_mapping_name (unsigned i);
+void hsa_free_decl_kernel_mapping (void);
 
 /* In hsa-regalloc.c.  */
 void hsa_regalloc (void);
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index b164e1d..e97e8ed 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -60,7 +60,7 @@  libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
 	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
 	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
-	time.c fortran.c affinity.c target.c hsaokra.c
+	time.c fortran.c affinity.c target.c hsa.c
 
 nodist_noinst_HEADERS = libgomp_f.h
 nodist_libsubinclude_HEADERS = omp.h
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index ecef710..14bc864 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -96,7 +96,7 @@  am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
 	error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
 	parallel.lo sections.lo single.lo task.lo team.lo work.lo \
 	lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
-	fortran.lo affinity.lo target.lo hsaokra.lo
+	fortran.lo affinity.lo target.lo hsa.lo
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -320,7 +320,7 @@  libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
 	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
 	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
-	time.c fortran.c affinity.c target.c hsaokra.c
+	time.c fortran.c affinity.c target.c hsa.c
 
 nodist_noinst_HEADERS = libgomp_f.h
 nodist_libsubinclude_HEADERS = omp.h
@@ -464,7 +464,7 @@  distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hsaokra.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hsa.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
diff --git a/libgomp/hsa.c b/libgomp/hsa.c
new file mode 100644
index 0000000..60c22ee
--- /dev/null
+++ b/libgomp/hsa.c
@@ -0,0 +1,481 @@ 
+#include "config.h"
+#include "libgomp.h"
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <assert.h>
+#include "hsa.h"
+#include "hsa_ext_finalize.h"
+
+struct __hsa_launch_attributes
+{
+  /* Number of dimensions the workload has.  Maximum number is 3.  */
+  uint32_t ndim;
+  /* Size of the grid in the three respective dimensions.  */
+  uint32_t gdims[3];
+  /* Size of work-groups in the respective dimensions.  */
+  uint32_t wdims[3];
+  /* Number of arguments of the kernel.  */
+  uint32_t nargs;
+};
+
+struct hsa_context_info
+{
+  bool initialized;
+  hsa_agent_t agent;
+
+  uint32_t queue_size;
+  hsa_isa_t isa;
+  hsa_queue_t* sync_command_q;
+  hsa_signal_t sync_signal;
+};
+
+struct hsa_program_info
+{
+  bool created, finalized;
+  hsa_ext_program_t handle;
+  hsa_executable_t executable;
+};
+
+struct hsa_image_desc
+{
+  hsa_ext_module_t module;
+  const char *names;
+};
+
+struct hsa_kernel_info
+{
+  bool initialized;
+  const char *name;
+  hsa_executable_symbol_t symbol;
+  uint64_t object;
+  uint32_t kernarg_segment_size;
+  uint32_t group_segment_size;
+  uint32_t private_segment_size;
+  void *kernarg_addr;
+};
+
+struct hsa_image_info
+{
+  bool initialized;
+  void ***host_functions;
+  struct hsa_image_desc *image_descriptor;
+  struct hsa_kernel_info *kernels;
+};
+
+/* Print to stderr information about what is going on when true.  */
+
+static bool debug;
+
+static struct hsa_context_info hsa_context;
+static struct hsa_program_info hsa_program;
+
+/* FIXME: Currently we allow only one HSA image (module, HSA object file).
+   Once we move to libgomp, it will be taking care of host function <-> hsa
+   kernel mapping and we will automatically have more.  */
+static struct hsa_image_info image_info;
+
+/* Callback of hsa_iterate_agents, if AGENT is a GPU device, store it to
+   hsa_context.agent.  */
+
+static hsa_status_t
+find_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused)))
+{
+  hsa_device_type_t device_type;
+  hsa_status_t stat = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE,
+					  &device_type);
+  if (stat == HSA_STATUS_SUCCESS && device_type == HSA_DEVICE_TYPE_GPU)
+    {
+      hsa_context.agent = agent;
+      return HSA_STATUS_INFO_BREAK;
+    }
+  return HSA_STATUS_SUCCESS;
+}
+
+/* Callback of dispatch queue to report errors.  */
+
+static void
+queue_callback (hsa_status_t status, hsa_queue_t *queue, void *data) {
+  const char *message = "unknown error";
+  hsa_status_string (status, &message);
+  fprintf (stderr, "Error at queue %llu: %s\n", (unsigned long long) queue->id,
+	   message);
+}
+
+
+/* Initialize context for running HSA kernels.  */
+
+static void
+init_hsa_context (void)
+{
+  hsa_status_t status;
+
+  if (getenv ("HSA_DEBUG"))
+    debug = true;
+  else
+    debug = false;
+
+  status = hsa_init ();
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("HSA initialization failed");
+  if (debug)
+    fprintf (stderr, "HSA run-time initialized\n");
+  status = hsa_iterate_agents (find_gpu_agent, NULL);
+  if (status != HSA_STATUS_INFO_BREAK)
+    gomp_fatal ("Error searching for a GPU HSA agent");
+
+  if (debug)
+    {
+      char name[64];
+      memset (&name, 0, sizeof (name));
+      status = hsa_agent_get_info (hsa_context.agent, HSA_AGENT_INFO_NAME, name);
+      if (status != HSA_STATUS_SUCCESS)
+	gomp_fatal ("Error requesting the name of the HSA agent");
+      fprintf (stderr, "Selected agent %s\n", name);
+    }
+  status = hsa_agent_get_info(hsa_context.agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+			      &hsa_context.queue_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error requesting maximum queue size of the HSA agent");
+  if (debug)
+    fprintf (stderr, "The maximum queue length is %u\n",
+	     (unsigned int) hsa_context.queue_size);
+  status = hsa_agent_get_info(hsa_context.agent, HSA_AGENT_INFO_ISA,
+			      &hsa_context.isa);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error querying the ISA of the agent");
+
+  status = hsa_queue_create (hsa_context.agent, hsa_context.queue_size,
+			     HSA_QUEUE_TYPE_SINGLE, queue_callback, NULL,
+			     UINT32_MAX, UINT32_MAX,
+			     &hsa_context.sync_command_q);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error creating command queue");
+
+  status = hsa_signal_create (1, 0, NULL, &hsa_context.sync_signal);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Error creating the HSA sync signal");
+
+  if (debug)
+    fprintf (stderr, "HSA context initialized, queue has id %llu\n",
+	     (long long unsigned) hsa_context.sync_command_q->id);
+
+  hsa_context.initialized = true;
+}
+
+static void
+create_hsa_program (void)
+{
+  hsa_status_t status;
+  assert (hsa_context.initialized);
+  status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+				   HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+				   NULL, &hsa_program.handle);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not create an HSA program");
+  if (debug)
+    fprintf (stderr, "Created a finalizer program\n");
+
+  hsa_program.created = true;
+}
+
+/* Finalize the HSA program and load the result into a frozen executable.  */
+
+static void
+finalize_hsa_program (void)
+{
+  hsa_ext_control_directives_t control_directives;
+  hsa_code_object_t code_object;
+  hsa_status_t status;
+
+  assert (hsa_program.created);
+  memset (&control_directives, 0, sizeof (control_directives));
+  status = hsa_ext_program_finalize(hsa_program.handle, hsa_context.isa,
+				    HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+				    control_directives, "",
+				    HSA_CODE_OBJECT_TYPE_PROGRAM,
+				    &code_object);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Finalization of the HSA program failed");
+  if (debug)
+    fprintf (stderr, "Finalization done\n");
+  hsa_ext_program_destroy(hsa_program.handle);
+
+  status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
+				 "", &hsa_program.executable);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not create HSA executable");
+  status = hsa_executable_load_code_object(hsa_program.executable,
+					   hsa_context.agent, code_object, "");
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not add a code object to the HSA executable");
+  status = hsa_executable_freeze(hsa_program.executable, "");
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not freeze the HSA executable");
+
+  if (debug)
+    fprintf (stderr, "Froze HSA executable with the finalized code object\n");
+  hsa_program.finalized = true;
+}
+
+/* Add the module of image II to the HSA program, creating the program first
+   if necessary, and allocate a kernel descriptor for each name in the list of
+   kernel names of the image.  */
+
+static void
+init_hsa_image (struct hsa_image_info *ii)
+{
+  hsa_status_t status;
+  const char *p;
+  int count = 0;
+  struct hsa_kernel_info *kernel;
+
+  if (hsa_program.finalized)
+    gomp_fatal ("Sorry, re-finalization not yet supported.");
+  if (!hsa_program.created)
+    create_hsa_program ();
+  status = hsa_ext_program_add_module(hsa_program.handle,
+				      ii->image_descriptor->module);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not add a module to the HSA program");
+  if (debug)
+    fprintf (stderr, "Added a module to the HSA program\n");
+
+  /* The names are stored back to back, each terminated by a NUL character,
+     and an empty string ends the list.  */
+  p = ii->image_descriptor->names;
+  while (*p)
+    {
+      count++;
+      p += strlen (p) + 1;
+    }
+
+  if (debug)
+    fprintf (stderr, "Encountered %d kernels in an image\n", count);
+
+  ii->kernels = gomp_malloc_cleared (sizeof (struct hsa_kernel_info) * count);
+  if (!ii->kernels)
+    gomp_fatal ("Could not allocate memory for HSA kernel descriptors");
+
+  p = ii->image_descriptor->names;
+  kernel = ii->kernels;
+  while (*p)
+    {
+      kernel->name = p;
+      kernel++;
+      p += strlen (p) + 1;
+    }
+
+  ii->initialized = true;
+}
+
+/* Callback of hsa_agent_iterate_regions.  Determines if a memory region can be
+   used for kernarg allocations.  */
+
+static hsa_status_t get_kernarg_memory_region(hsa_region_t region, void* data)
+{
+  hsa_status_t status;
+  hsa_region_segment_t segment;
+
+  status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
+  if (status != HSA_STATUS_SUCCESS)
+    return status;
+  if (segment != HSA_REGION_SEGMENT_GLOBAL)
+    return HSA_STATUS_SUCCESS;
+
+  uint32_t flags;
+  status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
+  if (status != HSA_STATUS_SUCCESS)
+    return status;
+  if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
+    {
+      hsa_region_t* ret = (hsa_region_t*) data;
+      *ret = region;
+      return HSA_STATUS_INFO_BREAK;
+    }
+  return HSA_STATUS_SUCCESS;
+}
+
+/* Look up kernel KI in the executable and allocate its argument buffer.  */
+
+static void
+init_hsa_kernel (struct hsa_kernel_info *ki, struct hsa_image_info *ii)
+{
+  hsa_status_t status;
+
+  status = hsa_executable_get_symbol (hsa_program.executable, NULL,
+				      ki->name, hsa_context.agent, 0,
+				      &ki->symbol);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not find symbol for kernel in the code object");
+  if (debug)
+    fprintf (stderr, "Located kernel %s\n", ki->name);
+
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &ki->object);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not extract a kernel object from its symbol");
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+     &ki->kernarg_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not get info about kernel argument size");
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+     &ki->group_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not get info about kernel group segment size");
+  status = hsa_executable_symbol_get_info
+    (ki->symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+     &ki->private_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not get info about kernel private segment size");
+
+  hsa_region_t kernarg_region = { .handle = (uint64_t) -1 };
+  status = hsa_agent_iterate_regions (hsa_context.agent,
+				      get_kernarg_memory_region,
+				      &kernarg_region);
+  if (kernarg_region.handle == (uint64_t) -1)
+    gomp_fatal ("Could not find suitable memory region for kernel arguments");
+
+  /* Allocate the kernel argument buffer from the correct region.  */
+  status = hsa_memory_allocate (kernarg_region, ki->kernarg_segment_size,
+				&ki->kernarg_addr);
+  if (status != HSA_STATUS_SUCCESS)
+    gomp_fatal ("Could not allocate memory for kernel arguments");
+
+  if (debug)
+    {
+      fprintf (stderr, "Kernel structure for %s fully initialized\n", ki->name);
+      fprintf (stderr, "  group_segment_size: %u\n",
+	       (unsigned) ki->group_segment_size);
+      fprintf (stderr, "  private_segment_size: %u\n",
+	       (unsigned) ki->private_segment_size);
+      fprintf (stderr, "  kernarg_segment_size: %u\n",
+	       (unsigned) ki->kernarg_segment_size);
+      fprintf (stderr, "  kernarg_addr: %p\n", ki->kernarg_addr);
+    }
+  ki->initialized = true;
+}
+
+/* Launch the HSA kernel corresponding to HOST_FN with the grid and work-group
+   geometry given in ATTRS and wait until it completes.  ARGS holds
+   ATTRS->nargs 64-bit kernel arguments.  The HSA context, image, program and
+   kernel descriptor are initialized on first use.  */
+
+void
+__hsa_launch_kernel (void *host_fn, struct __hsa_launch_attributes *attrs,
+		     uint64_t *args)
+{
+  struct hsa_kernel_info *ki;
+  hsa_kernel_dispatch_packet_t *packet;
+  void **hf;
+  uint16_t header;
+  uint64_t index;
+  int i;
+
+  if (!image_info.host_functions)
+    gomp_fatal ("Call to launch_kernel before register_image");
+
+  if (!hsa_context.initialized)
+    init_hsa_context ();
+  if (!image_info.initialized)
+    init_hsa_image (&image_info);
+  if (!hsa_program.finalized)
+    finalize_hsa_program ();
+
+  /* NOTE(review): *HF is read before HF is compared with host_functions[1],
+     which is only right if [1] points to the last entry of the table rather
+     than one past it; confirm against the table emitted by the compiler.  */
+  i = 0;
+  hf = image_info.host_functions[0];
+  while (*hf != host_fn)
+    {
+      if (hf == image_info.host_functions[1])
+	gomp_fatal ("Could not map host function to an HSA kernel");
+      hf++;
+      i++;
+    }
+  if (debug)
+    fprintf (stderr, "Identified kernel %d\n", i);
+  ki = &image_info.kernels[i];
+
+  if (!ki->initialized)
+    init_hsa_kernel (ki, &image_info);
+
+  index = hsa_queue_add_write_index_relaxed (hsa_context.sync_command_q, 1);
+  if (debug)
+    fprintf (stderr, "Got AQL index %llu\n", (unsigned long long) index);
+  /* The write index only ever grows, the packet lives in the slot given by
+     the index modulo the queue size (a power of two).  */
+  packet = ((hsa_kernel_dispatch_packet_t*) hsa_context
+	    .sync_command_q->base_address)
+    + (index & (hsa_context.sync_command_q->size - 1));
+  hsa_signal_store_relaxed (hsa_context.sync_signal, 1);
+
+  /* Clear the packet except for its first four bytes (the header and setup
+     fields); setup is assigned right away, the header is written last.  */
+  memset (((uint8_t *)packet) + 4, 0, sizeof (*packet) - 4);
+  packet->setup = (uint16_t) attrs->ndim
+    << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  packet->grid_size_x = attrs->gdims[0];
+  packet->workgroup_size_x = attrs->wdims[0];
+  packet->grid_size_y = attrs->ndim > 1 ? attrs->gdims[1] : 1;
+  packet->workgroup_size_y = attrs->ndim > 1 ? attrs->wdims[1] : 1;
+  packet->grid_size_z = attrs->ndim > 2 ? attrs->gdims[2] : 1;
+  packet->workgroup_size_z = attrs->ndim > 2 ? attrs->wdims[2] : 1;
+
+  packet->private_segment_size = ki->private_segment_size;
+  packet->group_segment_size = ki->group_segment_size;
+  packet->kernel_object = ki->object;
+  packet->kernarg_address = ki->kernarg_addr;
+  packet->completion_signal = hsa_context.sync_signal;
+
+  if (debug)
+    fprintf (stderr, "Copying %u arguments, total %llu bytes, from %p\n",
+	     (unsigned) attrs->nargs,
+	     (unsigned long long) sizeof(uint64_t) * attrs->nargs,
+	     (void *) args);
+  memcpy (ki->kernarg_addr, args, sizeof(uint64_t) * attrs->nargs);
+
+  header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+  if (debug)
+    fprintf (stderr, "Going to dispatch the kernel\n");
+
+  __atomic_store_n ((uint16_t*)(&packet->header), header, __ATOMIC_RELEASE);
+  hsa_signal_store_relaxed (hsa_context.sync_command_q->doorbell_signal, index);
+
+  if (debug)
+    fprintf (stderr, "Kernel dispatched, waiting for completion\n");
+
+  /* The wait is allowed to return before the condition is met, so retry until
+     the observed signal value has dropped below one.  */
+  while (hsa_signal_wait_acquire (hsa_context.sync_signal,
+				  HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX,
+				  HSA_WAIT_STATE_BLOCKED) >= 1)
+    ;
+  if (debug)
+    fprintf (stderr, "Kernel returned\n");
+}
+
+/* Record HOST_TABLE, the table of host functions, and TARGET_DATA, the
+   descriptor of the HSA image, for use by __hsa_launch_kernel.  Only one
+   image can be registered.  */
+
+void
+__hsa_register_image (void *host_table,
+		      /* enum offload_target_type target_type,*/
+		      void *target_data)
+{
+  if (!host_table || !target_data)
+    gomp_fatal ("Invalid image registration parameters");
+  if (image_info.host_functions)
+    gomp_fatal ("At this point we only allow one HSA module per program");
+  image_info.host_functions = host_table;
+  image_info.image_descriptor = target_data;
+}
diff --git a/libgomp/hsa.h b/libgomp/hsa.h
new file mode 100644
index 0000000..d96ab7a
--- /dev/null
+++ b/libgomp/hsa.h
@@ -0,0 +1,3724 @@ 
+////////////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 ADVANCED MICRO DEVICES, INC.
+//
+// AMD is granting you permission to use this software and documentation(if any)
+// (collectively, the "Materials") pursuant to the terms and conditions of the
+// Software License Agreement included with the Materials.If you do not have a
+// copy of the Software License Agreement, contact your AMD representative for a
+// copy.
+//
+// You agree that you will not reverse engineer or decompile the Materials, in
+// whole or in part, except as allowed by applicable law.
+//
+// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY,
+// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE
+// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM
+// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF
+// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion
+// of implied warranties, so the above exclusion may not apply to You.
+//
+// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT,
+// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT,
+// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF
+// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total
+// liability to You for all damages, losses, and causes of action (whether in
+// contract, tort (including negligence) or otherwise) exceed the amount of $100
+// USD.  You agree to defend, indemnify and hold harmless AMD and its licensors,
+// and any of their directors, officers, employees, affiliates or agents from
+// and against any and all loss, damage, liability and other expenses (including
+// reasonable attorneys' fees), resulting from Your use of the Software or
+// violation of the terms and conditions of this Agreement.
+//
+// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with
+// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is
+// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 -
+// 7013, et seq., or its successor.Use of the Materials by the Government
+// constitutes acknowledgement of AMD's proprietary rights in them.
+//
+// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as
+//                      stated in the Software License Agreement.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef HSA_RUNTIME_INC_HSA_H_
+#define HSA_RUNTIME_INC_HSA_H_
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* uintXX_t */
+#ifndef __cplusplus
+#include <stdbool.h>
+#endif /* __cplusplus */
+
+// Placeholder for calling convention and import macros
+#define HSA_CALL
+#undef HSA_API
+#define HSA_API HSA_CALL
+
+// Detect and set large model builds.
+#undef HSA_LARGE_MODEL
+#if defined(__LP64__) || defined(_M_X64)
+#define HSA_LARGE_MODEL
+#endif
+
+// Try to detect CPU endianness
+#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU)
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+    defined(_M_X64)
+#define LITTLEENDIAN_CPU
+#endif
+#endif
+
+#undef HSA_LITTLE_ENDIAN
+#if defined(LITTLEENDIAN_CPU)
+#define HSA_LITTLE_ENDIAN
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+#define OBSIDIAN_RUNTIME
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** \defgroup status Runtime Notifications
+ *  @{
+ */
+
+/**
+ * @brief Status codes.
+ */
+typedef enum {
+  /**
+   * The function has been executed successfully.
+   */
+  HSA_STATUS_SUCCESS = 0x0,
+  /**
+   * A traversal over a list of elements has been interrupted by the
+   * application before completing.
+   */
+  HSA_STATUS_INFO_BREAK = 0x1,
+  /**
+   * A generic error has occurred.
+   */
+  HSA_STATUS_ERROR = 0x1000,
+  /**
+   * One of the actual arguments does not meet a precondition stated in the
+   * documentation of the corresponding formal argument.
+   */
+  HSA_STATUS_ERROR_INVALID_ARGUMENT = 0x1001,
+  /**
+   * The requested queue creation is not valid.
+   */
+  HSA_STATUS_ERROR_INVALID_QUEUE_CREATION = 0x1002,
+  /**
+   * The requested allocation is not valid.
+   */
+  HSA_STATUS_ERROR_INVALID_ALLOCATION = 0x1003,
+  /**
+   * The agent is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_AGENT = 0x1004,
+  /**
+   * The memory region is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_REGION = 0x1005,
+  /**
+   * The signal is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_SIGNAL = 0x1006,
+  /**
+   * The queue is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_QUEUE = 0x1007,
+  /**
+   * The HSA runtime failed to allocate the necessary resources. This error
+   * may also occur when the HSA runtime needs to spawn threads or create
+   * internal OS-specific events.
+   */
+  HSA_STATUS_ERROR_OUT_OF_RESOURCES = 0x1008,
+  /**
+   * The AQL packet is malformed.
+   */
+  HSA_STATUS_ERROR_INVALID_PACKET_FORMAT = 0x1009,
+  /**
+   * An error has been detected while releasing a resource.
+   */
+  HSA_STATUS_ERROR_RESOURCE_FREE = 0x100A,
+  /**
+   * An API other than ::hsa_init has been invoked while the reference count
+   * of the HSA runtime is 0.
+   */
+  HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
+  /**
+   * The maximum reference count for the object has been reached.
+   */
+  HSA_STATUS_ERROR_REFCOUNT_OVERFLOW = 0x100C,
+  /**
+   * The arguments passed to a function are not compatible.
+   */
+  HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS = 0x100D,
+  /**
+   * The index is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_INDEX = 0x100E,
+  /**
+   * The instruction set architecture is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_ISA = 0x100F,
+  /**
+   * The instruction set architecture name is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_ISA_NAME = 0x1017,
+  /**
+   * The code object is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
+  /**
+   * The executable is invalid.
+   */
+  HSA_STATUS_ERROR_INVALID_EXECUTABLE = 0x1011,
+  /**
+   * The executable is frozen.
+   */
+  HSA_STATUS_ERROR_FROZEN_EXECUTABLE = 0x1012,
+  /**
+   * There is no symbol with the given name.
+   */
+  HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
+  /**
+   * The variable is already defined.
+   */
+  HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED = 0x1014,
+  /**
+   * The variable is undefined.
+   */
+  HSA_STATUS_ERROR_VARIABLE_UNDEFINED = 0x1015,
+  /**
+   * An HSAIL operation resulted in a hardware exception.
+   */
+  HSA_STATUS_ERROR_EXCEPTION = 0x1016
+} hsa_status_t;
+
+/**
+ * @brief Query additional information about a status code.
+ *
+ * @param[in] status Status code.
+ *
+ * @param[out] status_string A NUL-terminated string that describes the error
+ * status.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p status is an invalid
+ * status code, or @p status_string is NULL.
+ */
+hsa_status_t HSA_API
+    hsa_status_string(hsa_status_t status, const char **status_string);
+
+/** @} */
+
+/** \defgroup common Common Definitions
+ *  @{
+ */
+
+/**
+ * @brief Three-dimensional coordinate.
+ */
+typedef struct hsa_dim3_s {
+  /**
+   * X dimension.
+   */
+  uint32_t x;
+
+  /**
+   * Y dimension.
+   */
+  uint32_t y;
+
+  /**
+   * Z dimension.
+   */
+  uint32_t z;
+} hsa_dim3_t;
+
+/**
+ * @brief Access permissions.
+ */
+typedef enum {
+  /**
+   * Read-only access.
+   */
+  HSA_ACCESS_PERMISSION_RO = 1,
+  /**
+   * Write-only access.
+   */
+  HSA_ACCESS_PERMISSION_WO = 2,
+  /**
+   * Read and write access.
+   */
+  HSA_ACCESS_PERMISSION_RW = 3
+} hsa_access_permission_t;
+
+/** @} **/
+
+/** \defgroup initshutdown Initialization and Shut Down
+ *  @{
+ */
+
+/**
+ * @brief Initialize the HSA runtime.
+ *
+ * @details Initializes the HSA runtime if it is not already initialized, and
+ * increases the reference counter associated with the HSA runtime for the
+ * current process. Invocation of any HSA function other than ::hsa_init results
+ * in undefined behavior if the current HSA runtime reference counter is less
+ * than one.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate
+ * the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_REFCOUNT_OVERFLOW The HSA runtime reference
+ * count reaches INT32_MAX.
+ */
+hsa_status_t HSA_API hsa_init();
+
+/**
+ * @brief Shut down the HSA runtime.
+ *
+ * @details Decreases the reference count of the HSA runtime instance. When the
+ * reference count reaches 0, the HSA runtime is no longer considered valid
+ * but the application might call ::hsa_init to initialize the HSA runtime
+ * again.
+ *
+ * Once the reference count of the HSA runtime reaches 0, all the resources
+ * associated with it (queues, signals, agent information, etc.) are
+ * considered invalid and any attempt to reference them in subsequent API calls
+ * results in undefined behavior. When the reference count reaches 0, the HSA
+ * runtime may release resources associated with it.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ */
+hsa_status_t HSA_API hsa_shut_down();
+
+/** @} **/
+
+/** \defgroup agentinfo System and Agent Information
+ *  @{
+ */
+
+/**
+ * @brief Endianness. A convention used to interpret the bytes making up a data
+ * word.
+ */
+typedef enum {
+  /**
+   * The least significant byte is stored in the smallest address.
+   */
+  HSA_ENDIANNESS_LITTLE = 0,
+  /**
+   * The most significant byte is stored in the smallest address.
+   */
+  HSA_ENDIANNESS_BIG = 1
+} hsa_endianness_t;
+
+/**
+ * @brief Machine model. A machine model determines the size of certain data
+ * types in HSA runtime and an agent.
+ */
+typedef enum {
+  /**
+   * Small machine model. Addresses use 32 bits.
+   */
+  HSA_MACHINE_MODEL_SMALL = 0,
+  /**
+   * Large machine model. Addresses use 64 bits.
+   */
+  HSA_MACHINE_MODEL_LARGE = 1
+} hsa_machine_model_t;
+
+/**
+ * @brief Profile. A profile indicates a particular level of feature
+ * support. For example, in the base profile the application must use the HSA
+ * runtime allocator to reserve Shared Virtual Memory, while in the full profile
+ * any host pointer can be shared across all the agents.
+ */
+typedef enum {
+  /**
+   * Base profile.
+   */
+  HSA_PROFILE_BASE = 0,
+  /**
+   * Full profile.
+   */
+  HSA_PROFILE_FULL = 1
+} hsa_profile_t;
+
+/**
+ * @brief System attributes.
+ */
+typedef enum {
+  /**
+   * Major version of the HSA runtime specification supported by the
+   * implementation. The type of this attribute is uint16_t.
+   */
+  HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
+  /**
+   * Minor version of the HSA runtime specification supported by the
+   * implementation. The type of this attribute is uint16_t.
+   */
+  HSA_SYSTEM_INFO_VERSION_MINOR = 1,
+  /**
+   * Current timestamp. The value of this attribute monotonically increases at a
+   * constant rate. The type of this attribute is uint64_t.
+   */
+  HSA_SYSTEM_INFO_TIMESTAMP = 2,
+  /**
+   * Timestamp value increase rate, in Hz. The timestamp (clock) frequency is
+   * in the range 1-400MHz. The type of this attribute is uint64_t.
+   */
+  HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY = 3,
+  /**
+   * Maximum duration of a signal wait operation. Expressed as a count based on
+   * the timestamp frequency. The type of this attribute is uint64_t.
+   */
+  HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT = 4,
+  /**
+   * Endianness of the system. The type of this attribute is ::hsa_endianness_t.
+   */
+  HSA_SYSTEM_INFO_ENDIANNESS = 5,
+  /**
+   * Machine model supported by the HSA runtime. The type of this attribute is
+   * ::hsa_machine_model_t.
+   */
+  HSA_SYSTEM_INFO_MACHINE_MODEL = 6,
+  /**
+   * Bit-mask indicating which extensions are supported by the
+   * implementation. An extension with an ID of @p i is supported if the bit at
+   * position @p i is set. The type of this attribute is uint8_t[128].
+   */
+  HSA_SYSTEM_INFO_EXTENSIONS = 7
+} hsa_system_info_t;
+
+/**
+ * @brief Get the current value of a system attribute.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * system attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API
+    hsa_system_get_info(hsa_system_info_t attribute, void *value);
+
+/**
+ * @brief HSA extensions.
+ */
+typedef enum {
+  /**
+   * Finalizer extension.
+   */
+  HSA_EXTENSION_FINALIZER = 0,
+  /**
+   * Images extension.
+   */
+  HSA_EXTENSION_IMAGES = 1,
+  HSA_EXTENSION_AMD_PROFILER = 2
+} hsa_extension_t;
+
+/**
+ * @brief Query if a given version of an extension is supported by the HSA
+ * implementation.
+ *
+ * @param[in] extension Extension identifier.
+ *
+ * @param[in] version_major Major version number.
+ *
+ * @param[in] version_minor Minor version number.
+ *
+ * @param[out] result Pointer to a memory location where the HSA runtime stores
+ * the result of the check. The result is true if the specified version of the
+ * extension is supported, and false otherwise.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
+ * extension, or @p result is NULL.
+ */
+hsa_status_t HSA_API
+    hsa_system_extension_supported(uint16_t extension, uint16_t version_major,
+                                   uint16_t version_minor, bool *result);
+
+/**
+ * @brief Retrieve the function pointers corresponding to a given version of an
+ * extension. Portable applications are expected to invoke the extension API
+ * using the returned function pointers.
+ *
+ * @details The application is responsible for verifying that the given version
+ * of the extension is supported by the HSA implementation (see
+ * ::hsa_system_extension_supported). If the given combination of extension,
+ * major version, and minor version is not supported by the implementation, the
+ * behavior is undefined.
+ *
+ * @param[in] extension Extension identifier.
+ *
+ * @param[in] version_major Major version number for which to retrieve the
+ * function pointer table.
+ *
+ * @param[in] version_minor Minor version number for which to retrieve the
+ * function pointer table.
+ *
+ * @param[out] table Pointer to an application-allocated function pointer table
+ * that is populated by the HSA runtime. Must not be NULL. The memory associated
+ * with table can be reused or freed after the function returns.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
+ * extension, or @p table is NULL.
+ */
+hsa_status_t HSA_API
+    hsa_system_get_extension_table(uint16_t extension, uint16_t version_major,
+                                   uint16_t version_minor, void *table);
+
+/**
+ * @brief Opaque handle representing an agent, a device that participates in
+ * the HSA memory model. An agent can submit AQL packets for execution, and
+ * may also accept AQL packets for execution (agent dispatch packets or kernel
+ * dispatch packets launching HSAIL-derived binaries).
+ */
+typedef struct hsa_agent_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_agent_t;
+
+/**
+ * @brief Agent features.
+ */
+typedef enum {
+  /**
+   * The agent supports AQL packets of kernel dispatch type. If this
+   * feature is enabled, the agent is also a kernel agent.
+   */
+  HSA_AGENT_FEATURE_KERNEL_DISPATCH = 1,
+  /**
+   * The agent supports AQL packets of agent dispatch type.
+   */
+  HSA_AGENT_FEATURE_AGENT_DISPATCH = 2
+} hsa_agent_feature_t;
+
+/**
+ * @brief Hardware device type.
+ */
+typedef enum {
+  /**
+   * CPU device.
+   */
+  HSA_DEVICE_TYPE_CPU = 0,
+  /**
+   * GPU device.
+   */
+  HSA_DEVICE_TYPE_GPU = 1,
+  /**
+   * DSP device.
+   */
+  HSA_DEVICE_TYPE_DSP = 2
+} hsa_device_type_t;
+
+/**
+ * @brief Default floating-point rounding mode.
+ */
+typedef enum {
+  /**
+   * Use a default floating-point rounding mode specified elsewhere.
+   */
+  HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT = 0,
+  /**
+   * Operations that specify the default floating-point mode are rounded to zero
+   * by default.
+   */
+  HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO = 1,
+  /**
+   * Operations that specify the default floating-point mode are rounded to the
+   * nearest representable number, with ties broken by selecting the value with
+   * an even least significant bit.
+   */
+  HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR = 2
+} hsa_default_float_rounding_mode_t;
+
+/**
+ * @brief Agent attributes.
+ */
+typedef enum {
+  /**
+   * Agent name. The type of this attribute is a NUL-terminated char[64]. If
+   * the name of the agent uses less than 63 characters, the rest of the
+   * array must be filled with NULs.
+   */
+  HSA_AGENT_INFO_NAME = 0,
+  /**
+   * Name of vendor. The type of this attribute is a NUL-terminated char[64]. If
+   * the name of the vendor uses less than 63 characters, the rest of the array
+   * must be filled with NULs.
+   */
+  HSA_AGENT_INFO_VENDOR_NAME = 1,
+  /**
+   * Agent capability. The type of this attribute is ::hsa_agent_feature_t.
+   */
+  HSA_AGENT_INFO_FEATURE = 2,
+  /**
+   * Machine model supported by the agent. The type of this attribute is
+   * ::hsa_machine_model_t.
+   */
+  HSA_AGENT_INFO_MACHINE_MODEL = 3,
+  /**
+   * Profile supported by the agent. The type of this attribute is
+   * ::hsa_profile_t.
+   */
+  HSA_AGENT_INFO_PROFILE = 4,
+  /**
+   * Default floating-point rounding mode. The type of this attribute is
+   * ::hsa_default_float_rounding_mode_t, but the value
+   * ::HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT is not allowed.
+   */
+  HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5,
+  /**
+   * Default floating-point rounding modes supported by the agent in the Base
+   * profile. The type of this attribute is a mask of
+   * ::hsa_default_float_rounding_mode_t. The default floating-point rounding
+   * mode (::HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE) bit must not be set.
+   */
+  HSA_AGENT_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES = 23,
+  /**
+   * Flag indicating that the f16 HSAIL operation is at least as fast as the
+   * f32 operation in the current agent. The value of this attribute is
+   * undefined if the agent is not a kernel agent. The type of this
+   * attribute is bool.
+   */
+  HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
+  /**
+   * Number of work-items in a wavefront. Must be a power of 2 in the range
+   * [1,256]. The value of this attribute is undefined if the agent is not
+   * a kernel agent. The type of this attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
+  /**
+   * Maximum number of work-items of each dimension of a work-group.  Each
+   * maximum must be greater than 0. No maximum can exceed the value of
+   * ::HSA_AGENT_INFO_WORKGROUP_MAX_SIZE. The value of this attribute is
+   * undefined if the agent is not a kernel agent. The type of this
+   * attribute is uint16_t[3].
+   */
+  HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
+  /**
+   * Maximum total number of work-items in a work-group. The value of this
+   * attribute is undefined if the agent is not a kernel agent. The type
+   * of this attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
+  /**
+   * Maximum number of work-items of each dimension of a grid. Each maximum must
+   * be greater than 0, and must not be smaller than the corresponding value in
+   * ::HSA_AGENT_INFO_WORKGROUP_MAX_DIM. No maximum can exceed the value of
+   * ::HSA_AGENT_INFO_GRID_MAX_SIZE. The value of this attribute is undefined if
+   * the agent is not a kernel agent. The type of this attribute is
+   * ::hsa_dim3_t.
+   */
+  HSA_AGENT_INFO_GRID_MAX_DIM = 9,
+  /**
+   * Maximum total number of work-items in a grid. The value of this attribute
+   * is undefined if the agent is not a kernel agent. The type of this
+   * attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
+  /**
+   * Maximum number of fbarriers per work-group. Must be at least 32. The value
+   * of this attribute is undefined if the agent is not a kernel agent. The
+   * type of this attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
+  /**
+   * Maximum number of queues that can be active (created but not destroyed) at
+   * one time in the agent. The type of this attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_QUEUES_MAX = 12,
+  /**
+   * Minimum number of packets that a queue created in the agent
+   * can hold. Must be a power of 2 greater than 0. Must not exceed
+   * the value of ::HSA_AGENT_INFO_QUEUE_MAX_SIZE. The type of this
+   * attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
+  /**
+   * Maximum number of packets that a queue created in the agent can
+   * hold. Must be a power of 2 greater than 0. The type of this attribute
+   * is uint32_t.
+   */
+  HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
+  /**
+   * Type of a queue created in the agent. The type of this attribute is
+   * ::hsa_queue_type_t.
+   */
+  HSA_AGENT_INFO_QUEUE_TYPE = 15,
+  /**
+   * Identifier of the NUMA node associated with the agent. The type of this
+   * attribute is uint32_t.
+   */
+  HSA_AGENT_INFO_NODE = 16,
+  /**
+   * Type of hardware device associated with the agent. The type of this
+   * attribute is ::hsa_device_type_t.
+   */
+  HSA_AGENT_INFO_DEVICE = 17,
+  /**
+   * Array of data cache sizes (L1..L4). Each size is expressed in bytes. A size
+   * of 0 for a particular level indicates that there is no cache information
+   * for that level. The type of this attribute is uint32_t[4].
+   */
+  HSA_AGENT_INFO_CACHE_SIZE = 18,
+  /**
+   * Instruction set architecture of the agent. The type of this attribute
+   * is ::hsa_isa_t.
+   */
+  HSA_AGENT_INFO_ISA = 19,
+  /**
+   * Bit-mask indicating which extensions are supported by the agent. An
+   * extension with an ID of @p i is supported if the bit at position @p i is
+   * set. The type of this attribute is uint8_t[128].
+   */
+  HSA_AGENT_INFO_EXTENSIONS = 20,
+  /**
+   * Major version of the HSA runtime specification supported by the
+   * agent. The type of this attribute is uint16_t.
+   */
+  HSA_AGENT_INFO_VERSION_MAJOR = 21,
+  /**
+   * Minor version of the HSA runtime specification supported by the
+   * agent. The type of this attribute is uint16_t.
+   */
+  HSA_AGENT_INFO_VERSION_MINOR = 22,
+  HSA_AGENT_INFO_COUNT = 25
+} hsa_agent_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given agent.
+ *
+ * @param[in] agent A valid agent.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * agent attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_agent_get_info(hsa_agent_t agent,
+                                        hsa_agent_info_t attribute,
+                                        void *value);
+
+/**
+ * @brief Iterate over the available agents, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] callback Callback to be invoked once per agent. The HSA
+ * runtime passes two arguments to the callback, the agent and the
+ * application data.  If @p callback returns a status other than
+ * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
+ * ::hsa_iterate_agents returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API
+    hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void *data),
+                       void *data);
+
+/*
+
+// If we do not know the size of an attribute, we need to query it first
+// Note: this API will not be in the spec unless needed
+hsa_status_t HSA_API hsa_agent_get_info_size(
+    hsa_agent_t agent,
+    hsa_agent_info_t attribute,
+    size_t* size);
+
+// Set the value of an agents attribute
+// Note: this API will not be in the spec unless needed
+hsa_status_t HSA_API hsa_agent_set_info(
+    hsa_agent_t agent,
+    hsa_agent_info_t attribute,
+    void* value);
+
+*/
+
+/**
+ * @brief Exception policies applied in the presence of hardware exceptions.
+ */
+typedef enum {
+  /**
+   * If a hardware exception is detected, a work-item signals an exception.
+   */
+  HSA_EXCEPTION_POLICY_BREAK = 1,
+  /**
+   * If a hardware exception is detected, a hardware status bit is set.
+   */
+  HSA_EXCEPTION_POLICY_DETECT = 2
+} hsa_exception_policy_t;
+
+/**
+ * @brief Retrieve the exception policy support for a given combination of
+ * agent and profile.
+ *
+ * @param[in] agent Agent.
+ *
+ * @param[in] profile Profile.
+ *
+ * @param[out] mask Pointer to a memory location where the HSA runtime stores a
+ * mask of ::hsa_exception_policy_t values. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is not a valid
+ * profile, or @p mask is NULL.
+ *
+ */
+hsa_status_t HSA_API hsa_agent_get_exception_policies(hsa_agent_t agent,
+                                                      hsa_profile_t profile,
+                                                      uint16_t *mask);
+
+/**
+ * @brief Query if a given version of an extension is supported by an agent.
+ *
+ * @param[in] extension Extension identifier.
+ *
+ * @param[in] agent Agent.
+ *
+ * @param[in] version_major Major version number.
+ *
+ * @param[in] version_minor Minor version number.
+ *
+ * @param[out] result Pointer to a memory location where the HSA runtime stores
+ * the result of the check. The result is true if the specified version of the
+ * extension is supported, and false otherwise.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p extension is not a valid
+ * extension, or @p result is NULL.
+ */
+hsa_status_t HSA_API
+    hsa_agent_extension_supported(uint16_t extension, hsa_agent_t agent,
+                                  uint16_t version_major,
+                                  uint16_t version_minor, bool *result);
+
+/** @} */
+
+/** \defgroup signals Signals
+ *  @{
+ */
+
+/**
+ * @brief Signal handle.
+ */
+typedef struct hsa_signal_s {
+  /**
+   * Opaque handle. The value 0 is reserved.
+   */
+  uint64_t handle;
+} hsa_signal_t;
+
+/**
+ * @brief Signal value. The value occupies 32 bits in small machine model, and
+ * 64 bits in large machine model.
+ */
+#ifdef HSA_LARGE_MODEL
+typedef int64_t hsa_signal_value_t;
+#else
+typedef int32_t hsa_signal_value_t;
+#endif
+
+/**
+ * @brief Create a signal.
+ *
+ * @param[in] initial_value Initial value of the signal.
+ *
+ * @param[in] num_consumers Size of @p consumers. A value of 0 indicates that
+ * any agent might wait on the signal.
+ *
+ * @param[in] consumers List of agents that might consume (wait on) the
+ * signal. If @p num_consumers is 0, this argument is ignored; otherwise, the
+ * HSA runtime might use the list to optimize the handling of the signal
+ * object. If an agent not listed in @p consumers waits on the returned
+ * signal, the behavior is undefined. The memory associated with @p consumers
+ * can be reused or freed after the function returns.
+ *
+ * @param[out] signal Pointer to a memory location where the HSA runtime will
+ * store the newly created signal handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is failure to allocate the
+ * resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p signal is NULL, @p
+ * num_consumers is greater than 0 but @p consumers is NULL, or @p consumers
+ * contains duplicates.
+ */
+hsa_status_t HSA_API
+    hsa_signal_create(hsa_signal_value_t initial_value, uint32_t num_consumers,
+                      const hsa_agent_t *consumers, hsa_signal_t *signal);
+
+/**
+ * @brief Destroy a signal previously created by ::hsa_signal_create.
+ *
+ * @param[in] signal Signal.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SIGNAL @p signal is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The handle in @p signal is 0.
+ */
+hsa_status_t HSA_API hsa_signal_destroy(hsa_signal_t signal);
+
+/**
+ * @brief Atomically read the current value of a signal.
+ *
+ * @param[in] signal Signal.
+ *
+ * @return Value of the signal.
+ */
+hsa_signal_value_t HSA_API hsa_signal_load_acquire(hsa_signal_t signal);
+
+/**
+ * @copydoc hsa_signal_load_acquire
+ */
+hsa_signal_value_t HSA_API hsa_signal_load_relaxed(hsa_signal_t signal);
+
+/**
+ * @brief Atomically set the value of a signal.
+ *
+ * @details If the value of the signal is changed, all the agents waiting
+ * on @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal.
+ *
+ * @param[in] value New signal value.
+ */
+void HSA_API
+    hsa_signal_store_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_store_relaxed
+ */
+void HSA_API
+    hsa_signal_store_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically set the value of a signal and return its previous value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting
+ * on @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value New value.
+ *
+ * @return Value of the signal prior to the exchange.
+ *
+ */
+hsa_signal_value_t HSA_API
+    hsa_signal_exchange_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_exchange_acq_rel
+ */
+hsa_signal_value_t HSA_API
+    hsa_signal_exchange_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_exchange_acq_rel
+ */
+hsa_signal_value_t HSA_API
+    hsa_signal_exchange_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_exchange_acq_rel
+ */
+hsa_signal_value_t HSA_API
+    hsa_signal_exchange_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically set the value of a signal if the observed value is equal to
+ * the expected value. The observed value is returned regardless of whether the
+ * replacement was done.
+ *
+ * @details If the value of the signal is changed, all the agents waiting
+ * on @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue
+ * doorbell signal, the behavior is undefined.
+ *
+ * @param[in] expected Value to compare with.
+ *
+ * @param[in] value New value.
+ *
+ * @return Observed value of the signal.
+ *
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_acq_rel(hsa_signal_t signal,
+                                                  hsa_signal_value_t expected,
+                                                  hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_cas_acq_rel
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_acquire(hsa_signal_t signal,
+                                                  hsa_signal_value_t expected,
+                                                  hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_cas_acq_rel
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_relaxed(hsa_signal_t signal,
+                                                  hsa_signal_value_t expected,
+                                                  hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_cas_acq_rel
+ */
+hsa_signal_value_t HSA_API hsa_signal_cas_release(hsa_signal_t signal,
+                                                  hsa_signal_value_t expected,
+                                                  hsa_signal_value_t value);
+
+/**
+ * @brief Atomically increment the value of a signal by a given amount.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to add to the value of the signal.
+ *
+ */
+void HSA_API
+    hsa_signal_add_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_add_acq_rel
+ */
+void HSA_API
+    hsa_signal_add_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_add_acq_rel
+ */
+void HSA_API
+    hsa_signal_add_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_add_acq_rel
+ */
+void HSA_API
+    hsa_signal_add_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically decrement the value of a signal by a given amount.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to subtract from the value of the signal.
+ *
+ */
+void HSA_API
+    hsa_signal_subtract_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_subtract_acq_rel
+ */
+void HSA_API
+    hsa_signal_subtract_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_subtract_acq_rel
+ */
+void HSA_API
+    hsa_signal_subtract_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_subtract_acq_rel
+ */
+void HSA_API
+    hsa_signal_subtract_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically perform a bitwise AND operation between the value of a
+ * signal and a given value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to AND with the value of the signal.
+ *
+ */
+void HSA_API
+    hsa_signal_and_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_and_acq_rel
+ */
+void HSA_API
+    hsa_signal_and_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_and_acq_rel
+ */
+void HSA_API
+    hsa_signal_and_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_and_acq_rel
+ */
+void HSA_API
+    hsa_signal_and_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically perform a bitwise OR operation between the value of a
+ * signal and a given value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to OR with the value of the signal.
+ */
+void HSA_API
+    hsa_signal_or_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_or_acq_rel
+ */
+void HSA_API
+    hsa_signal_or_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_or_acq_rel
+ */
+void HSA_API
+    hsa_signal_or_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_or_acq_rel
+ */
+void HSA_API
+    hsa_signal_or_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Atomically perform a bitwise XOR operation between the value of a
+ * signal and a given value.
+ *
+ * @details If the value of the signal is changed, all the agents waiting on
+ * @p signal for which @p value satisfies their wait condition are awakened.
+ *
+ * @param[in] signal Signal. If @p signal is a queue doorbell signal, the
+ * behavior is undefined.
+ *
+ * @param[in] value Value to XOR with the value of the signal.
+ *
+ */
+void HSA_API
+    hsa_signal_xor_acq_rel(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_xor_acq_rel
+ */
+void HSA_API
+    hsa_signal_xor_acquire(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_xor_acq_rel
+ */
+void HSA_API
+    hsa_signal_xor_relaxed(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @copydoc hsa_signal_xor_acq_rel
+ */
+void HSA_API
+    hsa_signal_xor_release(hsa_signal_t signal, hsa_signal_value_t value);
+
+/**
+ * @brief Wait condition operator.
+ */
+typedef enum {
+  /**
+   * The two operands are equal.
+   */
+  HSA_SIGNAL_CONDITION_EQ = 0,
+  /**
+   * The two operands are not equal.
+   */
+  HSA_SIGNAL_CONDITION_NE = 1,
+  /**
+   * The first operand is less than the second operand.
+   */
+  HSA_SIGNAL_CONDITION_LT = 2,
+  /**
+   * The first operand is greater than or equal to the second operand.
+   */
+  HSA_SIGNAL_CONDITION_GTE = 3
+} hsa_signal_condition_t;
+
+/**
+ * @brief State of the application thread during a signal wait.
+ */
+typedef enum {
+  /**
+   * The application thread may be rescheduled while waiting on the signal.
+   */
+  HSA_WAIT_STATE_BLOCKED = 0,
+  /**
+   * The application thread stays active while waiting on a signal.
+   */
+  HSA_WAIT_STATE_ACTIVE = 1
+} hsa_wait_state_t;
+
+/**
+ * @brief Wait until a signal value satisfies a specified condition, or a
+ * certain amount of time has elapsed.
+ *
+ * @details A wait operation can spuriously resume at any time sooner than the
+ * timeout (for example, due to system or other external factors) even when the
+ * condition has not been met.
+ *
+ * The function is guaranteed to return if the signal value satisfies the
+ * condition at some point in time during the wait, but the value returned to
+ * the application might not satisfy the condition. The application must ensure
+ * that signals are used in such a way that wait wakeup conditions are not
+ * invalidated before dependent threads have woken up.
+ *
+ * When the wait operation internally loads the value of the passed signal, it
+ * uses the memory order indicated in the function name.
+ *
+ * @param[in] signal Signal.
+ *
+ * @param[in] condition Condition used to compare the signal value with @p
+ * compare_value.
+ *
+ * @param[in] compare_value Value to compare with.
+ *
+ * @param[in] timeout_hint Maximum duration of the wait.  Specified in the same
+ * unit as the system timestamp. The operation might block for a shorter or
+ * longer time even if the condition is not met. A value of UINT64_MAX indicates
+ * no maximum.
+ *
+ * @param[in] wait_state_hint Hint used by the application to indicate the
+ * preferred waiting state. The actual waiting state is ultimately decided by
+ * HSA runtime and may not match the provided hint. A value of
+ * ::HSA_WAIT_STATE_ACTIVE may improve the latency of response to a signal
+ * update by avoiding rescheduling overhead.
+ *
+ * @return Observed value of the signal, which might not satisfy the specified
+ * condition.
+ *
+ */
+hsa_signal_value_t HSA_API
+    hsa_signal_wait_acquire(hsa_signal_t signal,
+                            hsa_signal_condition_t condition,
+                            hsa_signal_value_t compare_value,
+                            uint64_t timeout_hint,
+                            hsa_wait_state_t wait_state_hint);
+
+/**
+ * @copydoc hsa_signal_wait_acquire
+ */
+hsa_signal_value_t HSA_API
+    hsa_signal_wait_relaxed(hsa_signal_t signal,
+                            hsa_signal_condition_t condition,
+                            hsa_signal_value_t compare_value,
+                            uint64_t timeout_hint,
+                            hsa_wait_state_t wait_state_hint);
+
+/** @} */
+
+/** \defgroup memory Memory
+ *  @{
+ */
+
+/**
+ * @brief A memory region represents a block of virtual memory with certain
+ * properties. For example, the HSA runtime represents fine-grained memory in
+ * the global segment using a region. A region might be associated with more
+ * than one agent.
+ */
+typedef struct hsa_region_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_region_t;
+
+/** @} */
+
+/** \defgroup queue Queues
+ *  @{
+ */
+
+/**
+ * @brief Queue type. Intended to be used for dynamic queue protocol
+ * determination.
+ */
+typedef enum {
+  /**
+   * Queue supports multiple producers.
+   */
+  HSA_QUEUE_TYPE_MULTI = 0,
+  /**
+   * Queue only supports a single producer.
+   */
+  HSA_QUEUE_TYPE_SINGLE = 1
+} hsa_queue_type_t;
+
+/**
+ * @brief Queue features.
+ */
+typedef enum {
+  /**
+   * Queue supports kernel dispatch packets.
+   */
+  HSA_QUEUE_FEATURE_KERNEL_DISPATCH = 1,
+
+  /**
+   * Queue supports agent dispatch packets.
+   */
+  HSA_QUEUE_FEATURE_AGENT_DISPATCH = 2
+} hsa_queue_feature_t;
+
+/**
+ * @brief User mode queue.
+ *
+ * @details The queue structure is read-only and allocated by the HSA runtime,
+ * but agents can directly modify the contents of the buffer pointed to by @a
+ * base_address, or use HSA runtime APIs to access the doorbell signal.
+ *
+ */
+typedef struct hsa_queue_s {
+  /**
+   * Queue type.
+   */
+  hsa_queue_type_t type;
+
+  /**
+   * Queue features mask. This is a bit-field of ::hsa_queue_feature_t
+   * values. Applications should ignore any unknown set bits.
+   */
+  uint32_t features;
+
+#ifdef HSA_LARGE_MODEL /* only base_address; documented in the next branch */
+  void *base_address;
+#elif defined HSA_LITTLE_ENDIAN /* base_address first, then reserved0 */
+  /**
+   * Starting address of the HSA runtime-allocated buffer used to store the AQL
+   * packets. Must be aligned to the size of an AQL packet.
+   */
+  void *base_address;
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved0;
+#else /* neither macro defined: reserved0 first, then base_address */
+  uint32_t reserved0;
+  void *base_address;
+#endif /* HSA_LARGE_MODEL / HSA_LITTLE_ENDIAN */
+
+  /**
+   * Signal object used by the application to indicate the ID of a packet that
+   * is ready to be processed. The HSA runtime manages the doorbell signal. If
+   * the application tries to replace or destroy this signal, the behavior is
+   * undefined.
+   *
+   * If @a type is ::HSA_QUEUE_TYPE_SINGLE the doorbell signal value must be
+   * updated in a monotonically increasing fashion. If @a type is
+   * ::HSA_QUEUE_TYPE_MULTI, the doorbell signal value can be updated with any
+   * value.
+   */
+  hsa_signal_t doorbell_signal;
+
+  /**
+   * Maximum number of packets the queue can hold. Must be a power of 2.
+   */
+  uint32_t size;
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved1;
+  /**
+   * Queue identifier, which is unique over the lifetime of the application.
+   */
+  uint64_t id;
+
+} hsa_queue_t;
+
+/**
+ * @brief Create a user mode queue.
+ *
+ * @details The HSA runtime creates the queue structure, the underlying packet
+ * buffer, the completion signal, and the write and read indexes. The initial
+ * value of the write and read indexes is 0. The type of every packet in the
+ * buffer is initialized to ::HSA_PACKET_TYPE_INVALID.
+ *
+ * The application should only rely on the error code returned to determine if
+ * the queue is valid.
+ *
+ * @param[in] agent Agent where to create the queue.
+ *
+ * @param[in] size Number of packets the queue is expected to
+ * hold. Must be a power of 2 between 1 and the value of
+ * ::HSA_AGENT_INFO_QUEUE_MAX_SIZE in @p agent. The size of the newly
+ * created queue is the maximum of @p size and the value of
+ * ::HSA_AGENT_INFO_QUEUE_MIN_SIZE in @p agent.
+ *
+ * @param[in] type Type of the queue. If the value of
+ * ::HSA_AGENT_INFO_QUEUE_TYPE in @p agent is ::HSA_QUEUE_TYPE_SINGLE, then @p
+ * type must also be ::HSA_QUEUE_TYPE_SINGLE.
+ *
+ * @param[in] callback Callback invoked by the HSA runtime for every
+ * asynchronous event related to the newly created queue. May be NULL. The HSA
+ * runtime passes three arguments to the callback: a code identifying the event
+ * that triggered the invocation, a pointer to the queue where the event
+ * originated, and the application data.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * invocation. May be NULL.
+ *
+ * @param[in] private_segment_size Hint indicating the maximum
+ * expected private segment usage per work-item, in bytes. There may
+ * be performance degradation if the application places a kernel
+ * dispatch packet in the queue and the corresponding private segment
+ * usage exceeds @p private_segment_size. If the application does not
+ * want to specify any particular value for this argument, @p
+ * private_segment_size must be UINT32_MAX. If the queue does not
+ * support kernel dispatch packets, this argument is ignored.
+ *
+ * @param[in] group_segment_size Hint indicating the maximum expected
+ * group segment usage per work-group, in bytes. There may be
+ * performance degradation if the application places a kernel dispatch
+ * packet in the queue and the corresponding group segment usage
+ * exceeds @p group_segment_size. If the application does not want to
+ * specify any particular value for this argument, @p
+ * group_segment_size must be UINT32_MAX. If the queue does not
+ * support kernel dispatch packets, this argument is ignored.
+ *
+ * @param[out] queue Memory location where the HSA runtime stores a pointer to
+ * the newly created queue.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
+ * allocate the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE_CREATION @p agent does not
+ * support queues of the given type.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two,
+ * @p size is 0, @p type is an invalid queue type, or @p queue is NULL.
+ *
+ */
+hsa_status_t HSA_API
+    hsa_queue_create(hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
+                     void (*callback)(hsa_status_t status, hsa_queue_t *source,
+                                      void *data),
+                     void *data, uint32_t private_segment_size,
+                     uint32_t group_segment_size, hsa_queue_t **queue);
+
+/**
+ * @brief Create a queue for which the application or a kernel is responsible
+ * for processing the AQL packets.
+ *
+ * @details The application can use this function to create queues where AQL
+ * packets are not parsed by the packet processor associated with an agent,
+ * but rather by a unit of execution running on that agent (for example, a
+ * thread in the host application).
+ *
+ * The application is responsible for ensuring that all the producers and
+ * consumers of the resulting queue can access the provided doorbell signal
+ * and memory region. The application is also responsible for ensuring that the
+ * unit of execution processing the queue packets supports the indicated
+ * features (AQL packet types).
+ *
+ * When the queue is created, the HSA runtime allocates the packet buffer using
+ * @p region, and the write and read indexes. The initial value of the write and
+ * read indexes is 0, and the type of every packet in the buffer is initialized
+ * to ::HSA_PACKET_TYPE_INVALID. The value of the @e size, @e type, @e features,
+ * and @e doorbell_signal fields in the returned queue match the values passed
+ * by the application.
+ *
+ * @param[in] region Memory region that the HSA runtime should use to allocate
+ * the AQL packet buffer and any other queue metadata.
+ *
+ * @param[in] size Number of packets the queue is expected to hold. Must be a
+ * power of 2 greater than 0.
+ *
+ * @param[in] type Queue type.
+ *
+ * @param[in] features Supported queue features. This is a bit-field of
+ * ::hsa_queue_feature_t values.
+ *
+ * @param[in] doorbell_signal Doorbell signal that the HSA runtime must
+ * associate with the returned queue. The signal handle must not be 0.
+ *
+ * @param[out] queue Memory location where the HSA runtime stores a pointer to
+ * the newly created queue. The application should not rely on the value
+ * returned for this argument but only on the status code to determine if the
+ * queue is valid. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to
+ * allocate the resources required by the implementation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is not a power of two, @p
+ * size is 0, @p type is an invalid queue type, the doorbell signal handle is
+ * 0, or @p queue is NULL.
+ *
+ */
+hsa_status_t HSA_API
+    hsa_soft_queue_create(hsa_region_t region, uint32_t size,
+                          hsa_queue_type_t type, uint32_t features,
+                          hsa_signal_t doorbell_signal, hsa_queue_t **queue);
+
+/**
+ * @brief Destroy a user mode queue.
+ *
+ * @details When a queue is destroyed, the state of the AQL packets that have
+ * not yet been fully processed (their completion phase has not finished)
+ * becomes undefined. It is the responsibility of the application to ensure that
+ * all pending queue operations are finished if their results are required.
+ *
+ * The resources allocated by the HSA runtime during queue creation (queue
+ * structure, ring buffer, doorbell signal) are released.  The queue should not
+ * be accessed after being destroyed.
+ *
+ * @param[in] queue Pointer to a queue created using ::hsa_queue_create.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
+ */
+hsa_status_t HSA_API hsa_queue_destroy(hsa_queue_t *queue);
+
+/**
+ * @brief Inactivate a queue.
+ *
+ * @details Inactivating the queue aborts any pending executions and prevents
+ * any new packets from being processed. Any more packets written to the queue
+ * once it is inactivated will be ignored by the packet processor.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_QUEUE The queue is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p queue is NULL.
+ */
+hsa_status_t HSA_API hsa_queue_inactivate(hsa_queue_t *queue);
+
+/**
+ * @brief Atomically load the read index of a queue.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @return Read index of the queue pointed to by @p queue.
+ */
+uint64_t HSA_API hsa_queue_load_read_index_acquire(const hsa_queue_t *queue);
+
+/**
+ * @copydoc hsa_queue_load_read_index_acquire
+ */
+uint64_t HSA_API hsa_queue_load_read_index_relaxed(const hsa_queue_t *queue);
+
+/**
+ * @brief Atomically load the write index of a queue.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @return Write index of the queue pointed to by @p queue.
+ */
+uint64_t HSA_API hsa_queue_load_write_index_acquire(const hsa_queue_t *queue);
+
+/**
+ * @copydoc hsa_queue_load_write_index_acquire
+ */
+uint64_t HSA_API hsa_queue_load_write_index_relaxed(const hsa_queue_t *queue);
+
+/**
+ * @brief Atomically set the write index of a queue.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] value Value to assign to the write index.
+ *
+ */
+void HSA_API hsa_queue_store_write_index_relaxed(const hsa_queue_t *queue,
+                                                 uint64_t value);
+
+/**
+ * @copydoc hsa_queue_store_write_index_relaxed
+ */
+void HSA_API hsa_queue_store_write_index_release(const hsa_queue_t *queue,
+                                                 uint64_t value);
+
+/**
+ * @brief Atomically set the write index of a queue if the observed value is
+ * equal to the expected value. The application can inspect the returned value
+ * to determine if the replacement was done.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] expected Expected value.
+ *
+ * @param[in] value Value to assign to the write index if @p expected matches
+ * the observed write index. Must be greater than @p expected.
+ *
+ * @return Previous value of the write index.
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_acq_rel(const hsa_queue_t *queue,
+                                                   uint64_t expected,
+                                                   uint64_t value);
+
+/**
+ * @copydoc hsa_queue_cas_write_index_acq_rel
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_acquire(const hsa_queue_t *queue,
+                                                   uint64_t expected,
+                                                   uint64_t value);
+
+/**
+ * @copydoc hsa_queue_cas_write_index_acq_rel
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_relaxed(const hsa_queue_t *queue,
+                                                   uint64_t expected,
+                                                   uint64_t value);
+
+/**
+ * @copydoc hsa_queue_cas_write_index_acq_rel
+ */
+uint64_t HSA_API hsa_queue_cas_write_index_release(const hsa_queue_t *queue,
+                                                   uint64_t expected,
+                                                   uint64_t value);
+
+/**
+ * @brief Atomically increment the write index of a queue by an offset.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] value Value to add to the write index.
+ *
+ * @return Previous value of the write index.
+ */
+uint64_t HSA_API
+    hsa_queue_add_write_index_acq_rel(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @copydoc hsa_queue_add_write_index_acq_rel
+ */
+uint64_t HSA_API
+    hsa_queue_add_write_index_acquire(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @copydoc hsa_queue_add_write_index_acq_rel
+ */
+uint64_t HSA_API
+    hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @copydoc hsa_queue_add_write_index_acq_rel
+ */
+uint64_t HSA_API
+    hsa_queue_add_write_index_release(const hsa_queue_t *queue, uint64_t value);
+
+/**
+ * @brief Atomically set the read index of a queue.
+ *
+ * @details Modifications of the read index are not allowed and result in
+ * undefined behavior if the queue is associated with an agent for which
+ * only the corresponding packet processor is permitted to update the read
+ * index.
+ *
+ * @param[in] queue Pointer to a queue.
+ *
+ * @param[in] value Value to assign to the read index.
+ *
+ */
+void HSA_API hsa_queue_store_read_index_relaxed(const hsa_queue_t *queue,
+                                                uint64_t value);
+
+/**
+ * @copydoc hsa_queue_store_read_index_relaxed
+ */
+void HSA_API hsa_queue_store_read_index_release(const hsa_queue_t *queue,
+                                                uint64_t value);
+/** @} */
+
+/** \defgroup aql Architected Queuing Language
+ *  @{
+ */
+
+/**
+ * @brief Packet type.
+ */
+typedef enum {
+  /**
+   * Vendor-specific packet.
+   */
+  HSA_PACKET_TYPE_VENDOR_SPECIFIC = 0,
+  /**
+   * The packet has been processed in the past, but has not been reassigned to
+   * the packet processor. A packet processor must not process a packet of this
+   * type. All queues support this packet type.
+   */
+  HSA_PACKET_TYPE_INVALID = 1,
+  /**
+   * Packet used by agents for dispatching jobs to kernel agents. Not all
+   * queues support packets of this type (see ::hsa_queue_feature_t).
+   */
+  HSA_PACKET_TYPE_KERNEL_DISPATCH = 2,
+  /**
+   * Packet used by agents to delay processing of subsequent packets, and to
+   * express complex dependencies between multiple packets. All queues support
+   * this packet type.
+   */
+  HSA_PACKET_TYPE_BARRIER_AND = 3,
+  /**
+   * Packet used by agents for dispatching jobs to agents. Not all
+   * queues support packets of this type (see ::hsa_queue_feature_t).
+   */
+  HSA_PACKET_TYPE_AGENT_DISPATCH = 4,
+  /**
+   * Packet used by agents to delay processing of subsequent packets, and to
+   * express complex dependencies between multiple packets. All queues support
+   * this packet type.
+   */
+  HSA_PACKET_TYPE_BARRIER_OR = 5,
+  HSA_PACKET_TYPE_COUNT = 6 /**< Number of packet types listed above. */
+} hsa_packet_type_t;
+
+/**
+ * @brief Scope of the memory fence operation associated with a packet.
+ */
+typedef enum {
+  /**
+   * No scope (no fence is applied). The packet relies on external fences to
+   * ensure visibility of memory updates.
+   */
+  HSA_FENCE_SCOPE_NONE = 0,
+  /**
+   * The fence is applied with agent scope for the global segment.
+   */
+  HSA_FENCE_SCOPE_AGENT = 1,
+  /**
+   * The fence is applied across both agent and system scope for the global
+   * segment.
+   */
+  HSA_FENCE_SCOPE_SYSTEM = 2
+} hsa_fence_scope_t;
+
+/**
+ * @brief Sub-fields of the @a header field that is present in any AQL
+ * packet. The offset (with respect to the address of @a header) of a sub-field
+ * is identical to its enumeration constant. The width of each sub-field is
+ * determined by the corresponding value in ::hsa_packet_header_width_t. The
+ * offset and the width are expressed in bits.
+ */
+typedef enum {
+  /**
+   * Packet type. The value of this sub-field must be one of
+   * ::hsa_packet_type_t. If the type is ::HSA_PACKET_TYPE_VENDOR_SPECIFIC, the
+   * packet layout is vendor-specific.
+   */
+  HSA_PACKET_HEADER_TYPE = 0,
+  /**
+   * Barrier bit. If the barrier bit is set, the processing of the current
+   * packet only launches when all preceding packets (within the same queue) are
+   * complete.
+   */
+  HSA_PACKET_HEADER_BARRIER = 8,
+  /**
+   * Acquire fence scope. The value of this sub-field determines the scope and
+   * type of the memory fence operation applied before the packet enters the
+   * active phase. An acquire fence ensures that any subsequent global segment
+   * or image loads by any unit of execution that belongs to a dispatch that has
+   * not yet entered the active phase on any queue of the same kernel agent
+   * see any data previously released at the scopes specified by the acquire
+   * fence. The value of this sub-field must be one of ::hsa_fence_scope_t.
+   */
+  HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE = 9,
+  /**
+   * Release fence scope. The value of this sub-field determines the scope and
+   * type of the memory fence operation applied after kernel completion but
+   * before the packet is completed. A release fence makes any global segment or
+   * image data that was stored by any unit of execution that belonged to a
+   * dispatch that has completed the active phase on any queue of the same
+   * kernel agent visible in all the scopes specified by the release fence. The
+   * value of this sub-field must be one of ::hsa_fence_scope_t.
+   */
+  HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE = 11
+} hsa_packet_header_t;
+
+/**
+ * @brief Width (in bits) of the sub-fields in ::hsa_packet_header_t.
+ */
+typedef enum {
+  HSA_PACKET_HEADER_WIDTH_TYPE = 8, /**< Packet type. */
+  HSA_PACKET_HEADER_WIDTH_BARRIER = 1, /**< Barrier bit. */
+  HSA_PACKET_HEADER_WIDTH_ACQUIRE_FENCE_SCOPE = 2, /**< Acquire fence scope. */
+  HSA_PACKET_HEADER_WIDTH_RELEASE_FENCE_SCOPE = 2 /**< Release fence scope. */
+} hsa_packet_header_width_t;
+
+/**
+ * @brief Sub-fields of the kernel dispatch packet @a setup field. The offset
+ * (with respect to the address of @a setup) of a sub-field is identical to its
+ * enumeration constant. The width of each sub-field is determined by the
+ * corresponding value in ::hsa_kernel_dispatch_packet_setup_width_t. The
+ * offset and the width are expressed in bits.
+ */
+typedef enum {
+  /**
+   * Number of dimensions of the grid. Valid values are 1, 2, or 3.
+   *
+   */
+  HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS = 0
+} hsa_kernel_dispatch_packet_setup_t;
+
+/**
+ * @brief Width (in bits) of the sub-fields in
+ * ::hsa_kernel_dispatch_packet_setup_t.
+ */
+typedef enum {
+  HSA_KERNEL_DISPATCH_PACKET_SETUP_WIDTH_DIMENSIONS = 2
+} hsa_kernel_dispatch_packet_setup_width_t;
+
+/**
+ * @brief AQL kernel dispatch packet
+ *
+ * @details The declaration of @a kernarg_address depends on the build
+ * configuration: when HSA_LARGE_MODEL is defined no @a reserved1 field is
+ * declared; otherwise a 32-bit @a reserved1 field is declared after the pointer
+ * if HSA_LITTLE_ENDIAN is defined, and before it if not.
+ */
+typedef struct hsa_kernel_dispatch_packet_s {
+  /**
+   * Packet header. Used to configure multiple packet parameters such as the
+   * packet type. The parameters are described by ::hsa_packet_header_t.
+   */
+  uint16_t header;
+
+  /**
+   * Dispatch setup parameters. Used to configure kernel dispatch parameters
+   * such as the number of dimensions in the grid. The parameters are described
+   * by ::hsa_kernel_dispatch_packet_setup_t.
+   */
+  uint16_t setup;
+
+  /**
+   * X dimension of work-group, in work-items. Must be greater than 0.
+   */
+  uint16_t workgroup_size_x;
+
+  /**
+   * Y dimension of work-group, in work-items. Must be greater than
+   * 0. If the grid has 1 dimension, the only valid value is 1.
+   */
+  uint16_t workgroup_size_y;
+
+  /**
+   * Z dimension of work-group, in work-items. Must be greater than
+   * 0. If the grid has 1 or 2 dimensions, the only valid value is 1.
+   */
+  uint16_t workgroup_size_z;
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint16_t reserved0;
+
+  /**
+   * X dimension of grid, in work-items. Must be greater than 0. Must
+   * not be smaller than @a workgroup_size_x.
+   */
+  uint32_t grid_size_x;
+
+  /**
+   * Y dimension of grid, in work-items. Must be greater than 0. If the grid has
+   * 1 dimension, the only valid value is 1. Must not be smaller than @a
+   * workgroup_size_y.
+   */
+  uint32_t grid_size_y;
+
+  /**
+   * Z dimension of grid, in work-items. Must be greater than 0. If the grid has
+   * 1 or 2 dimensions, the only valid value is 1. Must not be smaller than @a
+   * workgroup_size_z.
+   */
+  uint32_t grid_size_z;
+
+  /**
+   * Size in bytes of private memory allocation request (per work-item).
+   */
+  uint32_t private_segment_size;
+
+  /**
+   * Size in bytes of group memory allocation request (per work-group). Must not
+   * be less than the sum of the group memory used by the kernel (and the
+   * functions it calls directly or indirectly) and the dynamically allocated
+   * group segment variables.
+   */
+  uint32_t group_segment_size;
+
+  /**
+   * Opaque handle to a code object that includes an implementation-defined
+   * executable code for the kernel.
+   */
+  uint64_t kernel_object;
+
+#ifdef HSA_LARGE_MODEL
+  /**
+   * Pointer to a buffer containing the kernel arguments. May be NULL.
+   *
+   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
+   * modified once the kernel dispatch packet is enqueued until the dispatch has
+   * completed execution.
+   */
+  void *kernarg_address;
+#elif defined HSA_LITTLE_ENDIAN
+  /**
+   * Pointer to a buffer containing the kernel arguments. May be NULL.
+   *
+   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
+   * modified once the kernel dispatch packet is enqueued until the dispatch has
+   * completed execution.
+   */
+  void *kernarg_address;
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved1;
+#else
+  /**
+   * Reserved. Must be 0. Declared ahead of the pointer in this configuration.
+   */
+  uint32_t reserved1;
+  /**
+   * Pointer to a buffer containing the kernel arguments. May be NULL.
+   *
+   * The buffer must be allocated using ::hsa_memory_allocate, and must not be
+   * modified once the kernel dispatch packet is enqueued until the dispatch has
+   * completed execution.
+   */
+  void *kernarg_address;
+#endif
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint64_t reserved2;
+
+  /**
+   * Signal used to indicate completion of the job. The application can use the
+   * special signal handle 0 to indicate that no signal is used.
+   */
+  hsa_signal_t completion_signal;
+
+} hsa_kernel_dispatch_packet_t;
+
+/**
+ * @brief Agent dispatch packet.
+ *
+ * @details The declaration of @a return_address depends on the build
+ * configuration: when HSA_LARGE_MODEL is defined no @a reserved1 field is
+ * declared; otherwise a 32-bit @a reserved1 field is declared after the pointer
+ * if HSA_LITTLE_ENDIAN is defined, and before it if not.
+ */
+typedef struct hsa_agent_dispatch_packet_s {
+  /**
+   * Packet header. Used to configure multiple packet parameters such as the
+   * packet type. The parameters are described by ::hsa_packet_header_t.
+   */
+  uint16_t header;
+
+  /**
+   * Application-defined function to be performed by the destination agent.
+   */
+  uint16_t type;
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved0;
+
+#ifdef HSA_LARGE_MODEL
+  /**
+   * Address where to store the function return values, if any.
+   */
+  void *return_address;
+#elif defined HSA_LITTLE_ENDIAN
+  /**
+   * Address where to store the function return values, if any.
+   */
+  void *return_address;
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved1;
+#else
+  /**
+   * Reserved. Must be 0. Declared ahead of the pointer in this configuration.
+   */
+  uint32_t reserved1;
+  /**
+   * Address where to store the function return values, if any.
+   */
+  void *return_address;
+#endif
+
+  /**
+   * Function arguments.
+   */
+  uint64_t arg[4];
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint64_t reserved2;
+
+  /**
+   * Signal used to indicate completion of the job. The application can use the
+   * special signal handle 0 to indicate that no signal is used.
+   */
+  hsa_signal_t completion_signal;
+
+} hsa_agent_dispatch_packet_t;
+
+/**
+ * @brief Barrier-AND packet.
+ */
+typedef struct hsa_barrier_and_packet_s {
+  /**
+   * Packet header. Used to configure multiple packet parameters such as the
+   * packet type. The parameters are described by ::hsa_packet_header_t.
+   */
+  uint16_t header;
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint16_t reserved0;
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved1;
+
+  /**
+   * Array of dependent signal objects. Signals with a handle value of 0 are
+   * allowed and are interpreted by the packet processor as satisfied
+   * dependencies.
+   */
+  hsa_signal_t dep_signal[5];
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint64_t reserved2;
+
+  /**
+   * Signal used to indicate completion of the job. The application can use the
+   * special signal handle 0 to indicate that no signal is used.
+   */
+  hsa_signal_t completion_signal;
+
+} hsa_barrier_and_packet_t;
+
+/**
+ * @brief Barrier-OR packet.
+ */
+typedef struct hsa_barrier_or_packet_s {
+  /**
+   * Packet header. Used to configure multiple packet parameters such as the
+   * packet type. The parameters are described by ::hsa_packet_header_t.
+   */
+  uint16_t header;
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint16_t reserved0;
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved1;
+
+  /**
+   * Array of dependent signal objects. Signals with a handle value of 0 are
+   * allowed and are interpreted by the packet processor as dependencies not
+   * satisfied.
+   */
+  hsa_signal_t dep_signal[5];
+
+  /**
+   * Reserved. Must be 0.
+   */
+  uint64_t reserved2;
+
+  /**
+   * Signal used to indicate completion of the job. The application can use the
+   * special signal handle 0 to indicate that no signal is used.
+   */
+  hsa_signal_t completion_signal;
+
+} hsa_barrier_or_packet_t;
+
+/** @} */
+
+/** \addtogroup memory Memory
+ *  @{
+ */
+
+/**
+ * @brief Memory segments associated with a region.
+ */
+typedef enum {
+  /**
+   * Global segment. Used to hold data that is shared by all agents.
+   */
+  HSA_REGION_SEGMENT_GLOBAL = 0,
+  /**
+   * Read-only segment. Used to hold data that remains constant during the
+   * execution of a kernel.
+   */
+  HSA_REGION_SEGMENT_READONLY = 1,
+  /**
+   * Private segment. Used to hold data that is local to a single work-item.
+   */
+  HSA_REGION_SEGMENT_PRIVATE = 2,
+  /**
+   * Group segment. Used to hold data that is shared by the work-items of a
+   * work-group.
+   */
+  HSA_REGION_SEGMENT_GROUP = 3,
+  /* TODO: non-standard enums */
+  /* NOTE(review): the three enumerators below are marked non-standard by the
+   * TODO above and are otherwise undocumented in this header. Judging by their
+   * names they presumably denote the arg, kernarg and spill segments; confirm
+   * against the runtime that produces these values before relying on them. */
+  HSA_REGION_SEGMENT_ARG = 4,
+  HSA_REGION_SEGMENT_KERNARG = 5,
+  HSA_REGION_SEGMENT_SPILL = 6
+} hsa_region_segment_t;
+
+/**
+ * @brief Global region flags.
+ */
+typedef enum {
+  /**
+   * The application can use memory in the region to store kernel arguments, and
+   * provide the values for the kernarg segment of a kernel dispatch. If this
+   * flag is set, then ::HSA_REGION_GLOBAL_FLAG_FINE_GRAINED must be set.
+   */
+  HSA_REGION_GLOBAL_FLAG_KERNARG = 1,
+  /**
+   * Updates to memory in this region are immediately visible to all the
+   * agents under the terms of the HSA memory model. If this
+   * flag is set, then ::HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED must not be set.
+   */
+  HSA_REGION_GLOBAL_FLAG_FINE_GRAINED = 2,
+  /**
+   * Updates to memory in this region can be performed by a single agent at
+   * a time. If a different agent in the system is allowed to access the
+   * region, the application must explicitly invoke ::hsa_memory_assign_agent
+   * in order to transfer ownership to that agent for a particular buffer.
+   */
+  HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED = 4
+} hsa_region_global_flag_t;
+
+/**
+ * @brief Attributes of a memory region.
+ *
+ * @details The enumerator values are not contiguous: no attribute is assigned
+ * the value 3.
+ */
+typedef enum {
+  /**
+   * Segment where memory in the region can be used. The type of this
+   * attribute is ::hsa_region_segment_t.
+   */
+  HSA_REGION_INFO_SEGMENT = 0,
+  /**
+   * Flag mask. The value of this attribute is undefined if the value of
+   * ::HSA_REGION_INFO_SEGMENT is not ::HSA_REGION_SEGMENT_GLOBAL. The type of
+   * this attribute is uint32_t, a bit-field of ::hsa_region_global_flag_t
+   * values.
+   */
+  HSA_REGION_INFO_GLOBAL_FLAGS = 1,
+  /**
+   * Size of this region, in bytes. The type of this attribute is size_t.
+   */
+  HSA_REGION_INFO_SIZE = 2,
+  /**
+   * Maximum allocation size in this region, in bytes. Must not exceed the value
+   * of ::HSA_REGION_INFO_SIZE. The type of this attribute is size_t.
+   *
+   * If the region is in the global or readonly segments, this is the maximum
+   * size that the application can pass to ::hsa_memory_allocate. If the region
+   * is in the group segment, this is the maximum size (per work-group) that can
+   * be requested for a given kernel dispatch. If the region is in the private
+   * segment, this is the maximum size (per work-item) that can be requested for
+   * a specific kernel dispatch.
+   */
+  HSA_REGION_INFO_ALLOC_MAX_SIZE = 4,
+  /**
+   * Indicates whether memory in this region can be allocated using
+   * ::hsa_memory_allocate. The type of this attribute is bool.
+   *
+   * The value of this flag is always false for regions in the group and private
+   * segments.
+   */
+  HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED = 5,
+  /**
+   * Allocation granularity of buffers allocated by ::hsa_memory_allocate in
+   * this region. The size of a buffer allocated in this region is a multiple of
+   * the value of this attribute. The value of this attribute is only defined if
+   * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region. The type
+   * of this attribute is size_t.
+   */
+  HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE = 6,
+  /**
+   * Alignment of buffers allocated by ::hsa_memory_allocate in this region. The
+   * value of this attribute is only defined if
+   * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED is true for this region, and must
+   * be a power of 2. The type of this attribute is size_t.
+   */
+  HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
+  /* NOTE(review): undocumented in this header. Presumably an end-of-list
+   * marker (one past the largest attribute value) rather than a queryable
+   * attribute; confirm before passing it to ::hsa_region_get_info. */
+  HSA_REGION_INFO_COUNT = 8
+} hsa_region_info_t;
+
+/**
+ * @brief Get the current value of an attribute of a region.
+ *
+ * @param[in] region A valid region.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * region attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_region_get_info(hsa_region_t region,
+                                         hsa_region_info_t attribute,
+                                         void *value);
+
+/**
+ * @brief Iterate over the memory regions associated with a given agent, and
+ * invoke an application-defined callback on every iteration.
+ *
+ * @param[in] agent A valid agent.
+ *
+ * @param[in] callback Callback to be invoked once per region that is
+ * accessible from the agent.  The HSA runtime passes two arguments to the
+ * callback, the region and the application data.  If @p callback returns a
+ * status other than ::HSA_STATUS_SUCCESS for a particular iteration, the
+ * traversal stops and ::hsa_agent_iterate_regions returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_agent_iterate_regions(
+    hsa_agent_t agent,
+    hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
+
+/**
+ * @brief Allocate a block of memory in a given region.
+ *
+ * @param[in] region Region where to allocate memory from. The region must have
+ * the ::HSA_REGION_INFO_RUNTIME_ALLOC_ALLOWED flag set.
+ *
+ * @param[in] size Allocation size, in bytes. Must not be zero. This value is
+ * rounded up to the nearest multiple of ::HSA_REGION_INFO_RUNTIME_ALLOC_GRANULE
+ * in @p region.
+ *
+ * @param[out] ptr Pointer to the location where to store the base address of
+ * the allocated block. The returned base address is aligned to the value of
+ * ::HSA_REGION_INFO_RUNTIME_ALLOC_ALIGNMENT in @p region. If the allocation
+ * fails, the returned value is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES No memory is available.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_REGION The region is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The host is not allowed to
+ * allocate memory in @p region, or @p size is greater than the value of
+ * ::HSA_REGION_INFO_ALLOC_MAX_SIZE in @p region.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p size is 0.
+ */
+hsa_status_t HSA_API
+    hsa_memory_allocate(hsa_region_t region, size_t size, void **ptr);
+
+/**
+ * @brief Deallocate a block of memory previously allocated using
+ * ::hsa_memory_allocate.
+ *
+ * @param[in] ptr Pointer to a memory block. If @p ptr does not match a value
+ * previously returned by ::hsa_memory_allocate, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ */
+hsa_status_t HSA_API hsa_memory_free(void *ptr);
+
+/**
+ * @brief Copy a block of memory.
+ *
+ * @param[out] dst Buffer where the content is to be copied.
+ *
+ * @param[in] src A valid pointer to the source of data to be copied.
+ *
+ * @param[in] size Number of bytes to copy. If @p size is 0, no copy is
+ * performed and the function returns success. Copying a number of bytes larger
+ * than the size of the buffers pointed by @p dst or @p src results in undefined
+ * behavior.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT The source or destination
+ * pointers are NULL.
+ */
+hsa_status_t HSA_API hsa_memory_copy(void *dst, const void *src, size_t size);
+
+/**
+ * @brief Change the ownership of a global, coarse-grained buffer.
+ *
+ * @details The contents of a coarse-grained buffer are visible to an agent
+ * only after ownership has been explicitly transferred to that agent. Once the
+ * operation completes, the previous owner can no longer access the data in the
+ * buffer.
+ *
+ * An implementation of the HSA runtime is allowed, but not required, to change
+ * the physical location of the buffer when ownership is transferred to a
+ * different agent. In general the application must not assume this
+ * behavior. The virtual location (address) of the passed buffer is never
+ * modified.
+ *
+ * @param[in] ptr Base address of a global buffer. The pointer should match an
+ * address previously returned by ::hsa_memory_allocate. The size of the buffer
+ * affected by the ownership change is identical to the size of that previous
+ * allocation. If @p ptr points to a fine-grained global buffer, no operation is
+ * performed and the function returns success. If @p ptr does not point to
+ * global memory, the behavior is undefined.
+ *
+ * @param[in] agent Agent that becomes the owner of the buffer. The
+ * application is responsible for ensuring that @p agent has access to the
+ * region that contains the buffer. It is allowed to change ownership to an
+ * agent that is already the owner of the buffer, with the same or different
+ * access permissions.
+ *
+ * @param[in] access Access permissions requested for the new owner.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime is unable to
+ * acquire the resources required by the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr is NULL, or @p access is
+ * not a valid access value.
+ */
+hsa_status_t HSA_API hsa_memory_assign_agent(void *ptr, hsa_agent_t agent,
+                                             hsa_access_permission_t access);
+
+/**
+ *
+ * @brief Register a global, fine-grained buffer.
+ *
+ * @details Registering a buffer serves as an indication to the HSA runtime that
+ * the memory might be accessed from a kernel agent other than the
+ * host. Registration is a performance hint that allows the HSA runtime
+ * implementation to know which buffers will be accessed by some of the kernel
+ * agents ahead of time.
+ *
+ * Registration is only recommended for buffers in the global segment that have
+ * not been allocated using the HSA allocator (::hsa_memory_allocate), but an OS
+ * allocator instead.
+ *
+ * Registrations should not overlap.
+ *
+ * @param[in] ptr A buffer in global memory. If a NULL pointer is passed, no
+ * operation is performed.
+ *
+ * @param[in] size Requested registration size in bytes. A size of 0 is
+ * only allowed if @p ptr is NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure in
+ * allocating the necessary resources.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p size is 0 but @p ptr
+ * is not NULL.
+ */
+hsa_status_t HSA_API hsa_memory_register(void *ptr, size_t size);
+
+/**
+ *
+ * @brief Deregister memory previously registered using ::hsa_memory_register.
+ *
+ * @details If the memory interval being deregistered does not match a previous
+ * registration (start and end addresses), the behavior is undefined.
+ *
+ * @param[in] ptr A pointer to the base of the buffer to be deregistered. If
+ * a NULL pointer is passed, no operation is performed.
+ *
+ * @param[in] size Size of the buffer to be deregistered.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ */
+hsa_status_t HSA_API hsa_memory_deregister(void *ptr, size_t size);
+
+/** @} */
+
+/** \defgroup symbol-attributes Symbol Attributes
+ *  @{
+ */
+
+/**
+ * @brief Symbol type.
+ */
+typedef enum {
+  /**
+   * Variable.
+   */
+  HSA_SYMBOL_KIND_VARIABLE = 0,
+  /**
+   * Kernel.
+   */
+  HSA_SYMBOL_KIND_KERNEL = 1,
+  /**
+   * Indirect function.
+   */
+  HSA_SYMBOL_KIND_INDIRECT_FUNCTION = 2
+} hsa_symbol_kind_t;
+
+/**
+ * @brief Allocation type of a variable.
+ */
+typedef enum {
+  /**
+   * Agent allocation.
+   */
+  HSA_VARIABLE_ALLOCATION_AGENT = 0,
+  /**
+   * Program allocation.
+   */
+  HSA_VARIABLE_ALLOCATION_PROGRAM = 1
+} hsa_variable_allocation_t;
+
+/**
+ * @brief Linkage type of a symbol.
+ */
+typedef enum {
+  /**
+   * Module linkage.
+   */
+  HSA_SYMBOL_LINKAGE_MODULE = 0,
+  /**
+   * Program linkage.
+   */
+  HSA_SYMBOL_LINKAGE_PROGRAM = 1
+} hsa_symbol_linkage_t;
+
+/**
+ * @brief Memory segment associated with a variable.
+ */
+typedef enum {
+  /**
+   * Global memory segment.
+   */
+  HSA_VARIABLE_SEGMENT_GLOBAL = 0,
+  /**
+   * Readonly memory segment.
+   */
+  HSA_VARIABLE_SEGMENT_READONLY = 1
+} hsa_variable_segment_t;
+
+/** @} */
+
+/** \defgroup code-object Code Object
+ *  @{
+ */
+
+/**
+ * @brief Instruction set architecture.
+ */
+typedef struct hsa_isa_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_isa_t;
+
+/**
+ * @brief Retrieve a reference to an ISA handle out of a symbolic name.
+ *
+ * @param[in] name Vendor-specific name associated with a particular instruction
+ * set architecture. Must be a NUL-terminated string.
+ *
+ * @param[out] isa Memory location where the HSA runtime stores the ISA handle
+ * corresponding to the given name. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p name is NULL, or @p isa is
+ * NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA_NAME The given name does not
+ * correspond to any instruction set architecture.
+ */
+hsa_status_t HSA_API hsa_isa_from_name(
+    const char* name,
+    hsa_isa_t* isa);
+
+/**
+ * @brief Instruction set architecture attributes.
+ */
+typedef enum {
+  /**
+   * The length of the ISA name. The type of this attribute is uint32_t.
+   */
+  HSA_ISA_INFO_NAME_LENGTH = 0,
+  /**
+   * Human-readable description.  The type of this attribute is character array
+   * with the length equal to the value of ::HSA_ISA_INFO_NAME_LENGTH attribute.
+   */
+  HSA_ISA_INFO_NAME = 1,
+  /**
+   * Number of call conventions supported by the instruction set architecture.
+   * The type of this attribute is uint32_t.
+   */
+  HSA_ISA_INFO_CALL_CONVENTION_COUNT = 2,
+  /**
+   * Number of work-items in a wavefront for a given call convention. Must be a
+   * power of 2 in the range [1,256]. The type of this attribute is uint32_t.
+   */
+  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE = 3,
+  /**
+   * Number of wavefronts per compute unit for a given call convention. In
+   * practice, other factors (for example, the amount of group memory used by a
+   * work-group) may further limit the number of wavefronts per compute
+   * unit. The type of this attribute is uint32_t.
+   */
+  HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT = 4
+} hsa_isa_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given instruction set
+ * architecture (ISA).
+ *
+ * @param[in] isa A valid instruction set architecture.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[in] index Call convention index. Used only for call convention
+ * attributes, otherwise ignored. Must have a value between 0 (inclusive) and
+ * the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT (not
+ * inclusive) in @p isa.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA The instruction set architecture is
+ * invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_INDEX @p index out of range.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * instruction set architecture attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_isa_get_info(
+    hsa_isa_t isa,
+    hsa_isa_info_t attribute,
+    uint32_t index,
+    void* value);
+
+/**
+ * @brief Check if the instruction set architecture of a code object can be
+ * executed on an agent associated with another architecture.
+ *
+ * @param[in] code_object_isa Instruction set architecture associated with a
+ * code object.
+ *
+ * @param[in] agent_isa Instruction set architecture associated with an agent.
+ *
+ * @param[out] result Pointer to a memory location where the HSA runtime stores
+ * the result of the check. If the two architectures are compatible, the result
+ * is true; if they are incompatible, the result is false.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p code_object_isa or @p agent_isa is
+ * invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
+ */
+hsa_status_t HSA_API hsa_isa_compatible(
+    hsa_isa_t code_object_isa,
+    hsa_isa_t agent_isa,
+    bool* result);
+
+/**
+ * @brief An opaque handle to a code object, which contains ISA for finalized
+ * kernels and indirect functions together with information about the
+ * global/readonly segment variables they reference.
+ */
+typedef struct hsa_code_object_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_code_object_t;
+
+/**
+ * @brief Opaque handle to application data that is passed to the serialization
+ * and deserialization functions.
+ */
+typedef struct hsa_callback_data_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_callback_data_t;
+
+/**
+ * @brief Serialize a code object. Can be used for offline finalization,
+ * install-time finalization, disk code caching, etc.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] alloc_callback Callback function for memory allocation. Must not
+ * be NULL. The HSA runtime passes three arguments to the callback: the
+ * allocation size, the application data, and a pointer to a memory location
+ * where the application stores the allocation result. The HSA runtime invokes
+ * @p alloc_callback once to allocate a buffer that contains the serialized
+ * version of @p code_object.  If the callback returns a status code other than
+ * ::HSA_STATUS_SUCCESS, this function returns the same code.
+ *
+ * @param[in] callback_data Application data that is passed to @p
+ * alloc_callback. May be NULL.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] serialized_code_object Memory location where the HSA runtime
+ * stores a pointer to the serialized code object. Must not be NULL.
+ *
+ * @param[out] serialized_code_object_size Memory location where the HSA runtime
+ * stores the size (in bytes) of @p serialized_code_object. The returned value
+ * matches the allocation size passed by the HSA runtime to @p
+ * alloc_callback. Must not be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p alloc_callback, @p
+ * serialized_code_object, or @p serialized_code_object_size are NULL.
+ */
+hsa_status_t HSA_API hsa_code_object_serialize(
+    hsa_code_object_t code_object,
+    hsa_status_t (*alloc_callback)(size_t size, hsa_callback_data_t data, void **address),
+    hsa_callback_data_t callback_data,
+    const char *options,
+    void **serialized_code_object,
+    size_t *serialized_code_object_size);
+
+/**
+ * @brief Deserialize a code object.
+ *
+ * @param[in] serialized_code_object A serialized code object. Must not be NULL.
+ *
+ * @param[in] serialized_code_object_size The size (in bytes) of @p
+ * serialized_code_object. Must not be 0.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] code_object Memory location where the HSA runtime stores the
+ * deserialized code object.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p serialized_code_object or @p
+ * code_object is NULL, or @p serialized_code_object_size is 0.
+ */
+hsa_status_t HSA_API hsa_code_object_deserialize(
+    void *serialized_code_object,
+    size_t serialized_code_object_size,
+    const char *options,
+    hsa_code_object_t *code_object);
+
+/**
+ * @brief Destroy a code object.
+ *
+ * @details The lifetime of a code object must exceed that of any executable
+ * where it has been loaded. If an executable that loaded @p code_object has not
+ * been destroyed, the behavior is undefined.
+ *
+ * @param[in] code_object Code object. The handle becomes invalid after it has
+ * been destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ */
+hsa_status_t HSA_API hsa_code_object_destroy(
+    hsa_code_object_t code_object);
+
+/**
+ * @brief Code object type.
+ */
+typedef enum {
+  /**
+   * Produces code object that contains ISA for all kernels and indirect
+   * functions in HSA source.
+   */
+  HSA_CODE_OBJECT_TYPE_PROGRAM = 0
+} hsa_code_object_type_t;
+
+/**
+ * @brief Code object attributes.
+ */
+typedef enum {
+  /**
+   * The version of the code object. The type of this attribute is a
+   * NUL-terminated char[64]. If the version of the code object uses less than
+   * 63 characters, the rest of the array must be filled with NULs.
+   */
+  HSA_CODE_OBJECT_INFO_VERSION = 0,
+  /**
+   * Type of code object. The type of this attribute is
+   * ::hsa_code_object_type_t.
+   */
+  HSA_CODE_OBJECT_INFO_TYPE = 1,
+  /**
+   * Instruction set architecture this code object is produced for. The type of
+   * this attribute is ::hsa_isa_t.
+   */
+  HSA_CODE_OBJECT_INFO_ISA = 2,
+  /**
+   * Machine model this code object is produced for. The type of this attribute
+   * is ::hsa_machine_model_t.
+   */
+  HSA_CODE_OBJECT_INFO_MACHINE_MODEL = 3,
+  /**
+   * Profile this code object is produced for. The type of this attribute is
+   * ::hsa_profile_t.
+   */
+  HSA_CODE_OBJECT_INFO_PROFILE = 4,
+  /**
+   * Default floating-point rounding mode used when the code object is
+   * produced. The type of this attribute is
+   * ::hsa_default_float_rounding_mode_t.
+   */
+  HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 5
+} hsa_code_object_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given code object.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * code object attribute, or @p value is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ */
+hsa_status_t HSA_API hsa_code_object_get_info(
+    hsa_code_object_t code_object,
+    hsa_code_object_info_t attribute,
+    void *value);
+
+/**
+ * @brief Code object symbol.
+ */
+typedef struct hsa_code_symbol_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_code_symbol_t;
+
+/**
+ * @brief Get the symbol handle within a code object for a given symbol name.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] symbol_name Symbol name.
+ *
+ * @param[out] symbol Memory location where the HSA runtime stores the symbol
+ * handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
+ * that matches @p symbol_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
+ * @p symbol is NULL.
+ */
+hsa_status_t HSA_API hsa_code_object_get_symbol(
+    hsa_code_object_t code_object,
+    const char *symbol_name,
+    hsa_code_symbol_t *symbol);
+
+/**
+ * @brief Code object symbol attributes.
+ */
+typedef enum {
+  /**
+   * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
+   */
+  HSA_CODE_SYMBOL_INFO_TYPE = 0,
+  /**
+   * The length of the symbol name. The type of this attribute is uint32_t.
+   */
+  HSA_CODE_SYMBOL_INFO_NAME_LENGTH = 1,
+  /**
+   * The name of the symbol. The type of this attribute is a character array
+   * with the length equal to the value of the
+   * ::HSA_CODE_SYMBOL_INFO_NAME_LENGTH attribute.
+   */
+  HSA_CODE_SYMBOL_INFO_NAME = 2,
+  /**
+   * The length of the module name to which this symbol belongs if this symbol
+   * has module linkage, otherwise 0 is returned. The type of this attribute is
+   * uint32_t.
+   */
+  HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
+  /**
+   * The module name to which this symbol belongs if this symbol has module
+   * linkage, otherwise an empty string is returned. The type of this attribute
+   * is a character array with the length equal to the value of the
+   * ::HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
+   */
+  HSA_CODE_SYMBOL_INFO_MODULE_NAME = 4,
+  /**
+   * The linkage kind of the symbol. The type of this attribute is
+   * ::hsa_symbol_linkage_t.
+   */
+  HSA_CODE_SYMBOL_INFO_LINKAGE = 5,
+  /**
+   * Indicates whether the symbol corresponds to a definition. The type of this
+   * attribute is bool.
+   */
+  HSA_CODE_SYMBOL_INFO_IS_DEFINITION = 17,
+  /**
+   * The allocation kind of the variable. The value of this attribute is
+   * undefined if the symbol is not a variable. The type of this attribute is
+   * ::hsa_variable_allocation_t.
+   */
+  HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
+  /**
+   * The segment kind of the variable. The value of this attribute is
+   * undefined if the symbol is not a variable. The type of this attribute is
+   * ::hsa_variable_segment_t.
+   */
+  HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
+  /**
+   * Alignment of the variable. The value of this attribute is undefined if the
+   * symbol is not a variable. The type of this attribute is uint32_t.
+   */
+  HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
+  /**
+   * Size of the variable. The value of this attribute is undefined if the
+   * symbol is not a variable. The type of this attribute is uint32_t.
+   *
+   * A size of 0 is returned if the variable is an external variable and has an
+   * unknown dimension.
+   */
+  HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE = 9,
+  /**
+   * Indicates whether the variable is constant. The value of this attribute is
+   * undefined if the symbol is not a variable. The type of this attribute is
+   * bool.
+   */
+  HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
+  /**
+   * Size of kernarg segment memory that is required to hold the values of the
+   * kernel arguments, in bytes. The value of this attribute is undefined if the
+   * symbol is not a kernel. The type of this attribute is uint32_t.
+   */
+  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
+  /**
+   * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
+   * which is the maximum of 16 and the maximum alignment of any of the kernel
+   * arguments. The value of this attribute is undefined if the symbol is not a
+   * kernel. The type of this attribute is uint32_t.
+   */
+  HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
+  /**
+   * Size of static group segment memory required by the kernel (per
+   * work-group), in bytes. The value of this attribute is undefined
+   * if the symbol is not a kernel. The type of this attribute is uint32_t.
+   *
+   * The reported amount does not include any dynamically allocated group
+   * segment memory that may be requested by the application when a kernel is
+   * dispatched.
+   */
+  HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
+  /**
+   * Size of static private, spill, and arg segment memory required by
+   * this kernel (per work-item), in bytes. The value of this attribute is
+   * undefined if the symbol is not a kernel. The type of this attribute is
+   * uint32_t.
+   *
+   * If the value of ::HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is true,
+   * the kernel may use more private memory than the reported value, and the
+   * application must add the dynamic call stack usage to @a
+   * private_segment_size when populating a kernel dispatch packet.
+   */
+  HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
+  /**
+   * Dynamic callstack flag. The value of this attribute is undefined if the
+   * symbol is not a kernel. The type of this attribute is bool.
+   *
+   * If this flag is set (the value is true), the kernel uses a dynamically
+   * sized call stack. This can happen if recursive calls, calls to indirect
+   * functions, or the HSAIL alloca instruction are present in the kernel.
+   */
+  HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
+  /**
+   * Call convention of the indirect function. The value of this attribute is
+   * undefined if the symbol is not an indirect function. The type of this
+   * attribute is uint32_t.
+   */
+  HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
+} hsa_code_symbol_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given code symbol.
+ *
+ * @param[in] code_symbol Code symbol.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which the
+ * value of the attribute is stored. If the buffer is too small to hold the
+ * value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * code symbol attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_code_symbol_get_info(
+    hsa_code_symbol_t code_symbol,
+    hsa_code_symbol_info_t attribute,
+    void *value);
+
+/**
+ * @brief Iterate over the symbols in a code object, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] code_object Code object.
+ *
+ * @param[in] callback Callback to be invoked once per code object symbol. The
+ * HSA runtime passes three arguments to the callback: the code object, a
+ * symbol, and the application data. If @p callback returns a status other than
+ * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
+ * ::hsa_code_object_iterate_symbols returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_code_object_iterate_symbols(
+    hsa_code_object_t code_object,
+    hsa_status_t (*callback)(hsa_code_object_t code_object, hsa_code_symbol_t symbol, void* data),
+    void* data);
+
+/** @} */
+
+/** \defgroup executable Executable
+ *  @{
+ */
+
+/**
+ * @brief Opaque handle to an executable, which contains the ISA for finalized
+ * kernels and indirect functions, together with the allocated global/readonly
+ * segment variables they reference.
+ */
+typedef struct hsa_executable_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_executable_t;
+
+/**
+ * @brief Executable state.
+ */
+typedef enum {
+  /**
+   * Executable state, which allows the user to load code objects and define
+   * external variables. Variable addresses, kernel code handles, and
+   * indirect function code handles are not available in query operations until
+   * the executable is frozen (zero is always returned).
+   */
+  HSA_EXECUTABLE_STATE_UNFROZEN = 0,
+  /**
+   * Executable state, which allows the user to query variable addresses,
+   * kernel code handles, and indirect function code handles using query
+   * operations. Loading new code objects, as well as defining external
+   * variables, is not allowed in this state.
+   */
+  HSA_EXECUTABLE_STATE_FROZEN = 1
+} hsa_executable_state_t;
+
+/**
+ * @brief Create an empty executable.
+ *
+ * @param[in] profile Profile used in the executable.
+ *
+ * @param[in] executable_state Executable state. If the state is
+ * ::HSA_EXECUTABLE_STATE_FROZEN, the resulting executable is useless because no
+ * code objects can be loaded, and no variables can be defined.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] executable Memory location where the HSA runtime stores the
+ * newly created executable handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p profile is invalid, or
+ * @p executable is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_create(
+    hsa_profile_t profile,
+    hsa_executable_state_t executable_state,
+    const char *options,
+    hsa_executable_t *executable);
+
+/**
+ * @brief Destroy an executable.
+ *
+ * @details The executable handle becomes invalid after the executable has been
+ * destroyed. Code object handles that were loaded into this executable are
+ * still valid after the executable has been destroyed, and can be used as
+ * intended. Resources allocated outside and associated with this executable
+ * (such as external global/readonly variables) can be released after the
+ * executable has been destroyed.
+ *
+ * An executable should not be destroyed while kernels are in flight.
+ *
+ * @param[in] executable Executable.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ */
+hsa_status_t HSA_API hsa_executable_destroy(
+    hsa_executable_t executable);
+
+/**
+ * @brief Load code object into the executable.
+ *
+ * @details Every external global/readonly variable must be defined using the
+ * variable-define operations before loading code objects. An internal
+ * global/readonly variable is allocated when the code object being loaded
+ * references it and the variable has not been allocated yet.
+ *
+ * Any module linkage declaration must have been defined either by a define
+ * variable or by loading a code object that has a symbol with module linkage
+ * definition.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] agent Agent to load code object for. The agent must support the
+ * default floating-point rounding mode used by @p code_object.
+ *
+ * @param[in] code_object Code object to load. The lifetime of the code object
+ * must exceed that of the executable: if @p code_object is destroyed before @p
+ * executable, the behavior is undefined.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_CODE_OBJECT @p code_object is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS @p agent is not compatible
+ * with @p code_object (for example, @p agent does not support the default
+ * floating-point rounding mode specified by @p code_object), or @p code_object
+ * is not compatible with @p executable (for example, @p code_object and @p
+ * executable have different machine models or profiles).
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_load_code_object(
+    hsa_executable_t executable,
+    hsa_agent_t agent,
+    hsa_code_object_t code_object,
+    const char *options);
+
+/**
+ * @brief Freeze the executable.
+ *
+ * @details No modifications to the executable can be made after freezing: no
+ * code objects can be loaded to the executable, no external variables can
+ * be defined. Freezing the executable does not prevent querying the
+ * executable's attributes.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_UNDEFINED One or more variables are
+ * undefined in the executable.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is already frozen.
+ */
+hsa_status_t HSA_API hsa_executable_freeze(
+    hsa_executable_t executable,
+    const char *options);
+
+/**
+ * @brief Executable attributes.
+ */
+typedef enum {
+  /**
+   * Profile for which this executable was created. The type of this attribute
+   * is ::hsa_profile_t.
+   */
+  HSA_EXECUTABLE_INFO_PROFILE = 1,
+  /**
+   * Executable state. The type of this attribute is ::hsa_executable_state_t.
+   */
+  HSA_EXECUTABLE_INFO_STATE = 2
+} hsa_executable_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given executable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which the
+ * value of the attribute is stored. If the buffer is too small to hold the
+ * value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * executable attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_get_info(
+    hsa_executable_t executable,
+    hsa_executable_info_t attribute,
+    void *value);
+
+/**
+ * @brief Define an external global variable with program allocation.
+ *
+ * @details This function allows the application to provide the definition
+ * of a variable in the global segment memory with program allocation. The
+ * variable must be defined before loading a code object into an executable.
+ * In addition, code objects loaded must not define the variable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] variable_name Name of the variable.
+ *
+ * @param[in] address Address where the variable is defined. The buffer pointed
+ * to by @p address is owned by the application, and must not be deallocated
+ * before @p executable is destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
+ * already defined.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
+ * name @p variable_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_global_variable_define(
+    hsa_executable_t executable,
+    const char *variable_name,
+    void *address);
+
+/**
+ * @brief Define an external global variable with agent allocation.
+ *
+ * @details This function allows the application to provide the definition
+ * of a variable in the global segment memory with agent allocation. The
+ * variable must be defined before loading a code object into an executable.
+ * In addition, code objects loaded must not define the variable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] agent Agent for which the variable is being defined.
+ *
+ * @param[in] variable_name Name of the variable.
+ *
+ * @param[in] address Address where the variable is defined. The buffer pointed
+ * to by @p address is owned by the application, and must not be deallocated
+ * before @p executable is destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
+ * already defined.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
+ * name @p variable_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_agent_global_variable_define(
+    hsa_executable_t executable,
+    hsa_agent_t agent,
+    const char *variable_name,
+    void *address);
+
+/**
+ * @brief Define an external readonly variable.
+ *
+ * @details This function allows the application to provide the definition
+ * of a variable in the readonly segment memory. The variable must be defined
+ * before loading a code object into an executable. In addition, code objects
+ * loaded must not define the variable.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] agent Agent for which the variable is being defined.
+ *
+ * @param[in] variable_name Name of the variable.
+ *
+ * @param[in] address Address where the variable is defined. The buffer pointed
+ * to by @p address is owned by the application, and must not be deallocated
+ * before @p executable is destroyed.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p variable_name is NULL.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p agent is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED The variable is
+ * already defined.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no variable with the
+ * name @p variable_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_FROZEN_EXECUTABLE @p executable is frozen.
+ */
+hsa_status_t HSA_API hsa_executable_readonly_variable_define(
+    hsa_executable_t executable,
+    hsa_agent_t agent,
+    const char *variable_name,
+    void *address);
+
+/**
+ * @brief Validate an executable. Checks that all code objects have matching
+ * machine model, profile, and default floating-point rounding mode. Checks that
+ * all declarations have definitions. Checks declaration-definition
+ * compatibility (see HSA Programming Reference Manual for compatibility rules).
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[out] result Memory location where the HSA runtime stores the
+ * validation result. If the executable is valid, the result is 0.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE @p executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p result is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_validate(
+    hsa_executable_t executable,
+    uint32_t* result);
+
+/**
+ * @brief Opaque handle to a symbol within an executable.
+ */
+typedef struct hsa_executable_symbol_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_executable_symbol_t;
+
+/**
+ * @brief Get the symbol handle for a given symbol name.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] module_name Module name. Must be NULL if the symbol has
+ * program linkage.
+ *
+ * @param[in] symbol_name Symbol name.
+ *
+ * @param[in] agent Agent associated with the symbol. If the symbol is
+ * independent of any agent (for example, a variable with program
+ * allocation), this argument is ignored.
+ *
+ * @param[in] call_convention Call convention associated with the symbol. If the
+ * symbol does not correspond to an indirect function, this argument is ignored.
+ *
+ * @param[out] symbol Memory location where the HSA runtime stores the symbol
+ * handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_SYMBOL_NAME There is no symbol with a name
+ * that matches @p symbol_name.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p symbol_name is NULL, or
+ * @p symbol is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_get_symbol(
+    hsa_executable_t executable,
+    const char *module_name,
+    const char *symbol_name,
+    hsa_agent_t agent,
+    int32_t call_convention,
+    hsa_executable_symbol_t *symbol);
+
+/**
+ * @brief Executable symbol attributes.
+ */
+typedef enum {
+  /**
+   * The kind of the symbol. The type of this attribute is ::hsa_symbol_kind_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_TYPE = 0,
+  /**
+   * The length of the symbol name. The type of this attribute is uint32_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH = 1,
+  /**
+   * The name of the symbol. The type of this attribute is a character array
+   * with the length equal to the value of the
+   * ::HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH attribute.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_NAME = 2,
+  /**
+   * The length of the module name to which this symbol belongs if this symbol
+   * has module linkage, otherwise 0 is returned. The type of this attribute is
+   * uint32_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH = 3,
+  /**
+   * The module name to which this symbol belongs if this symbol has module
+   * linkage, otherwise an empty string is returned. The type of this attribute
+   * is a character array with the length equal to the value of the
+   * ::HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME_LENGTH attribute.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_MODULE_NAME = 4,
+  /**
+   * Agent associated with this symbol. If the symbol is a variable, the
+   * value of this attribute is only defined if
+   * ::HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION is
+   * ::HSA_VARIABLE_ALLOCATION_AGENT. The type of this attribute is hsa_agent_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_AGENT = 20,
+  /**
+   * The address of the variable. The value of this attribute is undefined if
+   * the symbol is not a variable. The type of this attribute is uint64_t.
+   *
+   * If executable's state is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0 is
+   * returned.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS = 21,
+  /**
+   * The linkage kind of the symbol. The type of this attribute is
+   * ::hsa_symbol_linkage_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_LINKAGE = 5,
+  /**
+   * Indicates whether the symbol corresponds to a definition. The type of this
+   * attribute is bool.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_IS_DEFINITION = 17,
+  /**
+   * The allocation kind of the variable. The value of this attribute is
+   * undefined if the symbol is not a variable. The type of this attribute is
+   * ::hsa_variable_allocation_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALLOCATION = 6,
+  /**
+   * The segment kind of the variable. The value of this attribute is undefined
+   * if the symbol is not a variable. The type of this attribute is
+   * ::hsa_variable_segment_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SEGMENT = 7,
+  /**
+   * Alignment of the variable. The value of this attribute is undefined if
+   * the symbol is not a variable. The type of this attribute is uint32_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ALIGNMENT = 8,
+  /**
+   * Size of the variable. The value of this attribute is undefined if
+   * the symbol is not a variable. The type of this attribute is uint32_t.
+   *
+   * A value of 0 is returned if the variable is an external variable and has an
+   * unknown dimension.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE = 9,
+  /**
+   * Indicates whether the variable is constant. The value of this attribute is
+   * undefined if the symbol is not a variable. The type of this attribute is
+   * bool.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_IS_CONST = 10,
+  /**
+   * Kernel object handle, used in the kernel dispatch packet. The value of this
+   * attribute is undefined if the symbol is not a kernel. The type of this
+   * attribute is uint64_t.
+   *
+   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
+   * is returned.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT = 22,
+  /**
+   * Size of kernarg segment memory that is required to hold the values of the
+   * kernel arguments, in bytes. The value of this attribute is undefined if the
+   * symbol is not a kernel. The type of this attribute is uint32_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE = 11,
+  /**
+   * Alignment (in bytes) of the buffer used to pass arguments to the kernel,
+   * which is the maximum of 16 and the maximum alignment of any of the kernel
+   * arguments. The value of this attribute is undefined if the symbol is not a
+   * kernel. The type of this attribute is uint32_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT = 12,
+  /**
+   * Size of static group segment memory required by the kernel (per
+   * work-group), in bytes. The value of this attribute is undefined
+   * if the symbol is not a kernel. The type of this attribute is uint32_t.
+   *
+   * The reported amount does not include any dynamically allocated group
+   * segment memory that may be requested by the application when a kernel is
+   * dispatched.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE = 13,
+  /**
+   * Size of static private, spill, and arg segment memory required by
+   * this kernel (per work-item), in bytes. The value of this attribute is
+   * undefined if the symbol is not a kernel. The type of this attribute is
+   * uint32_t.
+   *
+   * If the value of ::HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK is
+   * true, the kernel may use more private memory than the reported value, and
+   * the application must add the dynamic call stack usage to @a
+   * private_segment_size when populating a kernel dispatch packet.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE = 14,
+  /**
+   * Dynamic callstack flag. The value of this attribute is undefined if the
+   * symbol is not a kernel. The type of this attribute is bool.
+   *
+   * If this flag is set (the value is true), the kernel uses a dynamically
+   * sized call stack. This can happen if recursive calls, calls to indirect
+   * functions, or the HSAIL alloca instruction are present in the kernel.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK = 15,
+  /**
+   * Indirect function object handle. The value of this attribute is undefined
+   * if the symbol is not an indirect function, or the associated agent does
+   * not support the Full Profile. The type of this attribute depends on the
+   * machine model: if machine model is small, then the type is uint32_t, if
+   * machine model is large, then the type is uint64_t.
+   *
+   * If the state of the executable is ::HSA_EXECUTABLE_STATE_UNFROZEN, then 0
+   * is returned.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT = 23,
+  /**
+   * Call convention of the indirect function. The value of this attribute is
+   * undefined if the symbol is not an indirect function, or the associated
+   * agent does not support the Full Profile. The type of this attribute is
+   * uint32_t.
+   */
+  HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION = 16
+} hsa_executable_symbol_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given executable symbol.
+ *
+ * @param[in] executable_symbol Executable symbol.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer in which the
+ * value of the attribute is stored. If the buffer is too small to hold the
+ * value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * executable symbol attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_symbol_get_info(
+    hsa_executable_symbol_t executable_symbol,
+    hsa_executable_symbol_info_t attribute,
+    void *value);
+
+/**
+ * @brief Iterate over the symbols in an executable, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] executable Executable.
+ *
+ * @param[in] callback Callback to be invoked once per executable symbol. The
+ * HSA runtime passes three arguments to the callback: the executable, a symbol,
+ * and the application data. If @p callback returns a status other than
+ * ::HSA_STATUS_SUCCESS for a particular iteration, the traversal stops and
+ * ::hsa_executable_iterate_symbols returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_EXECUTABLE The executable is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_executable_iterate_symbols(
+    hsa_executable_t executable,
+    hsa_status_t (*callback)(hsa_executable_t executable, hsa_executable_symbol_t symbol, void* data),
+    void* data);
+
+/** @} */
+
+#ifdef __cplusplus
+}  // end extern "C" block
+#endif
+
+#endif  // header guard
diff --git a/libgomp/hsa_ext_finalize.h b/libgomp/hsa_ext_finalize.h
new file mode 100644
index 0000000..7147f93
--- /dev/null
+++ b/libgomp/hsa_ext_finalize.h
@@ -0,0 +1,527 @@ 
+////////////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 ADVANCED MICRO DEVICES, INC.
+//
+// AMD is granting you permission to use this software and documentation(if any)
+// (collectively, the "Materials") pursuant to the terms and conditions of the
+// Software License Agreement included with the Materials.If you do not have a
+// copy of the Software License Agreement, contact your AMD representative for a
+// copy.
+//
+// You agree that you will not reverse engineer or decompile the Materials, in
+// whole or in part, except as allowed by applicable law.
+//
+// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY,
+// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE
+// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM
+// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF
+// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion
+// of implied warranties, so the above exclusion may not apply to You.
+//
+// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT,
+// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT,
+// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF
+// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total
+// liability to You for all damages, losses, and causes of action (whether in
+// contract, tort (including negligence) or otherwise) exceed the amount of $100
+// USD.  You agree to defend, indemnify and hold harmless AMD and its licensors,
+// and any of their directors, officers, employees, affiliates or agents from
+// and against any and all loss, damage, liability and other expenses (including
+// reasonable attorneys' fees), resulting from Your use of the Software or
+// violation of the terms and conditions of this Agreement.
+//
+// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with
+// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is
+// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 -
+// 7013, et seq., or its successor.Use of the Materials by the Government
+// constitutes acknowledgement of AMD's proprietary rights in them.
+//
+// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as
+//                      stated in the Software License Agreement.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
+#define HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
+
+#include "hsa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+typedef void* BrigModule_t;
+
+/** \defgroup ext-alt-finalizer-extensions Finalization Extensions
+ *  @{
+ */
+
+/**
+ * @brief Enumeration constants added to ::hsa_status_t by this extension.
+ */
+enum {
+  /**
+   * The HSAIL program is invalid.
+   */
+  HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
+  /**
+   * The HSAIL module is invalid.
+   */
+  HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
+  /**
+   * Machine model or profile of the HSAIL module do not match the machine model
+   * or profile of the HSAIL program.
+   */
+  HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
+  /**
+   * The HSAIL module is already a part of the HSAIL program.
+   */
+  HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
+  /**
+   * Compatibility mismatch between symbol declaration and symbol definition.
+   */
+  HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
+  /**
+   * The finalization encountered an error while finalizing a kernel or
+   * indirect function.
+   */
+  HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
+  /**
+   * Mismatch between a directive in the control directive structure and in
+   * the HSAIL kernel.
+   */
+  HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
+};
+
+/** @} */
+
+/** \defgroup ext-alt-finalizer-program Finalization Program
+ *  @{
+ */
+
+/**
+ * @brief HSAIL (BRIG) module. The HSA Programmer's Reference Manual contains
+ * the definition of the BrigModule_t type.
+ */
+typedef BrigModule_t hsa_ext_module_t;
+
+/**
+ * @brief An opaque handle to a HSAIL program, which groups a set of HSAIL
+ * modules that collectively define functions and variables used by kernels and
+ * indirect functions.
+ */
+typedef struct hsa_ext_program_s {
+  /**
+   * Opaque handle.
+   */
+  uint64_t handle;
+} hsa_ext_program_t;
+
+/**
+ * @brief Create an empty HSAIL program.
+ *
+ * @param[in] machine_model Machine model used in the HSAIL program.
+ *
+ * @param[in] profile Profile used in the HSAIL program.
+ *
+ * @param[in] default_float_rounding_mode Default float rounding mode used in
+ * the HSAIL program.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[out] program Memory location where the HSA runtime stores the newly
+ * created HSAIL program handle.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p machine_model is invalid,
+ * @p profile is invalid, @p default_float_rounding_mode is invalid, or
+ * @p program is NULL.
+ */
+hsa_status_t HSA_API hsa_ext_program_create(
+    hsa_machine_model_t machine_model,
+    hsa_profile_t profile,
+    hsa_default_float_rounding_mode_t default_float_rounding_mode,
+    const char *options,
+    hsa_ext_program_t *program);
+
+/**
+ * @brief Destroy a HSAIL program.
+ *
+ * @details The HSAIL program handle becomes invalid after it has been
+ * destroyed. Code object handles produced by ::hsa_ext_program_finalize are
+ * still valid after the HSAIL program has been destroyed, and can be used as
+ * intended. Resources allocated outside and associated with the HSAIL program
+ * (such as HSAIL modules that are added to the HSAIL program) can be released
+ * after the HSAIL program has been destroyed.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
+ * invalid.
+ */
+hsa_status_t HSA_API hsa_ext_program_destroy(
+    hsa_ext_program_t program);
+
+/**
+ * @brief Add a HSAIL module to an existing HSAIL program.
+ *
+ * @details The HSA runtime does not perform a deep copy of the HSAIL module
+ * upon addition. Instead, it stores a pointer to the HSAIL module. The
+ * ownership of the HSAIL module belongs to the application, which must ensure
+ * that @p module is not released before destroying the HSAIL program.
+ *
+ * The HSAIL module is successfully added to the HSAIL program if @p module is
+ * valid, if all the declarations and definitions for the same symbol are
+ * compatible, and if @p module specifies a machine model and profile that
+ * match the HSAIL program.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] module HSAIL module. The application can add the same HSAIL module
+ * to @p program at most once. The HSAIL module must specify the same machine
+ * model and profile as @p program. If the floating-point rounding mode of @p
+ * module is not default, then it should match that of @p program.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_MODULE The HSAIL module is invalid.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE The machine model of @p
+ * module does not match machine model of @p program, or the profile of @p
+ * module does not match profile of @p program.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED The HSAIL module is
+ * already a part of the HSAIL program.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH Symbol declaration and symbol
+ * definition compatibility mismatch. See the symbol compatibility rules in the
+ * HSA Programmer's Reference Manual.
+ */
+hsa_status_t HSA_API hsa_ext_program_add_module(
+    hsa_ext_program_t program,
+    hsa_ext_module_t module);
+
+/**
+ * @brief Iterate over the HSAIL modules in a program, and invoke an
+ * application-defined callback on every iteration.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] callback Callback to be invoked once per HSAIL module in the
+ * program. The HSA runtime passes three arguments to the callback: the program,
+ * a HSAIL module, and the application data.  If @p callback returns a status
+ * other than ::HSA_STATUS_SUCCESS for a particular iteration, the traversal
+ * stops and ::hsa_ext_program_iterate_modules returns that status value.
+ *
+ * @param[in] data Application data that is passed to @p callback on every
+ * iteration. May be NULL.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The program is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p callback is NULL.
+ */
+hsa_status_t HSA_API hsa_ext_program_iterate_modules(
+    hsa_ext_program_t program,
+    hsa_status_t (*callback)(hsa_ext_program_t program, hsa_ext_module_t module,
+                             void* data),
+    void* data);
+
+/**
+ * @brief HSAIL program attributes.
+ */
+typedef enum {
+  /**
+   * Machine model specified when the HSAIL program was created. The type
+   * of this attribute is ::hsa_machine_model_t.
+   */
+  HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
+  /**
+   * Profile specified when the HSAIL program was created. The type of
+   * this attribute is ::hsa_profile_t.
+   */
+  HSA_EXT_PROGRAM_INFO_PROFILE = 1,
+  /**
+   * Default float rounding mode specified when the HSAIL program was
+   * created. The type of this attribute is ::hsa_default_float_rounding_mode_t.
+   */
+  HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
+} hsa_ext_program_info_t;
+
+/**
+ * @brief Get the current value of an attribute for a given HSAIL program.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] attribute Attribute to query.
+ *
+ * @param[out] value Pointer to an application-allocated buffer where to store
+ * the value of the attribute. If the buffer passed by the application is not
+ * large enough to hold the value of @p attribute, the behavior is undefined.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p attribute is an invalid
+ * HSAIL program attribute, or @p value is NULL.
+ */
+hsa_status_t HSA_API hsa_ext_program_get_info(
+    hsa_ext_program_t program,
+    hsa_ext_program_info_t attribute,
+    void *value);
+
+/**
+ * @brief Finalizer-determined call convention.
+ */
+typedef enum {
+ /**
+  * Finalizer-determined call convention.
+  */
+  HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
+} hsa_ext_finalizer_call_convention_t;
+
+/**
+ * @brief Control directives specify low-level information about the
+ * finalization process.
+ */
+typedef struct hsa_ext_control_directives_s {
+  /**
+   * Bitset indicating which control directives are enabled. The bit assigned to
+   * a control directive is determined by the corresponding value in
+   * BrigControlDirective.
+   *
+   * If a control directive is disabled, its corresponding field value (if any)
+   * must be 0. Control directives that are only present or absent (such as
+   * partial workgroups) have no corresponding field as the presence of the bit
+   * in this mask is sufficient.
+   */
+  uint64_t control_directives_mask;
+  /**
+   * Bitset of HSAIL exceptions that must have the BREAK policy enabled. The bit
+   * assigned to an HSAIL exception is determined by the corresponding value
+   * in BrigExceptionsMask. If the kernel contains an enablebreakexceptions
+   * control directive, the finalizer uses the union of the two masks.
+   */
+  uint16_t break_exceptions_mask;
+  /**
+   * Bitset of HSAIL exceptions that must have the DETECT policy enabled. The
+   * bit assigned to an HSAIL exception is determined by the corresponding value
+   * in BrigExceptionsMask. If the kernel contains an enabledetectexceptions
+   * control directive, the finalizer uses the union of the two masks.
+   */
+  uint16_t detect_exceptions_mask;
+  /**
+   * Maximum size (in bytes) of dynamic group memory that will be allocated by
+   * the application for any dispatch of the kernel.  If the kernel contains a
+   * maxdynamicsize control directive, the two values should match.
+   */
+  uint32_t max_dynamic_group_size;
+  /**
+   * Maximum number of grid work-items that will be used by the application to
+   * launch the kernel. If the kernel contains a maxflatgridsize control
+   * directive, the value of @a max_flat_grid_size must not be greater than the
+   * value of the directive, and takes precedence.
+   *
+   * The value specified for maximum absolute grid size must be greater than or
+   * equal to the product of the values specified by @a required_grid_size.
+   *
+   * If the bit at position BRIG_CONTROL_MAXFLATGRIDSIZE is set in @a
+   * control_directives_mask, this field must be greater than 0.
+   */
+  uint64_t max_flat_grid_size;
+  /**
+   * Maximum number of work-group work-items that will be used by the
+   * application to launch the kernel. If the kernel contains a
+   * maxflatworkgroupsize control directive, the value of @a
+   * max_flat_workgroup_size must not be greater than the value of the
+   * directive, and takes precedence.
+   *
+   * The maximum absolute work-group size must be greater than or equal to the
+   * product of the values specified by @a required_workgroup_size.
+   *
+   * If the bit at position BRIG_CONTROL_MAXFLATWORKGROUPSIZE is set in @a
+   * control_directives_mask, this field must be greater than 0.
+   */
+  uint32_t max_flat_workgroup_size;
+  /**
+   * Reserved. Must be 0.
+   */
+  uint32_t reserved1;
+  /**
+   * Grid size that will be used by the application in any dispatch of the
+   * kernel. If the kernel contains a requiredgridsize control directive, the
+   * dimensions should match.
+   *
+   * The specified grid size must be consistent with @a required_workgroup_size
+   * and @a required_dim. Also, the product of the three dimensions must not
+   * exceed @a max_flat_grid_size. Note that the listed invariants must hold
+   * only if all the corresponding control directives are enabled.
+   *
+   * If the bit at position BRIG_CONTROL_REQUIREDGRIDSIZE is set in @a
+   * control_directives_mask, the three dimension values must be greater than 0.
+   */
+  uint64_t required_grid_size[3];
+  /**
+   * Work-group size that will be used by the application in any dispatch of the
+   * kernel. If the kernel contains a requiredworkgroupsize control directive,
+   * the dimensions should match.
+   *
+   * The specified work-group size must be consistent with @a required_grid_size
+   * and @a required_dim. Also, the product of the three dimensions must not
+   * exceed @a max_flat_workgroup_size. Note that the listed invariants must
+   * hold only if all the corresponding control directives are enabled.
+   *
+   * If the bit at position BRIG_CONTROL_REQUIREDWORKGROUPSIZE is set in @a
+   * control_directives_mask, the three dimension values must be greater than 0.
+   */
+  hsa_dim3_t required_workgroup_size;
+  /**
+   * Number of dimensions that will be used by the application to launch the
+   * kernel. If the kernel contains a requireddim control directive, the two
+   * values should match.
+   *
+   * The specified dimensions must be consistent with @a required_grid_size and
+   * @a required_workgroup_size. This invariant must hold only if all the
+   * corresponding control directives are enabled.
+   *
+   * If the bit at position BRIG_CONTROL_REQUIREDDIM is set in @a
+   * control_directives_mask, this field must be 1, 2, or 3.
+   */
+  uint8_t required_dim;
+  /**
+   * Reserved. Must be 0.
+   */
+  uint8_t reserved2[75];
+} hsa_ext_control_directives_t;
+
+/**
+ * @brief Finalize an HSAIL program for a given instruction set architecture.
+ *
+ * @details Finalize all of the kernels and indirect functions that belong to
+ * the same HSAIL program for a specific instruction set architecture (ISA). The
+ * transitive closure of all functions specified by call or scall must be
+ * defined. Kernels and indirect functions that are being finalized must be
+ * defined. Kernels and indirect functions that are referenced in kernels and
+ * indirect functions being finalized may or may not be defined, but must be
+ * declared. All the global/readonly segment variables that are referenced in
+ * kernels and indirect functions being finalized may or may not be defined, but
+ * must be declared.
+ *
+ * @param[in] program HSAIL program.
+ *
+ * @param[in] isa Instruction set architecture to finalize for.
+ *
+ * @param[in] call_convention A call convention used in a finalization. Must
+ * have a value between ::HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO (inclusive)
+ * and the value of the attribute ::HSA_ISA_INFO_CALL_CONVENTION_COUNT in @p
+ * isa (not inclusive).
+ *
+ * @param[in] control_directives Low-level control directives that influence
+ * the finalization process.
+ *
+ * @param[in] options Vendor-specific options. May be NULL.
+ *
+ * @param[in] code_object_type Type of code object to produce.
+ *
+ * @param[out] code_object Code object generated by the Finalizer, which
+ * contains the machine code for the kernels and indirect functions in the HSAIL
+ * program. The code object is independent of the HSAIL module that was used to
+ * generate it.
+ *
+ * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
+ *
+ * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
+ * initialized.
+ *
+ * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES There is a failure to allocate
+ * resources required for the operation.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_INVALID_PROGRAM The HSAIL program is
+ * invalid.
+ *
+ * @retval ::HSA_STATUS_ERROR_INVALID_ISA @p isa is invalid.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH The directive in
+ * the control directive structure and in the HSAIL kernel mismatch, or if the
+ * same directive is used with a different value in one of the functions used by
+ * this kernel.
+ *
+ * @retval ::HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED The Finalizer
+ * encountered an error while compiling a kernel or an indirect function.
+ */
+hsa_status_t HSA_API hsa_ext_program_finalize(
+    hsa_ext_program_t program,
+    hsa_isa_t isa,
+    int32_t call_convention,
+    hsa_ext_control_directives_t control_directives,
+    const char *options,
+    hsa_code_object_type_t code_object_type,
+    hsa_code_object_t *code_object);
+
+/** @} */
+
+#define hsa_ext_finalizer_1_00
+
+typedef struct hsa_ext_finalizer_1_00_pfn_s {
+  hsa_status_t (*hsa_ext_program_create)(
+      hsa_machine_model_t machine_model, hsa_profile_t profile,
+      hsa_default_float_rounding_mode_t default_float_rounding_mode,
+      const char *options, hsa_ext_program_t *program);
+
+  hsa_status_t (*hsa_ext_program_destroy)(hsa_ext_program_t program);
+
+  hsa_status_t (*hsa_ext_program_add_module)(hsa_ext_program_t program,
+                                                 hsa_ext_module_t module);
+
+  hsa_status_t (*hsa_ext_program_iterate_modules)(
+      hsa_ext_program_t program,
+      hsa_status_t (*callback)(hsa_ext_program_t program,
+                               hsa_ext_module_t module, void *data),
+      void *data);
+
+  hsa_status_t (*hsa_ext_program_get_info)(
+      hsa_ext_program_t program, hsa_ext_program_info_t attribute,
+      void *value);
+
+  hsa_status_t (*hsa_ext_program_finalize)(
+      hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
+      hsa_ext_control_directives_t control_directives, const char *options,
+      hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
+} hsa_ext_finalizer_1_00_pfn_t;
+
+#ifdef __cplusplus
+} // extern "C" block
+#endif // __cplusplus
+
+#endif // HSA_RUNTIME_INC_HSA_EXT_FINALIZE_H_
diff --git a/libgomp/hsaokra.c b/libgomp/hsaokra.c
deleted file mode 100644
index c41b86b..0000000
--- a/libgomp/hsaokra.c
+++ /dev/null
@@ -1,177 +0,0 @@ 
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <dlfcn.h>
-#include "okra.h"
-
-typedef okra_status_t (*okra_get_context_func_t)(okra_context_t**);
-typedef okra_status_t (*okra_kernel_create_from_binary_func_t)(okra_context_t *, const char *, size_t , const char *, okra_kernel_t **);
-typedef okra_status_t (*okra_push_pointer_func_t)(okra_kernel_t* , void* );
-typedef okra_status_t (*okra_execute_kernel_func_t)(okra_context_t*, okra_kernel_t* , okra_range_t* );
-typedef okra_status_t (*okra_clear_args_func_t)(okra_kernel_t* );
-typedef okra_status_t (*okra_dispose_kernel_func_t)(okra_kernel_t*);
-
-static void *okralib;
-static okra_get_context_func_t     _okra_get_context;
-static okra_kernel_create_from_binary_func_t   _okra_kernel_create_from_binary;
-static okra_push_pointer_func_t    _okra_push_pointer;
-static okra_execute_kernel_func_t  _okra_execute_kernel;
-static okra_clear_args_func_t      _okra_clear_args;
-
-/* Returns false on error.  */
-static bool
-loadokra (void)
-{
-  if (okralib)
-    return okralib != (void*) -1;
-  okralib = dlopen("libokra_x86_64.so", RTLD_LAZY);
-  if (!okralib)
-    {
-      okralib = (void*) -1;
-      fprintf(stderr, "Unable to load libokra_x86_64.so\n");
-      return false;
-    }
-   _okra_get_context         = (okra_get_context_func_t) dlsym(okralib, "okra_get_context");
-   _okra_kernel_create_from_binary = (okra_kernel_create_from_binary_func_t)dlsym(okralib, "okra_create_kernel_from_binary");
-   _okra_push_pointer        = (okra_push_pointer_func_t) dlsym(okralib, "okra_push_pointer");
-   _okra_execute_kernel      = (okra_execute_kernel_func_t) dlsym(okralib, "okra_execute_kernel");
-   _okra_clear_args          = (okra_clear_args_func_t) dlsym(okralib, "okra_clear_args");
-
-   if (!_okra_get_context
-       || !_okra_kernel_create_from_binary
-       || !_okra_push_pointer
-       || !_okra_execute_kernel
-       || !_okra_clear_args)
-     {
-       dlclose (okralib);
-       okralib = (void*) -1;
-       fprintf (stderr, "Cannot find OKRA symbols\n");
-       return false;
-     }
-
-   return true;
-}
-
-typedef union __hsa_kernelarg_
-{
-  void* addr;             ///< pointer to a buffer
-  int32_t s32value;       ///< signed 32 bit value
-  uint32_t u32value;      ///< unsigned 32 bit value
-  float fvalue;           ///< float value
-  double dvalue;          ///< double value
-  int64_t s64value;       ///< signed 64 bit value
-  uint64_t u64value;      ///< unsigned 64 bit value
-} __hsa_kernelarg;
-
-typedef struct __hsa_launch_attrs_
-{
-  uint64_t flags;
-  uint64_t grid[3];
-  uint64_t group[3];
-} __hsa_launch_attrs;
-
-typedef struct __hsa_kernel_desc_
-{
-  const char *filename;
-  const char *name;
-  uint64_t nargs;
-  okra_context_t *kernel;
-  okra_context_t *context;
-} __hsa_kernel_desc;
-
-typedef okra_range_t __hsa_launch_range;
-
-void * __hsa_launch_kernel (__hsa_kernel_desc *, __hsa_launch_range *,
-			    __hsa_kernelarg *);
-
-void *
-__hsa_launch_kernel (__hsa_kernel_desc * _kd, __hsa_launch_range *range_p,
-		     __hsa_kernelarg *args)
-{
-  okra_status_t status;
-  okra_context_t *context;
-  okra_kernel_t *kernel;
-  okra_range_t range;
-  unsigned int i = 0;
-  static int debug = 0;
-
-  if (!loadokra ())
-    return NULL;
-
-  if (!debug)
-    {
-      if (getenv ("HSA_DEBUG"))
-	debug = 1;
-      else
-	debug = -1;
-    }
-
-  if (_kd->context)
-    context = _kd->context ;
-  else {
-    status = _okra_get_context(&context);
-    if (status != OKRA_SUCCESS)
-      {
-	fprintf (stderr, "Unable to create context\n");
-	return NULL;
-      }
-      _kd->context = context ;
-    }
-  if (_kd->kernel)
-    {
-      kernel = _kd->kernel;
-    }
-  else
-    {
-      size_t size = 1;
-      const char* pfile;
-      const char* fileName = _kd->filename;
-      if (_kd->filename[0] == 0)
-	fileName = "hsakernel.o";
-      pfile = (const char *) fopen (fileName, "rb");
-      if (!pfile)
-	{
-	  fprintf (stderr, "Unable to open file %s\n", fileName);
-	  return NULL;
-	}
-      status = _okra_kernel_create_from_binary(context, pfile, size, _kd->name, &kernel);
-      fclose((FILE *)pfile);
-      if (status != OKRA_SUCCESS)
-	{
-	  fprintf (stderr, "Unable to create Kernel\n");
-	  return NULL;
-	}
-      _kd->kernel = kernel;
-    }
-
-  _okra_clear_args (kernel);
-  for (; i < _kd->nargs; i++) {
-      void *cur_args = args[i].addr;
-      _okra_push_pointer (kernel, cur_args);
-  }
-  /* set launch dimensions */
-  range.dimension = 1;
-  range.global_size[0] = 256;
-  range.group_size[0] = 16;
-  if (!range_p)
-    {
-      range.dimension = 1;
-      range.global_size[0] = 256;
-      range.group_size[0] = 16;
-      range_p = &range;
-    }
-  if (debug > 0)
-    {
-      fprintf (stderr, "HSA: launching kernel %s\n", _kd->name);
-      fprintf (stderr, "dim: %u, s0: %u, g0: %u, r: %u\n", range_p->dimension,
-	       range_p->global_size[0], range_p->group_size[0],
-	       range_p->reserved);
-    }
-  status = _okra_execute_kernel (context, kernel, range_p);
-  if (status != OKRA_SUCCESS)
-    {
-      fprintf (stderr, "Failed to launch kernel\n");
-      return NULL;
-    }
-  return kernel;
-}
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 208132e..b8f86e1 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -236,4 +236,5 @@  GOMP_4.0.1 {
 HSA_1.0 {
   global:
 	__hsa_launch_kernel;
+	__hsa_register_image;
 } GOMP_4.0.1;
diff --git a/libgomp/okra.h b/libgomp/okra.h
deleted file mode 100644
index 24d370a..0000000
--- a/libgomp/okra.h
+++ /dev/null
@@ -1,147 +0,0 @@ 
-/*
-OKRA Runtime C interface
-*/
-
-#ifndef OKRA_H
-#define OKRA_H
-
-#include <stdint.h>
-#include <stddef.h>
-
-#if defined (_WIN32)
-   #ifndef __EXPORT__
-      #define OKRA_API __declspec(dllimport)
-   #else
-      #define OKRA_API __declspec(dllexport)
-   #endif
-#else
-   #define OKRA_API 
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-//opaque okra context
-typedef uint64_t okra_context_t;
-
-//opaque okra kernel
-typedef uint64_t okra_kernel_t;
-
-//launch attributes that defines execution range
-typedef struct okra_range_s
-{
-  uint32_t dimension;        //max value is 3
-  uint32_t global_size[3];
-  uint32_t group_size[3];
-  uint32_t reserved;         //For future use
-} okra_range_t;
-
-
-//This is the list of errors that okra supports
-//@Note: Will add more error codes as needed
-typedef enum okra_status_t {
-   OKRA_SUCCESS=0,
-   OKRA_CONTEXT_NO_DEVICE_FOUND,
-   OKRA_CONTEXT_QUEUE_CREATION_FAILED,
-   OKRA_SET_MEMORY_POLICY_FAILED,
-   OKRA_MEMORY_REGISTRATION_FAILED,
-   OKRA_MEMORY_DEREGISTRATION_FAILED,
-   OKRA_CONTEXT_CREATE_FAILED,
-   OKRA_CONTEXT_ALREADY_EXIST,
-   OKRA_KERNEL_HSAIL_ASSEMBLING_FAILED,
-   OKRA_KERNEL_FINALIZE_FAILED,
-   OKRA_KERNEL_CREATE_FAILED,
-   OKRA_KERNEL_ELF_INITIALIZATION_FAILED,
-   OKRA_KERNEL_INVALID_ELF_CONTAINER,
-   OKRA_KERNEL_INVALID_SECTION_HEADER,
-   OKRA_KERNEL_MISSING_STRING_SECTION,
-   OKRA_KERNEL_MISSING_DIRECTIVE_SECTION,
-   OKRA_KERNEL_MISSING_CODE_SECTION,
-   OKRA_KERNEL_MISSING_OPERANDS_SECTION,
-   OKRA_KERNEL_MISSING_DEBUG_SECTION,
-   OKRA_LOAD_BRIG_FAILED,
-   OKRA_UNLOAD_BRIG_FAILED,
-   OKRA_KERNEL_CREATE_FROM_BINARY_FAILED,
-   OKRA_KERNEL_PUSH_KERNARG_FAILED,
-   OKRA_KERNEL_CLEARARG_FAILED,
-   OKRA_RANGE_INVALID_DIMENSION,
-   OKRA_RANGE_INVALID_GLOBAL_SIZE,
-   OKRA_RANGE_INVALID_GROUP_SIZE,
-   OKRA_EXECUTE_FAILED,
-   OKRA_DISPOSE_FAILED,
-   OKRA_INVALID_ARGUMENT,
-   OKRA_UNKNOWN
-}okra_status_t;
-
-//Get a okra context - does device detection, command queue creation internally
-//Note context is singleton at the moment - may change later if requirement
-//changes
-//This means you have one context, device and queue per process, but sufficient
-//for most common cases
-okra_status_t OKRA_API okra_get_context(okra_context_t** context);
-
-//create kernel that can be dispatched - takes in hsail text as input and creates
-// a kernel - does HSAIL assembling and finalize
-okra_status_t OKRA_API okra_create_kernel(okra_context_t* context, 
-                        const char *hsail_source, const char *entryName, 
-                        okra_kernel_t **kernel);
-
-//create kernel that can be dispatched - takes in binary as input and creates a
-//kernel
-okra_status_t OKRA_API okra_create_kernel_from_binary(okra_context_t *context, 
-                        const char *binary, size_t size, const char *entryName,
-                        okra_kernel_t **kernel);
-
-//Following are set of apis to push kernel args to the kernel
-//for pointers and objects
-okra_status_t OKRA_API okra_push_pointer(okra_kernel_t* kernel, 
-                        void* address);
-
-//unsigned char is equivalent for jboolean-use this for passing boolean 
-//if using from java world
-okra_status_t OKRA_API okra_push_boolean(okra_kernel_t* kernel, 
-                        unsigned char value);
-
-//char is equivalent for jbyte-use this for passing byte if using
-//from java world
-okra_status_t OKRA_API okra_push_byte(okra_kernel_t* kernel, 
-                        char value);   
-
-//use this if passing jdouble from java world
-okra_status_t OKRA_API okra_push_double(okra_kernel_t* kernel, 
-                        double value);
-
-//use this if passing jfloat from java world
-okra_status_t OKRA_API okra_push_float(okra_kernel_t* kernel, 
-                        float value);
-
-//use this if passing jint from java world
-okra_status_t OKRA_API okra_push_int(okra_kernel_t* kernel, 
-                        int value);
-
-//use this if passing jlong from java world
-okra_status_t OKRA_API okra_push_long(okra_kernel_t* kernel, 
-                        long value);
-
-// Call clearargs between executions of a kernel before setting the new args
-okra_status_t OKRA_API okra_clear_args(okra_kernel_t* kernel);
-//end of kernel arg related APIs
-
-//execute the kernel - takes kernel, execution range as input
-//This is a synchronous call - returns only after kernel completion
-//If the user pass 0's for group size, the runtime will choose one
-okra_status_t OKRA_API okra_execute_kernel(okra_context_t* context, okra_kernel_t* kernel, okra_range_t* range);
-
-//cleanup kernel
-okra_status_t OKRA_API okra_dispose_kernel(okra_kernel_t* kernel);
-
-//cleanup any resource allocated by okra context
-okra_status_t OKRA_API okra_dispose_context(okra_context_t* context);
-
-
-#ifdef __cplusplus
-} // end of extern "C"
-#endif
-
-#endif //OKRA_H