@@ -128,6 +128,7 @@ extern void GOMP_PLUGIN_target_rev (uint64_t, uint64_t, uint64_t, uint64_t,
/* Prototypes for functions implemented by libgomp plugins. */
extern const char *GOMP_OFFLOAD_get_name (void);
extern unsigned int GOMP_OFFLOAD_get_caps (void);
+extern unsigned int GOMP_OFFLOAD_get_dev_caps (int);
extern int GOMP_OFFLOAD_get_type (void);
extern int GOMP_OFFLOAD_get_num_devices (unsigned int);
extern bool GOMP_OFFLOAD_init_device (int);
@@ -1402,6 +1402,7 @@ struct gomp_device_descr
/* Function handlers. */
__typeof (GOMP_OFFLOAD_get_name) *get_name_func;
__typeof (GOMP_OFFLOAD_get_caps) *get_caps_func;
+ __typeof (GOMP_OFFLOAD_get_dev_caps) *get_dev_caps_func;
__typeof (GOMP_OFFLOAD_get_type) *get_type_func;
__typeof (GOMP_OFFLOAD_get_num_devices) *get_num_devices_func;
__typeof (GOMP_OFFLOAD_init_device) *init_device_func;
@@ -3321,9 +3321,43 @@ GOMP_OFFLOAD_get_name (void)
unsigned int
GOMP_OFFLOAD_get_caps (void)
{
- /* FIXME: Enable shared memory for APU, but not discrete GPU. */
- return /*GOMP_OFFLOAD_CAP_SHARED_MEM |*/ GOMP_OFFLOAD_CAP_OPENMP_400
- | GOMP_OFFLOAD_CAP_OPENACC_200;
+ /* Only the capabilities that do not depend on the individual device are
+    returned here; GOMP_OFFLOAD_CAP_SHARED_MEM is decided per device in
+    GOMP_OFFLOAD_get_dev_caps.  */
+ return GOMP_OFFLOAD_CAP_OPENMP_400 | GOMP_OFFLOAD_CAP_OPENACC_200;
+}
+
+/* Return any capabilities that are specific to one device only, as a mask
+   of GOMP_OFFLOAD_CAP_* bits.  N is the index of the device in the agent
+   table.  Returns 0 if N is out of range or if any HSA query fails.  */
+
+unsigned int
+GOMP_OFFLOAD_get_dev_caps (int n)
+{
+  /* The device agents have been enumerated, but might not have been
+     initialized, so get_agent_info won't work here.  Index the agent table
+     directly instead, but never with an out-of-range N.  */
+  if (n < 0 || n >= hsa_context.agent_count)
+    return 0;
+  struct agent_info *agent = &hsa_context.agents[n];
+
+  char name[64];
+  hsa_status_t status = hsa_fns.hsa_agent_get_info_fn (agent->id,
+                                                       HSA_AGENT_INFO_NAME,
+                                                       name);
+  if (status != HSA_STATUS_SUCCESS)
+    return 0;
+
+  gcn_isa device_isa = isa_code (name);
+  unsigned int caps = 0;
+
+  /* APU devices might have shared memory.
+     Don't add devices to this check if they support shared memory
+     via XNACK and page migration!  */
+  if (device_isa == EF_AMDGPU_MACH_AMDGCN_GFX1036 /* Expect "yes".  */
+      || device_isa == EF_AMDGPU_MACH_AMDGCN_GFX1103 /* Observed "no".  */)
+    {
+      /* Start from "not shared" so that an indeterminate value is never
+         read, whatever the runtime does with the output buffer.  */
+      bool svm_by_default = false;
+      hsa_system_info_t type = HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT;
+      status = hsa_fns.hsa_system_get_info_fn (type, &svm_by_default);
+      if (status == HSA_STATUS_SUCCESS && svm_by_default)
+        caps |= GOMP_OFFLOAD_CAP_SHARED_MEM;
+    }
+
+  return caps;
}
/* Identify as GCN accelerator. */
@@ -1185,6 +1185,22 @@ GOMP_OFFLOAD_get_caps (void)
return GOMP_OFFLOAD_CAP_OPENACC_200 | GOMP_OFFLOAD_CAP_OPENMP_400;
}
+/* Return the capabilities that are specific to device N only, as a mask of
+   GOMP_OFFLOAD_CAP_* bits; 0 if there are none or the query fails.  */
+
+unsigned int
+GOMP_OFFLOAD_get_dev_caps (int n)
+{
+  int integrated;
+  CUresult res = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &integrated,
+                                    CU_DEVICE_ATTRIBUTE_INTEGRATED, n);
+
+  /* Without an answer from the driver, claim nothing device-specific.  */
+  if (res != CUDA_SUCCESS)
+    return 0;
+
+  /* APU devices might share memory with the host system.
+     NOTE(review): this assumes an integrated device can access ordinary
+     host allocations directly -- confirm that CU_DEVICE_ATTRIBUTE_INTEGRATED
+     alone is sufficient for that.  */
+  return integrated != 0 ? GOMP_OFFLOAD_CAP_SHARED_MEM : 0;
+}
+
int
GOMP_OFFLOAD_get_type (void)
{
@@ -5285,6 +5285,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (get_name);
DLSYM (get_caps);
+ DLSYM_OPT (get_dev_caps, get_dev_caps);
DLSYM (get_type);
DLSYM (get_num_devices);
DLSYM (init_device);
@@ -5456,11 +5457,6 @@ gomp_target_init (void)
{
/* Augment DEVICES and NUM_DEVICES. */
- /* If USM has been requested and is supported by all devices
- of this type, set the capability accordingly. */
- if (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
- current_device.capabilities |= GOMP_OFFLOAD_CAP_SHARED_MEM;
-
devs = realloc (devs, (num_devs + new_num_devs)
* sizeof (struct gomp_device_descr));
if (!devs)
@@ -5479,6 +5475,9 @@ gomp_target_init (void)
for (i = 0; i < new_num_devs; i++)
{
current_device.target_id = i;
+ if (current_device.get_dev_caps_func)
+ current_device.capabilities
+ |= current_device.get_dev_caps_func (i);
devs[num_devs] = current_device;
gomp_mutex_init (&devs[num_devs].lock);
num_devs++;