2015-07-14 Nathan Sidwell <nathan@codesourcery.com>
gcc/
* config/i386/intelmic-mkoffload.c (generate_host_descr_file):
Constify target data. Adjust calls to GOMP_offload_register and
GOMP_offload_unregister.
* config/nvptx/mkoffload.c (process): Constify target_data.
Adjust call to GOMP_offload_register. Emit destructor to unload
image.
libgomp/
* libgomp.h (gomp_device_descr): Add device data argument to
load_image_func and unload_image_func.
(gomp_unload_device): Declare.
* target.c (struct offload_image_descr): Constify target_data.
(gomp_offload_image_to_device): Rename to ...
(gomp_load_image_to_device): ... here. Constify target_data.
Add ability for plugin to stash device-specific load data.
Allocate single memory slot to record plugin's device data.
(gomp_unload_image_from_device): New, broken out of
GOMP_offload_unregister. Adjust plugin unloading call.
(GOMP_offload_register): Constify target data. Adjust
gomp_load_image_to_device.
(GOMP_offload_unregister): Constify target_data. Move unloading
to gomp_unload_image_from_device.
(gomp_init_device): Call gomp_load_image_to_device.
(gomp_unload_device): New.
* oacc-init.c (acc_shutdown_1): Call gomp_unload_device before
removing maps.
* plugin/plugin-host.c (GOMP_OFFLOAD_load_image): Adjust
arguments.
(GOMP_OFFLOAD_unload_image): Likewise.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Adjust
arguments, set device data.
(GOMP_OFFLOAD_unload_image): Adjust arguments. Free device_data.
===================================================================
@@ -277,7 +277,7 @@ process (FILE *in, FILE *out)
fprintf (out, "};\n\n");
fprintf (out,
- "static struct nvptx_tdata {\n"
+ "static const struct nvptx_tdata {\n"
" const char *ptx_src;\n"
" __SIZE_TYPE__ ptx_len;\n"
" const char *const *var_names;\n"
@@ -292,22 +292,32 @@ process (FILE *in, FILE *out)
" sizeof (func_mappings) / sizeof (func_mappings[0])\n"
"};\n\n");
- fprintf (out, "#ifdef __cplusplus\n");
- fprintf (out, "extern \"C\" {\n");
- fprintf (out, "#endif\n");
-
- fprintf (out, "extern void GOMP_offload_register (const void *, int, void *);\n");
-
- fprintf (out, "#ifdef __cplusplus\n");
- fprintf (out, "}\n");
- fprintf (out, "#endif\n");
+ fprintf (out, "#ifdef __cplusplus\n"
+ "extern \"C\" {\n"
+ "#endif\n");
+
+ fprintf (out, "extern void GOMP_offload_register"
+ " (const void *, int, const void *);\n"
+ "extern void GOMP_offload_unregister"
+ " (const void *, int, const void *);\n");
+
+ fprintf (out, "#ifdef __cplusplus\n"
+ "}\n"
+ "#endif\n");
fprintf (out, "extern void *__OFFLOAD_TABLE__[];\n\n");
- fprintf (out, "static __attribute__((constructor)) void init (void)\n{\n");
- fprintf (out, " GOMP_offload_register (__OFFLOAD_TABLE__, %d,\n",
- GOMP_DEVICE_NVIDIA_PTX);
- fprintf (out, " &target_data);\n");
- fprintf (out, "};\n");
+
+ fprintf (out, "static __attribute__((constructor)) void init (void)\n"
+ "{\n"
+ " GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
+ " &target_data);\n"
+ "};\n", GOMP_DEVICE_NVIDIA_PTX);
+
+ fprintf (out, "static __attribute__((destructor)) void fini (void)\n"
+ "{\n"
+ " GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
+ " &target_data);\n"
+ "};\n", GOMP_DEVICE_NVIDIA_PTX);
}
static void
===================================================================
@@ -342,7 +342,7 @@ generate_host_descr_file (const char *ho
"extern void *__offload_image_intelmic_start;\n"
"extern void *__offload_image_intelmic_end;\n\n"
- "static const void *__offload_target_data[] = {\n"
+ "static const void *const __offload_target_data[] = {\n"
" &__offload_image_intelmic_start, &__offload_image_intelmic_end\n"
"};\n\n");
@@ -350,11 +350,11 @@ generate_host_descr_file (const char *ho
"#ifdef __cplusplus\n"
"extern \"C\"\n"
"#endif\n"
- "void GOMP_offload_register (void *, int, void *);\n"
+ "void GOMP_offload_register (void *, int, const void *);\n"
+ "void GOMP_offload_unregister (void *, int, void const *);\n"
"#ifdef __cplusplus\n"
"extern \"C\"\n"
"#endif\n"
- "void GOMP_offload_unregister (void *, int, void *);\n\n"
"__attribute__((constructor))\n"
"static void\n"
===================================================================
@@ -254,6 +254,18 @@ acc_shutdown_1 (acc_device_t d)
goacc_deallocate_static (d);
+ ndevs = base_dev->get_num_devices_func ();
+
+ /* Unload all the devices of this type that have been opened. */
+ for (i = 0; i < ndevs; i++)
+ {
+ struct gomp_device_descr *acc_dev = &base_dev[i];
+
+ gomp_mutex_lock (&acc_dev->lock);
+ gomp_unload_device (acc_dev);
+ gomp_mutex_unlock (&acc_dev->lock);
+ }
+
gomp_mutex_lock (&goacc_thread_lock);
/* Free target-specific TLS data and close all devices. */
@@ -292,7 +304,6 @@ acc_shutdown_1 (acc_device_t d)
gomp_mutex_unlock (&goacc_thread_lock);
- ndevs = base_dev->get_num_devices_func ();
/* Close all the devices of this type that have been opened. */
for (i = 0; i < ndevs; i++)
===================================================================
@@ -58,7 +58,7 @@ static gomp_mutex_t register_lock;
struct offload_image_descr {
enum offload_target_type type;
void *host_table;
- void *target_data;
+ const void *target_data;
};
/* Array of descriptors of offload images. */
@@ -647,12 +647,13 @@ gomp_update (struct gomp_device_descr *d
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
And insert to splay tree the mapping between addresses from HOST_TABLE and
- from loaded target image. */
+ from loaded target image. We rely on the host and device compilers
+ emitting variables and functions in the same order. */
static void
-gomp_offload_image_to_device (struct gomp_device_descr *devicep,
- void *host_table, void *target_data,
- bool is_register_lock)
+gomp_load_image_to_device (struct gomp_device_descr *devicep,
+ void *host_table, const void *target_data,
+ bool is_register_lock)
{
void **host_func_table = ((void ***) host_table)[0];
void **host_funcs_end = ((void ***) host_table)[1];
@@ -666,8 +667,10 @@ gomp_offload_image_to_device (struct gom
/* Load image to device and get target addresses for the image. */
struct addr_pair *target_table = NULL;
+ void *dev_data = NULL;
int i, num_target_entries
- = devicep->load_image_func (devicep->target_id, target_data, &target_table);
+ = devicep->load_image_func (devicep->target_id, target_data,
+ &target_table, &dev_data);
if (num_target_entries != num_funcs + num_vars)
{
@@ -679,16 +682,34 @@ gomp_offload_image_to_device (struct gom
/* Insert host-target address mapping into splay tree. */
struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
- tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
+ tgt->array = gomp_malloc ((num_funcs + num_vars + 1) * sizeof (*tgt->array));
tgt->refcount = 1;
tgt->tgt_start = 0;
tgt->tgt_end = 0;
- tgt->to_free = NULL;
+ tgt->to_free = dev_data;
tgt->prev = NULL;
tgt->list_count = 0;
tgt->device_descr = devicep;
splay_tree_node array = tgt->array;
+ /* Insert target_data as a mapping to NULL so we can find tgt again. */
+ {
+ splay_tree_key k = &array->key;
+
+ k->host_start = (uintptr_t) target_data;
+ k->host_end = k->host_start + 1;
+ k->tgt = tgt;
+ k->tgt_offset = 0;
+ k->refcount = 1;
+ k->async_refcount = 0;
+ k->copy_from = false;
+ k->dealloc_host = false;
+ array->left = NULL;
+ array->right = NULL;
+ splay_tree_insert (&devicep->mem_map, array);
+ array++;
+ }
+
for (i = 0; i < num_funcs; i++)
{
splay_tree_key k = &array->key;
@@ -736,13 +757,65 @@ gomp_offload_image_to_device (struct gom
free (target_table);
}
+/* Unload the mappings described by TARGET_DATA from device DEVICEP.
+ The device must be locked. */
+
+static void
+gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+ void *host_table, const void *target_data)
+{
+ void **host_func_table = ((void ***) host_table)[0];
+ void **host_funcs_end = ((void ***) host_table)[1];
+ void **host_var_table = ((void ***) host_table)[2];
+ void **host_vars_end = ((void ***) host_table)[3];
+
+ /* The func table contains only addresses, the var table contains addresses
+ and corresponding sizes. */
+ int num_funcs = host_funcs_end - host_func_table;
+ int num_vars = (host_vars_end - host_var_table) / 2;
+
+ unsigned j;
+ void *dev_data = NULL;
+ struct splay_tree_key_s k;
+ splay_tree_key node = NULL;
+
+ /* Locate any dev_data */
+ k.host_start = (uintptr_t) target_data;
+ k.host_end = k.host_start + 1;
+ node = splay_tree_lookup (&devicep->mem_map, &k);
+ splay_tree_remove (&devicep->mem_map, &k);
+
+ dev_data = node->tgt->to_free;
+ node->tgt->to_free = NULL;
+
+ devicep->unload_image_func (devicep->target_id, target_data, dev_data);
+
+ /* Remove mappings from splay tree. */
+ for (j = 0; j < num_funcs; j++)
+ {
+ k.host_start = (uintptr_t) host_func_table[j];
+ k.host_end = k.host_start + 1;
+ splay_tree_remove (&devicep->mem_map, &k);
+ }
+
+ for (j = 0; j < num_vars; j++)
+ {
+ k.host_start = (uintptr_t) host_var_table[j * 2];
+ k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+ splay_tree_remove (&devicep->mem_map, &k);
+ }
+
+ free (node->tgt);
+ free (node);
+}
+
/* This function should be called from every offload image while loading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin. */
void
GOMP_offload_register (void *host_table, enum offload_target_type target_type,
- void *target_data)
+ const void *target_data)
{
int i;
gomp_mutex_lock (&register_lock);
@@ -753,7 +826,7 @@ GOMP_offload_register (void *host_table,
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type && devicep->is_initialized)
- gomp_offload_image_to_device (devicep, host_table, target_data, true);
+ gomp_load_image_to_device (devicep, host_table, target_data, true);
gomp_mutex_unlock (&devicep->lock);
}
@@ -775,72 +848,21 @@ GOMP_offload_register (void *host_table,
the target, and TARGET_DATA needed by target plugin. */
void
-GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
- void *target_data)
+GOMP_offload_unregister (void *host_table,
+ enum offload_target_type target_type,
+ const void *target_data)
{
- void **host_func_table = ((void ***) host_table)[0];
- void **host_funcs_end = ((void ***) host_table)[1];
- void **host_var_table = ((void ***) host_table)[2];
- void **host_vars_end = ((void ***) host_table)[3];
int i;
- /* The func table contains only addresses, the var table contains addresses
- and corresponding sizes. */
- int num_funcs = host_funcs_end - host_func_table;
- int num_vars = (host_vars_end - host_var_table) / 2;
-
gomp_mutex_lock (&register_lock);
/* Unload image from all initialized devices. */
for (i = 0; i < num_devices; i++)
{
- int j;
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
- if (devicep->type != target_type || !devicep->is_initialized)
- {
- gomp_mutex_unlock (&devicep->lock);
- continue;
- }
-
- devicep->unload_image_func (devicep->target_id, target_data);
-
- /* Remove mapping from splay tree. */
- struct splay_tree_key_s k;
- splay_tree_key node = NULL;
- if (num_funcs > 0)
- {
- k.host_start = (uintptr_t) host_func_table[0];
- k.host_end = k.host_start + 1;
- node = splay_tree_lookup (&devicep->mem_map, &k);
- }
- else if (num_vars > 0)
- {
- k.host_start = (uintptr_t) host_var_table[0];
- k.host_end = k.host_start + (uintptr_t) host_var_table[1];
- node = splay_tree_lookup (&devicep->mem_map, &k);
- }
-
- for (j = 0; j < num_funcs; j++)
- {
- k.host_start = (uintptr_t) host_func_table[j];
- k.host_end = k.host_start + 1;
- splay_tree_remove (&devicep->mem_map, &k);
- }
-
- for (j = 0; j < num_vars; j++)
- {
- k.host_start = (uintptr_t) host_var_table[j * 2];
- k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
- splay_tree_remove (&devicep->mem_map, &k);
- }
-
- if (node)
- {
- free (node->tgt);
- free (node);
- }
-
+ if (devicep->type == target_type && devicep->is_initialized)
+ gomp_unload_image_from_device(devicep, host_table, target_data);
gomp_mutex_unlock (&devicep->lock);
}
@@ -869,13 +891,31 @@ gomp_init_device (struct gomp_device_des
{
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
- gomp_offload_image_to_device (devicep, image->host_table,
- image->target_data, false);
+ gomp_load_image_to_device (devicep, image->host_table,
+ image->target_data, false);
}
devicep->is_initialized = true;
}
+attribute_hidden void
+gomp_unload_device (struct gomp_device_descr *devicep)
+{
+ if (devicep->is_initialized)
+ {
+ unsigned i;
+
+ /* Unload from device all images registered at the moment. */
+ for (i = 0; i < num_offload_images; i++)
+ {
+ struct offload_image_descr *image = &offload_images[i];
+ if (image->type == devicep->type)
+ gomp_unload_image_from_device (devicep, image->host_table,
+ image->target_data);
+ }
+ }
+}
+
/* Free address mapping tables. MM must be locked on entry, and remains locked
on return. */
===================================================================
@@ -750,8 +750,8 @@ struct gomp_device_descr
int (*get_num_devices_func) (void);
void (*init_device_func) (int);
void (*fini_device_func) (int);
- int (*load_image_func) (int, void *, struct addr_pair **);
- void (*unload_image_func) (int, void *);
+ int (*load_image_func) (int, const void *, struct addr_pair **, void **);
+ void (*unload_image_func) (int, const void *, void *);
void *(*alloc_func) (int, size_t);
void (*free_func) (int, void *);
void *(*dev2host_func) (int, void *, const void *, size_t);
@@ -784,6 +784,7 @@ extern void gomp_unmap_vars (struct targ
extern void gomp_init_device (struct gomp_device_descr *);
extern void gomp_free_memmap (struct splay_tree_s *);
extern void gomp_fini_device (struct gomp_device_descr *);
+extern void gomp_unload_device (struct gomp_device_descr *);
/* work.c */
===================================================================
@@ -111,15 +111,17 @@ GOMP_OFFLOAD_fini_device (int n __attrib
STATIC int
GOMP_OFFLOAD_load_image (int n __attribute__ ((unused)),
- void *i __attribute__ ((unused)),
- struct addr_pair **r __attribute__ ((unused)))
+ void const *t __attribute__ ((unused)),
+ struct addr_pair **r __attribute__ ((unused)),
+ void **d __attribute__ ((unused)))
{
return 0;
}
STATIC void
GOMP_OFFLOAD_unload_image (int n __attribute__ ((unused)),
- void *i __attribute__ ((unused)))
+ void const *t __attribute__ ((unused)),
+ void *d __attribute__ ((unused)))
{
}
===================================================================
@@ -1587,6 +1587,9 @@ GOMP_OFFLOAD_fini_device (int n)
pthread_mutex_unlock (&ptx_dev_lock);
}
+/* Structure created by mkoffload to describe device variables and
+ functions in a translation unit. */
+
typedef struct nvptx_tdata
{
const char *ptx_src;
@@ -1599,9 +1602,14 @@ typedef struct nvptx_tdata
size_t fn_num;
} nvptx_tdata_t;
+/* Load the (partial) program described by TARGET_DATA. Allocate and
+ return TARGET_TABLE. DEV_DATA is used to store the function
+ descriptors, and used when unloading the image. */
+
int
GOMP_OFFLOAD_load_image (int ord, void *target_data,
- struct addr_pair **target_table)
+ struct addr_pair **target_table,
+ void **dev_data)
{
CUmodule module;
const char *const *fn_names, *const *var_names;
@@ -1668,15 +1676,19 @@ GOMP_OFFLOAD_load_image (int ord, void *
(*target_table)[i].end = (*target_table)[i].start + bytes;
}
+ *dev_data = targ_fns;
return i;
}
+/* Unload the program described by TARGET_DATA. DEV_DATA is the
+ function descriptors allocated by GOMP_OFFLOAD_load_image. */
+
void
-GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)), void *target_data)
+GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)),
+ const void *target_data, void *dev_data)
{
- void **img_header = (void **) target_data;
struct targ_fn_descriptor *targ_fns
- = (struct targ_fn_descriptor *) img_header[0];
+ = (struct targ_fn_descriptor *) dev_data;
struct ptx_image_data *image, *prev = NULL, *newhd = NULL;
free (targ_fns);