@@ -60,7 +60,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c
+ time.c fortran.c affinity.c target.c
nodist_noinst_HEADERS = libgomp_f.h
nodist_libsubinclude_HEADERS = omp.h
@@ -96,7 +96,7 @@ am_libgomp_la_OBJECTS = alloc.lo barrier
error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
parallel.lo sections.lo single.lo task.lo team.lo work.lo \
lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
- fortran.lo affinity.lo
+ fortran.lo affinity.lo target.lo
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -317,7 +317,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c
+ time.c fortran.c affinity.c target.c
nodist_noinst_HEADERS = libgomp_f.h
nodist_libsubinclude_HEADERS = omp.h
@@ -474,6 +474,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
@@ -39,3 +39,15 @@ GOMP_barrier (void)
gomp_team_barrier_wait (&team->barrier);
}
+
+bool
+GOMP_barrier_cancel (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+ /* The compiler transforms to barrier_cancel when it sees that the
+ barrier is within a construct that can cancel. Thus we should
+ never have an orphaned cancellable barrier. */
+ return gomp_team_barrier_wait_cancel (&team->barrier);
+}
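
GOMP_barrier_cancel above is the runtime half of OpenMP 4.0 cancellation:
the compiler emits it in place of GOMP_barrier whenever the barrier sits
inside a construct that contains a cancel directive. A minimal sketch of
user code that triggers the transformation (illustrative, assumes the
program runs with OMP_CANCELLATION=true):

    #include <omp.h>

    void
    work (int *data)
    {
      #pragma omp parallel
      {
        if (data[omp_get_thread_num ()] < 0)
          {
            /* Request cancellation of the innermost parallel region.  */
            #pragma omp cancel parallel
          }
        /* A barrier in a cancellable region becomes GOMP_barrier_cancel;
           threads leave early if the region was cancelled.  */
        #pragma omp barrier
        /* ... post-barrier work, skipped on cancellation ...  */
      }
    }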
@@ -29,90 +29,327 @@
#endif
#include "libgomp.h"
#include "proc.h"
+#include <errno.h>
#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
#include <unistd.h>
#ifdef HAVE_PTHREAD_AFFINITY_NP
-static unsigned int affinity_counter;
+#ifndef CPU_ALLOC_SIZE
+#define CPU_ISSET_S(idx, size, set) CPU_ISSET(idx, set)
+#define CPU_ZERO_S(size, set) CPU_ZERO(set)
+#define CPU_SET_S(idx, size, set) CPU_SET(idx, set)
+#define CPU_CLR_S(idx, size, set) CPU_CLR(idx, set)
+#endif
void
gomp_init_affinity (void)
{
- cpu_set_t cpuset, cpusetnew;
- size_t idx, widx;
- unsigned long cpus = 0;
-
- if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset))
- {
- gomp_error ("could not get CPU affinity set");
- free (gomp_cpu_affinity);
- gomp_cpu_affinity = NULL;
- gomp_cpu_affinity_len = 0;
- return;
- }
-
- CPU_ZERO (&cpusetnew);
- if (gomp_cpu_affinity_len == 0)
- {
- unsigned long count = gomp_cpuset_popcount (&cpuset);
- if (count >= 65536)
- count = 65536;
- gomp_cpu_affinity = malloc (count * sizeof (unsigned short));
- if (gomp_cpu_affinity == NULL)
+ if (gomp_places_list == NULL)
+ {
+ if (!gomp_affinity_init_level (1, ULONG_MAX, true))
+ return;
+ }
+
+ struct gomp_thread *thr = gomp_thread ();
+ pthread_setaffinity_np (pthread_self (), gomp_cpuset_size,
+ (cpu_set_t *) gomp_places_list[0]);
+ thr->place = 1;
+ thr->ts.place_partition_off = 0;
+ thr->ts.place_partition_len = gomp_places_list_len;
+}
+
+void
+gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
+{
+ pthread_attr_setaffinity_np (attr, gomp_cpuset_size,
+ (cpu_set_t *) gomp_places_list[place]);
+}
+
+void **
+gomp_affinity_alloc (unsigned long count, bool quiet)
+{
+ unsigned long i;
+ void **ret;
+ char *p;
+
+ if (gomp_cpusetp == NULL)
+ {
+ if (!quiet)
+ gomp_error ("Could not get CPU affinity set");
+ return NULL;
+ }
+
+ ret = malloc (count * sizeof (void *) + count * gomp_cpuset_size);
+ if (ret == NULL)
+ {
+ if (!quiet)
+ gomp_error ("Out of memory trying to allocate places list");
+ return NULL;
+ }
+
+ p = (char *) (ret + count);
+ for (i = 0; i < count; i++, p += gomp_cpuset_size)
+ ret[i] = p;
+ return ret;
+}
+
+void
+gomp_affinity_init_place (void *p)
+{
+ cpu_set_t *cpusetp = (cpu_set_t *) p;
+ CPU_ZERO_S (gomp_cpuset_size, cpusetp);
+}
+
+bool
+gomp_affinity_add_cpus (void *p, unsigned long num,
+ unsigned long len, long stride, bool quiet)
+{
+ cpu_set_t *cpusetp = (cpu_set_t *) p;
+ unsigned long max = 8 * gomp_cpuset_size;
+ for (;;)
+ {
+ if (num >= max)
{
- gomp_error ("not enough memory to store CPU affinity list");
- return;
+ if (!quiet)
+ gomp_error ("Logical CPU number %lu out of range", num);
+ return false;
}
- for (widx = idx = 0; widx < count && idx < 65536; idx++)
- if (CPU_ISSET (idx, &cpuset))
+ CPU_SET_S (num, gomp_cpuset_size, cpusetp);
+ if (--len == 0)
+ return true;
+ if ((stride < 0 && num + stride > num)
+ || (stride > 0 && num + stride < num))
+ {
+ if (!quiet)
+ gomp_error ("Logical CPU number %lu+%ld out of range",
+ num, stride);
+ return false;
+ }
+ num += stride;
+ }
+}
+
+bool
+gomp_affinity_remove_cpu (void *p, unsigned long num)
+{
+ cpu_set_t *cpusetp = (cpu_set_t *) p;
+ if (num >= 8 * gomp_cpuset_size)
+ {
+ gomp_error ("Logical CPU number %lu out of range", num);
+ return false;
+ }
+ if (!CPU_ISSET_S (num, gomp_cpuset_size, cpusetp))
+ {
+ gomp_error ("Logical CPU %lu to be removed is not in the set", num);
+ return false;
+ }
+ CPU_CLR_S (num, gomp_cpuset_size, cpusetp);
+ return true;
+}
+
+bool
+gomp_affinity_copy_place (void *p, void *q, long stride)
+{
+ unsigned long i, max = 8 * gomp_cpuset_size;
+ cpu_set_t *destp = (cpu_set_t *) p;
+ cpu_set_t *srcp = (cpu_set_t *) q;
+
+ CPU_ZERO_S (gomp_cpuset_size, destp);
+ for (i = 0; i < max; i++)
+ if (CPU_ISSET_S (i, gomp_cpuset_size, srcp))
+ {
+ if ((stride < 0 && i + stride > i)
+ || (stride > 0 && (i + stride < i || i + stride >= max)))
+ {
+ gomp_error ("Logical CPU number %lu+%ld out of range", i, stride);
+ return false;
+ }
+ CPU_SET_S (i + stride, gomp_cpuset_size, destp);
+ }
+ return true;
+}
+
+bool
+gomp_affinity_same_place (void *p, void *q)
+{
+#ifdef CPU_EQUAL_S
+ return CPU_EQUAL_S (gomp_cpuset_size, (cpu_set_t *) p, (cpu_set_t *) q);
+#else
+ return memcmp (p, q, gomp_cpuset_size) == 0;
+#endif
+}
+
+bool
+gomp_affinity_finalize_place_list (bool quiet)
+{
+ unsigned long i, j;
+
+ for (i = 0, j = 0; i < gomp_places_list_len; i++)
+ {
+ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[i];
+ bool nonempty = false;
+#ifdef CPU_AND_S
+ CPU_AND_S (gomp_cpuset_size, cpusetp, cpusetp, gomp_cpusetp);
+ nonempty = gomp_cpuset_popcount (gomp_cpuset_size, cpusetp) != 0;
+#else
+ unsigned long k, max = gomp_cpuset_size / sizeof (cpusetp->__bits[0]);
+ for (k = 0; k < max; k++)
+ if ((cpusetp->__bits[k] &= gomp_cpusetp->__bits[k]) != 0)
+ nonempty = true;
+#endif
+ if (nonempty)
+ gomp_places_list[j++] = gomp_places_list[i];
+ }
+
+ if (j == 0)
+ {
+ if (!quiet)
+ gomp_error ("None of the places contain usable logical CPUs");
+ return false;
+ }
+ else if (j < gomp_places_list_len)
+ {
+ if (!quiet)
+ gomp_error ("Number of places reduced from %ld to %ld because some "
+ "places didn't contain any usable logical CPUs",
+ gomp_places_list_len, j);
+ gomp_places_list_len = j;
+ }
+ return true;
+}
+
+bool
+gomp_affinity_init_level (int level, unsigned long count, bool quiet)
+{
+ unsigned long i, max = 8 * gomp_cpuset_size;
+
+ if (gomp_cpusetp)
+ {
+ unsigned long maxcount
+ = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
+ if (count > maxcount)
+ count = maxcount;
+ }
+ gomp_places_list = gomp_affinity_alloc (count, quiet);
+ gomp_places_list_len = 0;
+ if (gomp_places_list == NULL)
+ return false;
+ /* SMT (threads). */
+ if (level == 1)
+ {
+ for (i = 0; i < max && gomp_places_list_len < count; i++)
+ if (CPU_ISSET_S (i, gomp_cpuset_size, gomp_cpusetp))
{
- cpus++;
- gomp_cpu_affinity[widx++] = idx;
+ gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]);
+ gomp_affinity_add_cpus (gomp_places_list[gomp_places_list_len],
+ i, 1, 0, true);
+ ++gomp_places_list_len;
}
+ return true;
}
else
- for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++)
- if (gomp_cpu_affinity[idx] < CPU_SETSIZE
- && CPU_ISSET (gomp_cpu_affinity[idx], &cpuset))
+ {
+ char name[sizeof ("/sys/devices/system/cpu/cpu/topology/"
+ "thread_siblings_list") + 3 * sizeof (unsigned long)];
+ size_t prefix_len = sizeof ("/sys/devices/system/cpu/cpu") - 1;
+ cpu_set_t *copy = gomp_alloca (gomp_cpuset_size);
+ FILE *f;
+ char *line = NULL;
+ size_t linelen = 0;
+
+ memcpy (name, "/sys/devices/system/cpu/cpu", prefix_len);
+ memcpy (copy, gomp_cpusetp, gomp_cpuset_size);
+ for (i = 0; i < max && gomp_places_list_len < count; i++)
+ if (CPU_ISSET_S (i, gomp_cpuset_size, copy))
+ {
+ sprintf (name + prefix_len, "%lu/topology/%s_siblings_list",
+ i, level == 2 ? "thread" : "core");
+ f = fopen (name, "r");
+ if (f != NULL)
+ {
+ if (getline (&line, &linelen, f) > 0)
+ {
+ char *p = line;
+ bool seen_i = false;
+ void *pl = gomp_places_list[gomp_places_list_len];
+ gomp_affinity_init_place (pl);
+ while (*p && *p != '\n')
+ {
+ unsigned long first, last;
+ errno = 0;
+ first = strtoul (p, &p, 10);
+ if (errno)
+ break;
+ last = first;
+ if (*p == '-')
+ {
+ errno = 0;
+ last = strtoul (p + 1, &p, 10);
+ if (errno || last < first)
+ break;
+ }
+ for (; first <= last; first++)
+ if (CPU_ISSET_S (first, gomp_cpuset_size, copy)
+ && gomp_affinity_add_cpus (pl, first, 1, 0,
+ true))
+ {
+ CPU_CLR_S (first, gomp_cpuset_size, copy);
+ if (first == i)
+ seen_i = true;
+ }
+ if (*p == ',')
+ ++p;
+ }
+ if (seen_i)
+ gomp_places_list_len++;
+ }
+ fclose (f);
+ }
+ }
+ if (gomp_places_list_len == 0)
{
- if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpusetnew))
- {
- cpus++;
- CPU_SET (gomp_cpu_affinity[idx], &cpusetnew);
- }
- gomp_cpu_affinity[widx++] = gomp_cpu_affinity[idx];
+ if (!quiet)
+ gomp_error ("Error reading %s topology",
+ level == 2 ? "core" : "socket");
+ free (gomp_places_list);
+ gomp_places_list = NULL;
+ return false;
}
-
- if (widx == 0)
- {
- gomp_error ("no CPUs left for affinity setting");
- free (gomp_cpu_affinity);
- gomp_cpu_affinity = NULL;
- gomp_cpu_affinity_len = 0;
- return;
- }
-
- gomp_cpu_affinity_len = widx;
- if (cpus < gomp_available_cpus)
- gomp_available_cpus = cpus;
- CPU_ZERO (&cpuset);
- CPU_SET (gomp_cpu_affinity[0], &cpuset);
- pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset);
- affinity_counter = 1;
+ return true;
+ }
+ return false;
}
void
-gomp_init_thread_affinity (pthread_attr_t *attr)
+gomp_affinity_print_place (void *p)
{
- unsigned int cpu;
- cpu_set_t cpuset;
+ unsigned long i, max = 8 * gomp_cpuset_size, len;
+ cpu_set_t *cpusetp = (cpu_set_t *) p;
+ bool notfirst = false;
- cpu = __atomic_fetch_add (&affinity_counter, 1, MEMMODEL_RELAXED);
- cpu %= gomp_cpu_affinity_len;
- CPU_ZERO (&cpuset);
- CPU_SET (gomp_cpu_affinity[cpu], &cpuset);
- pthread_attr_setaffinity_np (attr, sizeof (cpu_set_t), &cpuset);
+ for (i = 0, len = 0; i < max; i++)
+ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp))
+ {
+ if (len == 0)
+ {
+ if (notfirst)
+ fputc (',', stderr);
+ notfirst = true;
+ fprintf (stderr, "%lu", i);
+ }
+ ++len;
+ }
+ else
+ {
+ if (len > 1)
+ fprintf (stderr, ":%lu", len);
+ len = 0;
+ }
+ if (len > 1)
+ fprintf (stderr, ":%lu", len);
}
#else
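
gomp_affinity_print_place above compresses consecutive CPUs into
start:count intervals so that OMP_DISPLAY_ENV output stays readable on
wide masks. Illustrative renderings (not output from the patch itself):

    CPUs in the place    printed as
    {0,1,2,3}            0:4
    {0,2,4,6}            0,2,4,6     (only stride-1 runs are compressed)
    {5}                  5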
@@ -33,11 +33,11 @@
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
- if (__builtin_expect ((state & 1) != 0, 0))
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
{
/* Next time we'll be awaiting TOTAL threads again. */
bar->awaited = bar->total;
- __atomic_store_n (&bar->generation, bar->generation + 4,
+ __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
MEMMODEL_RELEASE);
futex_wake ((int *) &bar->generation, INT_MAX);
}
@@ -66,7 +66,7 @@ void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
gomp_barrier_state_t state = gomp_barrier_wait_start (bar);
- if (state & 1)
+ if (state & BAR_WAS_LAST)
gomp_barrier_wait_end (bar, state);
}
@@ -81,40 +81,43 @@ gomp_team_barrier_wait_end (gomp_barrier
{
unsigned int generation, gen;
- if (__builtin_expect ((state & 1) != 0, 0))
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
{
/* Next time we'll be awaiting TOTAL threads again. */
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
bar->awaited = bar->total;
+ team->work_share_cancelled = 0;
if (__builtin_expect (team->task_count, 0))
{
gomp_barrier_handle_tasks (state);
- state &= ~1;
+ state &= ~BAR_WAS_LAST;
}
else
{
- __atomic_store_n (&bar->generation, state + 3, MEMMODEL_RELEASE);
+ state &= ~BAR_CANCELLED;
+ state += BAR_INCR - BAR_WAS_LAST;
+ __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
futex_wake ((int *) &bar->generation, INT_MAX);
return;
}
}
generation = state;
+ state &= ~BAR_CANCELLED;
do
{
do_wait ((int *) &bar->generation, generation);
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
- if (__builtin_expect (gen & 1, 0))
+ if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
{
gomp_barrier_handle_tasks (state);
gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
}
- if ((gen & 2) != 0)
- generation |= 2;
+ generation |= gen & BAR_WAITING_FOR_TASK;
}
- while (gen != state + 4);
+ while (gen != state + BAR_INCR);
}
void
@@ -122,3 +125,86 @@ gomp_team_barrier_wait (gomp_barrier_t *
{
gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}
+
+void
+gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+{
+ gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ bar->awaited_final = bar->total;
+ gomp_team_barrier_wait_end (bar, state);
+}
+
+bool
+gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+ gomp_barrier_state_t state)
+{
+ unsigned int generation, gen;
+
+ if (__builtin_expect (state & BAR_WAS_LAST, 0))
+ {
+ /* Next time we'll be awaiting TOTAL threads again. */
+ /* BAR_CANCELLED should never be set in state here, because
+ cancellation means that at least one of the threads has been
+ cancelled, thus on a cancellable barrier we should never see
+ all threads arrive. */
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+ bar->awaited = bar->total;
+ team->work_share_cancelled = 0;
+ if (__builtin_expect (team->task_count, 0))
+ {
+ gomp_barrier_handle_tasks (state);
+ state &= ~BAR_WAS_LAST;
+ }
+ else
+ {
+ state += BAR_INCR - BAR_WAS_LAST;
+ __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
+ futex_wake ((int *) &bar->generation, INT_MAX);
+ return false;
+ }
+ }
+
+ if (__builtin_expect (state & BAR_CANCELLED, 0))
+ return true;
+
+ generation = state;
+ do
+ {
+ do_wait ((int *) &bar->generation, generation);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ if (__builtin_expect (gen & BAR_CANCELLED, 0))
+ return true;
+ if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
+ {
+ gomp_barrier_handle_tasks (state);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ }
+ generation |= gen & BAR_WAITING_FOR_TASK;
+ }
+ while (gen != state + BAR_INCR);
+
+ return false;
+}
+
+bool
+gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+{
+ return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
+}
+
+void
+gomp_team_barrier_cancel (struct gomp_team *team)
+{
+ gomp_mutex_lock (&team->task_lock);
+ if (team->barrier.generation & BAR_CANCELLED)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+ team->barrier.generation |= BAR_CANCELLED;
+ gomp_mutex_unlock (&team->task_lock);
+ futex_wake ((int *) &team->barrier.generation, INT_MAX);
+}
@@ -38,13 +38,25 @@ typedef struct
unsigned total __attribute__((aligned (64)));
unsigned generation;
unsigned awaited __attribute__((aligned (64)));
+ unsigned awaited_final;
} gomp_barrier_t;
+
typedef unsigned int gomp_barrier_state_t;
+/* The generation field contains a counter in the high bits, with a few
+ low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
+ share space because WAS_LAST is never stored back to generation. */
+#define BAR_TASK_PENDING 1
+#define BAR_WAS_LAST 1
+#define BAR_WAITING_FOR_TASK 2
+#define BAR_CANCELLED 4
+#define BAR_INCR 8
+
static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count)
{
bar->total = count;
bar->awaited = count;
+ bar->awaited_final = count;
bar->generation = 0;
}
@@ -62,27 +74,55 @@ extern void gomp_barrier_wait (gomp_barr
extern void gomp_barrier_wait_last (gomp_barrier_t *);
extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t);
extern void gomp_team_barrier_wait (gomp_barrier_t *);
+extern void gomp_team_barrier_wait_final (gomp_barrier_t *);
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
gomp_barrier_state_t);
+extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
+extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
+ gomp_barrier_state_t);
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
+struct gomp_team;
+extern void gomp_team_barrier_cancel (struct gomp_team *);
static inline gomp_barrier_state_t
gomp_barrier_wait_start (gomp_barrier_t *bar)
{
- unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) & ~3;
+ unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ ret &= -BAR_INCR | BAR_CANCELLED;
/* A memory barrier is needed before exiting from the various forms
of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section
2.8.6 flush Construct, which says there is an implicit flush during
a barrier region. This is a convenient place to add the barrier,
so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE. */
- ret += __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0;
+ if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0)
+ ret |= BAR_WAS_LAST;
+ return ret;
+}
+
+static inline gomp_barrier_state_t
+gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
+{
+ return gomp_barrier_wait_start (bar);
+}
+
+/* This is like gomp_barrier_wait_start, except it decrements
+ bar->awaited_final rather than bar->awaited and should be used
+ for the gomp_team_end barrier only. */
+static inline gomp_barrier_state_t
+gomp_barrier_wait_final_start (gomp_barrier_t *bar)
+{
+ unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ ret &= -BAR_INCR | BAR_CANCELLED;
+ /* See above gomp_barrier_wait_start comment. */
+ if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_ACQ_REL) == 0)
+ ret |= BAR_WAS_LAST;
return ret;
}
static inline bool
gomp_barrier_last_thread (gomp_barrier_state_t state)
{
- return state & 1;
+ return state & BAR_WAS_LAST;
}
/* All the inlines below must be called with team->task_lock
@@ -91,31 +131,37 @@ gomp_barrier_last_thread (gomp_barrier_s
static inline void
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
{
- bar->generation |= 1;
+ bar->generation |= BAR_TASK_PENDING;
}
static inline void
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
{
- bar->generation &= ~1;
+ bar->generation &= ~BAR_TASK_PENDING;
}
static inline void
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
{
- bar->generation |= 2;
+ bar->generation |= BAR_WAITING_FOR_TASK;
}
static inline bool
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
{
- return (bar->generation & 2) != 0;
+ return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
+}
+
+static inline bool
+gomp_team_barrier_cancelled (gomp_barrier_t *bar)
+{
+ return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
}
static inline void
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
- bar->generation = (state & ~3) + 4;
+ bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
#endif /* GOMP_BARRIER_H */
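
The BAR_* encoding above packs the flag bits below a counter that
advances in steps of BAR_INCR, so -BAR_INCR doubles as the mask that
strips the flags. A small self-contained check of the arithmetic
(illustrative, not part of the patch):

    #include <assert.h>

    #define BAR_TASK_PENDING 1
    #define BAR_WAS_LAST 1
    #define BAR_WAITING_FOR_TASK 2
    #define BAR_CANCELLED 4
    #define BAR_INCR 8

    int
    main (void)
    {
      unsigned gen = 3 * BAR_INCR | BAR_CANCELLED | BAR_TASK_PENDING;
      /* (unsigned) -BAR_INCR == ~(BAR_INCR - 1), i.e. ~7: clears flags.  */
      assert ((gen & -BAR_INCR) == 3 * BAR_INCR);
      /* gomp_team_barrier_done: drop the flags, bump the generation.  */
      assert (((gen & -BAR_INCR) + BAR_INCR) == 4 * BAR_INCR);
      return 0;
    }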
@@ -30,6 +30,7 @@
#endif
#include "libgomp.h"
#include "proc.h"
+#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#ifdef HAVE_GETLOADAVG
@@ -39,19 +40,28 @@
#endif
#ifdef HAVE_PTHREAD_AFFINITY_NP
+unsigned long gomp_cpuset_size;
+static unsigned long gomp_get_cpuset_size;
+cpu_set_t *gomp_cpusetp;
+
unsigned long
-gomp_cpuset_popcount (cpu_set_t *cpusetp)
+gomp_cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp)
{
-#ifdef CPU_COUNT
- /* glibc 2.6 and above provide a macro for this. */
- return CPU_COUNT (cpusetp);
+#ifdef CPU_COUNT_S
+ /* glibc 2.7 and above provide a macro for this. */
+ return CPU_COUNT_S (cpusetsize, cpusetp);
#else
+#ifdef CPU_COUNT
+ if (cpusetsize == sizeof (cpu_set_t))
+ /* glibc 2.6 and above provide a macro for this. */
+ return CPU_COUNT (cpusetp);
+#endif
size_t i;
unsigned long ret = 0;
- extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)];
+ extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)
+ ? 1 : -1];
- (void) check;
- for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++)
+ for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++)
{
unsigned long int mask = cpusetp->__bits[i];
if (mask == 0)
@@ -70,16 +80,63 @@ void
gomp_init_num_threads (void)
{
#ifdef HAVE_PTHREAD_AFFINITY_NP
- cpu_set_t cpuset;
+#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
+ gomp_cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
+ gomp_cpuset_size = CPU_ALLOC_SIZE (gomp_cpuset_size);
+#else
+ gomp_cpuset_size = sizeof (cpu_set_t);
+#endif
- if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0)
+ gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size);
+ do
{
- /* Count only the CPUs this process can use. */
- gomp_global_icv.nthreads_var = gomp_cpuset_popcount (&cpuset);
- if (gomp_global_icv.nthreads_var == 0)
- gomp_global_icv.nthreads_var = 1;
- return;
+ int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
+ gomp_cpusetp);
+ if (ret == 0)
+ {
+ unsigned long i;
+ /* Count only the CPUs this process can use. */
+ gomp_global_icv.nthreads_var
+ = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
+ if (gomp_global_icv.nthreads_var == 0)
+ break;
+ gomp_get_cpuset_size = gomp_cpuset_size;
+#ifdef CPU_ALLOC_SIZE
+ for (i = gomp_cpuset_size * 8; i; i--)
+ if (CPU_ISSET_S (i - 1, gomp_cpuset_size, gomp_cpusetp))
+ break;
+ gomp_cpuset_size = CPU_ALLOC_SIZE (i);
+#endif
+ return;
+ }
+ if (ret != EINVAL)
+ break;
+#ifdef CPU_ALLOC_SIZE
+ if (gomp_cpuset_size < sizeof (cpu_set_t))
+ gomp_cpuset_size = sizeof (cpu_set_t);
+ else
+ gomp_cpuset_size = gomp_cpuset_size * 2;
+ if (gomp_cpuset_size < 8 * sizeof (cpu_set_t))
+ gomp_cpusetp
+ = (cpu_set_t *) gomp_realloc (gomp_cpusetp, gomp_cpuset_size);
+ else
+ {
+ /* Avoid gomp_fatal if too large memory allocation would be
+ requested, e.g. kernel returning EINVAL all the time. */
+ void *p = realloc (gomp_cpusetp, gomp_cpuset_size);
+ if (p == NULL)
+ break;
+ gomp_cpusetp = (cpu_set_t *) p;
+ }
+#else
+ break;
+#endif
}
+ while (1);
+ gomp_cpuset_size = 0;
+ gomp_global_icv.nthreads_var = 1;
+ free (gomp_cpusetp);
+ gomp_cpusetp = NULL;
#endif
#ifdef _SC_NPROCESSORS_ONLN
gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
@@ -90,15 +147,14 @@ static int
get_num_procs (void)
{
#ifdef HAVE_PTHREAD_AFFINITY_NP
- cpu_set_t cpuset;
-
- if (gomp_cpu_affinity == NULL)
+ if (gomp_places_list == NULL)
{
/* Count only the CPUs this process can use. */
- if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset),
- &cpuset) == 0)
+ if (gomp_cpusetp
+ && pthread_getaffinity_np (pthread_self (), gomp_get_cpuset_size,
+ gomp_cpusetp) == 0)
{
- int ret = gomp_cpuset_popcount (&cpuset);
+ int ret = gomp_cpuset_popcount (gomp_get_cpuset_size, gomp_cpusetp);
return ret != 0 ? ret : 1;
}
}
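
The retry loop in gomp_init_num_threads grows the cpuset until the
kernel stops returning EINVAL, which it does while the buffer is smaller
than its internal cpumask. A standalone sketch of the same probing idea
(hypothetical helper; assumes glibc 2.7+ for CPU_ALLOC_SIZE, and note
that pthread_getaffinity_np reports errors via its return value, not
errno):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <pthread.h>
    #include <sched.h>
    #include <stdlib.h>
    #include <unistd.h>

    static cpu_set_t *
    probe_affinity (size_t *sizep)
    {
      size_t size = CPU_ALLOC_SIZE (sysconf (_SC_NPROCESSORS_CONF));
      for (;;)
        {
          cpu_set_t *set = malloc (size);
          if (set == NULL)
            return NULL;
          int err = pthread_getaffinity_np (pthread_self (), size, set);
          if (err == 0)
            {
              *sizep = size;
              return set;
            }
          free (set);
          if (err != EINVAL)
            return NULL;
          size *= 2;    /* kernel cpumask larger than our guess */
        }
    }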
@@ -28,7 +28,10 @@
#include <sched.h>
#ifdef HAVE_PTHREAD_AFFINITY_NP
-extern unsigned long gomp_cpuset_popcount (cpu_set_t *);
+extern unsigned long gomp_cpuset_size attribute_hidden;
+extern cpu_set_t *gomp_cpusetp attribute_hidden;
+extern unsigned long gomp_cpuset_popcount (unsigned long, cpu_set_t *)
+ attribute_hidden;
#endif
#endif /* GOMP_PROC_H */
@@ -32,7 +32,84 @@ gomp_init_affinity (void)
}
void
-gomp_init_thread_affinity (pthread_attr_t *attr)
+gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place)
{
(void) attr;
+ (void) place;
+}
+
+void **
+gomp_affinity_alloc (unsigned long count, bool quiet)
+{
+ (void) count;
+ if (!quiet)
+ gomp_error ("Affinity not supported on this configuration");
+ return NULL;
+}
+
+void
+gomp_affinity_init_place (void *p)
+{
+ (void) p;
+}
+
+bool
+gomp_affinity_add_cpus (void *p, unsigned long num,
+ unsigned long len, long stride, bool quiet)
+{
+ (void) p;
+ (void) num;
+ (void) len;
+ (void) stride;
+ (void) quiet;
+ return false;
+}
+
+bool
+gomp_affinity_remove_cpu (void *p, unsigned long num)
+{
+ (void) p;
+ (void) num;
+ return false;
+}
+
+bool
+gomp_affinity_copy_place (void *p, void *q, long stride)
+{
+ (void) p;
+ (void) q;
+ (void) stride;
+ return false;
+}
+
+bool
+gomp_affinity_same_place (void *p, void *q)
+{
+ (void) p;
+ (void) q;
+ return false;
+}
+
+bool
+gomp_affinity_finalize_place_list (bool quiet)
+{
+ (void) quiet;
+ return false;
+}
+
+bool
+gomp_affinity_init_level (int level, unsigned long count, bool quiet)
+{
+ (void) level;
+ (void) count;
+ (void) quiet;
+ if (!quiet)
+ gomp_error ("Affinity not supported on this configuration");
+ return false;
+}
+
+void
+gomp_affinity_print_place (void *p)
+{
+ (void) p;
}
@@ -42,6 +42,7 @@ gomp_barrier_init (gomp_barrier_t *bar,
bar->total = count;
bar->arrived = 0;
bar->generation = 0;
+ bar->cancellable = false;
}
void
@@ -72,7 +73,7 @@ gomp_barrier_wait_end (gomp_barrier_t *b
{
unsigned int n;
- if (state & 1)
+ if (state & BAR_WAS_LAST)
{
n = --bar->arrived;
if (n > 0)
@@ -113,12 +114,14 @@ gomp_team_barrier_wait_end (gomp_barrier
{
unsigned int n;
- if (state & 1)
+ state &= ~BAR_CANCELLED;
+ if (state & BAR_WAS_LAST)
{
n = --bar->arrived;
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
+ team->work_share_cancelled = 0;
if (team->task_count)
{
gomp_barrier_handle_tasks (state);
@@ -128,7 +131,7 @@ gomp_team_barrier_wait_end (gomp_barrier
return;
}
- bar->generation = state + 3;
+ bar->generation = state + BAR_INCR - BAR_WAS_LAST;
if (n > 0)
{
do
@@ -141,13 +144,18 @@ gomp_team_barrier_wait_end (gomp_barrier
else
{
gomp_mutex_unlock (&bar->mutex1);
+ int gen;
do
{
gomp_sem_wait (&bar->sem1);
- if (bar->generation & 1)
- gomp_barrier_handle_tasks (state);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ if (gen & BAR_TASK_PENDING)
+ {
+ gomp_barrier_handle_tasks (state);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ }
}
- while (bar->generation != state + 4);
+ while (gen != state + BAR_INCR);
#ifdef HAVE_SYNC_BUILTINS
n = __sync_add_and_fetch (&bar->arrived, -1);
@@ -162,6 +170,81 @@ gomp_team_barrier_wait_end (gomp_barrier
}
}
+bool
+gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
+ gomp_barrier_state_t state)
+{
+ unsigned int n;
+
+ if (state & BAR_WAS_LAST)
+ {
+ bar->cancellable = false;
+ n = --bar->arrived;
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+
+ team->work_share_cancelled = 0;
+ if (team->task_count)
+ {
+ gomp_barrier_handle_tasks (state);
+ if (n > 0)
+ gomp_sem_wait (&bar->sem2);
+ gomp_mutex_unlock (&bar->mutex1);
+ return false;
+ }
+
+ bar->generation = state + BAR_INCR - BAR_WAS_LAST;
+ if (n > 0)
+ {
+ do
+ gomp_sem_post (&bar->sem1);
+ while (--n != 0);
+ gomp_sem_wait (&bar->sem2);
+ }
+ gomp_mutex_unlock (&bar->mutex1);
+ }
+ else
+ {
+ if (state & BAR_CANCELLED)
+ {
+ gomp_mutex_unlock (&bar->mutex1);
+ return true;
+ }
+ bar->cancellable = true;
+ gomp_mutex_unlock (&bar->mutex1);
+ int gen;
+ do
+ {
+ gomp_sem_wait (&bar->sem1);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ if (gen & BAR_CANCELLED)
+ break;
+ if (gen & BAR_TASK_PENDING)
+ {
+ gomp_barrier_handle_tasks (state);
+ gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
+ if (gen & BAR_CANCELLED)
+ break;
+ }
+ }
+ while (gen != state + BAR_INCR);
+
+#ifdef HAVE_SYNC_BUILTINS
+ n = __sync_add_and_fetch (&bar->arrived, -1);
+#else
+ gomp_mutex_lock (&bar->mutex2);
+ n = --bar->arrived;
+ gomp_mutex_unlock (&bar->mutex2);
+#endif
+
+ if (n == 0)
+ gomp_sem_post (&bar->sem2);
+ if (gen & BAR_CANCELLED)
+ return true;
+ }
+ return false;
+}
+
void
gomp_team_barrier_wait (gomp_barrier_t *barrier)
{
@@ -176,3 +259,40 @@ gomp_team_barrier_wake (gomp_barrier_t *
while (count-- > 0)
gomp_sem_post (&bar->sem1);
}
+
+bool
+gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
+{
+ gomp_barrier_state_t state = gomp_barrier_wait_cancel_start (bar);
+ return gomp_team_barrier_wait_cancel_end (bar, state);
+}
+
+void
+gomp_team_barrier_cancel (struct gomp_team *team)
+{
+ if (team->barrier.generation & BAR_CANCELLED)
+ return;
+ gomp_mutex_lock (&team->barrier.mutex1);
+ gomp_mutex_lock (&team->task_lock);
+ if (team->barrier.generation & BAR_CANCELLED)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_mutex_unlock (&team->barrier.mutex1);
+ return;
+ }
+ team->barrier.generation |= BAR_CANCELLED;
+ gomp_mutex_unlock (&team->task_lock);
+ if (team->barrier.cancellable)
+ {
+ int n = team->barrier.arrived;
+ if (n > 0)
+ {
+ do
+ gomp_sem_post (&team->barrier.sem1);
+ while (--n != 0);
+ gomp_sem_wait (&team->barrier.sem2);
+ }
+ team->barrier.cancellable = false;
+ }
+ gomp_mutex_unlock (&team->barrier.mutex1);
+}
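
gomp_team_barrier_cancel above is a double-checked flag update: a cheap
unlocked read short-circuits the already-cancelled case, and the flag is
re-tested under the lock before being set. The same shape in miniature
(illustrative types and names, not libgomp API):

    #include <pthread.h>
    #include <stdbool.h>

    struct cancellable
    {
      pthread_mutex_t lock;
      bool cancelled;
    };

    static void
    cancel_once (struct cancellable *c)
    {
      if (c->cancelled)                /* racy fast path, re-checked below */
        return;
      pthread_mutex_lock (&c->lock);
      if (!c->cancelled)
        c->cancelled = true;           /* first canceller wins */
      pthread_mutex_unlock (&c->lock);
    }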
@@ -43,9 +43,20 @@ typedef struct
unsigned total;
unsigned arrived;
unsigned generation;
+ bool cancellable;
} gomp_barrier_t;
+
typedef unsigned int gomp_barrier_state_t;
+/* The generation field contains a counter in the high bits, with a few
+ low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can
+ share space because WAS_LAST is never stored back to generation. */
+#define BAR_TASK_PENDING 1
+#define BAR_WAS_LAST 1
+#define BAR_WAITING_FOR_TASK 2
+#define BAR_CANCELLED 4
+#define BAR_INCR 8
+
extern void gomp_barrier_init (gomp_barrier_t *, unsigned);
extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned);
extern void gomp_barrier_destroy (gomp_barrier_t *);
@@ -55,22 +66,47 @@ extern void gomp_barrier_wait_end (gomp_
extern void gomp_team_barrier_wait (gomp_barrier_t *);
extern void gomp_team_barrier_wait_end (gomp_barrier_t *,
gomp_barrier_state_t);
+extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *);
+extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *,
+ gomp_barrier_state_t);
extern void gomp_team_barrier_wake (gomp_barrier_t *, int);
+struct gomp_team;
+extern void gomp_team_barrier_cancel (struct gomp_team *);
static inline gomp_barrier_state_t
gomp_barrier_wait_start (gomp_barrier_t *bar)
{
unsigned int ret;
gomp_mutex_lock (&bar->mutex1);
- ret = bar->generation & ~3;
- ret += ++bar->arrived == bar->total;
+ ret = bar->generation & (-BAR_INCR | BAR_CANCELLED);
+ if (++bar->arrived == bar->total)
+ ret |= BAR_WAS_LAST;
+ return ret;
+}
+
+static inline gomp_barrier_state_t
+gomp_barrier_wait_cancel_start (gomp_barrier_t *bar)
+{
+ unsigned int ret;
+ gomp_mutex_lock (&bar->mutex1);
+ ret = bar->generation & (-BAR_INCR | BAR_CANCELLED);
+ if (ret & BAR_CANCELLED)
+ return ret;
+ if (++bar->arrived == bar->total)
+ ret |= BAR_WAS_LAST;
return ret;
}
+static inline void
+gomp_team_barrier_wait_final (gomp_barrier_t *bar)
+{
+ gomp_team_barrier_wait (bar);
+}
+
static inline bool
gomp_barrier_last_thread (gomp_barrier_state_t state)
{
- return state & 1;
+ return state & BAR_WAS_LAST;
}
static inline void
@@ -85,31 +121,37 @@ gomp_barrier_wait_last (gomp_barrier_t *
static inline void
gomp_team_barrier_set_task_pending (gomp_barrier_t *bar)
{
- bar->generation |= 1;
+ bar->generation |= BAR_TASK_PENDING;
}
static inline void
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar)
{
- bar->generation &= ~1;
+ bar->generation &= ~BAR_TASK_PENDING;
}
static inline void
gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar)
{
- bar->generation |= 2;
+ bar->generation |= BAR_WAITING_FOR_TASK;
}
static inline bool
gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar)
{
- return (bar->generation & 2) != 0;
+ return (bar->generation & BAR_WAITING_FOR_TASK) != 0;
+}
+
+static inline bool
+gomp_team_barrier_cancelled (gomp_barrier_t *bar)
+{
+ return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0);
}
static inline void
gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
- bar->generation = (state & ~3) + 4;
+ bar->generation = (state & -BAR_INCR) + BAR_INCR;
}
#endif /* GOMP_BARRIER_H */
@@ -29,6 +29,10 @@
#include "libgomp_f.h"
#include <ctype.h>
#include <stdlib.h>
+#include <stdio.h>
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h> /* For PRIu64. */
+#endif
#ifdef STRING_WITH_STRINGS
# include <string.h>
# include <strings.h>
@@ -50,23 +54,28 @@
struct gomp_task_icv gomp_global_icv = {
.nthreads_var = 1,
+ .thread_limit_var = UINT_MAX,
.run_sched_var = GFS_DYNAMIC,
.run_sched_modifier = 1,
+ .default_device_var = 0,
.dyn_var = false,
- .nest_var = false
+ .nest_var = false,
+ .bind_var = omp_proc_bind_false,
+ .target_data = NULL
};
-unsigned short *gomp_cpu_affinity;
-size_t gomp_cpu_affinity_len;
unsigned long gomp_max_active_levels_var = INT_MAX;
-unsigned long gomp_thread_limit_var = ULONG_MAX;
-unsigned long gomp_remaining_threads_count;
+bool gomp_cancel_var = false;
#ifndef HAVE_SYNC_BUILTINS
-gomp_mutex_t gomp_remaining_threads_lock;
+gomp_mutex_t gomp_managed_threads_lock;
#endif
unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
+char *gomp_bind_var_list;
+unsigned long gomp_bind_var_list_len;
+void **gomp_places_list;
+unsigned long gomp_places_list_len;
/* Parse the OMP_SCHEDULE environment variable. */
@@ -184,6 +193,24 @@ parse_unsigned_long (const char *name, u
return false;
}
+/* Parse a positive int environment variable. Return true if one was
+ present and it was successfully parsed. */
+
+static bool
+parse_int (const char *name, int *pvalue, bool allow_zero)
+{
+ unsigned long value;
+ if (!parse_unsigned_long (name, &value, allow_zero))
+ return false;
+ if (value > INT_MAX)
+ {
+ gomp_error ("Invalid value for environment variable %s", name);
+ return false;
+ }
+ *pvalue = (int) value;
+ return true;
+}
+
/* Parse an unsigned long list environment variable. Return true if one was
present and it was successfully parsed. */
@@ -273,6 +300,412 @@ parse_unsigned_long_list (const char *na
return false;
}
+/* Parse environment variable set to a boolean or list of omp_proc_bind_t
+ enum values. Return true if one was present and it was successfully
+ parsed. */
+
+static bool
+parse_bind_var (const char *name, char *p1stvalue,
+ char **pvalues, unsigned long *pnvalues)
+{
+ char *env;
+ char value, *values = NULL;
+ int i;
+ static struct proc_bind_kinds
+ {
+ const char name[7];
+ const char len;
+ omp_proc_bind_t kind;
+ } kinds[] =
+ {
+ { "false", 5, omp_proc_bind_false },
+ { "true", 4, omp_proc_bind_true },
+ { "master", 6, omp_proc_bind_master },
+ { "close", 5, omp_proc_bind_close },
+ { "spread", 6, omp_proc_bind_spread }
+ };
+
+ env = getenv (name);
+ if (env == NULL)
+ return false;
+
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '\0')
+ goto invalid;
+
+ for (i = 0; i < 5; i++)
+ if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0)
+ {
+ value = kinds[i].kind;
+ env += kinds[i].len;
+ break;
+ }
+ if (i == 5)
+ goto invalid;
+
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env != '\0')
+ {
+ if (*env == ',')
+ {
+ unsigned long nvalues = 0, nalloced = 0;
+
+ if (value == omp_proc_bind_false
+ || value == omp_proc_bind_true)
+ goto invalid;
+
+ do
+ {
+ env++;
+ if (nvalues == nalloced)
+ {
+ char *n;
+ nalloced = nalloced ? nalloced * 2 : 16;
+ n = realloc (values, nalloced);
+ if (n == NULL)
+ {
+ free (values);
+ gomp_error ("Out of memory while trying to parse"
+ " environment variable %s", name);
+ return false;
+ }
+ values = n;
+ if (nvalues == 0)
+ values[nvalues++] = value;
+ }
+
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '\0')
+ goto invalid;
+
+ for (i = 2; i < 5; i++)
+ if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0)
+ {
+ value = kinds[i].kind;
+ env += kinds[i].len;
+ break;
+ }
+ if (i == 5)
+ goto invalid;
+
+ values[nvalues++] = value;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '\0')
+ break;
+ if (*env != ',')
+ goto invalid;
+ }
+ while (1);
+ *p1stvalue = values[0];
+ *pvalues = values;
+ *pnvalues = nvalues;
+ return true;
+ }
+ goto invalid;
+ }
+
+ *p1stvalue = value;
+ return true;
+
+ invalid:
+ free (values);
+ gomp_error ("Invalid value for environment variable %s", name);
+ return false;
+}
+
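
parse_bind_var above accepts either a plain boolean or a comma-separated
list of master, close and spread values, applied to successive nesting
levels. A quick way to observe the parsed value (hypothetical test
program, assuming an OpenMP 4.0 toolchain):

    /* Run as: OMP_PROC_BIND=spread,close ./a.out  */
    #include <omp.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* Prints 4 for the example above: omp_proc_bind_spread == 4
         in the OpenMP 4.0 enum.  */
      printf ("bind at level 0: %d\n", (int) omp_get_proc_bind ());
      return 0;
    }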
+static bool
+parse_one_place (char **envp, bool *negatep, unsigned long *lenp,
+ long *stridep)
+{
+ char *env = *envp, *start;
+ void *p = gomp_places_list ? gomp_places_list[gomp_places_list_len] : NULL;
+ unsigned long len = 1;
+ long stride = 1;
+ int pass;
+ bool any_negate = false;
+ *negatep = false;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '!')
+ {
+ *negatep = true;
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ }
+ if (*env != '{')
+ return false;
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ start = env;
+ for (pass = 0; pass < (any_negate ? 2 : 1); pass++)
+ {
+ env = start;
+ do
+ {
+ unsigned long this_num, this_len = 1;
+ long this_stride = 1;
+ bool this_negate = (*env == '!');
+ if (this_negate)
+ {
+ if (gomp_places_list)
+ any_negate = true;
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ }
+
+ errno = 0;
+ this_num = strtoul (env, &env, 10);
+ if (errno)
+ return false;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == ':')
+ {
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ errno = 0;
+ this_len = strtoul (env, &env, 10);
+ if (errno || this_len == 0)
+ return false;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == ':')
+ {
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ errno = 0;
+ this_stride = strtol (env, &env, 10);
+ if (errno)
+ return false;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ }
+ }
+ if (this_negate && this_len != 1)
+ return false;
+ if (gomp_places_list && pass == this_negate)
+ {
+ if (this_negate)
+ {
+ if (!gomp_affinity_remove_cpu (p, this_num))
+ return false;
+ }
+ else if (!gomp_affinity_add_cpus (p, this_num, this_len,
+ this_stride, false))
+ return false;
+ }
+ if (*env == '}')
+ break;
+ if (*env != ',')
+ return false;
+ ++env;
+ }
+ while (1);
+ }
+
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == ':')
+ {
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ errno = 0;
+ len = strtoul (env, &env, 10);
+ if (errno || len == 0 || len >= 65536)
+ return false;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == ':')
+ {
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ errno = 0;
+ stride = strtol (env, &env, 10);
+ if (errno)
+ return false;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ }
+ }
+ if (*negatep && len != 1)
+ return false;
+ *envp = env;
+ *lenp = len;
+ *stridep = stride;
+ return true;
+}
+
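
The grammar handled by parse_one_place is
{num[:len[:stride]],...}[:len[:stride]], with ! negating either a single
CPU inside a place or a whole place in the list. Some accepted forms and
their meaning (illustrative):

    OMP_PLACES='{0,1,2,3},{4,5,6,7}'  two explicit places
    OMP_PLACES='{0:4},{4:4}'          the same, in start:len notation
    OMP_PLACES='{0:4}:4:4'            {0:4} replicated four times with
                                      stride 4: {0:4},{4:4},{8:4},{12:4}
    OMP_PLACES='{0:8,!4}'             one place, CPUs 0-7 minus CPU 4
    OMP_PLACES='threads(8)'           first 8 hardware threads
    OMP_PLACES=cores                  one place per physical core

The threads/cores/sockets keywords are handled by parse_places_var below,
which hands them off to gomp_affinity_init_level.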
+static bool
+parse_places_var (const char *name)
+{
+ char *env = getenv (name), *end;
+ bool any_negate = false;
+ int level = 0;
+ unsigned long count = 0;
+ if (env == NULL)
+ return false;
+
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env == '\0')
+ goto invalid;
+
+ if (strncasecmp (env, "threads", 7) == 0)
+ {
+ env += 7;
+ level = 1;
+ }
+ else if (strncasecmp (env, "cores", 5) == 0)
+ {
+ env += 5;
+ level = 2;
+ }
+ else if (strncasecmp (env, "sockets", 7) == 0)
+ {
+ env += 7;
+ level = 3;
+ }
+ if (level)
+ {
+ count = ULONG_MAX;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env != '\0')
+ {
+ if (*env++ != '(')
+ goto invalid;
+ while (isspace ((unsigned char) *env))
+ ++env;
+
+ errno = 0;
+ count = strtoul (env, &end, 10);
+ if (errno)
+ goto invalid;
+ env = end;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env != ')')
+ goto invalid;
+ ++env;
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env != '\0')
+ goto invalid;
+ }
+ return gomp_affinity_init_level (level, count, false);
+ }
+
+ count = 0;
+ end = env;
+ do
+ {
+ bool negate;
+ unsigned long len;
+ long stride;
+ if (!parse_one_place (&end, &negate, &len, &stride))
+ goto invalid;
+ if (negate)
+ {
+ if (!any_negate)
+ count++;
+ any_negate = true;
+ }
+ else
+ count += len;
+ if (count > 65536)
+ goto invalid;
+ if (*end == '\0')
+ break;
+ if (*end != ',')
+ goto invalid;
+ end++;
+ }
+ while (1);
+
+ if (gomp_global_icv.bind_var == omp_proc_bind_false)
+ return false;
+
+ gomp_places_list_len = 0;
+ gomp_places_list = gomp_affinity_alloc (count, false);
+ if (gomp_places_list == NULL)
+ return false;
+
+ do
+ {
+ bool negate;
+ unsigned long len;
+ long stride;
+ gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]);
+ if (!parse_one_place (&env, &negate, &len, &stride))
+ goto invalid;
+ if (negate)
+ {
+ void *p;
+ for (count = 0; count < gomp_places_list_len; count++)
+ if (gomp_affinity_same_place
+ (gomp_places_list[count],
+ gomp_places_list[gomp_places_list_len]))
+ break;
+ if (count == gomp_places_list_len)
+ {
+ gomp_error ("Trying to remove a non-existing place from list "
+ "of places");
+ goto invalid;
+ }
+ p = gomp_places_list[count];
+ memmove (&gomp_places_list[count],
+ &gomp_places_list[count + 1],
+ (gomp_places_list_len - count - 1) * sizeof (void *));
+ --gomp_places_list_len;
+ gomp_places_list[gomp_places_list_len] = p;
+ }
+ else if (len == 1)
+ ++gomp_places_list_len;
+ else
+ {
+ for (count = 0; count < len - 1; count++)
+ if (!gomp_affinity_copy_place
+ (gomp_places_list[gomp_places_list_len + count + 1],
+ gomp_places_list[gomp_places_list_len + count],
+ stride))
+ goto invalid;
+ gomp_places_list_len += len;
+ }
+ if (*env == '\0')
+ break;
+ env++;
+ }
+ while (1);
+
+ if (gomp_places_list_len == 0)
+ {
+ gomp_error ("All places have been removed");
+ goto invalid;
+ }
+ if (!gomp_affinity_finalize_place_list (false))
+ goto invalid;
+ return true;
+
+ invalid:
+ free (gomp_places_list);
+ gomp_places_list = NULL;
+ gomp_places_list_len = 0;
+ gomp_error ("Invalid value for environment variable %s", name);
+ return false;
+}
+
/* Parse the OMP_STACKSIZE environment variable. Return true if one was
present and it was successfully parsed. */
@@ -480,84 +913,89 @@ parse_wait_policy (void)
static bool
parse_affinity (void)
{
- char *env, *end;
+ char *env, *end, *start;
+ int pass;
unsigned long cpu_beg, cpu_end, cpu_stride;
- unsigned short *cpus = NULL;
- size_t allocated = 0, used = 0, needed;
+ size_t count = 0, needed;
env = getenv ("GOMP_CPU_AFFINITY");
if (env == NULL)
return false;
- do
+ start = env;
+ for (pass = 0; pass < 2; pass++)
{
- while (*env == ' ' || *env == '\t')
- env++;
-
- cpu_beg = strtoul (env, &end, 0);
- cpu_end = cpu_beg;
- cpu_stride = 1;
- if (env == end || cpu_beg >= 65536)
- goto invalid;
-
- env = end;
- if (*env == '-')
+ env = start;
+ if (pass == 1)
+ {
+ gomp_places_list_len = 0;
+ gomp_places_list = gomp_affinity_alloc (count, true);
+ if (gomp_places_list == NULL)
+ return false;
+ }
+ do
{
- cpu_end = strtoul (++env, &end, 0);
- if (env == end || cpu_end >= 65536 || cpu_end < cpu_beg)
+ while (isspace ((unsigned char) *env))
+ ++env;
+
+ errno = 0;
+ cpu_beg = strtoul (env, &end, 0);
+ if (errno || cpu_beg >= 65536)
goto invalid;
+ cpu_end = cpu_beg;
+ cpu_stride = 1;
env = end;
- if (*env == ':')
+ if (*env == '-')
{
- cpu_stride = strtoul (++env, &end, 0);
- if (env == end || cpu_stride == 0 || cpu_stride >= 65536)
+ errno = 0;
+ cpu_end = strtoul (++env, &end, 0);
+ if (errno || cpu_end >= 65536 || cpu_end < cpu_beg)
goto invalid;
env = end;
- }
- }
+ if (*env == ':')
+ {
+ errno = 0;
+ cpu_stride = strtoul (++env, &end, 0);
+ if (errno || cpu_stride == 0 || cpu_stride >= 65536)
+ goto invalid;
- needed = (cpu_end - cpu_beg) / cpu_stride + 1;
- if (used + needed >= allocated)
- {
- unsigned short *new_cpus;
+ env = end;
+ }
+ }
- if (allocated < 64)
- allocated = 64;
- if (allocated > needed)
- allocated <<= 1;
+ needed = (cpu_end - cpu_beg) / cpu_stride + 1;
+ if (pass == 0)
+ count += needed;
else
- allocated += 2 * needed;
- new_cpus = realloc (cpus, allocated * sizeof (unsigned short));
- if (new_cpus == NULL)
{
- free (cpus);
- gomp_error ("not enough memory to store GOMP_CPU_AFFINITY list");
- return false;
+ while (needed--)
+ {
+ void *p = gomp_places_list[gomp_places_list_len];
+ gomp_affinity_init_place (p);
+ if (gomp_affinity_add_cpus (p, cpu_beg, 1, 0, true))
+ ++gomp_places_list_len;
+ cpu_beg += cpu_stride;
+ }
}
- cpus = new_cpus;
- }
+ while (isspace ((unsigned char) *env))
+ ++env;
- while (needed--)
- {
- cpus[used++] = cpu_beg;
- cpu_beg += cpu_stride;
+ if (*env == ',')
+ env++;
+ else if (*env == '\0')
+ break;
}
-
- while (*env == ' ' || *env == '\t')
- env++;
-
- if (*env == ',')
- env++;
- else if (*env == '\0')
- break;
+ while (1);
}
- while (1);
- gomp_cpu_affinity = cpus;
- gomp_cpu_affinity_len = used;
+ if (gomp_places_list_len == 0)
+ {
+ free (gomp_places_list);
+ gomp_places_list = NULL;
+ }
return true;
invalid:
@@ -565,12 +1003,160 @@ parse_affinity (void)
return false;
}
+
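
The rewritten parse_affinity keeps the old GOMP_CPU_AFFINITY syntax but
now materializes each listed CPU as a single-CPU entry in
gomp_places_list instead of filling the removed gomp_cpu_affinity array.
For example, the classic setting from the libgomp documentation

    GOMP_CPU_AFFINITY="0 3 1-2 4-15:2"

binds threads, in order, to CPUs 0, 3, 1, 2, 4, 6, 8, 10, 12 and 14,
each of which becomes its own one-CPU place.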
+static void
+handle_omp_display_env (unsigned long stacksize, int wait_policy)
+{
+ const char *env;
+ bool display = false;
+ bool verbose = false;
+ int i;
+
+ env = getenv ("OMP_DISPLAY_ENV");
+ if (env == NULL)
+ return;
+
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (strncasecmp (env, "true", 4) == 0)
+ {
+ display = true;
+ env += 4;
+ }
+ else if (strncasecmp (env, "false", 5) == 0)
+ {
+ display = false;
+ env += 5;
+ }
+ else if (strncasecmp (env, "verbose", 7) == 0)
+ {
+ display = true;
+ verbose = true;
+ env += 7;
+ }
+ else
+ env = "X";
+ while (isspace ((unsigned char) *env))
+ ++env;
+ if (*env != '\0')
+ gomp_error ("Invalid value for environment variable OMP_DISPLAY_ENV");
+
+ if (!display)
+ return;
+
+ fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr);
+
+ fputs (" _OPENMP = '201307'\n", stderr);
+ fprintf (stderr, " OMP_DYNAMIC = '%s'\n",
+ gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
+ fprintf (stderr, " OMP_NESTED = '%s'\n",
+ gomp_global_icv.nest_var ? "TRUE" : "FALSE");
+
+ fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
+ for (i = 1; i < gomp_nthreads_var_list_len; i++)
+ fprintf (stderr, ",%lu", gomp_nthreads_var_list[i]);
+ fputs ("'\n", stderr);
+
+ fprintf (stderr, " OMP_SCHEDULE = '");
+ switch (gomp_global_icv.run_sched_var)
+ {
+ case GFS_RUNTIME:
+ fputs ("RUNTIME", stderr);
+ break;
+ case GFS_STATIC:
+ fputs ("STATIC", stderr);
+ break;
+ case GFS_DYNAMIC:
+ fputs ("DYNAMIC", stderr);
+ break;
+ case GFS_GUIDED:
+ fputs ("GUIDED", stderr);
+ break;
+ case GFS_AUTO:
+ fputs ("AUTO", stderr);
+ break;
+ }
+ fputs ("'\n", stderr);
+
+ fputs (" OMP_PROC_BIND = '", stderr);
+ switch (gomp_global_icv.bind_var)
+ {
+ case omp_proc_bind_false:
+ fputs ("FALSE", stderr);
+ break;
+ case omp_proc_bind_true:
+ fputs ("TRUE", stderr);
+ break;
+ case omp_proc_bind_master:
+ fputs ("MASTER", stderr);
+ break;
+ case omp_proc_bind_close:
+ fputs ("CLOSE", stderr);
+ break;
+ case omp_proc_bind_spread:
+ fputs ("SPREAD", stderr);
+ break;
+ }
+ for (i = 1; i < gomp_bind_var_list_len; i++)
+ switch (gomp_bind_var_list[i])
+ {
+ case omp_proc_bind_master:
+ fputs (",MASTER", stderr);
+ break;
+ case omp_proc_bind_close:
+ fputs (",CLOSE", stderr);
+ break;
+ case omp_proc_bind_spread:
+ fputs (",SPREAD", stderr);
+ break;
+ }
+ fputs ("'\n", stderr);
+ fputs (" OMP_PLACES = '", stderr);
+ for (i = 0; i < gomp_places_list_len; i++)
+ {
+ fputs ("{", stderr);
+ gomp_affinity_print_place (gomp_places_list[i]);
+ fputs (i + 1 == gomp_places_list_len ? "}" : "},", stderr);
+ }
+ fputs ("'\n", stderr);
+
+ fprintf (stderr, " OMP_STACKSIZE = '%lu'\n", stacksize);
+
+ /* GOMP's default value is actually neither active nor passive. */
+ fprintf (stderr, " OMP_WAIT_POLICY = '%s'\n",
+ wait_policy > 0 ? "ACTIVE" : "PASSIVE");
+ fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n",
+ gomp_global_icv.thread_limit_var);
+ fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n",
+ gomp_max_active_levels_var);
+
+ fprintf (stderr, " OMP_CANCELLATION = '%s'\n",
+ gomp_cancel_var ? "TRUE" : "FALSE");
+ fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n",
+ gomp_global_icv.default_device_var);
+
+ if (verbose)
+ {
+ fputs (" GOMP_CPU_AFFINITY = ''\n", stderr);
+ fprintf (stderr, " GOMP_STACKSIZE = '%lu'\n", stacksize);
+#ifdef HAVE_INTTYPES_H
+ fprintf (stderr, " GOMP_SPINCOUNT = '%"PRIu64"'\n",
+ (uint64_t) gomp_spin_count_var);
+#else
+ fprintf (stderr, " GOMP_SPINCOUNT = '%lu'\n",
+ (unsigned long) gomp_spin_count_var);
+#endif
+ }
+
+ fputs ("OPENMP DISPLAY ENVIRONMENT END\n", stderr);
+}
+
+
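
For reference, with OMP_DISPLAY_ENV=true and an otherwise empty
environment, handle_omp_display_env prints a block of this shape on
stderr (the exact numbers depend on the machine and build defaults;
values below are illustrative):

    OPENMP DISPLAY ENVIRONMENT BEGIN
      _OPENMP = '201307'
      OMP_DYNAMIC = 'FALSE'
      OMP_NESTED = 'FALSE'
      OMP_NUM_THREADS = '4'
      OMP_SCHEDULE = 'DYNAMIC'
      OMP_PROC_BIND = 'FALSE'
      OMP_PLACES = ''
      OMP_STACKSIZE = '...'
      OMP_WAIT_POLICY = 'PASSIVE'
      OMP_THREAD_LIMIT = '4294967295'
      OMP_MAX_ACTIVE_LEVELS = '2147483647'
      OMP_CANCELLATION = 'FALSE'
      OMP_DEFAULT_DEVICE = '0'
    OPENMP DISPLAY ENVIRONMENT END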
static void __attribute__((constructor))
initialize_env (void)
{
- unsigned long stacksize;
+ unsigned long thread_limit_var, stacksize;
int wait_policy;
- bool bind_var = false;
/* Do a compile time check that mkomp_h.pl did good job. */
omp_check_defines ();
@@ -578,14 +1164,17 @@ initialize_env (void)
parse_schedule ();
parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
- parse_boolean ("OMP_PROC_BIND", &bind_var);
+ parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
+ parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
true);
- parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var, false);
- if (gomp_thread_limit_var != ULONG_MAX)
- gomp_remaining_threads_count = gomp_thread_limit_var - 1;
+ if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
+ {
+ gomp_global_icv.thread_limit_var
+ = thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var;
+ }
#ifndef HAVE_SYNC_BUILTINS
- gomp_mutex_init (&gomp_remaining_threads_lock);
+ gomp_mutex_init (&gomp_managed_threads_lock);
#endif
gomp_init_num_threads ();
gomp_available_cpus = gomp_global_icv.nthreads_var;
@@ -594,7 +1183,14 @@ initialize_env (void)
&gomp_nthreads_var_list,
&gomp_nthreads_var_list_len))
gomp_global_icv.nthreads_var = gomp_available_cpus;
- if (parse_affinity () || bind_var)
+ if (!parse_bind_var ("OMP_PROC_BIND",
+ &gomp_global_icv.bind_var,
+ &gomp_bind_var_list,
+ &gomp_bind_var_list_len))
+ gomp_global_icv.bind_var = omp_proc_bind_false;
+ if (parse_places_var ("OMP_PLACES")
+ || parse_affinity ()
+ || gomp_global_icv.bind_var)
gomp_init_affinity ();
wait_policy = parse_wait_policy ();
if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
@@ -645,6 +1241,8 @@ initialize_env (void)
if (err != 0)
gomp_error ("Stack size change failed: %s", strerror (err));
}
+
+ handle_omp_display_env (stacksize, wait_policy);
}
@@ -728,7 +1326,8 @@ omp_get_max_threads (void)
int
omp_get_thread_limit (void)
{
- return gomp_thread_limit_var > INT_MAX ? INT_MAX : gomp_thread_limit_var;
+ struct gomp_task_icv *icv = gomp_icv (false);
+ return icv->thread_limit_var > INT_MAX ? INT_MAX : icv->thread_limit_var;
}
void
@@ -744,6 +1343,60 @@ omp_get_max_active_levels (void)
return gomp_max_active_levels_var;
}
+int
+omp_get_cancellation (void)
+{
+ return gomp_cancel_var;
+}
+
+omp_proc_bind_t
+omp_get_proc_bind (void)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ return icv->bind_var;
+}
+
+void
+omp_set_default_device (int device_num)
+{
+ struct gomp_task_icv *icv = gomp_icv (true);
+ icv->default_device_var = device_num >= 0 ? device_num : 0;
+}
+
+int
+omp_get_default_device (void)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ return icv->default_device_var;
+}
+
+int
+omp_get_num_devices (void)
+{
+ return gomp_get_num_devices ();
+}
+
+int
+omp_get_num_teams (void)
+{
+ /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */
+ return 1;
+}
+
+int
+omp_get_team_num (void)
+{
+ /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */
+ return 0;
+}
+
+int
+omp_is_initial_device (void)
+{
+ /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */
+ return 1;
+}
+
ialias (omp_set_dynamic)
ialias (omp_set_nested)
ialias (omp_set_num_threads)
@@ -755,3 +1408,11 @@ ialias (omp_get_max_threads)
ialias (omp_get_thread_limit)
ialias (omp_set_max_active_levels)
ialias (omp_get_max_active_levels)
+ialias (omp_get_cancellation)
+ialias (omp_get_proc_bind)
+ialias (omp_set_default_device)
+ialias (omp_get_default_device)
+ialias (omp_get_num_devices)
+ialias (omp_get_num_teams)
+ialias (omp_get_team_num)
+ialias (omp_is_initial_device)
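
Together these exports cover the new OpenMP 4.0 device and cancellation
queries. A tiny smoke test of the added entry points (illustrative
program, not part of the patch):

    #include <omp.h>
    #include <stdio.h>

    int
    main (void)
    {
      printf ("cancellation enabled: %d\n", omp_get_cancellation ());
      printf ("proc bind:            %d\n", (int) omp_get_proc_bind ());
      printf ("num devices:          %d\n", omp_get_num_devices ());
      printf ("default device:       %d\n", omp_get_default_device ());
      printf ("on initial device:    %d\n", omp_is_initial_device ());
      return 0;
    }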
@@ -31,11 +31,6 @@
#ifdef HAVE_ATTRIBUTE_ALIAS
/* Use internal aliases if possible. */
-# define ULP STR1(__USER_LABEL_PREFIX__)
-# define STR1(x) STR2(x)
-# define STR2(x) #x
-# define ialias_redirect(fn) \
- extern __typeof (fn) fn __asm__ (ULP "gomp_ialias_" #fn) attribute_hidden;
# ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
ialias_redirect (omp_init_lock)
ialias_redirect (omp_init_nest_lock)
@@ -70,6 +65,14 @@ ialias_redirect (omp_get_ancestor_thread
ialias_redirect (omp_get_team_size)
ialias_redirect (omp_get_active_level)
ialias_redirect (omp_in_final)
+ialias_redirect (omp_get_cancellation)
+ialias_redirect (omp_get_proc_bind)
+ialias_redirect (omp_set_default_device)
+ialias_redirect (omp_get_default_device)
+ialias_redirect (omp_get_num_devices)
+ialias_redirect (omp_get_num_teams)
+ialias_redirect (omp_get_team_num)
+ialias_redirect (omp_is_initial_device)
#endif
#ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
@@ -435,3 +438,57 @@ omp_in_final_ (void)
{
return omp_in_final ();
}
+
+int32_t
+omp_get_cancellation_ (void)
+{
+ return omp_get_cancellation ();
+}
+
+int32_t
+omp_get_proc_bind_ (void)
+{
+ return omp_get_proc_bind ();
+}
+
+void
+omp_set_default_device_ (const int32_t *device_num)
+{
+ return omp_set_default_device (*device_num);
+}
+
+void
+omp_set_default_device_8_ (const int64_t *device_num)
+{
+ return omp_set_default_device (TO_INT (*device_num));
+}
+
+int32_t
+omp_get_default_device_ (void)
+{
+ return omp_get_default_device ();
+}
+
+int32_t
+omp_get_num_devices_ (void)
+{
+ return omp_get_num_devices ();
+}
+
+int32_t
+omp_get_num_teams_ (void)
+{
+ return omp_get_num_teams ();
+}
+
+int32_t
+omp_get_team_num_ (void)
+{
+ return omp_get_team_num ();
+}
+
+int32_t
+omp_is_initial_device_ (void)
+{
+ return omp_is_initial_device ();
+}
@@ -0,0 +1,443 @@
+/* An expandable hash table datatype.
+ Copyright (C) 1999-2013
+ Free Software Foundation, Inc.
+ Contributed by Vladimir Makarov <vmakarov@cygnus.com>.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* The hash table code is copied from include/hashtab.[hc] and adjusted
+ so that the hash table entries live in the flexible array at the end
+ of the control structure, no callbacks are used and the elements in the
+ table are of the hash_entry_type type.
+ Before including this file, define the hash_entry_type type and the
+ htab_alloc and htab_free functions. After including it, define the
+ htab_hash and htab_eq inline functions. */
+
+/* This package implements basic hash table functionality. It is possible
+ to search for an entry, create an entry and destroy an entry.
+
+ Elements in the table are generic pointers.
+
+ The size of the table is not fixed; if the occupancy of the table
+ grows too high the hash table will be expanded.
+
+ The abstract data implementation is based on generalized Algorithm D
+ from Knuth's book "The art of computer programming". The hash table
+ is expanded by creating a new hash table and transferring elements
+ from the old table to the new one. */
+
+/* The type for a hash code. */
+typedef unsigned int hashval_t;
+
+static inline hashval_t htab_hash (hash_entry_type);
+static inline bool htab_eq (hash_entry_type, hash_entry_type);
+
+/* This macro defines the reserved value for an empty table entry. */
+
+#define HTAB_EMPTY_ENTRY ((hash_entry_type) 0)
+
+/* This macro defines reserved value for table entry which contained
+ a deleted element. */
+
+#define HTAB_DELETED_ENTRY ((hash_entry_type) 1)
+
+/* Hash tables are of the following type. The structure
+ (implementation) of this type is not needed for using the hash
+ tables. All work with a hash table should be done only through
+ the functions mentioned below. The size of this structure is subject to
+ change. */
+
+struct htab {
+ /* Current size (in entries) of the hash table. */
+ size_t size;
+
+ /* Current number of elements including also deleted elements. */
+ size_t n_elements;
+
+ /* Current number of deleted elements in the table. */
+ size_t n_deleted;
+
+ /* Current size (in entries) of the hash table, as an index into the
+ table of primes. */
+ unsigned int size_prime_index;
+
+ /* Table itself. */
+ hash_entry_type entries[];
+};
+
+typedef struct htab *htab_t;
+
+/* An enum saying whether we insert into the hash table or not. */
+enum insert_option {NO_INSERT, INSERT};
+
+/* Table of primes and multiplicative inverses.
+
+ Note that these are not minimally reduced inverses. Unlike when generating
+ code to divide by a constant, we want to be able to use the same algorithm
+ all the time. All of these inverses have an implied bit 32 set.
+
+ For the record, the function that computed the table is in
+ libiberty/hashtab.c. */
+
+struct prime_ent
+{
+ hashval_t prime;
+ hashval_t inv;
+ hashval_t inv_m2; /* inverse of prime-2 */
+ hashval_t shift;
+};
+
+static struct prime_ent const prime_tab[] = {
+ { 7, 0x24924925, 0x9999999b, 2 },
+ { 13, 0x3b13b13c, 0x745d1747, 3 },
+ { 31, 0x08421085, 0x1a7b9612, 4 },
+ { 61, 0x0c9714fc, 0x15b1e5f8, 5 },
+ { 127, 0x02040811, 0x0624dd30, 6 },
+ { 251, 0x05197f7e, 0x073260a5, 7 },
+ { 509, 0x01824366, 0x02864fc8, 8 },
+ { 1021, 0x00c0906d, 0x014191f7, 9 },
+ { 2039, 0x0121456f, 0x0161e69e, 10 },
+ { 4093, 0x00300902, 0x00501908, 11 },
+ { 8191, 0x00080041, 0x00180241, 12 },
+ { 16381, 0x000c0091, 0x00140191, 13 },
+ { 32749, 0x002605a5, 0x002a06e6, 14 },
+ { 65521, 0x000f00e2, 0x00110122, 15 },
+ { 131071, 0x00008001, 0x00018003, 16 },
+ { 262139, 0x00014002, 0x0001c004, 17 },
+ { 524287, 0x00002001, 0x00006001, 18 },
+ { 1048573, 0x00003001, 0x00005001, 19 },
+ { 2097143, 0x00004801, 0x00005801, 20 },
+ { 4194301, 0x00000c01, 0x00001401, 21 },
+ { 8388593, 0x00001e01, 0x00002201, 22 },
+ { 16777213, 0x00000301, 0x00000501, 23 },
+ { 33554393, 0x00001381, 0x00001481, 24 },
+ { 67108859, 0x00000141, 0x000001c1, 25 },
+ { 134217689, 0x000004e1, 0x00000521, 26 },
+ { 268435399, 0x00000391, 0x000003b1, 27 },
+ { 536870909, 0x00000019, 0x00000029, 28 },
+ { 1073741789, 0x0000008d, 0x00000095, 29 },
+ { 2147483647, 0x00000003, 0x00000007, 30 },
+ /* Avoid "decimal constant so large it is unsigned" for 4294967291. */
+ { 0xfffffffb, 0x00000006, 0x00000008, 31 }
+};
+
+/* The following function returns an index into the above table of the
+ nearest prime number which is greater than N, and near a power of two. */
+
+static unsigned int
+higher_prime_index (unsigned long n)
+{
+ unsigned int low = 0;
+ unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
+
+ while (low != high)
+ {
+ unsigned int mid = low + (high - low) / 2;
+ if (n > prime_tab[mid].prime)
+ low = mid + 1;
+ else
+ high = mid;
+ }
+
+ /* If we've run out of primes, abort. */
+ if (n > prime_tab[low].prime)
+ abort ();
+
+ return low;
+}
+
+/* Return the current size of given hash table. */
+
+static inline size_t
+htab_size (htab_t htab)
+{
+ return htab->size;
+}
+
+/* Return the current number of elements in given hash table. */
+
+static inline size_t
+htab_elements (htab_t htab)
+{
+ return htab->n_elements - htab->n_deleted;
+}
+
+/* Return X % Y. */
+
+static inline hashval_t
+htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
+{
+ /* The multiplicative inverses computed above are for 32-bit types, and
+ require that we be able to compute a highpart multiply. */
+ if (sizeof (hashval_t) * __CHAR_BIT__ <= 32)
+ {
+ hashval_t t1, t2, t3, t4, q, r;
+
+ t1 = ((unsigned long long)x * inv) >> 32;
+ t2 = x - t1;
+ t3 = t2 >> 1;
+ t4 = t1 + t3;
+ q = t4 >> shift;
+ r = x - (q * y);
+
+ return r;
+ }
+
+ /* Otherwise just use the native division routines. */
+ return x % y;
+}
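
As a quick sanity check of the reciprocal trick, the following standalone
sketch can be compiled on its own; mod_inv is a local copy of htab_mod_1,
the constants are the prime_tab entry for 509, and the loop merely
verifies the shift-and-add sequence against the native remainder:

    #include <stdint.h>
    #include <stdio.h>

    /* Same steps as htab_mod_1 above, condensed.  */
    static uint32_t
    mod_inv (uint32_t x, uint32_t y, uint32_t inv, int shift)
    {
      uint32_t t1 = ((uint64_t) x * inv) >> 32;
      uint32_t q = (t1 + ((x - t1) >> 1)) >> shift;
      return x - q * y;
    }

    int
    main (void)
    {
      uint32_t x;
      for (x = 0; x < 1000000; x++)
        if (mod_inv (x, 509, 0x01824366, 8) != x % 509)
          {
            printf ("mismatch at %u\n", x);
            return 1;
          }
      return 0;
    }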
+
+/* Compute the primary hash for HASH given HTAB's current size. */
+
+static inline hashval_t
+htab_mod (hashval_t hash, htab_t htab)
+{
+ const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+ return htab_mod_1 (hash, p->prime, p->inv, p->shift);
+}
+
+/* Compute the secondary hash for HASH given HTAB's current size. */
+
+static inline hashval_t
+htab_mod_m2 (hashval_t hash, htab_t htab)
+{
+ const struct prime_ent *p = &prime_tab[htab->size_prime_index];
+ return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
+}
+
+/* Create hash table of size SIZE. */
+
+static htab_t
+htab_create (size_t size)
+{
+ htab_t result;
+ unsigned int size_prime_index;
+
+ size_prime_index = higher_prime_index (size);
+ size = prime_tab[size_prime_index].prime;
+
+ result = (htab_t) htab_alloc (sizeof (struct htab)
+ + size * sizeof (hash_entry_type));
+ result->size = size;
+ result->n_elements = 0;
+ result->n_deleted = 0;
+ result->size_prime_index = size_prime_index;
+ memset (result->entries, 0, size * sizeof (hash_entry_type));
+ return result;
+}
+
+/* Similar to htab_find_slot, but without several unwanted side effects:
+ - Does not call htab_eq when it finds an existing entry.
+ - Does not change the count of elements in the hash table.
+ This function also assumes there are no deleted entries in the table.
+ HASH is the hash value for the element to be inserted. */
+
+static hash_entry_type *
+find_empty_slot_for_expand (htab_t htab, hashval_t hash)
+{
+ hashval_t index = htab_mod (hash, htab);
+ size_t size = htab_size (htab);
+ hash_entry_type *slot = htab->entries + index;
+ hashval_t hash2;
+
+ if (*slot == HTAB_EMPTY_ENTRY)
+ return slot;
+ else if (*slot == HTAB_DELETED_ENTRY)
+ abort ();
+
+ hash2 = htab_mod_m2 (hash, htab);
+ for (;;)
+ {
+ index += hash2;
+ if (index >= size)
+ index -= size;
+
+ slot = htab->entries + index;
+ if (*slot == HTAB_EMPTY_ENTRY)
+ return slot;
+ else if (*slot == HTAB_DELETED_ENTRY)
+ abort ();
+ }
+}
+
+/* The following function changes the size of the memory allocated for
+ the entries and re-inserts the existing table elements. The occupancy
+ of the table after the call will be about 50%. Naturally the hash
+ table must already exist. Remember also that the place of the
+ table entries is changed. */
+
+static htab_t
+htab_expand (htab_t htab)
+{
+ htab_t nhtab;
+ hash_entry_type *olimit;
+ hash_entry_type *p;
+ size_t osize, elts;
+
+ osize = htab->size;
+ olimit = htab->entries + osize;
+ elts = htab_elements (htab);
+
+ /* Resize only when the table, after removal of unused elements, is
+ either too full or too empty. */
+ if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
+ nhtab = htab_create (elts * 2);
+ else
+ nhtab = htab_create (osize - 1);
+ nhtab->n_elements = htab->n_elements - htab->n_deleted;
+
+ p = htab->entries;
+ do
+ {
+ hash_entry_type x = *p;
+
+ if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
+ *find_empty_slot_for_expand (nhtab, htab_hash (x)) = x;
+
+ p++;
+ }
+ while (p < olimit);
+
+ htab_free (htab);
+ return nhtab;
+}
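
Concretely: a table of size 509 holding 300 live elements is too full
(300 * 2 = 600 > 509) and is rebuilt at the next prime at or above 600,
i.e. 1021; the same table holding 40 live elements is too empty
(40 * 8 = 320 < 509) and is rebuilt at the prime at or above 80, i.e.
127. In all other cases htab_create (osize - 1) rounds back up to the
same prime, so the rebuild merely flushes the deleted entries.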
+
+/* This function searches for a hash table entry equal to the given
+ element. It cannot be used to insert or delete an element. */
+
+static hash_entry_type
+htab_find (htab_t htab, const hash_entry_type element)
+{
+ hashval_t index, hash2, hash = htab_hash (element);
+ size_t size;
+ hash_entry_type entry;
+
+ size = htab_size (htab);
+ index = htab_mod (hash, htab);
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY
+ || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+ return entry;
+
+ hash2 = htab_mod_m2 (hash, htab);
+ for (;;)
+ {
+ index += hash2;
+ if (index >= size)
+ index -= size;
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY
+ || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element)))
+ return entry;
+ }
+}
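
For example, with size 7 and hash 12 the probe starts at slot
12 mod 7 = 5 and steps by hash2 = 1 + 12 mod 5 = 3, visiting slots
5, 1, 4, 0, 3, 6, 2; because the step is nonzero and less than the
prime size, the whole table is covered before any slot repeats.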
+
+/* This function searches for a hash table slot containing an entry
+ equal to the given element. To delete an entry, call this with
+ insert=NO_INSERT, then call htab_clear_slot on the slot returned
+ (possibly after doing some checks). To insert an entry, call this
+ with insert=INSERT, then write the value you want into the returned
+ slot. */
+
+static hash_entry_type *
+htab_find_slot (htab_t *htabp, const hash_entry_type element,
+ enum insert_option insert)
+{
+ hash_entry_type *first_deleted_slot;
+ hashval_t index, hash2, hash = htab_hash (element);
+ size_t size;
+ hash_entry_type entry;
+ htab_t htab = *htabp;
+
+ size = htab_size (htab);
+ if (insert == INSERT && size * 3 <= htab->n_elements * 4)
+ {
+ htab = *htabp = htab_expand (htab);
+ size = htab_size (htab);
+ }
+
+ index = htab_mod (hash, htab);
+
+ first_deleted_slot = NULL;
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY)
+ goto empty_entry;
+ else if (entry == HTAB_DELETED_ENTRY)
+ first_deleted_slot = &htab->entries[index];
+ else if (htab_eq (entry, element))
+ return &htab->entries[index];
+
+ hash2 = htab_mod_m2 (hash, htab);
+ for (;;)
+ {
+ index += hash2;
+ if (index >= size)
+ index -= size;
+
+ entry = htab->entries[index];
+ if (entry == HTAB_EMPTY_ENTRY)
+ goto empty_entry;
+ else if (entry == HTAB_DELETED_ENTRY)
+ {
+ if (!first_deleted_slot)
+ first_deleted_slot = &htab->entries[index];
+ }
+ else if (htab_eq (entry, element))
+ return &htab->entries[index];
+ }
+
+ empty_entry:
+ if (insert == NO_INSERT)
+ return NULL;
+
+ if (first_deleted_slot)
+ {
+ htab->n_deleted--;
+ *first_deleted_slot = HTAB_EMPTY_ENTRY;
+ return first_deleted_slot;
+ }
+
+ htab->n_elements++;
+ return &htab->entries[index];
+}
+
+/* This function clears a specified slot in a hash table. It is
+ useful when you've already done the lookup and don't want to do it
+ again. */
+
+static inline void
+htab_clear_slot (htab_t htab, hash_entry_type *slot)
+{
+ if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
+ || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
+ abort ();
+
+ *slot = HTAB_DELETED_ENTRY;
+ htab->n_deleted++;
+}
+
+/* Returns a hash code for pointer P. Simplified version of evahash */
+
+static inline hashval_t
+hash_pointer (const void *p)
+{
+ uintptr_t v = (uintptr_t) p;
+ if (sizeof (v) > sizeof (hashval_t))
+ v ^= v >> (sizeof (uintptr_t) / 2 * __CHAR_BIT__);
+ return v;
+}
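
The intended instantiation pattern, as the header comment above
describes, looks roughly like the hedged sketch below (the entry type
and the example function are invented; task.c later in this patch
instantiates the real thing):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct my_entry { void *addr; int payload; };
    typedef struct my_entry *hash_entry_type;

    static inline void *htab_alloc (size_t size) { return malloc (size); }
    static inline void htab_free (void *ptr) { free (ptr); }

    #include "hashtab.h"

    static inline hashval_t
    htab_hash (hash_entry_type e) { return hash_pointer (e->addr); }

    static inline bool
    htab_eq (hash_entry_type x, hash_entry_type y) { return x->addr == y->addr; }

    void
    example (struct my_entry *e)
    {
      htab_t tab = htab_create (12);
      /* Insert: ask for a slot with INSERT, then store the entry.  */
      hash_entry_type *slot = htab_find_slot (&tab, e, INSERT);
      if (*slot == HTAB_EMPTY_ENTRY)
        *slot = e;
      /* Delete: look the slot up with NO_INSERT, then clear it.  */
      slot = htab_find_slot (&tab, e, NO_INSERT);
      if (slot)
        htab_clear_slot (tab, slot);
      htab_free (tab);
    }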
@@ -39,6 +39,7 @@
#include <pthread.h>
#include <stdbool.h>
+#include <stdlib.h>
#ifdef HAVE_ATTRIBUTE_VISIBILITY
# pragma GCC visibility push(hidden)
@@ -201,6 +202,10 @@ struct gomp_team_state
/* Active nesting level. Only active parallel regions are counted. */
unsigned active_level;
+ /* Place-partition-var, offset and length into gomp_places_list array. */
+ unsigned place_partition_off;
+ unsigned place_partition_len;
+
#ifdef HAVE_SYNC_BUILTINS
/* Number of single stmts encountered. */
unsigned long single_count;
@@ -214,30 +219,40 @@ struct gomp_team_state
unsigned long static_trip;
};
-/* These are the OpenMP 3.0 Internal Control Variables described in
+struct target_mem_desc;
+
+/* These are the OpenMP 4.0 Internal Control Variables described in
section 2.3.1. Those described as having one copy per task are
stored within the structure; those described as having one copy
for the whole program are (naturally) global variables. */
-
+
struct gomp_task_icv
{
unsigned long nthreads_var;
enum gomp_schedule_type run_sched_var;
int run_sched_modifier;
+ int default_device_var;
+ unsigned int thread_limit_var;
bool dyn_var;
bool nest_var;
+ char bind_var;
+ /* Internal ICV. */
+ struct target_mem_desc *target_data;
};
extern struct gomp_task_icv gomp_global_icv;
-extern unsigned long gomp_thread_limit_var;
-extern unsigned long gomp_remaining_threads_count;
#ifndef HAVE_SYNC_BUILTINS
-extern gomp_mutex_t gomp_remaining_threads_lock;
+extern gomp_mutex_t gomp_managed_threads_lock;
#endif
extern unsigned long gomp_max_active_levels_var;
+extern bool gomp_cancel_var;
extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
extern unsigned long gomp_available_cpus, gomp_managed_threads;
extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
+extern char *gomp_bind_var_list;
+extern unsigned long gomp_bind_var_list_len;
+extern void **gomp_places_list;
+extern unsigned long gomp_places_list_len;
enum gomp_task_kind
{
@@ -247,6 +262,27 @@ enum gomp_task_kind
GOMP_TASK_TIED
};
+struct gomp_task;
+struct gomp_taskgroup;
+struct htab;
+
+struct gomp_task_depend_entry
+{
+ void *addr;
+ struct gomp_task_depend_entry *next;
+ struct gomp_task_depend_entry *prev;
+ struct gomp_task *task;
+ bool is_in;
+ bool redundant;
+};
+
+struct gomp_dependers_vec
+{
+ size_t n_elem;
+ size_t allocated;
+ struct gomp_task *elem[];
+};
+
/* This structure describes a "task" to be run by a thread. */
struct gomp_task
@@ -257,6 +293,13 @@ struct gomp_task
struct gomp_task *prev_child;
struct gomp_task *next_queue;
struct gomp_task *prev_queue;
+ struct gomp_task *next_taskgroup;
+ struct gomp_task *prev_taskgroup;
+ struct gomp_taskgroup *taskgroup;
+ struct gomp_dependers_vec *dependers;
+ struct htab *depend_hash;
+ size_t depend_count;
+ size_t num_dependees;
struct gomp_task_icv icv;
void (*fn) (void *);
void *fn_data;
@@ -264,7 +307,19 @@ struct gomp_task
bool in_taskwait;
bool in_tied_task;
bool final_task;
+ bool copy_ctors_done;
gomp_sem_t taskwait_sem;
+ struct gomp_task_depend_entry depend[];
+};
+
+struct gomp_taskgroup
+{
+ struct gomp_taskgroup *prev;
+ struct gomp_task *children;
+ bool in_taskgroup_wait;
+ bool cancelled;
+ gomp_sem_t taskgroup_sem;
+ size_t num_children;
};
/* This structure describes a "team" of threads. These are the threads
@@ -293,6 +348,12 @@ struct gomp_team
of the threads in the team. */
gomp_sem_t **ordered_release;
+ /* List of work shares on which gomp_fini_work_share hasn't been
+ called yet. If the team hasn't been cancelled, this should be
+ equal to each thr->ts.work_share, but otherwise it can be a possibly
+ long list of workshares. */
+ struct gomp_work_share *work_shares_to_free;
+
/* List of gomp_work_share structs chained through next_free fields.
This is populated and taken off only by the first thread in the
team encountering a new work sharing construct, in a critical
@@ -324,8 +385,20 @@ struct gomp_team
gomp_mutex_t task_lock;
struct gomp_task *task_queue;
- int task_count;
- int task_running_count;
+ /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */
+ unsigned int task_count;
+ /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */
+ unsigned int task_queued_count;
+ /* Number of GOMP_TASK_{WAITING,TIED} tasks currently running
+ directly in gomp_barrier_handle_tasks; tasks spawned
+ from e.g. GOMP_taskwait or GOMP_taskgroup_end don't count, even when
+ that is called from a task run from gomp_barrier_handle_tasks.
+ task_running_count should always be <= team->nthreads,
+ and if the current task isn't in_tied_task, then it will be
+ strictly < team->nthreads. */
+ unsigned int task_running_count;
+ int work_share_cancelled;
+ int team_cancelled;
/* This array contains structures for implicit tasks. */
struct gomp_task implicit_task[];
@@ -350,7 +423,11 @@ struct gomp_thread
/* This semaphore is used for ordered loops. */
gomp_sem_t release;
- /* user pthread thread pool */
+ /* Place this thread is bound to plus one, or zero if not bound
+ to any place. */
+ unsigned int place;
+
+ /* User pthread thread pool */
struct gomp_thread_pool *thread_pool;
};
@@ -363,11 +440,23 @@ struct gomp_thread_pool
unsigned threads_size;
unsigned threads_used;
struct gomp_team *last_team;
+ /* Number of threads running in this contention group. */
+ unsigned long threads_busy;
/* This barrier holds and releases threads waiting in threads. */
gomp_barrier_t threads_dock;
};
+enum gomp_cancel_kind
+{
+ GOMP_CANCEL_PARALLEL = 1,
+ GOMP_CANCEL_LOOP = 2,
+ GOMP_CANCEL_FOR = GOMP_CANCEL_LOOP,
+ GOMP_CANCEL_DO = GOMP_CANCEL_LOOP,
+ GOMP_CANCEL_SECTIONS = 4,
+ GOMP_CANCEL_TASKGROUP = 8
+};
+
/* ... and here is that TLS data. */
#ifdef HAVE_TLS
@@ -402,17 +491,22 @@ static inline struct gomp_task_icv *gomp
/* The attributes to be used during thread creation. */
extern pthread_attr_t gomp_thread_attr;
-/* Other variables. */
-
-extern unsigned short *gomp_cpu_affinity;
-extern size_t gomp_cpu_affinity_len;
-
/* Function prototypes. */
/* affinity.c */
extern void gomp_init_affinity (void);
-extern void gomp_init_thread_affinity (pthread_attr_t *);
+extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int);
+extern void **gomp_affinity_alloc (unsigned long, bool);
+extern void gomp_affinity_init_place (void *);
+extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long,
+ long, bool);
+extern bool gomp_affinity_remove_cpu (void *, unsigned long);
+extern bool gomp_affinity_copy_place (void *, void *, long);
+extern bool gomp_affinity_same_place (void *, void *);
+extern bool gomp_affinity_finalize_place_list (bool);
+extern bool gomp_affinity_init_level (int, unsigned long, bool);
+extern void gomp_affinity_print_place (void *);
/* alloc.c */
@@ -486,6 +580,8 @@ extern void gomp_barrier_handle_tasks (g
static void inline
gomp_finish_task (struct gomp_task *task)
{
+ if (__builtin_expect (task->depend_hash != NULL, 0))
+ free (task->depend_hash);
gomp_sem_destroy (&task->taskwait_sem);
}
@@ -493,8 +589,13 @@ gomp_finish_task (struct gomp_task *task
extern struct gomp_team *gomp_new_team (unsigned);
extern void gomp_team_start (void (*) (void *), void *, unsigned,
- struct gomp_team *);
+ unsigned, struct gomp_team *);
extern void gomp_team_end (void);
+extern void gomp_free_thread (void *);
+
+/* target.c */
+
+extern int gomp_get_num_devices (void);
/* work.c */
@@ -502,6 +603,7 @@ extern void gomp_init_work_share (struct
extern void gomp_fini_work_share (struct gomp_work_share *);
extern bool gomp_work_share_start (bool);
extern void gomp_work_share_end (void);
+extern bool gomp_work_share_end_cancel (void);
extern void gomp_work_share_end_nowait (void);
static inline void
@@ -580,11 +682,19 @@ extern int gomp_test_nest_lock_25 (omp_n
#endif
#ifdef HAVE_ATTRIBUTE_ALIAS
+# define ialias_ulp ialias_str1(__USER_LABEL_PREFIX__)
+# define ialias_str1(x) ialias_str2(x)
+# define ialias_str2(x) #x
# define ialias(fn) \
extern __typeof (fn) gomp_ialias_##fn \
__attribute__ ((alias (#fn))) attribute_hidden;
+# define ialias_redirect(fn) \
+ extern __typeof (fn) fn __asm__ (ialias_ulp "gomp_ialias_" #fn) attribute_hidden;
+# define ialias_call(fn) gomp_ialias_ ## fn
#else
# define ialias(fn)
+# define ialias_redirect(fn)
+# define ialias_call(fn) fn
#endif
#endif /* LIBGOMP_H */
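
For reference, a hedged sketch of what these macros expand to for,
e.g., omp_get_num_teams, assuming HAVE_ATTRIBUTE_ALIAS and an empty
__USER_LABEL_PREFIX__:

    /* ialias (omp_get_num_teams): define a hidden alias of the public
       symbol, so intra-library calls need no PLT indirection.  */
    extern __typeof (omp_get_num_teams) gomp_ialias_omp_get_num_teams
      __attribute__ ((alias ("omp_get_num_teams"))) attribute_hidden;

    /* ialias_redirect (omp_get_num_teams): make later references to the
       public name assemble against the hidden alias instead.  */
    extern __typeof (omp_get_num_teams) omp_get_num_teams
      __asm__ ("gomp_ialias_omp_get_num_teams") attribute_hidden;

    /* ialias_call (omp_get_num_teams) () simply calls the alias.  */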
@@ -113,6 +113,27 @@ OMP_3.1 {
omp_in_final_;
} OMP_3.0;
+OMP_4.0 {
+ global:
+ omp_get_cancellation;
+ omp_get_cancellation_;
+ omp_get_proc_bind;
+ omp_get_proc_bind_;
+ omp_set_default_device;
+ omp_set_default_device_;
+ omp_set_default_device_8_;
+ omp_get_default_device;
+ omp_get_default_device_;
+ omp_get_num_devices;
+ omp_get_num_devices_;
+ omp_get_num_teams;
+ omp_get_num_teams_;
+ omp_get_team_num;
+ omp_get_team_num_;
+ omp_is_initial_device;
+ omp_is_initial_device_;
+} OMP_3.1;
+
GOMP_1.0 {
global:
GOMP_atomic_end;
@@ -184,3 +205,25 @@ GOMP_3.0 {
global:
GOMP_taskyield;
} GOMP_2.0;
+
+GOMP_4.0 {
+ global:
+ GOMP_barrier_cancel;
+ GOMP_cancel;
+ GOMP_cancellation_point;
+ GOMP_loop_end_cancel;
+ GOMP_parallel_loop_dynamic;
+ GOMP_parallel_loop_guided;
+ GOMP_parallel_loop_runtime;
+ GOMP_parallel_loop_static;
+ GOMP_parallel_sections;
+ GOMP_parallel;
+ GOMP_sections_end_cancel;
+ GOMP_taskgroup_start;
+ GOMP_taskgroup_end;
+ GOMP_target;
+ GOMP_target_data;
+ GOMP_target_end_data;
+ GOMP_target_update;
+ GOMP_teams;
+} GOMP_3.0;
@@ -1083,12 +1083,9 @@ guaranteed not to change during the exec
@node Environment Variables
@chapter Environment Variables
-The variables @env{OMP_DYNAMIC}, @env{OMP_MAX_ACTIVE_LEVELS},
-@env{OMP_NESTED}, @env{OMP_NUM_THREADS}, @env{OMP_SCHEDULE},
-@env{OMP_STACKSIZE},@env{OMP_THREAD_LIMIT} and @env{OMP_WAIT_POLICY}
-are defined by section 4 of the OpenMP specifications in version 3.1,
-while @env{GOMP_CPU_AFFINITY} and @env{GOMP_STACKSIZE} are GNU
-extensions.
+The environment variables which begin with @env{OMP_} are defined by
+section 4 of the OpenMP specification in version 4.0, while those
+beginning with @env{GOMP_} are GNU extensions.
@menu
* OMP_DYNAMIC:: Dynamic adjustment of threads
@@ -1099,9 +1096,11 @@ extensions.
* OMP_SCHEDULE:: How threads are scheduled
* OMP_THREAD_LIMIT:: Set the maximum number of threads
* OMP_WAIT_POLICY:: How waiting threads are handled
+* OMP_DISPLAY_ENV:: Show OpenMP version and environment variables
* OMP_PROC_BIND:: Whether threads may be moved between CPUs
* GOMP_CPU_AFFINITY:: Bind threads to specific CPUs
* GOMP_STACKSIZE:: Set default thread stack size
+* GOMP_SPINCOUNT:: Set the busy-wait spin count
@end menu
@@ -1119,7 +1118,7 @@ disabled by default.
@ref{omp_set_dynamic}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.3
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.3
@end table
@@ -1137,7 +1136,7 @@ If undefined, the number of active level
@ref{omp_set_max_active_levels}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.8
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.9
@end table
@@ -1157,7 +1156,7 @@ regions are disabled by default.
@ref{omp_set_nested}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.5
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.6
@end table
@@ -1177,7 +1176,7 @@ level. If undefined one thread per CPU i
@ref{omp_set_num_threads}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.2
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.2
@end table
@@ -1198,7 +1197,7 @@ dynamic scheduling and a chunk size of 1
@ref{omp_set_schedule}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 2.5.1 and 4.1
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, sections 2.7.1 and 4.1
@end table
@@ -1218,7 +1217,7 @@ stack size is left unchanged. If undefin
dependent.
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 4.6
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.7
@end table
@@ -1237,7 +1236,7 @@ the number of threads is not limited.
@ref{omp_get_thread_limit}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.9
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.10
@end table
@@ -1250,10 +1249,14 @@ the number of threads is not limited.
Specifies whether waiting threads should be active or passive. If
the value is @code{PASSIVE}, waiting threads should not consume CPU
power while waiting; while the value is @code{ACTIVE} specifies that
-they should.
+they should. If undefined, threads wait actively for a short time
+before waiting passively.
+
+@item @emph{See also}:
+@ref{GOMP_SPINCOUNT}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 4.7
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.8
@end table
@@ -1264,14 +1267,32 @@ they should.
@table @asis
@item @emph{Description}:
Specifies whether threads may be moved between processors. If set to
-@code{true}, OpenMP theads should not be moved, if set to @code{false}
-they may be moved.
+@code{TRUE}, OpenMP threads should not be moved; if set to @code{FALSE}
+they may be moved. If undefined, threads may move between processors.
@item @emph{See also}:
@ref{GOMP_CPU_AFFINITY}
@item @emph{Reference}:
-@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 4.4
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.4
+@end table
+
+
+
+@node OMP_DISPLAY_ENV
+@section @env{OMP_DISPLAY_ENV} -- Show OpenMP version and environment variables
+@cindex Environment Variable
+@table @asis
+@item @emph{Description}:
+If set to @code{TRUE}, the OpenMP version number and the values
+associated with the OpenMP environment variables are printed to @code{stderr}.
+If set to @code{VERBOSE}, it additionally shows the value of the environment
+variables which are GNU extensions. If undefined or set to @code{FALSE},
+this information will not be shown.
+
+
+@item @emph{Reference}:
+@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.12
@end table
@@ -1298,7 +1319,7 @@ Fortran, may be used to query the settin
environment variable. A defined CPU affinity on startup cannot be changed
or disabled during the runtime of the application.
-If this environment variable is omitted, the host system will handle the
+If this environment variable is omitted, the host system will handle the
assignment of threads to CPUs.
@item @emph{See also}:
@@ -1330,6 +1351,33 @@ GCC Patches Mailinglist}
@end table
+
+@node GOMP_SPINCOUNT
+@section @env{GOMP_SPINCOUNT} -- Set the busy-wait spin count
+@cindex Environment Variable
+@cindex Implementation specific setting
+@table @asis
+@item @emph{Description}:
+Determines how long a thread waits actively, consuming CPU power,
+before waiting passively without consuming CPU power. The value may
+be either @code{INFINITE} or @code{INFINITY} to always wait actively,
+or an integer which gives the number of spins of the busy-wait loop.
+The integer may optionally be followed by one of the suffixes
+@code{k} (kilo, thousand), @code{M} (mega, million), @code{G} (giga,
+billion), or @code{T} (tera, trillion), which act as multiplication
+factors. If undefined, 0 is used when @env{OMP_WAIT_POLICY} is
+@code{PASSIVE}, 300,000 is used when @env{OMP_WAIT_POLICY} is
+undefined, and 30 billion is used when @env{OMP_WAIT_POLICY} is
+@code{ACTIVE}. If there are more OpenMP threads than available CPUs,
+1000 and 100 spins are used for @env{OMP_WAIT_POLICY} being
+@code{ACTIVE} or undefined, respectively, unless @env{GOMP_SPINCOUNT}
+is lower or @env{OMP_WAIT_POLICY} is @code{PASSIVE}.
+
+@item @emph{See also}:
+@ref{OMP_WAIT_POLICY}
+@end table
+
+
@c ---------------------------------------------------------------------
@c The libgomp ABI
@@ -33,6 +33,7 @@
/* barrier.c */
extern void GOMP_barrier (void);
+extern bool GOMP_barrier_cancel (void);
/* critical.c */
@@ -76,9 +77,22 @@ extern void GOMP_parallel_loop_guided_st
unsigned, long, long, long, long);
extern void GOMP_parallel_loop_runtime_start (void (*)(void *), void *,
unsigned, long, long, long);
+extern void GOMP_parallel_loop_static (void (*)(void *), void *,
+ unsigned, long, long, long, long,
+ unsigned);
+extern void GOMP_parallel_loop_dynamic (void (*)(void *), void *,
+ unsigned, long, long, long, long,
+ unsigned);
+extern void GOMP_parallel_loop_guided (void (*)(void *), void *,
+ unsigned, long, long, long, long,
+ unsigned);
+extern void GOMP_parallel_loop_runtime (void (*)(void *), void *,
+ unsigned, long, long, long,
+ unsigned);
extern void GOMP_loop_end (void);
extern void GOMP_loop_end_nowait (void);
+extern bool GOMP_loop_end_cancel (void);
/* loop_ull.c */
@@ -157,13 +171,18 @@ extern void GOMP_ordered_end (void);
extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
extern void GOMP_parallel_end (void);
+extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);
+extern bool GOMP_cancel (int, bool);
+extern bool GOMP_cancellation_point (int);
/* task.c */
extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
- long, long, bool, unsigned);
+ long, long, bool, unsigned, void **);
extern void GOMP_taskwait (void);
extern void GOMP_taskyield (void);
+extern void GOMP_taskgroup_start (void);
+extern void GOMP_taskgroup_end (void);
/* sections.c */
@@ -171,8 +190,11 @@ extern unsigned GOMP_sections_start (uns
extern unsigned GOMP_sections_next (void);
extern void GOMP_parallel_sections_start (void (*) (void *), void *,
unsigned, unsigned);
+extern void GOMP_parallel_sections (void (*) (void *), void *,
+ unsigned, unsigned, unsigned);
extern void GOMP_sections_end (void);
extern void GOMP_sections_end_nowait (void);
+extern bool GOMP_sections_end_cancel (void);
/* single.c */
@@ -180,4 +202,15 @@ extern bool GOMP_single_start (void);
extern void *GOMP_single_copy_start (void);
extern void GOMP_single_copy_end (void *);
+/* target.c */
+
+extern void GOMP_target (int, void (*) (void *), const void *,
+ size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_data (int, const void *,
+ size_t, void **, size_t *, unsigned char *);
+extern void GOMP_target_end_data (void);
+extern void GOMP_target_update (int, const void *,
+ size_t, void **, size_t *, unsigned char *);
+extern void GOMP_teams (unsigned int, unsigned int);
+
#endif /* LIBGOMP_G_H */
@@ -439,14 +439,14 @@ static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
unsigned num_threads, long start, long end,
long incr, enum gomp_schedule_type sched,
- long chunk_size)
+ long chunk_size, unsigned int flags)
{
struct gomp_team *team;
num_threads = gomp_resolve_num_threads (num_threads, 0);
team = gomp_new_team (num_threads);
gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
- gomp_team_start (fn, data, num_threads, team);
+ gomp_team_start (fn, data, num_threads, flags, team);
}
void
@@ -455,7 +455,7 @@ GOMP_parallel_loop_static_start (void (*
long incr, long chunk_size)
{
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
- GFS_STATIC, chunk_size);
+ GFS_STATIC, chunk_size, 0);
}
void
@@ -464,7 +464,7 @@ GOMP_parallel_loop_dynamic_start (void (
long incr, long chunk_size)
{
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
- GFS_DYNAMIC, chunk_size);
+ GFS_DYNAMIC, chunk_size, 0);
}
void
@@ -473,7 +473,7 @@ GOMP_parallel_loop_guided_start (void (*
long incr, long chunk_size)
{
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
- GFS_GUIDED, chunk_size);
+ GFS_GUIDED, chunk_size, 0);
}
void
@@ -483,11 +483,59 @@ GOMP_parallel_loop_runtime_start (void (
{
struct gomp_task_icv *icv = gomp_icv (false);
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
- icv->run_sched_var, icv->run_sched_modifier);
+ icv->run_sched_var, icv->run_sched_modifier, 0);
+}
+
+ialias_redirect (GOMP_parallel_end)
+
+void
+GOMP_parallel_loop_static (void (*fn) (void *), void *data,
+ unsigned num_threads, long start, long end,
+ long incr, long chunk_size, unsigned flags)
+{
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+ GFS_STATIC, chunk_size, flags);
+ fn (data);
+ GOMP_parallel_end ();
+}
+
+void
+GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
+ unsigned num_threads, long start, long end,
+ long incr, long chunk_size, unsigned flags)
+{
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+ GFS_DYNAMIC, chunk_size, flags);
+ fn (data);
+ GOMP_parallel_end ();
+}
+
+void
+GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
+ unsigned num_threads, long start, long end,
+ long incr, long chunk_size, unsigned flags)
+{
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+ GFS_GUIDED, chunk_size, flags);
+ fn (data);
+ GOMP_parallel_end ();
+}
+
+void
+GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
+ unsigned num_threads, long start, long end,
+ long incr, unsigned flags)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+ icv->run_sched_var, icv->run_sched_modifier,
+ flags);
+ fn (data);
+ GOMP_parallel_end ();
}
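
    /* A combined parallel loop such as

         #pragma omp parallel for schedule(dynamic, 4)
         for (i = 0; i < n; i++) body (i);

       might now lower to a single call (outlined name invented):

         GOMP_parallel_loop_dynamic (foo_omp_fn_0, &data, 0,
                                     0, n, 1, 4, 0);

       with num_threads == 0 meaning "use nthreads-var", chunk_size == 4,
       and the trailing 0 being the new flags argument; each thread then
       pulls iterations via GOMP_loop_dynamic_next until they are
       drained. This is a hedged sketch, not GCC's literal output.  */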
/* The GOMP_loop_end* routines are called after the thread is told that
- all loop iterations are complete. This first version synchronizes
+ all loop iterations are complete. The first two versions synchronize
all threads; the nowait version does not. */
void
@@ -496,6 +544,12 @@ GOMP_loop_end (void)
gomp_work_share_end ();
}
+bool
+GOMP_loop_end_cancel (void)
+{
+ return gomp_work_share_end_cancel ();
+}
+
void
GOMP_loop_end_nowait (void)
{
@@ -52,6 +52,15 @@ typedef enum omp_sched_t
omp_sched_auto = 4
} omp_sched_t;
+typedef enum omp_proc_bind_t
+{
+ omp_proc_bind_false = 0,
+ omp_proc_bind_true = 1,
+ omp_proc_bind_master = 2,
+ omp_proc_bind_close = 3,
+ omp_proc_bind_spread = 4
+} omp_proc_bind_t;
+
#ifdef __cplusplus
extern "C" {
# define __GOMP_NOTHROW throw ()
@@ -88,17 +97,28 @@ extern int omp_test_nest_lock (omp_nest_
extern double omp_get_wtime (void) __GOMP_NOTHROW;
extern double omp_get_wtick (void) __GOMP_NOTHROW;
-void omp_set_schedule (omp_sched_t, int) __GOMP_NOTHROW;
-void omp_get_schedule (omp_sched_t *, int *) __GOMP_NOTHROW;
-int omp_get_thread_limit (void) __GOMP_NOTHROW;
-void omp_set_max_active_levels (int) __GOMP_NOTHROW;
-int omp_get_max_active_levels (void) __GOMP_NOTHROW;
-int omp_get_level (void) __GOMP_NOTHROW;
-int omp_get_ancestor_thread_num (int) __GOMP_NOTHROW;
-int omp_get_team_size (int) __GOMP_NOTHROW;
-int omp_get_active_level (void) __GOMP_NOTHROW;
+extern void omp_set_schedule (omp_sched_t, int) __GOMP_NOTHROW;
+extern void omp_get_schedule (omp_sched_t *, int *) __GOMP_NOTHROW;
+extern int omp_get_thread_limit (void) __GOMP_NOTHROW;
+extern void omp_set_max_active_levels (int) __GOMP_NOTHROW;
+extern int omp_get_max_active_levels (void) __GOMP_NOTHROW;
+extern int omp_get_level (void) __GOMP_NOTHROW;
+extern int omp_get_ancestor_thread_num (int) __GOMP_NOTHROW;
+extern int omp_get_team_size (int) __GOMP_NOTHROW;
+extern int omp_get_active_level (void) __GOMP_NOTHROW;
+
+extern int omp_in_final (void) __GOMP_NOTHROW;
+
+extern int omp_get_cancellation (void) __GOMP_NOTHROW;
+extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
+
+extern void omp_set_default_device (int) __GOMP_NOTHROW;
+extern int omp_get_default_device (void) __GOMP_NOTHROW;
+extern int omp_get_num_devices (void) __GOMP_NOTHROW;
+extern int omp_get_num_teams (void) __GOMP_NOTHROW;
+extern int omp_get_team_num (void) __GOMP_NOTHROW;
-int omp_in_final (void) __GOMP_NOTHROW;
+extern int omp_is_initial_device (void) __GOMP_NOTHROW;
#ifdef __cplusplus
}
@@ -27,16 +27,22 @@
integer, parameter :: omp_lock_kind = @OMP_LOCK_KIND@
integer, parameter :: omp_nest_lock_kind = @OMP_NEST_LOCK_KIND@
integer, parameter :: omp_sched_kind = 4
+ integer, parameter :: omp_proc_bind_kind = 4
+ integer (omp_sched_kind), parameter :: omp_sched_static = 1
+ integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2
+ integer (omp_sched_kind), parameter :: omp_sched_guided = 3
+ integer (omp_sched_kind), parameter :: omp_sched_auto = 4
+ integer (omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
+ integer (omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
+ integer (omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
+ integer (omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
+ integer (omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
end module
module omp_lib
use omp_lib_kinds
implicit none
integer, parameter :: openmp_version = 201107
- integer (omp_sched_kind), parameter :: omp_sched_static = 1
- integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2
- integer (omp_sched_kind), parameter :: omp_sched_guided = 3
- integer (omp_sched_kind), parameter :: omp_sched_auto = 4
interface
subroutine omp_init_lock (svar)
@@ -123,21 +129,18 @@
interface
function omp_get_dynamic ()
- use omp_lib_kinds
logical (4) :: omp_get_dynamic
end function omp_get_dynamic
end interface
interface
function omp_get_nested ()
- use omp_lib_kinds
logical (4) :: omp_get_nested
end function omp_get_nested
end interface
interface
function omp_in_parallel ()
- use omp_lib_kinds
logical (4) :: omp_in_parallel
end function omp_in_parallel
end interface
@@ -152,28 +155,24 @@
interface
function omp_get_max_threads ()
- use omp_lib_kinds
integer (4) :: omp_get_max_threads
end function omp_get_max_threads
end interface
interface
function omp_get_num_procs ()
- use omp_lib_kinds
integer (4) :: omp_get_num_procs
end function omp_get_num_procs
end interface
interface
function omp_get_num_threads ()
- use omp_lib_kinds
integer (4) :: omp_get_num_threads
end function omp_get_num_threads
end interface
interface
function omp_get_thread_num ()
- use omp_lib_kinds
integer (4) :: omp_get_thread_num
end function omp_get_thread_num
end interface
@@ -226,44 +225,37 @@
interface
function omp_get_thread_limit ()
- use omp_lib_kinds
integer (4) :: omp_get_thread_limit
end function omp_get_thread_limit
end interface
interface omp_set_max_active_levels
subroutine omp_set_max_active_levels (max_levels)
- use omp_lib_kinds
integer (4), intent (in) :: max_levels
end subroutine omp_set_max_active_levels
subroutine omp_set_max_active_levels_8 (max_levels)
- use omp_lib_kinds
integer (8), intent (in) :: max_levels
end subroutine omp_set_max_active_levels_8
end interface
interface
function omp_get_max_active_levels ()
- use omp_lib_kinds
integer (4) :: omp_get_max_active_levels
end function omp_get_max_active_levels
end interface
interface
function omp_get_level ()
- use omp_lib_kinds
integer (4) :: omp_get_level
end function omp_get_level
end interface
interface omp_get_ancestor_thread_num
function omp_get_ancestor_thread_num (level)
- use omp_lib_kinds
integer (4), intent (in) :: level
integer (4) :: omp_get_ancestor_thread_num
end function omp_get_ancestor_thread_num
function omp_get_ancestor_thread_num_8 (level)
- use omp_lib_kinds
integer (8), intent (in) :: level
integer (4) :: omp_get_ancestor_thread_num_8
end function omp_get_ancestor_thread_num_8
@@ -271,12 +263,10 @@
interface omp_get_team_size
function omp_get_team_size (level)
- use omp_lib_kinds
integer (4), intent (in) :: level
integer (4) :: omp_get_team_size
end function omp_get_team_size
function omp_get_team_size_8 (level)
- use omp_lib_kinds
integer (8), intent (in) :: level
integer (4) :: omp_get_team_size_8
end function omp_get_team_size_8
@@ -284,16 +274,66 @@
interface
function omp_get_active_level ()
- use omp_lib_kinds
integer (4) :: omp_get_active_level
end function omp_get_active_level
end interface
interface
function omp_in_final ()
- use omp_lib_kinds
logical (4) :: omp_in_final
end function omp_in_final
end interface
+ interface
+ function omp_get_cancellation ()
+ logical (4) :: omp_get_cancellation
+ end function omp_get_cancellation
+ end interface
+
+ interface
+ function omp_get_proc_bind ()
+ use omp_lib_kinds
+ integer (omp_proc_bind_kind) :: omp_get_proc_bind
+ end function omp_get_proc_bind
+ end interface
+
+ interface omp_set_default_device
+ subroutine omp_set_default_device (device_num)
+ integer (4), intent (in) :: device_num
+ end subroutine omp_set_default_device
+ subroutine omp_set_default_device_8 (device_num)
+ integer (8), intent (in) :: device_num
+ end subroutine omp_set_default_device_8
+ end interface
+
+ interface
+ function omp_get_default_device ()
+ integer (4) :: omp_get_default_device
+ end function omp_get_default_device
+ end interface
+
+ interface
+ function omp_get_num_devices ()
+ integer (4) :: omp_get_num_devices
+ end function omp_get_num_devices
+ end interface
+
+ interface
+ function omp_get_num_teams ()
+ integer (4) :: omp_get_num_teams
+ end function omp_get_num_teams
+ end interface
+
+ interface
+ function omp_get_team_num ()
+ integer (4) :: omp_get_team_num
+ end function omp_get_team_num
+ end interface
+
+ interface
+ function omp_is_initial_device ()
+ logical (4) :: omp_is_initial_device
+ end function omp_is_initial_device
+ end interface
+
end module omp_lib
@@ -33,6 +33,18 @@
parameter (omp_sched_dynamic = 2)
parameter (omp_sched_guided = 3)
parameter (omp_sched_auto = 4)
+ integer omp_proc_bind_kind
+ parameter (omp_proc_bind_kind = 4)
+ integer (omp_proc_bind_kind) omp_proc_bind_false
+ integer (omp_proc_bind_kind) omp_proc_bind_true
+ integer (omp_proc_bind_kind) omp_proc_bind_master
+ integer (omp_proc_bind_kind) omp_proc_bind_close
+ integer (omp_proc_bind_kind) omp_proc_bind_spread
+ parameter (omp_proc_bind_false = 0)
+ parameter (omp_proc_bind_true = 1)
+ parameter (omp_proc_bind_master = 2)
+ parameter (omp_proc_bind_close = 3)
+ parameter (omp_proc_bind_spread = 4)
parameter (openmp_version = 201107)
external omp_init_lock, omp_init_nest_lock
@@ -68,3 +80,18 @@
external omp_in_final
logical(4) omp_in_final
+
+ external omp_get_cancellation
+ logical(4) omp_get_cancellation
+
+ external omp_get_proc_bind
+ integer(omp_proc_bind_kind) omp_get_proc_bind
+
+ external omp_set_default_device, omp_get_default_device
+ external omp_get_num_devices, omp_get_num_teams
+ external omp_get_team_num
+ integer(4) omp_get_default_device, omp_get_num_devices
+ integer(4) omp_get_num_teams, omp_get_team_num
+
+ external omp_is_initial_device
+ logical(4) omp_is_initial_device
@@ -37,18 +37,19 @@
unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
- struct gomp_thread *thread = gomp_thread();
+ struct gomp_thread *thr = gomp_thread ();
struct gomp_task_icv *icv;
unsigned threads_requested, max_num_threads, num_threads;
- unsigned long remaining;
+ unsigned long busy;
+ struct gomp_thread_pool *pool;
icv = gomp_icv (false);
if (specified == 1)
return 1;
- else if (thread->ts.active_level >= 1 && !icv->nest_var)
+ else if (thr->ts.active_level >= 1 && !icv->nest_var)
return 1;
- else if (thread->ts.active_level >= gomp_max_active_levels_var)
+ else if (thr->ts.active_level >= gomp_max_active_levels_var)
return 1;
/* If NUM_THREADS not specified, use nthreads_var. */
@@ -72,30 +73,46 @@ gomp_resolve_num_threads (unsigned speci
max_num_threads = count;
}
- /* ULONG_MAX stands for infinity. */
- if (__builtin_expect (gomp_thread_limit_var == ULONG_MAX, 1)
+ /* UINT_MAX stands for infinity. */
+ if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
|| max_num_threads == 1)
return max_num_threads;
+ /* The threads_busy counter lives in thread_pool; if there
+ isn't a thread_pool yet, there must be just one thread
+ in the contention group. If thr->team is NULL, this isn't
+ a nested parallel, so there is just one thread in the
+ contention group as well; no need to handle it atomically. */
+ pool = thr->thread_pool;
+ if (thr->ts.team == NULL)
+ {
+ num_threads = max_num_threads;
+ if (num_threads > icv->thread_limit_var)
+ num_threads = icv->thread_limit_var;
+ if (pool)
+ pool->threads_busy = num_threads;
+ return num_threads;
+ }
+
#ifdef HAVE_SYNC_BUILTINS
do
{
- remaining = gomp_remaining_threads_count;
+ busy = pool->threads_busy;
num_threads = max_num_threads;
- if (num_threads > remaining)
- num_threads = remaining + 1;
+ if (icv->thread_limit_var - busy + 1 < num_threads)
+ num_threads = icv->thread_limit_var - busy + 1;
}
- while (__sync_val_compare_and_swap (&gomp_remaining_threads_count,
- remaining, remaining - num_threads + 1)
- != remaining);
+ while (__sync_val_compare_and_swap (&pool->threads_busy,
+ busy, busy + num_threads - 1)
+ != busy);
#else
- gomp_mutex_lock (&gomp_remaining_threads_lock);
+ gomp_mutex_lock (&gomp_managed_threads_lock);
num_threads = max_num_threads;
- remaining = gomp_remaining_threads_count;
- if (num_threads > remaining)
- num_threads = remaining + 1;
- gomp_remaining_threads_count -= num_threads - 1;
- gomp_mutex_unlock (&gomp_remaining_threads_lock);
+ busy = pool->threads_busy;
+ if (icv->thread_limit_var - busy + 1 < num_threads)
+ num_threads = icv->thread_limit_var - busy + 1;
+ pool->threads_busy += num_threads - 1;
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
return num_threads;
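
A worked example of the clamp above: with thread_limit_var == 4, a
nested parallel region requesting 8 threads while threads_busy == 2 is
granted min (8, 4 - 2 + 1) == 3 threads, and threads_busy then becomes
2 + (3 - 1) == 4, exactly the limit, because the encountering thread
is already counted in the busy total.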
@@ -105,31 +122,113 @@ void
GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
{
num_threads = gomp_resolve_num_threads (num_threads, 0);
- gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads));
+ gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads));
}
void
GOMP_parallel_end (void)
{
- if (__builtin_expect (gomp_thread_limit_var != ULONG_MAX, 0))
+ struct gomp_task_icv *icv = gomp_icv (false);
+ if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
- if (team && team->nthreads > 1)
+ unsigned int nthreads = team ? team->nthreads : 1;
+ gomp_team_end ();
+ if (nthreads > 1)
{
+ /* If not nested, there is just one thread in the
+ contention group left, no need for atomicity. */
+ if (thr->ts.team == NULL)
+ thr->thread_pool->threads_busy = 1;
+ else
+ {
#ifdef HAVE_SYNC_BUILTINS
- __sync_fetch_and_add (&gomp_remaining_threads_count,
- 1UL - team->nthreads);
+ __sync_fetch_and_add (&thr->thread_pool->threads_busy,
+ 1UL - nthreads);
#else
- gomp_mutex_lock (&gomp_remaining_threads_lock);
- gomp_remaining_threads_count -= team->nthreads - 1;
- gomp_mutex_unlock (&gomp_remaining_threads_lock);
+ gomp_mutex_lock (&gomp_managed_threads_lock);
+ thr->thread_pool->threads_busy -= nthreads - 1;
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
+ }
}
}
- gomp_team_end ();
+ else
+ gomp_team_end ();
+}
+ialias (GOMP_parallel_end)
+
+void
+GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags)
+{
+ num_threads = gomp_resolve_num_threads (num_threads, 0);
+ gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
+ fn (data);
+ ialias_call (GOMP_parallel_end) ();
+}
+
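
    /* GOMP_parallel is the single combined entry point the compiler
       now emits for #pragma omp parallel; a hedged sketch of old
       versus new lowering, with fn standing for the outlined body:

         old (GOMP_1.0):                    new (GOMP_4.0):

         GOMP_parallel_start (fn, &d, 0);   GOMP_parallel (fn, &d, 0, 0);
         fn (&d);
         GOMP_parallel_end ();

       The trailing 0 is the new flags word, e.g. carrying a proc_bind
       clause.  */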
+bool
+GOMP_cancellation_point (int which)
+{
+ if (!gomp_cancel_var)
+ return false;
+
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
+ {
+ if (team == NULL)
+ return false;
+ return team->work_share_cancelled != 0;
+ }
+ else if (which & GOMP_CANCEL_TASKGROUP)
+ {
+ if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
+ return true;
+ /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
+ as #pragma omp cancel parallel also cancels all explicit
+ tasks. */
+ }
+ if (team)
+ return gomp_team_barrier_cancelled (&team->barrier);
+ return false;
}
+ialias (GOMP_cancellation_point)
+
+bool
+GOMP_cancel (int which, bool do_cancel)
+{
+ if (!gomp_cancel_var)
+ return false;
+
+ if (!do_cancel)
+ return ialias_call (GOMP_cancellation_point) (which);
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
+ {
+ /* In an orphaned worksharing region, all we want to cancel
+ is the current thread. */
+ if (team != NULL)
+ team->work_share_cancelled = 1;
+ return true;
+ }
+ else if (which & GOMP_CANCEL_TASKGROUP)
+ {
+ if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
+ {
+ gomp_mutex_lock (&team->task_lock);
+ thr->task->taskgroup->cancelled = true;
+ gomp_mutex_unlock (&team->task_lock);
+ }
+ return true;
+ }
+ team->team_cancelled = 1;
+ gomp_team_barrier_cancel (team);
+ return true;
+}
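
    /* A hedged sketch of how a compiler is expected to drive these two
       entry points (label names invented, not GCC's literal output):

         #pragma omp cancellation point parallel
           -> if (GOMP_cancellation_point (GOMP_CANCEL_PARALLEL))
                goto parallel_body_end;

         #pragma omp cancel for if (cond)
           -> if (GOMP_cancel (GOMP_CANCEL_FOR, cond))
                goto loop_exit;   with GOMP_loop_end_cancel at loop_exit

       When the if clause is false, GOMP_cancel degrades to a mere
       cancellation point, matching the OpenMP 4.0 semantics.  */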
/* The public OpenMP API for thread and team related inquiries. */
@@ -139,11 +139,27 @@ GOMP_parallel_sections_start (void (*fn)
num_threads = gomp_resolve_num_threads (num_threads, count);
team = gomp_new_team (num_threads);
gomp_sections_init (&team->work_shares[0], count);
- gomp_team_start (fn, data, num_threads, team);
+ gomp_team_start (fn, data, num_threads, 0, team);
+}
+
+ialias_redirect (GOMP_parallel_end)
+
+void
+GOMP_parallel_sections (void (*fn) (void *), void *data,
+ unsigned num_threads, unsigned count, unsigned flags)
+{
+ struct gomp_team *team;
+
+ num_threads = gomp_resolve_num_threads (num_threads, count);
+ team = gomp_new_team (num_threads);
+ gomp_sections_init (&team->work_shares[0], count);
+ gomp_team_start (fn, data, num_threads, flags, team);
+ fn (data);
+ GOMP_parallel_end ();
}
/* The GOMP_section_end* routines are called after the thread is told
- that all sections are complete. This first version synchronizes
+ that all sections are complete. The first two versions synchronize
all threads; the nowait version does not. */
void
@@ -152,6 +168,12 @@ GOMP_sections_end (void)
gomp_work_share_end ();
}
+bool
+GOMP_sections_end_cancel (void)
+{
+ return gomp_work_share_end_cancel ();
+}
+
void
GOMP_sections_end_nowait (void)
{
@@ -0,0 +1,96 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+ This file is part of the GNU OpenMP Library (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles the host fallback implementation of the OpenMP 4.0
+ target, target data, target update and teams constructs. */
+
+#include "libgomp.h"
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+attribute_hidden int
+gomp_get_num_devices (void)
+{
+ return 0;
+}
+
+/* Called when encountering a target directive. If DEVICE
+ is -1, it means use the device-var ICV. If it is -2 (or any other
+ value larger than the last available hw device), use host fallback.
+ FN is address of host code, OPENMP_TARGET contains value of the
+ __OPENMP_TARGET__ symbol in the shared library or binary that invokes
+ GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
+ with MAPNUM entries, with addresses of the host objects,
+ sizes of the host objects (for pointer kinds, the pointer bias
+ instead, with sizeof (void *) assumed as the size) and kinds. */
+
+void
+GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned char *kinds)
+{
+ /* Host fallback. */
+ struct gomp_thread old_thr, *thr = gomp_thread ();
+ old_thr = *thr;
+ memset (thr, '\0', sizeof (*thr));
+ if (gomp_places_list)
+ {
+ thr->place = old_thr.place;
+ thr->ts.place_partition_len = gomp_places_list_len;
+ }
+ fn (hostaddrs);
+ gomp_free_thread (thr);
+ *thr = old_thr;
+}
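
A hedged sketch of the call the compiler might emit for a simple
#pragma omp target map(tofrom: x) region; the outlined-function name
and the kind encoding (3 for tofrom) are assumptions not spelled out
by this file:

    void *hostaddrs[] = { &x };
    size_t sizes[] = { sizeof (x) };
    unsigned char kinds[] = { 3 };  /* assumed tofrom encoding */
    GOMP_target (-1 /* use the device-var ICV */, foo_omp_fn_1,
                 &__OPENMP_TARGET__, 1, hostaddrs, sizes, kinds);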
+
+void
+GOMP_target_data (int device, const void *openmp_target, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+}
+
+void
+GOMP_target_end_data (void)
+{
+}
+
+void
+GOMP_target_update (int device, const void *openmp_target, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+}
+
+void
+GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+{
+ if (thread_limit)
+ {
+ struct gomp_task_icv *icv = gomp_icv (true);
+ icv->thread_limit_var
+ = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+ }
+ (void) num_teams;
+}
@@ -29,6 +29,33 @@
#include <stdlib.h>
#include <string.h>
+typedef struct gomp_task_depend_entry *hash_entry_type;
+
+static inline void *
+htab_alloc (size_t size)
+{
+ return gomp_malloc (size);
+}
+
+static inline void
+htab_free (void *ptr)
+{
+ free (ptr);
+}
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+ return hash_pointer (element->addr);
+}
+
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+ return x->addr == y->addr;
+}
/* Create a new task data structure. */
@@ -42,7 +69,12 @@ gomp_init_task (struct gomp_task *task,
task->in_taskwait = false;
task->in_tied_task = false;
task->final_task = false;
+ task->copy_ctors_done = false;
task->children = NULL;
+ task->taskgroup = NULL;
+ task->dependers = NULL;
+ task->depend_hash = NULL;
+ task->depend_count = 0;
gomp_sem_init (&task->taskwait_sem, 0);
}
@@ -78,7 +110,8 @@ gomp_clear_parent (struct gomp_task *chi
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
- long arg_size, long arg_align, bool if_clause, unsigned flags)
+ long arg_size, long arg_align, bool if_clause, unsigned flags,
+ void **depend)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
@@ -94,17 +127,58 @@ GOMP_task (void (*fn) (void *), void *da
flags &= ~1;
#endif
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+ return;
+
if (!if_clause || team == NULL
|| (thr->task && thr->task->final_task)
|| team->task_count > 64 * team->nthreads)
{
struct gomp_task task;
+ /* If there are depend clauses and earlier deferred sibling tasks
+ with depend clauses, check whether there is a dependency. If there
+ is, fall through to the deferred task handling, as we can't
+ schedule such tasks right away. There is no need to handle
+ depend clauses for non-deferred tasks other than this, because
+ the parent task is suspended until the child task finishes and thus
+ it can't start further child tasks. */
+ if ((flags & 8) && thr->task && thr->task->depend_hash)
+ {
+ struct gomp_task *parent = thr->task;
+ struct gomp_task_depend_entry elem, *ent = NULL;
+ size_t ndepend = (uintptr_t) depend[0];
+ size_t nout = (uintptr_t) depend[1];
+ size_t i;
+ gomp_mutex_lock (&team->task_lock);
+ for (i = 0; i < ndepend; i++)
+ {
+ elem.addr = depend[i + 2];
+ ent = htab_find (parent->depend_hash, &elem);
+ for (; ent; ent = ent->next)
+ if (i >= nout && ent->is_in)
+ continue;
+ else
+ break;
+ if (ent)
+ break;
+ }
+ gomp_mutex_unlock (&team->task_lock);
+ if (ent)
+ goto defer;
+ }
+
gomp_init_task (&task, thr->task, gomp_icv (false));
task.kind = GOMP_TASK_IFFALSE;
task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
if (thr->task)
- task.in_tied_task = thr->task->in_tied_task;
+ {
+ task.in_tied_task = thr->task->in_tied_task;
+ task.taskgroup = thr->task->taskgroup;
+ }
thr->task = &task;
if (__builtin_expect (cpyfn != NULL, 0))
{
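
The DEPEND array consumed by the pre-check above (and by the deferred
path below) is built by the compiler: depend[0] holds the number of
depend clauses, depend[1] how many of them are out/inout (those are
sorted first), and depend[2] onwards the addresses. A hedged sketch of
the caller's side for #pragma omp task depend(out: a) depend(in: b, c),
with fn and data standing for the outlined body and its argument block:

    void *depend[5] = { (void *) 3 /* ndepend */, (void *) 1 /* nout */,
                        &a, &b, &c };
    GOMP_task (fn, &data, NULL, sizeof data, __alignof__ (data),
               true /* if-clause value */, 8 /* flags: depend present */,
               depend);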
@@ -135,29 +209,161 @@ GOMP_task (void (*fn) (void *), void *da
}
else
{
+ defer:;
struct gomp_task *task;
struct gomp_task *parent = thr->task;
+ struct gomp_taskgroup *taskgroup = parent->taskgroup;
char *arg;
bool do_wake;
+ size_t depend_size = 0;
- task = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
- arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+ if (flags & 8)
+ depend_size = ((uintptr_t) depend[0]
+ * sizeof (struct gomp_task_depend_entry));
+ task = gomp_malloc (sizeof (*task) + depend_size
+ + arg_size + arg_align - 1);
+ arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
& ~(uintptr_t) (arg_align - 1));
gomp_init_task (task, parent, gomp_icv (false));
task->kind = GOMP_TASK_IFFALSE;
task->in_tied_task = parent->in_tied_task;
+ task->taskgroup = taskgroup;
thr->task = task;
if (cpyfn)
- cpyfn (arg, data);
+ {
+ cpyfn (arg, data);
+ task->copy_ctors_done = true;
+ }
else
memcpy (arg, data, arg_size);
thr->task = parent;
task->kind = GOMP_TASK_WAITING;
task->fn = fn;
task->fn_data = arg;
- task->in_tied_task = true;
task->final_task = (flags & 2) >> 1;
gomp_mutex_lock (&team->task_lock);
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+ || (taskgroup && taskgroup->cancelled))
+ && !task->copy_ctors_done, 0))
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_finish_task (task);
+ free (task);
+ return;
+ }
+ if (taskgroup)
+ taskgroup->num_children++;
+ if (depend_size)
+ {
+ size_t ndepend = (uintptr_t) depend[0];
+ size_t nout = (uintptr_t) depend[1];
+ size_t i;
+ hash_entry_type ent;
+
+ task->depend_count = ndepend;
+ task->num_dependees = 0;
+ if (parent->depend_hash == NULL)
+ parent->depend_hash
+ = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+ for (i = 0; i < ndepend; i++)
+ {
+ task->depend[i].addr = depend[2 + i];
+ task->depend[i].next = NULL;
+ task->depend[i].prev = NULL;
+ task->depend[i].task = task;
+ task->depend[i].is_in = i >= nout;
+ task->depend[i].redundant = false;
+
+ hash_entry_type *slot
+ = htab_find_slot (&parent->depend_hash, &task->depend[i],
+ INSERT);
+ hash_entry_type out = NULL;
+ if (*slot)
+ {
+ /* If multiple depend clauses of the same task name the
+ same address, all but the first one are redundant.
+ As inout/out come first, if any of them is
+ inout/out, it will win, which is the right
+ semantics. */
+ if ((*slot)->task == task)
+ {
+ task->depend[i].redundant = true;
+ continue;
+ }
+ for (ent = *slot; ent; ent = ent->next)
+ {
+ /* depend(in:...) doesn't depend on earlier
+ depend(in:...). */
+ if (i >= nout && ent->is_in)
+ continue;
+
+ if (!ent->is_in)
+ out = ent;
+
+ struct gomp_task *tsk = ent->task;
+ if (tsk->dependers == NULL)
+ {
+ tsk->dependers
+ = gomp_malloc (sizeof (struct gomp_dependers_vec)
+ + 6 * sizeof (struct gomp_task *));
+ tsk->dependers->n_elem = 1;
+ tsk->dependers->allocated = 6;
+ tsk->dependers->elem[0] = task;
+ task->num_dependees++;
+ continue;
+ }
+		      /* We already have some other dependency on tsk
+			 from an earlier depend clause. */
+ else if (tsk->dependers->n_elem
+ && (tsk->dependers->elem[tsk->dependers->n_elem
+ - 1]
+ == task))
+ continue;
+ else if (tsk->dependers->n_elem
+ == tsk->dependers->allocated)
+ {
+ tsk->dependers->allocated
+ = tsk->dependers->allocated * 2 + 2;
+ tsk->dependers
+ = gomp_realloc (tsk->dependers,
+ sizeof (struct gomp_dependers_vec)
+ + (tsk->dependers->allocated
+ * sizeof (struct gomp_task *)));
+ }
+ tsk->dependers->elem[tsk->dependers->n_elem++] = task;
+ task->num_dependees++;
+ }
+ task->depend[i].next = *slot;
+ (*slot)->prev = &task->depend[i];
+ }
+ *slot = &task->depend[i];
+
+ /* There is no need to store more than one depend({,in}out:)
+ task per address in the hash table chain, because each out
+ depends on all earlier outs, thus it is enough to record
+ just the last depend({,in}out:). For depend(in:), we need
+ to keep all of the previous ones not terminated yet, because
+ a later depend({,in}out:) might need to depend on all of
+ them. So, if the new task's clause is depend({,in}out:),
+ we know there is at most one other depend({,in}out:) clause
+ in the list (out) and to maintain the invariant we now
+ need to remove it from the list. */
+ if (!task->depend[i].is_in && out)
+ {
+ if (out->next)
+ out->next->prev = out->prev;
+ out->prev->next = out->next;
+ out->redundant = true;
+ }
+ }
+ if (task->num_dependees)
+ {
+ gomp_mutex_unlock (&team->task_lock);
+ return;
+ }
+ }
if (parent->children)
{
task->next_child = parent->children;
@@ -171,6 +377,22 @@ GOMP_task (void (*fn) (void *), void *da
task->prev_child = task;
}
parent->children = task;
+ if (taskgroup)
+ {
+ if (taskgroup->children)
+ {
+ task->next_taskgroup = taskgroup->children;
+ task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+ task->next_taskgroup->prev_taskgroup = task;
+ task->prev_taskgroup->next_taskgroup = task;
+ }
+ else
+ {
+ task->next_taskgroup = task;
+ task->prev_taskgroup = task;
+ }
+ taskgroup->children = task;
+ }
if (team->task_queue)
{
task->next_queue = team->task_queue;
@@ -185,6 +407,7 @@ GOMP_task (void (*fn) (void *), void *da
team->task_queue = task;
}
++team->task_count;
+ ++team->task_queued_count;
gomp_team_barrier_set_task_pending (&team->barrier);
do_wake = team->task_running_count + !parent->in_tied_task
< team->nthreads;
@@ -194,6 +417,220 @@ GOMP_task (void (*fn) (void *), void *da
}
}
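
(Aside, not part of the patch: the depend_hash/dependers machinery above is
what serializes sibling tasks in user code like the following sketch.  The
depend(in:) task ends up with num_dependees 1, waiting on the last
depend(out:) task, so it always prints 2.)

#include <stdio.h>

int
main (void)
{
  int x = 0;
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp task shared(x) depend(out: x)
    x = 1;
    #pragma omp task shared(x) depend(out: x)
    x = 2;
    #pragma omp task shared(x) depend(in: x)
    printf ("%d\n", x);	/* Always prints 2.  */
  }
  return 0;
}
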
+static inline bool
+gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
+ struct gomp_taskgroup *taskgroup, struct gomp_team *team)
+{
+ if (parent && parent->children == child_task)
+ parent->children = child_task->next_child;
+ if (taskgroup && taskgroup->children == child_task)
+ taskgroup->children = child_task->next_taskgroup;
+ child_task->prev_queue->next_queue = child_task->next_queue;
+ child_task->next_queue->prev_queue = child_task->prev_queue;
+ if (team->task_queue == child_task)
+ {
+ if (child_task->next_queue != child_task)
+ team->task_queue = child_task->next_queue;
+ else
+ team->task_queue = NULL;
+ }
+ child_task->kind = GOMP_TASK_TIED;
+ if (--team->task_queued_count == 0)
+ gomp_team_barrier_clear_task_pending (&team->barrier);
+ if ((gomp_team_barrier_cancelled (&team->barrier)
+ || (taskgroup && taskgroup->cancelled))
+ && !child_task->copy_ctors_done)
+ return true;
+ return false;
+}
+
+static void
+gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
+{
+ struct gomp_task *parent = child_task->parent;
+ size_t i;
+
+ for (i = 0; i < child_task->depend_count; i++)
+ if (!child_task->depend[i].redundant)
+ {
+ if (child_task->depend[i].next)
+ child_task->depend[i].next->prev = child_task->depend[i].prev;
+ if (child_task->depend[i].prev)
+ child_task->depend[i].prev->next = child_task->depend[i].next;
+ else
+ {
+ hash_entry_type *slot
+ = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
+ NO_INSERT);
+ if (*slot != &child_task->depend[i])
+ abort ();
+ if (child_task->depend[i].next)
+ *slot = child_task->depend[i].next;
+ else
+ htab_clear_slot (parent->depend_hash, slot);
+ }
+ }
+}
+
+static size_t
+gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+ struct gomp_team *team)
+{
+ struct gomp_task *parent = child_task->parent;
+ size_t i, count = child_task->dependers->n_elem, ret = 0;
+ for (i = 0; i < count; i++)
+ {
+ struct gomp_task *task = child_task->dependers->elem[i];
+ if (--task->num_dependees != 0)
+ continue;
+
+ struct gomp_taskgroup *taskgroup = task->taskgroup;
+ if (parent)
+ {
+ if (parent->children)
+ {
+ task->next_child = parent->children;
+ task->prev_child = parent->children->prev_child;
+ task->next_child->prev_child = task;
+ task->prev_child->next_child = task;
+ }
+ else
+ {
+ task->next_child = task;
+ task->prev_child = task;
+ }
+ parent->children = task;
+ if (parent->in_taskwait)
+ {
+ parent->in_taskwait = false;
+ gomp_sem_post (&parent->taskwait_sem);
+ }
+ }
+ if (taskgroup)
+ {
+ if (taskgroup->children)
+ {
+ task->next_taskgroup = taskgroup->children;
+ task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+ task->next_taskgroup->prev_taskgroup = task;
+ task->prev_taskgroup->next_taskgroup = task;
+ }
+ else
+ {
+ task->next_taskgroup = task;
+ task->prev_taskgroup = task;
+ }
+ taskgroup->children = task;
+ if (taskgroup->in_taskgroup_wait)
+ {
+ taskgroup->in_taskgroup_wait = false;
+ gomp_sem_post (&taskgroup->taskgroup_sem);
+ }
+ }
+ if (team->task_queue)
+ {
+ task->next_queue = team->task_queue;
+ task->prev_queue = team->task_queue->prev_queue;
+ task->next_queue->prev_queue = task;
+ task->prev_queue->next_queue = task;
+ }
+ else
+ {
+ task->next_queue = task;
+ task->prev_queue = task;
+ team->task_queue = task;
+ }
+ ++team->task_count;
+ ++team->task_queued_count;
+ ++ret;
+ }
+ free (child_task->dependers);
+ child_task->dependers = NULL;
+ if (ret > 1)
+ gomp_team_barrier_set_task_pending (&team->barrier);
+ return ret;
+}
+
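
(Aside: the dependers vectors consumed above are built in GOMP_task with an
initial capacity of 6 elements and grown via allocated * 2 + 2, giving
amortized O(1) appends.  A minimal standalone sketch of that growth policy;
struct vec and vec_push are hypothetical names, not libgomp API, and error
checking is omitted.)

#include <stdlib.h>

struct vec
{
  size_t n_elem;
  size_t allocated;
  void *elem[];
};

static struct vec *
vec_push (struct vec *v, void *p)
{
  if (v == NULL)
    {
      /* Initial allocation, like the 6-element dependers vector.  */
      v = malloc (sizeof (struct vec) + 6 * sizeof (void *));
      v->n_elem = 0;
      v->allocated = 6;
    }
  else if (v->n_elem == v->allocated)
    {
      /* Same growth formula as used for tsk->dependers above.  */
      v->allocated = v->allocated * 2 + 2;
      v = realloc (v, sizeof (struct vec)
		      + v->allocated * sizeof (void *));
    }
  v->elem[v->n_elem++] = p;
  return v;
}
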
+static inline size_t
+gomp_task_run_post_handle_depend (struct gomp_task *child_task,
+ struct gomp_team *team)
+{
+ if (child_task->depend_count == 0)
+ return 0;
+
+ /* If parent is gone already, the hash table is freed and nothing
+ will use the hash table anymore, no need to remove anything from it. */
+ if (child_task->parent != NULL)
+ gomp_task_run_post_handle_depend_hash (child_task);
+
+ if (child_task->dependers == NULL)
+ return 0;
+
+ return gomp_task_run_post_handle_dependers (child_task, team);
+}
+
+static inline void
+gomp_task_run_post_remove_parent (struct gomp_task *child_task)
+{
+ struct gomp_task *parent = child_task->parent;
+ if (parent == NULL)
+ return;
+ child_task->prev_child->next_child = child_task->next_child;
+ child_task->next_child->prev_child = child_task->prev_child;
+ if (parent->children != child_task)
+ return;
+ if (child_task->next_child != child_task)
+ parent->children = child_task->next_child;
+ else
+ {
+ /* We access task->children in GOMP_taskwait
+ outside of the task lock mutex region, so
+ need a release barrier here to ensure memory
+ written by child_task->fn above is flushed
+ before the NULL is written. */
+ __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
+ if (parent->in_taskwait)
+ {
+ parent->in_taskwait = false;
+ gomp_sem_post (&parent->taskwait_sem);
+ }
+ }
+}
+
+static inline void
+gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
+{
+ struct gomp_taskgroup *taskgroup = child_task->taskgroup;
+ if (taskgroup == NULL)
+ return;
+ child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
+ child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
+ if (taskgroup->num_children > 1)
+ --taskgroup->num_children;
+ else
+ {
+	  /* We access taskgroup->num_children in GOMP_taskgroup_end
+	     outside of the task lock mutex region, so
+	     need a release barrier here to ensure memory
+	     written by child_task->fn above is flushed
+	     before the zero is written. */
+ __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
+ }
+ if (taskgroup->children != child_task)
+ return;
+ if (child_task->next_taskgroup != child_task)
+ taskgroup->children = child_task->next_taskgroup;
+ else
+ {
+ taskgroup->children = NULL;
+ if (taskgroup->in_taskgroup_wait)
+ {
+ taskgroup->in_taskgroup_wait = false;
+ gomp_sem_post (&taskgroup->taskgroup_sem);
+ }
+ }
+}
+
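
(Aside: the MEMMODEL_RELEASE stores in the two helpers above pair with
MEMMODEL_ACQUIRE loads in GOMP_taskwait and GOMP_taskgroup_end.  A minimal
C11 sketch of the same pattern, with hypothetical names: a waiter that
observes 0 with an acquire load is guaranteed to see everything the worker
wrote before its release store.)

#include <stdatomic.h>

static int result;
static atomic_size_t num_children = 1;

void
worker (void)
{
  result = 42;	/* Stands for writes done by child_task->fn.  */
  atomic_store_explicit (&num_children, 0, memory_order_release);
}

void
waiter (void)
{
  while (atomic_load_explicit (&num_children, memory_order_acquire) != 0)
    ;	/* The real runtime blocks on a semaphore instead of spinning.  */
  /* result == 42 is guaranteed here.  */
}
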
void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
@@ -202,6 +639,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
struct gomp_task *task = thr->task;
struct gomp_task *child_task = NULL;
struct gomp_task *to_free = NULL;
+ int do_wake = 0;
gomp_mutex_lock (&team->task_lock);
if (gomp_barrier_last_thread (state))
@@ -218,26 +656,31 @@ gomp_barrier_handle_tasks (gomp_barrier_
while (1)
{
+ bool cancelled = false;
if (team->task_queue != NULL)
{
- struct gomp_task *parent;
-
child_task = team->task_queue;
- parent = child_task->parent;
- if (parent && parent->children == child_task)
- parent->children = child_task->next_child;
- child_task->prev_queue->next_queue = child_task->next_queue;
- child_task->next_queue->prev_queue = child_task->prev_queue;
- if (child_task->next_queue != child_task)
- team->task_queue = child_task->next_queue;
- else
- team->task_queue = NULL;
- child_task->kind = GOMP_TASK_TIED;
+ cancelled = gomp_task_run_pre (child_task, child_task->parent,
+ child_task->taskgroup, team);
+ if (__builtin_expect (cancelled, 0))
+ {
+ if (to_free)
+ {
+ gomp_finish_task (to_free);
+ free (to_free);
+ to_free = NULL;
+ }
+ goto finish_cancelled;
+ }
team->task_running_count++;
- if (team->task_count == team->task_running_count)
- gomp_team_barrier_clear_task_pending (&team->barrier);
+ child_task->in_tied_task = true;
}
gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ {
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ do_wake = 0;
+ }
if (to_free)
{
gomp_finish_task (to_free);
@@ -255,33 +698,22 @@ gomp_barrier_handle_tasks (gomp_barrier_
gomp_mutex_lock (&team->task_lock);
if (child_task)
{
- struct gomp_task *parent = child_task->parent;
- if (parent)
- {
- child_task->prev_child->next_child = child_task->next_child;
- child_task->next_child->prev_child = child_task->prev_child;
- if (parent->children == child_task)
- {
- if (child_task->next_child != child_task)
- parent->children = child_task->next_child;
- else
- {
- /* We access task->children in GOMP_taskwait
- outside of the task lock mutex region, so
- need a release barrier here to ensure memory
- written by child_task->fn above is flushed
- before the NULL is written. */
- __atomic_store_n (&parent->children, NULL,
- MEMMODEL_RELEASE);
- if (parent->in_taskwait)
- gomp_sem_post (&parent->taskwait_sem);
- }
- }
- }
+ finish_cancelled:;
+ size_t new_tasks
+ = gomp_task_run_post_handle_depend (child_task, team);
+ gomp_task_run_post_remove_parent (child_task);
gomp_clear_parent (child_task->children);
+ gomp_task_run_post_remove_taskgroup (child_task);
to_free = child_task;
child_task = NULL;
- team->task_running_count--;
+ if (!cancelled)
+ team->task_running_count--;
+ if (new_tasks > 1)
+ {
+ do_wake = team->nthreads - team->task_running_count;
+ if (do_wake > new_tasks)
+ do_wake = new_tasks;
+ }
if (--team->task_count == 0
&& gomp_team_barrier_waiting_for_tasks (&team->barrier))
{
@@ -304,9 +736,10 @@ GOMP_taskwait (void)
struct gomp_task *task = thr->task;
struct gomp_task *child_task = NULL;
struct gomp_task *to_free = NULL;
+ int do_wake = 0;
/* The acquire barrier on load of task->children here synchronizes
- with the write of a NULL in gomp_barrier_handle_tasks. It is
+ with the write of a NULL in gomp_task_run_post_remove_parent. It is
not necessary that we synchronize with other non-NULL writes at
this point, but we must ensure that all writes to memory by a
child thread task work function are seen before we exit from
@@ -318,6 +751,7 @@ GOMP_taskwait (void)
gomp_mutex_lock (&team->task_lock);
while (1)
{
+ bool cancelled = false;
if (task->children == NULL)
{
gomp_mutex_unlock (&team->task_lock);
@@ -331,26 +765,30 @@ GOMP_taskwait (void)
if (task->children->kind == GOMP_TASK_WAITING)
{
child_task = task->children;
- task->children = child_task->next_child;
- child_task->prev_queue->next_queue = child_task->next_queue;
- child_task->next_queue->prev_queue = child_task->prev_queue;
- if (team->task_queue == child_task)
+ cancelled
+ = gomp_task_run_pre (child_task, task, child_task->taskgroup,
+ team);
+ if (__builtin_expect (cancelled, 0))
{
- if (child_task->next_queue != child_task)
- team->task_queue = child_task->next_queue;
- else
- team->task_queue = NULL;
+ if (to_free)
+ {
+ gomp_finish_task (to_free);
+ free (to_free);
+ to_free = NULL;
+ }
+ goto finish_cancelled;
}
- child_task->kind = GOMP_TASK_TIED;
- team->task_running_count++;
- if (team->task_count == team->task_running_count)
- gomp_team_barrier_clear_task_pending (&team->barrier);
}
else
/* All tasks we are waiting for are already running
in other threads. Wait for them. */
task->in_taskwait = true;
gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ {
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ do_wake = 0;
+ }
if (to_free)
{
gomp_finish_task (to_free);
@@ -364,14 +802,13 @@ GOMP_taskwait (void)
thr->task = task;
}
else
- {
- gomp_sem_wait (&task->taskwait_sem);
- task->in_taskwait = false;
- return;
- }
+ gomp_sem_wait (&task->taskwait_sem);
gomp_mutex_lock (&team->task_lock);
if (child_task)
{
+ finish_cancelled:;
+ size_t new_tasks
+ = gomp_task_run_post_handle_depend (child_task, team);
child_task->prev_child->next_child = child_task->next_child;
child_task->next_child->prev_child = child_task->prev_child;
if (task->children == child_task)
@@ -382,10 +819,17 @@ GOMP_taskwait (void)
task->children = NULL;
}
gomp_clear_parent (child_task->children);
+ gomp_task_run_post_remove_taskgroup (child_task);
to_free = child_task;
child_task = NULL;
team->task_count--;
- team->task_running_count--;
+ if (new_tasks > 1)
+ {
+ do_wake = team->nthreads - team->task_running_count
+ - !task->in_tied_task;
+ if (do_wake > new_tasks)
+ do_wake = new_tasks;
+ }
}
}
}
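
(Aside: in user terms, GOMP_taskwait waits only for the direct children of
the current task, and the loop above lets the waiting thread run a queued
child itself instead of blocking.  A sketch; bar is a stand-in, and foo is
assumed to run inside a parallel region.)

extern void bar (int);

void
foo (void)
{
  #pragma omp task
  bar (1);
  #pragma omp task
  bar (2);
  #pragma omp taskwait
  /* Both tasks above have finished here, though not necessarily
     any tasks they spawned themselves.  */
}
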
@@ -398,6 +842,153 @@ GOMP_taskyield (void)
/* Nothing at the moment. */
}
+void
+GOMP_taskgroup_start (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task *task = thr->task;
+ struct gomp_taskgroup *taskgroup;
+
+  /* If team is NULL, all tasks are executed as
+     GOMP_TASK_IFFALSE tasks and thus all child tasks of the
+     taskgroup and their descendant tasks will be finished
+     by the time GOMP_taskgroup_end is called. */
+ if (team == NULL)
+ return;
+ taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
+ taskgroup->prev = task->taskgroup;
+ taskgroup->children = NULL;
+ taskgroup->in_taskgroup_wait = false;
+ taskgroup->cancelled = false;
+ taskgroup->num_children = 0;
+ gomp_sem_init (&taskgroup->taskgroup_sem, 0);
+ task->taskgroup = taskgroup;
+}
+
+void
+GOMP_taskgroup_end (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task *task = thr->task;
+ struct gomp_taskgroup *taskgroup;
+ struct gomp_task *child_task = NULL;
+ struct gomp_task *to_free = NULL;
+ int do_wake = 0;
+
+ if (team == NULL)
+ return;
+ taskgroup = task->taskgroup;
+
+ /* The acquire barrier on load of taskgroup->num_children here
+ synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
+ It is not necessary that we synchronize with other non-0 writes at
+ this point, but we must ensure that all writes to memory by a
+ child thread task work function are seen before we exit from
+ GOMP_taskgroup_end. */
+ if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
+ goto finish;
+
+ gomp_mutex_lock (&team->task_lock);
+ while (1)
+ {
+ bool cancelled = false;
+ if (taskgroup->children == NULL)
+ {
+ if (taskgroup->num_children)
+ goto do_wait;
+ gomp_mutex_unlock (&team->task_lock);
+ if (to_free)
+ {
+ gomp_finish_task (to_free);
+ free (to_free);
+ }
+ goto finish;
+ }
+ if (taskgroup->children->kind == GOMP_TASK_WAITING)
+ {
+ child_task = taskgroup->children;
+ cancelled
+ = gomp_task_run_pre (child_task, child_task->parent, taskgroup,
+ team);
+ if (__builtin_expect (cancelled, 0))
+ {
+ if (to_free)
+ {
+ gomp_finish_task (to_free);
+ free (to_free);
+ to_free = NULL;
+ }
+ goto finish_cancelled;
+ }
+ }
+ else
+ {
+ do_wait:
+ /* All tasks we are waiting for are already running
+ in other threads. Wait for them. */
+ taskgroup->in_taskgroup_wait = true;
+ }
+ gomp_mutex_unlock (&team->task_lock);
+ if (do_wake)
+ {
+ gomp_team_barrier_wake (&team->barrier, do_wake);
+ do_wake = 0;
+ }
+ if (to_free)
+ {
+ gomp_finish_task (to_free);
+ free (to_free);
+ to_free = NULL;
+ }
+ if (child_task)
+ {
+ thr->task = child_task;
+ child_task->fn (child_task->fn_data);
+ thr->task = task;
+ }
+ else
+ gomp_sem_wait (&taskgroup->taskgroup_sem);
+ gomp_mutex_lock (&team->task_lock);
+ if (child_task)
+ {
+ finish_cancelled:;
+ size_t new_tasks
+ = gomp_task_run_post_handle_depend (child_task, team);
+ child_task->prev_taskgroup->next_taskgroup
+ = child_task->next_taskgroup;
+ child_task->next_taskgroup->prev_taskgroup
+ = child_task->prev_taskgroup;
+ --taskgroup->num_children;
+ if (taskgroup->children == child_task)
+ {
+ if (child_task->next_taskgroup != child_task)
+ taskgroup->children = child_task->next_taskgroup;
+ else
+ taskgroup->children = NULL;
+ }
+ gomp_task_run_post_remove_parent (child_task);
+ gomp_clear_parent (child_task->children);
+ to_free = child_task;
+ child_task = NULL;
+ team->task_count--;
+ if (new_tasks > 1)
+ {
+ do_wake = team->nthreads - team->task_running_count
+ - !task->in_tied_task;
+ if (do_wake > new_tasks)
+ do_wake = new_tasks;
+ }
+ }
+ }
+
+ finish:
+ task->taskgroup = taskgroup->prev;
+ gomp_sem_destroy (&taskgroup->taskgroup_sem);
+ free (taskgroup);
+}
+
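
(Aside: the two entry points above implement the taskgroup region; unlike
taskwait, GOMP_taskgroup_end also waits for descendant tasks, via the
num_children counting above.  A sketch; bar and baz are stand-ins.)

extern void bar (void), baz (void);

void
foo (void)
{
  #pragma omp taskgroup
  {
    #pragma omp task
    bar ();	/* May create nested tasks of its own...  */
    #pragma omp task
    baz ();
  }
  /* ...all of which have finished here.  */
}
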
int
omp_in_final (void)
{
@@ -53,6 +53,7 @@ struct gomp_thread_start_data
struct gomp_team_state ts;
struct gomp_task *task;
struct gomp_thread_pool *thread_pool;
+ unsigned int place;
bool nested;
};
@@ -84,6 +85,7 @@ gomp_thread_start (void *xdata)
thr->thread_pool = data->thread_pool;
thr->ts = data->ts;
thr->task = data->task;
+ thr->place = data->place;
thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
@@ -98,7 +100,7 @@ gomp_thread_start (void *xdata)
gomp_barrier_wait (&team->barrier);
local_fn (local_data);
- gomp_team_barrier_wait (&team->barrier);
+ gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
gomp_barrier_wait_last (&team->barrier);
}
@@ -113,7 +115,7 @@ gomp_thread_start (void *xdata)
struct gomp_task *task = thr->task;
local_fn (local_data);
- gomp_team_barrier_wait (&team->barrier);
+ gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
gomp_barrier_wait (&pool->threads_dock);
@@ -126,6 +128,8 @@ gomp_thread_start (void *xdata)
}
gomp_sem_destroy (&thr->release);
+ thr->thread_pool = NULL;
+ thr->task = NULL;
return NULL;
}
@@ -149,6 +153,7 @@ gomp_new_team (unsigned nthreads)
#else
gomp_mutex_init (&team->work_share_list_free_lock);
#endif
+ team->work_shares_to_free = &team->work_shares[0];
gomp_init_work_share (&team->work_shares[0], false, nthreads);
team->work_shares[0].next_alloc = NULL;
team->work_share_list_free = NULL;
@@ -167,7 +172,10 @@ gomp_new_team (unsigned nthreads)
gomp_mutex_init (&team->task_lock);
team->task_queue = NULL;
team->task_count = 0;
+ team->task_queued_count = 0;
team->task_running_count = 0;
+ team->work_share_cancelled = 0;
+ team->team_cancelled = 0;
return team;
}
@@ -199,16 +207,19 @@ static struct gomp_thread_pool *gomp_new
static void
gomp_free_pool_helper (void *thread_pool)
{
+ struct gomp_thread *thr = gomp_thread ();
struct gomp_thread_pool *pool
= (struct gomp_thread_pool *) thread_pool;
gomp_barrier_wait_last (&pool->threads_dock);
- gomp_sem_destroy (&gomp_thread ()->release);
+ gomp_sem_destroy (&thr->release);
+ thr->thread_pool = NULL;
+ thr->task = NULL;
pthread_exit (NULL);
}
/* Free a thread pool and release its threads. */
-static void
+void
gomp_free_thread (void *arg __attribute__((unused)))
{
struct gomp_thread *thr = gomp_thread ();
@@ -236,9 +247,9 @@ gomp_free_thread (void *arg __attribute_
__sync_fetch_and_add (&gomp_managed_threads,
1L - pool->threads_used);
#else
- gomp_mutex_lock (&gomp_remaining_threads_lock);
+ gomp_mutex_lock (&gomp_managed_threads_lock);
gomp_managed_threads -= pool->threads_used - 1L;
- gomp_mutex_unlock (&gomp_remaining_threads_lock);
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
}
free (pool->threads);
@@ -259,7 +270,7 @@ gomp_free_thread (void *arg __attribute_
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
- struct gomp_team *team)
+ unsigned flags, struct gomp_team *team)
{
struct gomp_thread_start_data *start_data;
struct gomp_thread *thr, *nthr;
@@ -270,17 +281,24 @@ gomp_team_start (void (*fn) (void *), vo
unsigned i, n, old_threads_used = 0;
pthread_attr_t thread_attr, *attr;
unsigned long nthreads_var;
+ char bind, bind_var;
+ unsigned int s = 0, rest = 0, p = 0, k = 0;
+ unsigned int affinity_count = 0;
+ struct gomp_thread **affinity_thr = NULL;
thr = gomp_thread ();
nested = thr->ts.team != NULL;
if (__builtin_expect (thr->thread_pool == NULL, 0))
{
thr->thread_pool = gomp_new_thread_pool ();
+ thr->thread_pool->threads_busy = nthreads;
pthread_setspecific (gomp_thread_destructor, thr);
}
pool = thr->thread_pool;
task = thr->task;
icv = task ? &task->icv : &gomp_global_icv;
+ if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
+ gomp_init_affinity ();
/* Always save the previous state, even if this isn't a nested team.
In particular, we should save any work share state from an outer
@@ -303,14 +321,95 @@ gomp_team_start (void (*fn) (void *), vo
if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
&& thr->ts.level < gomp_nthreads_var_list_len)
nthreads_var = gomp_nthreads_var_list[thr->ts.level];
+ bind_var = icv->bind_var;
+ if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
+ bind_var = flags & 7;
+ bind = bind_var;
+ if (__builtin_expect (gomp_bind_var_list != NULL, 0)
+ && thr->ts.level < gomp_bind_var_list_len)
+ bind_var = gomp_bind_var_list[thr->ts.level];
gomp_init_task (thr->task, task, icv);
team->implicit_task[0].icv.nthreads_var = nthreads_var;
+ team->implicit_task[0].icv.bind_var = bind_var;
if (nthreads == 1)
return;
i = 1;
+ if (__builtin_expect (gomp_places_list != NULL, 0))
+ {
+      /* Depending on the chosen proc_bind model, set the subpartition
+	 for the master thread and initialize the helper variables
+	 P and optionally S, K and/or REST, used by the later place
+	 computation for each additional thread; the omp_proc_bind_false
+	 case in the switch below promotes BIND to omp_proc_bind_true. */
+ p = thr->place - 1;
+ switch (bind)
+ {
+ case omp_proc_bind_false:
+ bind = omp_proc_bind_true;
+ /* FALLTHRU */
+ case omp_proc_bind_true:
+ case omp_proc_bind_close:
+ if (nthreads > thr->ts.place_partition_len)
+ {
+		/* T > P.  S threads will be placed in each place,
+		   and the final REST threads will be placed one by
+		   one into the already occupied places. */
+ s = nthreads / thr->ts.place_partition_len;
+ rest = nthreads % thr->ts.place_partition_len;
+ }
+ else
+ s = 1;
+ k = 1;
+ break;
+ case omp_proc_bind_master:
+ /* Each thread will be bound to master's place. */
+ break;
+ case omp_proc_bind_spread:
+ if (nthreads <= thr->ts.place_partition_len)
+ {
+		/* T <= P.  Each subpartition will have between s
+		   and s+1 places (subpartitions starting at or
+		   after rest will have s places, earlier ones s+1
+		   places); each thread will be bound to the first
+		   place in its subpartition (except for the master
+		   thread, which can be bound to another place in
+		   its subpartition). */
+ s = thr->ts.place_partition_len / nthreads;
+ rest = thr->ts.place_partition_len % nthreads;
+ rest = (s + 1) * rest + thr->ts.place_partition_off;
+ if (p < rest)
+ {
+ p -= (p - thr->ts.place_partition_off) % (s + 1);
+ thr->ts.place_partition_len = s + 1;
+ }
+ else
+ {
+ p -= (p - rest) % s;
+ thr->ts.place_partition_len = s;
+ }
+ thr->ts.place_partition_off = p;
+ }
+ else
+ {
+ /* T > P. Each subpartition will have just a single
+ place and we'll place between s and s+1
+ threads into each subpartition. */
+ s = nthreads / thr->ts.place_partition_len;
+ rest = nthreads % thr->ts.place_partition_len;
+ thr->ts.place_partition_off = p;
+ thr->ts.place_partition_len = 1;
+ k = 1;
+ }
+ break;
+ }
+ }
+ else
+ bind = omp_proc_bind_false;
+
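
(Aside: in user terms, the place computation above reacts to the proc_bind
clause and to OMP_PLACES/OMP_PROC_BIND.  A sketch: with e.g.
OMP_PLACES=cores on an eight-core machine, spread gives each of the four
threads below its own two-place subpartition, while close would pack them
onto adjacent places.)

#include <omp.h>
#include <stdio.h>

int
main (void)
{
  #pragma omp parallel num_threads(4) proc_bind(spread)
  printf ("thread %d started\n", omp_get_thread_num ());
  return 0;
}
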
/* We only allow the reuse of idle threads for non-nested PARALLEL
regions. This appears to be implied by the semantics of
threadprivate variables, but perhaps that's reading too much into
@@ -341,47 +440,244 @@ gomp_team_start (void (*fn) (void *), vo
team will exit. */
pool->threads_used = nthreads;
+ /* If necessary, expand the size of the gomp_threads array. It is
+ expected that changes in the number of threads are rare, thus we
+ make no effort to expand gomp_threads_size geometrically. */
+ if (nthreads >= pool->threads_size)
+ {
+ pool->threads_size = nthreads + 1;
+ pool->threads
+ = gomp_realloc (pool->threads,
+ pool->threads_size
+			    * sizeof (struct gomp_thread *));
+ }
+
/* Release existing idle threads. */
for (; i < n; ++i)
{
- nthr = pool->threads[i];
+ unsigned int place_partition_off = thr->ts.place_partition_off;
+ unsigned int place_partition_len = thr->ts.place_partition_len;
+ unsigned int place = 0;
+ if (__builtin_expect (gomp_places_list != NULL, 0))
+ {
+ switch (bind)
+ {
+ case omp_proc_bind_true:
+ case omp_proc_bind_close:
+ if (k == s)
+ {
+ ++p;
+ if (p == (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len))
+ p = team->prev_ts.place_partition_off;
+ k = 1;
+ if (i == nthreads - rest)
+ s = 1;
+ }
+ else
+ ++k;
+ break;
+ case omp_proc_bind_master:
+ break;
+ case omp_proc_bind_spread:
+ if (k == 0)
+ {
+ /* T <= P. */
+ if (p < rest)
+ p += s + 1;
+ else
+ p += s;
+ if (p == (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len))
+ p = team->prev_ts.place_partition_off;
+ place_partition_off = p;
+ if (p < rest)
+ place_partition_len = s + 1;
+ else
+ place_partition_len = s;
+ }
+ else
+ {
+ /* T > P. */
+ if (k == s)
+ {
+ ++p;
+ if (p == (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len))
+ p = team->prev_ts.place_partition_off;
+ k = 1;
+ if (i == nthreads - rest)
+ s = 1;
+ }
+ else
+ ++k;
+ place_partition_off = p;
+ place_partition_len = 1;
+ }
+ break;
+ }
+ if (affinity_thr != NULL
+ || (bind != omp_proc_bind_true
+ && pool->threads[i]->place != p + 1)
+ || pool->threads[i]->place <= place_partition_off
+ || pool->threads[i]->place > (place_partition_off
+ + place_partition_len))
+ {
+ unsigned int l;
+ if (affinity_thr == NULL)
+ {
+ unsigned int j;
+
+ if (team->prev_ts.place_partition_len > 64)
+ affinity_thr
+ = gomp_malloc (team->prev_ts.place_partition_len
+ * sizeof (struct gomp_thread *));
+ else
+ affinity_thr
+ = gomp_alloca (team->prev_ts.place_partition_len
+ * sizeof (struct gomp_thread *));
+ memset (affinity_thr, '\0',
+ team->prev_ts.place_partition_len
+ * sizeof (struct gomp_thread *));
+ for (j = i; j < old_threads_used; j++)
+ {
+ if (pool->threads[j]->place
+ > team->prev_ts.place_partition_off
+ && (pool->threads[j]->place
+ <= (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len)))
+ {
+ l = pool->threads[j]->place - 1
+ - team->prev_ts.place_partition_off;
+ pool->threads[j]->data = affinity_thr[l];
+ affinity_thr[l] = pool->threads[j];
+ }
+ pool->threads[j] = NULL;
+ }
+ if (nthreads > old_threads_used)
+ memset (&pool->threads[old_threads_used],
+ '\0', ((nthreads - old_threads_used)
+ * sizeof (struct gomp_thread *)));
+ n = nthreads;
+ affinity_count = old_threads_used - i;
+ }
+ if (affinity_count == 0)
+ break;
+ l = p;
+ if (affinity_thr[l - team->prev_ts.place_partition_off]
+ == NULL)
+ {
+ if (bind != omp_proc_bind_true)
+ continue;
+ for (l = place_partition_off;
+ l < place_partition_off + place_partition_len;
+ l++)
+ if (affinity_thr[l - team->prev_ts.place_partition_off]
+ != NULL)
+ break;
+ if (l == place_partition_off + place_partition_len)
+ continue;
+ }
+ nthr = affinity_thr[l - team->prev_ts.place_partition_off];
+ affinity_thr[l - team->prev_ts.place_partition_off]
+ = (struct gomp_thread *) nthr->data;
+ affinity_count--;
+ pool->threads[i] = nthr;
+ }
+ else
+ nthr = pool->threads[i];
+ place = p + 1;
+ }
+ else
+ nthr = pool->threads[i];
nthr->ts.team = team;
nthr->ts.work_share = &team->work_shares[0];
nthr->ts.last_work_share = NULL;
nthr->ts.team_id = i;
nthr->ts.level = team->prev_ts.level + 1;
nthr->ts.active_level = thr->ts.active_level;
+ nthr->ts.place_partition_off = place_partition_off;
+ nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
nthr->ts.single_count = 0;
#endif
nthr->ts.static_trip = 0;
nthr->task = &team->implicit_task[i];
+ nthr->place = place;
gomp_init_task (nthr->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ team->implicit_task[i].icv.bind_var = bind_var;
nthr->fn = fn;
nthr->data = data;
team->ordered_release[i] = &nthr->release;
}
+ if (__builtin_expect (affinity_thr != NULL, 0))
+ {
+ /* If AFFINITY_THR is non-NULL just because we had to
+ permute some threads in the pool, but we've managed
+ to find exactly as many old threads as we'd find
+ without affinity, we don't need to handle this
+ specially anymore. */
+ if (nthreads <= old_threads_used
+ ? (affinity_count == old_threads_used - nthreads)
+ : (i == old_threads_used))
+ {
+ if (team->prev_ts.place_partition_len > 64)
+ free (affinity_thr);
+ affinity_thr = NULL;
+ affinity_count = 0;
+ }
+ else
+ {
+ i = 1;
+	  /* We are going to compute the places/subpartitions
+	     again from the beginning, so we need to reinitialize
+	     the vars modified by the switch (bind) above inside
+	     the loop to the state they had after the initial
+	     switch (bind). */
+ switch (bind)
+ {
+ case omp_proc_bind_true:
+ case omp_proc_bind_close:
+ if (nthreads > thr->ts.place_partition_len)
+ /* T > P. S has been changed, so needs
+ to be recomputed. */
+ s = nthreads / thr->ts.place_partition_len;
+ k = 1;
+ p = thr->place - 1;
+ break;
+ case omp_proc_bind_master:
+ /* No vars have been changed. */
+ break;
+ case omp_proc_bind_spread:
+ p = thr->ts.place_partition_off;
+ if (k != 0)
+ {
+ /* T > P. */
+ s = nthreads / team->prev_ts.place_partition_len;
+ k = 1;
+ }
+ break;
+ }
+
+ /* Increase the barrier threshold to make sure all new
+ threads and all the threads we're going to let die
+ arrive before the team is released. */
+ if (affinity_count)
+ gomp_barrier_reinit (&pool->threads_dock,
+ nthreads + affinity_count);
+ }
+ }
+
if (i == nthreads)
goto do_release;
- /* If necessary, expand the size of the gomp_threads array. It is
- expected that changes in the number of threads are rare, thus we
- make no effort to expand gomp_threads_size geometrically. */
- if (nthreads >= pool->threads_size)
- {
- pool->threads_size = nthreads + 1;
- pool->threads
- = gomp_realloc (pool->threads,
- pool->threads_size
- * sizeof (struct gomp_thread_data *));
- }
}
- if (__builtin_expect (nthreads > old_threads_used, 0))
+ if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
{
- long diff = (long) nthreads - (long) old_threads_used;
+ long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;
if (old_threads_used == 0)
--diff;
@@ -389,14 +685,14 @@ gomp_team_start (void (*fn) (void *), vo
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads, diff);
#else
- gomp_mutex_lock (&gomp_remaining_threads_lock);
+ gomp_mutex_lock (&gomp_managed_threads_lock);
gomp_managed_threads += diff;
- gomp_mutex_unlock (&gomp_remaining_threads_lock);
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
}
attr = &gomp_thread_attr;
- if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
+ if (__builtin_expect (gomp_places_list != NULL, 0))
{
size_t stacksize;
pthread_attr_init (&thread_attr);
@@ -410,11 +706,78 @@ gomp_team_start (void (*fn) (void *), vo
* (nthreads-i));
/* Launch new threads. */
- for (; i < nthreads; ++i, ++start_data)
+ for (; i < nthreads; ++i)
{
pthread_t pt;
int err;
+ start_data->ts.place_partition_off = thr->ts.place_partition_off;
+ start_data->ts.place_partition_len = thr->ts.place_partition_len;
+ start_data->place = 0;
+ if (__builtin_expect (gomp_places_list != NULL, 0))
+ {
+ switch (bind)
+ {
+ case omp_proc_bind_true:
+ case omp_proc_bind_close:
+ if (k == s)
+ {
+ ++p;
+ if (p == (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len))
+ p = team->prev_ts.place_partition_off;
+ k = 1;
+ if (i == nthreads - rest)
+ s = 1;
+ }
+ else
+ ++k;
+ break;
+ case omp_proc_bind_master:
+ break;
+ case omp_proc_bind_spread:
+ if (k == 0)
+ {
+ /* T <= P. */
+ if (p < rest)
+ p += s + 1;
+ else
+ p += s;
+ if (p == (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len))
+ p = team->prev_ts.place_partition_off;
+ start_data->ts.place_partition_off = p;
+ if (p < rest)
+ start_data->ts.place_partition_len = s + 1;
+ else
+ start_data->ts.place_partition_len = s;
+ }
+ else
+ {
+ /* T > P. */
+ if (k == s)
+ {
+ ++p;
+ if (p == (team->prev_ts.place_partition_off
+ + team->prev_ts.place_partition_len))
+ p = team->prev_ts.place_partition_off;
+ k = 1;
+ if (i == nthreads - rest)
+ s = 1;
+ }
+ else
+ ++k;
+ start_data->ts.place_partition_off = p;
+ start_data->ts.place_partition_len = 1;
+ }
+ break;
+ }
+ start_data->place = p + 1;
+ if (affinity_thr != NULL && pool->threads[i] != NULL)
+ continue;
+ gomp_init_thread_affinity (attr, p);
+ }
+
start_data->fn = fn;
start_data->fn_data = data;
start_data->ts.team = team;
@@ -430,18 +793,16 @@ gomp_team_start (void (*fn) (void *), vo
start_data->task = &team->implicit_task[i];
gomp_init_task (start_data->task, task, icv);
team->implicit_task[i].icv.nthreads_var = nthreads_var;
+ team->implicit_task[i].icv.bind_var = bind_var;
start_data->thread_pool = pool;
start_data->nested = nested;
- if (gomp_cpu_affinity != NULL)
- gomp_init_thread_affinity (attr);
-
- err = pthread_create (&pt, attr, gomp_thread_start, start_data);
+ err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
if (err != 0)
gomp_fatal ("Thread creation failed: %s", strerror (err));
}
- if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
+ if (__builtin_expect (gomp_places_list != NULL, 0))
pthread_attr_destroy (&thread_attr);
do_release:
@@ -450,21 +811,32 @@ gomp_team_start (void (*fn) (void *), vo
/* Decrease the barrier threshold to match the number of threads
that should arrive back at the end of this team. The extra
threads should be exiting. Note that we arrange for this test
- to never be true for nested teams. */
- if (__builtin_expect (nthreads < old_threads_used, 0))
+     to never be true for nested teams. If AFFINITY_COUNT is non-zero,
+     both the barrier and gomp_managed_threads were temporarily
+     raised to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_USED,
+     AFFINITY_COUNT, if non-zero, will always be at least
+     OLD_THREADS_USED - NTHREADS. */
+ if (__builtin_expect (nthreads < old_threads_used, 0)
+ || __builtin_expect (affinity_count, 0))
{
long diff = (long) nthreads - (long) old_threads_used;
+ if (affinity_count)
+ diff = -affinity_count;
+
gomp_barrier_reinit (&pool->threads_dock, nthreads);
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads, diff);
#else
- gomp_mutex_lock (&gomp_remaining_threads_lock);
+ gomp_mutex_lock (&gomp_managed_threads_lock);
gomp_managed_threads += diff;
- gomp_mutex_unlock (&gomp_remaining_threads_lock);
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
}
+ if (__builtin_expect (affinity_thr != NULL, 0)
+ && team->prev_ts.place_partition_len > 64)
+ free (affinity_thr);
}
@@ -477,9 +849,26 @@ gomp_team_end (void)
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
- /* This barrier handles all pending explicit threads. */
- gomp_team_barrier_wait (&team->barrier);
- gomp_fini_work_share (thr->ts.work_share);
+  /* This barrier handles all pending explicit threads.
+     As #pragma omp cancel parallel might leave the awaited count in
+     team->barrier in an inconsistent state, we need to use a different
+     counter here. */
+ gomp_team_barrier_wait_final (&team->barrier);
+ if (__builtin_expect (team->team_cancelled, 0))
+ {
+ struct gomp_work_share *ws = team->work_shares_to_free;
+ do
+ {
+ struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
+ if (next_ws == NULL)
+ gomp_ptrlock_set (&ws->next_ws, ws);
+ gomp_fini_work_share (ws);
+ ws = next_ws;
+ }
+ while (ws != NULL);
+ }
+ else
+ gomp_fini_work_share (thr->ts.work_share);
gomp_end_task ();
thr->ts = team->prev_ts;
@@ -489,9 +878,9 @@ gomp_team_end (void)
#ifdef HAVE_SYNC_BUILTINS
__sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
- gomp_mutex_lock (&gomp_remaining_threads_lock);
+ gomp_mutex_lock (&gomp_managed_threads_lock);
gomp_managed_threads -= team->nthreads - 1L;
- gomp_mutex_unlock (&gomp_remaining_threads_lock);
+ gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
/* This barrier has gomp_barrier_wait_last counterparts
and ensures the team can be safely destroyed. */
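
(Aside: the team_cancelled path above, which walks and frees the whole
work_shares_to_free chain, is reached from user code like the following
sketch; failed and do_work are stand-ins, and cancellation must be enabled
with OMP_CANCELLATION=true at runtime.)

extern int failed (void);
extern void do_work (void);

void
run (void)
{
  #pragma omp parallel
  {
    if (failed ())
      {
	#pragma omp cancel parallel
      }
    do_work ();
    #pragma omp cancellation point parallel
  }
}
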
@@ -532,8 +921,6 @@ gomp_team_end (void)
static void __attribute__((constructor))
initialize_team (void)
{
- struct gomp_thread *thr;
-
#ifndef HAVE_TLS
static struct gomp_thread initial_thread_tls_data;
@@ -543,13 +930,6 @@ initialize_team (void)
if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
gomp_fatal ("could not create thread pool destructor.");
-
-#ifdef HAVE_TLS
- thr = &gomp_tls_data;
-#else
- thr = &initial_thread_tls_data;
-#endif
- gomp_sem_init (&thr->release, 0);
}
static void __attribute__((destructor))
@@ -221,7 +221,10 @@ gomp_work_share_end (void)
if (gomp_barrier_last_thread (bstate))
{
if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
- free_work_share (team, thr->ts.last_work_share);
+ {
+ team->work_shares_to_free = thr->ts.work_share;
+ free_work_share (team, thr->ts.last_work_share);
+ }
}
gomp_team_barrier_wait_end (&team->barrier, bstate);
@@ -229,6 +232,32 @@ gomp_work_share_end (void)
}
/* The current thread is done with its current work sharing construct.
+ This version implies a cancellable barrier at the end of the work-share. */
+
+bool
+gomp_work_share_end_cancel (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ gomp_barrier_state_t bstate;
+
+ /* Cancellable work sharing constructs cannot be orphaned. */
+ bstate = gomp_barrier_wait_cancel_start (&team->barrier);
+
+ if (gomp_barrier_last_thread (bstate))
+ {
+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
+ {
+ team->work_shares_to_free = thr->ts.work_share;
+ free_work_share (team, thr->ts.last_work_share);
+ }
+ }
+ thr->ts.last_work_share = NULL;
+
+ return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
+}
+
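
(Aside: gomp_work_share_end_cancel backs the cancellable barrier implied at
the end of a cancellable worksharing construct such as the loop in this
sketch; again, OMP_CANCELLATION=true is required, otherwise the cancel
directive is ignored.)

int
find (int *a, int n, int key)
{
  int pos = -1;
  #pragma omp parallel for
  for (int i = 0; i < n; i++)
    {
      if (a[i] == key)
	{
	  #pragma omp atomic write
	  pos = i;
	  #pragma omp cancel for
	}
      #pragma omp cancellation point for
    }
  return pos;
}
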
+/* The current thread is done with its current work sharing construct.
This version does NOT imply a barrier at the end of the work-share. */
void
@@ -259,6 +288,9 @@ gomp_work_share_end_nowait (void)
#endif
if (completed == team->nthreads)
- free_work_share (team, thr->ts.last_work_share);
+ {
+ team->work_shares_to_free = thr->ts.work_share;
+ free_work_share (team, thr->ts.last_work_share);
+ }
thr->ts.last_work_share = NULL;
}