[committed,2/4] (Partial) OpenMP 5.0 support for GCC 9 (runtime library changes)

Message ID 20181108171827.GM11625@tucnak
State New
Headers show
Series
  • (Partial) OpenMP 5.0 support for GCC 9
Related show

Commit Message

Jakub Jelinek Nov. 8, 2018, 5:18 p.m.
Hi!

This is the libgomp/ except libgomp/testsuite/ part of the gomp-5_0-branch
merge to trunk I've just committed.

2018-11-08  Jakub Jelinek  <jakub@redhat.com>

	* affinity.c (gomp_display_affinity_place): New function.
	* affinity-fmt.c: New file.
	* alloc.c (gomp_aligned_alloc, gomp_aligned_free): New functions.
	* config/linux/affinity.c (gomp_display_affinity_place): New function.
	* config/nvptx/icv-device.c (omp_get_num_teams, omp_get_team_num):
	Move these functions to ...
	* config/nvptx/teams.c: ... here.  New file.
	* config/nvptx/target.c (omp_pause_resource, omp_pause_resource_all):
	New functions.
	* config/nvptx/team.c (gomp_team_start, gomp_pause_host): New
	functions.
	* configure.ac: Check for aligned_alloc, posix_memalign, memalign
	and _aligned_malloc.
	(HAVE_UNAME, HAVE_GETHOSTNAME, HAVE_GETPID): Add new tests.
	* configure.tgt: Add -DUSING_INITIAL_EXEC_TLS to XCFLAGS for Linux.
	* env.c (gomp_display_affinity_var, gomp_affinity_format_var,
	gomp_affinity_format_len): New variables.
	(parse_schedule): Parse monotonic and nonmonotonic modifiers in
	OMP_SCHEDULE variable.  Set GFS_MONOTONIC for monotonic schedules.
	(handle_omp_display_env): Display monotonic/nonmonotonic schedule
	modifiers.  Display (non-default) chunk sizes.  Print
	OMP_DISPLAY_AFFINITY and OMP_AFFINITY_FORMAT.
	(initialize_env): Don't call pthread_attr_setdetachstate.  Handle
	OMP_DISPLAY_AFFINITY and OMP_AFFINITY_FORMAT env vars.
	* fortran.c: Include stdio.h and string.h.
	(omp_pause_resource, omp_pause_resource_all): Add ialias_redirect.
	(omp_get_schedule_, omp_get_schedule_8_): Mask off GFS_MONOTONIC bit.
	(omp_set_affinity_format_, omp_get_affinity_format_,
	omp_display_affinity_, omp_capture_affinity_, omp_pause_resource_,
	omp_pause_resource_all_): New functions.
	* icv.c (omp_set_schedule): Mask off omp_sched_monotonic bit in
	switch.
	* icv-device.c (omp_get_num_teams, omp_get_team_num): Move these
	functions to ...
	* teams.c: ... here.  New file.
	* libgomp_g.h: Include gstdint.h.
	(GOMP_loop_nonmonotonic_runtime_start,
	GOMP_loop_maybe_nonmonotonic_runtime_start, GOMP_loop_start,
	GOMP_loop_ordered_start, GOMP_loop_nonmonotonic_runtime_next,
	GOMP_loop_maybe_nonmonotonic_runtime_next, GOMP_loop_doacross_start,
	GOMP_parallel_loop_nonmonotonic_runtime,
	GOMP_parallel_loop_maybe_nonmonotonic_runtime,
	GOMP_loop_ull_nonmonotonic_runtime_start,
	GOMP_loop_ull_maybe_nonmonotonic_runtime_start, GOMP_loop_ull_start,
	GOMP_loop_ull_ordered_start, GOMP_loop_ull_nonmonotonic_runtime_next,
	GOMP_loop_ull_maybe_nonmonotonic_runtime_next,
	GOMP_loop_ull_doacross_start, GOMP_parallel_reductions,
	GOMP_taskwait_depend, GOMP_taskgroup_reduction_register,
	GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap,
	GOMP_workshare_task_reduction_unregister, GOMP_sections2_start,
	GOMP_teams_reg): Declare.
	* libgomp.h (GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC): Define unless
	gomp_aligned_alloc uses fallback implementation.
	(gomp_aligned_alloc, gomp_aligned_free): Declare.
	(enum gomp_schedule_type): Add GFS_MONOTONIC.
	(struct gomp_doacross_work_share): Add extra field.
	(struct gomp_work_share): Add task_reductions field.
	(struct gomp_taskgroup): Add workshare and reductions fields.
	(GOMP_NEEDS_THREAD_HANDLE): Define if needed.
	(gomp_thread_handle): New typedef.
	(gomp_display_affinity_place, gomp_set_affinity_format,
	gomp_display_string, gomp_display_affinity,
	gomp_display_affinity_thread): Declare.
	(gomp_doacross_init, gomp_doacross_ull_init): Add size_t argument.
	(gomp_parallel_reduction_register, gomp_workshare_taskgroup_start,
	gomp_workshare_task_reduction_register): Declare.
	(gomp_team_start): Add taskgroup argument.
	(gomp_pause_host): Declare.
	(gomp_init_work_share, gomp_work_share_start): Change bool argument
	to size_t.
	(gomp_thread_self, gomp_thread_to_pthread_t): New inline functions.
	* libgomp.map (GOMP_5.0): Export GOMP_loop_start,
	GOMP_loop_ordered_start, GOMP_loop_doacross_start,
	GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
	GOMP_loop_ull_doacross_start,
	GOMP_workshare_task_reduction_unregister, GOMP_sections2_start,
	GOMP_loop_maybe_nonmonotonic_runtime_next,
	GOMP_loop_maybe_nonmonotonic_runtime_start,
	GOMP_loop_nonmonotonic_runtime_next,
	GOMP_loop_nonmonotonic_runtime_start,
	GOMP_loop_ull_maybe_nonmonotonic_runtime_next,
	GOMP_loop_ull_maybe_nonmonotonic_runtime_start,
	GOMP_loop_ull_nonmonotonic_runtime_next,
	GOMP_loop_ull_nonmonotonic_runtime_start,
	GOMP_parallel_loop_maybe_nonmonotonic_runtime,
	GOMP_parallel_loop_nonmonotonic_runtime, GOMP_parallel_reductions,
	GOMP_taskgroup_reduction_register,
	GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap,
	GOMP_teams_reg and GOMP_taskwait_depend.
	(OMP_5.0): Export omp_pause_resource{,_all}{,_},
	omp_{capture,display}_affinity{,_}, and
	omp_[gs]et_affinity_format{,_}.
	* loop.c: Include string.h.
	(GOMP_loop_runtime_next): Add ialias.
	(GOMP_taskgroup_reduction_register): Add ialias_redirect.
	(gomp_loop_static_start, gomp_loop_dynamic_start,
	gomp_loop_guided_start, gomp_loop_ordered_static_start,
	gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start,
	gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start,
	gomp_loop_doacross_guided_start): Adjust gomp_work_share_start
	or gomp_doacross_init callers.
	(gomp_adjust_sched, GOMP_loop_start, GOMP_loop_ordered_start,
	GOMP_loop_doacross_start): New functions.
	(GOMP_loop_runtime_start, GOMP_loop_ordered_runtime_start,
	GOMP_loop_doacross_runtime_start, GOMP_parallel_loop_runtime_start):
	Mask off GFS_MONOTONIC bit.
	(GOMP_loop_maybe_nonmonotonic_runtime_next,
	GOMP_loop_maybe_nonmonotonic_runtime_start,
	GOMP_loop_nonmonotonic_runtime_next,
	GOMP_loop_nonmonotonic_runtime_start,
	GOMP_parallel_loop_maybe_nonmonotonic_runtime,
	GOMP_parallel_loop_nonmonotonic_runtime): New aliases or wrapper
	functions.
	(gomp_parallel_loop_start): Pass NULL as taskgroup to
	gomp_team_start.
	* loop_ull.c: Include string.h.
	(GOMP_loop_ull_runtime_next): Add ialias.
	(GOMP_taskgroup_reduction_register): Add ialias_redirect.
	(gomp_loop_ull_static_start, gomp_loop_ull_dynamic_start,
	gomp_loop_ull_guided_start, gomp_loop_ull_ordered_static_start,
	gomp_loop_ull_ordered_dynamic_start,
	gomp_loop_ull_ordered_guided_start,
	gomp_loop_ull_doacross_static_start,
	gomp_loop_ull_doacross_dynamic_start,
	gomp_loop_ull_doacross_guided_start): Adjust gomp_work_share_start
	and gomp_doacross_ull_init callers.
	(gomp_adjust_sched, GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
	GOMP_loop_ull_doacross_start): New functions.
	(GOMP_loop_ull_runtime_start,
	GOMP_loop_ull_ordered_runtime_start,
	GOMP_loop_ull_doacross_runtime_start): Mask off GFS_MONOTONIC bit.
	(GOMP_loop_ull_maybe_nonmonotonic_runtime_next,
	GOMP_loop_ull_maybe_nonmonotonic_runtime_start,
	GOMP_loop_ull_nonmonotonic_runtime_next,
	GOMP_loop_ull_nonmonotonic_runtime_start): Likewise.
	* Makefile.am (libgomp_la_SOURCES): Add teams.c and affinity-fmt.c.
	* omp.h.in (enum omp_sched_t): Add omp_sched_monotonic.
	(omp_pause_resource_t, omp_depend_t): New typedefs.
	(enum omp_lock_hint_t): Renamed to ...
	(enum omp_sync_hint_t): ... this.  Define omp_sync_hint_*
	enumerators using numbers and omp_lock_hint_* as their aliases.
	(omp_lock_hint_t): New typedef.  Rename to ...
	(omp_sync_hint_t): ... this.
	(omp_init_lock_with_hint, omp_init_nest_lock_with_hint): Use
	omp_sync_hint_t instead of omp_lock_hint_t.
	(omp_pause_resource, omp_pause_resource_all, omp_set_affinity_format,
	omp_get_affinity_format, omp_display_affinity, omp_capture_affinity):
	Declare.
	(omp_target_is_present, omp_target_disassociate_ptr):
	Change first argument from void * to const void *.
	(omp_target_memcpy, omp_target_memcpy_rect): Change second argument
	from void * to const void *.
	(omp_target_associate_ptr): Change first and second arguments from
	void * to const void *.
	* omp_lib.f90.in (omp_pause_resource_kind, omp_pause_soft,
	omp_pause_hard): New parameters.
	(omp_pause_resource, omp_pause_resource_all, omp_set_affinity_format,
	omp_get_affinity_format, omp_display_affinity, omp_capture_affinity):
	New interfaces.
	* omp_lib.h.in (omp_pause_resource_kind, omp_pause_soft,
	omp_pause_hard): New parameters.
	(omp_pause_resource, omp_pause_resource_all, omp_set_affinity_format,
	omp_get_affinity_format, omp_display_affinity, omp_capture_affinity):
	New externals.
	* ordered.c (gomp_doacross_init, gomp_doacross_ull_init): Add
	EXTRA argument.  If not needed to prepare array, if extra is 0,
	clear ws->doacross, otherwise allocate just doacross structure and
	extra payload.  If array is needed, allocate also extra payload.
	(GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post,
	GOMP_doacross_ull_wait): Handle doacross->array == NULL like
	doacross == NULL.
	* parallel.c (GOMP_parallel_start): Pass NULL as taskgroup to
	gomp_team_start.
	(GOMP_parallel): Likewise.  Formatting fix.
	(GOMP_parallel_reductions): New function.
	(GOMP_cancellation_point): If taskgroup has workshare
	flag set, check cancelled of prev taskgroup if any.
	(GOMP_cancel): If taskgroup has workshare flag set, set cancelled
	on prev taskgroup if any.
	* sections.c: Include string.h.
	(GOMP_taskgroup_reduction_register): Add ialias_redirect.
	(GOMP_sections_start): Adjust gomp_work_share_start caller.
	(GOMP_sections2_start): New function.
	(GOMP_parallel_sections_start, GOMP_parallel_sections):
	Pass NULL as taskgroup to gomp_team_start.
	* single.c (GOMP_single_start, GOMP_single_copy_start): Adjust
	gomp_work_share_start callers.
	* target.c (GOMP_target_update_ext, GOMP_target_enter_exit_data):
	If taskgroup has workshare flag set, check cancelled on prev
	taskgroup if any.  Guard all cancellation tests with
	gomp_cancel_var test.
	(omp_target_is_present, omp_target_disassociate_ptr):
	Change ptr argument from void * to const void *.
	(omp_target_memcpy): Change src argument from void * to const void *.
	(omp_target_memcpy_rect): Likewise.
	(omp_target_memcpy_rect_worker): Likewise.  Use const char * casts
	instead of char * where needed.
	(omp_target_associate_ptr): Change host_ptr and device_ptr arguments
	from void * to const void *.
	(omp_pause_resource, omp_pause_resource_all): New functions.
	* task.c (gomp_task_handle_depend): Handle new depend array format
	in addition to the old.  Handle mutexinoutset kinds the same as
	inout for now, handle unspecified kinds.
	(gomp_create_target_task): If taskgroup has workshare flag set, check
	cancelled on prev taskgroup if any.  Guard all cancellation tests with
	gomp_cancel_var test.  Handle new depend array format count in
	addition to the old.
	(GOMP_task): Likewise.  Adjust function comment.
	(gomp_task_run_pre): If taskgroup has workshare flag set, check
	cancelled on prev taskgroup if any.  Guard all cancellation tests with
	gomp_cancel_var test.
	(GOMP_taskwait_depend): New function.
	(gomp_task_maybe_wait_for_dependencies): Handle new depend array
	format in addition to the old.  Handle mutexinoutset kinds the same as
	inout for now, handle unspecified kinds.  Fix a function comment typo.
	(gomp_taskgroup_init): New function.
	(GOMP_taskgroup_start): Use it.
	(gomp_reduction_register, gomp_create_artificial_team,
	GOMP_taskgroup_reduction_register,
	GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap,
	gomp_parallel_reduction_register,
	gomp_workshare_task_reduction_register,
	gomp_workshare_taskgroup_start,
	GOMP_workshare_task_reduction_unregister): New functions.
	* taskloop.c (GOMP_taskloop): If taskgroup has workshare flag set,
	check cancelled on prev taskgroup if any.  Guard all cancellation
	tests with gomp_cancel_var test.  Handle GOMP_TASK_FLAG_REDUCTION flag
	by calling GOMP_taskgroup_reduction_register.
	* team.c (gomp_thread_attr): Remove comment.
	(struct gomp_thread_start_data): Add handle field.
	(gomp_thread_start): Call pthread_detach.
	(gomp_new_team): Adjust gomp_init_work_share caller.
	(gomp_free_pool_helper): Call pthread_detach.
	(gomp_team_start): Add taskgroup argument, initialize implicit
	tasks' taskgroup field to that.  Don't call
	pthread_attr_setdetachstate.  Handle OMP_DISPLAY_AFFINITY env var.
	(gomp_team_end): Determine nesting by thr->ts.level != 0
	rather than thr->ts.team != NULL.
	(gomp_pause_pool_helper, gomp_pause_host): New functions.
	* work.c (alloc_work_share): Use gomp_aligned_alloc instead of
	gomp_malloc if GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is defined.
	(gomp_init_work_share): Change ORDERED argument from bool to size_t,
	if more than 1 allocate also extra payload at the end of array.  Never
	keep ordered_team_ids NULL, set it to inline_ordered_team_ids instead.
	(gomp_work_share_start): Change ORDERED argument from bool to size_t,
	return true instead of ws.
	* Makefile.in: Regenerated.
	* configure: Regenerated.
	* config.h.in: Regenerated.



	Jakub

Patch

--- libgomp/affinity.c	(.../trunk)	(revision 265884)
+++ libgomp/affinity.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -138,5 +138,18 @@  gomp_get_place_proc_ids_8 (int place_num
   (void) ids;
 }
 
+void
+gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
+			     int place)
+{
+  cpu_set_t *cpusetp;
+  char buf[sizeof (long) * 3 + 4];
+  if (gomp_available_cpus > 1)
+    sprintf (buf, "0-%lu", gomp_available_cpus - 1);
+  else
+    strcpy (buf, "0");
+  gomp_display_string (buffer, size, ret, buf, strlen (buf));
+}
+
 ialias(omp_get_place_num_procs)
 ialias(omp_get_place_proc_ids)
--- libgomp/affinity-fmt.c	(.../trunk)	(nonexistent)
+++ libgomp/affinity-fmt.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -0,0 +1,481 @@ 
+/* Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libgomp.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_UNAME
+#include <sys/utsname.h>
+#endif
+
+void
+gomp_set_affinity_format (const char *format, size_t len)
+{
+  if (len < gomp_affinity_format_len)
+    memcpy (gomp_affinity_format_var, format, len);
+  else
+    {
+      char *p;
+      if (gomp_affinity_format_len)
+	p = gomp_realloc (gomp_affinity_format_var, len + 1);
+      else
+	p = gomp_malloc (len + 1);
+      memcpy (p, format, len);
+      gomp_affinity_format_var = p;
+      gomp_affinity_format_len = len + 1;
+    }
+  gomp_affinity_format_var[len] = '\0';
+}
+
+void
+omp_set_affinity_format (const char *format)
+{
+  gomp_set_affinity_format (format, strlen (format));
+}
+
+size_t
+omp_get_affinity_format (char *buffer, size_t size)
+{
+  size_t len = strlen (gomp_affinity_format_var);
+  if (size)
+    {
+      if (len < size)
+	memcpy (buffer, gomp_affinity_format_var, len + 1);
+      else
+	{
+	  memcpy (buffer, gomp_affinity_format_var, size - 1);
+	  buffer[size - 1] = '\0';
+	}
+    }
+  return len;
+}
+
+void
+gomp_display_string (char *buffer, size_t size, size_t *ret,
+		     const char *str, size_t len)
+{
+  size_t r = *ret;
+  if (size && r < size)
+    {
+      size_t l = len;
+      if (size - r < len)
+	l = size - r;
+      memcpy (buffer + r, str, l);
+    }
+  *ret += len;
+  if (__builtin_expect (r > *ret, 0))
+    gomp_fatal ("overflow in omp_capture_affinity");
+}
+
+static void
+gomp_display_repeat (char *buffer, size_t size, size_t *ret,
+		     char c, size_t len)
+{
+  size_t r = *ret;
+  if (size && r < size)
+    {
+      size_t l = len;
+      if (size - r < len)
+	l = size - r;
+      memset (buffer + r, c, l);
+    }
+  *ret += len;
+  if (__builtin_expect (r > *ret, 0))
+    gomp_fatal ("overflow in omp_capture_affinity");
+}
+
+static void
+gomp_display_num (char *buffer, size_t size, size_t *ret,
+		  bool zero, bool right, size_t sz, char *buf)
+{
+  size_t l = strlen (buf);
+  if (sz == (size_t) -1 || l >= sz)
+    {
+      gomp_display_string (buffer, size, ret, buf, l);
+      return;
+    }
+  if (zero)
+    {
+      if (buf[0] == '-')
+	gomp_display_string (buffer, size, ret, buf, 1);
+      else if (buf[0] == '0' && buf[1] == 'x')
+	gomp_display_string (buffer, size, ret, buf, 2);
+      gomp_display_repeat (buffer, size, ret, '0', sz - l);
+      if (buf[0] == '-')
+	gomp_display_string (buffer, size, ret, buf + 1, l - 1);
+      else if (buf[0] == '0' && buf[1] == 'x')
+	gomp_display_string (buffer, size, ret, buf + 2, l - 2);
+      else
+	gomp_display_string (buffer, size, ret, buf, l);
+    }
+  else if (right)
+    {
+      gomp_display_repeat (buffer, size, ret, ' ', sz - l);
+      gomp_display_string (buffer, size, ret, buf, l);
+    }
+  else
+    {
+      gomp_display_string (buffer, size, ret, buf, l);
+      gomp_display_repeat (buffer, size, ret, ' ', sz - l);
+    }
+}
+
+static void
+gomp_display_int (char *buffer, size_t size, size_t *ret,
+		  bool zero, bool right, size_t sz, int num)
+{
+  char buf[3 * sizeof (int) + 2];
+  sprintf (buf, "%d", num);
+  gomp_display_num (buffer, size, ret, zero, right, sz, buf);
+}
+
+static void
+gomp_display_string_len (char *buffer, size_t size, size_t *ret,
+			 bool right, size_t sz, char *str, size_t len)
+{
+  if (sz == (size_t) -1 || len >= sz)
+    {
+      gomp_display_string (buffer, size, ret, str, len);
+      return;
+    }
+
+  if (right)
+    {
+      gomp_display_repeat (buffer, size, ret, ' ', sz - len);
+      gomp_display_string (buffer, size, ret, str, len);
+    }
+  else
+    {
+      gomp_display_string (buffer, size, ret, str, len);
+      gomp_display_repeat (buffer, size, ret, ' ', sz - len);
+    }
+}
+
+static void
+gomp_display_hostname (char *buffer, size_t size, size_t *ret,
+		       bool right, size_t sz)
+{
+#ifdef HAVE_GETHOSTNAME
+  {
+    char buf[256];
+    char *b = buf;
+    size_t len = 256;
+    do
+      {
+	b[len - 1] = '\0';
+	if (gethostname (b, len - 1) == 0)
+	  {
+	    size_t l = strlen (b);
+	    if (l < len - 1)
+	      {
+		gomp_display_string_len (buffer, size, ret,
+					 right, sz, b, l);
+		if (b != buf)
+		  free (b);
+		return;
+	      }
+	  }
+	if (len == 1048576)
+	  break;
+	len = len * 2;
+	if (len == 512)
+	  b = gomp_malloc (len);
+	else
+	  b = gomp_realloc (b, len);
+      }
+    while (1);
+    if (b != buf)
+      free (b);
+  }
+#endif
+#ifdef HAVE_UNAME
+  {
+    struct utsname buf;
+    if (uname (&buf) == 0)
+      {
+	gomp_display_string_len (buffer, size, ret, right, sz,
+				 buf.nodename, strlen (buf.nodename));
+	return;
+      }
+  }
+#endif
+  gomp_display_string_len (buffer, size, ret, right, sz, "node", 4);
+}
+
+struct affinity_types_struct {
+  char long_str[18];
+  char long_len;
+  char short_c; };
+
+static struct affinity_types_struct affinity_types[] =
+{
+#define AFFINITY_TYPE(l, s) \
+  { #l, sizeof (#l) - 1, s }
+  AFFINITY_TYPE (team_num, 't'),
+  AFFINITY_TYPE (num_teams, 'T'),
+  AFFINITY_TYPE (nesting_level, 'L'),
+  AFFINITY_TYPE (thread_num, 'n'),
+  AFFINITY_TYPE (num_threads, 'N'),
+  AFFINITY_TYPE (ancestor_tnum, 'a'),
+  AFFINITY_TYPE (host, 'H'),
+  AFFINITY_TYPE (process_id, 'P'),
+  AFFINITY_TYPE (native_thread_id, 'i'),
+  AFFINITY_TYPE (thread_affinity, 'A')
+#undef AFFINITY_TYPE
+};
+
+size_t
+gomp_display_affinity (char *buffer, size_t size,
+		       const char *format, gomp_thread_handle handle,
+		       struct gomp_team_state *ts, unsigned int place)
+{
+  size_t ret = 0;
+  do
+    {
+      const char *p = strchr (format, '%');
+      bool zero = false;
+      bool right = false;
+      size_t sz = -1;
+      char c;
+      int val;
+      if (p == NULL)
+	p = strchr (format, '\0');
+      if (p != format)
+	gomp_display_string (buffer, size, &ret,
+			     format, p - format);
+      if (*p == '\0')
+	break;
+      p++;
+      if (*p == '%')
+	{
+	  gomp_display_string (buffer, size, &ret, "%", 1);
+	  format = p + 1;
+	  continue;
+	}
+      if (*p == '0')
+	{
+	  zero = true;
+	  p++;
+	  if (*p != '.')
+	    gomp_fatal ("leading zero not followed by dot in affinity format");
+	}
+      if (*p == '.')
+	{
+	  right = true;
+	  p++;
+	}
+      if (*p >= '1' && *p <= '9')
+	{
+	  char *end;
+	  sz = strtoul (p, &end, 10);
+	  p = end;
+	}
+      else if (zero || right)
+	gomp_fatal ("leading zero or right justification in affinity format "
+		    "requires size");
+      c = *p;
+      if (c == '{')
+	{
+	  int i;
+	  for (i = 0;
+	       i < sizeof (affinity_types) / sizeof (affinity_types[0]); ++i)
+	    if (strncmp (p + 1, affinity_types[i].long_str,
+			 affinity_types[i].long_len) == 0
+		&& p[affinity_types[i].long_len + 1] == '}')
+	      {
+		c = affinity_types[i].short_c;
+		p += affinity_types[i].long_len + 1;
+		break;
+	      }
+	  if (c == '{')
+	    {
+	      char *q = strchr (p + 1, '}');
+	      if (q)
+		gomp_fatal ("unsupported long type name '%.*s' in affinity "
+			    "format", (int) (q - (p + 1)), p + 1);
+	      else
+		gomp_fatal ("unterminated long type name '%s' in affinity "
+			    "format", p + 1);
+	    }
+	}
+      switch (c)
+	{
+	case 't':
+	  val = omp_get_team_num ();
+	  goto do_int;
+	case 'T':
+	  val = omp_get_num_teams ();
+	  goto do_int;
+	case 'L':
+	  val = ts->level;
+	  goto do_int;
+	case 'n':
+	  val = ts->team_id;
+	  goto do_int;
+	case 'N':
+	  val = ts->team ? ts->team->nthreads : 1;
+	  goto do_int;
+	case 'a':
+	  val = ts->team ? ts->team->prev_ts.team_id : -1;
+	  goto do_int;
+	case 'H':
+	  gomp_display_hostname (buffer, size, &ret, right, sz);
+	  break;
+	case 'P':
+#ifdef HAVE_GETPID
+	  val = getpid ();
+#else
+	  val = 0;
+#endif
+	  goto do_int;
+	case 'i':
+#if defined(LIBGOMP_USE_PTHREADS) && defined(__GNUC__)
+	  /* Handle integral pthread_t.  */
+	  if (__builtin_classify_type (handle) == 1)
+	    {
+	      char buf[3 * (sizeof (handle) + sizeof (int)) + 4];
+
+	      if (sizeof (handle) == sizeof (long))
+		sprintf (buf, "0x%lx", (long) handle);
+	      else if (sizeof (handle) == sizeof (long long))
+		sprintf (buf, "0x%llx", (long long) handle);
+	      else
+		sprintf (buf, "0x%x", (int) handle);
+	      gomp_display_num (buffer, size, &ret, zero, right, sz, buf);
+	      break;
+	    }
+	  /* And pointer pthread_t.  */
+	  else if (__builtin_classify_type (handle) == 5)
+	    {
+	      char buf[3 * (sizeof (uintptr_t) + sizeof (int)) + 4];
+
+	      if (sizeof (uintptr_t) == sizeof (long))
+		sprintf (buf, "0x%lx", (long) (uintptr_t) handle);
+	      else if (sizeof (uintptr_t) == sizeof (long long))
+		sprintf (buf, "0x%llx", (long long) (uintptr_t) handle);
+	      else
+		sprintf (buf, "0x%x", (int) (uintptr_t) handle);
+	      gomp_display_num (buffer, size, &ret, zero, right, sz, buf);
+	      break;
+	    }
+#endif
+	  val = 0;
+	  goto do_int;
+	case 'A':
+	  if (sz == (size_t) -1)
+	    gomp_display_affinity_place (buffer, size, &ret,
+					 place - 1);
+	  else if (right)
+	    {
+	      size_t len = 0;
+	      gomp_display_affinity_place (NULL, 0, &len, place - 1);
+	      if (len < sz)
+		gomp_display_repeat (buffer, size, &ret, ' ', sz - len);
+	      gomp_display_affinity_place (buffer, size, &ret, place - 1);
+	    }
+	  else
+	    {
+	      size_t start = ret;
+	      gomp_display_affinity_place (buffer, size, &ret, place - 1);
+	      if (ret - start < sz)
+		gomp_display_repeat (buffer, size, &ret, ' ', sz - (ret - start));
+	    }
+	  break;
+	do_int:
+	  gomp_display_int (buffer, size, &ret, zero, right, sz, val);
+	  break;
+	default:
+	  gomp_fatal ("unsupported type %c in affinity format", c);
+	}
+      format = p + 1;
+    }
+  while (1);
+  return ret;
+}
+
+size_t
+omp_capture_affinity (char *buffer, size_t size, const char *format)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ret
+    = gomp_display_affinity (buffer, size,
+			     format && *format
+			     ? format : gomp_affinity_format_var,
+			     gomp_thread_self (), &thr->ts, thr->place);
+  if (size)
+    {
+      if (ret >= size)
+	buffer[size - 1] = '\0';
+      else
+	buffer[ret] = '\0';
+    }
+  return ret;
+}
+ialias (omp_capture_affinity)
+
+void
+omp_display_affinity (const char *format)
+{
+  char buf[512];
+  char *b;
+  size_t ret = ialias_call (omp_capture_affinity) (buf, sizeof buf, format);
+  if (ret < sizeof buf)
+    {
+      buf[ret] = '\n';
+      fwrite (buf, 1, ret + 1, stderr);
+      return;
+    }
+  b = gomp_malloc (ret + 1);
+  ialias_call (omp_capture_affinity) (b, ret + 1, format);
+  b[ret] = '\n';
+  fwrite (b, 1, ret + 1, stderr);
+  free (b);
+}
+
+void
+gomp_display_affinity_thread (gomp_thread_handle handle,
+			      struct gomp_team_state *ts, unsigned int place)
+{
+  char buf[512];
+  char *b;
+  size_t ret = gomp_display_affinity (buf, sizeof buf, gomp_affinity_format_var,
+				      handle, ts, place);
+  if (ret < sizeof buf)
+    {
+      buf[ret] = '\n';
+      fwrite (buf, 1, ret + 1, stderr);
+      return;
+    }
+  b = gomp_malloc (ret + 1);
+  gomp_display_affinity (b, ret + 1, gomp_affinity_format_var,
+  			 handle, ts, place);
+  b[ret] = '\n';
+  fwrite (b, 1, ret + 1, stderr);
+  free (b);
+}
--- libgomp/alloc.c	(.../trunk)	(revision 265884)
+++ libgomp/alloc.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -57,3 +57,50 @@  gomp_realloc (void *old, size_t size)
     gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
   return ret;
 }
+
+void *
+gomp_aligned_alloc (size_t al, size_t size)
+{
+  void *ret;
+  if (al < sizeof (void *))
+    al = sizeof (void *);
+#ifdef HAVE_ALIGNED_ALLOC
+  ret = aligned_alloc (al, size);
+#elif defined(HAVE__ALIGNED_MALLOC)
+  ret = _aligned_malloc (size, al);
+#elif defined(HAVE_POSIX_MEMALIGN)
+  if (posix_memalign (&ret, al, size) != 0)
+    ret = NULL;
+#elif defined(HAVE_MEMALIGN)
+  {
+    extern void *memalign (size_t, size_t);
+    ret = memalign (al, size);
+  }
+#else
+  ret = NULL;
+  if ((al & (al - 1)) == 0 && size)
+    {
+      void *p = malloc (size + al);
+      if (p)
+	{
+	  void *ap = (void *) (((uintptr_t) p + al) & -al);
+	  ((void **) ap)[-1] = p;
+	  ret = ap;
+	}
+    }
+#endif
+  if (ret == NULL)
+    gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
+  return ret;
+}
+
+void
+gomp_aligned_free (void *ptr)
+{
+#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+  free (ptr);
+#else
+  if (ptr)
+    free (((void **) ptr)[-1]);
+#endif
+}
--- libgomp/config.h.in	(.../trunk)	(revision 265884)
+++ libgomp/config.h.in	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -1,5 +1,8 @@ 
 /* config.h.in.  Generated from configure.ac by autoheader.  */
 
+/* Define to 1 if you have the `aligned_alloc' function. */
+#undef HAVE_ALIGNED_ALLOC
+
 /* Define to 1 if the target assembler supports .symver directive. */
 #undef HAVE_AS_SYMVER_DIRECTIVE
 
@@ -33,9 +36,15 @@ 
 /* Define to 1 if you have the `getgid' function. */
 #undef HAVE_GETGID
 
+/* Define if gethostname is supported. */
+#undef HAVE_GETHOSTNAME
+
 /* Define to 1 if you have the `getloadavg' function. */
 #undef HAVE_GETLOADAVG
 
+/* Define if getpid is supported. */
+#undef HAVE_GETPID
+
 /* Define to 1 if you have the `getuid' function. */
 #undef HAVE_GETUID
 
@@ -45,9 +54,15 @@ 
 /* Define to 1 if you have the `dl' library (-ldl). */
 #undef HAVE_LIBDL
 
+/* Define to 1 if you have the `memalign' function. */
+#undef HAVE_MEMALIGN
+
 /* Define to 1 if you have the <memory.h> header file. */
 #undef HAVE_MEMORY_H
 
+/* Define to 1 if you have the `posix_memalign' function. */
+#undef HAVE_POSIX_MEMALIGN
+
 /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */
 #undef HAVE_PTHREAD_AFFINITY_NP
 
@@ -103,9 +118,15 @@ 
 /* Define to 1 if the target supports thread-local storage. */
 #undef HAVE_TLS
 
+/* Define if uname is supported and struct utsname has nodename field. */
+#undef HAVE_UNAME
+
 /* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H
 
+/* Define to 1 if you have the `_aligned_malloc' function. */
+#undef HAVE__ALIGNED_MALLOC
+
 /* Define to 1 if you have the `__secure_getenv' function. */
 #undef HAVE___SECURE_GETENV
 
--- libgomp/config/linux/affinity.c	(.../trunk)	(revision 265884)
+++ libgomp/config/linux/affinity.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -396,6 +396,56 @@  gomp_get_place_proc_ids_8 (int place_num
       *ids++ = i;
 }
 
+void
+gomp_display_affinity_place (char *buffer, size_t size, size_t *ret,
+			     int place)
+{
+  cpu_set_t *cpusetp;
+  char buf[sizeof (long) * 3 + 4];
+  if (place >= 0 && place < gomp_places_list_len)
+    cpusetp = (cpu_set_t *) gomp_places_list[place];
+  else if (gomp_cpusetp)
+    cpusetp = gomp_cpusetp;
+  else
+    {
+      if (gomp_available_cpus > 1)
+	sprintf (buf, "0-%lu", gomp_available_cpus - 1);
+      else
+	strcpy (buf, "0");
+      gomp_display_string (buffer, size, ret, buf, strlen (buf));
+      return;
+    }
+
+  unsigned long i, max = 8 * gomp_cpuset_size, start;
+  bool prev_set = false;
+  start = max;
+  for (i = 0; i <= max; i++)
+    {
+      bool this_set;
+      if (i == max)
+	this_set = false;
+      else
+	this_set = CPU_ISSET_S (i, gomp_cpuset_size, cpusetp);
+      if (this_set != prev_set)
+	{
+	  prev_set = this_set;
+	  if (this_set)
+	    {
+	      char *p = buf;
+	      if (start != max)
+		*p++ = ',';
+	      sprintf (p, "%lu", i);
+	      start = i;
+	    }
+	  else if (i == start + 1)
+	    continue;
+	  else
+	    sprintf (buf, "-%lu", i - 1);
+	  gomp_display_string (buffer, size, ret, buf, strlen (buf));
+	}
+    }
+}
+
 ialias(omp_get_place_num_procs)
 ialias(omp_get_place_proc_ids)
 
--- libgomp/config/nvptx/icv-device.c	(.../trunk)	(revision 265884)
+++ libgomp/config/nvptx/icv-device.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -46,20 +46,6 @@  omp_get_num_devices (void)
 }
 
 int
-omp_get_num_teams (void)
-{
-  return gomp_num_teams_var + 1;
-}
-
-int
-omp_get_team_num (void)
-{
-  int ctaid;
-  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
-  return ctaid;
-}
-
-int
 omp_is_initial_device (void)
 {
   /* NVPTX is an accelerator-only target.  */
@@ -69,6 +55,4 @@  omp_is_initial_device (void)
 ialias (omp_set_default_device)
 ialias (omp_get_default_device)
 ialias (omp_get_num_devices)
-ialias (omp_get_num_teams)
-ialias (omp_get_team_num)
 ialias (omp_is_initial_device)
--- libgomp/config/nvptx/target.c	(.../trunk)	(revision 265884)
+++ libgomp/config/nvptx/target.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -47,3 +47,21 @@  GOMP_teams (unsigned int num_teams, unsi
     }
   gomp_num_teams_var = num_teams - 1;
 }
+
+int
+omp_pause_resource (omp_pause_resource_t kind, int device_num)
+{
+  (void) kind;
+  (void) device_num;
+  return -1;
+}
+
+int
+omp_pause_resource_all (omp_pause_resource_t kind)
+{
+  (void) kind;
+  return -1;
+}
+
+ialias (omp_pause_resource)
+ialias (omp_pause_resource_all)
--- libgomp/config/nvptx/team.c	(.../trunk)	(revision 265884)
+++ libgomp/config/nvptx/team.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -116,7 +116,8 @@  gomp_thread_start (struct gomp_thread_po
 
 void
 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
-		 unsigned flags, struct gomp_team *team)
+		 unsigned flags, struct gomp_team *team,
+		 struct gomp_taskgroup *taskgroup)
 {
   struct gomp_thread *thr, *nthr;
   struct gomp_task *task;
@@ -147,6 +148,7 @@  gomp_team_start (void (*fn) (void *), vo
   nthreads_var = icv->nthreads_var;
   gomp_init_task (thr->task, task, icv);
   team->implicit_task[0].icv.nthreads_var = nthreads_var;
+  team->implicit_task[0].taskgroup = taskgroup;
 
   if (nthreads == 1)
     return;
@@ -166,6 +168,7 @@  gomp_team_start (void (*fn) (void *), vo
       nthr->task = &team->implicit_task[i];
       gomp_init_task (nthr->task, task, icv);
       team->implicit_task[i].icv.nthreads_var = nthreads_var;
+      team->implicit_task[i].taskgroup = taskgroup;
       nthr->fn = fn;
       nthr->data = data;
       team->ordered_release[i] = &nthr->release;
@@ -174,5 +177,11 @@  gomp_team_start (void (*fn) (void *), vo
   gomp_simple_barrier_wait (&pool->threads_dock);
 }
 
+int
+gomp_pause_host (void)
+{
+  return -1;
+}
+
 #include "../../team.c"
 #endif
--- libgomp/config/nvptx/teams.c	(.../trunk)	(nonexistent)
+++ libgomp/config/nvptx/teams.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -0,0 +1,57 @@ 
+/* Copyright (C) 2015-2018 Free Software Foundation, Inc.
+   Contributed by Alexander Monakov <amonakov@ispras.ru>
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file defines OpenMP API entry points that accelerator targets are
+   expected to replace.  */
+
+#include "libgomp.h"
+
+void
+GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
+		unsigned int thread_limit, unsigned int flags)
+{
+  (void) fn;
+  (void) data;
+  (void) flags;
+  (void) num_teams;
+  (void) thread_limit;
+}
+
+int
+omp_get_num_teams (void)
+{
+  return gomp_num_teams_var + 1;
+}
+
+int
+omp_get_team_num (void)
+{
+  int ctaid;
+  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
+  return ctaid;
+}
+
+ialias (omp_get_num_teams)
+ialias (omp_get_team_num)
--- libgomp/configure	(.../trunk)	(revision 265884)
+++ libgomp/configure	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -15812,6 +15812,19 @@  _ACEOF
 fi
 done
 
+for ac_func in aligned_alloc posix_memalign memalign _aligned_malloc
+do :
+  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+eval as_val=\$$as_ac_var
+   if test "x$as_val" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
 
 # Check for broken semaphore implementation on darwin.
 # sem_init returns: sem_init error: Function not implemented.
@@ -16026,6 +16039,72 @@  fi
 
 fi
 
+# Check for uname.
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+   #include <stdlib.h>
+   #include <sys/utsname.h>
+int
+main ()
+{
+struct utsname buf;
+   volatile size_t len = 0;
+   if (!uname (buf))
+     len = strlen (buf.nodename);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_UNAME 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+# Check for gethostname.
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <unistd.h>
+int
+main ()
+{
+
+   char buf[256];
+   if (gethostname (buf, sizeof (buf) - 1) == 0)
+     buf[255] = '\0';
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_GETHOSTNAME 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+# Check for getpid.
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <unistd.h>
+int
+main ()
+{
+int pid = getpid ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_GETPID 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
 # See if we support thread-local storage.
 
 
--- libgomp/configure.ac	(.../trunk)	(revision 265884)
+++ libgomp/configure.ac	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -218,6 +218,7 @@  m4_include([plugin/configfrag.ac])
 
 # Check for functions needed.
 AC_CHECK_FUNCS(getloadavg clock_gettime strtoull)
+AC_CHECK_FUNCS(aligned_alloc posix_memalign memalign _aligned_malloc)
 
 # Check for broken semaphore implementation on darwin.
 # sem_init returns: sem_init error: Function not implemented.
@@ -265,6 +266,41 @@  if test $ac_cv_func_clock_gettime = no;
 	       [Define to 1 if you have the `clock_gettime' function.])])
 fi
 
+# Check for uname.
+AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+  [#include <string.h>
+   #include <stdlib.h>
+   #include <sys/utsname.h>],
+  [struct utsname buf;
+   volatile size_t len = 0;
+   if (!uname (buf))
+     len = strlen (buf.nodename);])],
+  AC_DEFINE(HAVE_UNAME, 1,
+[	Define if uname is supported and struct utsname has nodename field.]))
+
+# Check for gethostname.
+AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+  [#include <unistd.h>],
+  [
+changequote(,)dnl
+   char buf[256];
+   if (gethostname (buf, sizeof (buf) - 1) == 0)
+     buf[255] = '\0';
+changequote([,])dnl
+  ])],
+  AC_DEFINE(HAVE_GETHOSTNAME, 1,
+[	Define if gethostname is supported.]))
+
+# Check for getpid.
+AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+  [#include <unistd.h>],
+  [int pid = getpid ();])],
+  AC_DEFINE(HAVE_GETPID, 1,
+[	Define if getpid is supported.]))
+
 # See if we support thread-local storage.
 GCC_CHECK_TLS
 
--- libgomp/configure.tgt	(.../trunk)	(revision 265884)
+++ libgomp/configure.tgt	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -18,7 +18,7 @@  if test $gcc_cv_have_tls = yes ; then
 	;;
 
     *-*-linux* | *-*-gnu*)
-	XCFLAGS="${XCFLAGS} -ftls-model=initial-exec"
+	XCFLAGS="${XCFLAGS} -ftls-model=initial-exec -DUSING_INITIAL_EXEC_TLS"
 	;;
 
     *-*-rtems*)
--- libgomp/env.c	(.../trunk)	(revision 265884)
+++ libgomp/env.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -88,6 +88,9 @@  void **gomp_places_list;
 unsigned long gomp_places_list_len;
 int gomp_debug_var;
 unsigned int gomp_num_teams_var;
+bool gomp_display_affinity_var;
+char *gomp_affinity_format_var = "level %L thread %i affinity %A";
+size_t gomp_affinity_format_len;
 char *goacc_device_type;
 int goacc_device_num;
 int goacc_default_dims[GOMP_DIM_MAX];
@@ -101,6 +104,7 @@  parse_schedule (void)
 {
   char *env, *end;
   unsigned long value;
+  int monotonic = 0;
 
   env = getenv ("OMP_SCHEDULE");
   if (env == NULL)
@@ -108,6 +112,26 @@  parse_schedule (void)
 
   while (isspace ((unsigned char) *env))
     ++env;
+  if (strncasecmp (env, "monotonic", 9) == 0)
+    {
+      monotonic = 1;
+      env += 9;
+    }
+  else if (strncasecmp (env, "nonmonotonic", 12) == 0)
+    {
+      monotonic = -1;
+      env += 12;
+    }
+  if (monotonic)
+    {
+      while (isspace ((unsigned char) *env))
+	++env;
+      if (*env != ':')
+	goto unknown;
+      ++env;
+      while (isspace ((unsigned char) *env))
+	++env;
+    }
   if (strncasecmp (env, "static", 6) == 0)
     {
       gomp_global_icv.run_sched_var = GFS_STATIC;
@@ -131,12 +155,16 @@  parse_schedule (void)
   else
     goto unknown;
 
+  if (monotonic == 1
+      || (monotonic == 0 && gomp_global_icv.run_sched_var == GFS_STATIC))
+    gomp_global_icv.run_sched_var |= GFS_MONOTONIC;
+
   while (isspace ((unsigned char) *env))
     ++env;
   if (*env == '\0')
     {
       gomp_global_icv.run_sched_chunk_size
-	= gomp_global_icv.run_sched_var != GFS_STATIC;
+	= (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC;
       return;
     }
   if (*env++ != ',')
@@ -159,7 +187,8 @@  parse_schedule (void)
   if ((int)value != value)
     goto invalid;
 
-  if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC)
+  if (value == 0
+      && (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC) != GFS_STATIC)
     value = 1;
   gomp_global_icv.run_sched_chunk_size = value;
   return;
@@ -1150,19 +1179,34 @@  handle_omp_display_env (unsigned long st
   fputs ("'\n", stderr);
 
   fprintf (stderr, "  OMP_SCHEDULE = '");
-  switch (gomp_global_icv.run_sched_var)
+  if ((gomp_global_icv.run_sched_var & GFS_MONOTONIC))
+    {
+      if (gomp_global_icv.run_sched_var != (GFS_MONOTONIC | GFS_STATIC))
+	fputs ("MONOTONIC:", stderr);
+    }
+  else if (gomp_global_icv.run_sched_var == GFS_STATIC)
+    fputs ("NONMONOTONIC:", stderr);
+  switch (gomp_global_icv.run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_RUNTIME:
       fputs ("RUNTIME", stderr);
+      if (gomp_global_icv.run_sched_chunk_size != 1)
+	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
       break;
     case GFS_STATIC:
       fputs ("STATIC", stderr);
+      if (gomp_global_icv.run_sched_chunk_size != 0)
+	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
       break;
     case GFS_DYNAMIC:
       fputs ("DYNAMIC", stderr);
+      if (gomp_global_icv.run_sched_chunk_size != 1)
+	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
       break;
     case GFS_GUIDED:
       fputs ("GUIDED", stderr);
+      if (gomp_global_icv.run_sched_chunk_size != 1)
+	fprintf (stderr, ",%d", gomp_global_icv.run_sched_chunk_size);
       break;
     case GFS_AUTO:
       fputs ("AUTO", stderr);
@@ -1228,6 +1272,10 @@  handle_omp_display_env (unsigned long st
 	   gomp_global_icv.default_device_var);
   fprintf (stderr, "  OMP_MAX_TASK_PRIORITY = '%d'\n",
 	   gomp_max_task_priority_var);
+  fprintf (stderr, "  OMP_DISPLAY_AFFINITY = '%s'\n",
+	   gomp_display_affinity_var ? "TRUE" : "FALSE");
+  fprintf (stderr, "  OMP_AFFINITY_FORMAT = '%s'\n",
+	   gomp_affinity_format_var);
 
   if (verbose)
     {
@@ -1259,6 +1307,7 @@  initialize_env (void)
   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
   parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
+  parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
   parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
   parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
@@ -1308,6 +1357,13 @@  initialize_env (void)
     }
   if (gomp_global_icv.bind_var != omp_proc_bind_false)
     gomp_init_affinity ();
+
+  {
+    const char *env = getenv ("OMP_AFFINITY_FORMAT");
+    if (env != NULL)
+      gomp_set_affinity_format (env, strlen (env));
+  }
+
   wait_policy = parse_wait_policy ();
   if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
     {
@@ -1333,7 +1389,6 @@  initialize_env (void)
 
   /* Not strictly environment related, but ordering constructors is tricky.  */
   pthread_attr_init (&gomp_thread_attr);
-  pthread_attr_setdetachstate (&gomp_thread_attr, PTHREAD_CREATE_DETACHED);
 
   if (parse_stacksize ("OMP_STACKSIZE", &stacksize)
       || parse_stacksize ("GOMP_STACKSIZE", &stacksize)
--- libgomp/fortran.c	(.../trunk)	(revision 265884)
+++ libgomp/fortran.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -28,6 +28,8 @@ 
 #include "libgomp.h"
 #include "libgomp_f.h"
 #include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
 #include <limits.h>
 
 #ifdef HAVE_ATTRIBUTE_ALIAS
@@ -82,6 +84,8 @@  ialias_redirect (omp_get_team_num)
 ialias_redirect (omp_is_initial_device)
 ialias_redirect (omp_get_initial_device)
 ialias_redirect (omp_get_max_task_priority)
+ialias_redirect (omp_pause_resource)
+ialias_redirect (omp_pause_resource_all)
 #endif
 
 #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
@@ -368,7 +372,9 @@  omp_get_schedule_ (int32_t *kind, int32_
   omp_sched_t k;
   int cs;
   omp_get_schedule (&k, &cs);
-  *kind = k;
+  /* For now mask off GFS_MONOTONIC, because OpenMP 4.5 code will not
+     expect to see it.  */
+  *kind = k & ~GFS_MONOTONIC;
   *chunk_size = cs;
 }
 
@@ -378,7 +384,8 @@  omp_get_schedule_8_ (int32_t *kind, int6
   omp_sched_t k;
   int cs;
   omp_get_schedule (&k, &cs);
-  *kind = k;
+  /* See above.  */
+  *kind = k & ~GFS_MONOTONIC;
   *chunk_size = cs;
 }
 
@@ -576,3 +583,96 @@  omp_get_max_task_priority_ (void)
 {
   return omp_get_max_task_priority ();
 }
+
+void
+omp_set_affinity_format_ (const char *format, size_t format_len)
+{
+  gomp_set_affinity_format (format, format_len);
+}
+
+int32_t
+omp_get_affinity_format_ (char *buffer, size_t buffer_len)
+{
+  size_t len = strlen (gomp_affinity_format_var);
+  if (buffer_len)
+    {
+      if (len < buffer_len)
+	{
+	  memcpy (buffer, gomp_affinity_format_var, len);
+	  memset (buffer + len, ' ', buffer_len - len);
+	}
+      else
+	memcpy (buffer, gomp_affinity_format_var, buffer_len);
+    }
+  return len;
+}
+
+void
+omp_display_affinity_ (const char *format, size_t format_len)
+{
+  char *fmt = NULL, fmt_buf[256];
+  char buf[512];
+  if (format_len)
+    {
+      fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1);
+      memcpy (fmt, format, format_len);
+      fmt[format_len] = '\0';
+    }
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ret
+    = gomp_display_affinity (buf, sizeof buf,
+			     format_len ? fmt : gomp_affinity_format_var,
+			     gomp_thread_self (), &thr->ts, thr->place);
+  if (ret < sizeof buf)
+    {
+      buf[ret] = '\n';
+      fwrite (buf, 1, ret + 1, stderr);
+    }
+  else
+    {
+      char *b = gomp_malloc (ret + 1);
+      gomp_display_affinity (buf, sizeof buf,
+			     format_len ? fmt : gomp_affinity_format_var,
+			     gomp_thread_self (), &thr->ts, thr->place);
+      b[ret] = '\n';
+      fwrite (b, 1, ret + 1, stderr);
+      free (b);
+    }
+  if (fmt && fmt != fmt_buf)
+    free (fmt);
+}
+
+int32_t
+omp_capture_affinity_ (char *buffer, const char *format,
+		       size_t buffer_len, size_t format_len)
+{
+  char *fmt = NULL, fmt_buf[256];
+  if (format_len)
+    {
+      fmt = format_len < 256 ? fmt_buf : gomp_malloc (format_len + 1);
+      memcpy (fmt, format, format_len);
+      fmt[format_len] = '\0';
+    }
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ret
+    = gomp_display_affinity (buffer, buffer_len,
+			     format_len ? fmt : gomp_affinity_format_var,
+			     gomp_thread_self (), &thr->ts, thr->place);
+  if (fmt && fmt != fmt_buf)
+    free (fmt);
+  if (ret < buffer_len)
+    memset (buffer + ret, ' ', buffer_len - ret);
+  return ret;
+}
+
+int32_t
+omp_pause_resource_ (const int32_t *kind, const int32_t *device_num)
+{
+  return omp_pause_resource (*kind, *device_num);
+}
+
+int32_t
+omp_pause_resource_all_ (const int32_t *kind)
+{
+  return omp_pause_resource_all (*kind);
+}
--- libgomp/icv.c	(.../trunk)	(revision 265884)
+++ libgomp/icv.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -69,7 +69,7 @@  void
 omp_set_schedule (omp_sched_t kind, int chunk_size)
 {
   struct gomp_task_icv *icv = gomp_icv (true);
-  switch (kind)
+  switch (kind & ~omp_sched_monotonic)
     {
     case omp_sched_static:
       if (chunk_size < 1)
--- libgomp/icv-device.c	(.../trunk)	(revision 265884)
+++ libgomp/icv-device.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -49,20 +49,6 @@  omp_get_num_devices (void)
 }
 
 int
-omp_get_num_teams (void)
-{
-  /* Hardcoded to 1 on host, MIC, HSAIL?  Maybe variable on PTX.  */
-  return 1;
-}
-
-int
-omp_get_team_num (void)
-{
-  /* Hardcoded to 0 on host, MIC, HSAIL?  Maybe variable on PTX.  */
-  return 0;
-}
-
-int
 omp_is_initial_device (void)
 {
   /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX.  */
@@ -72,6 +58,4 @@  omp_is_initial_device (void)
 ialias (omp_set_default_device)
 ialias (omp_get_default_device)
 ialias (omp_get_num_devices)
-ialias (omp_get_num_teams)
-ialias (omp_get_team_num)
 ialias (omp_is_initial_device)
--- libgomp/libgomp_g.h	(.../trunk)	(revision 265884)
+++ libgomp/libgomp_g.h	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -31,6 +31,7 @@ 
 
 #include <stdbool.h>
 #include <stddef.h>
+#include "gstdint.h"
 
 /* barrier.c */
 
@@ -56,6 +57,12 @@  extern bool GOMP_loop_nonmonotonic_dynam
 						  long *, long *);
 extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long,
 						 long *, long *);
+extern bool GOMP_loop_nonmonotonic_runtime_start (long, long, long,
+						  long *, long *);
+extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long,
+							long *, long *);
+extern bool GOMP_loop_start (long, long, long, long, long, long *, long *,
+			     uintptr_t *, void **);
 
 extern bool GOMP_loop_ordered_static_start (long, long, long, long,
 					    long *, long *);
@@ -64,6 +71,8 @@  extern bool GOMP_loop_ordered_dynamic_st
 extern bool GOMP_loop_ordered_guided_start (long, long, long, long,
 					    long *, long *);
 extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *);
+extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *,
+				     long *, uintptr_t *, void **);
 
 extern bool GOMP_loop_static_next (long *, long *);
 extern bool GOMP_loop_dynamic_next (long *, long *);
@@ -71,6 +80,8 @@  extern bool GOMP_loop_guided_next (long
 extern bool GOMP_loop_runtime_next (long *, long *);
 extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *);
 extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *);
+extern bool GOMP_loop_nonmonotonic_runtime_next (long *, long *);
+extern bool GOMP_loop_maybe_nonmonotonic_runtime_next (long *, long *);
 
 extern bool GOMP_loop_ordered_static_next (long *, long *);
 extern bool GOMP_loop_ordered_dynamic_next (long *, long *);
@@ -85,6 +96,8 @@  extern bool GOMP_loop_doacross_guided_st
 					     long *);
 extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
 					      long *);
+extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *,
+				      long *, uintptr_t *, void **);
 
 extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
 					     unsigned, long, long, long, long);
@@ -112,6 +125,13 @@  extern void GOMP_parallel_loop_nonmonoto
 extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *,
 						    unsigned, long, long,
 						    long, long, unsigned);
+extern void GOMP_parallel_loop_nonmonotonic_runtime (void (*)(void *), void *,
+						     unsigned, long, long,
+						     long, unsigned);
+extern void GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*)(void *),
+							   void *, unsigned,
+							   long, long,
+							   long, unsigned);
 
 extern void GOMP_loop_end (void);
 extern void GOMP_loop_end_nowait (void);
@@ -154,6 +174,21 @@  extern bool GOMP_loop_ull_nonmonotonic_g
 						     unsigned long long,
 						     unsigned long long *,
 						     unsigned long long *);
+extern bool GOMP_loop_ull_nonmonotonic_runtime_start (bool, unsigned long long,
+						      unsigned long long,
+						      unsigned long long,
+						      unsigned long long *,
+						      unsigned long long *);
+extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool,
+							    unsigned long long,
+							    unsigned long long,
+							    unsigned long long,
+							    unsigned long long *,
+							    unsigned long long *);
+extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long,
+				 unsigned long long, long, unsigned long long,
+				 unsigned long long *, unsigned long long *,
+				 uintptr_t *, void **);
 
 extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
 						unsigned long long,
@@ -178,6 +213,13 @@  extern bool GOMP_loop_ull_ordered_runtim
 						 unsigned long long,
 						 unsigned long long *,
 						 unsigned long long *);
+extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long,
+					 unsigned long long,
+					 unsigned long long, long,
+					 unsigned long long,
+					 unsigned long long *,
+					 unsigned long long *,
+					 uintptr_t *, void **);
 
 extern bool GOMP_loop_ull_static_next (unsigned long long *,
 				       unsigned long long *);
@@ -191,6 +233,10 @@  extern bool GOMP_loop_ull_nonmonotonic_d
 						     unsigned long long *);
 extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *,
 						    unsigned long long *);
+extern bool GOMP_loop_ull_nonmonotonic_runtime_next (unsigned long long *,
+						     unsigned long long *);
+extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_next (unsigned long long *,
+							   unsigned long long *);
 
 extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *,
 					       unsigned long long *);
@@ -220,6 +266,11 @@  extern bool GOMP_loop_ull_doacross_runti
 						  unsigned long long *,
 						  unsigned long long *,
 						  unsigned long long *);
+extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *,
+					  long, unsigned long long,
+					  unsigned long long *,
+					  unsigned long long *,
+					  uintptr_t *, void **);
 
 /* ordered.c */
 
@@ -235,6 +286,8 @@  extern void GOMP_doacross_ull_wait (unsi
 extern void GOMP_parallel_start (void (*) (void *), void *, unsigned);
 extern void GOMP_parallel_end (void);
 extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned);
+extern unsigned GOMP_parallel_reductions (void (*) (void *), void *, unsigned,
+					  unsigned);
 extern bool GOMP_cancel (int, bool);
 extern bool GOMP_cancellation_point (int);
 
@@ -251,13 +304,19 @@  extern void GOMP_taskloop_ull (void (*)
 			       unsigned long long, unsigned long long,
 			       unsigned long long);
 extern void GOMP_taskwait (void);
+extern void GOMP_taskwait_depend (void **);
 extern void GOMP_taskyield (void);
 extern void GOMP_taskgroup_start (void);
 extern void GOMP_taskgroup_end (void);
+extern void GOMP_taskgroup_reduction_register (uintptr_t *);
+extern void GOMP_taskgroup_reduction_unregister (uintptr_t *);
+extern void GOMP_task_reduction_remap (size_t, size_t, void **);
+extern void GOMP_workshare_task_reduction_unregister (bool);
 
 /* sections.c */
 
 extern unsigned GOMP_sections_start (unsigned);
+extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **);
 extern unsigned GOMP_sections_next (void);
 extern void GOMP_parallel_sections_start (void (*) (void *), void *,
 					  unsigned, unsigned);
@@ -293,6 +352,11 @@  extern void GOMP_target_enter_exit_data
 					 void **);
 extern void GOMP_teams (unsigned int, unsigned int);
 
+/* teams.c */
+
+extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
+			    unsigned);
+
 /* oacc-parallel.c */
 
 extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
--- libgomp/libgomp.h	(.../trunk)	(revision 265884)
+++ libgomp/libgomp.h	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -86,9 +86,21 @@  enum memmodel
 
 /* alloc.c */
 
+#if defined(HAVE_ALIGNED_ALLOC) \
+    || defined(HAVE__ALIGNED_MALLOC) \
+    || defined(HAVE_POSIX_MEMALIGN) \
+    || defined(HAVE_MEMALIGN)
+/* Defined if gomp_aligned_alloc doesn't use fallback version
+   and free can be used instead of gomp_aligned_free.  */
+#define GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC 1
+#endif
+
 extern void *gomp_malloc (size_t) __attribute__((malloc));
 extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
 extern void *gomp_realloc (void *, size_t);
+extern void *gomp_aligned_alloc (size_t, size_t)
+  __attribute__((malloc, alloc_size (2)));
+extern void gomp_aligned_free (void *);
 
 /* Avoid conflicting prototypes of alloca() in system headers by using
    GCC's builtin alloca().  */
@@ -138,7 +150,8 @@  enum gomp_schedule_type
   GFS_STATIC,
   GFS_DYNAMIC,
   GFS_GUIDED,
-  GFS_AUTO
+  GFS_AUTO,
+  GFS_MONOTONIC = 0x80000000U
 };
 
 struct gomp_doacross_work_share
@@ -175,6 +188,8 @@  struct gomp_doacross_work_share
     /* Likewise, but for the ull implementation.  */
     unsigned long long boundary_ull;
   };
+  /* Pointer to extra memory if needed for lastprivate(conditional).  */
+  void *extra;
   /* Array of shift counts for each dimension if they can be flattened.  */
   unsigned int shift_counts[];
 };
@@ -276,6 +291,9 @@  struct gomp_work_share
     struct gomp_work_share *next_free;
   };
 
+  /* Task reductions for this work-sharing construct.  */
+  uintptr_t *task_reductions;
+
   /* If only few threads are in the team, ordered_team_ids can point
      to this array which fills the padding at the end of this struct.  */
   unsigned inline_ordered_team_ids[0];
@@ -366,6 +384,9 @@  extern void **gomp_places_list;
 extern unsigned long gomp_places_list_len;
 extern unsigned int gomp_num_teams_var;
 extern int gomp_debug_var;
+extern bool gomp_display_affinity_var;
+extern char *gomp_affinity_format_var;
+extern size_t gomp_affinity_format_len;
 extern int goacc_device_num;
 extern char *goacc_device_type;
 extern int goacc_default_dims[GOMP_DIM_MAX];
@@ -471,8 +492,10 @@  struct gomp_taskgroup
   struct gomp_taskgroup *prev;
   /* Queue of tasks that belong in this taskgroup.  */
   struct priority_queue taskgroup_queue;
+  uintptr_t *reductions;
   bool in_taskgroup_wait;
   bool cancelled;
+  bool workshare;
   gomp_sem_t taskgroup_sem;
   size_t num_children;
 };
@@ -615,6 +638,19 @@  struct gomp_thread
 
   /* User pthread thread pool */
   struct gomp_thread_pool *thread_pool;
+
+#if defined(LIBGOMP_USE_PTHREADS) \
+    && (!defined(HAVE_TLS) \
+	|| !defined(__GLIBC__) \
+	|| !defined(USING_INITIAL_EXEC_TLS))
+  /* pthread_t of the thread containing this gomp_thread.
+     On Linux when using initial-exec TLS,
+     (typeof (pthread_t)) gomp_thread () - pthread_self ()
+     is constant in all threads, so we can optimize and not
+     store it.  */
+#define GOMP_NEEDS_THREAD_HANDLE 1
+  pthread_t handle;
+#endif
 };
 
 
@@ -711,6 +747,24 @@  extern bool gomp_affinity_finalize_place
 extern bool gomp_affinity_init_level (int, unsigned long, bool);
 extern void gomp_affinity_print_place (void *);
 extern void gomp_get_place_proc_ids_8 (int, int64_t *);
+extern void gomp_display_affinity_place (char *, size_t, size_t *, int);
+
+/* affinity-fmt.c */
+
+extern void gomp_set_affinity_format (const char *, size_t);
+extern void gomp_display_string (char *, size_t, size_t *, const char *,
+				 size_t);
+#ifdef LIBGOMP_USE_PTHREADS
+typedef pthread_t gomp_thread_handle;
+#else
+typedef struct {} gomp_thread_handle;
+#endif
+extern size_t gomp_display_affinity (char *, size_t, const char *,
+				     gomp_thread_handle,
+				     struct gomp_team_state *, unsigned int);
+extern void gomp_display_affinity_thread (gomp_thread_handle,
+					  struct gomp_team_state *,
+					  unsigned int) __attribute__((cold));
 
 /* iter.c */
 
@@ -747,9 +801,9 @@  extern void gomp_ordered_next (void);
 extern void gomp_ordered_static_init (void);
 extern void gomp_ordered_static_next (void);
 extern void gomp_ordered_sync (void);
-extern void gomp_doacross_init (unsigned, long *, long);
+extern void gomp_doacross_init (unsigned, long *, long, size_t);
 extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
-				    unsigned long long);
+				    unsigned long long, size_t);
 
 /* parallel.c */
 
@@ -772,6 +826,10 @@  extern bool gomp_create_target_task (str
 				     size_t *, unsigned short *, unsigned int,
 				     void **, void **,
 				     enum gomp_target_task_state);
+extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *,
+								unsigned);
+extern void gomp_workshare_taskgroup_start (void);
+extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *);
 
 static void inline
 gomp_finish_task (struct gomp_task *task)
@@ -784,9 +842,11 @@  gomp_finish_task (struct gomp_task *task
 
 extern struct gomp_team *gomp_new_team (unsigned);
 extern void gomp_team_start (void (*) (void *), void *, unsigned,
-			     unsigned, struct gomp_team *);
+			     unsigned, struct gomp_team *,
+			     struct gomp_taskgroup *);
 extern void gomp_team_end (void);
 extern void gomp_free_thread (void *);
+extern int gomp_pause_host (void);
 
 /* target.c */
 
@@ -1009,9 +1069,9 @@  extern bool gomp_remove_var (struct gomp
 
 /* work.c */
 
-extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
+extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned);
 extern void gomp_fini_work_share (struct gomp_work_share *);
-extern bool gomp_work_share_start (bool);
+extern bool gomp_work_share_start (size_t);
 extern void gomp_work_share_end (void);
 extern bool gomp_work_share_end_cancel (void);
 extern void gomp_work_share_end_nowait (void);
@@ -1138,4 +1198,42 @@  task_to_priority_node (enum priority_que
   return (struct priority_node *) ((char *) task
 				   + priority_queue_offset (type));
 }
+
+#ifdef LIBGOMP_USE_PTHREADS
+static inline gomp_thread_handle
+gomp_thread_self (void)
+{
+  return pthread_self ();
+}
+
+static inline gomp_thread_handle
+gomp_thread_to_pthread_t (struct gomp_thread *thr)
+{
+  struct gomp_thread *this_thr = gomp_thread ();
+  if (thr == this_thr)
+    return pthread_self ();
+#ifdef GOMP_NEEDS_THREAD_HANDLE
+  return thr->handle;
+#else
+  /* On Linux with initial-exec TLS, the pthread_t of the thread containing
+     thr can be computed from thr, this_thr and pthread_self (),
+     as the distance between this_thr and pthread_self () is constant.  */
+  return pthread_self () + ((uintptr_t) thr - (uintptr_t) this_thr);
+#endif
+}
+#else
+static inline gomp_thread_handle
+gomp_thread_self (void)
+{
+  return (gomp_thread_handle) {};
+}
+
+static inline gomp_thread_handle
+gomp_thread_to_pthread_t (struct gomp_thread *thr)
+{
+  (void) thr;
+  return gomp_thread_self ();
+}
+#endif
+
 #endif /* LIBGOMP_H */
--- libgomp/libgomp.map	(.../trunk)	(revision 265884)
+++ libgomp/libgomp.map	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -164,6 +164,22 @@  OMP_4.5 {
 	omp_target_disassociate_ptr;
 } OMP_4.0;
 
+OMP_5.0 {
+  global:
+	omp_capture_affinity;
+	omp_capture_affinity_;
+	omp_display_affinity;
+	omp_display_affinity_;
+	omp_get_affinity_format;
+	omp_get_affinity_format_;
+	omp_set_affinity_format;
+	omp_set_affinity_format_;
+	omp_pause_resource;
+	omp_pause_resource_;
+	omp_pause_resource_all;
+	omp_pause_resource_all_;
+} OMP_4.5;
+
 GOMP_1.0 {
   global:
 	GOMP_atomic_end;
@@ -298,6 +314,34 @@  GOMP_4.5 {
 	GOMP_parallel_loop_nonmonotonic_guided;
 } GOMP_4.0.1;
 
+GOMP_5.0 {
+  global:
+	GOMP_loop_doacross_start;
+	GOMP_loop_maybe_nonmonotonic_runtime_next;
+	GOMP_loop_maybe_nonmonotonic_runtime_start;
+	GOMP_loop_nonmonotonic_runtime_next;
+	GOMP_loop_nonmonotonic_runtime_start;
+	GOMP_loop_ordered_start;
+	GOMP_loop_start;
+	GOMP_loop_ull_doacross_start;
+	GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
+	GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
+	GOMP_loop_ull_nonmonotonic_runtime_next;
+	GOMP_loop_ull_nonmonotonic_runtime_start;
+	GOMP_loop_ull_ordered_start;
+	GOMP_loop_ull_start;
+	GOMP_parallel_loop_maybe_nonmonotonic_runtime;
+	GOMP_parallel_loop_nonmonotonic_runtime;
+	GOMP_parallel_reductions;
+	GOMP_sections2_start;
+	GOMP_taskgroup_reduction_register;
+	GOMP_taskgroup_reduction_unregister;
+	GOMP_task_reduction_remap;
+	GOMP_taskwait_depend;
+	GOMP_teams_reg;
+	GOMP_workshare_task_reduction_unregister;
+} GOMP_4.5;
+
 OACC_2.0 {
   global:
 	acc_get_num_devices;
--- libgomp/loop.c	(.../trunk)	(revision 265884)
+++ libgomp/loop.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -27,9 +27,13 @@ 
 
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 #include "libgomp.h"
 
 
+ialias (GOMP_loop_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
 /* Initialize the given work share construct from the given arguments.  */
 
 static inline void
@@ -79,12 +83,12 @@  gomp_loop_init (struct gomp_work_share *
 }
 
 /* The *_start routines are called when first encountering a loop construct
-   that is not bound directly to a parallel construct.  The first thread 
+   that is not bound directly to a parallel construct.  The first thread
    that arrives will create the work-share construct; subsequent threads
    will see the construct exists and allocate work from it.
 
    START, END, INCR are the bounds of the loop; due to the restrictions of
-   OpenMP, these values must be the same in every thread.  This is not 
+   OpenMP, these values must be the same in every thread.  This is not
    verified (nor is it entirely verifiable, since START is not necessarily
    retained intact in the work-share data structure).  CHUNK_SIZE is the
    scheduling parameter; again this must be identical in all threads.
@@ -101,7 +105,7 @@  gomp_loop_static_start (long start, long
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_init (thr->ts.work_share, start, end, incr,
 		      GFS_STATIC, chunk_size);
@@ -123,7 +127,7 @@  gomp_loop_dynamic_start (long start, lon
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_init (thr->ts.work_share, start, end, incr,
 		      GFS_DYNAMIC, chunk_size);
@@ -151,7 +155,7 @@  gomp_loop_guided_start (long start, long
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_init (thr->ts.work_share, start, end, incr,
 		      GFS_GUIDED, chunk_size);
@@ -174,7 +178,7 @@  GOMP_loop_runtime_start (long start, lon
 			 long *istart, long *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_static_start (start, end, incr,
@@ -197,6 +201,100 @@  GOMP_loop_runtime_start (long start, lon
     }
 }
 
+static long
+gomp_adjust_sched (long sched, long *chunk_size)
+{
+  sched &= ~GFS_MONOTONIC;
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_DYNAMIC:
+    case GFS_GUIDED:
+      return sched;
+    /* GFS_RUNTIME is used for runtime schedule without monotonic
+       or nonmonotonic modifiers on the clause.
+       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+       modifier.  */
+    case GFS_RUNTIME:
+    /* GFS_AUTO is used for runtime schedule with nonmonotonic
+       modifier.  */
+    case GFS_AUTO:
+      {
+	struct gomp_task_icv *icv = gomp_icv (false);
+	sched = icv->run_sched_var & ~GFS_MONOTONIC;
+	switch (sched)
+	  {
+	  case GFS_STATIC:
+	  case GFS_DYNAMIC:
+	  case GFS_GUIDED:
+	    *chunk_size = icv->run_sched_chunk_size;
+	    break;
+	  case GFS_AUTO:
+	    sched = GFS_STATIC;
+	    *chunk_size = 0;
+	    break;
+	  default:
+	    abort ();
+	  }
+	return sched;
+      }
+    default:
+      abort ();
+    }
+}
+
+bool
+GOMP_loop_start (long start, long end, long incr, long sched,
+		 long chunk_size, long *istart, long *iend,
+		 uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_init (thr->ts.work_share, start, end, incr,
+		      sched, chunk_size);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (mem)
+	{
+	  uintptr_t size = (uintptr_t) *mem;
+	  if (size > (sizeof (struct gomp_work_share)
+		      - offsetof (struct gomp_work_share,
+				  inline_ordered_team_ids)))
+	    thr->ts.work_share->ordered_team_ids
+	      = gomp_malloc_cleared (size);
+	  else
+	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+	  *mem = (void *) thr->ts.work_share->ordered_team_ids;
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      if (mem)
+	*mem = (void *) thr->ts.work_share->ordered_team_ids;
+    }
+
+  if (!istart)
+    return true;
+  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
+}
+
 /* The *_ordered_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED section.  */
 
@@ -207,7 +305,7 @@  gomp_loop_ordered_static_start (long sta
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_init (thr->ts.work_share, start, end, incr,
 		      GFS_STATIC, chunk_size);
@@ -225,7 +323,7 @@  gomp_loop_ordered_dynamic_start (long st
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_init (thr->ts.work_share, start, end, incr,
 		      GFS_DYNAMIC, chunk_size);
@@ -250,7 +348,7 @@  gomp_loop_ordered_guided_start (long sta
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_init (thr->ts.work_share, start, end, incr,
 		      GFS_GUIDED, chunk_size);
@@ -273,7 +371,7 @@  GOMP_loop_ordered_runtime_start (long st
 				 long *istart, long *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ordered_static_start (start, end, incr,
@@ -297,6 +395,81 @@  GOMP_loop_ordered_runtime_start (long st
     }
 }
 
+bool
+GOMP_loop_ordered_start (long start, long end, long incr, long sched,
+			 long chunk_size, long *istart, long *iend,
+			 uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ordered = 1;
+  bool ret;
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (mem)
+    ordered += (uintptr_t) *mem;
+  if (gomp_work_share_start (ordered))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_init (thr->ts.work_share, start, end, incr,
+		      sched, chunk_size);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (sched == GFS_STATIC)
+	gomp_ordered_static_init ();
+      else
+	gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      sched = thr->ts.work_share->sched;
+      if (sched != GFS_STATIC)
+	gomp_mutex_lock (&thr->ts.work_share->lock);
+    }
+
+  if (mem)
+    {
+      uintptr_t p
+	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
+		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
+      p += __alignof__ (long long) - 1;
+      p &= ~(__alignof__ (long long) - 1);
+      *mem = (void *) p;
+    }
+
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_AUTO:
+      return !gomp_iter_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      ret = gomp_iter_dynamic_next_locked (istart, iend);
+      break;
+    case GFS_GUIDED:
+      ret = gomp_iter_guided_next_locked (istart, iend);
+      break;
+    default:
+      abort ();
+    }
+
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+  return ret;
+}
+
 /* The *_doacross_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
@@ -310,11 +483,11 @@  gomp_loop_doacross_static_start (unsigne
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 		      GFS_STATIC, chunk_size);
-      gomp_doacross_init (ncounts, counts, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -328,11 +501,11 @@  gomp_loop_doacross_dynamic_start (unsign
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 		      GFS_DYNAMIC, chunk_size);
-      gomp_doacross_init (ncounts, counts, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -354,11 +527,11 @@  gomp_loop_doacross_guided_start (unsigne
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 		      GFS_GUIDED, chunk_size);
-      gomp_doacross_init (ncounts, counts, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -378,7 +551,7 @@  GOMP_loop_doacross_runtime_start (unsign
 				  long *istart, long *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_doacross_static_start (ncounts, counts,
@@ -402,8 +575,52 @@  GOMP_loop_doacross_runtime_start (unsign
     }
 }
 
-/* The *_next routines are called when the thread completes processing of 
-   the iteration block currently assigned to it.  If the work-share 
+bool
+GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
+			  long chunk_size, long *istart, long *iend,
+			  uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      size_t extra = 0;
+      if (mem)
+	extra = (uintptr_t) *mem;
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+		      sched, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, extra);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      sched = thr->ts.work_share->sched;
+    }
+
+  if (mem)
+    *mem = thr->ts.work_share->doacross->extra;
+
+  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
+}
+
+/* The *_next routines are called when the thread completes processing of
+   the iteration block currently assigned to it.  If the work-share
    construct is bound directly to a parallel construct, then the iteration
    bounds may have been set up before the parallel.  In which case, this
    may be the first iteration for the thread.
@@ -456,7 +673,7 @@  bool
 GOMP_loop_runtime_next (long *istart, long *iend)
 {
   struct gomp_thread *thr = gomp_thread ();
-  
+
   switch (thr->ts.work_share->sched)
     {
     case GFS_STATIC:
@@ -534,7 +751,7 @@  bool
 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
 {
   struct gomp_thread *thr = gomp_thread ();
-  
+
   switch (thr->ts.work_share->sched)
     {
     case GFS_STATIC:
@@ -563,7 +780,7 @@  gomp_parallel_loop_start (void (*fn) (vo
   num_threads = gomp_resolve_num_threads (num_threads, 0);
   team = gomp_new_team (num_threads);
   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
-  gomp_team_start (fn, data, num_threads, flags, team);
+  gomp_team_start (fn, data, num_threads, flags, team, NULL);
 }
 
 void
@@ -600,7 +817,8 @@  GOMP_parallel_loop_runtime_start (void (
 {
   struct gomp_task_icv *icv = gomp_icv (false);
   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
-			    icv->run_sched_var, icv->run_sched_chunk_size, 0);
+			    icv->run_sched_var & ~GFS_MONOTONIC,
+			    icv->run_sched_chunk_size, 0);
 }
 
 ialias_redirect (GOMP_parallel_end)
@@ -638,11 +856,28 @@  GOMP_parallel_loop_guided (void (*fn) (v
   GOMP_parallel_end ();
 }
 
+void
+GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
+			    unsigned num_threads, long start, long end,
+			    long incr, unsigned flags)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+			    icv->run_sched_var & ~GFS_MONOTONIC,
+			    icv->run_sched_chunk_size, flags);
+  fn (data);
+  GOMP_parallel_end ();
+}
+
 #ifdef HAVE_ATTRIBUTE_ALIAS
 extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
 	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
 extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
 	__attribute__((alias ("GOMP_parallel_loop_guided")));
+extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
+	__attribute__((alias ("GOMP_parallel_loop_runtime")));
+extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
+	__attribute__((alias ("GOMP_parallel_loop_runtime")));
 #else
 void
 GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
@@ -667,21 +902,35 @@  GOMP_parallel_loop_nonmonotonic_guided (
   fn (data);
   GOMP_parallel_end ();
 }
-#endif
 
 void
-GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
-			    unsigned num_threads, long start, long end,
-			    long incr, unsigned flags)
+GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
+					 unsigned num_threads, long start,
+					 long end, long incr, unsigned flags)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
-			    icv->run_sched_var, icv->run_sched_chunk_size,
-			    flags);
+			    icv->run_sched_var & ~GFS_MONOTONIC,
+			    icv->run_sched_chunk_size, flags);
   fn (data);
   GOMP_parallel_end ();
 }
 
+void
+GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
+					       unsigned num_threads, long start,
+					       long end, long incr,
+					       unsigned flags)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+			    icv->run_sched_var & ~GFS_MONOTONIC,
+			    icv->run_sched_chunk_size, flags);
+  fn (data);
+  GOMP_parallel_end ();
+}
+#endif
+
 /* The GOMP_loop_end* routines are called after the thread is told that
    all loop iterations are complete.  The first two versions synchronize
    all threads; the nowait version does not.  */
@@ -721,6 +970,10 @@  extern __typeof(gomp_loop_dynamic_start)
 	__attribute__((alias ("gomp_loop_dynamic_start")));
 extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
 	__attribute__((alias ("gomp_loop_guided_start")));
+extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
+	__attribute__((alias ("GOMP_loop_runtime_start")));
+extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
+	__attribute__((alias ("GOMP_loop_runtime_start")));
 
 extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
 	__attribute__((alias ("gomp_loop_ordered_static_start")));
@@ -746,6 +999,10 @@  extern __typeof(gomp_loop_dynamic_next)
 	__attribute__((alias ("gomp_loop_dynamic_next")));
 extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
 	__attribute__((alias ("gomp_loop_guided_next")));
+extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
+	__attribute__((alias ("GOMP_loop_runtime_next")));
+extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
+	__attribute__((alias ("GOMP_loop_runtime_next")));
 
 extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
 	__attribute__((alias ("gomp_loop_ordered_static_next")));
@@ -791,6 +1048,20 @@  GOMP_loop_nonmonotonic_guided_start (lon
 }
 
 bool
+GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
+				      long *istart, long *iend)
+{
+  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
+}
+
+bool
+GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
+					    long *istart, long *iend)
+{
+  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
+}
+
+bool
 GOMP_loop_ordered_static_start (long start, long end, long incr,
 				long chunk_size, long *istart, long *iend)
 {
@@ -869,6 +1140,18 @@  GOMP_loop_nonmonotonic_guided_next (long
 }
 
 bool
+GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
+{
+  return GOMP_loop_runtime_next (istart, iend);
+}
+
+bool
+GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
+{
+  return GOMP_loop_runtime_next (istart, iend);
+}
+
+bool
 GOMP_loop_ordered_static_next (long *istart, long *iend)
 {
   return gomp_loop_ordered_static_next (istart, iend);
--- libgomp/loop_ull.c	(.../trunk)	(revision 265884)
+++ libgomp/loop_ull.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -27,8 +27,12 @@ 
 
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 #include "libgomp.h"
 
+ialias (GOMP_loop_ull_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
 typedef unsigned long long gomp_ull;
 
 /* Initialize the given work share construct from the given arguments.  */
@@ -104,7 +108,7 @@  gomp_loop_ull_static_start (bool up, gom
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_STATIC, chunk_size);
@@ -122,7 +126,7 @@  gomp_loop_ull_dynamic_start (bool up, go
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_DYNAMIC, chunk_size);
@@ -148,7 +152,7 @@  gomp_loop_ull_guided_start (bool up, gom
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_GUIDED, chunk_size);
@@ -171,7 +175,7 @@  GOMP_loop_ull_runtime_start (bool up, go
 			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_static_start (up, start, end, incr,
@@ -195,6 +199,99 @@  GOMP_loop_ull_runtime_start (bool up, go
     }
 }
 
+static long
+gomp_adjust_sched (long sched, gomp_ull *chunk_size)
+{
+  sched &= ~GFS_MONOTONIC;
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_DYNAMIC:
+    case GFS_GUIDED:
+      return sched;
+    /* GFS_RUNTIME is used for runtime schedule without monotonic
+       or nonmonotonic modifiers on the clause.
+       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+       modifier.  */
+    case GFS_RUNTIME:
+    /* GFS_AUTO is used for runtime schedule with nonmonotonic
+       modifier.  */
+    case GFS_AUTO:
+      {
+	struct gomp_task_icv *icv = gomp_icv (false);
+	sched = icv->run_sched_var & ~GFS_MONOTONIC;
+	switch (sched)
+	  {
+	  case GFS_STATIC:
+	  case GFS_DYNAMIC:
+	  case GFS_GUIDED:
+	    *chunk_size = icv->run_sched_chunk_size;
+	    break;
+	  case GFS_AUTO:
+	    sched = GFS_STATIC;
+	    *chunk_size = 0;
+	    break;
+	  default:
+	    abort ();
+	  }
+	return sched;
+      }
+    default:
+      abort ();
+    }
+}
+
+bool
+GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
+		     gomp_ull incr, long sched, gomp_ull chunk_size,
+		     gomp_ull *istart, gomp_ull *iend,
+		     uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+      			  sched, chunk_size);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (mem)
+	{
+	  uintptr_t size = (uintptr_t) *mem;
+	  if (size > (sizeof (struct gomp_work_share)
+		      - offsetof (struct gomp_work_share,
+				  inline_ordered_team_ids)))
+	    thr->ts.work_share->ordered_team_ids
+	      = gomp_malloc_cleared (size);
+	  else
+	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+	  *mem = (void *) thr->ts.work_share->ordered_team_ids;
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      if (mem)
+	*mem = (void *) thr->ts.work_share->ordered_team_ids;
+    }
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
 /* The *_ordered_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED section.  */
 
@@ -206,7 +303,7 @@  gomp_loop_ull_ordered_static_start (bool
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_STATIC, chunk_size);
@@ -225,7 +322,7 @@  gomp_loop_ull_ordered_dynamic_start (boo
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_DYNAMIC, chunk_size);
@@ -251,7 +348,7 @@  gomp_loop_ull_ordered_guided_start (bool
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_GUIDED, chunk_size);
@@ -275,7 +372,7 @@  GOMP_loop_ull_ordered_runtime_start (boo
 				     gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
@@ -299,6 +396,82 @@  GOMP_loop_ull_ordered_runtime_start (boo
     }
 }
 
+bool
+GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
+			     gomp_ull incr, long sched, gomp_ull chunk_size,
+			     gomp_ull *istart, gomp_ull *iend,
+			     uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ordered = 1;
+  bool ret;
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (mem)
+    ordered += (uintptr_t) *mem;
+  if (gomp_work_share_start (ordered))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  sched, chunk_size);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (sched == GFS_STATIC)
+	gomp_ordered_static_init ();
+      else
+	gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      sched = thr->ts.work_share->sched;
+      if (sched != GFS_STATIC)
+	gomp_mutex_lock (&thr->ts.work_share->lock);
+    }
+
+  if (mem)
+    {
+      uintptr_t p
+	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
+		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
+      p += __alignof__ (long long) - 1;
+      p &= ~(__alignof__ (long long) - 1);
+      *mem = (void *) p;
+    }
+
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_AUTO:
+      return !gomp_iter_ull_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+      break;
+    case GFS_GUIDED:
+      ret = gomp_iter_ull_guided_next_locked (istart, iend);
+      break;
+    default:
+      abort ();
+    }
+
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+  return ret;
+}
+
 /* The *_doacross_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
@@ -313,11 +486,11 @@  gomp_loop_ull_doacross_static_start (uns
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
 			  GFS_STATIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -332,11 +505,11 @@  gomp_loop_ull_doacross_dynamic_start (un
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
 			  GFS_DYNAMIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -359,11 +532,11 @@  gomp_loop_ull_doacross_guided_start (uns
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
 			  GFS_GUIDED, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -383,7 +556,7 @@  GOMP_loop_ull_doacross_runtime_start (un
 				      gomp_ull *istart, gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_doacross_static_start (ncounts, counts,
@@ -407,6 +580,51 @@  GOMP_loop_ull_doacross_runtime_start (un
     }
 }
 
+bool
+GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
+			      long sched, gomp_ull chunk_size,
+			      gomp_ull *istart, gomp_ull *iend,
+			      uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      size_t extra = 0;
+      if (mem)
+	extra = (uintptr_t) *mem;
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+			  sched, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      sched = thr->ts.work_share->sched;
+    }
+
+  if (mem)
+    *mem = thr->ts.work_share->doacross->extra;
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
 /* The *_next routines are called when the thread completes processing of
    the iteration block currently assigned to it.  If the work-share
    construct is bound directly to a parallel construct, then the iteration
@@ -570,6 +788,10 @@  extern __typeof(gomp_loop_ull_dynamic_st
 	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
 	__attribute__((alias ("gomp_loop_ull_guided_start")));
+extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
+	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
+extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
+	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
 
 extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
 	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
@@ -595,6 +817,10 @@  extern __typeof(gomp_loop_ull_dynamic_ne
 	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
 	__attribute__((alias ("gomp_loop_ull_guided_next")));
+extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
+	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
+extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
+	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
 
 extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
 	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
@@ -650,6 +876,23 @@  GOMP_loop_ull_nonmonotonic_guided_start
 }
 
 bool
+GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
+					  gomp_ull end, gomp_ull incr,
+					  gomp_ull *istart, gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
+}
+
+bool
+GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
+						gomp_ull end, gomp_ull incr,
+						gomp_ull *istart,
+						gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
+}
+
+bool
 GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
 				    gomp_ull incr, gomp_ull chunk_size,
 				    gomp_ull *istart, gomp_ull *iend)
@@ -734,6 +977,19 @@  GOMP_loop_ull_nonmonotonic_guided_next (
 }
 
 bool
+GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
+					       gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_next (istart, iend);
+}
+
+bool
 GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
 {
   return gomp_loop_ull_ordered_static_next (istart, iend);
--- libgomp/Makefile.am	(.../trunk)	(revision 265884)
+++ libgomp/Makefile.am	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -64,7 +64,8 @@  libgomp_la_SOURCES = alloc.c atomic.c ba
 	parallel.c sections.c single.c task.c team.c work.c lock.c mutex.c \
 	proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
 	splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
-	oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c
+	oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
+	affinity-fmt.c teams.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
--- libgomp/Makefile.in	(.../trunk)	(revision 265884)
+++ libgomp/Makefile.in	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -216,7 +216,8 @@  am_libgomp_la_OBJECTS = alloc.lo atomic.
 	sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo \
 	target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
 	oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
-	oacc-plugin.lo oacc-cuda.lo priority_queue.lo $(am__objects_1)
+	oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
+	teams.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -549,7 +550,7 @@  libgomp_la_SOURCES = alloc.c atomic.c ba
 	affinity.c target.c splay-tree.c libgomp-plugin.c \
 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
-	$(am__append_3)
+	affinity-fmt.c teams.c $(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -724,6 +725,7 @@  distclean-compile:
 	-rm -f *.tab.c
 
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@
@@ -762,6 +764,7 @@  distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@
 
--- libgomp/omp.h.in	(.../trunk)	(revision 265884)
+++ libgomp/omp.h.in	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -50,7 +50,8 @@  typedef enum omp_sched_t
   omp_sched_static = 1,
   omp_sched_dynamic = 2,
   omp_sched_guided = 3,
-  omp_sched_auto = 4
+  omp_sched_auto = 4,
+  omp_sched_monotonic = 0x80000000U
 } omp_sched_t;
 
 typedef enum omp_proc_bind_t
@@ -62,14 +63,32 @@  typedef enum omp_proc_bind_t
   omp_proc_bind_spread = 4
 } omp_proc_bind_t;
 
-typedef enum omp_lock_hint_t
+typedef enum omp_sync_hint_t
 {
-  omp_lock_hint_none = 0,
-  omp_lock_hint_uncontended = 1,
-  omp_lock_hint_contended = 2,
-  omp_lock_hint_nonspeculative = 4,
-  omp_lock_hint_speculative = 8,
-} omp_lock_hint_t;
+  omp_sync_hint_none = 0,
+  omp_lock_hint_none = omp_sync_hint_none,
+  omp_sync_hint_uncontended = 1,
+  omp_lock_hint_uncontended = omp_sync_hint_uncontended,
+  omp_sync_hint_contended = 2,
+  omp_lock_hint_contended = omp_sync_hint_contended,
+  omp_sync_hint_nonspeculative = 4,
+  omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative,
+  omp_sync_hint_speculative = 8,
+  omp_lock_hint_speculative = omp_sync_hint_speculative
+} omp_sync_hint_t;
+
+typedef omp_sync_hint_t omp_lock_hint_t;
+
+typedef struct __attribute__((__aligned__ (sizeof (void *)))) omp_depend_t
+{
+  char __omp_depend_t__[2 * sizeof (void *)];
+} omp_depend_t;
+
+typedef enum omp_pause_resource_t
+{
+  omp_pause_soft = 1,
+  omp_pause_hard = 2
+} omp_pause_resource_t;
 
 #ifdef __cplusplus
 extern "C" {
@@ -93,7 +112,7 @@  extern void omp_set_nested (int) __GOMP_
 extern int omp_get_nested (void) __GOMP_NOTHROW;
 
 extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW;
-extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t)
+extern void omp_init_lock_with_hint (omp_lock_t *, omp_sync_hint_t)
   __GOMP_NOTHROW;
 extern void omp_destroy_lock (omp_lock_t *) __GOMP_NOTHROW;
 extern void omp_set_lock (omp_lock_t *) __GOMP_NOTHROW;
@@ -101,7 +120,7 @@  extern void omp_unset_lock (omp_lock_t *
 extern int omp_test_lock (omp_lock_t *) __GOMP_NOTHROW;
 
 extern void omp_init_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
-extern void omp_init_nest_lock_with_hint (omp_nest_lock_t *, omp_lock_hint_t)
+extern void omp_init_nest_lock_with_hint (omp_nest_lock_t *, omp_sync_hint_t)
   __GOMP_NOTHROW;
 extern void omp_destroy_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
 extern void omp_set_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
@@ -144,19 +163,30 @@  extern int omp_get_max_task_priority (vo
 
 extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW;
 extern void omp_target_free (void *, int) __GOMP_NOTHROW;
-extern int omp_target_is_present (void *, int) __GOMP_NOTHROW;
-extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__,
-			      __SIZE_TYPE__, int, int) __GOMP_NOTHROW;
-extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int,
+extern int omp_target_is_present (const void *, int) __GOMP_NOTHROW;
+extern int omp_target_memcpy (void *, const void *, __SIZE_TYPE__,
+			      __SIZE_TYPE__, __SIZE_TYPE__, int, int)
+  __GOMP_NOTHROW;
+extern int omp_target_memcpy_rect (void *, const void *, __SIZE_TYPE__, int,
 				   const __SIZE_TYPE__ *,
 				   const __SIZE_TYPE__ *,
 				   const __SIZE_TYPE__ *,
 				   const __SIZE_TYPE__ *,
 				   const __SIZE_TYPE__ *, int, int)
   __GOMP_NOTHROW;
-extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__,
+extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
 				     __SIZE_TYPE__, int) __GOMP_NOTHROW;
-extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW;
+extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
+
+extern void omp_set_affinity_format (const char *) __GOMP_NOTHROW;
+extern __SIZE_TYPE__ omp_get_affinity_format (char *, __SIZE_TYPE__)
+  __GOMP_NOTHROW;
+extern void omp_display_affinity (const char *) __GOMP_NOTHROW;
+extern __SIZE_TYPE__ omp_capture_affinity (char *, __SIZE_TYPE__, const char *)
+  __GOMP_NOTHROW;
+
+extern int omp_pause_resource (omp_pause_resource_t, int) __GOMP_NOTHROW;
+extern int omp_pause_resource_all (omp_pause_resource_t) __GOMP_NOTHROW;
 
 #ifdef __cplusplus
 }
--- libgomp/omp_lib.f90.in	(.../trunk)	(revision 265884)
+++ libgomp/omp_lib.f90.in	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -30,6 +30,7 @@ 
         integer, parameter :: omp_sched_kind = 4
         integer, parameter :: omp_proc_bind_kind = 4
         integer, parameter :: omp_lock_hint_kind = 4
+        integer, parameter :: omp_pause_resource_kind = 4
         integer (omp_sched_kind), parameter :: omp_sched_static = 1
         integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2
         integer (omp_sched_kind), parameter :: omp_sched_guided = 3
@@ -54,6 +55,10 @@ 
                  parameter :: omp_lock_hint_nonspeculative = 4
         integer (omp_lock_hint_kind), &
                  parameter :: omp_lock_hint_speculative = 8
+        integer (kind=omp_pause_resource_kind), &
+                 parameter :: omp_pause_soft = 1
+        integer (kind=omp_pause_resource_kind), &
+                 parameter :: omp_pause_hard = 2
       end module
 
       module omp_lib
@@ -433,4 +438,50 @@ 
           end function omp_get_max_task_priority
         end interface
 
+        interface
+          subroutine omp_set_affinity_format (format)
+            character(len=*), intent(in) :: format
+          end subroutine omp_set_affinity_format
+        end interface
+
+        interface
+          function omp_get_affinity_format (buffer)
+            integer (4) :: omp_get_affinity_format
+            character(len=*), intent(out) :: buffer
+          end function omp_get_affinity_format
+        end interface
+
+        interface
+          subroutine omp_display_affinity (format)
+            character(len=*), intent(in) :: format
+          end subroutine omp_display_affinity
+        end interface
+
+        interface
+          function omp_capture_affinity (buffer, format)
+            integer (4) :: omp_capture_affinity
+            character(len=*), intent(out) :: buffer
+            character(len=*), intent(in) :: format
+          end function omp_capture_affinity
+        end interface
+
+        interface
+          function omp_pause_resource (kind, device_num)
+            use omp_lib_kinds
+            integer (4) :: omp_pause_resource
+            integer (kind=omp_pause_resource_kind), &
+              intent(in) :: kind
+            integer (4) :: device_num
+          end function
+        end interface
+
+        interface
+          function omp_pause_resource_all (kind)
+            use omp_lib_kinds
+            integer (4) :: omp_pause_resource_all
+            integer (kind=omp_pause_resource_kind), &
+              intent(in) :: kind
+          end function
+        end interface
+
       end module omp_lib
--- libgomp/omp_lib.h.in	(.../trunk)	(revision 265884)
+++ libgomp/omp_lib.h.in	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -59,6 +59,12 @@ 
       parameter (omp_lock_hint_nonspeculative = 4)
       parameter (omp_lock_hint_speculative = 8)
       parameter (openmp_version = 201511)
+      integer omp_pause_resource_kind
+      parameter (omp_pause_resource_kind = 4)
+      integer (omp_pause_resource_kind) omp_pause_soft
+      integer (omp_pause_resource_kind) omp_pause_hard
+      parameter (omp_pause_soft = 1)
+      parameter (omp_pause_hard = 2)
 
       external omp_init_lock, omp_init_nest_lock
       external omp_init_lock_with_hint
@@ -126,3 +132,12 @@ 
 
       external omp_get_max_task_priority
       integer(4) omp_get_max_task_priority
+
+      external omp_set_affinity_format, omp_get_affinity_format
+      external omp_display_affinity, omp_capture_affinity
+      integer(4) omp_get_affinity_format
+      integer(4) omp_capture_affinity
+
+      external omp_pause_resource, omp_pause_resource_all
+      integer(4) omp_pause_resource
+      integer(4) omp_pause_resource_all
--- libgomp/ordered.c	(.../trunk)	(revision 265884)
+++ libgomp/ordered.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -259,7 +259,8 @@  GOMP_ordered_end (void)
 #define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
 
 void
-gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
+gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size,
+		    size_t extra)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
@@ -269,13 +270,24 @@  gomp_doacross_init (unsigned ncounts, lo
   struct gomp_doacross_work_share *doacross;
 
   if (team == NULL || team->nthreads == 1)
-    return;
+    {
+    empty:
+      if (!extra)
+	ws->doacross = NULL;
+      else
+	{
+	  doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
+	  doacross->extra = (void *) (doacross + 1);
+	  ws->doacross = doacross;
+	}
+      return;
+    }
 
   for (i = 0; i < ncounts; i++)
     {
       /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
       if (counts[i] == 0)
-	return;
+	goto empty;
 
       if (num_bits <= MAX_COLLAPSED_BITS)
 	{
@@ -314,7 +326,7 @@  gomp_doacross_init (unsigned ncounts, lo
   elt_sz = (elt_sz + 63) & ~63UL;
 
   doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
-			  + shift_sz);
+			  + shift_sz + extra);
   doacross->chunk_size = chunk_size;
   doacross->elt_sz = elt_sz;
   doacross->ncounts = ncounts;
@@ -322,6 +334,13 @@  gomp_doacross_init (unsigned ncounts, lo
   doacross->array = (unsigned char *)
 		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
 		     & ~(uintptr_t) 63);
+  if (extra)
+    {
+      doacross->extra = doacross->array + num_ents * elt_sz;
+      memset (doacross->extra, '\0', extra);
+    }
+  else
+    doacross->extra = NULL;
   if (num_bits <= MAX_COLLAPSED_BITS)
     {
       unsigned int shift_count = 0;
@@ -360,7 +379,8 @@  GOMP_doacross_post (long *counts)
   unsigned long ent;
   unsigned int i;
 
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
     {
       __sync_synchronize ();
       return;
@@ -411,7 +431,8 @@  GOMP_doacross_wait (long first, ...)
   unsigned long ent;
   unsigned int i;
 
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
     {
       __sync_synchronize ();
       return;
@@ -488,7 +509,8 @@  GOMP_doacross_wait (long first, ...)
 typedef unsigned long long gomp_ull;
 
 void
-gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
+gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts,
+			gomp_ull chunk_size, size_t extra)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
@@ -498,13 +520,24 @@  gomp_doacross_ull_init (unsigned ncounts
   struct gomp_doacross_work_share *doacross;
 
   if (team == NULL || team->nthreads == 1)
-    return;
+    {
+    empty:
+      if (!extra)
+	ws->doacross = NULL;
+      else
+	{
+	  doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
+	  doacross->extra = (void *) (doacross + 1);
+	  ws->doacross = doacross;
+	}
+      return;
+    }
 
   for (i = 0; i < ncounts; i++)
     {
       /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
       if (counts[i] == 0)
-	return;
+	goto empty;
 
       if (num_bits <= MAX_COLLAPSED_BITS)
 	{
@@ -557,6 +590,13 @@  gomp_doacross_ull_init (unsigned ncounts
   doacross->array = (unsigned char *)
 		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
 		     & ~(uintptr_t) 63);
+  if (extra)
+    {
+      doacross->extra = doacross->array + num_ents * elt_sz;
+      memset (doacross->extra, '\0', extra);
+    }
+  else
+    doacross->extra = NULL;
   if (num_bits <= MAX_COLLAPSED_BITS)
     {
       unsigned int shift_count = 0;
@@ -595,7 +635,8 @@  GOMP_doacross_ull_post (gomp_ull *counts
   unsigned long ent;
   unsigned int i;
 
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
     {
       __sync_synchronize ();
       return;
@@ -667,7 +708,8 @@  GOMP_doacross_ull_wait (gomp_ull first,
   unsigned long ent;
   unsigned int i;
 
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
     {
       __sync_synchronize ();
       return;
--- libgomp/parallel.c	(.../trunk)	(revision 265884)
+++ libgomp/parallel.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -123,7 +123,8 @@  void
 GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
 {
   num_threads = gomp_resolve_num_threads (num_threads, 0);
-  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads));
+  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
+		   NULL);
 }
 
 void
@@ -161,14 +162,33 @@  GOMP_parallel_end (void)
 ialias (GOMP_parallel_end)
 
 void
-GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags)
+GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
+	       unsigned int flags)
 {
   num_threads = gomp_resolve_num_threads (num_threads, 0);
-  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads));
+  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
+		   NULL);
   fn (data);
   ialias_call (GOMP_parallel_end) ();
 }
 
+unsigned
+GOMP_parallel_reductions (void (*fn) (void *), void *data,
+			  unsigned num_threads, unsigned int flags)
+{
+  struct gomp_taskgroup *taskgroup;
+  num_threads = gomp_resolve_num_threads (num_threads, 0);
+  uintptr_t *rdata = *(uintptr_t **)data;
+  taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
+  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
+		   taskgroup);
+  fn (data);
+  ialias_call (GOMP_parallel_end) ();
+  gomp_sem_destroy (&taskgroup->taskgroup_sem);
+  free (taskgroup);
+  return num_threads;
+}
+
 bool
 GOMP_cancellation_point (int which)
 {
@@ -185,8 +205,15 @@  GOMP_cancellation_point (int which)
     }
   else if (which & GOMP_CANCEL_TASKGROUP)
     {
-      if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
-	return true;
+      if (thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return true;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return true;
+	}
       /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
 	 as #pragma omp cancel parallel also cancels all explicit
 	 tasks.  */
@@ -218,11 +245,17 @@  GOMP_cancel (int which, bool do_cancel)
     }
   else if (which & GOMP_CANCEL_TASKGROUP)
     {
-      if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
+      if (thr->task->taskgroup)
 	{
-	  gomp_mutex_lock (&team->task_lock);
-	  thr->task->taskgroup->cancelled = true;
-	  gomp_mutex_unlock (&team->task_lock);
+	  struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
+	  if (taskgroup->workshare && taskgroup->prev)
+	    taskgroup = taskgroup->prev;
+	  if (!taskgroup->cancelled)
+	    {
+	      gomp_mutex_lock (&team->task_lock);
+	      taskgroup->cancelled = true;
+	      gomp_mutex_unlock (&team->task_lock);
+	    }
 	}
       return true;
     }
--- libgomp/sections.c	(.../trunk)	(revision 265884)
+++ libgomp/sections.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -26,8 +26,11 @@ 
 /* This file handles the SECTIONS construct.  */
 
 #include "libgomp.h"
+#include <string.h>
 
 
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
 /* Initialize the given work share construct from the given arguments.  */
 
 static inline void
@@ -72,7 +75,7 @@  GOMP_sections_start (unsigned count)
   struct gomp_thread *thr = gomp_thread ();
   long s, e, ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_sections_init (thr->ts.work_share, count);
       gomp_work_share_init_done ();
@@ -95,6 +98,66 @@  GOMP_sections_start (unsigned count)
   return ret;
 }
 
+unsigned
+GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  long s, e, ret;
+
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      gomp_sections_init (thr->ts.work_share, count);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (mem)
+	{
+	  uintptr_t size = (uintptr_t) *mem;
+	  if (size > (sizeof (struct gomp_work_share)
+		      - offsetof (struct gomp_work_share,
+				  inline_ordered_team_ids)))
+	    thr->ts.work_share->ordered_team_ids
+	      = gomp_malloc_cleared (size);
+	  else
+	    memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+	  *mem = (void *) thr->ts.work_share->ordered_team_ids;
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      if (mem)
+	*mem = (void *) thr->ts.work_share->ordered_team_ids;
+    }
+
+#ifdef HAVE_SYNC_BUILTINS
+  if (gomp_iter_dynamic_next (&s, &e))
+    ret = s;
+  else
+    ret = 0;
+#else
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  if (gomp_iter_dynamic_next_locked (&s, &e))
+    ret = s;
+  else
+    ret = 0;
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+  return ret;
+}
+
 /* This routine is called when the thread completes processing of the
    section currently assigned to it.  If the work-share construct is
    bound directly to a parallel construct, then the construct may have
@@ -140,7 +203,7 @@  GOMP_parallel_sections_start (void (*fn)
   num_threads = gomp_resolve_num_threads (num_threads, count);
   team = gomp_new_team (num_threads);
   gomp_sections_init (&team->work_shares[0], count);
-  gomp_team_start (fn, data, num_threads, 0, team);
+  gomp_team_start (fn, data, num_threads, 0, team, NULL);
 }
 
 ialias_redirect (GOMP_parallel_end)
@@ -154,7 +217,7 @@  GOMP_parallel_sections (void (*fn) (void
   num_threads = gomp_resolve_num_threads (num_threads, count);
   team = gomp_new_team (num_threads);
   gomp_sections_init (&team->work_shares[0], count);
-  gomp_team_start (fn, data, num_threads, flags, team);
+  gomp_team_start (fn, data, num_threads, flags, team, NULL);
   fn (data);
   GOMP_parallel_end ();
 }
--- libgomp/single.c	(.../trunk)	(revision 265884)
+++ libgomp/single.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -47,7 +47,7 @@  GOMP_single_start (void)
   return __sync_bool_compare_and_swap (&team->single_count, single_count,
 				       single_count + 1L);
 #else
-  bool ret = gomp_work_share_start (false);
+  bool ret = gomp_work_share_start (0);
   if (ret)
     gomp_work_share_init_done ();
   gomp_work_share_end_nowait ();
@@ -68,7 +68,7 @@  GOMP_single_copy_start (void)
   bool first;
   void *ret;
 
-  first = gomp_work_share_start (false);
+  first = gomp_work_share_start (0);
   
   if (first)
     {
--- libgomp/target.c	(.../trunk)	(revision 265884)
+++ libgomp/target.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -1854,11 +1854,20 @@  GOMP_target_update_ext (int device, size
 	      struct gomp_team *team = thr->ts.team;
 	      /* If parallel or taskgroup has been cancelled, don't start new
 		 tasks.  */
-	      if (team
-		  && (gomp_team_barrier_cancelled (&team->barrier)
-		      || (thr->task->taskgroup
-			  && thr->task->taskgroup->cancelled)))
-		return;
+	      if (__builtin_expect (gomp_cancel_var, 0) && team)
+		{
+		  if (gomp_team_barrier_cancelled (&team->barrier))
+		    return;
+		  if (thr->task->taskgroup)
+		    {
+		      if (thr->task->taskgroup->cancelled)
+			return;
+		      if (thr->task->taskgroup->workshare
+			  && thr->task->taskgroup->prev
+			  && thr->task->taskgroup->prev->cancelled)
+			return;
+		    }
+		}
 
 	      gomp_task_maybe_wait_for_dependencies (depend);
 	    }
@@ -1873,10 +1882,20 @@  GOMP_target_update_ext (int device, size
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	return;
+      if (thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return;
+	}
+    }
 
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
 }
@@ -1985,11 +2004,20 @@  GOMP_target_enter_exit_data (int device,
 	      struct gomp_team *team = thr->ts.team;
 	      /* If parallel or taskgroup has been cancelled, don't start new
 		 tasks.  */
-	      if (team
-		  && (gomp_team_barrier_cancelled (&team->barrier)
-		      || (thr->task->taskgroup
-			  && thr->task->taskgroup->cancelled)))
-		return;
+	      if (__builtin_expect (gomp_cancel_var, 0) && team)
+		{
+		  if (gomp_team_barrier_cancelled (&team->barrier))
+		    return;
+		  if (thr->task->taskgroup)
+		    {
+		      if (thr->task->taskgroup->cancelled)
+			return;
+		      if (thr->task->taskgroup->workshare
+			  && thr->task->taskgroup->prev
+			  && thr->task->taskgroup->prev->cancelled)
+			return;
+		    }
+		}
 
 	      gomp_task_maybe_wait_for_dependencies (depend);
 	    }
@@ -2004,10 +2032,20 @@  GOMP_target_enter_exit_data (int device,
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	return;
+      if (thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return;
+	}
+    }
 
   size_t i;
   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
@@ -2164,7 +2202,7 @@  omp_target_free (void *device_ptr, int d
 }
 
 int
-omp_target_is_present (void *ptr, int device_num)
+omp_target_is_present (const void *ptr, int device_num)
 {
   if (ptr == NULL)
     return 1;
@@ -2196,8 +2234,9 @@  omp_target_is_present (void *ptr, int de
 }
 
 int
-omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
-		   size_t src_offset, int dst_device_num, int src_device_num)
+omp_target_memcpy (void *dst, const void *src, size_t length,
+		   size_t dst_offset, size_t src_offset, int dst_device_num,
+		   int src_device_num)
 {
   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
   bool ret;
@@ -2264,7 +2303,7 @@  omp_target_memcpy (void *dst, void *src,
 }
 
 static int
-omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
+omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
 			       int num_dims, const size_t *volume,
 			       const size_t *dst_offsets,
 			       const size_t *src_offsets,
@@ -2286,21 +2325,25 @@  omp_target_memcpy_rect_worker (void *dst
 	return EINVAL;
       if (dst_devicep == NULL && src_devicep == NULL)
 	{
-	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
+		  length);
 	  ret = 1;
 	}
       else if (src_devicep == NULL)
 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
 					  (char *) dst + dst_off,
-					  (char *) src + src_off, length);
+					  (const char *) src + src_off,
+					  length);
       else if (dst_devicep == NULL)
 	ret = src_devicep->dev2host_func (src_devicep->target_id,
 					  (char *) dst + dst_off,
-					  (char *) src + src_off, length);
+					  (const char *) src + src_off,
+					  length);
       else if (src_devicep == dst_devicep)
 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
 					 (char *) dst + dst_off,
-					 (char *) src + src_off, length);
+					 (const char *) src + src_off,
+					 length);
       else
 	ret = 0;
       return ret ? 0 : EINVAL;
@@ -2321,7 +2364,7 @@  omp_target_memcpy_rect_worker (void *dst
   for (j = 0; j < volume[0]; j++)
     {
       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
-					   (char *) src + src_off,
+					   (const char *) src + src_off,
 					   element_size, num_dims - 1,
 					   volume + 1, dst_offsets + 1,
 					   src_offsets + 1, dst_dimensions + 1,
@@ -2336,7 +2379,7 @@  omp_target_memcpy_rect_worker (void *dst
 }
 
 int
-omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
+omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
 			int num_dims, const size_t *volume,
 			const size_t *dst_offsets,
 			const size_t *src_offsets,
@@ -2395,8 +2438,8 @@  omp_target_memcpy_rect (void *dst, void
 }
 
 int
-omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
-			  size_t device_offset, int device_num)
+omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
+			  size_t size, size_t device_offset, int device_num)
 {
   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
     return EINVAL;
@@ -2457,7 +2500,7 @@  omp_target_associate_ptr (void *host_ptr
 }
 
 int
-omp_target_disassociate_ptr (void *ptr, int device_num)
+omp_target_disassociate_ptr (const void *ptr, int device_num)
 {
   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
     return EINVAL;
@@ -2498,6 +2541,31 @@  omp_target_disassociate_ptr (void *ptr,
   return ret;
 }
 
+int
+omp_pause_resource (omp_pause_resource_t kind, int device_num)
+{
+  (void) kind;
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return gomp_pause_host ();
+  if (device_num < 0 || device_num >= gomp_get_num_devices ())
+    return -1;
+  /* Do nothing for target devices for now.  */
+  return 0;
+}
+
+int
+omp_pause_resource_all (omp_pause_resource_t kind)
+{
+  (void) kind;
+  if (gomp_pause_host ())
+    return -1;
+  /* Do nothing for target devices for now.  */
+  return 0;
+}
+
+ialias (omp_pause_resource)
+ialias (omp_pause_resource_all)
+
 #ifdef PLUGIN_SUPPORT
 
 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
--- libgomp/task.c	(.../trunk)	(revision 265884)
+++ libgomp/task.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -166,21 +166,72 @@  gomp_task_handle_depend (struct gomp_tas
 			 void **depend)
 {
   size_t ndepend = (uintptr_t) depend[0];
-  size_t nout = (uintptr_t) depend[1];
   size_t i;
   hash_entry_type ent;
 
+  if (ndepend)
+    {
+      /* depend[0] is total # */
+      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
+      /* ndepend - nout is # of in: */
+      for (i = 0; i < ndepend; i++)
+	{
+	  task->depend[i].addr = depend[2 + i];
+	  task->depend[i].is_in = i >= nout;
+	}
+    }
+  else
+    {
+      ndepend = (uintptr_t) depend[1]; /* total # */
+      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
+      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
+      /* For now we treat mutexinoutset like out, which is compliant, but
+	 inefficient.  */
+      size_t nin = (uintptr_t) depend[4]; /* # of in: */
+      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
+      size_t normal = nout + nmutexinoutset + nin;
+      size_t n = 0;
+      for (i = normal; i < ndepend; i++)
+	{
+	  void **d = (void **) (uintptr_t) depend[5 + i];
+	  switch ((uintptr_t) d[1])
+	    {
+	    case GOMP_DEPEND_OUT:
+	    case GOMP_DEPEND_INOUT:
+	    case GOMP_DEPEND_MUTEXINOUTSET:
+	      break;
+	    case GOMP_DEPEND_IN:
+	      continue;
+	    default:
+	      gomp_fatal ("unknown omp_depend_t dependence type %d",
+			  (int) (uintptr_t) d[1]);
+	    }
+	  task->depend[n].addr = d[0];
+	  task->depend[n++].is_in = 0;
+	}
+      for (i = 0; i < normal; i++)
+	{
+	  task->depend[n].addr = depend[5 + i];
+	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
+	}
+      for (i = normal; i < ndepend; i++)
+	{
+	  void **d = (void **) (uintptr_t) depend[5 + i];
+	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
+	    continue;
+	  task->depend[n].addr = d[0];
+	  task->depend[n++].is_in = 1;
+	}
+    }
   task->depend_count = ndepend;
   task->num_dependees = 0;
   if (parent->depend_hash == NULL)
     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
   for (i = 0; i < ndepend; i++)
     {
-      task->depend[i].addr = depend[2 + i];
       task->depend[i].next = NULL;
       task->depend[i].prev = NULL;
       task->depend[i].task = task;
-      task->depend[i].is_in = i >= nout;
       task->depend[i].redundant = false;
       task->depend[i].redundant_out = false;
 
@@ -205,7 +256,7 @@  gomp_task_handle_depend (struct gomp_tas
 	      last = ent;
 
 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
-	      if (i >= nout && ent->is_in)
+	      if (task->depend[i].is_in && ent->is_in)
 		continue;
 
 	      if (!ent->is_in)
@@ -280,9 +331,18 @@  gomp_task_handle_depend (struct gomp_tas
    then the task may be executed by any member of the team.
 
    DEPEND is an array containing:
+     if depend[0] is non-zero, then:
 	depend[0]: number of depend elements.
-	depend[1]: number of depend elements of type "out".
-	depend[2..N+1]: address of [1..N]th depend element.  */
+	depend[1]: number of depend elements of type "out/inout".
+	depend[2..N+1]: address of [1..N]th depend element.
+     otherwise, when depend[0] is zero, then:
+	depend[1]: number of depend elements.
+	depend[2]: number of depend elements of type "out/inout".
+	depend[3]: number of depend elements of type "mutexinoutset".
+	depend[4]: number of depend elements of type "in".
+	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
+	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
+		   omp_depend_t objects.  */
 
 void
 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
@@ -303,10 +363,20 @@  GOMP_task (void (*fn) (void *), void *da
 #endif
 
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	return;
+      if (thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return;
+	}
+    }
 
   if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
     priority = 0;
@@ -377,7 +447,7 @@  GOMP_task (void (*fn) (void *), void *da
       size_t depend_size = 0;
 
       if (flags & GOMP_TASK_FLAG_DEPEND)
-	depend_size = ((uintptr_t) depend[0]
+	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
 		       * sizeof (struct gomp_task_depend_entry));
       task = gomp_malloc (sizeof (*task) + depend_size
 			  + arg_size + arg_align - 1);
@@ -404,14 +474,26 @@  GOMP_task (void (*fn) (void *), void *da
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
 	 tasks.  */
-      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
-			     || (taskgroup && taskgroup->cancelled))
-			    && !task->copy_ctors_done, 0))
+      if (__builtin_expect (gomp_cancel_var, 0)
+	  && !task->copy_ctors_done)
 	{
-	  gomp_mutex_unlock (&team->task_lock);
-	  gomp_finish_task (task);
-	  free (task);
-	  return;
+	  if (gomp_team_barrier_cancelled (&team->barrier))
+	    {
+	    do_cancel:
+	      gomp_mutex_unlock (&team->task_lock);
+	      gomp_finish_task (task);
+	      free (task);
+	      return;
+	    }
+	  if (taskgroup)
+	    {
+	      if (taskgroup->cancelled)
+		goto do_cancel;
+	      if (taskgroup->workshare
+		  && taskgroup->prev
+		  && taskgroup->prev->cancelled)
+		goto do_cancel;
+	    }
 	}
       if (taskgroup)
 	taskgroup->num_children++;
@@ -463,6 +545,7 @@  GOMP_task (void (*fn) (void *), void *da
 
 ialias (GOMP_taskgroup_start)
 ialias (GOMP_taskgroup_end)
+ialias (GOMP_taskgroup_reduction_register)
 
 #define TYPE long
 #define UTYPE unsigned long
@@ -601,10 +684,20 @@  gomp_create_target_task (struct gomp_dev
   struct gomp_team *team = thr->ts.team;
 
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return true;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	return true;
+      if (thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return true;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return true;
+	}
+    }
 
   struct gomp_target_task *ttask;
   struct gomp_task *task;
@@ -617,7 +710,7 @@  gomp_create_target_task (struct gomp_dev
 
   if (depend != NULL)
     {
-      depend_cnt = (uintptr_t) depend[0];
+      depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
     }
   if (fn)
@@ -687,13 +780,25 @@  gomp_create_target_task (struct gomp_dev
   task->final_task = 0;
   gomp_mutex_lock (&team->task_lock);
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
-			|| (taskgroup && taskgroup->cancelled), 0))
+  if (__builtin_expect (gomp_cancel_var, 0))
     {
-      gomp_mutex_unlock (&team->task_lock);
-      gomp_finish_task (task);
-      free (task);
-      return true;
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	{
+	do_cancel:
+	  gomp_mutex_unlock (&team->task_lock);
+	  gomp_finish_task (task);
+	  free (task);
+	  return true;
+	}
+      if (taskgroup)
+	{
+	  if (taskgroup->cancelled)
+	    goto do_cancel;
+	  if (taskgroup->workshare
+	      && taskgroup->prev
+	      && taskgroup->prev->cancelled)
+	    goto do_cancel;
+	}
     }
   if (depend_size)
     {
@@ -986,10 +1091,21 @@  gomp_task_run_pre (struct gomp_task *chi
 
   if (--team->task_queued_count == 0)
     gomp_team_barrier_clear_task_pending (&team->barrier);
-  if ((gomp_team_barrier_cancelled (&team->barrier)
-       || (taskgroup && taskgroup->cancelled))
+  if (__builtin_expect (gomp_cancel_var, 0)
       && !child_task->copy_ctors_done)
-    return true;
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	return true;
+      if (taskgroup)
+	{
+	  if (taskgroup->cancelled)
+	    return true;
+	  if (taskgroup->workshare
+	      && taskgroup->prev
+	      && taskgroup->prev->cancelled)
+	    return true;
+	}
+    }
   return false;
 }
 
@@ -1456,6 +1572,35 @@  GOMP_taskwait (void)
     }
 }
 
+/* Called when encountering a taskwait directive with depend clause(s).
+   Wait as if it was an mergeable included task construct with empty body.  */
+
+void
+GOMP_taskwait_depend (void **depend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+  /* If parallel or taskgroup has been cancelled, return early.  */
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+	return;
+      if (thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return;
+	}
+    }
+
+  if (thr->task && thr->task->depend_hash)
+    gomp_task_maybe_wait_for_dependencies (depend);
+}
+
 /* An undeferred task is about to run.  Wait for all tasks that this
    undeferred task depends on.
 
@@ -1464,7 +1609,7 @@  GOMP_taskwait (void)
    the scheduling queues.  Then we iterate through these imminently
    ready tasks (and possibly other high priority tasks), and run them.
    If we run out of ready dependencies to execute, we either wait for
-   the reamining dependencies to finish, or wait for them to get
+   the remaining dependencies to finish, or wait for them to get
    scheduled so we can run them.
 
    DEPEND is as in GOMP_task.  */
@@ -1477,21 +1622,50 @@  gomp_task_maybe_wait_for_dependencies (v
   struct gomp_team *team = thr->ts.team;
   struct gomp_task_depend_entry elem, *ent = NULL;
   struct gomp_taskwait taskwait;
-  size_t ndepend = (uintptr_t) depend[0];
+  size_t orig_ndepend = (uintptr_t) depend[0];
   size_t nout = (uintptr_t) depend[1];
+  size_t ndepend = orig_ndepend;
+  size_t normal = ndepend;
+  size_t n = 2;
   size_t i;
   size_t num_awaited = 0;
   struct gomp_task *child_task = NULL;
   struct gomp_task *to_free = NULL;
   int do_wake = 0;
 
+  if (ndepend == 0)
+    {
+      ndepend = nout;
+      nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
+      normal = nout + (uintptr_t) depend[4];
+      n = 5;
+    }
   gomp_mutex_lock (&team->task_lock);
   for (i = 0; i < ndepend; i++)
     {
-      elem.addr = depend[i + 2];
+      elem.addr = depend[i + n];
+      elem.is_in = i >= nout;
+      if (__builtin_expect (i >= normal, 0))
+	{
+	  void **d = (void **) elem.addr;
+	  switch ((uintptr_t) d[1])
+	    {
+	    case GOMP_DEPEND_IN:
+	      break;
+	    case GOMP_DEPEND_OUT:
+	    case GOMP_DEPEND_INOUT:
+	    case GOMP_DEPEND_MUTEXINOUTSET:
+	      elem.is_in = 0;
+	      break;
+	    default:
+	      gomp_fatal ("unknown omp_depend_t dependence type %d",
+			  (int) (uintptr_t) d[1]);
+	    }
+	  elem.addr = d[0];
+	}
       ent = htab_find (task->depend_hash, &elem);
       for (; ent; ent = ent->next)
-	if (i >= nout && ent->is_in)
+	if (elem.is_in && ent->is_in)
 	  continue;
 	else
 	  {
@@ -1654,13 +1828,28 @@  GOMP_taskyield (void)
   /* Nothing at the moment.  */
 }
 
+static inline struct gomp_taskgroup *
+gomp_taskgroup_init (struct gomp_taskgroup *prev)
+{
+  struct gomp_taskgroup *taskgroup
+    = gomp_malloc (sizeof (struct gomp_taskgroup));
+  taskgroup->prev = prev;
+  priority_queue_init (&taskgroup->taskgroup_queue);
+  taskgroup->reductions = prev ? prev->reductions : NULL;
+  taskgroup->in_taskgroup_wait = false;
+  taskgroup->cancelled = false;
+  taskgroup->workshare = false;
+  taskgroup->num_children = 0;
+  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
+  return taskgroup;
+}
+
 void
 GOMP_taskgroup_start (void)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
   struct gomp_task *task = thr->task;
-  struct gomp_taskgroup *taskgroup;
 
   /* If team is NULL, all tasks are executed as
      GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
@@ -1668,14 +1857,7 @@  GOMP_taskgroup_start (void)
      by the time GOMP_taskgroup_end is called.  */
   if (team == NULL)
     return;
-  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
-  taskgroup->prev = task->taskgroup;
-  priority_queue_init (&taskgroup->taskgroup_queue);
-  taskgroup->in_taskgroup_wait = false;
-  taskgroup->cancelled = false;
-  taskgroup->num_children = 0;
-  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
-  task->taskgroup = taskgroup;
+  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
 }
 
 void
@@ -1840,6 +2022,302 @@  GOMP_taskgroup_end (void)
   free (taskgroup);
 }
 
+static inline __attribute__((always_inline)) void
+gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
+			 unsigned nthreads)
+{
+  size_t total_cnt = 0;
+  uintptr_t *d = data;
+  struct htab *old_htab = NULL, *new_htab;
+  do
+    {
+      if (__builtin_expect (orig != NULL, 0))
+	{
+	  /* For worksharing task reductions, memory has been allocated
+	     already by some other thread that encountered the construct
+	     earlier.  */
+	  d[2] = orig[2];
+	  d[6] = orig[6];
+	  orig = (uintptr_t *) orig[4];
+	}
+      else
+	{
+	  size_t sz = d[1] * nthreads;
+	  /* Should use omp_alloc if d[3] is not -1.  */
+	  void *ptr = gomp_aligned_alloc (d[2], sz);
+	  memset (ptr, '\0', sz);
+	  d[2] = (uintptr_t) ptr;
+	  d[6] = d[2] + sz;
+	}
+      d[5] = 0;
+      total_cnt += d[0];
+      if (d[4] == 0)
+	{
+	  d[4] = (uintptr_t) old;
+	  break;
+	}
+      else
+	d = (uintptr_t *) d[4];
+    }
+  while (1);
+  if (old && old[5])
+    {
+      old_htab = (struct htab *) old[5];
+      total_cnt += htab_elements (old_htab);
+    }
+  new_htab = htab_create (total_cnt);
+  if (old_htab)
+    {
+      /* Copy old hash table, like in htab_expand.  */
+      hash_entry_type *p, *olimit;
+      new_htab->n_elements = htab_elements (old_htab);
+      olimit = old_htab->entries + old_htab->size;
+      p = old_htab->entries;
+      do
+	{
+	  hash_entry_type x = *p;
+	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
+	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
+	  p++;
+	}
+      while (p < olimit);
+    }
+  d = data;
+  do
+    {
+      size_t j;
+      for (j = 0; j < d[0]; ++j)
+	{
+	  uintptr_t *p = d + 7 + j * 3;
+	  p[2] = (uintptr_t) d;
+	  /* Ugly hack, hash_entry_type is defined for the task dependencies,
+	     which hash on the first element which is a pointer.  We need
+	     to hash also on the first sizeof (uintptr_t) bytes which contain
+	     a pointer.  Hide the cast from the compiler.  */
+	  hash_entry_type n;
+	  __asm ("" : "=g" (n) : "0" (p));
+	  *htab_find_slot (&new_htab, n, INSERT) = n;
+	}
+      if (d[4] == (uintptr_t) old)
+	break;
+      else
+	d = (uintptr_t *) d[4];
+    }
+  while (1);
+  d[5] = (uintptr_t) new_htab;
+}
+
+static void
+gomp_create_artificial_team (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task_icv *icv;
+  struct gomp_team *team = gomp_new_team (1);
+  struct gomp_task *task = thr->task;
+  icv = task ? &task->icv : &gomp_global_icv;
+  team->prev_ts = thr->ts;
+  thr->ts.team = team;
+  thr->ts.team_id = 0;
+  thr->ts.work_share = &team->work_shares[0];
+  thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+  thr->ts.single_count = 0;
+#endif
+  thr->ts.static_trip = 0;
+  thr->task = &team->implicit_task[0];
+  gomp_init_task (thr->task, NULL, icv);
+  if (task)
+    {
+      thr->task = task;
+      gomp_end_task ();
+      free (task);
+      thr->task = &team->implicit_task[0];
+    }
+#ifdef LIBGOMP_USE_PTHREADS
+  else
+    pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+}
+
+/* The format of data is:
+   data[0]	cnt
+   data[1]	size
+   data[2]	alignment (on output array pointer)
+   data[3]	allocator (-1 if malloc allocator)
+   data[4]	next pointer
+   data[5]	used internally (htab pointer)
+   data[6]	used internally (end of array)
+   cnt times
+   ent[0]	address
+   ent[1]	offset
+   ent[2]	used internally (pointer to data[0])
+   The entries are sorted by increasing offset, so that a binary
+   search can be performed.  Normally, data[8] is 0, exception is
+   for worksharing construct task reductions in cancellable parallel,
+   where at offset 0 there should be space for a pointer and an integer
+   which are used internally.  */
+
+void
+GOMP_taskgroup_reduction_register (uintptr_t *data)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task;
+  unsigned nthreads;
+  if (__builtin_expect (team == NULL, 0))
+    {
+      /* The task reduction code needs a team and task, so for
+	 orphaned taskgroups just create the implicit team.  */
+      gomp_create_artificial_team ();
+      ialias_call (GOMP_taskgroup_start) ();
+      team = thr->ts.team;
+    }
+  nthreads = team->nthreads;
+  task = thr->task;
+  gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
+  task->taskgroup->reductions = data;
+}
+
+void
+GOMP_taskgroup_reduction_unregister (uintptr_t *data)
+{
+  uintptr_t *d = data;
+  htab_free ((struct htab *) data[5]);
+  do
+    {
+      gomp_aligned_free ((void *) d[2]);
+      d = (uintptr_t *) d[4];
+    }
+  while (d && !d[5]);
+}
+ialias (GOMP_taskgroup_reduction_unregister)
+
+/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
+   original list item or address of previously remapped original list
+   item to address of the private copy, store that to ptrs[i].
+   For i < cntorig, additionally set ptrs[cnt+i] to the address of
+   the original list item.  */
+
+void
+GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task *task = thr->task;
+  unsigned id = thr->ts.team_id;
+  uintptr_t *data = task->taskgroup->reductions;
+  uintptr_t *d;
+  struct htab *reduction_htab = (struct htab *) data[5];
+  size_t i;
+  for (i = 0; i < cnt; ++i)
+    {
+      hash_entry_type ent, n;
+      __asm ("" : "=g" (ent) : "0" (ptrs + i));
+      n = htab_find (reduction_htab, ent);
+      if (n)
+	{
+	  uintptr_t *p;
+	  __asm ("" : "=g" (p) : "0" (n));
+	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
+	     p[1] is the offset within the allocated chunk for each
+	     thread, p[2] is the array registered with
+	     GOMP_taskgroup_reduction_register, d[2] is the base of the
+	     allocated memory and d[1] is the size of the allocated chunk
+	     for one thread.  */
+	  d = (uintptr_t *) p[2];
+	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
+	  if (__builtin_expect (i < cntorig, 0))
+	    ptrs[cnt + i] = (void *) p[0];
+	  continue;
+	}
+      d = data;
+      while (d != NULL)
+	{
+	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
+	    break;
+	  d = (uintptr_t *) d[4];
+	}
+      if (d == NULL)
+	gomp_fatal ("couldn't find matching task_reduction or reduction with "
+		    "task modifier for %p", ptrs[i]);
+      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
+      ptrs[i] = (void *) (d[2] + id * d[1] + off);
+      if (__builtin_expect (i < cntorig, 0))
+	{
+	  size_t lo = 0, hi = d[0] - 1;
+	  while (lo <= hi)
+	    {
+	      size_t m = (lo + hi) / 2;
+	      if (d[7 + 3 * m + 1] < off)
+		lo = m + 1;
+	      else if (d[7 + 3 * m + 1] == off)
+		{
+		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
+		  break;
+		}
+	      else
+		hi = m - 1;
+	    }
+	  if (lo > hi)
+	    gomp_fatal ("couldn't find matching task_reduction or reduction "
+			"with task modifier for %p", ptrs[i]);
+	}
+    }
+}
+
+struct gomp_taskgroup *
+gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
+{
+  struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
+  gomp_reduction_register (data, NULL, NULL, nthreads);
+  taskgroup->reductions = data;
+  return taskgroup;
+}
+
+void
+gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task = thr->task;
+  unsigned nthreads = team->nthreads;
+  gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
+  task->taskgroup->reductions = data;
+}
+
+void
+gomp_workshare_taskgroup_start (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task;
+
+  if (team == NULL)
+    {
+      gomp_create_artificial_team ();
+      team = thr->ts.team;
+    }
+  task = thr->task;
+  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
+  task->taskgroup->workshare = true;
+}
+
+void
+GOMP_workshare_task_reduction_unregister (bool cancelled)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task *task = thr->task;
+  struct gomp_team *team = thr->ts.team;
+  uintptr_t *data = task->taskgroup->reductions;
+  ialias_call (GOMP_taskgroup_end) ();
+  if (thr->ts.team_id == 0)
+    ialias_call (GOMP_taskgroup_reduction_unregister) (data);
+  else
+    htab_free ((struct htab *) data[5]);
+
+  if (!cancelled)
+    gomp_team_barrier_wait (&team->barrier);
+}
+
 int
 omp_in_final (void)
 {
--- libgomp/taskloop.c	(.../trunk)	(revision 265884)
+++ libgomp/taskloop.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -149,11 +149,28 @@  GOMP_taskloop (void (*fn) (void *), void
 
   if (flags & GOMP_TASK_FLAG_NOGROUP)
     {
-      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
-	return;
+      if (__builtin_expect (gomp_cancel_var, 0)
+	  && thr->task
+	  && thr->task->taskgroup)
+	{
+	  if (thr->task->taskgroup->cancelled)
+	    return;
+	  if (thr->task->taskgroup->workshare
+	      && thr->task->taskgroup->prev
+	      && thr->task->taskgroup->prev->cancelled)
+	    return;
+	}
     }
   else
-    ialias_call (GOMP_taskgroup_start) ();
+    {
+      ialias_call (GOMP_taskgroup_start) ();
+      if (flags & GOMP_TASK_FLAG_REDUCTION)
+	{
+	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
+	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
+	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
+	}
+    }
 
   if (priority > gomp_max_task_priority_var)
     priority = gomp_max_task_priority_var;
@@ -284,19 +301,31 @@  GOMP_taskloop (void (*fn) (void *), void
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
 	 tasks.  */
-      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
-			     || (taskgroup && taskgroup->cancelled))
-			    && cpyfn == NULL, 0))
+      if (__builtin_expect (gomp_cancel_var, 0)
+	  && cpyfn == NULL)
 	{
-	  gomp_mutex_unlock (&team->task_lock);
-	  for (i = 0; i < num_tasks; i++)
+	  if (gomp_team_barrier_cancelled (&team->barrier))
+	    {
+	    do_cancel:
+	      gomp_mutex_unlock (&team->task_lock);
+	      for (i = 0; i < num_tasks; i++)
+		{
+		  gomp_finish_task (tasks[i]);
+		  free (tasks[i]);
+		}
+	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+		ialias_call (GOMP_taskgroup_end) ();
+	      return;
+	    }
+	  if (taskgroup)
 	    {
-	      gomp_finish_task (tasks[i]);
-	      free (tasks[i]);
+	      if (taskgroup->cancelled)
+		goto do_cancel;
+	      if (taskgroup->workshare
+		  && taskgroup->prev
+		  && taskgroup->prev->cancelled)
+		goto do_cancel;
 	    }
-	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
-	    ialias_call (GOMP_taskgroup_end) ();
-	  return;
 	}
       if (taskgroup)
 	taskgroup->num_children += num_tasks;
--- libgomp/team.c	(.../trunk)	(revision 265884)
+++ libgomp/team.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -32,7 +32,6 @@ 
 #include <string.h>
 
 #ifdef LIBGOMP_USE_PTHREADS
-/* This attribute contains PTHREAD_CREATE_DETACHED.  */
 pthread_attr_t gomp_thread_attr;
 
 /* This key is for the thread destructor.  */
@@ -58,6 +57,7 @@  struct gomp_thread_start_data
   struct gomp_thread_pool *thread_pool;
   unsigned int place;
   bool nested;
+  pthread_t handle;
 };
 
 
@@ -89,6 +89,9 @@  gomp_thread_start (void *xdata)
   thr->ts = data->ts;
   thr->task = data->task;
   thr->place = data->place;
+#ifdef GOMP_NEEDS_THREAD_HANDLE
+  thr->handle = data->handle;
+#endif
 
   thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
 
@@ -131,6 +134,7 @@  gomp_thread_start (void *xdata)
     }
 
   gomp_sem_destroy (&thr->release);
+  pthread_detach (pthread_self ());
   thr->thread_pool = NULL;
   thr->task = NULL;
   return NULL;
@@ -183,7 +187,7 @@  gomp_new_team (unsigned nthreads)
   team->single_count = 0;
 #endif
   team->work_shares_to_free = &team->work_shares[0];
-  gomp_init_work_share (&team->work_shares[0], false, nthreads);
+  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
   team->work_shares[0].next_alloc = NULL;
   team->work_share_list_free = NULL;
   team->work_share_list_alloc = &team->work_shares[1];
@@ -231,6 +235,7 @@  gomp_free_pool_helper (void *thread_pool
   thr->thread_pool = NULL;
   thr->task = NULL;
 #ifdef LIBGOMP_USE_PTHREADS
+  pthread_detach (pthread_self ());
   pthread_exit (NULL);
 #elif defined(__nvptx__)
   asm ("exit;");
@@ -297,7 +302,8 @@  gomp_free_thread (void *arg __attribute_
 #ifdef LIBGOMP_USE_PTHREADS
 void
 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
-		 unsigned flags, struct gomp_team *team)
+		 unsigned flags, struct gomp_team *team,
+		 struct gomp_taskgroup *taskgroup)
 {
   struct gomp_thread_start_data *start_data;
   struct gomp_thread *thr, *nthr;
@@ -312,6 +318,7 @@  gomp_team_start (void (*fn) (void *), vo
   unsigned int s = 0, rest = 0, p = 0, k = 0;
   unsigned int affinity_count = 0;
   struct gomp_thread **affinity_thr = NULL;
+  bool force_display = false;
 
   thr = gomp_thread ();
   nested = thr->ts.level;
@@ -319,7 +326,12 @@  gomp_team_start (void (*fn) (void *), vo
   task = thr->task;
   icv = task ? &task->icv : &gomp_global_icv;
   if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
-    gomp_init_affinity ();
+    {
+      gomp_init_affinity ();
+      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
+	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
+				      thr->place);
+    }
 
   /* Always save the previous state, even if this isn't a nested team.
      In particular, we should save any work share state from an outer
@@ -338,6 +350,9 @@  gomp_team_start (void (*fn) (void *), vo
 #endif
   thr->ts.static_trip = 0;
   thr->task = &team->implicit_task[0];
+#ifdef GOMP_NEEDS_THREAD_HANDLE
+  thr->handle = pthread_self ();
+#endif
   nthreads_var = icv->nthreads_var;
   if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
       && thr->ts.level < gomp_nthreads_var_list_len)
@@ -350,6 +365,7 @@  gomp_team_start (void (*fn) (void *), vo
       && thr->ts.level < gomp_bind_var_list_len)
     bind_var = gomp_bind_var_list[thr->ts.level];
   gomp_init_task (thr->task, task, icv);
+  thr->task->taskgroup = taskgroup;
   team->implicit_task[0].icv.nthreads_var = nthreads_var;
   team->implicit_task[0].icv.bind_var = bind_var;
 
@@ -465,7 +481,7 @@  gomp_team_start (void (*fn) (void *), vo
 	  pool->threads
 	    = gomp_realloc (pool->threads,
 			    pool->threads_size
-			    * sizeof (struct gomp_thread_data *));
+			    * sizeof (struct gomp_thread *));
 	}
 
       /* Release existing idle threads.  */
@@ -540,6 +556,7 @@  gomp_team_start (void (*fn) (void *), vo
 						+ place_partition_len))
 		{
 		  unsigned int l;
+		  force_display = true;
 		  if (affinity_thr == NULL)
 		    {
 		      unsigned int j;
@@ -623,6 +640,7 @@  gomp_team_start (void (*fn) (void *), vo
 	  gomp_init_task (nthr->task, task, icv);
 	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
 	  team->implicit_task[i].icv.bind_var = bind_var;
+	  nthr->task->taskgroup = taskgroup;
 	  nthr->fn = fn;
 	  nthr->data = data;
 	  team->ordered_release[i] = &nthr->release;
@@ -712,19 +730,17 @@  gomp_team_start (void (*fn) (void *), vo
     {
       size_t stacksize;
       pthread_attr_init (&thread_attr);
-      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
 	pthread_attr_setstacksize (&thread_attr, stacksize);
       attr = &thread_attr;
     }
 
   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
-			    * (nthreads-i));
+			    * (nthreads - i));
 
   /* Launch new threads.  */
   for (; i < nthreads; ++i)
     {
-      pthread_t pt;
       int err;
 
       start_data->ts.place_partition_off = thr->ts.place_partition_off;
@@ -810,11 +826,14 @@  gomp_team_start (void (*fn) (void *), vo
       gomp_init_task (start_data->task, task, icv);
       team->implicit_task[i].icv.nthreads_var = nthreads_var;
       team->implicit_task[i].icv.bind_var = bind_var;
+      start_data->task->taskgroup = taskgroup;
       start_data->thread_pool = pool;
       start_data->nested = nested;
 
       attr = gomp_adjust_thread_attr (attr, &thread_attr);
-      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
+      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
+			    start_data);
+      start_data++;
       if (err != 0)
 	gomp_fatal ("Thread creation failed: %s", strerror (err));
     }
@@ -854,6 +873,42 @@  gomp_team_start (void (*fn) (void *), vo
       gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
     }
+  if (__builtin_expect (gomp_display_affinity_var, 0))
+    {
+      if (nested
+	  || nthreads != old_threads_used
+	  || force_display)
+	{
+	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
+					thr->place);
+	  if (nested)
+	    {
+	      start_data -= nthreads - 1;
+	      for (i = 1; i < nthreads; ++i)
+		{
+		  gomp_display_affinity_thread (
+#ifdef LIBGOMP_USE_PTHREADS
+						start_data->handle,
+#else
+						gomp_thread_self (),
+#endif
+						&start_data->ts,
+						start_data->place);
+		  start_data++;
+		}
+	    }
+	  else
+	    {
+	      for (i = 1; i < nthreads; ++i)
+		{
+		  gomp_thread_handle handle
+		    = gomp_thread_to_pthread_t (pool->threads[i]);
+		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
+						pool->threads[i]->place);
+		}
+	    }
+	}
+    }
   if (__builtin_expect (affinity_thr != NULL, 0)
       && team->prev_ts.place_partition_len > 64)
     free (affinity_thr);
@@ -894,7 +949,7 @@  gomp_team_end (void)
   gomp_end_task ();
   thr->ts = team->prev_ts;
 
-  if (__builtin_expect (thr->ts.team != NULL, 0))
+  if (__builtin_expect (thr->ts.level != 0, 0))
     {
 #ifdef HAVE_SYNC_BUILTINS
       __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
@@ -959,6 +1014,76 @@  team_destructor (void)
      crashes.  */
   pthread_key_delete (gomp_thread_destructor);
 }
+
+/* Similar to gomp_free_pool_helper, but don't detach itself,
+   gomp_pause_host will pthread_join those threads.  */
+
+static void
+gomp_pause_pool_helper (void *thread_pool)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_thread_pool *pool
+    = (struct gomp_thread_pool *) thread_pool;
+  gomp_simple_barrier_wait_last (&pool->threads_dock);
+  gomp_sem_destroy (&thr->release);
+  thr->thread_pool = NULL;
+  thr->task = NULL;
+  pthread_exit (NULL);
+}
+
+/* Free a thread pool and release its threads.  Return non-zero on
+   failure.  */
+
+int
+gomp_pause_host (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_thread_pool *pool = thr->thread_pool;
+  if (thr->ts.level)
+    return -1;
+  if (pool)
+    {
+      if (pool->threads_used > 0)
+	{
+	  int i;
+	  pthread_t *thrs
+	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
+	  for (i = 1; i < pool->threads_used; i++)
+	    {
+	      struct gomp_thread *nthr = pool->threads[i];
+	      nthr->fn = gomp_pause_pool_helper;
+	      nthr->data = pool;
+	      thrs[i] = gomp_thread_to_pthread_t (nthr);
+	    }
+	  /* This barrier undocks threads docked on pool->threads_dock.  */
+	  gomp_simple_barrier_wait (&pool->threads_dock);
+	  /* And this waits till all threads have called gomp_barrier_wait_last
+	     in gomp_pause_pool_helper.  */
+	  gomp_simple_barrier_wait (&pool->threads_dock);
+	  /* Now it is safe to destroy the barrier and free the pool.  */
+	  gomp_simple_barrier_destroy (&pool->threads_dock);
+
+#ifdef HAVE_SYNC_BUILTINS
+	  __sync_fetch_and_add (&gomp_managed_threads,
+				1L - pool->threads_used);
+#else
+	  gomp_mutex_lock (&gomp_managed_threads_lock);
+	  gomp_managed_threads -= pool->threads_used - 1L;
+	  gomp_mutex_unlock (&gomp_managed_threads_lock);
+#endif
+	  for (i = 1; i < pool->threads_used; i++)
+	    pthread_join (thrs[i], NULL);
+	}
+      if (pool->last_team)
+	free_team (pool->last_team);
+#ifndef __nvptx__
+      free (pool->threads);
+      free (pool);
+#endif
+      thr->thread_pool = NULL;
+    }
+  return 0;
+}
 #endif
 
 struct gomp_task_icv *
--- libgomp/teams.c	(.../trunk)	(nonexistent)
+++ libgomp/teams.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -0,0 +1,73 @@ 
+/* Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file handles the host TEAMS construct.  */
+
+#include "libgomp.h"
+
+static unsigned gomp_num_teams = 1, gomp_team_num = 0;
+
+void
+GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
+		unsigned int thread_limit, unsigned int flags)
+{
+  (void) flags;
+  (void) num_teams;
+  unsigned old_thread_limit_var = 0;
+  if (thread_limit)
+    {
+      struct gomp_task_icv *icv = gomp_icv (true);
+      old_thread_limit_var = icv->thread_limit_var;
+      icv->thread_limit_var
+	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+    }
+  if (num_teams == 0)
+    num_teams = 3;
+  gomp_num_teams = num_teams;
+  for (gomp_team_num = 0; gomp_team_num < num_teams; gomp_team_num++)
+    fn (data);
+  gomp_num_teams = 1;
+  gomp_team_num = 0;
+  if (thread_limit)
+    {
+      struct gomp_task_icv *icv = gomp_icv (true);
+      icv->thread_limit_var = old_thread_limit_var;
+    }
+}
+
+int
+omp_get_num_teams (void)
+{
+  return gomp_num_teams;
+}
+
+int
+omp_get_team_num (void)
+{
+  return gomp_team_num;
+}
+
+ialias (omp_get_num_teams)
+ialias (omp_get_team_num)
--- libgomp/work.c	(.../trunk)	(revision 265884)
+++ libgomp/work.c	(.../branches/gomp-5_0-branch)	(revision 265887)
@@ -76,7 +76,15 @@  alloc_work_share (struct gomp_team *team
 #endif
 
   team->work_share_chunk *= 2;
+  /* Allocating gomp_work_share structures aligned is just an
+     optimization, don't do it when using the fallback method.  */
+#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
+			   team->work_share_chunk
+			   * sizeof (struct gomp_work_share));
+#else
   ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
+#endif
   ws->next_alloc = team->work_shares[0].next_alloc;
   team->work_shares[0].next_alloc = ws;
   team->work_share_list_alloc = &ws[1];
@@ -90,30 +98,35 @@  alloc_work_share (struct gomp_team *team
    This shouldn't touch the next_alloc field.  */
 
 void
-gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
+gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
 		      unsigned nthreads)
 {
   gomp_mutex_init (&ws->lock);
   if (__builtin_expect (ordered, 0))
     {
-#define INLINE_ORDERED_TEAM_IDS_CNT \
-  ((sizeof (struct gomp_work_share) \
-    - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
-   / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
-
-      if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
-	ws->ordered_team_ids
-	  = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
+#define INLINE_ORDERED_TEAM_IDS_SIZE \
+  (sizeof (struct gomp_work_share) \
+   - offsetof (struct gomp_work_share, inline_ordered_team_ids))
+
+      if (__builtin_expect (ordered != 1, 0))
+	{
+	  ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1;
+	  ordered = ordered + __alignof__ (long long) - 1;
+	  ordered &= ~(__alignof__ (long long) - 1);
+	}
+      else
+	ordered = nthreads * sizeof (*ws->ordered_team_ids);
+      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
+	ws->ordered_team_ids = gomp_malloc (ordered);
       else
 	ws->ordered_team_ids = ws->inline_ordered_team_ids;
-      memset (ws->ordered_team_ids, '\0',
-	      nthreads * sizeof (*ws->ordered_team_ids));
+      memset (ws->ordered_team_ids, '\0', ordered);
       ws->ordered_num_used = 0;
       ws->ordered_owner = -1;
       ws->ordered_cur = 0;
     }
   else
-    ws->ordered_team_ids = NULL;
+    ws->ordered_team_ids = ws->inline_ordered_team_ids;
   gomp_ptrlock_init (&ws->next_ws, NULL);
   ws->threads_completed = 0;
 }
@@ -166,7 +179,7 @@  free_work_share (struct gomp_team *team,
    if this was the first thread to reach this point.  */
 
 bool
-gomp_work_share_start (bool ordered)
+gomp_work_share_start (size_t ordered)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
@@ -178,7 +191,7 @@  gomp_work_share_start (bool ordered)
       ws = gomp_malloc (sizeof (*ws));
       gomp_init_work_share (ws, ordered, 1);
       thr->ts.work_share = ws;
-      return ws;
+      return true;
     }
 
   ws = thr->ts.work_share;