Index: libgomp/task.c
===================================================================
--- libgomp/task.c	(revision 181718)
+++ libgomp/task.c	(working copy)
@@ -273,6 +273,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
 	      gomp_team_barrier_done (&team->barrier, state);
 	      gomp_mutex_unlock (&team->task_lock);
 	      gomp_team_barrier_wake (&team->barrier, 0);
+	      gomp_mutex_lock (&team->task_lock);
 	    }
 	}
     }
Index: libgomp/config/linux/bar.h
===================================================================
--- libgomp/config/linux/bar.h	(revision 181770)
+++ libgomp/config/linux/bar.h	(working copy)
@@ -50,7 +50,7 @@ static inline void gomp_barrier_init (go
 
 static inline void gomp_barrier_reinit (gomp_barrier_t *bar, unsigned count)
 {
-  __sync_fetch_and_add (&bar->awaited, count - bar->total);
+  __atomic_add_fetch (&bar->awaited, count - bar->total, MEMMODEL_ACQ_REL);
   bar->total = count;
 }
 
@@ -69,10 +69,8 @@ extern void gomp_team_barrier_wake (gomp
 static inline gomp_barrier_state_t
 gomp_barrier_wait_start (gomp_barrier_t *bar)
 {
-  unsigned int ret = bar->generation & ~3;
-  /* Do we need any barrier here or is __sync_add_and_fetch acting
-     as the needed LoadLoad barrier already?  */
-  ret += __sync_add_and_fetch (&bar->awaited, -1) == 0;
+  unsigned int ret = __atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE) & ~3;
+  ret += __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0;
   return ret;
 }
 
Index: libgomp/config/linux/bar.c
===================================================================
--- libgomp/config/linux/bar.c	(revision 181770)
+++ libgomp/config/linux/bar.c	(working copy)
@@ -37,17 +37,15 @@ gomp_barrier_wait_end (gomp_barrier_t *b
     {
       /* Next time we'll be awaiting TOTAL threads again.  */
       bar->awaited = bar->total;
-      atomic_write_barrier ();
-      bar->generation += 4;
+      __atomic_store_4 (&bar->generation, bar->generation + 4,
+			MEMMODEL_RELEASE);
       futex_wake ((int *) &bar->generation, INT_MAX);
     }
   else
     {
-      unsigned int generation = state;
-
       do
-	do_wait ((int *) &bar->generation, generation);
-      while (bar->generation == generation);
+	do_wait ((int *) &bar->generation, state);
+      while (__atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE) == state);
     }
 }
 
@@ -81,15 +79,15 @@ gomp_team_barrier_wake (gomp_barrier_t *
 void
 gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
 {
-  unsigned int generation;
+  unsigned int generation, gen;
 
   if (__builtin_expect ((state & 1) != 0, 0))
     {
       /* Next time we'll be awaiting TOTAL threads again.  */
       struct gomp_thread *thr = gomp_thread ();
       struct gomp_team *team = thr->ts.team;
+
       bar->awaited = bar->total;
-      atomic_write_barrier ();
       if (__builtin_expect (team->task_count, 0))
 	{
 	  gomp_barrier_handle_tasks (state);
@@ -97,7 +95,7 @@ gomp_team_barrier_wait_end (gomp_barrier
 	}
       else
 	{
-	  bar->generation = state + 3;
+	  __atomic_store_4 (&bar->generation, state + 3, MEMMODEL_RELEASE);
 	  futex_wake ((int *) &bar->generation, INT_MAX);
 	  return;
 	}
@@ -107,12 +105,16 @@ gomp_team_barrier_wait_end (gomp_barrier
   do
     {
       do_wait ((int *) &bar->generation, generation);
-      if (__builtin_expect (bar->generation & 1, 0))
-	gomp_barrier_handle_tasks (state);
-      if ((bar->generation & 2))
+      gen = __atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (gen & 1, 0))
+	{
+	  gomp_barrier_handle_tasks (state);
+	  gen = __atomic_load_4 (&bar->generation, MEMMODEL_ACQUIRE);
+	}
+      if ((gen & 2) != 0)
 	generation |= 2;
     }
-  while (bar->generation != state + 4);
+  while (gen != state + 4);
 }
 
 void
