rwlock: Fix explicit hand-over.
diff mbox

Message ID 1490471341.26906.366.camel@redhat.com
State New
Headers show

Commit Message

Torvald Riegel March 25, 2017, 7:49 p.m. UTC
This patch fixes a bug in the new rwlock's detection of when explicit
hand-over has to happen (see the description of the rwlock for what this
is).
Without this patch, the rwlock can fail to execute the explicit
hand-over in certain cases (e.g., empty critical sections that switch
quickly between read and write phases).  This can then lead to errors in
how __wrphase_futex is accessed, which in turn can lead to deadlocks.

This bug was reported by Waiman Long.
(The testcase in the patch is a reduced form of your benchmark.  It
would be nice if you could test this too with your benchmark/machine).

Tested on x86_64.  The new test triggers the bug with the unfixed
rwlock, but not always.  We could make the duration of the test longer
to increase the likelihood of the test triggering the bug, but this
would increase the testsuite runtime, so I'm not sure whether this is
worth it.

	[BZ #21298]
	* nptl/pthread_rwlock_common.c (__pthread_rwlock_rdlock_full): Fix
	explicit hand-over.
	(__pthread_rwlock_wrlock_full): Likewise.
	* nptl/tst-rwlock20.c: New file.
	* nptl/Makefile: Add new test.

Comments

Florian Weimer March 25, 2017, 8:17 p.m. UTC | #1
* Torvald Riegel:

> +  bool registered_while_in_write_phase = false;
>    if (__glibc_likely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
>      return 0;
> +  else
> +    registered_while_in_write_phase = true;

Sorry, this doesn't look quite right.  Isn't
registered_while_in_write_phase always true?
Torvald Riegel March 25, 2017, 8:36 p.m. UTC | #2
On Sat, 2017-03-25 at 21:17 +0100, Florian Weimer wrote:
> * Torvald Riegel:
> 
> > +  bool registered_while_in_write_phase = false;
> >    if (__glibc_likely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
> >      return 0;
> > +  else
> > +    registered_while_in_write_phase = true;
> 
> Sorry, this doesn't look quite right.  Isn't
> registered_while_in_write_phase always true?

It's intended and I think it's correct, but I agree it's not pretty :)

I'll fix this by cleaning up the code a little more in v2 of this patch.

Patch
diff mbox

commit da1a4b2807e489f199aa966050fe0b7a7a82a856
Author: Torvald Riegel <triegel@redhat.com>
Date:   Sat Mar 25 00:36:46 2017 +0100

    rwlock: Fix explicit hand-over.
    
    Without this patch, the rwlock can fail to execute the explicit hand-over
    in certain cases (e.g., empty critical sections that switch quickly between
    read and write phases).  This can then lead to errors in how __wrphase_futex
    is accessed, which in turn can lead to deadlocks.
    
    	[BZ #21298]
    	* nptl/pthread_rwlock_common.c (__pthread_rwlock_rdlock_full): Fix
    	explicit hand-over.
    	(__pthread_rwlock_wrlock_full): Likewise.
    	* nptl/tst-rwlock20.c: New file.
    	* nptl/Makefile: Add new test.

diff --git a/nptl/Makefile b/nptl/Makefile
index 6d48c0c..0cc2873 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -241,7 +241,7 @@  tests = tst-typesizes \
 	tst-rwlock4 tst-rwlock5 tst-rwlock6 tst-rwlock7 tst-rwlock8 \
 	tst-rwlock9 tst-rwlock10 tst-rwlock11 tst-rwlock12 tst-rwlock13 \
 	tst-rwlock14 tst-rwlock15 tst-rwlock16 tst-rwlock17 tst-rwlock18 \
-	tst-rwlock19 \
+	tst-rwlock19 tst-rwlock20 \
 	tst-once1 tst-once2 tst-once3 tst-once4 tst-once5 \
 	tst-key1 tst-key2 tst-key3 tst-key4 \
 	tst-sem1 tst-sem2 tst-sem3 tst-sem4 tst-sem5 tst-sem6 tst-sem7 \
diff --git a/nptl/pthread_rwlock_common.c b/nptl/pthread_rwlock_common.c
index 256508c..86516e5 100644
--- a/nptl/pthread_rwlock_common.c
+++ b/nptl/pthread_rwlock_common.c
@@ -372,9 +372,17 @@  __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,
      cannot have acquired the lock previously as a reader (which could result
      in deadlock if we would wait for the primary writer to run).  However,
      this seems to be a corner case and handling it specially not be worth the
-     complexity.  */
+     complexity.
+     Otherwise, if we were in a write phase, we save this fact so that it is
+     not lost when we overwrite r when trying to install a read phase (see
+     below; those attempts could observe a read phase that has been started
+     by a writer in the meantime, which in turn could result in incorrectly
+     skipping explicit hand-over).  */
+  bool registered_while_in_write_phase = false;
   if (__glibc_likely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
     return 0;
+  else
+    registered_while_in_write_phase = true;
 
   /* If there is no primary writer but we are in a write phase, we can try
      to install a read phase ourself.  */
@@ -390,15 +398,17 @@  __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,
 	{
 	  /* We started the read phase, so we are also responsible for
 	     updating the write-phase futex.  Relaxed MO is sufficient.
-	     Note that there can be no other reader that we have to wake
-	     because all other readers will see the read phase started by us
-	     (or they will try to start it themselves); if a writer started
-	     the read phase, we cannot have started it.  Furthermore, we
-	     cannot discard a PTHREAD_RWLOCK_FUTEX_USED flag because we will
-	     overwrite the value set by the most recent writer (or the readers
-	     before it in case of explicit hand-over) and we know that there
-	     are no waiting readers.  */
-	  atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 0);
+	     We have to do the same steps as a writer would when handing
+	     over the read phase to us because other readers cannot
+	     distinguish between us and the writer.  Therefore, we must
+	     take the same steps as a writer would, which includes
+	     potentially having to wake other readers.  */
+	  if ((atomic_exchange_relaxed (&rwlock->__data.__wrphase_futex, 0)
+	      & PTHREAD_RWLOCK_FUTEX_USED) != 0)
+	    {
+	      int private = __pthread_rwlock_get_private (rwlock);
+	      futex_wake (&rwlock->__data.__wrphase_futex, INT_MAX, private);
+	    }
 	  return 0;
 	}
       else
@@ -407,11 +417,11 @@  __pthread_rwlock_rdlock_full (pthread_rwlock_t *rwlock,
 	}
     }
 
-  if ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
+  if (registered_while_in_write_phase)
     {
-      /* We are in a write phase, and there must be a primary writer because
-	 of the previous loop.  Block until the primary writer gives up the
-	 write phase.  This case requires explicit hand-over using
+      /* We are or were in a write phase, and there must be a primary writer
+	 because of the previous loop.  Block until the primary writer gives
+	 up the write phase.  This case requires explicit hand-over using
 	 __wrphase_futex.
 	 However, __wrphase_futex might not have been set to 1 yet (either
 	 because explicit hand-over to the writer is still ongoing, or because
@@ -741,10 +751,17 @@  __pthread_rwlock_wrlock_full (pthread_rwlock_t *rwlock,
 	  r = atomic_load_relaxed (&rwlock->__data.__readers);
 	}
       /* Our snapshot of __readers is up-to-date at this point because we
-	 either set WRLOCKED using a CAS or were handed over WRLOCKED from
+	 either set WRLOCKED using a CAS (and update r accordingly below,
+	 which was used as expected value for the CAS) or got WRLOCKED from
 	 another writer whose snapshot of __readers we inherit.  */
+      r |= PTHREAD_RWLOCK_WRLOCKED;
     }
 
+  /* Save whether we got wrlocked while being in a read phase so that this
+     fact isn't lost when we reuse r when trying to install a write phase
+     below.  Also see __pthread_rwlock_rdlock_full.  */
+  bool got_wrlocked_while_in_read_phase = (r & PTHREAD_RWLOCK_WRPHASE) == 0;
+
   /* If we are in a read phase and there are no readers, try to start a write
      phase.  */
   while (((r & PTHREAD_RWLOCK_WRPHASE) == 0)
@@ -758,10 +775,13 @@  __pthread_rwlock_wrlock_full (pthread_rwlock_t *rwlock,
 	  &r, r | PTHREAD_RWLOCK_WRPHASE))
 	{
 	  /* We have started a write phase, so need to enable readers to wait.
-	     See the similar case in__pthread_rwlock_rdlock_full.  */
+	     See the similar case in __pthread_rwlock_rdlock_full.  Unlike in
+	     that similar case, we are the (only) primary writer and so do
+	     not need to wake another writer.  */
 	  atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 1);
+
 	  /* Make sure we fall through to the end of the function.  */
-	  r |= PTHREAD_RWLOCK_WRPHASE;
+	  got_wrlocked_while_in_read_phase = false;
 	  break;
 	}
       /* TODO Back-off.  */
@@ -774,7 +794,7 @@  __pthread_rwlock_wrlock_full (pthread_rwlock_t *rwlock,
   atomic_store_relaxed (&rwlock->__data.__writers_futex,
       1 | (may_share_futex_used_flag ? PTHREAD_RWLOCK_FUTEX_USED : 0));
 
-  if (__glibc_unlikely ((r & PTHREAD_RWLOCK_WRPHASE) == 0))
+  if (__glibc_unlikely (got_wrlocked_while_in_read_phase))
     {
       /* We are not in a read phase and there are readers (because of the
 	 previous loop).  Thus, we have to wait for explicit hand-over from
diff --git a/nptl/tst-rwlock20.c b/nptl/tst-rwlock20.c
new file mode 100644
index 0000000..fc636e3
--- /dev/null
+++ b/nptl/tst-rwlock20.c
@@ -0,0 +1,128 @@ 
+/* Test program for a read-phase / write-phase explicit hand-over.
+   Copyright (C) 2000-2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <error.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <time.h>
+#include <atomic.h>
+
+#define THREADS 2
+
+#define KIND PTHREAD_RWLOCK_PREFER_READER_NP
+
+static pthread_rwlock_t lock;
+static int done = 0;
+
+static void*
+tf (void* arg)
+{
+  while (atomic_load_relaxed (&done) == 0)
+    {
+      int rcnt = 0;
+      int wcnt = 100;
+      if ((uintptr_t) arg == 0)
+	{
+	  rcnt = 1;
+	  wcnt = 1;
+	}
+
+      do
+	{
+	  if (wcnt)
+	    {
+	      pthread_rwlock_wrlock (&lock);
+	      pthread_rwlock_unlock (&lock);
+	      wcnt--;
+	  }
+	  if (rcnt)
+	    {
+	      pthread_rwlock_rdlock (&lock);
+	      pthread_rwlock_unlock (&lock);
+	      rcnt--;
+	  }
+	}
+      while ((atomic_load_relaxed (&done) == 0) && (rcnt + wcnt > 0));
+
+    }
+    return NULL;
+}
+
+
+
+static int
+do_test (void)
+{
+  pthread_t thr[THREADS];
+  int n;
+  void *res;
+  pthread_rwlockattr_t a;
+
+  if (pthread_rwlockattr_init (&a) != 0)
+    {
+      puts ("rwlockattr_t failed");
+      exit (1);
+    }
+
+  if (pthread_rwlockattr_setkind_np (&a, KIND) != 0)
+    {
+      puts ("rwlockattr_setkind failed");
+      exit (1);
+    }
+
+  if (pthread_rwlock_init (&lock, &a) != 0)
+    {
+      puts ("rwlock_init failed");
+      exit (1);
+    }
+
+  /* Make standard error the same as standard output.  */
+  dup2 (1, 2);
+
+  /* Make sure we see all message, even those on stdout.  */
+  setvbuf (stdout, NULL, _IONBF, 0);
+
+  for (n = 0; n < THREADS; ++n)
+    if (pthread_create (&thr[n], NULL, tf, (void *) (uintptr_t) n) != 0)
+      {
+	puts ("thread create failed");
+	exit (1);
+      }
+  struct timespec delay;
+  delay.tv_sec = 10;
+  delay.tv_nsec = 0;
+  nanosleep (&delay, NULL);
+  atomic_store_relaxed (&done, 1);
+
+  /* Wait for all the threads.  */
+  for (n = 0; n < THREADS; ++n)
+    if (pthread_join (thr[n], &res) != 0)
+      {
+	puts ("writer join failed");
+	exit (1);
+      }
+
+  return 0;
+}
+
+#define TIMEOUT 15
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"