[08/12] qemu-thread: report RCU quiescent states

Message ID 1368632937-32272-9-git-send-email-pbonzini@redhat.com
State New

Commit Message

Paolo Bonzini May 15, 2013, 3:48 p.m. UTC
Most threads will use mutexes and other sleeping synchronization primitives
(condition variables, semaphores, events) periodically.  For these threads,
the synchronization primitives are natural places to report a quiescent
state (possibly an extended one).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 docs/rcu.txt             | 28 ++++++++++++++++++++++++++++
 util/qemu-thread-posix.c | 30 ++++++++++++++++++++++++++----
 util/qemu-thread-win32.c | 16 +++++++++++++++-
 util/rcu.c               |  3 ---
 4 files changed, 69 insertions(+), 8 deletions(-)
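
The pattern applied throughout the series is uniform: a thread cannot be inside
an RCU read-side critical section while it sleeps, so each blocking wait is
bracketed as an extended quiescent state.  A minimal sketch of the idea,
modeled on the Linux sem_wait() branch of the patch with error handling elided:

    void qemu_sem_wait(QemuSemaphore *sem)
    {
        int rc;

        /* The whole wait is an extended quiescent state: the thread
         * holds no RCU-protected pointers while it is asleep.  */
        rcu_thread_offline();
        do {
            rc = sem_wait(&sem->sem);
        } while (rc == -1 && errno == EINTR);
        rcu_thread_online();
    }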

Comments

pingfan liu May 16, 2013, 8:33 a.m. UTC | #1
On Wed, May 15, 2013 at 11:48 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Most threads will use mutexes and other sleeping synchronization primitives
> (condition variables, semaphores, events) periodically.  For these threads,
> the synchronization primitives are natural places to report a quiescent
> state (possibly an extended one).
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  docs/rcu.txt             | 28 ++++++++++++++++++++++++++++
>  util/qemu-thread-posix.c | 30 ++++++++++++++++++++++++++----
>  util/qemu-thread-win32.c | 16 +++++++++++++++-
>  util/rcu.c               |  3 ---
>  4 files changed, 69 insertions(+), 8 deletions(-)
>
> diff --git a/docs/rcu.txt b/docs/rcu.txt
> index 4e7cde3..d249ebf 100644
> --- a/docs/rcu.txt
> +++ b/docs/rcu.txt
> @@ -168,6 +168,34 @@ of "quiescent states", i.e. points where no RCU read-side critical
>  section can be active.  All threads created with qemu_thread_create
>  participate in the RCU mechanism and need to annotate such points.
>
> +Luckily, in most cases no manual annotation is needed, because waiting
> +on condition variables (qemu_cond_wait), semaphores (qemu_sem_wait,
> +qemu_sem_timedwait) or events (qemu_event_wait) implicitly marks the thread
> +as quiescent for the whole duration of the wait.  (There is an exception
> +for semaphore waits with a zero timeout).
> +
Why not the same rule for zero timeout?

> +Manual annotation is still needed in the following cases:
> +
> +- threads that spend their sleeping time in the kernel, for example
> +  in a call to select(), poll() or WaitForMultipleObjects().  The QEMU
> +  I/O thread is an example of this case.
> +
> +- threads that perform a lot of I/O.  In QEMU, the workers used for
> +  aio=thread are an example of this case (see aio_worker in block/raw-*).
> +
> +- threads that run continuously until they exit.  The migration thread
> +  is an example of this case.
> +
> +Regarding the second case, note that the workers run in the QEMU thread
> +pool.  The thread pool uses semaphores for synchronization, hence it does
> +report quiescent states periodically.  However, in some cases (e.g. NFS
> +mounted with the "hard" option) the workers can take an arbitrarily long
> +amount of time.  When this happens, synchronize_rcu() will not exit and
> +call_rcu() callbacks will be delayed arbitrarily.  It is therefore a
> +good idea to mark I/O system calls as quiescence points in the worker
> +functions.
> +
> +
>  Marking quiescent states is done with the following three APIs:
>
>       void rcu_quiescent_state(void);
> diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
> index 2df3382..f1f325a 100644
> --- a/util/qemu-thread-posix.c
> +++ b/util/qemu-thread-posix.c
> @@ -119,7 +119,9 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
>  {
>      int err;
>
> +    rcu_thread_offline();
>      err = pthread_cond_wait(&cond->cond, &mutex->lock);
> +    rcu_thread_online();
>      if (err)
>          error_exit(err, __func__);
>  }
> @@ -212,6 +214,10 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
>      int rc;
>      struct timespec ts;
>
> +    if (ms) {
> +        rcu_thread_offline();
> +    }
> +
>  #if defined(__APPLE__) || defined(__NetBSD__)
>      compute_abs_deadline(&ts, ms);
>      pthread_mutex_lock(&sem->lock);
> @@ -227,7 +233,10 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
>          }
>      }
>      pthread_mutex_unlock(&sem->lock);
> -    return (rc == ETIMEDOUT ? -1 : 0);
> +    if (rc == ETIMEDOUT) {
> +        rc == -1;
> +    }
> +
>  #else
>      if (ms <= 0) {
>          /* This is cheaper than sem_timedwait.  */
> @@ -235,7 +244,7 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
>              rc = sem_trywait(&sem->sem);
>          } while (rc == -1 && errno == EINTR);
>          if (rc == -1 && errno == EAGAIN) {
> -            return -1;
> +            goto out;
>          }
>      } else {
>          compute_abs_deadline(&ts, ms);
> @@ -243,18 +252,25 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
>              rc = sem_timedwait(&sem->sem, &ts);
>          } while (rc == -1 && errno == EINTR);
>          if (rc == -1 && errno == ETIMEDOUT) {
> -            return -1;
> +            goto out;
>          }
>      }
>      if (rc < 0) {
>          error_exit(errno, __func__);
>      }
> -    return 0;
>  #endif
> +
> +out:
> +    if (ms) {
> +        rcu_thread_offline();

s/offline/online/

Regards,
Pingfan
> +    }
> +    return rc;
>  }
>
>  void qemu_sem_wait(QemuSemaphore *sem)
>  {
> +    rcu_thread_offline();
> +
>  #if defined(__APPLE__) || defined(__NetBSD__)
>      pthread_mutex_lock(&sem->lock);
>      --sem->count;
> @@ -272,6 +288,8 @@ void qemu_sem_wait(QemuSemaphore *sem)
>          error_exit(errno, __func__);
>      }
>  #endif
> +
> +    rcu_thread_online();
>  }
>
>  #ifdef __linux__
> @@ -380,7 +398,11 @@ void qemu_event_wait(QemuEvent *ev)
>                  return;
>              }
>          }
> +        rcu_thread_offline();
>          futex_wait(ev, EV_BUSY);
> +        rcu_thread_online();
> +    } else {
> +        rcu_quiescent_state();
>      }
>  }
>
> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
> index 0c4850d..6fff1a4 100644
> --- a/util/qemu-thread-win32.c
> +++ b/util/qemu-thread-win32.c
> @@ -12,6 +12,7 @@
>   */
>  #include "qemu-common.h"
>  #include "qemu/thread.h"
> +#include "qemu/rcu.h"
>  #include <process.h>
>  #include <assert.h>
>  #include <limits.h>
> @@ -170,7 +171,9 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
>       * leaving mutex unlocked before we wait on semaphore.
>       */
>      qemu_mutex_unlock(mutex);
> +    rcu_thread_offline();
>      WaitForSingleObject(cond->sema, INFINITE);
> +    rcu_thread_online();
>
>      /* Now waiters must rendez-vous with the signaling thread and
>       * let it continue.  For cond_broadcast this has heavy contention
> @@ -210,7 +213,16 @@ void qemu_sem_post(QemuSemaphore *sem)
>
>  int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
>  {
> -    int rc = WaitForSingleObject(sem->sema, ms);
> +    int rc;
> +
> +    if (ms) {
> +        rcu_thread_offline();
> +    }
> +    rc = WaitForSingleObject(sem->sema, ms);
> +    if (ms) {
> +        rcu_thread_offline();
> +    }
> +
>      if (rc == WAIT_OBJECT_0) {
>          return 0;
>      }
> @@ -250,7 +262,9 @@ void qemu_event_reset(QemuEvent *ev)
>
>  void qemu_event_wait(QemuEvent *ev)
>  {
> +    rcu_thread_offline();
>      WaitForSingleObject(ev->event, INFINITE);
> +    rcu_thread_online();
>  }
>
>  struct QemuThreadData {
> diff --git a/util/rcu.c b/util/rcu.c
> index 27fda86..91d6ae2 100644
> --- a/util/rcu.c
> +++ b/util/rcu.c
> @@ -240,9 +240,6 @@ static void *call_rcu_thread(void *opaque)
>  {
>      struct rcu_head *node;
>
> -    /* This thread is just a writer.  */
> -    rcu_thread_offline();
> -
>      for (;;) {
>          int tries = 0;
>          int n = atomic_read(&rcu_call_count);
> --
> 1.8.1.4
>
>
>
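
Pingfan's s/offline/online/ catch above is right: as posted, the exit path
marks the thread offline a second time instead of bringing it back online.
With that fix applied (and with the no-op comparison "rc == -1;" in the
Apple/NetBSD branch turned into the assignment that was clearly intended),
the affected pieces of the POSIX qemu_sem_timedwait() would read:

    if (rc == ETIMEDOUT) {
        rc = -1;                    /* was "rc == -1;", a comparison with no effect */
    }

and

    out:
        if (ms) {
            rcu_thread_online();    /* was rcu_thread_offline() */
        }
        return rc;

The win32 version has the same slip: its second "if (ms)" block after
WaitForSingleObject() also calls rcu_thread_offline() where
rcu_thread_online() is needed.
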
Paolo Bonzini May 16, 2013, 8:43 a.m. UTC | #2
On 16/05/2013 10:33, liu ping fan wrote:
>> > +Luckily, in most cases no manual annotation is needed, because waiting
>> > +on condition variables (qemu_cond_wait), semaphores (qemu_sem_wait,
>> > +qemu_sem_timedwait) or events (qemu_event_wait) implicitly marks the thread
>> > +as quiescent for the whole duration of the wait.  (There is an exception
>> > +for semaphore waits with a zero timeout).
>> > +
> Why not the same rule for zero timeout?
> 

Because they are not really doing synchronization; they are basically an
"atomic_dec_if_not_zero" on the semaphore count.

Paolo
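
In other words, a zero-timeout qemu_sem_timedwait() never sleeps.  On the
POSIX side it reduces to roughly the following sketch (the stand-alone
function name is illustrative, not part of the patch):

    static int qemu_sem_trywait_sketch(QemuSemaphore *sem)
    {
        int rc;

        /* Essentially "atomic_dec_if_not_zero" on the semaphore count:
         * sem_trywait() returns immediately whether or not the count
         * was positive, so there is no sleep during which the thread
         * could usefully be marked quiescent.  */
        do {
            rc = sem_trywait(&sem->sem);
        } while (rc == -1 && errno == EINTR);
        return (rc == -1 && errno == EAGAIN) ? -1 : 0;
    }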

Patch

diff --git a/docs/rcu.txt b/docs/rcu.txt
index 4e7cde3..d249ebf 100644
--- a/docs/rcu.txt
+++ b/docs/rcu.txt
@@ -168,6 +168,34 @@ of "quiescent states", i.e. points where no RCU read-side critical
 section can be active.  All threads created with qemu_thread_create
 participate in the RCU mechanism and need to annotate such points.
 
+Luckily, in most cases no manual annotation is needed, because waiting
+on condition variables (qemu_cond_wait), semaphores (qemu_sem_wait,
+qemu_sem_timedwait) or events (qemu_event_wait) implicitly marks the thread
+as quiescent for the whole duration of the wait.  (There is an exception
+for semaphore waits with a zero timeout).
+
+Manual annotation is still needed in the following cases:
+
+- threads that spend their sleeping time in the kernel, for example
+  in a call to select(), poll() or WaitForMultipleObjects().  The QEMU
+  I/O thread is an example of this case.
+
+- threads that perform a lot of I/O.  In QEMU, the workers used for
+  aio=thread are an example of this case (see aio_worker in block/raw-*).
+
+- threads that run continuously until they exit.  The migration thread
+  is an example of this case.
+
+Regarding the second case, note that the workers run in the QEMU thread
+pool.  The thread pool uses semaphores for synchronization, hence it does
+report quiescent states periodically.  However, in some cases (e.g. NFS
+mounted with the "hard" option) the workers can take an arbitrarily long
+amount of time.  When this happens, synchronize_rcu() will not exit and
+call_rcu() callbacks will be delayed arbitrarily.  It is therefore a
+good idea to mark I/O system calls as quiescence points in the worker
+functions.
+
+
 Marking quiescent states is done with the following three APIs:
 
      void rcu_quiescent_state(void);
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 2df3382..f1f325a 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -119,7 +119,9 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
 {
     int err;
 
+    rcu_thread_offline();
     err = pthread_cond_wait(&cond->cond, &mutex->lock);
+    rcu_thread_online();
     if (err)
         error_exit(err, __func__);
 }
@@ -212,6 +214,10 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
     int rc;
     struct timespec ts;
 
+    if (ms) {
+        rcu_thread_offline();
+    }
+
 #if defined(__APPLE__) || defined(__NetBSD__)
     compute_abs_deadline(&ts, ms);
     pthread_mutex_lock(&sem->lock);
@@ -227,7 +233,10 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
         }
     }
     pthread_mutex_unlock(&sem->lock);
-    return (rc == ETIMEDOUT ? -1 : 0);
+    if (rc == ETIMEDOUT) {
+        rc == -1;
+    }
+
 #else
     if (ms <= 0) {
         /* This is cheaper than sem_timedwait.  */
@@ -235,7 +244,7 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
             rc = sem_trywait(&sem->sem);
         } while (rc == -1 && errno == EINTR);
         if (rc == -1 && errno == EAGAIN) {
-            return -1;
+            goto out;
         }
     } else {
         compute_abs_deadline(&ts, ms);
@@ -243,18 +252,25 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
             rc = sem_timedwait(&sem->sem, &ts);
         } while (rc == -1 && errno == EINTR);
         if (rc == -1 && errno == ETIMEDOUT) {
-            return -1;
+            goto out;
         }
     }
     if (rc < 0) {
         error_exit(errno, __func__);
     }
-    return 0;
 #endif
+
+out:
+    if (ms) {
+        rcu_thread_offline();
+    }
+    return rc;
 }
 
 void qemu_sem_wait(QemuSemaphore *sem)
 {
+    rcu_thread_offline();
+
 #if defined(__APPLE__) || defined(__NetBSD__)
     pthread_mutex_lock(&sem->lock);
     --sem->count;
@@ -272,6 +288,8 @@ void qemu_sem_wait(QemuSemaphore *sem)
         error_exit(errno, __func__);
     }
 #endif
+
+    rcu_thread_online();
 }
 
 #ifdef __linux__
@@ -380,7 +398,11 @@ void qemu_event_wait(QemuEvent *ev)
                 return;
             }
         }
+        rcu_thread_offline();
         futex_wait(ev, EV_BUSY);
+        rcu_thread_online();
+    } else {
+        rcu_quiescent_state();
     }
 }
 
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 0c4850d..6fff1a4 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -12,6 +12,7 @@
  */
 #include "qemu-common.h"
 #include "qemu/thread.h"
+#include "qemu/rcu.h"
 #include <process.h>
 #include <assert.h>
 #include <limits.h>
@@ -170,7 +171,9 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
      * leaving mutex unlocked before we wait on semaphore.
      */
     qemu_mutex_unlock(mutex);
+    rcu_thread_offline();
     WaitForSingleObject(cond->sema, INFINITE);
+    rcu_thread_online();
 
     /* Now waiters must rendez-vous with the signaling thread and
      * let it continue.  For cond_broadcast this has heavy contention
@@ -210,7 +213,16 @@ void qemu_sem_post(QemuSemaphore *sem)
 
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
 {
-    int rc = WaitForSingleObject(sem->sema, ms);
+    int rc;
+
+    if (ms) {
+        rcu_thread_offline();
+    }
+    rc = WaitForSingleObject(sem->sema, ms);
+    if (ms) {
+        rcu_thread_offline();
+    }
+
     if (rc == WAIT_OBJECT_0) {
         return 0;
     }
@@ -250,7 +262,9 @@ void qemu_event_reset(QemuEvent *ev)
 
 void qemu_event_wait(QemuEvent *ev)
 {
+    rcu_thread_offline();
     WaitForSingleObject(ev->event, INFINITE);
+    rcu_thread_online();
 }
 
 struct QemuThreadData {
diff --git a/util/rcu.c b/util/rcu.c
index 27fda86..91d6ae2 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -240,9 +240,6 @@ static void *call_rcu_thread(void *opaque)
 {
     struct rcu_head *node;
 
-    /* This thread is just a writer.  */
-    rcu_thread_offline();
-
     for (;;) {
         int tries = 0;
         int n = atomic_read(&rcu_call_count);
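
Two sketches of the manual annotation that the new docs/rcu.txt text calls
for; the helper names are illustrative and not part of the patch.  First, an
aio_worker-style wrapper that marks a potentially unbounded I/O system call as
an extended quiescent state:

    /* A pread() on e.g. an NFS file mounted with the "hard" option can
     * block indefinitely; going offline for the duration keeps
     * synchronize_rcu() and call_rcu() callbacks from being delayed.  */
    static ssize_t worker_pread(int fd, void *buf, size_t len, off_t offset)
    {
        ssize_t ret;

        rcu_thread_offline();
        ret = pread(fd, buf, len, offset);
        rcu_thread_online();
        return ret;
    }

Second, a thread that runs continuously until it exits (the migration thread
is the documented example) instead reports a momentary quiescent state once
per iteration of its main loop:

    while (!done) {
        do_one_unit_of_work();      /* placeholder for the thread's work */
        rcu_quiescent_state();      /* momentary quiescent point */
    }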