[v2,1/2] rcu: Introduce force_rcu notifier

Message ID 20211019055632.252879-2-groug@kaod.org
State: New
Series: accel/tcg: Fix monitor deadlock

Commit Message

Greg Kurz Oct. 19, 2021, 5:56 a.m. UTC
The drain_call_rcu() function can be blocked as long as an RCU reader
stays in a read-side critical section. This is typically what happens
when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .

This can be avoided by allowing drain_call_rcu() to enforce an RCU grace
period. Since each reader might need to take specific actions to end a
read-side critical section, do it with notifiers.

Prepare the ground for this by adding a notifier list to the RCU reader
struct and using it in wait_for_readers() if drain_call_rcu() is in
progress. An API is added for readers to register their notifiers.

This is largely based on a draft from Paolo Bonzini.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
---
 include/qemu/rcu.h | 16 ++++++++++++++++
 util/rcu.c         | 22 +++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)
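
A minimal sketch of how a reader thread could use the new API (not part
of the patch; the callback, the exit flag, and the registration order
are assumptions):

#include "qemu/osdep.h"
#include "qemu/notify.h"
#include "qemu/rcu.h"

static void force_rcu_cb(Notifier *notifier, void *data)
{
    bool *exit_request = data;

    /* Runs in the thread calling wait_for_readers(), not in the
     * reader; 'data' is whatever was registered below. */
    qatomic_set(exit_request, true);
}

static void *reader_thread(void *arg)
{
    bool exit_request = false;
    Notifier force_rcu = { .notify = force_rcu_cb };

    rcu_register_thread();
    rcu_add_force_rcu_notifier(&force_rcu, &exit_request);

    while (!qatomic_read(&exit_request)) {
        rcu_read_lock();
        /* ... read-side work; checking the flag between critical
         * sections gives drain_call_rcu() a way out of a busy
         * loop ... */
        rcu_read_unlock();
    }

    rcu_remove_force_rcu_notifier(&force_rcu);
    rcu_unregister_thread();
    return NULL;
}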

Comments

Paolo Bonzini Oct. 19, 2021, 11:26 a.m. UTC | #1
On 19/10/21 07:56, Greg Kurz wrote:
> The drain_call_rcu() function can be blocked as long as an RCU reader
> stays in a read-side critical section. This is typically what happens
> when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
> monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .
> 
> This can be avoided by allowing drain_call_rcu() to enforce an RCU grace
> period. Since each reader might need to take specific actions to end a
> read-side critical section, do it with notifiers.
> 
> Prepare the ground for this by adding a notifier list to the RCU reader
> struct and using it in wait_for_readers() if drain_call_rcu() is in
> progress. An API is added for readers to register their notifiers.
> 
> This is largely based on a draft from Paolo Bonzini.
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>   include/qemu/rcu.h | 16 ++++++++++++++++
>   util/rcu.c         | 22 +++++++++++++++++++++-
>   2 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
> index 515d327cf11c..d8c4fd8686b4 100644
> --- a/include/qemu/rcu.h
> +++ b/include/qemu/rcu.h
> @@ -27,6 +27,7 @@
>   #include "qemu/thread.h"
>   #include "qemu/queue.h"
>   #include "qemu/atomic.h"
> +#include "qemu/notify.h"
>   #include "qemu/sys_membarrier.h"
>   
>   #ifdef __cplusplus
> @@ -66,6 +67,14 @@ struct rcu_reader_data {
>   
>       /* Data used for registry, protected by rcu_registry_lock */
>       QLIST_ENTRY(rcu_reader_data) node;
> +
> +    /*
> +     * NotifierList used to force an RCU grace period.  Accessed under
> +     * rcu_registry_lock.  Note that the notifier is called _outside_
> +     * the thread!
> +     */
> +    NotifierList force_rcu;
> +    void *force_rcu_data;

This is a bit ugly because the force_rcu_data is shared across all 
notifiers.  Sure right now we have only one, but still the data argument 
should be in rcu_register_thread rather than rcu_add_force_rcu_notifier.

It's a pity because I liked the Notifier local variable...  But after 
thinking about it more and deleting some suggestions that won't work, 
it's just easiest to have the notifier in CPUState.

Maybe even move the unregistration to the existing function 
tcg_cpus_destroy, and add tcg_cpus_init that calls tcg_register_thread() 
and rcu_add_force_rcu_notifier().  This way you don't have to export 
tcg_cpus_force_rcu, and the tcg-accel-ops.h APIs are a bit more tidy.
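
A rough sketch of what that could look like (only the
tcg_cpus_init/tcg_cpus_destroy names and the CPUState placement come
from the suggestion above; cpu_exit() as the kick and the exact
signatures are assumptions):

/* CPUState grows a Notifier field, so the callback can recover the
 * vCPU with container_of() and no shared data pointer is needed. */

static void tcg_cpus_force_rcu(Notifier *notifier, void *data)
{
    CPUState *cpu = container_of(notifier, CPUState, force_rcu);

    /* Kick the vCPU so the read-side critical section around
     * cpu_exec() can end. */
    cpu_exit(cpu);
}

void tcg_cpus_init(CPUState *cpu)
{
    tcg_register_thread();
    cpu->force_rcu.notify = tcg_cpus_force_rcu;
    /* container_of() recovers the vCPU, so no data argument needed */
    rcu_add_force_rcu_notifier(&cpu->force_rcu, NULL);
}

void tcg_cpus_destroy(CPUState *cpu)
{
    rcu_remove_force_rcu_notifier(&cpu->force_rcu);
    cpu_thread_signal_destroyed(cpu);   /* existing teardown */
}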

Paolo

> +void rcu_add_force_rcu_notifier(Notifier *n, void *data)
> +{
> +    qemu_mutex_lock(&rcu_registry_lock);
> +    notifier_list_add(&rcu_reader.force_rcu, n);
> +    rcu_reader.force_rcu_data = data;
> +    qemu_mutex_unlock(&rcu_registry_lock);
> +}
> +
> +void rcu_remove_force_rcu_notifier(Notifier *n)
> +{
> +    qemu_mutex_lock(&rcu_registry_lock);
> +    rcu_reader.force_rcu_data = NULL;
> +    notifier_remove(n);
> +    qemu_mutex_unlock(&rcu_registry_lock);
> +}
> +
>   static void rcu_init_complete(void)
>   {
>       QemuThread thread;
>
Greg Kurz Oct. 20, 2021, 7:59 a.m. UTC | #2
On Tue, 19 Oct 2021 13:26:25 +0200
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 19/10/21 07:56, Greg Kurz wrote:
> > The drain_call_rcu() function can be blocked as long as an RCU reader
> > stays in a read-side critical section. This is typically what happens
> > when a TCG vCPU is executing a busy loop. It can deadlock the QEMU
> > monitor as reported in https://gitlab.com/qemu-project/qemu/-/issues/650 .
> > 
> > This can be avoided by allowing drain_call_rcu() to enforce an RCU grace
> > period. Since each reader might need to take specific actions to end a
> > read-side critical section, do it with notifiers.
> > 
> > Prepare the ground for this by adding a notifier list to the RCU reader
> > struct and using it in wait_for_readers() if drain_call_rcu() is in
> > progress. An API is added for readers to register their notifiers.
> > 
> > This is largely based on a draft from Paolo Bonzini.
> > 
> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >   include/qemu/rcu.h | 16 ++++++++++++++++
> >   util/rcu.c         | 22 +++++++++++++++++++++-
> >   2 files changed, 37 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
> > index 515d327cf11c..d8c4fd8686b4 100644
> > --- a/include/qemu/rcu.h
> > +++ b/include/qemu/rcu.h
> > @@ -27,6 +27,7 @@
> >   #include "qemu/thread.h"
> >   #include "qemu/queue.h"
> >   #include "qemu/atomic.h"
> > +#include "qemu/notify.h"
> >   #include "qemu/sys_membarrier.h"
> >   
> >   #ifdef __cplusplus
> > @@ -66,6 +67,14 @@ struct rcu_reader_data {
> >   
> >       /* Data used for registry, protected by rcu_registry_lock */
> >       QLIST_ENTRY(rcu_reader_data) node;
> > +
> > +    /*
> > +     * NotifierList used to force an RCU grace period.  Accessed under
> > +     * rcu_registry_lock.  Note that the notifier is called _outside_
> > +     * the thread!
> > +     */
> > +    NotifierList force_rcu;
> > +    void *force_rcu_data;
> 
> This is a bit ugly because the force_rcu_data is shared across all 
> notifiers.  Sure right now we have only one, but still the data argument 
> should be in rcu_register_thread rather than rcu_add_force_rcu_notifier.
> 

I don't quite see why we'd need more than one notifier, but indeed
this isn't conceptually correct.

> It's a pity because I liked the Notifier local variable...  But after 
> thinking about it more and deleting some suggestions that won't work, 
> it's just easiest to have the notifier in CPUState.
> 

Agreed.

> Maybe even move the unregistration to the existing function 
> tcg_cpus_destroy, and add tcg_cpus_init that calls tcg_register_thread() 
> and rcu_add_force_rcu_notifier().  This way you don't have to export 
> tcg_cpus_force_rcu, and the tcg-accel-ops.h APIs are a bit more tidy.
> 

I don't think we can do that because of round-robin: we only have one
thread in this case, but tcg_cpus_destroy() must still be called for all
vCPUs. Also, a single notifier will work just fine no matter which
vCPU is running when wait_for_readers() is called, if I understand
correctly how round-robin works.
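
A single-notifier sketch for the round-robin case (purely illustrative,
not taken from this series; kicking every vCPU with cpu_exit() is just
one possible strategy, since at most one of them can be inside the
thread's read-side critical section):

static Notifier rr_force_rcu;

static void rr_do_force_rcu(Notifier *notifier, void *data)
{
    CPUState *cpu;

    /* All vCPUs run from the single round-robin thread, so kicking
     * them all is a simple way to reach whichever one is currently
     * executing. */
    CPU_FOREACH(cpu) {
        cpu_exit(cpu);
    }
}

static void rr_thread_init(void)
{
    /* Registered once, from the round-robin vCPU thread itself. */
    rr_force_rcu.notify = rr_do_force_rcu;
    rcu_add_force_rcu_notifier(&rr_force_rcu, NULL);
}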

> Paolo
> 
> > +void rcu_add_force_rcu_notifier(Notifier *n, void *data)
> > +{
> > +    qemu_mutex_lock(&rcu_registry_lock);
> > +    notifier_list_add(&rcu_reader.force_rcu, n);
> > +    rcu_reader.force_rcu_data = data;
> > +    qemu_mutex_unlock(&rcu_registry_lock);
> > +}
> > +
> > +void rcu_remove_force_rcu_notifier(Notifier *n)
> > +{
> > +    qemu_mutex_lock(&rcu_registry_lock);
> > +    rcu_reader.force_rcu_data = NULL;
> > +    notifier_remove(n);
> > +    qemu_mutex_unlock(&rcu_registry_lock);
> > +}
> > +
> >   static void rcu_init_complete(void)
> >   {
> >       QemuThread thread;
> > 
>

Patch

diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 515d327cf11c..d8c4fd8686b4 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -27,6 +27,7 @@ 
 #include "qemu/thread.h"
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 #include "qemu/sys_membarrier.h"
 
 #ifdef __cplusplus
@@ -66,6 +67,14 @@  struct rcu_reader_data {
 
     /* Data used for registry, protected by rcu_registry_lock */
     QLIST_ENTRY(rcu_reader_data) node;
+
+    /*
+     * NotifierList used to force an RCU grace period.  Accessed under
+     * rcu_registry_lock.  Note that the notifier is called _outside_
+     * the thread!
+     */
+    NotifierList force_rcu;
+    void *force_rcu_data;
 };
 
 extern __thread struct rcu_reader_data rcu_reader;
@@ -180,6 +189,13 @@  G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
 #define RCU_READ_LOCK_GUARD() \
     g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
 
+/*
+ * Force-RCU notifiers tell readers that they should exit their
+ * read-side critical section.
+ */
+void rcu_add_force_rcu_notifier(Notifier *n, void *data);
+void rcu_remove_force_rcu_notifier(Notifier *n);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/util/rcu.c b/util/rcu.c
index 13ac0f75cb2a..0c68f068e23d 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -46,6 +46,7 @@ 
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
+static int in_drain_call_rcu;
 static QemuMutex rcu_registry_lock;
 static QemuMutex rcu_sync_lock;
 
@@ -107,6 +108,8 @@  static void wait_for_readers(void)
                  * get some extra futex wakeups.
                  */
                 qatomic_set(&index->waiting, false);
+            } else if (qatomic_read(&in_drain_call_rcu)) {
+                notifier_list_notify(&index->force_rcu, index->force_rcu_data);
             }
         }
 
@@ -293,7 +296,6 @@  void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
     qemu_event_set(&rcu_call_ready_event);
 }
 
-
 struct rcu_drain {
     struct rcu_head rcu;
     QemuEvent drain_complete_event;
@@ -339,8 +341,10 @@  void drain_call_rcu(void)
      * assumed.
      */
 
+    qatomic_inc(&in_drain_call_rcu);
     call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
     qemu_event_wait(&rcu_drain.drain_complete_event);
+    qatomic_dec(&in_drain_call_rcu);
 
     if (locked) {
         qemu_mutex_lock_iothread();
@@ -363,6 +367,22 @@  void rcu_unregister_thread(void)
     qemu_mutex_unlock(&rcu_registry_lock);
 }
 
+void rcu_add_force_rcu_notifier(Notifier *n, void *data)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    notifier_list_add(&rcu_reader.force_rcu, n);
+    rcu_reader.force_rcu_data = data;
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_remove_force_rcu_notifier(Notifier *n)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    rcu_reader.force_rcu_data = NULL;
+    notifier_remove(n);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
 static void rcu_init_complete(void)
 {
     QemuThread thread;