diff mbox series

[v0] vl: flush all task from rcu queue before exiting

Message ID 20211102133901.286027-1-den-plotnikov@yandex-team.ru
State New
Headers show
Series [v0] vl: flush all task from rcu queue before exiting | expand

Commit Message

Denis Plotnikov Nov. 2, 2021, 1:39 p.m. UTC
The device destruction may superimpose over qemu shutdown.
In this case some management layer, requested a device unplug and
waiting for DEVICE_DELETED event, may never get this event.

This happens because device_finalize() may never be called on qemu shutdown
for some devices using address_space_destroy(). The later is called from
the rcu thread.
On qemu shutdown, not all rcu callbacks may be called because the rcu thread
may not have enough time to converge before qemu main thread exit.

To resolve this issue this patch makes rcu thread to finish all its callbacks
explicitly by calling a new rcu intreface function right before
qemu main thread exit.

Signed-off-by: Denis Plotnikov <den-plotnikov@yandex-team.ru>
---
 include/qemu/rcu.h |  1 +
 softmmu/runstate.c |  3 +++
 util/rcu.c         | 12 ++++++++++++
 3 files changed, 16 insertions(+)

Comments

Denis Plotnikov Nov. 2, 2021, 2:13 p.m. UTC | #1
On 02.11.2021 16:39, Denis Plotnikov wrote:
> The device destruction may superimpose over qemu shutdown.
> In this case some management layer, requested a device unplug and
> waiting for DEVICE_DELETED event, may never get this event.
>
> This happens because device_finalize() may never be called on qemu shutdown
> for some devices using address_space_destroy(). The later is called from
> the rcu thread.
> On qemu shutdown, not all rcu callbacks may be called because the rcu thread
> may not have enough time to converge before qemu main thread exit.
>
> To resolve this issue this patch makes rcu thread to finish all its callbacks
> explicitly by calling a new rcu intreface function right before
> qemu main thread exit.
>
> Signed-off-by: Denis Plotnikov <den-plotnikov@yandex-team.ru>
> ---
>   include/qemu/rcu.h |  1 +
>   softmmu/runstate.c |  3 +++
>   util/rcu.c         | 12 ++++++++++++
>   3 files changed, 16 insertions(+)
>
> diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
> index 515d327cf11c..f7fbdc3781e5 100644
> --- a/include/qemu/rcu.h
> +++ b/include/qemu/rcu.h
> @@ -134,6 +134,7 @@ struct rcu_head {
>   
>   extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
>   extern void drain_call_rcu(void);
> +extern void flush_rcu(void);
>   
>   /* The operands of the minus operator must have the same type,
>    * which must be the one that we specify in the cast.
> diff --git a/softmmu/runstate.c b/softmmu/runstate.c
> index 10d9b7365aa7..28f319a97a2b 100644
> --- a/softmmu/runstate.c
> +++ b/softmmu/runstate.c
> @@ -822,5 +822,8 @@ void qemu_cleanup(void)
actually, flush_rcu() should be here before monitor_cleanup to send 
DEVICE_DELETED
>       monitor_cleanup();
>       qemu_chr_cleanup();
>       user_creatable_cleanup();
> +
> +    /* finish all the tasks from rcu queue before exiting */
> +    flush_rcu();
>       /* TODO: unref root container, check all devices are ok */
>   }
> diff --git a/util/rcu.c b/util/rcu.c
> index 13ac0f75cb2a..f047f8ee8d16 100644
> --- a/util/rcu.c
> +++ b/util/rcu.c
> @@ -348,6 +348,18 @@ void drain_call_rcu(void)
>   
>   }
>   
> +/*
> + * This function drains rcu queue until there are no tasks to do left
> + * and aims to the cases when one needs to ensure that no work hang
> + * in rcu thread before proceeding, e.g. on qemu shutdown.
> + */
> +void flush_rcu(void)
> +{
> +    while (qatomic_read(&rcu_call_count) > 0) {
> +        drain_call_rcu();
> +    }
> +}
> +
>   void rcu_register_thread(void)
>   {
>       assert(rcu_reader.ctr == 0);
Denis Plotnikov Nov. 9, 2021, 7:23 a.m. UTC | #2
Ping ping!

On 02.11.2021 16:39, Denis Plotnikov wrote:
> The device destruction may superimpose over qemu shutdown.
> In this case some management layer, requested a device unplug and
> waiting for DEVICE_DELETED event, may never get this event.
>
> This happens because device_finalize() may never be called on qemu shutdown
> for some devices using address_space_destroy(). The later is called from
> the rcu thread.
> On qemu shutdown, not all rcu callbacks may be called because the rcu thread
> may not have enough time to converge before qemu main thread exit.
>
> To resolve this issue this patch makes rcu thread to finish all its callbacks
> explicitly by calling a new rcu intreface function right before
> qemu main thread exit.
>
> Signed-off-by: Denis Plotnikov <den-plotnikov@yandex-team.ru>
> ---
>   include/qemu/rcu.h |  1 +
>   softmmu/runstate.c |  3 +++
>   util/rcu.c         | 12 ++++++++++++
>   3 files changed, 16 insertions(+)
>
> diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
> index 515d327cf11c..f7fbdc3781e5 100644
> --- a/include/qemu/rcu.h
> +++ b/include/qemu/rcu.h
> @@ -134,6 +134,7 @@ struct rcu_head {
>   
>   extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
>   extern void drain_call_rcu(void);
> +extern void flush_rcu(void);
>   
>   /* The operands of the minus operator must have the same type,
>    * which must be the one that we specify in the cast.
> diff --git a/softmmu/runstate.c b/softmmu/runstate.c
> index 10d9b7365aa7..28f319a97a2b 100644
> --- a/softmmu/runstate.c
> +++ b/softmmu/runstate.c
> @@ -822,5 +822,8 @@ void qemu_cleanup(void)
>       monitor_cleanup();
>       qemu_chr_cleanup();
>       user_creatable_cleanup();
> +
> +    /* finish all the tasks from rcu queue before exiting */
> +    flush_rcu();
>       /* TODO: unref root container, check all devices are ok */
>   }
> diff --git a/util/rcu.c b/util/rcu.c
> index 13ac0f75cb2a..f047f8ee8d16 100644
> --- a/util/rcu.c
> +++ b/util/rcu.c
> @@ -348,6 +348,18 @@ void drain_call_rcu(void)
>   
>   }
>   
> +/*
> + * This function drains rcu queue until there are no tasks to do left
> + * and aims to the cases when one needs to ensure that no work hang
> + * in rcu thread before proceeding, e.g. on qemu shutdown.
> + */
> +void flush_rcu(void)
> +{
> +    while (qatomic_read(&rcu_call_count) > 0) {
> +        drain_call_rcu();
> +    }
> +}
> +
>   void rcu_register_thread(void)
>   {
>       assert(rcu_reader.ctr == 0);
Paolo Bonzini Nov. 9, 2021, 5:46 p.m. UTC | #3
On 11/9/21 08:23, Denis Plotnikov wrote:
> Ping ping!

Looks good, but can you explain why it's okay to call it before 
qemu_chr_cleanup() and user_creatable_cleanup()?

I think a better solution to the ordering problem would be:

   qemu_chr_cleanup();
   user_creatable_cleanup();
   flush_rcu();
   monitor_cleanup();

with something like this:

diff --git a/chardev/char-fe.c b/chardev/char-fe.c
index 7789f7be9c..f0c3ea5447 100644
--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -195,6 +195,7 @@ bool qemu_chr_fe_init(CharBackend *b,
      int tag = 0;

      if (s) {
+        object_ref(OBJECT(s));
          if (CHARDEV_IS_MUX(s)) {
              MuxChardev *d = MUX_CHARDEV(s);

@@ -241,6 +242,7 @@ void qemu_chr_fe_deinit(CharBackend *b, bool del)
              } else {
                  object_unref(obj);
              }
+            object_unref(obj);
          }
          b->chr = NULL;
      }

to keep the chardev live between qemu_chr_cleanup() and monitor_cleanup().

Paolo
Denis Plotnikov Nov. 10, 2021, 1:29 p.m. UTC | #4
On 09.11.2021 20:46, Paolo Bonzini wrote:
> On 11/9/21 08:23, Denis Plotnikov wrote:
>> Ping ping!
>
> Looks good, but can you explain why it's okay to call it before 
> qemu_chr_cleanup() and user_creatable_cleanup()?
>
> I think a better solution to the ordering problem would be:
>
>   qemu_chr_cleanup();
>   user_creatable_cleanup();
>   flush_rcu();
>   monitor_cleanup();
I agree, this looks better
>
> with something like this:
>
> diff --git a/chardev/char-fe.c b/chardev/char-fe.c
> index 7789f7be9c..f0c3ea5447 100644
> --- a/chardev/char-fe.c
> +++ b/chardev/char-fe.c
> @@ -195,6 +195,7 @@ bool qemu_chr_fe_init(CharBackend *b,
>      int tag = 0;
>
>      if (s) {
> +        object_ref(OBJECT(s));
>          if (CHARDEV_IS_MUX(s)) {
>              MuxChardev *d = MUX_CHARDEV(s);
>
> @@ -241,6 +242,7 @@ void qemu_chr_fe_deinit(CharBackend *b, bool del)
>              } else {
>                  object_unref(obj);
>              }
> +            object_unref(obj);
>          }
>          b->chr = NULL;
>      }
>
> to keep the chardev live between qemu_chr_cleanup() and 
> monitor_cleanup().

but frankly speaking I don't understand why we have to do ref/unref in 
char-fe interface functions, instead of just ref/uref-ing monitor's char 
device directly like this:

diff --git a/monitor/monitor.c b/monitor/monitor.c
index 21c7a68758f5..3692a8e15268 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -611,6 +611,7 @@ void monitor_data_destroy(Monitor *mon)
  {
      g_free(mon->mon_cpu_path);
      qemu_chr_fe_deinit(&mon->chr, false);
+    object_unref(OBJECT(&mon->chr));
      if (monitor_is_qmp(mon)) {
          monitor_data_destroy_qmp(container_of(mon, MonitorQMP, common));
      } else {
@@ -737,6 +738,7 @@ int monitor_init(MonitorOptions *opts, bool 
allow_hmp, Error **errp)
          error_propagate(errp, local_err);
          return -1;
      }
+    object_ref(OBJECT(chr));
      return 0;
  }

May be this shows the intentions better?

Denis

>
> Paolo
>
Paolo Bonzini Nov. 10, 2021, 4:29 p.m. UTC | #5
On 11/10/21 14:29, Denis Plotnikov wrote:
> 
> On 09.11.2021 20:46, Paolo Bonzini wrote:
>> On 11/9/21 08:23, Denis Plotnikov wrote:
>>> Ping ping!
>>
>> Looks good, but can you explain why it's okay to call it before 
>> qemu_chr_cleanup() and user_creatable_cleanup()?
>>
>> I think a better solution to the ordering problem would be:
>>
>>   qemu_chr_cleanup();
>>   user_creatable_cleanup();
>>   flush_rcu();
>>   monitor_cleanup();
> I agree, this looks better
>>
>> with something like this:
>>
>> diff --git a/chardev/char-fe.c b/chardev/char-fe.c
>> index 7789f7be9c..f0c3ea5447 100644
>> --- a/chardev/char-fe.c
>> +++ b/chardev/char-fe.c
>> @@ -195,6 +195,7 @@ bool qemu_chr_fe_init(CharBackend *b,
>>      int tag = 0;
>>
>>      if (s) {
>> +        object_ref(OBJECT(s));
>>          if (CHARDEV_IS_MUX(s)) {
>>              MuxChardev *d = MUX_CHARDEV(s);
>>
>> @@ -241,6 +242,7 @@ void qemu_chr_fe_deinit(CharBackend *b, bool del)
>>              } else {
>>                  object_unref(obj);
>>              }
>> +            object_unref(obj);
>>          }
>>          b->chr = NULL;
>>      }
>>
>> to keep the chardev live between qemu_chr_cleanup() and 
>> monitor_cleanup().
> 
> but frankly speaking I don't understand why we have to do ref/unref in 
> char-fe interface functions, instead of just ref/uref-ing monitor's char 
> device directly like this:
> 
> diff --git a/monitor/monitor.c b/monitor/monitor.c
> index 21c7a68758f5..3692a8e15268 100644
> --- a/monitor/monitor.c
> +++ b/monitor/monitor.c
> @@ -611,6 +611,7 @@ void monitor_data_destroy(Monitor *mon)
>   {
>       g_free(mon->mon_cpu_path);
>       qemu_chr_fe_deinit(&mon->chr, false);
> +    object_unref(OBJECT(&mon->chr));
>       if (monitor_is_qmp(mon)) {
>           monitor_data_destroy_qmp(container_of(mon, MonitorQMP, common));
>       } else {
> @@ -737,6 +738,7 @@ int monitor_init(MonitorOptions *opts, bool 
> allow_hmp, Error **errp)
>           error_propagate(errp, local_err);
>           return -1;
>       }
> +    object_ref(OBJECT(chr));
>       return 0;
>   }
> 
> May be this shows the intentions better?

Sure, that works too.  But in the end the char-fe _is_ the place where 
the extra reference is taken/dropped (in the ->chr field of 
CharBackend), so I was thinking of a more generic solution too.  Feel 
free to submit yours though, it's certainly safer for 6.2 freeze.

Paolo
diff mbox series

Patch

diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 515d327cf11c..f7fbdc3781e5 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -134,6 +134,7 @@  struct rcu_head {
 
 extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
 extern void drain_call_rcu(void);
+extern void flush_rcu(void);
 
 /* The operands of the minus operator must have the same type,
  * which must be the one that we specify in the cast.
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 10d9b7365aa7..28f319a97a2b 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -822,5 +822,8 @@  void qemu_cleanup(void)
     monitor_cleanup();
     qemu_chr_cleanup();
     user_creatable_cleanup();
+
+    /* finish all the tasks from rcu queue before exiting */
+    flush_rcu();
     /* TODO: unref root container, check all devices are ok */
 }
diff --git a/util/rcu.c b/util/rcu.c
index 13ac0f75cb2a..f047f8ee8d16 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -348,6 +348,18 @@  void drain_call_rcu(void)
 
 }
 
+/*
+ * This function drains rcu queue until there are no tasks to do left
+ * and aims to the cases when one needs to ensure that no work hang
+ * in rcu thread before proceeding, e.g. on qemu shutdown.
+ */
+void flush_rcu(void)
+{
+    while (qatomic_read(&rcu_call_count) > 0) {
+        drain_call_rcu();
+    }
+}
+
 void rcu_register_thread(void)
 {
     assert(rcu_reader.ctr == 0);