diff mbox

iothread: Stop threads before main() quits

Message ID 1473326931-9699-1-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng Sept. 8, 2016, 9:28 a.m. UTC
Right after main_loop ends, we release various resources but keep the
iothreads alive. They are not prepared for the sudden change of resources.

Specifically, after bdrv_close_all(), the virtio-scsi dataplane gets a
surprise at the empty BlockBackend:

(gdb) bt
    at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:543
    at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:577

This is because d->conf.blk->root has been set to NULL, so
blk_get_aio_context() returns qemu_aio_context, whereas s->ctx is still
pointing to the iothread:

    hw/scsi/virtio-scsi.c:543:

    if (s->dataplane_started) {
        assert(blk_get_aio_context(d->conf.blk) == s->ctx);
    }

To fix this, let's stop iothreads before doing bdrv_close_all().

Cc: qemu-stable@nongnu.org
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 include/sysemu/iothread.h |  1 +
 iothread.c                | 24 ++++++++++++++++++++----
 vl.c                      |  2 ++
 3 files changed, 23 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Sept. 8, 2016, 9:34 a.m. UTC | #1
On 08/09/2016 11:28, Fam Zheng wrote:
> Right after main_loop ends, we release various things but keep iothread
> alive. The latter is not prepared to the sudden change of resources.
> 
> Specifically, after bdrv_close_all(), virtio-scsi dataplane get a
> surprise at the empty BlockBackend:
> 
> (gdb) bt
>     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:543
>     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:577
> 
> It is because the d->conf.blk->root is set to NULL, then
> blk_get_aio_context() returns qemu_aio_context, whereas s->ctx is still
> pointing to the iothread:
> 
>     hw/scsi/virtio-scsi.c:543:
> 
>     if (s->dataplane_started) {
>         assert(blk_get_aio_context(d->conf.blk) == s->ctx);
>     }
> 
> To fix this, let's stop iothreads before doing bdrv_close_all().
> 
> Cc: qemu-stable@nongnu.org
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  include/sysemu/iothread.h |  1 +
>  iothread.c                | 24 ++++++++++++++++++++----
>  vl.c                      |  2 ++
>  3 files changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
> index 2eefea1..68ac2de 100644
> --- a/include/sysemu/iothread.h
> +++ b/include/sysemu/iothread.h
> @@ -35,5 +35,6 @@ typedef struct {
>  
>  char *iothread_get_id(IOThread *iothread);
>  AioContext *iothread_get_aio_context(IOThread *iothread);
> +void iothread_stop_all(void);
>  
>  #endif /* IOTHREAD_H */
> diff --git a/iothread.c b/iothread.c
> index f183d38..fb08a60 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -54,16 +54,25 @@ static void *iothread_run(void *opaque)
>      return NULL;
>  }
>  
> -static void iothread_instance_finalize(Object *obj)
> +static int iothread_stop(Object *object, void *opaque)
>  {
> -    IOThread *iothread = IOTHREAD(obj);
> +    IOThread *iothread;
>  
> -    if (!iothread->ctx) {
> -        return;
> +    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
> +    if (!iothread || !iothread->ctx) {
> +        return 0;
>      }
>      iothread->stopping = true;
>      aio_notify(iothread->ctx);
>      qemu_thread_join(&iothread->thread);
> +    return 0;
> +}
> +
> +static void iothread_instance_finalize(Object *obj)
> +{
> +    IOThread *iothread = IOTHREAD(obj);
> +
> +    iothread_stop(obj, NULL);
>      qemu_cond_destroy(&iothread->init_done_cond);
>      qemu_mutex_destroy(&iothread->init_done_lock);
>      aio_context_unref(iothread->ctx);
> @@ -174,3 +183,10 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
>      object_child_foreach(container, query_one_iothread, &prev);
>      return head;
>  }
> +
> +void iothread_stop_all(void)
> +{
> +    Object *container = object_get_objects_root();
> +
> +    object_child_foreach(container, iothread_stop, NULL);
> +}
> diff --git a/vl.c b/vl.c
> index ee557a1..6a218ce 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -121,6 +121,7 @@ int main(int argc, char **argv)
>  #include "crypto/init.h"
>  #include "sysemu/replay.h"
>  #include "qapi/qmp/qerror.h"
> +#include "sysemu/iothread.h"
>  
>  #define MAX_VIRTIO_CONSOLES 1
>  #define MAX_SCLP_CONSOLES 1
> @@ -4616,6 +4617,7 @@ int main(int argc, char **argv, char **envp)
>      trace_init_vcpu_events();
>      main_loop();
>      replay_disable_events();
> +    iothread_stop_all();
>  
>      bdrv_close_all();
>      pause_all_vcpus();
> 

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Paolo
Fam Zheng Sept. 9, 2016, 2:57 a.m. UTC | #2
On Thu, 09/08 11:34, Paolo Bonzini wrote:
> 
> 
> On 08/09/2016 11:28, Fam Zheng wrote:
> > Right after main_loop ends, we release various things but keep iothread
> > alive. The latter is not prepared to the sudden change of resources.
> > 
> > Specifically, after bdrv_close_all(), virtio-scsi dataplane get a
> > surprise at the empty BlockBackend:
> > 
> > (gdb) bt
> >     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:543
> >     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:577
> > 
> > It is because the d->conf.blk->root is set to NULL, then
> > blk_get_aio_context() returns qemu_aio_context, whereas s->ctx is still
> > pointing to the iothread:
> > 
> >     hw/scsi/virtio-scsi.c:543:
> > 
> >     if (s->dataplane_started) {
> >         assert(blk_get_aio_context(d->conf.blk) == s->ctx);
> >     }
> > 
> > To fix this, let's stop iothreads before doing bdrv_close_all().
> > 
> > Cc: qemu-stable@nongnu.org
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  include/sysemu/iothread.h |  1 +
> >  iothread.c                | 24 ++++++++++++++++++++----
> >  vl.c                      |  2 ++
> >  3 files changed, 23 insertions(+), 4 deletions(-)
> > 
> > diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
> > index 2eefea1..68ac2de 100644
> > --- a/include/sysemu/iothread.h
> > +++ b/include/sysemu/iothread.h
> > @@ -35,5 +35,6 @@ typedef struct {
> >  
> >  char *iothread_get_id(IOThread *iothread);
> >  AioContext *iothread_get_aio_context(IOThread *iothread);
> > +void iothread_stop_all(void);
> >  
> >  #endif /* IOTHREAD_H */
> > diff --git a/iothread.c b/iothread.c
> > index f183d38..fb08a60 100644
> > --- a/iothread.c
> > +++ b/iothread.c
> > @@ -54,16 +54,25 @@ static void *iothread_run(void *opaque)
> >      return NULL;
> >  }
> >  
> > -static void iothread_instance_finalize(Object *obj)
> > +static int iothread_stop(Object *object, void *opaque)
> >  {
> > -    IOThread *iothread = IOTHREAD(obj);
> > +    IOThread *iothread;
> >  
> > -    if (!iothread->ctx) {
> > -        return;
> > +    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
> > +    if (!iothread || !iothread->ctx) {
> > +        return 0;
> >      }
> >      iothread->stopping = true;
> >      aio_notify(iothread->ctx);
> >      qemu_thread_join(&iothread->thread);
> > +    return 0;
> > +}
> > +
> > +static void iothread_instance_finalize(Object *obj)
> > +{
> > +    IOThread *iothread = IOTHREAD(obj);
> > +
> > +    iothread_stop(obj, NULL);
> >      qemu_cond_destroy(&iothread->init_done_cond);
> >      qemu_mutex_destroy(&iothread->init_done_lock);
> >      aio_context_unref(iothread->ctx);
> > @@ -174,3 +183,10 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
> >      object_child_foreach(container, query_one_iothread, &prev);
> >      return head;
> >  }
> > +
> > +void iothread_stop_all(void)
> > +{
> > +    Object *container = object_get_objects_root();
> > +
> > +    object_child_foreach(container, iothread_stop, NULL);
> > +}
> > diff --git a/vl.c b/vl.c
> > index ee557a1..6a218ce 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -121,6 +121,7 @@ int main(int argc, char **argv)
> >  #include "crypto/init.h"
> >  #include "sysemu/replay.h"
> >  #include "qapi/qmp/qerror.h"
> > +#include "sysemu/iothread.h"
> >  
> >  #define MAX_VIRTIO_CONSOLES 1
> >  #define MAX_SCLP_CONSOLES 1
> > @@ -4616,6 +4617,7 @@ int main(int argc, char **argv, char **envp)
> >      trace_init_vcpu_events();
> >      main_loop();
> >      replay_disable_events();
> > +    iothread_stop_all();
> >  
> >      bdrv_close_all();
> >      pause_all_vcpus();
> > 
> 
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Thanks! Is there a coming PULL from you that this patch could sneak in? :)

Fam
Paolo Bonzini Sept. 9, 2016, 8:48 a.m. UTC | #3
> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Thanks! Is there a coming PULL from you that this patch could sneak in? :)

There is (next week), but iothread.c is one of those grey areas of maintenance
that I wouldn't mind passing to someone else.  Such as the owner of block/io.c. :)

Paolo
Stefan Hajnoczi Sept. 13, 2016, 8:39 a.m. UTC | #4
On Thu, Sep 08, 2016 at 05:28:51PM +0800, Fam Zheng wrote:
> Right after main_loop ends, we release various things but keep iothread
> alive. The latter is not prepared to the sudden change of resources.
> 
> Specifically, after bdrv_close_all(), virtio-scsi dataplane get a
> surprise at the empty BlockBackend:
> 
> (gdb) bt
>     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:543
>     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:577
> 
> It is because the d->conf.blk->root is set to NULL, then
> blk_get_aio_context() returns qemu_aio_context, whereas s->ctx is still
> pointing to the iothread:
> 
>     hw/scsi/virtio-scsi.c:543:
> 
>     if (s->dataplane_started) {
>         assert(blk_get_aio_context(d->conf.blk) == s->ctx);
>     }
> 
> To fix this, let's stop iothreads before doing bdrv_close_all().

Did you consider blk_add_remove_bs_notifier()?  It gets called during
bdrv_close_all() and would let virtio-scsi survive any other case where
the same thing happens.

This patch is fine for now but we should think about which approach to
take for all devices in the future.

> 
> Cc: qemu-stable@nongnu.org
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  include/sysemu/iothread.h |  1 +
>  iothread.c                | 24 ++++++++++++++++++++----
>  vl.c                      |  2 ++
>  3 files changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
> index 2eefea1..68ac2de 100644
> --- a/include/sysemu/iothread.h
> +++ b/include/sysemu/iothread.h
> @@ -35,5 +35,6 @@ typedef struct {
>  
>  char *iothread_get_id(IOThread *iothread);
>  AioContext *iothread_get_aio_context(IOThread *iothread);
> +void iothread_stop_all(void);
>  
>  #endif /* IOTHREAD_H */
> diff --git a/iothread.c b/iothread.c
> index f183d38..fb08a60 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -54,16 +54,25 @@ static void *iothread_run(void *opaque)
>      return NULL;
>  }
>  
> -static void iothread_instance_finalize(Object *obj)
> +static int iothread_stop(Object *object, void *opaque)
>  {
> -    IOThread *iothread = IOTHREAD(obj);
> +    IOThread *iothread;
>  
> -    if (!iothread->ctx) {
> -        return;
> +    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
> +    if (!iothread || !iothread->ctx) {
> +        return 0;
>      }
>      iothread->stopping = true;
>      aio_notify(iothread->ctx);
>      qemu_thread_join(&iothread->thread);
> +    return 0;
> +}
> +
> +static void iothread_instance_finalize(Object *obj)
> +{
> +    IOThread *iothread = IOTHREAD(obj);
> +
> +    iothread_stop(obj, NULL);
>      qemu_cond_destroy(&iothread->init_done_cond);
>      qemu_mutex_destroy(&iothread->init_done_lock);
>      aio_context_unref(iothread->ctx);
> @@ -174,3 +183,10 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
>      object_child_foreach(container, query_one_iothread, &prev);
>      return head;
>  }
> +
> +void iothread_stop_all(void)
> +{
> +    Object *container = object_get_objects_root();
> +
> +    object_child_foreach(container, iothread_stop, NULL);
> +}
> diff --git a/vl.c b/vl.c
> index ee557a1..6a218ce 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -121,6 +121,7 @@ int main(int argc, char **argv)
>  #include "crypto/init.h"
>  #include "sysemu/replay.h"
>  #include "qapi/qmp/qerror.h"
> +#include "sysemu/iothread.h"
>  
>  #define MAX_VIRTIO_CONSOLES 1
>  #define MAX_SCLP_CONSOLES 1
> @@ -4616,6 +4617,7 @@ int main(int argc, char **argv, char **envp)
>      trace_init_vcpu_events();
>      main_loop();
>      replay_disable_events();
> +    iothread_stop_all();
>  
>      bdrv_close_all();
>      pause_all_vcpus();
> -- 
> 2.7.4
>
Stefan Hajnoczi Sept. 13, 2016, 8:39 a.m. UTC | #5
On Thu, Sep 08, 2016 at 05:28:51PM +0800, Fam Zheng wrote:
> Right after main_loop ends, we release various things but keep iothread
> alive. The latter is not prepared to the sudden change of resources.
> 
> Specifically, after bdrv_close_all(), virtio-scsi dataplane get a
> surprise at the empty BlockBackend:
> 
> (gdb) bt
>     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:543
>     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:577
> 
> It is because the d->conf.blk->root is set to NULL, then
> blk_get_aio_context() returns qemu_aio_context, whereas s->ctx is still
> pointing to the iothread:
> 
>     hw/scsi/virtio-scsi.c:543:
> 
>     if (s->dataplane_started) {
>         assert(blk_get_aio_context(d->conf.blk) == s->ctx);
>     }
> 
> To fix this, let's stop iothreads before doing bdrv_close_all().
> 
> Cc: qemu-stable@nongnu.org
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  include/sysemu/iothread.h |  1 +
>  iothread.c                | 24 ++++++++++++++++++++----
>  vl.c                      |  2 ++
>  3 files changed, 23 insertions(+), 4 deletions(-)

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan
Fam Zheng Sept. 13, 2016, 8:58 a.m. UTC | #6
On Tue, 09/13 09:39, Stefan Hajnoczi wrote:
> On Thu, Sep 08, 2016 at 05:28:51PM +0800, Fam Zheng wrote:
> > Right after main_loop ends, we release various things but keep iothread
> > alive. The latter is not prepared to the sudden change of resources.
> > 
> > Specifically, after bdrv_close_all(), virtio-scsi dataplane get a
> > surprise at the empty BlockBackend:
> > 
> > (gdb) bt
> >     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:543
> >     at /usr/src/debug/qemu-2.6.0/hw/scsi/virtio-scsi.c:577
> > 
> > It is because the d->conf.blk->root is set to NULL, then
> > blk_get_aio_context() returns qemu_aio_context, whereas s->ctx is still
> > pointing to the iothread:
> > 
> >     hw/scsi/virtio-scsi.c:543:
> > 
> >     if (s->dataplane_started) {
> >         assert(blk_get_aio_context(d->conf.blk) == s->ctx);
> >     }
> > 
> > To fix this, let's stop iothreads before doing bdrv_close_all().
> 
> Did you consider blk_add_remove_bs_notifier()?  It gets called during
> bdrv_close_all() and would let virtio-scsi survive any other case where
> the same thing happens.

There is already another case (eject):

https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg02243.html

And I don't know how to fix it with blk_add_remove_bs_notifier. Could you
elaborate?

Fam
diff mbox

Patch

diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 2eefea1..68ac2de 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -35,5 +35,6 @@  typedef struct {
 
 char *iothread_get_id(IOThread *iothread);
 AioContext *iothread_get_aio_context(IOThread *iothread);
+void iothread_stop_all(void);
 
 #endif /* IOTHREAD_H */
diff --git a/iothread.c b/iothread.c
index f183d38..fb08a60 100644
--- a/iothread.c
+++ b/iothread.c
@@ -54,16 +54,25 @@  static void *iothread_run(void *opaque)
     return NULL;
 }
 
-static void iothread_instance_finalize(Object *obj)
+static int iothread_stop(Object *object, void *opaque)
 {
-    IOThread *iothread = IOTHREAD(obj);
+    IOThread *iothread;
 
-    if (!iothread->ctx) {
-        return;
+    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
+    if (!iothread || !iothread->ctx) {
+        return 0;
     }
     iothread->stopping = true;
     aio_notify(iothread->ctx);
     qemu_thread_join(&iothread->thread);
+    return 0;
+}
+
+static void iothread_instance_finalize(Object *obj)
+{
+    IOThread *iothread = IOTHREAD(obj);
+
+    iothread_stop(obj, NULL);
     qemu_cond_destroy(&iothread->init_done_cond);
     qemu_mutex_destroy(&iothread->init_done_lock);
     aio_context_unref(iothread->ctx);
@@ -174,3 +183,10 @@  IOThreadInfoList *qmp_query_iothreads(Error **errp)
     object_child_foreach(container, query_one_iothread, &prev);
     return head;
 }
+
+void iothread_stop_all(void)
+{
+    Object *container = object_get_objects_root();
+
+    object_child_foreach(container, iothread_stop, NULL);
+}
diff --git a/vl.c b/vl.c
index ee557a1..6a218ce 100644
--- a/vl.c
+++ b/vl.c
@@ -121,6 +121,7 @@  int main(int argc, char **argv)
 #include "crypto/init.h"
 #include "sysemu/replay.h"
 #include "qapi/qmp/qerror.h"
+#include "sysemu/iothread.h"
 
 #define MAX_VIRTIO_CONSOLES 1
 #define MAX_SCLP_CONSOLES 1
@@ -4616,6 +4617,7 @@  int main(int argc, char **argv, char **envp)
     trace_init_vcpu_events();
     main_loop();
     replay_disable_events();
+    iothread_stop_all();
 
     bdrv_close_all();
     pause_all_vcpus();