
block/export: Fix vhost-user-blk shutdown with requests in flight

Message ID 20220125151435.48792-1-kwolf@redhat.com
State New
Series block/export: Fix vhost-user-blk shutdown with requests in flight

Commit Message

Kevin Wolf Jan. 25, 2022, 3:14 p.m. UTC
The vhost-user-blk export runs requests asynchronously, each in its own
coroutine. When the vhost connection goes away and we want to stop the
vhost-user server, we need to wait for these coroutines to stop before
we can unmap the shared memory. Otherwise, they would still access the
unmapped memory and crash.

This introduces a refcount to VuServer which is increased when spawning
a new request coroutine and decreased before the coroutine exits. The
memory is only unmapped when the refcount reaches zero.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qemu/vhost-user-server.h     |  5 +++++
 block/export/vhost-user-blk-server.c |  5 +++++
 util/vhost-user-server.c             | 22 ++++++++++++++++++++++
 3 files changed, 32 insertions(+)

Comments

Stefan Hajnoczi Jan. 26, 2022, 1:39 p.m. UTC | #1
On Tue, Jan 25, 2022 at 04:14:35PM +0100, Kevin Wolf wrote:
> The vhost-user-blk export runs requests asynchronously, each in its own
> coroutine. When the vhost connection goes away and we want to stop the
> vhost-user server, we need to wait for these coroutines to stop before
> we can unmap the shared memory. Otherwise, they would still access the
> unmapped memory and crash.
> 
> This introduces a refcount to VuServer which is increased when spawning
> a new request coroutine and decreased before the coroutine exits. The
> memory is only unmapped when the refcount reaches zero.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  include/qemu/vhost-user-server.h     |  5 +++++
>  block/export/vhost-user-blk-server.c |  5 +++++
>  util/vhost-user-server.c             | 22 ++++++++++++++++++++++
>  3 files changed, 32 insertions(+)
> 
> diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
> index 121ea1dedf..cd43193b80 100644
> --- a/include/qemu/vhost-user-server.h
> +++ b/include/qemu/vhost-user-server.h
> @@ -42,6 +42,8 @@ typedef struct {
>      const VuDevIface *vu_iface;
>  
>      /* Protected by ctx lock */
> +    unsigned int refcount;
> +    bool wait_idle;
>      VuDev vu_dev;
>      QIOChannel *ioc; /* The I/O channel with the client */
>      QIOChannelSocket *sioc; /* The underlying data channel with the client */
> @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server,
>  
>  void vhost_user_server_stop(VuServer *server);
>  
> +void vhost_user_server_ref(VuServer *server);
> +void vhost_user_server_unref(VuServer *server);
> +
>  void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
>  void vhost_user_server_detach_aio_context(VuServer *server);
>  
> diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
> index 1862563336..a129204c44 100644
> --- a/block/export/vhost-user-blk-server.c
> +++ b/block/export/vhost-user-blk-server.c
> @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
>      return VIRTIO_BLK_S_IOERR;
>  }
>  
> +/* Called with server refcount increased, must decrease before returning */
>  static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
>  {
>      VuBlkReq *req = opaque;
> @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
>      }
>  
>      vu_blk_req_complete(req);
> +    vhost_user_server_unref(server);
>      return;
>  
>  err:
>      free(req);
> +    vhost_user_server_unref(server);
>  }
>  
>  static void vu_blk_process_vq(VuDev *vu_dev, int idx)
> @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
>  
>          Coroutine *co =
>              qemu_coroutine_create(vu_blk_virtio_process_req, req);
> +
> +        vhost_user_server_ref(server);
>          qemu_coroutine_enter(co);

Why not increment inside vu_blk_virtio_process_req()? My understanding
is the coroutine is entered immediately so there is no race that needs
to be protected against by incrementing the refcount early.
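Something like this rough sketch, perhaps (assuming the request keeps a
pointer back to its server, as the unref calls in this patch suggest):

    static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
    {
        VuBlkReq *req = opaque;
        VuServer *server = req->server;

        /* Hypothetical variant: take the reference inside the coroutine.
         * This relies on qemu_coroutine_enter() running the coroutine
         * immediately, so the ref is taken before the first yield. */
        vhost_user_server_ref(server);

        /* ... process the request and complete or free it as before ... */

        vhost_user_server_unref(server);
    }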

>      }
>  }
> diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
> index f68287e811..f66fbba710 100644
> --- a/util/vhost-user-server.c
> +++ b/util/vhost-user-server.c
> @@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
>      error_report("vu_panic: %s", buf);
>  }
>  
> +void vhost_user_server_ref(VuServer *server)
> +{
> +    assert(!server->wait_idle);
> +    server->refcount++;
> +}
> +
> +void vhost_user_server_unref(VuServer *server)
> +{
> +    server->refcount--;
> +    if (server->wait_idle && !server->refcount) {
> +        aio_co_wake(server->co_trip);
> +    }
> +}
> +
>  static bool coroutine_fn
>  vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
>  {
> @@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque)
>          /* Keep running */
>      }
>  
> +    if (server->refcount) {
> +        /* Wait for requests to complete before we can unmap the memory */
> +        server->wait_idle = true;
> +        qemu_coroutine_yield();
> +        server->wait_idle = false;
> +    }
> +    assert(server->refcount == 0);
> +
>      vu_deinit(vu_dev);
>  
>      /* vu_deinit() should have called remove_watch() */
> -- 
> 2.31.1
>
Kevin Wolf Jan. 27, 2022, 2:10 p.m. UTC | #2
On 26.01.2022 at 14:39, Stefan Hajnoczi wrote:
> On Tue, Jan 25, 2022 at 04:14:35PM +0100, Kevin Wolf wrote:
> > The vhost-user-blk export runs requests asynchronously, each in its own
> > coroutine. When the vhost connection goes away and we want to stop the
> > vhost-user server, we need to wait for these coroutines to stop before
> > we can unmap the shared memory. Otherwise, they would still access the
> > unmapped memory and crash.
> > 
> > This introduces a refcount to VuServer which is increased when spawning
> > a new request coroutine and decreased before the coroutine exits. The
> > memory is only unmapped when the refcount reaches zero.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  include/qemu/vhost-user-server.h     |  5 +++++
> >  block/export/vhost-user-blk-server.c |  5 +++++
> >  util/vhost-user-server.c             | 22 ++++++++++++++++++++++
> >  3 files changed, 32 insertions(+)
> > 
> > diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
> > index 121ea1dedf..cd43193b80 100644
> > --- a/include/qemu/vhost-user-server.h
> > +++ b/include/qemu/vhost-user-server.h
> > @@ -42,6 +42,8 @@ typedef struct {
> >      const VuDevIface *vu_iface;
> >  
> >      /* Protected by ctx lock */
> > +    unsigned int refcount;
> > +    bool wait_idle;
> >      VuDev vu_dev;
> >      QIOChannel *ioc; /* The I/O channel with the client */
> >      QIOChannelSocket *sioc; /* The underlying data channel with the client */
> > @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server,
> >  
> >  void vhost_user_server_stop(VuServer *server);
> >  
> > +void vhost_user_server_ref(VuServer *server);
> > +void vhost_user_server_unref(VuServer *server);
> > +
> >  void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
> >  void vhost_user_server_detach_aio_context(VuServer *server);
> >  
> > diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
> > index 1862563336..a129204c44 100644
> > --- a/block/export/vhost-user-blk-server.c
> > +++ b/block/export/vhost-user-blk-server.c
> > @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
> >      return VIRTIO_BLK_S_IOERR;
> >  }
> >  
> > +/* Called with server refcount increased, must decrease before returning */
> >  static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
> >  {
> >      VuBlkReq *req = opaque;
> > @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
> >      }
> >  
> >      vu_blk_req_complete(req);
> > +    vhost_user_server_unref(server);
> >      return;
> >  
> >  err:
> >      free(req);
> > +    vhost_user_server_unref(server);
> >  }
> >  
> >  static void vu_blk_process_vq(VuDev *vu_dev, int idx)
> > @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
> >  
> >          Coroutine *co =
> >              qemu_coroutine_create(vu_blk_virtio_process_req, req);
> > +
> > +        vhost_user_server_ref(server);
> >          qemu_coroutine_enter(co);
> 
> Why not increment inside vu_blk_virtio_process_req()? My understanding
> is the coroutine is entered immediately so there is no race that needs
> to be protected against by incrementing the refcount early.

You're right, as long as we know that qemu_coroutine_enter() is used to
enter the coroutine and we increase the refcount before the coroutine
yields for the first time, doing this in vu_blk_virtio_process_req is
sufficient.

With respect to potential future code changes, it feels a little safer
to do it here, as in this patch, but at the same time I have to admit
that having ref and unref in the same function is a little nicer.

So for me there is no clear winner. If you prefer moving the ref into
the coroutine, I can post a v2.

Kevin

> >      }
> >  }
> > diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
> > index f68287e811..f66fbba710 100644
> > --- a/util/vhost-user-server.c
> > +++ b/util/vhost-user-server.c
> > @@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
> >      error_report("vu_panic: %s", buf);
> >  }
> >  
> > +void vhost_user_server_ref(VuServer *server)
> > +{
> > +    assert(!server->wait_idle);
> > +    server->refcount++;
> > +}
> > +
> > +void vhost_user_server_unref(VuServer *server)
> > +{
> > +    server->refcount--;
> > +    if (server->wait_idle && !server->refcount) {
> > +        aio_co_wake(server->co_trip);
> > +    }
> > +}
> > +
> >  static bool coroutine_fn
> >  vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
> >  {
> > @@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque)
> >          /* Keep running */
> >      }
> >  
> > +    if (server->refcount) {
> > +        /* Wait for requests to complete before we can unmap the memory */
> > +        server->wait_idle = true;
> > +        qemu_coroutine_yield();
> > +        server->wait_idle = false;
> > +    }
> > +    assert(server->refcount == 0);
> > +
> >      vu_deinit(vu_dev);
> >  
> >      /* vu_deinit() should have called remove_watch() */
> > -- 
> > 2.31.1
> >
Stefan Hajnoczi Jan. 27, 2022, 2:26 p.m. UTC | #3
On Thu, Jan 27, 2022 at 03:10:11PM +0100, Kevin Wolf wrote:
> On 26.01.2022 at 14:39, Stefan Hajnoczi wrote:
> > On Tue, Jan 25, 2022 at 04:14:35PM +0100, Kevin Wolf wrote:
> > > The vhost-user-blk export runs requests asynchronously, each in its own
> > > coroutine. When the vhost connection goes away and we want to stop the
> > > vhost-user server, we need to wait for these coroutines to stop before
> > > we can unmap the shared memory. Otherwise, they would still access the
> > > unmapped memory and crash.
> > > 
> > > This introduces a refcount to VuServer which is increased when spawning
> > > a new request coroutine and decreased before the coroutine exits. The
> > > memory is only unmapped when the refcount reaches zero.
> > > 
> > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > ---
> > >  include/qemu/vhost-user-server.h     |  5 +++++
> > >  block/export/vhost-user-blk-server.c |  5 +++++
> > >  util/vhost-user-server.c             | 22 ++++++++++++++++++++++
> > >  3 files changed, 32 insertions(+)
> > > 
> > > diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
> > > index 121ea1dedf..cd43193b80 100644
> > > --- a/include/qemu/vhost-user-server.h
> > > +++ b/include/qemu/vhost-user-server.h
> > > @@ -42,6 +42,8 @@ typedef struct {
> > >      const VuDevIface *vu_iface;
> > >  
> > >      /* Protected by ctx lock */
> > > +    unsigned int refcount;
> > > +    bool wait_idle;
> > >      VuDev vu_dev;
> > >      QIOChannel *ioc; /* The I/O channel with the client */
> > >      QIOChannelSocket *sioc; /* The underlying data channel with the client */
> > > @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server,
> > >  
> > >  void vhost_user_server_stop(VuServer *server);
> > >  
> > > +void vhost_user_server_ref(VuServer *server);
> > > +void vhost_user_server_unref(VuServer *server);
> > > +
> > >  void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
> > >  void vhost_user_server_detach_aio_context(VuServer *server);
> > >  
> > > diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
> > > index 1862563336..a129204c44 100644
> > > --- a/block/export/vhost-user-blk-server.c
> > > +++ b/block/export/vhost-user-blk-server.c
> > > @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
> > >      return VIRTIO_BLK_S_IOERR;
> > >  }
> > >  
> > > +/* Called with server refcount increased, must decrease before returning */
> > >  static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
> > >  {
> > >      VuBlkReq *req = opaque;
> > > @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
> > >      }
> > >  
> > >      vu_blk_req_complete(req);
> > > +    vhost_user_server_unref(server);
> > >      return;
> > >  
> > >  err:
> > >      free(req);
> > > +    vhost_user_server_unref(server);
> > >  }
> > >  
> > >  static void vu_blk_process_vq(VuDev *vu_dev, int idx)
> > > @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
> > >  
> > >          Coroutine *co =
> > >              qemu_coroutine_create(vu_blk_virtio_process_req, req);
> > > +
> > > +        vhost_user_server_ref(server);
> > >          qemu_coroutine_enter(co);
> > 
> > Why not increment inside vu_blk_virtio_process_req()? My understanding
> > is the coroutine is entered immediately so there is no race that needs
> > to be protected against by incrementing the refcount early.
> 
> You're right, as long as we know that qemu_coroutine_enter() is used to
> enter the coroutine and we increase the refcount before the coroutine
> yields for the first time, doing this in vu_blk_virtio_process_req is
> sufficient.
> 
> With respect to potential future code changes, it feels a little safer
> to do it here, as in this patch, but at the same time I have to admit
> that having ref and unref in the same function is a little nicer.
> 
> So for me there is no clear winner. If you prefer moving the ref into
> the coroutine, I can post a v2.

The way the code is currently written made me question whether I missed
a race condition. This may be a case where defensive programming
actually makes the code harder to understand.

That said, it's just me and I also don't have a strong opinion because
it's correct the way you wrote it. Feel free to merge this patch as it
is.

Stefan

Patch

diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
index 121ea1dedf..cd43193b80 100644
--- a/include/qemu/vhost-user-server.h
+++ b/include/qemu/vhost-user-server.h
@@ -42,6 +42,8 @@ typedef struct {
     const VuDevIface *vu_iface;
 
     /* Protected by ctx lock */
+    unsigned int refcount;
+    bool wait_idle;
     VuDev vu_dev;
     QIOChannel *ioc; /* The I/O channel with the client */
     QIOChannelSocket *sioc; /* The underlying data channel with the client */
@@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server,
 
 void vhost_user_server_stop(VuServer *server);
 
+void vhost_user_server_ref(VuServer *server);
+void vhost_user_server_unref(VuServer *server);
+
 void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
 void vhost_user_server_detach_aio_context(VuServer *server);
 
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index 1862563336..a129204c44 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
     return VIRTIO_BLK_S_IOERR;
 }
 
+/* Called with server refcount increased, must decrease before returning */
 static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
 {
     VuBlkReq *req = opaque;
@@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
     }
 
     vu_blk_req_complete(req);
+    vhost_user_server_unref(server);
     return;
 
 err:
     free(req);
+    vhost_user_server_unref(server);
 }
 
 static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
 
         Coroutine *co =
             qemu_coroutine_create(vu_blk_virtio_process_req, req);
+
+        vhost_user_server_ref(server);
         qemu_coroutine_enter(co);
     }
 }
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index f68287e811..f66fbba710 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
     error_report("vu_panic: %s", buf);
 }
 
+void vhost_user_server_ref(VuServer *server)
+{
+    assert(!server->wait_idle);
+    server->refcount++;
+}
+
+void vhost_user_server_unref(VuServer *server)
+{
+    server->refcount--;
+    if (server->wait_idle && !server->refcount) {
+        aio_co_wake(server->co_trip);
+    }
+}
+
 static bool coroutine_fn
 vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
 {
@@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque)
         /* Keep running */
     }
 
+    if (server->refcount) {
+        /* Wait for requests to complete before we can unmap the memory */
+        server->wait_idle = true;
+        qemu_coroutine_yield();
+        server->wait_idle = false;
+    }
+    assert(server->refcount == 0);
+
     vu_deinit(vu_dev);
 
     /* vu_deinit() should have called remove_watch() */
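
The shutdown handshake above is an instance of a generic drain pattern:
each in-flight request holds a reference, and teardown waits until the
count drops to zero before freeing shared state. As a self-contained
illustration outside QEMU (plain C with a pthread standing in for the
request coroutine, and a condition variable in place of aio_co_wake()
and the yield in vu_client_trip()):

    /* Standalone sketch of the ref/wait_idle drain pattern; this is
     * not QEMU code, just the same handshake built on pthreads. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t idle = PTHREAD_COND_INITIALIZER;
    static unsigned int refcount;
    static int wait_idle;

    static void server_ref(void)
    {
        pthread_mutex_lock(&lock);
        refcount++;                      /* one more in-flight request */
        pthread_mutex_unlock(&lock);
    }

    static void server_unref(void)
    {
        pthread_mutex_lock(&lock);
        if (--refcount == 0 && wait_idle) {
            pthread_cond_signal(&idle);  /* counterpart of aio_co_wake() */
        }
        pthread_mutex_unlock(&lock);
    }

    static void *worker(void *arg)
    {
        (void)arg;
        /* ... process one request against the shared mapping ... */
        server_unref();
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        server_ref();                    /* taken before the worker starts */
        pthread_create(&t, NULL, worker, NULL);

        /* Shutdown: wait until every request has dropped its reference,
         * the counterpart of the wait_idle/yield block in vu_client_trip() */
        pthread_mutex_lock(&lock);
        wait_idle = 1;
        while (refcount > 0) {
            pthread_cond_wait(&idle, &lock);
        }
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        /* only now is it safe to unmap the shared memory / vu_deinit() */
        printf("all requests drained\n");
        return 0;
    }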