Message ID | 20220125151435.48792-1-kwolf@redhat.com |
---|---|
State | New |
Headers | show |
Series | block/export: Fix vhost-user-blk shutdown with requests in flight | expand |
On Tue, Jan 25, 2022 at 04:14:35PM +0100, Kevin Wolf wrote: > The vhost-user-blk export runs requests asynchronously in their own > coroutine. When the vhost connection goes away and we want to stop the > vhost-user server, we need to wait for these coroutines to stop before > we can unmap the shared memory. Otherwise, they would still access the > unmapped memory and crash. > > This introduces a refcount to VuServer which is increased when spawning > a new request coroutine and decreased before the coroutine exits. The > memory is only unmapped when the refcount reaches zero. > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > --- > include/qemu/vhost-user-server.h | 5 +++++ > block/export/vhost-user-blk-server.c | 5 +++++ > util/vhost-user-server.c | 22 ++++++++++++++++++++++ > 3 files changed, 32 insertions(+) > > diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h > index 121ea1dedf..cd43193b80 100644 > --- a/include/qemu/vhost-user-server.h > +++ b/include/qemu/vhost-user-server.h > @@ -42,6 +42,8 @@ typedef struct { > const VuDevIface *vu_iface; > > /* Protected by ctx lock */ > + unsigned int refcount; > + bool wait_idle; > VuDev vu_dev; > QIOChannel *ioc; /* The I/O channel with the client */ > QIOChannelSocket *sioc; /* The underlying data channel with the client */ > @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server, > > void vhost_user_server_stop(VuServer *server); > > +void vhost_user_server_ref(VuServer *server); > +void vhost_user_server_unref(VuServer *server); > + > void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); > void vhost_user_server_detach_aio_context(VuServer *server); > > diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c > index 1862563336..a129204c44 100644 > --- a/block/export/vhost-user-blk-server.c > +++ b/block/export/vhost-user-blk-server.c > @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, > return VIRTIO_BLK_S_IOERR; > } > > +/* Called with server refcount increased, must decrease before returning */ > static void coroutine_fn vu_blk_virtio_process_req(void *opaque) > { > VuBlkReq *req = opaque; > @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) > } > > vu_blk_req_complete(req); > + vhost_user_server_unref(server); > return; > > err: > free(req); > + vhost_user_server_unref(server); > } > > static void vu_blk_process_vq(VuDev *vu_dev, int idx) > @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) > > Coroutine *co = > qemu_coroutine_create(vu_blk_virtio_process_req, req); > + > + vhost_user_server_ref(server); > qemu_coroutine_enter(co); Why not increment inside vu_blk_virtio_process_req()? My understanding is the coroutine is entered immediately so there is no race that needs to be protected against by incrementing the refcount early. > } > } > diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c > index f68287e811..f66fbba710 100644 > --- a/util/vhost-user-server.c > +++ b/util/vhost-user-server.c > @@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf) > error_report("vu_panic: %s", buf); > } > > +void vhost_user_server_ref(VuServer *server) > +{ > + assert(!server->wait_idle); > + server->refcount++; > +} > + > +void vhost_user_server_unref(VuServer *server) > +{ > + server->refcount--; > + if (server->wait_idle && !server->refcount) { > + aio_co_wake(server->co_trip); > + } > +} > + > static bool coroutine_fn > vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) > { > @@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque) > /* Keep running */ > } > > + if (server->refcount) { > + /* Wait for requests to complete before we can unmap the memory */ > + server->wait_idle = true; > + qemu_coroutine_yield(); > + server->wait_idle = false; > + } > + assert(server->refcount == 0); > + > vu_deinit(vu_dev); > > /* vu_deinit() should have called remove_watch() */ > -- > 2.31.1 >
Am 26.01.2022 um 14:39 hat Stefan Hajnoczi geschrieben: > On Tue, Jan 25, 2022 at 04:14:35PM +0100, Kevin Wolf wrote: > > The vhost-user-blk export runs requests asynchronously in their own > > coroutine. When the vhost connection goes away and we want to stop the > > vhost-user server, we need to wait for these coroutines to stop before > > we can unmap the shared memory. Otherwise, they would still access the > > unmapped memory and crash. > > > > This introduces a refcount to VuServer which is increased when spawning > > a new request coroutine and decreased before the coroutine exits. The > > memory is only unmapped when the refcount reaches zero. > > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > --- > > include/qemu/vhost-user-server.h | 5 +++++ > > block/export/vhost-user-blk-server.c | 5 +++++ > > util/vhost-user-server.c | 22 ++++++++++++++++++++++ > > 3 files changed, 32 insertions(+) > > > > diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h > > index 121ea1dedf..cd43193b80 100644 > > --- a/include/qemu/vhost-user-server.h > > +++ b/include/qemu/vhost-user-server.h > > @@ -42,6 +42,8 @@ typedef struct { > > const VuDevIface *vu_iface; > > > > /* Protected by ctx lock */ > > + unsigned int refcount; > > + bool wait_idle; > > VuDev vu_dev; > > QIOChannel *ioc; /* The I/O channel with the client */ > > QIOChannelSocket *sioc; /* The underlying data channel with the client */ > > @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server, > > > > void vhost_user_server_stop(VuServer *server); > > > > +void vhost_user_server_ref(VuServer *server); > > +void vhost_user_server_unref(VuServer *server); > > + > > void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); > > void vhost_user_server_detach_aio_context(VuServer *server); > > > > diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c > > index 1862563336..a129204c44 100644 > > --- a/block/export/vhost-user-blk-server.c > > +++ b/block/export/vhost-user-blk-server.c > > @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, > > return VIRTIO_BLK_S_IOERR; > > } > > > > +/* Called with server refcount increased, must decrease before returning */ > > static void coroutine_fn vu_blk_virtio_process_req(void *opaque) > > { > > VuBlkReq *req = opaque; > > @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) > > } > > > > vu_blk_req_complete(req); > > + vhost_user_server_unref(server); > > return; > > > > err: > > free(req); > > + vhost_user_server_unref(server); > > } > > > > static void vu_blk_process_vq(VuDev *vu_dev, int idx) > > @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) > > > > Coroutine *co = > > qemu_coroutine_create(vu_blk_virtio_process_req, req); > > + > > + vhost_user_server_ref(server); > > qemu_coroutine_enter(co); > > Why not increment inside vu_blk_virtio_process_req()? My understanding > is the coroutine is entered immediately so there is no race that needs > to be protected against by incrementing the refcount early. You're right, as long as we know that qemu_coroutine_enter() is used to enter the coroutine and we increase the refcount before the coroutine yields for the first time, doing this in vu_blk_virtio_process_req is sufficient. With respect to potential future code changes, it feels a little safer to do it here like in this patch, but at the same time I have to admit that having ref and unref in the same function is a little nicer. So for me there is no clear winner. If you prefer moving the ref into the coroutine, I can post a v2. Kevin > > } > > } > > diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c > > index f68287e811..f66fbba710 100644 > > --- a/util/vhost-user-server.c > > +++ b/util/vhost-user-server.c > > @@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf) > > error_report("vu_panic: %s", buf); > > } > > > > +void vhost_user_server_ref(VuServer *server) > > +{ > > + assert(!server->wait_idle); > > + server->refcount++; > > +} > > + > > +void vhost_user_server_unref(VuServer *server) > > +{ > > + server->refcount--; > > + if (server->wait_idle && !server->refcount) { > > + aio_co_wake(server->co_trip); > > + } > > +} > > + > > static bool coroutine_fn > > vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) > > { > > @@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque) > > /* Keep running */ > > } > > > > + if (server->refcount) { > > + /* Wait for requests to complete before we can unmap the memory */ > > + server->wait_idle = true; > > + qemu_coroutine_yield(); > > + server->wait_idle = false; > > + } > > + assert(server->refcount == 0); > > + > > vu_deinit(vu_dev); > > > > /* vu_deinit() should have called remove_watch() */ > > -- > > 2.31.1 > >
On Thu, Jan 27, 2022 at 03:10:11PM +0100, Kevin Wolf wrote: > Am 26.01.2022 um 14:39 hat Stefan Hajnoczi geschrieben: > > On Tue, Jan 25, 2022 at 04:14:35PM +0100, Kevin Wolf wrote: > > > The vhost-user-blk export runs requests asynchronously in their own > > > coroutine. When the vhost connection goes away and we want to stop the > > > vhost-user server, we need to wait for these coroutines to stop before > > > we can unmap the shared memory. Otherwise, they would still access the > > > unmapped memory and crash. > > > > > > This introduces a refcount to VuServer which is increased when spawning > > > a new request coroutine and decreased before the coroutine exits. The > > > memory is only unmapped when the refcount reaches zero. > > > > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com> > > > --- > > > include/qemu/vhost-user-server.h | 5 +++++ > > > block/export/vhost-user-blk-server.c | 5 +++++ > > > util/vhost-user-server.c | 22 ++++++++++++++++++++++ > > > 3 files changed, 32 insertions(+) > > > > > > diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h > > > index 121ea1dedf..cd43193b80 100644 > > > --- a/include/qemu/vhost-user-server.h > > > +++ b/include/qemu/vhost-user-server.h > > > @@ -42,6 +42,8 @@ typedef struct { > > > const VuDevIface *vu_iface; > > > > > > /* Protected by ctx lock */ > > > + unsigned int refcount; > > > + bool wait_idle; > > > VuDev vu_dev; > > > QIOChannel *ioc; /* The I/O channel with the client */ > > > QIOChannelSocket *sioc; /* The underlying data channel with the client */ > > > @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server, > > > > > > void vhost_user_server_stop(VuServer *server); > > > > > > +void vhost_user_server_ref(VuServer *server); > > > +void vhost_user_server_unref(VuServer *server); > > > + > > > void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); > > > void vhost_user_server_detach_aio_context(VuServer *server); > > > > > > diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c > > > index 1862563336..a129204c44 100644 > > > --- a/block/export/vhost-user-blk-server.c > > > +++ b/block/export/vhost-user-blk-server.c > > > @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, > > > return VIRTIO_BLK_S_IOERR; > > > } > > > > > > +/* Called with server refcount increased, must decrease before returning */ > > > static void coroutine_fn vu_blk_virtio_process_req(void *opaque) > > > { > > > VuBlkReq *req = opaque; > > > @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) > > > } > > > > > > vu_blk_req_complete(req); > > > + vhost_user_server_unref(server); > > > return; > > > > > > err: > > > free(req); > > > + vhost_user_server_unref(server); > > > } > > > > > > static void vu_blk_process_vq(VuDev *vu_dev, int idx) > > > @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) > > > > > > Coroutine *co = > > > qemu_coroutine_create(vu_blk_virtio_process_req, req); > > > + > > > + vhost_user_server_ref(server); > > > qemu_coroutine_enter(co); > > > > Why not increment inside vu_blk_virtio_process_req()? My understanding > > is the coroutine is entered immediately so there is no race that needs > > to be protected against by incrementing the refcount early. > > You're right, as long as we know that qemu_coroutine_enter() is used to > enter the coroutine and we increase the refcount before the coroutine > yields for the first time, doing this in vu_blk_virtio_process_req is > sufficient. > > With respect to potential future code changes, it feels a little safer > to do it here like in this patch, but at the same time I have to admit > that having ref and unref in the same function is a little nicer. > > So for me there is no clear winner. If you prefer moving the ref into > the coroutine, I can post a v2. The way the code is currently written made me question whether I missed a race condition. This may be a case where defensive programming actually makes the code harder to understand. That said, it's just me and I also don't have a strong opinion because it's correct the way you wrote it. Feel free to merge this patch as it is. Stefan
diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h index 121ea1dedf..cd43193b80 100644 --- a/include/qemu/vhost-user-server.h +++ b/include/qemu/vhost-user-server.h @@ -42,6 +42,8 @@ typedef struct { const VuDevIface *vu_iface; /* Protected by ctx lock */ + unsigned int refcount; + bool wait_idle; VuDev vu_dev; QIOChannel *ioc; /* The I/O channel with the client */ QIOChannelSocket *sioc; /* The underlying data channel with the client */ @@ -59,6 +61,9 @@ bool vhost_user_server_start(VuServer *server, void vhost_user_server_stop(VuServer *server); +void vhost_user_server_ref(VuServer *server); +void vhost_user_server_unref(VuServer *server); + void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx); void vhost_user_server_detach_aio_context(VuServer *server); diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 1862563336..a129204c44 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, return VIRTIO_BLK_S_IOERR; } +/* Called with server refcount increased, must decrease before returning */ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) { VuBlkReq *req = opaque; @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) } vu_blk_req_complete(req); + vhost_user_server_unref(server); return; err: free(req); + vhost_user_server_unref(server); } static void vu_blk_process_vq(VuDev *vu_dev, int idx) @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) Coroutine *co = qemu_coroutine_create(vu_blk_virtio_process_req, req); + + vhost_user_server_ref(server); qemu_coroutine_enter(co); } } diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c index f68287e811..f66fbba710 100644 --- a/util/vhost-user-server.c +++ b/util/vhost-user-server.c @@ -74,6 +74,20 @@ static void panic_cb(VuDev *vu_dev, const char *buf) error_report("vu_panic: %s", buf); } +void vhost_user_server_ref(VuServer *server) +{ + assert(!server->wait_idle); + server->refcount++; +} + +void vhost_user_server_unref(VuServer *server) +{ + server->refcount--; + if (server->wait_idle && !server->refcount) { + aio_co_wake(server->co_trip); + } +} + static bool coroutine_fn vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) { @@ -177,6 +191,14 @@ static coroutine_fn void vu_client_trip(void *opaque) /* Keep running */ } + if (server->refcount) { + /* Wait for requests to complete before we can unmap the memory */ + server->wait_idle = true; + qemu_coroutine_yield(); + server->wait_idle = false; + } + assert(server->refcount == 0); + vu_deinit(vu_dev); /* vu_deinit() should have called remove_watch() */
The vhost-user-blk export runs requests asynchronously in their own coroutine. When the vhost connection goes away and we want to stop the vhost-user server, we need to wait for these coroutines to stop before we can unmap the shared memory. Otherwise, they would still access the unmapped memory and crash. This introduces a refcount to VuServer which is increased when spawning a new request coroutine and decreased before the coroutine exits. The memory is only unmapped when the refcount reaches zero. Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/qemu/vhost-user-server.h | 5 +++++ block/export/vhost-user-blk-server.c | 5 +++++ util/vhost-user-server.c | 22 ++++++++++++++++++++++ 3 files changed, 32 insertions(+)