Message ID | 20240516025753.130171-3-fengli@smartx.com |
---|---|
State | New |
Headers | show |
Series | [v4,1/2] Revert "vhost-user: fix lost reconnect" | expand |
On Wed, May 15, 2024 at 10:58 PM Li Feng <fengli@smartx.com> wrote: > > When the vhost-user is reconnecting to the backend, and if the vhost-user fails > at the get_features in vhost_dev_init(), then the reconnect will fail > and it will not be retriggered forever. > > The reason is: > When the vhost-user fail at get_features, the vhost_dev_cleanup will be called > immediately. > > vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'. > > The reconnect path is: > vhost_user_blk_event > vhost_user_async_close(.. vhost_user_blk_disconnect ..) > qemu_chr_fe_set_handlers <----- clear the notifier callback > schedule vhost_user_async_close_bh > > The vhost->vdev is null, so the vhost_user_blk_disconnect will not be > called, then the event fd callback will not be reinstalled. > > We need to ensure that even if vhost_dev_init initialization fails, the event > handler still needs to be reinstalled when s->connected is false. > > All vhost-user devices have this issue, including vhost-user-blk/scsi. 
> > Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling") > Reviewed-by: Raphael Norwitz <raphael@enfabrica.net> > Signed-off-by: Li Feng <fengli@smartx.com> > --- > hw/block/vhost-user-blk.c | 3 ++- > hw/scsi/vhost-user-scsi.c | 3 ++- > hw/virtio/vhost-user-base.c | 3 ++- > hw/virtio/vhost-user.c | 10 +--------- > 4 files changed, 7 insertions(+), 12 deletions(-) > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > index 41d1ac3a5a..c6842ced48 100644 > --- a/hw/block/vhost-user-blk.c > +++ b/hw/block/vhost-user-blk.c > @@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev) > VHostUserBlk *s = VHOST_USER_BLK(vdev); > > if (!s->connected) { > - return; > + goto done; > } > s->connected = false; > > @@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev) > > vhost_dev_cleanup(&s->dev); > > +done: > /* Re-instate the event handler for new connections */ > qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, > NULL, dev, NULL, true); > diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c > index 48a59e020e..b49a11d23b 100644 > --- a/hw/scsi/vhost-user-scsi.c > +++ b/hw/scsi/vhost-user-scsi.c > @@ -181,7 +181,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev) > VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); > > if (!s->connected) { > - return; > + goto done; > } > s->connected = false; > > @@ -189,6 +189,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev) > > vhost_dev_cleanup(&vsc->dev); > > +done: > /* Re-instate the event handler for new connections */ > qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, > vhost_user_scsi_event, NULL, dev, NULL, true); > diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c > index 4b54255682..11e72b1e3b 100644 > --- a/hw/virtio/vhost-user-base.c > +++ b/hw/virtio/vhost-user-base.c > @@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev) > VHostUserBase *vub = 
VHOST_USER_BASE(vdev); > > if (!vub->connected) { > - return; > + goto done; > } > vub->connected = false; > > vub_stop(vdev); > vhost_dev_cleanup(&vub->vhost_dev); > > +done: > /* Re-instate the event handler for new connections */ > qemu_chr_fe_set_handlers(&vub->chardev, > NULL, NULL, vub_event, > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > index c929097e87..c407ea8939 100644 > --- a/hw/virtio/vhost-user.c > +++ b/hw/virtio/vhost-user.c > @@ -2781,16 +2781,8 @@ typedef struct { > static void vhost_user_async_close_bh(void *opaque) > { > VhostAsyncCallback *data = opaque; > - struct vhost_dev *vhost = data->vhost; > > - /* > - * If the vhost_dev has been cleared in the meantime there is > - * nothing left to do as some other path has completed the > - * cleanup. > - */ > - if (vhost->vdev) { > - data->cb(data->dev); > - } > + data->cb(data->dev); > > g_free(data); > } > -- > 2.45.0 >
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 41d1ac3a5a..c6842ced48 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -353,7 +353,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev) VHostUserBlk *s = VHOST_USER_BLK(vdev); if (!s->connected) { - return; + goto done; } s->connected = false; @@ -361,6 +361,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); +done: /* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, NULL, dev, NULL, true); diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 48a59e020e..b49a11d23b 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -181,7 +181,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev) VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); if (!s->connected) { - return; + goto done; } s->connected = false; @@ -189,6 +189,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev) vhost_dev_cleanup(&vsc->dev); +done: /* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, vhost_user_scsi_event, NULL, dev, NULL, true); diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 4b54255682..11e72b1e3b 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev) VHostUserBase *vub = VHOST_USER_BASE(vdev); if (!vub->connected) { - return; + goto done; } vub->connected = false; vub_stop(vdev); vhost_dev_cleanup(&vub->vhost_dev); +done: /* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c929097e87..c407ea8939 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2781,16 +2781,8 @@ typedef struct { static void 
vhost_user_async_close_bh(void *opaque) { VhostAsyncCallback *data = opaque; - struct vhost_dev *vhost = data->vhost; - /* - * If the vhost_dev has been cleared in the meantime there is - * nothing left to do as some other path has completed the - * cleanup. - */ - if (vhost->vdev) { - data->cb(data->dev); - } + data->cb(data->dev); g_free(data); }
When vhost-user is reconnecting to the backend and fails at get_features in vhost_dev_init(), the reconnect fails and is never retriggered. The reason is: When vhost-user fails at get_features, vhost_dev_cleanup is called immediately. vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'. The reconnect path is: vhost_user_blk_event vhost_user_async_close(.. vhost_user_blk_disconnect ..) qemu_chr_fe_set_handlers <----- clear the notifier callback schedule vhost_user_async_close_bh Because vhost->vdev is NULL, vhost_user_blk_disconnect is not called, so the event fd callback is not reinstalled. We need to ensure that, even if vhost_dev_init fails, the event handler is still reinstalled when s->connected is false. All vhost-user devices have this issue, including vhost-user-blk/scsi. Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling") Signed-off-by: Li Feng <fengli@smartx.com> --- hw/block/vhost-user-blk.c | 3 ++- hw/scsi/vhost-user-scsi.c | 3 ++- hw/virtio/vhost-user-base.c | 3 ++- hw/virtio/vhost-user.c | 10 +--------- 4 files changed, 7 insertions(+), 12 deletions(-)