diff mbox series

[v2,2/6] vhost-user-blk: Don't reconnect during initialisation

Message ID 20210429171316.162022-3-kwolf@redhat.com
State New
Headers show
Series vhost-user-blk: Error handling fixes during initialisation | expand

Commit Message

Kevin Wolf April 29, 2021, 5:13 p.m. UTC
This is a partial revert of commits 77542d43149 and bc79c87bcde.

Usually, an error during initialisation means that the configuration was
wrong. Reconnecting won't make the error go away, but just turn the
error condition into an endless loop. Avoid this and return errors
again.

Additionally, calling vhost_user_blk_disconnect() from the chardev event
handler could result in use-after-free because none of the
initialisation code expects that the device could just go away in the
middle. So removing the call fixes crashes in several places.

For example, using a num-queues setting that is incompatible with the
backend would result in a crash like this (dereferencing dev->opaque,
which is already NULL):

 #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
 #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
 #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
 #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
 #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
 #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
 #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
 #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
 #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
 #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/vhost-user-blk.c | 59 +++++++++++----------------------------
 1 file changed, 17 insertions(+), 42 deletions(-)

Comments

Raphael Norwitz May 3, 2021, 5:01 p.m. UTC | #1
So we're not going with the suggestion to retry once or a fixed number
of times? Any reason why not?

On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.
> 
> Additionally, calling vhost_user_blk_disconnect() from the chardev event
> handler could result in use-after-free because none of the
> initialisation code expects that the device could just go away in the
> middle. So removing the call fixes crashes in several places.
> 
> For example, using a num-queues setting that is incompatible with the
> backend would result in a crash like this (dereferencing dev->opaque,
> which is already NULL):
> 
>  #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
>  #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
>  #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
>  #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
>  #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
>  #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
>  #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
>  #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
>  #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
>  #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/block/vhost-user-blk.c | 59 +++++++++++----------------------------
>  1 file changed, 17 insertions(+), 42 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 7c85248a7b..c0b9958da1 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
>      VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> +
>  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> @@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>      vhost_dev_cleanup(&s->dev);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> -                                 bool realized);
> -
> -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
> -{
> -    vhost_user_blk_event(opaque, event, false);
> -}
> -
> -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
> -{
> -    vhost_user_blk_event(opaque, event, true);
> -}
> -
>  static void vhost_user_blk_chr_closed_bh(void *opaque)
>  {
>      DeviceState *dev = opaque;
> @@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>  
>      vhost_user_blk_disconnect(dev);
> -    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
> -            vhost_user_blk_event_oper, NULL, opaque, NULL, true);
> +    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> +                             NULL, opaque, NULL, true);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> -                                 bool realized)
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>  {
>      DeviceState *dev = opaque;
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -401,17 +389,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
>          }
>          break;
>      case CHR_EVENT_CLOSED:
> -        /*
> -         * Closing the connection should happen differently on device
> -         * initialization and operation stages.
> -         * On initalization, we want to re-start vhost_dev initialization
> -         * from the very beginning right away when the connection is closed,
> -         * so we clean up vhost_dev on each connection closing.
> -         * On operation, we want to postpone vhost_dev cleanup to let the
> -         * other code perform its own cleanup sequence using vhost_dev data
> -         * (e.g. vhost_dev_set_log).
> -         */
> -        if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
> +        if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>              /*
>               * A close event may happen during a read/write, but vhost
>               * code assumes the vhost_dev remains setup, so delay the
> @@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
>               * knowing its type (in this case vhost-user).
>               */
>              s->dev.started = false;
> -        } else {
> -            vhost_user_blk_disconnect(dev);
>          }
>          break;
>      case CHR_EVENT_BREAK:
> @@ -489,33 +465,32 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
>      s->connected = false;
>  
> -    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_realize, NULL, (void *)dev,
> -                             NULL, true);
> -
> -reconnect:
>      if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) {
>          goto virtio_err;
>      }
>  
> -    /* check whether vhost_user_blk_connect() failed or not */
> -    if (!s->connected) {
> -        goto reconnect;
> +    if (vhost_user_blk_connect(dev) < 0) {
> +        error_setg(errp, "vhost-user-blk: could not connect");
> +        qemu_chr_fe_disconnect(&s->chardev);
> +        goto virtio_err;
>      }
> +    assert(s->connected);
>  
>      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
>                                 sizeof(struct virtio_blk_config));
>      if (ret < 0) {
> -        error_report("vhost-user-blk: get block config failed");
> -        goto reconnect;
> +        error_setg(errp, "vhost-user-blk: get block config failed");
> +        goto vhost_err;
>      }
>  
> -    /* we're fully initialized, now we can operate, so change the handler */
> +    /* we're fully initialized, now we can operate, so add the handler */
>      qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_oper, NULL, (void *)dev,
> +                             vhost_user_blk_event, NULL, (void *)dev,
>                               NULL, true);
>      return;
>  
> +vhost_err:
> +    vhost_dev_cleanup(&s->dev);
>  virtio_err:
>      g_free(s->vhost_vqs);
>      s->vhost_vqs = NULL;
> -- 
> 2.30.2
>
Michael S. Tsirkin May 4, 2021, 8:59 a.m. UTC | #2
On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.

So there are several possible reasons for an error:

1. remote restarted - we would like to reconnect,
   this was the original use-case for reconnect.

   I am not very happy that we are killing this usecase.

2. qemu detected an error and closed the connection
   looks like we try to handle that by reconnect,
   this is something we should address.
3. remote failed due to a bad command from qemu.
   this usecase isn't well supported at the moment.

   How about supporting it on the remote side? I think
   that if the data is well-formed but just has a configuration the
   remote cannot support, then instead of closing the connection, the remote can wait
   for commands with need_reply set, and respond with
   an error. Or at least do it
   if VHOST_USER_PROTOCOL_F_REPLY_ACK has been negotiated.
   If VHOST_USER_SET_VRING_ERR is used then signalling that
   fd might also be reasonable.

   OTOH if qemu is buggy and sends malformed data and remote detects that then
   having qemu retry forever is ok, might actually be beneficial for
   debugging.



> Additionally, calling vhost_user_blk_disconnect() from the chardev event
> handler could result in use-after-free because none of the
> initialisation code expects that the device could just go away in the
> middle. So removing the call fixes crashes in several places.
> For example, using a num-queues setting that is incompatible with the
> backend would result in a crash like this (dereferencing dev->opaque,
> which is already NULL):
> 
>  #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
>  #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
>  #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
>  #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
>  #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
>  #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
>  #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
>  #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
>  #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
>  #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660

Right. So that's definitely something to fix.

> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/block/vhost-user-blk.c | 59 +++++++++++----------------------------
>  1 file changed, 17 insertions(+), 42 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 7c85248a7b..c0b9958da1 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
>      VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> +
>  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
>  {
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> @@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
>      vhost_dev_cleanup(&s->dev);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> -                                 bool realized);
> -
> -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
> -{
> -    vhost_user_blk_event(opaque, event, false);
> -}
> -
> -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
> -{
> -    vhost_user_blk_event(opaque, event, true);
> -}
> -
>  static void vhost_user_blk_chr_closed_bh(void *opaque)
>  {
>      DeviceState *dev = opaque;
> @@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
>      VHostUserBlk *s = VHOST_USER_BLK(vdev);
>  
>      vhost_user_blk_disconnect(dev);
> -    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
> -            vhost_user_blk_event_oper, NULL, opaque, NULL, true);
> +    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> +                             NULL, opaque, NULL, true);
>  }
>  
> -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> -                                 bool realized)
> +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>  {
>      DeviceState *dev = opaque;
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -401,17 +389,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
>          }
>          break;
>      case CHR_EVENT_CLOSED:
> -        /*
> -         * Closing the connection should happen differently on device
> -         * initialization and operation stages.
> -         * On initalization, we want to re-start vhost_dev initialization
> -         * from the very beginning right away when the connection is closed,
> -         * so we clean up vhost_dev on each connection closing.
> -         * On operation, we want to postpone vhost_dev cleanup to let the
> -         * other code perform its own cleanup sequence using vhost_dev data
> -         * (e.g. vhost_dev_set_log).
> -         */
> -        if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
> +        if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>              /*
>               * A close event may happen during a read/write, but vhost
>               * code assumes the vhost_dev remains setup, so delay the
> @@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
>               * knowing its type (in this case vhost-user).
>               */
>              s->dev.started = false;
> -        } else {
> -            vhost_user_blk_disconnect(dev);
>          }
>          break;
>      case CHR_EVENT_BREAK:
> @@ -489,33 +465,32 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
>      s->connected = false;
>  
> -    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_realize, NULL, (void *)dev,
> -                             NULL, true);
> -
> -reconnect:
>      if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) {
>          goto virtio_err;
>      }
>  
> -    /* check whether vhost_user_blk_connect() failed or not */
> -    if (!s->connected) {
> -        goto reconnect;
> +    if (vhost_user_blk_connect(dev) < 0) {
> +        error_setg(errp, "vhost-user-blk: could not connect");
> +        qemu_chr_fe_disconnect(&s->chardev);
> +        goto virtio_err;
>      }
> +    assert(s->connected);
>  
>      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
>                                 sizeof(struct virtio_blk_config));
>      if (ret < 0) {
> -        error_report("vhost-user-blk: get block config failed");
> -        goto reconnect;
> +        error_setg(errp, "vhost-user-blk: get block config failed");
> +        goto vhost_err;
>      }
>  
> -    /* we're fully initialized, now we can operate, so change the handler */
> +    /* we're fully initialized, now we can operate, so add the handler */
>      qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_oper, NULL, (void *)dev,
> +                             vhost_user_blk_event, NULL, (void *)dev,
>                               NULL, true);
>      return;
>  
> +vhost_err:
> +    vhost_dev_cleanup(&s->dev);
>  virtio_err:
>      g_free(s->vhost_vqs);
>      s->vhost_vqs = NULL;
> -- 
> 2.30.2
Kevin Wolf May 4, 2021, 9:10 a.m. UTC | #3
Am 03.05.2021 um 19:01 hat Raphael Norwitz geschrieben:
> So we're not going with the suggestion to retry once or a fixed number
> of times? Any reason why not?

I thought we agreed that we'd add reconnection back in a follow-up
series that also addresses the different kinds of errors and retries
only when it makes sense?

Kevin

> On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > 
> > Usually, an error during initialisation means that the configuration was
> > wrong. Reconnecting won't make the error go away, but just turn the
> > error condition into an endless loop. Avoid this and return errors
> > again.
> > 
> > Additionally, calling vhost_user_blk_disconnect() from the chardev event
> > handler could result in use-after-free because none of the
> > initialisation code expects that the device could just go away in the
> > middle. So removing the call fixes crashes in several places.
> > 
> > For example, using a num-queues setting that is incompatible with the
> > backend would result in a crash like this (dereferencing dev->opaque,
> > which is already NULL):
> > 
> >  #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
> >  #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
> >  #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
> >  #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
> >  #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
> >  #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
> >  #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
> >  #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
> >  #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
> >  #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  hw/block/vhost-user-blk.c | 59 +++++++++++----------------------------
> >  1 file changed, 17 insertions(+), 42 deletions(-)
> > 
> > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > index 7c85248a7b..c0b9958da1 100644
> > --- a/hw/block/vhost-user-blk.c
> > +++ b/hw/block/vhost-user-blk.c
> > @@ -50,6 +50,8 @@ static const int user_feature_bits[] = {
> >      VHOST_INVALID_FEATURE_BIT
> >  };
> >  
> > +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
> > +
> >  static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
> >  {
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > @@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
> >      vhost_dev_cleanup(&s->dev);
> >  }
> >  
> > -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> > -                                 bool realized);
> > -
> > -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
> > -{
> > -    vhost_user_blk_event(opaque, event, false);
> > -}
> > -
> > -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
> > -{
> > -    vhost_user_blk_event(opaque, event, true);
> > -}
> > -
> >  static void vhost_user_blk_chr_closed_bh(void *opaque)
> >  {
> >      DeviceState *dev = opaque;
> > @@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque)
> >      VHostUserBlk *s = VHOST_USER_BLK(vdev);
> >  
> >      vhost_user_blk_disconnect(dev);
> > -    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
> > -            vhost_user_blk_event_oper, NULL, opaque, NULL, true);
> > +    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
> > +                             NULL, opaque, NULL, true);
> >  }
> >  
> > -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> > -                                 bool realized)
> > +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
> >  {
> >      DeviceState *dev = opaque;
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > @@ -401,17 +389,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> >          }
> >          break;
> >      case CHR_EVENT_CLOSED:
> > -        /*
> > -         * Closing the connection should happen differently on device
> > -         * initialization and operation stages.
> > -         * On initalization, we want to re-start vhost_dev initialization
> > -         * from the very beginning right away when the connection is closed,
> > -         * so we clean up vhost_dev on each connection closing.
> > -         * On operation, we want to postpone vhost_dev cleanup to let the
> > -         * other code perform its own cleanup sequence using vhost_dev data
> > -         * (e.g. vhost_dev_set_log).
> > -         */
> > -        if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
> > +        if (!runstate_check(RUN_STATE_SHUTDOWN)) {
> >              /*
> >               * A close event may happen during a read/write, but vhost
> >               * code assumes the vhost_dev remains setup, so delay the
> > @@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
> >               * knowing its type (in this case vhost-user).
> >               */
> >              s->dev.started = false;
> > -        } else {
> > -            vhost_user_blk_disconnect(dev);
> >          }
> >          break;
> >      case CHR_EVENT_BREAK:
> > @@ -489,33 +465,32 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
> >      s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
> >      s->connected = false;
> >  
> > -    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> > -                             vhost_user_blk_event_realize, NULL, (void *)dev,
> > -                             NULL, true);
> > -
> > -reconnect:
> >      if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) {
> >          goto virtio_err;
> >      }
> >  
> > -    /* check whether vhost_user_blk_connect() failed or not */
> > -    if (!s->connected) {
> > -        goto reconnect;
> > +    if (vhost_user_blk_connect(dev) < 0) {
> > +        error_setg(errp, "vhost-user-blk: could not connect");
> > +        qemu_chr_fe_disconnect(&s->chardev);
> > +        goto virtio_err;
> >      }
> > +    assert(s->connected);
> >  
> >      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
> >                                 sizeof(struct virtio_blk_config));
> >      if (ret < 0) {
> > -        error_report("vhost-user-blk: get block config failed");
> > -        goto reconnect;
> > +        error_setg(errp, "vhost-user-blk: get block config failed");
> > +        goto vhost_err;
> >      }
> >  
> > -    /* we're fully initialized, now we can operate, so change the handler */
> > +    /* we're fully initialized, now we can operate, so add the handler */
> >      qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> > -                             vhost_user_blk_event_oper, NULL, (void *)dev,
> > +                             vhost_user_blk_event, NULL, (void *)dev,
> >                               NULL, true);
> >      return;
> >  
> > +vhost_err:
> > +    vhost_dev_cleanup(&s->dev);
> >  virtio_err:
> >      g_free(s->vhost_vqs);
> >      s->vhost_vqs = NULL;
> > -- 
> > 2.30.2
> >
>
Kevin Wolf May 4, 2021, 9:27 a.m. UTC | #4
Am 04.05.2021 um 10:59 hat Michael S. Tsirkin geschrieben:
> On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > 
> > Usually, an error during initialisation means that the configuration was
> > wrong. Reconnecting won't make the error go away, but just turn the
> > error condition into an endless loop. Avoid this and return errors
> > again.
> 
> So there are several possible reasons for an error:
> 
> 1. remote restarted - we would like to reconnect,
>    this was the original use-case for reconnect.
> 
>    I am not very happy that we are killing this usecase.

This patch is killing it only during initialisation, where it's quite
unlikely compared to other cases and where the current implementation is
rather broken. So reverting the broken feature and going back to a
simpler correct state feels like a good idea to me.

The idea is to add the "retry during initialisation" feature back on top
of this, but it requires some more changes in the error paths so that we
can actually distinguish different kinds of errors and don't retry when
we already know that it can't succeed.

> 2. qemu detected an error and closed the connection
>    looks like we try to handle that by reconnect,
>    this is something we should address.

Yes, if qemu produces the error locally, retrying is useless.

> 3. remote failed due to a bad command from qemu.
>    this usecase isn't well supported at the moment.
> 
>    How about supporting it on the remote side? I think that if the
>    data is well-formed just has a configuration remote can not support
>    then instead of closing the connection, remote can wait for
>    commands with need_reply set, and respond with an error. Or at
>    least do it if VHOST_USER_PROTOCOL_F_REPLY_ACK has been negotiated.
>    If VHOST_USER_SET_VRING_ERR is used then signalling that fd might
>    also be reasonable.
> 
>    OTOH if qemu is buggy and sends malformed data and remote detects
>    that then hacing qemu retry forever is ok, might actually be
>    benefitial for debugging.

I haven't really checked this case yet, it seems to be less common.
Explicitly communicating an error is certainly better than just cutting
the connection. But as you say, it means QEMU is buggy, so blindly
retrying in this case is kind of acceptable.

Raphael suggested that we could limit the number of retries during
initialisation so that it wouldn't result in a hang at least.

> > Additionally, calling vhost_user_blk_disconnect() from the chardev event
> > handler could result in use-after-free because none of the
> > initialisation code expects that the device could just go away in the
> > middle. So removing the call fixes crashes in several places.
> > For example, using a num-queues setting that is incompatible with the
> > backend would result in a crash like this (dereferencing dev->opaque,
> > which is already NULL):
> > 
> >  #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
> >  #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
> >  #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
> >  #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
> >  #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
> >  #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
> >  #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
> >  #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
> >  #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
> >  #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660
> 
> Right. So that's definitely something to fix.
> 
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>

Kevin
Michael S. Tsirkin May 4, 2021, 9:44 a.m. UTC | #5
On Tue, May 04, 2021 at 11:27:12AM +0200, Kevin Wolf wrote:
> Am 04.05.2021 um 10:59 hat Michael S. Tsirkin geschrieben:
> > On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> > > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > > 
> > > Usually, an error during initialisation means that the configuration was
> > > wrong. Reconnecting won't make the error go away, but just turn the
> > > error condition into an endless loop. Avoid this and return errors
> > > again.
> > 
> > So there are several possible reasons for an error:
> > 
> > 1. remote restarted - we would like to reconnect,
> >    this was the original use-case for reconnect.
> > 
> >    I am not very happy that we are killing this usecase.
> 
> This patch is killing it only during initialisation, where it's quite
> unlikely compared to other cases and where the current implementation is
> rather broken. So reverting the broken feature and going back to a
> simpler correct state feels like a good idea to me.
> 
> The idea is to add the "retry during initialisation" feature back on top
> of this, but it requires some more changes in the error paths so that we
> can actually distinguish different kinds of errors and don't retry when
> we already know that it can't succeed.

Okay ... let's make all this explicit in the commit log though, ok?

> > 2. qemu detected an error and closed the connection
> >    looks like we try to handle that by reconnect,
> >    this is something we should address.
> 
> Yes, if qemu produces the error locally, retrying is useless.
> 
> > 3. remote failed due to a bad command from qemu.
> >    this usecase isn't well supported at the moment.
> > 
> >    How about supporting it on the remote side? I think that if the
> >    data is well-formed just has a configuration remote can not support
> >    then instead of closing the connection, remote can wait for
> >    commands with need_reply set, and respond with an error. Or at
> >    least do it if VHOST_USER_PROTOCOL_F_REPLY_ACK has been negotiated.
> >    If VHOST_USER_SET_VRING_ERR is used then signalling that fd might
> >    also be reasonable.
> > 
> >    OTOH if qemu is buggy and sends malformed data and remote detects
> >    that then having qemu retry forever is ok, might actually be
> >    beneficial for debugging.
> 
> I haven't really checked this case yet, it seems to be less common.
> Explicitly communicating an error is certainly better than just cutting
> the connection. But as you say, it means QEMU is buggy, so blindly
> retrying in this case is kind of acceptable.
> 
> Raphael suggested that we could limit the number of retries during
> initialisation so that it wouldn't result in a hang at least.

not sure how I feel about random limits ... how would we
set the limit?


> > > Additionally, calling vhost_user_blk_disconnect() from the chardev event
> > > handler could result in use-after-free because none of the
> > > initialisation code expects that the device could just go away in the
> > > middle. So removing the call fixes crashes in several places.
> > > For example, using a num-queues setting that is incompatible with the
> > > backend would result in a crash like this (dereferencing dev->opaque,
> > > which is already NULL):
> > > 
> > >  #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
> > >  #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
> > >  #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
> > >  #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
> > >  #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
> > >  #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
> > >  #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
> > >  #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
> > >  #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
> > >  #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660
> > 
> > Right. So that's definitely something to fix.
> > 
> > > 
> > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> 
> Kevin
Kevin Wolf May 4, 2021, 10:57 a.m. UTC | #6
Am 04.05.2021 um 11:44 hat Michael S. Tsirkin geschrieben:
> On Tue, May 04, 2021 at 11:27:12AM +0200, Kevin Wolf wrote:
> > Am 04.05.2021 um 10:59 hat Michael S. Tsirkin geschrieben:
> > > On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> > > > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > > > 
> > > > Usually, an error during initialisation means that the configuration was
> > > > wrong. Reconnecting won't make the error go away, but just turn the
> > > > error condition into an endless loop. Avoid this and return errors
> > > > again.
> > > 
> > > So there are several possible reasons for an error:
> > > 
> > > 1. remote restarted - we would like to reconnect,
> > >    this was the original use-case for reconnect.
> > > 
> > >    I am not very happy that we are killing this usecase.
> > 
> > This patch is killing it only during initialisation, where it's quite
> > unlikely compared to other cases and where the current implementation is
> > rather broken. So reverting the broken feature and going back to a
> > simpler correct state feels like a good idea to me.
> > 
> > The idea is to add the "retry during initialisation" feature back on top
> > of this, but it requires some more changes in the error paths so that we
> > can actually distinguish different kinds of errors and don't retry when
> > we already know that it can't succeed.
> 
> Okay ... let's make all this explicit in the commit log though, ok?

That's fair, I'll add a paragraph addressing this case when merging the
series, like this:

    Note that this removes the ability to reconnect during
    initialisation (but not during operation) when there is no permanent
    error, but the backend restarts, as the implementation was buggy.
    This feature can be added back in a follow-up series after changing
    error paths to distinguish cases where retrying could help from
    cases with permanent errors.

> > > 2. qemu detected an error and closed the connection
> > >    looks like we try to handle that by reconnect,
> > >    this is something we should address.
> > 
> > Yes, if qemu produces the error locally, retrying is useless.
> > 
> > > 3. remote failed due to a bad command from qemu.
> > >    this usecase isn't well supported at the moment.
> > > 
> > >    How about supporting it on the remote side? I think that if the
> > >    data is well-formed just has a configuration remote can not support
> > >    then instead of closing the connection, remote can wait for
> > >    commands with need_reply set, and respond with an error. Or at
> > >    least do it if VHOST_USER_PROTOCOL_F_REPLY_ACK has been negotiated.
> > >    If VHOST_USER_SET_VRING_ERR is used then signalling that fd might
> > >    also be reasonable.
> > > 
> > >    OTOH if qemu is buggy and sends malformed data and remote detects
> > >    that then hacing qemu retry forever is ok, might actually be
> > >    benefitial for debugging.
> > 
> > I haven't really checked this case yet, it seems to be less common.
> > Explicitly communicating an error is certainly better than just cutting
> > the connection. But as you say, it means QEMU is buggy, so blindly
> > retrying in this case is kind of acceptable.
> > 
> > Raphael suggested that we could limit the number of retries during
> > initialisation so that it wouldn't result in a hang at least.
> 
> > not sure how I feel about random limits ... how would we set the
> > limit?

To be honest, probably even 1 would already be good enough in practice.
Make it 5 or something and you definitely cover any realistic case when
there is no bug involved.

Even hitting this case once requires bad luck with the timing, so that
the restart of the backend coincides with already having connected to
the socket, but not completed the configuration yet, which is a really
short window. Having the backend drop the connection again in the same
short window on the second attempt is an almost sure sign of a bug with
one of the operations done during initialisation.

Even if this corner case turned out to be a bit less unlikely to happen
than I'm thinking (which is, it won't happen at all), randomly failing a
device-add once in a while still feels a lot better than hanging the VM
once in a while.

Kevin
Michael S. Tsirkin May 4, 2021, 11:08 a.m. UTC | #7
On Tue, May 04, 2021 at 12:57:29PM +0200, Kevin Wolf wrote:
> Am 04.05.2021 um 11:44 hat Michael S. Tsirkin geschrieben:
> > On Tue, May 04, 2021 at 11:27:12AM +0200, Kevin Wolf wrote:
> > > Am 04.05.2021 um 10:59 hat Michael S. Tsirkin geschrieben:
> > > > On Thu, Apr 29, 2021 at 07:13:12PM +0200, Kevin Wolf wrote:
> > > > > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > > > > 
> > > > > Usually, an error during initialisation means that the configuration was
> > > > > wrong. Reconnecting won't make the error go away, but just turn the
> > > > > error condition into an endless loop. Avoid this and return errors
> > > > > again.
> > > > 
> > > > So there are several possible reasons for an error:
> > > > 
> > > > 1. remote restarted - we would like to reconnect,
> > > >    this was the original use-case for reconnect.
> > > > 
> > > >    I am not very happy that we are killing this usecase.
> > > 
> > > This patch is killing it only during initialisation, where it's quite
> > > unlikely compared to other cases and where the current implementation is
> > > rather broken. So reverting the broken feature and going back to a
> > > simpler correct state feels like a good idea to me.
> > > 
> > > The idea is to add the "retry during initialisation" feature back on top
> > > of this, but it requires some more changes in the error paths so that we
> > > can actually distinguish different kinds of errors and don't retry when
> > > we already know that it can't succeed.
> > 
> > Okay ... let's make all this explicit in the commit log though, ok?
> 
> That's fair, I'll add a paragraph addressing this case when merging the
> series, like this:
> 
>     Note that this removes the ability to reconnect during
>     initialisation (but not during operation) when there is no permanent
>     error, but the backend restarts, as the implementation was buggy.
>     This feature can be added back in a follow-up series after changing
>     error paths to distinguish cases where retrying could help from
>     cases with permanent errors.
> 
> > > > 2. qemu detected an error and closed the connection
> > > >    looks like we try to handle that by reconnect,
> > > >    this is something we should address.
> > > 
> > > Yes, if qemu produces the error locally, retrying is useless.
> > > 
> > > > 3. remote failed due to a bad command from qemu.
> > > >    this usecase isn't well supported at the moment.
> > > > 
> > > >    How about supporting it on the remote side? I think that if the
> > > >    data is well-formed just has a configuration remote can not support
> > > >    then instead of closing the connection, remote can wait for
> > > >    commands with need_reply set, and respond with an error. Or at
> > > >    least do it if VHOST_USER_PROTOCOL_F_REPLY_ACK has been negotiated.
> > > >    If VHOST_USER_SET_VRING_ERR is used then signalling that fd might
> > > >    also be reasonable.
> > > > 
> > > >    OTOH if qemu is buggy and sends malformed data and remote detects
> > > >    that then having qemu retry forever is ok, might actually be
> > > >    beneficial for debugging.
> > > 
> > > I haven't really checked this case yet, it seems to be less common.
> > > Explicitly communicating an error is certainly better than just cutting
> > > the connection. But as you say, it means QEMU is buggy, so blindly
> > > retrying in this case is kind of acceptable.
> > > 
> > > Raphael suggested that we could limit the number of retries during
> > > initialisation so that it wouldn't result in a hang at least.
> > 
> > not sure how do I feel about random limits ... how would we set the
> > limit?
> 
> To be honest, probably even 1 would already be good enough in practice.
> Make it 5 or something and you definitely cover any realistic case when
> there is no bug involved.
> 
> Even hitting this case once requires bad luck with the timing, so that
> the restart of the backend coincides with already having connected to
> the socket, but not completed the configuration yet, which is a really
> short window. Having the backend drop the connection again in the same
> short window on the second attempt is an almost sure sign of a bug with
> one of the operations done during initialisation.
> 
> Even if this corner case turned out to be a bit less unlikely to happen
> than I'm thinking (which is, it won't happen at all), randomly failing a
> device-add once in a while still feels a lot better than hanging the VM
> once in a while.
> 
> Kevin

Well if backend is e.g. just stuck and connection does not close, then
VM hangs anyway. So IMHO it's not such a big deal.  If we really want to
address this we should handle all this asynchronously. As in make
device-add succeed and then progress in stages but do not block the
monitor. That would be nice but it's a big change in the code.
diff mbox series

Patch

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 7c85248a7b..c0b9958da1 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -50,6 +50,8 @@  static const int user_feature_bits[] = {
     VHOST_INVALID_FEATURE_BIT
 };
 
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
+
 static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
@@ -362,19 +364,6 @@  static void vhost_user_blk_disconnect(DeviceState *dev)
     vhost_dev_cleanup(&s->dev);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
-                                 bool realized);
-
-static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
-{
-    vhost_user_blk_event(opaque, event, false);
-}
-
-static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
-{
-    vhost_user_blk_event(opaque, event, true);
-}
-
 static void vhost_user_blk_chr_closed_bh(void *opaque)
 {
     DeviceState *dev = opaque;
@@ -382,12 +371,11 @@  static void vhost_user_blk_chr_closed_bh(void *opaque)
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
     vhost_user_blk_disconnect(dev);
-    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
-            vhost_user_blk_event_oper, NULL, opaque, NULL, true);
+    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
+                             NULL, opaque, NULL, true);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
-                                 bool realized)
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
 {
     DeviceState *dev = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -401,17 +389,7 @@  static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
         }
         break;
     case CHR_EVENT_CLOSED:
-        /*
-         * Closing the connection should happen differently on device
-         * initialization and operation stages.
-         * On initalization, we want to re-start vhost_dev initialization
-         * from the very beginning right away when the connection is closed,
-         * so we clean up vhost_dev on each connection closing.
-         * On operation, we want to postpone vhost_dev cleanup to let the
-         * other code perform its own cleanup sequence using vhost_dev data
-         * (e.g. vhost_dev_set_log).
-         */
-        if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
+        if (!runstate_check(RUN_STATE_SHUTDOWN)) {
             /*
              * A close event may happen during a read/write, but vhost
              * code assumes the vhost_dev remains setup, so delay the
@@ -431,8 +409,6 @@  static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
              * knowing its type (in this case vhost-user).
              */
             s->dev.started = false;
-        } else {
-            vhost_user_blk_disconnect(dev);
         }
         break;
     case CHR_EVENT_BREAK:
@@ -489,33 +465,32 @@  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
     s->connected = false;
 
-    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
-                             vhost_user_blk_event_realize, NULL, (void *)dev,
-                             NULL, true);
-
-reconnect:
     if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) {
         goto virtio_err;
     }
 
-    /* check whether vhost_user_blk_connect() failed or not */
-    if (!s->connected) {
-        goto reconnect;
+    if (vhost_user_blk_connect(dev) < 0) {
+        error_setg(errp, "vhost-user-blk: could not connect");
+        qemu_chr_fe_disconnect(&s->chardev);
+        goto virtio_err;
     }
+    assert(s->connected);
 
     ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
                                sizeof(struct virtio_blk_config));
     if (ret < 0) {
-        error_report("vhost-user-blk: get block config failed");
-        goto reconnect;
+        error_setg(errp, "vhost-user-blk: get block config failed");
+        goto vhost_err;
     }
 
-    /* we're fully initialized, now we can operate, so change the handler */
+    /* we're fully initialized, now we can operate, so add the handler */
     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
-                             vhost_user_blk_event_oper, NULL, (void *)dev,
+                             vhost_user_blk_event, NULL, (void *)dev,
                              NULL, true);
     return;
 
+vhost_err:
+    vhost_dev_cleanup(&s->dev);
 virtio_err:
     g_free(s->vhost_vqs);
     s->vhost_vqs = NULL;