[1/5] vhost-user-blk: Don't reconnect during initialisation

Message ID 20210422170221.285006-2-kwolf@redhat.com
State New
Series vhost-user-blk: Error handling fixes during initialisation

Commit Message

Kevin Wolf April 22, 2021, 5:02 p.m. UTC
This is a partial revert of commits 77542d43149 and bc79c87bcde.

Usually, an error during initialisation means that the configuration was
wrong. Reconnecting won't make the error go away, but just turn the
error condition into an endless loop. Avoid this and return errors
again.

Additionally, calling vhost_user_blk_disconnect() from the chardev event
handler could result in use-after-free because none of the
initialisation code expects that the device could just go away in the
middle. So removing the call fixes crashes in several places.

For example, using a num-queues setting that is incompatible with the
backend would result in a crash like this (dereferencing dev->opaque,
which is already NULL):

 #0  0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313
 #1  0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 <vhost_user_read_cb>, user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84
 #2  0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
 #3  0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0
 #4  0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0
 #5  0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402
 #6  0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133
 #7  0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566
 #8  0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510
 #9  0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/vhost-user-blk.c | 54 ++++++++++-----------------------------
 1 file changed, 13 insertions(+), 41 deletions(-)

Comments

Denis Plotnikov April 23, 2021, 7:17 a.m. UTC | #1
Reviewed-by: Denis Plotnikov <den-plotnikov@yandex-team.ru>

On 22.04.2021 20:02, Kevin Wolf wrote:
> [...]
Raphael Norwitz April 28, 2021, 4:52 p.m. UTC | #2
Given what you've shown with the use-after-free, I agree the changes
need to be reverted. I'm a little uneasy about removing the reconnect
logic from the device realization completely though.

On Thu, Apr 22, 2021 at 07:02:17PM +0200, Kevin Wolf wrote:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.
> 

Is that necessarily true? As I understand it, the main use cases for
device reconnect are to allow a device backend to be restarted after a
failure or to allow the backend to be upgraded without restarting the
guest. I agree - misconfiguration could be a common cause of a device
backend crashing at realize time, but couldn't there be others? Maybe
transient memory pressure?

Especially in the case where one process is connecting to many different
vhost-user-blk instances, I could imagine power-ons and incoming
migrations racing with backend restarts quite frequently. Should
these cases cause failures?

We can still hit the infinite looping case you describe post-realize.
Why should we treat pre-realize differently?

> [...]
> @@ -490,31 +466,27 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
>      s->connected = false;
>  
> -    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_realize, NULL, (void *)dev,
> -                             NULL, true);
> -
> -reconnect:
>      if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
>          error_report_err(err);
>          goto virtio_err;
>      }
>  
> -    /* check whether vhost_user_blk_connect() failed or not */
> -    if (!s->connected) {
> -        goto reconnect;
> +    if (vhost_user_blk_connect(dev) < 0) {
> +        qemu_chr_fe_disconnect(&s->chardev);
> +        goto virtio_err;
>      }
> +    assert(s->connected);

Maybe a good compromise here would be to retry some small number of
times (or even just once) so that cases like daemon upgrades and
recoverable crashes racing with power-ons and incoming migrations
don't result in failures? 

As a more general solution, we could have a user defined parameter to
specify a number of repeated connection failures to allow both pre and
post realize before bringing QEMU down. Thoughts?
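
To make the idea concrete, something along these lines, perhaps (a rough
sketch only; "reconnect_retries" is a hypothetical property, nothing that
exists today):

    /*
     * Sketch: keep the realize-time reconnect loop, but bound it with a
     * (hypothetical) "reconnect_retries" property instead of looping
     * forever on a persistent failure.
     */
    int retries = s->reconnect_retries;    /* e.g. default 1 */

    reconnect:
        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
            error_report_err(err);
            goto virtio_err;
        }

        /* check whether vhost_user_blk_connect() failed or not */
        if (!s->connected) {
            if (retries-- > 0) {
                goto reconnect;    /* transient failure: try again */
            }
            error_setg(errp, "vhost-user-blk: backend connection failed");
            goto virtio_err;
        }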

>  
>      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
>                                 sizeof(struct virtio_blk_config));
>      if (ret < 0) {
>          error_report("vhost-user-blk: get block config failed");
> -        goto reconnect;
> +        goto virtio_err;
>      }
>  
> -    /* we're fully initialized, now we can operate, so change the handler */
> +    /* we're fully initialized, now we can operate, so add the handler */
>      qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_oper, NULL, (void *)dev,
> +                             vhost_user_blk_event, NULL, (void *)dev,
>                               NULL, true);
>      return;
>  
> -- 
> 2.30.2
>
Kevin Wolf April 28, 2021, 5:31 p.m. UTC | #3
On 28.04.2021 at 18:52, Raphael Norwitz wrote:
> Given what you've shown with the use-after-free, I agree the changes
> need to be reverted. I'm a little uneasy about removing the reconnect
> logic from the device realization completely though.
> 
> On Thu, Apr 22, 2021 at 07:02:17PM +0200, Kevin Wolf wrote:
> > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > 
> > Usually, an error during initialisation means that the configuration was
> > wrong. Reconnecting won't make the error go away, but just turn the
> > error condition into an endless loop. Avoid this and return errors
> > again.
> > 
> 
> Is that necessarily true? As I understand it, the main use cases for
> device reconnect are to allow a device backend to be restarted after a
> failure or to allow the backend to be upgraded without restarting the
> guest. I agree - misconfiguration could be a common cause of a device
> backend crashing at realize time, but couldn't there be others? Maybe
> transient memory pressure?
> 
> Especially in the case where one process is connecting to many different
> vhost-user-blk instances, I could imagine power-ons and incoming
> migrations racing with backend restarts quite frequently. Should
> these cases cause failures?
> 
> We can still hit the infinite looping case you describe post-realize.
> Why should we treat pre-realize differently?

I think there is one main difference between realize() and later
operation, which is that we can actually deliver an error to the user
during realize(). When we're just starting QEMU and processing the CLI
arguments, failure is very obvious, and in the context of QMP
device-add, the client is actively waiting for a result, too.

Later on, there is no good way to communicate an error (writes to stderr
just end up in some logfile at best, QAPI events can be missed), and
even if there were, we would have to do something with the guest until
the user/management application actually reacts to the error. The latter
is not a problem during realize() because the device wasn't even plugged
in yet.

So I think there are good reasons why it could make sense to distinguish
initialisation and operation.

> [...]
> 
> Maybe a good compromise here would be to retry some small number of
> times (or even just once) so that cases like daemon upgrades and
> recoverable crashes racing with power-ons and incoming migrations
> don't result in failures?
> 
> As a more general solution, we could have a user defined parameter to
> specify a number of repeated connection failures to allow both pre and
> post realize before bringing QEMU down. Thoughts?

Retrying once or even a small number of times sounds reasonable enough.
At first I thought it wouldn't help because restarting the daemon might
take some time, but with qemu_chr_fe_wait_connected() we already wait
until we can successfully connect in each iteration, and we would only
retry for errors that happen afterwards.

But I think what we really want to do before retrying is distinguishing
errors that are actually related to the connection itself from errors
that relate to the content of the communication (i.e. invalid requests
or configuration).
In fact, I think the error I hit wasn't even produced on the remote
side; it came from QEMU itself. Making vhost_user_blk_connect() return
-EAGAIN in the right cases and reconnecting only there sounds much
better than just blindly retrying.
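
Roughly this shape, I think (just a sketch, not tested; neither the -EAGAIN
classification in vhost_user_blk_connect() nor the retry constant exists
yet):

    int ret = -EAGAIN;
    int i;

    for (i = 0; ret == -EAGAIN && i < VHOST_USER_BLK_CONNECT_RETRIES; i++) {
        if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
            error_propagate(errp, err);
            goto virtio_err;
        }
        ret = vhost_user_blk_connect(dev);
        if (ret == -EAGAIN) {
            /* connection-level, transient failure: reconnect and retry */
            qemu_chr_fe_disconnect(&s->chardev);
        }
    }
    if (ret < 0) {
        error_setg(errp, "vhost-user-blk: connecting to backend failed");
        goto virtio_err;
    }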

Of course, adjusting error reporting so that we can distinguish these
cases will probably touch many more places than would be appropriate for
this patch, so I'd suggest that we indeed just revert the reconnection
during realize() in this series, but then try to add some more selective
retry logic back on top of it (without using the event handler, which
only made everything more complicated in a function that waits
synchronously anyway).

Kevin
Raphael Norwitz April 28, 2021, 6:22 p.m. UTC | #4
On Wed, Apr 28, 2021 at 07:31:13PM +0200, Kevin Wolf wrote:
> On 28.04.2021 at 18:52, Raphael Norwitz wrote:
> > Given what you've shown with the use-after-free, I agree the changes
> > need to be reverted. I'm a little uneasy about removing the reconnect
> > logic from the device realization completely though.
> > 
> > On Thu, Apr 22, 2021 at 07:02:17PM +0200, Kevin Wolf wrote:
> > > This is a partial revert of commits 77542d43149 and bc79c87bcde.
> > > 
> > > Usually, an error during initialisation means that the configuration was
> > > wrong. Reconnecting won't make the error go away, but just turn the
> > > error condition into an endless loop. Avoid this and return errors
> > > again.
> > > 
> > 
> > Is that necessarily true? As I understand it, the main use cases for
> > device reconnect are to allow a device backend to be restarted after a
> > failure or to allow the backend to be upgraded without restarting the
> > guest. I agree - misconfiguration could be a common cause of a device
> > backend crashing at realize time, but couldn't there be others? Maybe
> > transient memory pressure?
> > 
> > Especially in the case where one process is connecting to many different
> > vhost-user-blk instances, I could imagine power-ons and incoming
> > migrations racing with backend restarts quite frequently. Should
> > these cases cause failures?
> > 
> > We can still hit the infinite looping case you describe post-realize.
> > Why should we treat pre-realize differently?
> 
> I think there is one main difference between realize() and later
> operation, which is that we can actually deliver an error to the user
> during realize(). When we're just starting QEMU and processing the CLI
> arguments, failure is very obvious, and in the context of QMP
> device-add, the client is actively waiting for a result, too.
> 
> Later on, there is no good way to communicate an error (writes to stderr
> just end up in some logfile at best, QAPI events can be missed), and
> even if there were, we would have to do something with the guest until
> the user/management application actually reacts to the error. The latter
> is not a problem during realize() because the device wasn't even plugged
> in yet.
> 
> So I think there are good reasons why it could make sense to distinguish
> initialisation and operation.
>

Fair enough. I'm happy in this case, provided we remain resilient
against one-off daemon restarts during realize.

> [...]
> > 
> > Maybe a good compromise here would be to retry some small number of
> > times (or even just once) so that cases like daemon upgrades and
> > recoverable crashes racing with power-ons and incoming migrations
> > don't result in failures?
> > 
> > As a more general solution, we could have a user defined parameter to
> > specify a number of repeated connection failures to allow both pre and
> > post realize before bringing QEMU down. Thoughts?
> 
> Retrying once or even a small number of times sounds reasonable enough.
> At first I thought it wouldn't help because restarting the daemon might
> take some time, but with qemu_chr_fe_wait_connected() we already wait
> until we can successfully connect in each iteration, and we would only
> retry for errors that happen afterwards.
> 
> But I think what we really want to do before retrying is distinguishing
> errors that are actually related to the connection itself from errors
> that relate to the content of the communication (i.e. invalid requests
> or configuration).
> In fact, I think the error I hit wasn't even produced on the remote
> side; it came from QEMU itself. Making vhost_user_blk_connect() return
> -EAGAIN in the right cases and reconnecting only there sounds much
> better than just blindly retrying.
> 

Agreed - we should definitely identify any invalid requests or
configuration and treat them differently to legitimate cases.

> Of course, adjusting error reporting so that we can distinguish these
> cases will probably touch many more places than would be appropriate for
> this patch, so I'd suggest that we indeed just revert the reconnection
> during realize() in this series, but then try to add some more selective
> retry logic back on top of it (without using the event handler, which
> only made everything more complicated in a function that waits
> synchronously anyway).

Sounds good to me. I don't see a good reason to use an event handler in
realize since it is synchronous.

> 
> Kevin
>
Philippe Mathieu-Daudé April 28, 2021, 7:37 p.m. UTC | #5
On 4/22/21 7:02 PM, Kevin Wolf wrote:
> This is a partial revert of commits 77542d43149 and bc79c87bcde.
> 
> Usually, an error during initialisation means that the configuration was
> wrong. Reconnecting won't make the error go away, but just turn the
> error condition into an endless loop. Avoid this and return errors
> again.
> 
> Additionally, calling vhost_user_blk_disconnect() from the chardev event
> handler could result in use-after-free because none of the
> initialisation code expects that the device could just go away in the

TIL initialisation wording.

> middle. So removing the call fixes crashes in several places.
> [...]
Kevin Wolf April 29, 2021, 12:48 p.m. UTC | #6
On 22.04.2021 at 19:02, Kevin Wolf wrote:
> [...]
> @@ -490,31 +466,27 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
>      s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
>      s->connected = false;
>  
> -    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_realize, NULL, (void *)dev,
> -                             NULL, true);
> -
> -reconnect:
>      if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
>          error_report_err(err);
>          goto virtio_err;
>      }

Preexisting bug: We have to set errp before jumping to virtio_err,
otherwise the caller (virtio_device_realize()) will take this as success
and crash when it later tries to access things that we've already freed
in the error path.
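
It would need to be something like this (sketch only, not part of this
patch):

    if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
        error_propagate(errp, err);  /* let virtio_device_realize() see the failure */
        goto virtio_err;
    }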

> -    /* check whether vhost_user_blk_connect() failed or not */
> -    if (!s->connected) {
> -        goto reconnect;
> +    if (vhost_user_blk_connect(dev) < 0) {
> +        qemu_chr_fe_disconnect(&s->chardev);
> +        goto virtio_err;

Here I'm newly introducing the same bug (fixed again by patch 2).

>      }
> +    assert(s->connected);
>  
>      ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
>                                 sizeof(struct virtio_blk_config));
>      if (ret < 0) {
>          error_report("vhost-user-blk: get block config failed");
> -        goto reconnect;
> +        goto virtio_err;

And this one is wrong, too.

>      }
>  
> -    /* we're fully initialized, now we can operate, so change the handler */
> +    /* we're fully initialized, now we can operate, so add the handler */
>      qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
> -                             vhost_user_blk_event_oper, NULL, (void *)dev,
> +                             vhost_user_blk_event, NULL, (void *)dev,
>                               NULL, true);
>      return;

So maybe patch 2 should come first and also fix the preexisting bug, and
of course this patch needs a v2 that doesn't introduce the new instances
of the bug.

Kevin
Raphael Norwitz April 29, 2021, 4:53 p.m. UTC | #7
On Thu, Apr 29, 2021 at 02:48:52PM +0200, Kevin Wolf wrote:
> So maybe patch 2 should come first and also fix the preexisting bug, and
> of course this patch needs a v2 that doesn't introduce the new instances
> of the bug.

Sounds good to me.

> 
> Kevin
>

Patch

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index f5e9682703..e824b0a759 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -50,6 +50,8 @@  static const int user_feature_bits[] = {
     VHOST_INVALID_FEATURE_BIT
 };
 
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
+
 static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
@@ -362,19 +364,6 @@  static void vhost_user_blk_disconnect(DeviceState *dev)
     vhost_dev_cleanup(&s->dev);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
-                                 bool realized);
-
-static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
-{
-    vhost_user_blk_event(opaque, event, false);
-}
-
-static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
-{
-    vhost_user_blk_event(opaque, event, true);
-}
-
 static void vhost_user_blk_chr_closed_bh(void *opaque)
 {
     DeviceState *dev = opaque;
@@ -382,12 +371,11 @@  static void vhost_user_blk_chr_closed_bh(void *opaque)
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
     vhost_user_blk_disconnect(dev);
-    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
-            vhost_user_blk_event_oper, NULL, opaque, NULL, true);
+    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
+                             NULL, opaque, NULL, true);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
-                                 bool realized)
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
 {
     DeviceState *dev = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -401,17 +389,7 @@  static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
         }
         break;
     case CHR_EVENT_CLOSED:
-        /*
-         * Closing the connection should happen differently on device
-         * initialization and operation stages.
-         * On initalization, we want to re-start vhost_dev initialization
-         * from the very beginning right away when the connection is closed,
-         * so we clean up vhost_dev on each connection closing.
-         * On operation, we want to postpone vhost_dev cleanup to let the
-         * other code perform its own cleanup sequence using vhost_dev data
-         * (e.g. vhost_dev_set_log).
-         */
-        if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
+        if (!runstate_check(RUN_STATE_SHUTDOWN)) {
             /*
              * A close event may happen during a read/write, but vhost
              * code assumes the vhost_dev remains setup, so delay the
@@ -431,8 +409,6 @@  static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
              * knowing its type (in this case vhost-user).
              */
             s->dev.started = false;
-        } else {
-            vhost_user_blk_disconnect(dev);
         }
         break;
     case CHR_EVENT_BREAK:
@@ -490,31 +466,27 @@  static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
     s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
     s->connected = false;
 
-    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
-                             vhost_user_blk_event_realize, NULL, (void *)dev,
-                             NULL, true);
-
-reconnect:
     if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
         error_report_err(err);
         goto virtio_err;
     }
 
-    /* check whether vhost_user_blk_connect() failed or not */
-    if (!s->connected) {
-        goto reconnect;
+    if (vhost_user_blk_connect(dev) < 0) {
+        qemu_chr_fe_disconnect(&s->chardev);
+        goto virtio_err;
     }
+    assert(s->connected);
 
     ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
                                sizeof(struct virtio_blk_config));
     if (ret < 0) {
         error_report("vhost-user-blk: get block config failed");
-        goto reconnect;
+        goto virtio_err;
     }
 
-    /* we're fully initialized, now we can operate, so change the handler */
+    /* we're fully initialized, now we can operate, so add the handler */
     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
-                             vhost_user_blk_event_oper, NULL, (void *)dev,
+                             vhost_user_blk_event, NULL, (void *)dev,
                              NULL, true);
     return;