diff mbox series

[RFC,v2,04/13] vdpa: rewind at get_base, not set_base

Message ID 20230112172434.760850-5-eperezma@redhat.com
State New
Headers show
Series Dynamically switch to vhost shadow virtqueues at vdpa net migration | expand

Commit Message

Eugenio Perez Martin Jan. 12, 2023, 5:24 p.m. UTC
At this moment it is only possible to migrate to a vdpa device running
with x-svq=on. As a protective measure, the rewind of the inflight
descriptors was done at the destination. That way if the source sent a
virtqueue with inuse descriptors they are always discarded.

Since this series also allows migrating to passthrough devices with no
SVQ, the right thing to do is to rewind at the source so the bases of the
vrings are correct.

Support for inflight descriptors may be added in the future.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 include/hw/virtio/vhost-backend.h |  4 +++
 hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
 hw/virtio/vhost.c                 |  3 ++
 3 files changed, 36 insertions(+), 17 deletions(-)

Comments

Jason Wang Jan. 13, 2023, 4:09 a.m. UTC | #1
On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> At this moment it is only possible to migrate to a vdpa device running
> with x-svq=on. As a protective measure, the rewind of the inflight
> descriptors was done at the destination. That way if the source sent a
> virtqueue with inuse descriptors they are always discarded.
>
> Since this series allows to migrate also to passthrough devices with no
> SVQ, the right thing to do is to rewind at the source so base of vrings
> are correct.
>
> Support for inflight descriptors may be added in the future.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>  include/hw/virtio/vhost-backend.h |  4 +++
>  hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
>  hw/virtio/vhost.c                 |  3 ++
>  3 files changed, 36 insertions(+), 17 deletions(-)
>
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index c5ab49051e..ec3fbae58d 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
>
>  typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
>                                         int fd);
> +
> +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
> +
>  typedef struct VhostOps {
>      VhostBackendType backend_type;
>      vhost_backend_init vhost_backend_init;
> @@ -177,6 +180,7 @@ typedef struct VhostOps {
>      vhost_get_device_id_op vhost_get_device_id;
>      vhost_force_iommu_op vhost_force_iommu;
>      vhost_set_config_call_op vhost_set_config_call;
> +    vhost_reset_status_op vhost_reset_status;
>  } VhostOps;
>
>  int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 542e003101..28a52ddc78 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>      if (started) {
>          memory_listener_register(&v->listener, &address_space_memory);
>          return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> -    } else {
> -        vhost_vdpa_reset_device(dev);
> -        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> -                                   VIRTIO_CONFIG_S_DRIVER);
> -        memory_listener_unregister(&v->listener);
> +    }
>
> -        return 0;
> +    return 0;
> +}
> +
> +static void vhost_vdpa_reset_status(struct vhost_dev *dev)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +
> +    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> +        return;
>      }
> +
> +    vhost_vdpa_reset_device(dev);
> +    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> +                                VIRTIO_CONFIG_S_DRIVER);
> +    memory_listener_unregister(&v->listener);
>  }
>
>  static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> @@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
>                                         struct vhost_vring_state *ring)
>  {
>      struct vhost_vdpa *v = dev->opaque;
> -    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
>
> -    /*
> -     * vhost-vdpa devices does not support in-flight requests. Set all of them
> -     * as available.
> -     *
> -     * TODO: This is ok for networking, but other kinds of devices might
> -     * have problems with these retransmissions.
> -     */
> -    while (virtqueue_rewind(vq, 1)) {
> -        continue;
> -    }
>      if (v->shadow_vqs_enabled) {
>          /*
>           * Device vring base was set at device start. SVQ base is handled by
> @@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
>      int ret;
>
>      if (v->shadow_vqs_enabled) {
> +        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> +
> +        /*
> +         * vhost-vdpa devices does not support in-flight requests. Set all of
> +         * them as available.
> +         *
> +         * TODO: This is ok for networking, but other kinds of devices might
> +         * have problems with these retransmissions.
> +         */
> +        while (virtqueue_rewind(vq, 1)) {
> +            continue;
> +        }
> +
>          ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
>          return 0;
>      }
> @@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
>          .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
>          .vhost_force_iommu = vhost_vdpa_force_iommu,
>          .vhost_set_config_call = vhost_vdpa_set_config_call,
> +        .vhost_reset_status = vhost_vdpa_reset_status,

Can we simply use the NetClient stop method here?

Thanks

>  };
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index eb8c4c378c..a266396576 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>                               hdev->vqs + i,
>                               hdev->vq_index + i);
>      }
> +    if (hdev->vhost_ops->vhost_reset_status) {
> +        hdev->vhost_ops->vhost_reset_status(hdev);
> +    }
>
>      if (vhost_dev_has_iommu(hdev)) {
>          if (hdev->vhost_ops->vhost_set_iotlb_callback) {
> --
> 2.31.1
>
Eugenio Perez Martin Jan. 13, 2023, 7:40 a.m. UTC | #2
On Fri, Jan 13, 2023 at 5:10 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> >
> > At this moment it is only possible to migrate to a vdpa device running
> > with x-svq=on. As a protective measure, the rewind of the inflight
> > descriptors was done at the destination. That way if the source sent a
> > virtqueue with inuse descriptors they are always discarded.
> >
> > Since this series allows to migrate also to passthrough devices with no
> > SVQ, the right thing to do is to rewind at the source so base of vrings
> > are correct.
> >
> > Support for inflight descriptors may be added in the future.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >  include/hw/virtio/vhost-backend.h |  4 +++
> >  hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
> >  hw/virtio/vhost.c                 |  3 ++
> >  3 files changed, 36 insertions(+), 17 deletions(-)
> >
> > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> > index c5ab49051e..ec3fbae58d 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
> >
> >  typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
> >                                         int fd);
> > +
> > +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
> > +
> >  typedef struct VhostOps {
> >      VhostBackendType backend_type;
> >      vhost_backend_init vhost_backend_init;
> > @@ -177,6 +180,7 @@ typedef struct VhostOps {
> >      vhost_get_device_id_op vhost_get_device_id;
> >      vhost_force_iommu_op vhost_force_iommu;
> >      vhost_set_config_call_op vhost_set_config_call;
> > +    vhost_reset_status_op vhost_reset_status;
> >  } VhostOps;
> >
> >  int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 542e003101..28a52ddc78 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> >      if (started) {
> >          memory_listener_register(&v->listener, &address_space_memory);
> >          return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> > -    } else {
> > -        vhost_vdpa_reset_device(dev);
> > -        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> > -                                   VIRTIO_CONFIG_S_DRIVER);
> > -        memory_listener_unregister(&v->listener);
> > +    }
> >
> > -        return 0;
> > +    return 0;
> > +}
> > +
> > +static void vhost_vdpa_reset_status(struct vhost_dev *dev)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +
> > +    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> > +        return;
> >      }
> > +
> > +    vhost_vdpa_reset_device(dev);
> > +    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> > +                                VIRTIO_CONFIG_S_DRIVER);
> > +    memory_listener_unregister(&v->listener);
> >  }
> >
> >  static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> > @@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> >                                         struct vhost_vring_state *ring)
> >  {
> >      struct vhost_vdpa *v = dev->opaque;
> > -    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> >
> > -    /*
> > -     * vhost-vdpa devices does not support in-flight requests. Set all of them
> > -     * as available.
> > -     *
> > -     * TODO: This is ok for networking, but other kinds of devices might
> > -     * have problems with these retransmissions.
> > -     */
> > -    while (virtqueue_rewind(vq, 1)) {
> > -        continue;
> > -    }
> >      if (v->shadow_vqs_enabled) {
> >          /*
> >           * Device vring base was set at device start. SVQ base is handled by
> > @@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> >      int ret;
> >
> >      if (v->shadow_vqs_enabled) {
> > +        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> > +
> > +        /*
> > +         * vhost-vdpa devices does not support in-flight requests. Set all of
> > +         * them as available.
> > +         *
> > +         * TODO: This is ok for networking, but other kinds of devices might
> > +         * have problems with these retransmissions.
> > +         */
> > +        while (virtqueue_rewind(vq, 1)) {
> > +            continue;
> > +        }
> > +
> >          ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
> >          return 0;
> >      }
> > @@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
> >          .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
> >          .vhost_force_iommu = vhost_vdpa_force_iommu,
> >          .vhost_set_config_call = vhost_vdpa_set_config_call,
> > +        .vhost_reset_status = vhost_vdpa_reset_status,
>
> Can we simply use the NetClient stop method here?
>

Ouch, I squashed two patches by mistake here.

All the vhost_reset_status part should be independent of this patch,
and I was especially interested in its feedback. It had this message:

    vdpa: move vhost reset after get vring base

    The function vhost.c:vhost_dev_stop calls vhost operation
    vhost_dev_start(false). In the case of vdpa it totally reset and wipes
    the device, making the fetching of the vring base (virtqueue state) totally
    useless.

    The kernel backend does not use vhost_dev_start vhost op callback, but
    vhost-user do. A patch to make vhost_user_dev_start more similar to vdpa
    is desirable, but it can be added on top.

I can resend the series splitting it again, but the conversation may
scatter between versions. Would you prefer me to send a new version?

Regarding the use of NetClient, it feels weird to call net-specific
functions in VhostOps, doesn't it? At the moment vhost ops is
specialized in vhost-kernel, vhost-user and vhost-vdpa. If we want to
make it specific to the kind of device, that makes vhost-vdpa-net too.

Thanks!


> Thanks
>
> >  };
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index eb8c4c378c..a266396576 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
> >                               hdev->vqs + i,
> >                               hdev->vq_index + i);
> >      }
> > +    if (hdev->vhost_ops->vhost_reset_status) {
> > +        hdev->vhost_ops->vhost_reset_status(hdev);
> > +    }
> >
> >      if (vhost_dev_has_iommu(hdev)) {
> >          if (hdev->vhost_ops->vhost_set_iotlb_callback) {
> > --
> > 2.31.1
> >
>
Jason Wang Jan. 16, 2023, 3:32 a.m. UTC | #3
在 2023/1/13 15:40, Eugenio Perez Martin 写道:
> On Fri, Jan 13, 2023 at 5:10 AM Jason Wang <jasowang@redhat.com> wrote:
>> On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>>> At this moment it is only possible to migrate to a vdpa device running
>>> with x-svq=on. As a protective measure, the rewind of the inflight
>>> descriptors was done at the destination. That way if the source sent a
>>> virtqueue with inuse descriptors they are always discarded.
>>>
>>> Since this series allows to migrate also to passthrough devices with no
>>> SVQ, the right thing to do is to rewind at the source so base of vrings
>>> are correct.
>>>
>>> Support for inflight descriptors may be added in the future.
>>>
>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>> ---
>>>   include/hw/virtio/vhost-backend.h |  4 +++
>>>   hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
>>>   hw/virtio/vhost.c                 |  3 ++
>>>   3 files changed, 36 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
>>> index c5ab49051e..ec3fbae58d 100644
>>> --- a/include/hw/virtio/vhost-backend.h
>>> +++ b/include/hw/virtio/vhost-backend.h
>>> @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
>>>
>>>   typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
>>>                                          int fd);
>>> +
>>> +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
>>> +
>>>   typedef struct VhostOps {
>>>       VhostBackendType backend_type;
>>>       vhost_backend_init vhost_backend_init;
>>> @@ -177,6 +180,7 @@ typedef struct VhostOps {
>>>       vhost_get_device_id_op vhost_get_device_id;
>>>       vhost_force_iommu_op vhost_force_iommu;
>>>       vhost_set_config_call_op vhost_set_config_call;
>>> +    vhost_reset_status_op vhost_reset_status;
>>>   } VhostOps;
>>>
>>>   int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
>>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>>> index 542e003101..28a52ddc78 100644
>>> --- a/hw/virtio/vhost-vdpa.c
>>> +++ b/hw/virtio/vhost-vdpa.c
>>> @@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>>>       if (started) {
>>>           memory_listener_register(&v->listener, &address_space_memory);
>>>           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>> -    } else {
>>> -        vhost_vdpa_reset_device(dev);
>>> -        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>> -                                   VIRTIO_CONFIG_S_DRIVER);
>>> -        memory_listener_unregister(&v->listener);
>>> +    }
>>>
>>> -        return 0;
>>> +    return 0;
>>> +}
>>> +
>>> +static void vhost_vdpa_reset_status(struct vhost_dev *dev)
>>> +{
>>> +    struct vhost_vdpa *v = dev->opaque;
>>> +
>>> +    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
>>> +        return;
>>>       }
>>> +
>>> +    vhost_vdpa_reset_device(dev);
>>> +    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>> +                                VIRTIO_CONFIG_S_DRIVER);
>>> +    memory_listener_unregister(&v->listener);
>>>   }
>>>
>>>   static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
>>> @@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
>>>                                          struct vhost_vring_state *ring)
>>>   {
>>>       struct vhost_vdpa *v = dev->opaque;
>>> -    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
>>>
>>> -    /*
>>> -     * vhost-vdpa devices does not support in-flight requests. Set all of them
>>> -     * as available.
>>> -     *
>>> -     * TODO: This is ok for networking, but other kinds of devices might
>>> -     * have problems with these retransmissions.
>>> -     */
>>> -    while (virtqueue_rewind(vq, 1)) {
>>> -        continue;
>>> -    }
>>>       if (v->shadow_vqs_enabled) {
>>>           /*
>>>            * Device vring base was set at device start. SVQ base is handled by
>>> @@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
>>>       int ret;
>>>
>>>       if (v->shadow_vqs_enabled) {
>>> +        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
>>> +
>>> +        /*
>>> +         * vhost-vdpa devices does not support in-flight requests. Set all of
>>> +         * them as available.
>>> +         *
>>> +         * TODO: This is ok for networking, but other kinds of devices might
>>> +         * have problems with these retransmissions.
>>> +         */
>>> +        while (virtqueue_rewind(vq, 1)) {
>>> +            continue;
>>> +        }
>>> +
>>>           ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
>>>           return 0;
>>>       }
>>> @@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
>>>           .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
>>>           .vhost_force_iommu = vhost_vdpa_force_iommu,
>>>           .vhost_set_config_call = vhost_vdpa_set_config_call,
>>> +        .vhost_reset_status = vhost_vdpa_reset_status,
>> Can we simply use the NetClient stop method here?
>>
> Ouch, I squashed two patches by mistake here.
>
> All the vhost_reset_status part should be independent of this patch,
> and I was especially interested in its feedback. It had this message:
>
>      vdpa: move vhost reset after get vring base
>
>      The function vhost.c:vhost_dev_stop calls vhost operation
>      vhost_dev_start(false). In the case of vdpa it totally reset and wipes
>      the device, making the fetching of the vring base (virtqueue state) totally
>      useless.
>
>      The kernel backend does not use vhost_dev_start vhost op callback, but
>      vhost-user do. A patch to make vhost_user_dev_start more similar to vdpa
>      is desirable, but it can be added on top.
>
> I can resend the series splitting it again but conversation may
> scatter between versions. Would you prefer me to send a new version?


I think it can be done in the next version (after we finalize the discussion 
for this version).


>
> Regarding the use of NetClient, it feels weird to call net specific
> functions in VhostOps, doesn't it?


Basically, I meant that the patch calls vhost_reset_status() in 
vhost_dev_stop(). But we've already had vhost_dev_start ops where we 
implement per-backend start/stop logic.

I think it's better to do things in vhost_dev_start():

For devices that can do suspend, we can do suspend. For others we need to 
do reset as a workaround.

And if necessary, we can call nc client ops for net-specific operations 
(if it has any).

Thanks


> At the moment vhost ops is
> specialized in vhost-kernel, vhost-user and vhost-vdpa. If we want to
> make it specific to the kind of device, that makes vhost-vdpa-net too.
>
> Thanks!
>
>
>> Thanks
>>
>>>   };
>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>> index eb8c4c378c..a266396576 100644
>>> --- a/hw/virtio/vhost.c
>>> +++ b/hw/virtio/vhost.c
>>> @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>>>                                hdev->vqs + i,
>>>                                hdev->vq_index + i);
>>>       }
>>> +    if (hdev->vhost_ops->vhost_reset_status) {
>>> +        hdev->vhost_ops->vhost_reset_status(hdev);
>>> +    }
>>>
>>>       if (vhost_dev_has_iommu(hdev)) {
>>>           if (hdev->vhost_ops->vhost_set_iotlb_callback) {
>>> --
>>> 2.31.1
>>>
Eugenio Perez Martin Jan. 16, 2023, 9:53 a.m. UTC | #4
On Mon, Jan 16, 2023 at 4:32 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/1/13 15:40, Eugenio Perez Martin 写道:
> > On Fri, Jan 13, 2023 at 5:10 AM Jason Wang <jasowang@redhat.com> wrote:
> >> On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> >>> At this moment it is only possible to migrate to a vdpa device running
> >>> with x-svq=on. As a protective measure, the rewind of the inflight
> >>> descriptors was done at the destination. That way if the source sent a
> >>> virtqueue with inuse descriptors they are always discarded.
> >>>
> >>> Since this series allows to migrate also to passthrough devices with no
> >>> SVQ, the right thing to do is to rewind at the source so base of vrings
> >>> are correct.
> >>>
> >>> Support for inflight descriptors may be added in the future.
> >>>
> >>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> >>> ---
> >>>   include/hw/virtio/vhost-backend.h |  4 +++
> >>>   hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
> >>>   hw/virtio/vhost.c                 |  3 ++
> >>>   3 files changed, 36 insertions(+), 17 deletions(-)
> >>>
> >>> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> >>> index c5ab49051e..ec3fbae58d 100644
> >>> --- a/include/hw/virtio/vhost-backend.h
> >>> +++ b/include/hw/virtio/vhost-backend.h
> >>> @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
> >>>
> >>>   typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
> >>>                                          int fd);
> >>> +
> >>> +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
> >>> +
> >>>   typedef struct VhostOps {
> >>>       VhostBackendType backend_type;
> >>>       vhost_backend_init vhost_backend_init;
> >>> @@ -177,6 +180,7 @@ typedef struct VhostOps {
> >>>       vhost_get_device_id_op vhost_get_device_id;
> >>>       vhost_force_iommu_op vhost_force_iommu;
> >>>       vhost_set_config_call_op vhost_set_config_call;
> >>> +    vhost_reset_status_op vhost_reset_status;
> >>>   } VhostOps;
> >>>
> >>>   int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
> >>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >>> index 542e003101..28a52ddc78 100644
> >>> --- a/hw/virtio/vhost-vdpa.c
> >>> +++ b/hw/virtio/vhost-vdpa.c
> >>> @@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> >>>       if (started) {
> >>>           memory_listener_register(&v->listener, &address_space_memory);
> >>>           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> >>> -    } else {
> >>> -        vhost_vdpa_reset_device(dev);
> >>> -        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >>> -                                   VIRTIO_CONFIG_S_DRIVER);
> >>> -        memory_listener_unregister(&v->listener);
> >>> +    }
> >>>
> >>> -        return 0;
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static void vhost_vdpa_reset_status(struct vhost_dev *dev)
> >>> +{
> >>> +    struct vhost_vdpa *v = dev->opaque;
> >>> +
> >>> +    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> >>> +        return;
> >>>       }
> >>> +
> >>> +    vhost_vdpa_reset_device(dev);
> >>> +    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >>> +                                VIRTIO_CONFIG_S_DRIVER);
> >>> +    memory_listener_unregister(&v->listener);
> >>>   }
> >>>
> >>>   static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> >>> @@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> >>>                                          struct vhost_vring_state *ring)
> >>>   {
> >>>       struct vhost_vdpa *v = dev->opaque;
> >>> -    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> >>>
> >>> -    /*
> >>> -     * vhost-vdpa devices does not support in-flight requests. Set all of them
> >>> -     * as available.
> >>> -     *
> >>> -     * TODO: This is ok for networking, but other kinds of devices might
> >>> -     * have problems with these retransmissions.
> >>> -     */
> >>> -    while (virtqueue_rewind(vq, 1)) {
> >>> -        continue;
> >>> -    }
> >>>       if (v->shadow_vqs_enabled) {
> >>>           /*
> >>>            * Device vring base was set at device start. SVQ base is handled by
> >>> @@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> >>>       int ret;
> >>>
> >>>       if (v->shadow_vqs_enabled) {
> >>> +        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> >>> +
> >>> +        /*
> >>> +         * vhost-vdpa devices does not support in-flight requests. Set all of
> >>> +         * them as available.
> >>> +         *
> >>> +         * TODO: This is ok for networking, but other kinds of devices might
> >>> +         * have problems with these retransmissions.
> >>> +         */
> >>> +        while (virtqueue_rewind(vq, 1)) {
> >>> +            continue;
> >>> +        }
> >>> +
> >>>           ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
> >>>           return 0;
> >>>       }
> >>> @@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
> >>>           .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
> >>>           .vhost_force_iommu = vhost_vdpa_force_iommu,
> >>>           .vhost_set_config_call = vhost_vdpa_set_config_call,
> >>> +        .vhost_reset_status = vhost_vdpa_reset_status,
> >> Can we simply use the NetClient stop method here?
> >>
> > Ouch, I squashed two patches by mistake here.
> >
> > All the vhost_reset_status part should be independent of this patch,
> > and I was especially interested in its feedback. It had this message:
> >
> >      vdpa: move vhost reset after get vring base
> >
> >      The function vhost.c:vhost_dev_stop calls vhost operation
> >      vhost_dev_start(false). In the case of vdpa it totally reset and wipes
> >      the device, making the fetching of the vring base (virtqueue state) totally
> >      useless.
> >
> >      The kernel backend does not use vhost_dev_start vhost op callback, but
> >      vhost-user do. A patch to make vhost_user_dev_start more similar to vdpa
> >      is desirable, but it can be added on top.
> >
> > I can resend the series splitting it again but conversation may
> > scatter between versions. Would you prefer me to send a new version?
>
>
> I think it can be done in next version (after we finalize the discussion
> for this version).
>
>
> >
> > Regarding the use of NetClient, it feels weird to call net specific
> > functions in VhostOps, doesn't it?
>
>
> Basically, I meant, the patch call vhost_reset_status() in
> vhost_dev_stop(). But we've already had vhost_dev_start ops where we
> implement per backend start/stop logic.
>
> I think it's better to do things in vhost_dev_start():
>
> For device that can do suspend, we can do suspend. For other we need to
> do reset as a workaround.
>

If the device implements _F_SUSPEND we can call suspend in
vhost_dev_start(false) and fetch the vq base after it. But we cannot
call vhost_dev_reset until we get the vq base. If we do it, we will
always get zero there.

If we don't reset the device at vhost_vdpa_dev_start(false), we need to
call a proper reset after getting the base, at least in vdpa. So creating
a new vhost_op should be the right thing to do, shouldn't it?

Hopefully with a better name than vhost_vdpa_reset_status, that's for sure :).

I'm not sure how vhost-user works with this or when it resets the
indexes. My bet is that it never does so at device reinitialization
and instead trusts the VMM's calls to vhost_user_set_base, but I may be wrong.

Thanks!

> And if necessary, we can call nc client ops for net specific operations
> (if it has any).
>
> Thanks
>
>
> > At the moment vhost ops is
> > specialized in vhost-kernel, vhost-user and vhost-vdpa. If we want to
> > make it specific to the kind of device, that makes vhost-vdpa-net too.
> >
> > Thanks!
> >
> >
> >> Thanks
> >>
> >>>   };
> >>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> >>> index eb8c4c378c..a266396576 100644
> >>> --- a/hw/virtio/vhost.c
> >>> +++ b/hw/virtio/vhost.c
> >>> @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
> >>>                                hdev->vqs + i,
> >>>                                hdev->vq_index + i);
> >>>       }
> >>> +    if (hdev->vhost_ops->vhost_reset_status) {
> >>> +        hdev->vhost_ops->vhost_reset_status(hdev);
> >>> +    }
> >>>
> >>>       if (vhost_dev_has_iommu(hdev)) {
> >>>           if (hdev->vhost_ops->vhost_set_iotlb_callback) {
> >>> --
> >>> 2.31.1
> >>>
>
Jason Wang Jan. 17, 2023, 4:38 a.m. UTC | #5
在 2023/1/16 17:53, Eugenio Perez Martin 写道:
> On Mon, Jan 16, 2023 at 4:32 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>> 在 2023/1/13 15:40, Eugenio Perez Martin 写道:
>>> On Fri, Jan 13, 2023 at 5:10 AM Jason Wang <jasowang@redhat.com> wrote:
>>>> On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>>>>> At this moment it is only possible to migrate to a vdpa device running
>>>>> with x-svq=on. As a protective measure, the rewind of the inflight
>>>>> descriptors was done at the destination. That way if the source sent a
>>>>> virtqueue with inuse descriptors they are always discarded.
>>>>>
>>>>> Since this series allows to migrate also to passthrough devices with no
>>>>> SVQ, the right thing to do is to rewind at the source so base of vrings
>>>>> are correct.
>>>>>
>>>>> Support for inflight descriptors may be added in the future.
>>>>>
>>>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>>>> ---
>>>>>    include/hw/virtio/vhost-backend.h |  4 +++
>>>>>    hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
>>>>>    hw/virtio/vhost.c                 |  3 ++
>>>>>    3 files changed, 36 insertions(+), 17 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
>>>>> index c5ab49051e..ec3fbae58d 100644
>>>>> --- a/include/hw/virtio/vhost-backend.h
>>>>> +++ b/include/hw/virtio/vhost-backend.h
>>>>> @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
>>>>>
>>>>>    typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
>>>>>                                           int fd);
>>>>> +
>>>>> +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
>>>>> +
>>>>>    typedef struct VhostOps {
>>>>>        VhostBackendType backend_type;
>>>>>        vhost_backend_init vhost_backend_init;
>>>>> @@ -177,6 +180,7 @@ typedef struct VhostOps {
>>>>>        vhost_get_device_id_op vhost_get_device_id;
>>>>>        vhost_force_iommu_op vhost_force_iommu;
>>>>>        vhost_set_config_call_op vhost_set_config_call;
>>>>> +    vhost_reset_status_op vhost_reset_status;
>>>>>    } VhostOps;
>>>>>
>>>>>    int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
>>>>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>>>>> index 542e003101..28a52ddc78 100644
>>>>> --- a/hw/virtio/vhost-vdpa.c
>>>>> +++ b/hw/virtio/vhost-vdpa.c
>>>>> @@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>>>>>        if (started) {
>>>>>            memory_listener_register(&v->listener, &address_space_memory);
>>>>>            return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>>>> -    } else {
>>>>> -        vhost_vdpa_reset_device(dev);
>>>>> -        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>>>> -                                   VIRTIO_CONFIG_S_DRIVER);
>>>>> -        memory_listener_unregister(&v->listener);
>>>>> +    }
>>>>>
>>>>> -        return 0;
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static void vhost_vdpa_reset_status(struct vhost_dev *dev)
>>>>> +{
>>>>> +    struct vhost_vdpa *v = dev->opaque;
>>>>> +
>>>>> +    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
>>>>> +        return;
>>>>>        }
>>>>> +
>>>>> +    vhost_vdpa_reset_device(dev);
>>>>> +    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>>>> +                                VIRTIO_CONFIG_S_DRIVER);
>>>>> +    memory_listener_unregister(&v->listener);
>>>>>    }
>>>>>
>>>>>    static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
>>>>> @@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
>>>>>                                           struct vhost_vring_state *ring)
>>>>>    {
>>>>>        struct vhost_vdpa *v = dev->opaque;
>>>>> -    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
>>>>>
>>>>> -    /*
>>>>> -     * vhost-vdpa devices does not support in-flight requests. Set all of them
>>>>> -     * as available.
>>>>> -     *
>>>>> -     * TODO: This is ok for networking, but other kinds of devices might
>>>>> -     * have problems with these retransmissions.
>>>>> -     */
>>>>> -    while (virtqueue_rewind(vq, 1)) {
>>>>> -        continue;
>>>>> -    }
>>>>>        if (v->shadow_vqs_enabled) {
>>>>>            /*
>>>>>             * Device vring base was set at device start. SVQ base is handled by
>>>>> @@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
>>>>>        int ret;
>>>>>
>>>>>        if (v->shadow_vqs_enabled) {
>>>>> +        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
>>>>> +
>>>>> +        /*
>>>>> +         * vhost-vdpa devices does not support in-flight requests. Set all of
>>>>> +         * them as available.
>>>>> +         *
>>>>> +         * TODO: This is ok for networking, but other kinds of devices might
>>>>> +         * have problems with these retransmissions.
>>>>> +         */
>>>>> +        while (virtqueue_rewind(vq, 1)) {
>>>>> +            continue;
>>>>> +        }
>>>>> +
>>>>>            ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
>>>>>            return 0;
>>>>>        }
>>>>> @@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
>>>>>            .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
>>>>>            .vhost_force_iommu = vhost_vdpa_force_iommu,
>>>>>            .vhost_set_config_call = vhost_vdpa_set_config_call,
>>>>> +        .vhost_reset_status = vhost_vdpa_reset_status,
>>>> Can we simply use the NetClient stop method here?
>>>>
>>> Ouch, I squashed two patches by mistake here.
>>>
>>> All the vhost_reset_status part should be independent of this patch,
>>> and I was especially interested in its feedback. It had this message:
>>>
>>>       vdpa: move vhost reset after get vring base
>>>
>>>       The function vhost.c:vhost_dev_stop calls vhost operation
>>>       vhost_dev_start(false). In the case of vdpa it totally reset and wipes
>>>       the device, making the fetching of the vring base (virtqueue state) totally
>>>       useless.
>>>
>>>       The kernel backend does not use vhost_dev_start vhost op callback, but
>>>       vhost-user do. A patch to make vhost_user_dev_start more similar to vdpa
>>>       is desirable, but it can be added on top.
>>>
>>> I can resend the series splitting it again but conversation may
>>> scatter between versions. Would you prefer me to send a new version?
>>
>> I think it can be done in next version (after we finalize the discussion
>> for this version).
>>
>>
>>> Regarding the use of NetClient, it feels weird to call net specific
>>> functions in VhostOps, doesn't it?
>>
>> Basically, I meant, the patch call vhost_reset_status() in
>> vhost_dev_stop(). But we've already had vhost_dev_start ops where we
>> implement per backend start/stop logic.
>>
>> I think it's better to do things in vhost_dev_start():
>>
>> For device that can do suspend, we can do suspend. For other we need to
>> do reset as a workaround.
>>
> If the device implements _F_SUSPEND we can call suspend in
> vhost_dev_start(false) and fetch the vq base after it. But we cannot
> call vhost_dev_reset until we get the vq base. If we do it, we will
> always get zero there.


I'm not sure I understand here; that is kind of expected. For a device 
that doesn't support suspend, we can't get the base anyhow, since we need 
to emulate the stop with a reset and then we lose all the state.


>
> If we don't reset the device at vhost_vdpa_dev_start(false) we need to
> call a proper reset after getting the base, at least in vdpa.


This looks racy if we get the base before the reset — the device can 
still move last_avail_idx.


> So to
> create a new vhost_op should be the right thing to do, isn't it?


So we did:

vhost_dev_stop()
     hdev->vhost_ops->vhost_dev_start(hdev, false);
     vhost_virtqueue_stop()
         vhost_get_vring_base()

I don't see any issue if we do suspend in vhost_dev_stop() in this case?

For a device that doesn't support suspend, we do the reset in the stop and 
fail the get_vring_base(); then we can use the software fallback 
virtio_queue_restore_last_avail_idx()

?


>
> Hopefully with a better name than vhost_vdpa_reset_status, that's for sure :).
>
> I'm not sure how vhost-user works with this or when it does reset the
> indexes. My bet is that it never does at the device reinitialization
> and it trusts VMM calls to vhost_user_set_base but I may be wrong.


I think it's safer to not touch the code path for vhost-user; it may 
connect to various kinds of backends, some of which might be fragile.

Thanks


>
> Thanks!
>
>> And if necessary, we can call nc client ops for net specific operations
>> (if it has any).
>>
>> Thanks
>>
>>
>>> At the moment vhost ops is
>>> specialized in vhost-kernel, vhost-user and vhost-vdpa. If we want to
>>> make it specific to the kind of device, that makes vhost-vdpa-net too.
>>>
>>> Thanks!
>>>
>>>
>>>> Thanks
>>>>
>>>>>    };
>>>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>>>> index eb8c4c378c..a266396576 100644
>>>>> --- a/hw/virtio/vhost.c
>>>>> +++ b/hw/virtio/vhost.c
>>>>> @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>>>>>                                 hdev->vqs + i,
>>>>>                                 hdev->vq_index + i);
>>>>>        }
>>>>> +    if (hdev->vhost_ops->vhost_reset_status) {
>>>>> +        hdev->vhost_ops->vhost_reset_status(hdev);
>>>>> +    }
>>>>>
>>>>>        if (vhost_dev_has_iommu(hdev)) {
>>>>>            if (hdev->vhost_ops->vhost_set_iotlb_callback) {
>>>>> --
>>>>> 2.31.1
>>>>>
Eugenio Perez Martin Jan. 17, 2023, 6:57 a.m. UTC | #6
On Tue, Jan 17, 2023 at 5:38 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/1/16 17:53, Eugenio Perez Martin 写道:
> > On Mon, Jan 16, 2023 at 4:32 AM Jason Wang <jasowang@redhat.com> wrote:
> >>
> >> 在 2023/1/13 15:40, Eugenio Perez Martin 写道:
> >>> On Fri, Jan 13, 2023 at 5:10 AM Jason Wang <jasowang@redhat.com> wrote:
> >>>> On Fri, Jan 13, 2023 at 1:24 AM Eugenio Pérez <eperezma@redhat.com> wrote:
> >>>>> At this moment it is only possible to migrate to a vdpa device running
> >>>>> with x-svq=on. As a protective measure, the rewind of the inflight
> >>>>> descriptors was done at the destination. That way if the source sent a
> >>>>> virtqueue with inuse descriptors they are always discarded.
> >>>>>
> >>>>> Since this series allows to migrate also to passthrough devices with no
> >>>>> SVQ, the right thing to do is to rewind at the source so base of vrings
> >>>>> are correct.
> >>>>>
> >>>>> Support for inflight descriptors may be added in the future.
> >>>>>
> >>>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> >>>>> ---
> >>>>>    include/hw/virtio/vhost-backend.h |  4 +++
> >>>>>    hw/virtio/vhost-vdpa.c            | 46 +++++++++++++++++++------------
> >>>>>    hw/virtio/vhost.c                 |  3 ++
> >>>>>    3 files changed, 36 insertions(+), 17 deletions(-)
> >>>>>
> >>>>> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> >>>>> index c5ab49051e..ec3fbae58d 100644
> >>>>> --- a/include/hw/virtio/vhost-backend.h
> >>>>> +++ b/include/hw/virtio/vhost-backend.h
> >>>>> @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
> >>>>>
> >>>>>    typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
> >>>>>                                           int fd);
> >>>>> +
> >>>>> +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
> >>>>> +
> >>>>>    typedef struct VhostOps {
> >>>>>        VhostBackendType backend_type;
> >>>>>        vhost_backend_init vhost_backend_init;
> >>>>> @@ -177,6 +180,7 @@ typedef struct VhostOps {
> >>>>>        vhost_get_device_id_op vhost_get_device_id;
> >>>>>        vhost_force_iommu_op vhost_force_iommu;
> >>>>>        vhost_set_config_call_op vhost_set_config_call;
> >>>>> +    vhost_reset_status_op vhost_reset_status;
> >>>>>    } VhostOps;
> >>>>>
> >>>>>    int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
> >>>>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >>>>> index 542e003101..28a52ddc78 100644
> >>>>> --- a/hw/virtio/vhost-vdpa.c
> >>>>> +++ b/hw/virtio/vhost-vdpa.c
> >>>>> @@ -1132,14 +1132,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> >>>>>        if (started) {
> >>>>>            memory_listener_register(&v->listener, &address_space_memory);
> >>>>>            return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> >>>>> -    } else {
> >>>>> -        vhost_vdpa_reset_device(dev);
> >>>>> -        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >>>>> -                                   VIRTIO_CONFIG_S_DRIVER);
> >>>>> -        memory_listener_unregister(&v->listener);
> >>>>> +    }
> >>>>>
> >>>>> -        return 0;
> >>>>> +    return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static void vhost_vdpa_reset_status(struct vhost_dev *dev)
> >>>>> +{
> >>>>> +    struct vhost_vdpa *v = dev->opaque;
> >>>>> +
> >>>>> +    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> >>>>> +        return;
> >>>>>        }
> >>>>> +
> >>>>> +    vhost_vdpa_reset_device(dev);
> >>>>> +    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >>>>> +                                VIRTIO_CONFIG_S_DRIVER);
> >>>>> +    memory_listener_unregister(&v->listener);
> >>>>>    }
> >>>>>
> >>>>>    static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
> >>>>> @@ -1182,18 +1191,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
> >>>>>                                           struct vhost_vring_state *ring)
> >>>>>    {
> >>>>>        struct vhost_vdpa *v = dev->opaque;
> >>>>> -    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> >>>>>
> >>>>> -    /*
> >>>>> -     * vhost-vdpa devices does not support in-flight requests. Set all of them
> >>>>> -     * as available.
> >>>>> -     *
> >>>>> -     * TODO: This is ok for networking, but other kinds of devices might
> >>>>> -     * have problems with these retransmissions.
> >>>>> -     */
> >>>>> -    while (virtqueue_rewind(vq, 1)) {
> >>>>> -        continue;
> >>>>> -    }
> >>>>>        if (v->shadow_vqs_enabled) {
> >>>>>            /*
> >>>>>             * Device vring base was set at device start. SVQ base is handled by
> >>>>> @@ -1212,6 +1210,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
> >>>>>        int ret;
> >>>>>
> >>>>>        if (v->shadow_vqs_enabled) {
> >>>>> +        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
> >>>>> +
> >>>>> +        /*
> >>>>> +         * vhost-vdpa devices does not support in-flight requests. Set all of
> >>>>> +         * them as available.
> >>>>> +         *
> >>>>> +         * TODO: This is ok for networking, but other kinds of devices might
> >>>>> +         * have problems with these retransmissions.
> >>>>> +         */
> >>>>> +        while (virtqueue_rewind(vq, 1)) {
> >>>>> +            continue;
> >>>>> +        }
> >>>>> +
> >>>>>            ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
> >>>>>            return 0;
> >>>>>        }
> >>>>> @@ -1326,4 +1337,5 @@ const VhostOps vdpa_ops = {
> >>>>>            .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
> >>>>>            .vhost_force_iommu = vhost_vdpa_force_iommu,
> >>>>>            .vhost_set_config_call = vhost_vdpa_set_config_call,
> >>>>> +        .vhost_reset_status = vhost_vdpa_reset_status,
> >>>> Can we simply use the NetClient stop method here?
> >>>>
> >>> Ouch, I squashed two patches by mistake here.
> >>>
> >>> All the vhost_reset_status part should be independent of this patch,
> >>> and I was especially interested in its feedback. It had this message:
> >>>
> >>>       vdpa: move vhost reset after get vring base
> >>>
> >>>       The function vhost.c:vhost_dev_stop calls vhost operation
> >>>       vhost_dev_start(false). In the case of vdpa it totally reset and wipes
> >>>       the device, making the fetching of the vring base (virtqueue state) totally
> >>>       useless.
> >>>
> >>>       The kernel backend does not use vhost_dev_start vhost op callback, but
> >>>       vhost-user do. A patch to make vhost_user_dev_start more similar to vdpa
> >>>       is desirable, but it can be added on top.
> >>>
> >>> I can resend the series splitting it again but conversation may
> >>> scatter between versions. Would you prefer me to send a new version?
> >>
> >> I think it can be done in next version (after we finalize the discussion
> >> for this version).
> >>
> >>
> >>> Regarding the use of NetClient, it feels weird to call net specific
> >>> functions in VhostOps, doesn't it?
> >>
> >> Basically, I meant, the patch call vhost_reset_status() in
> >> vhost_dev_stop(). But we've already had vhost_dev_start ops where we
> >> implement per backend start/stop logic.
> >>
> >> I think it's better to do things in vhost_dev_start():
> >>
> >> For device that can do suspend, we can do suspend. For other we need to
> >> do reset as a workaround.
> >>
> > If the device implements _F_SUSPEND we can call suspend in
> > vhost_dev_start(false) and fetch the vq base after it. But we cannot
> > call vhost_dev_reset until we get the vq base. If we do it, we will
> > always get zero there.
>
>
> I'm not sure I understand here, that is kind of expected. For the device
> that doesn't support suspend, we can't get base anyhow since we need to
> emulate the stop with reset then we lose all the states.
>

That is totally right.

Just for completeness / as a suggestion: we *could* return 0 if the device
does not support suspend and then return failure (<0) at
vring_get_base, and the vhost.c code already tries to emulate it by
fetching the information from guest memory if split. But that is not
included in this series, and I'm not sure it's a good idea in general.

>
> >
> > If we don't reset the device at vhost_vdpa_dev_start(false) we need to
> > call a proper reset after getting the base, at least in vdpa.
>
>
> This looks racy if we do get base before reset? Device can move the
> last_avail_idx.
>

After the reset the last_avail_idx will always be 0 before another
set_base or driver_ok, no matter what. We must get the base between
suspend and reset.

>
> > So to
> > create a new vhost_op should be the right thing to do, isn't it?
>
>
> So we did:
>
> vhost_dev_stop()
>      hdev->vhost_ops->vhost_dev_start(hdev, false);
>      vhost_virtqueue_stop()
>          vhost_get_vring_base()
>
> I don't see any issue if we do suspend in vhost_dev_stop() in this case?
>
> For the device that doesn't support suspend, we do reset in the stop and
> fail the get_vring_base() then we can use software fallback
> virtio_queue_restore_last_avail_idx()
>
> ?
>

There is no issue there.

The question is: Do we need to reset after getting the base? I think
yes, because the device may think it can use other resources and, in
general, other code at start assumes the device is clean. If we want
to do so, we need to introduce a new callback, different from
vhost_dev_start(hdev, false) since it must run after get_base, not
before.

>
> >
> > Hopefully with a better name than vhost_vdpa_reset_status, that's for sure :).
> >
> > I'm not sure how vhost-user works with this or when it does reset the
> > indexes. My bet is that it never does at the device reinitialization
> > and it trusts VMM calls to vhost_user_set_base but I may be wrong.
>
>
> I think it's more safe to not touch the code path for vhost-user, it may
> connect to various kind of backends some of which might be fragile.
>

I agree.

Thanks!

> Thanks
>
>
> >
> > Thanks!
> >
> >> And if necessary, we can call nc client ops for net specific operations
> >> (if it has any).
> >>
> >> Thanks
> >>
> >>
> >>> At the moment vhost ops is
> >>> specialized in vhost-kernel, vhost-user and vhost-vdpa. If we want to
> >>> make it specific to the kind of device, that makes vhost-vdpa-net too.
> >>>
> >>> Thanks!
> >>>
> >>>
> >>>> Thanks
> >>>>
> >>>>>    };
> >>>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> >>>>> index eb8c4c378c..a266396576 100644
> >>>>> --- a/hw/virtio/vhost.c
> >>>>> +++ b/hw/virtio/vhost.c
> >>>>> @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
> >>>>>                                 hdev->vqs + i,
> >>>>>                                 hdev->vq_index + i);
> >>>>>        }
> >>>>> +    if (hdev->vhost_ops->vhost_reset_status) {
> >>>>> +        hdev->vhost_ops->vhost_reset_status(hdev);
> >>>>> +    }
> >>>>>
> >>>>>        if (vhost_dev_has_iommu(hdev)) {
> >>>>>            if (hdev->vhost_ops->vhost_set_iotlb_callback) {
> >>>>> --
> >>>>> 2.31.1
> >>>>>
>
diff mbox series

Patch

diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index c5ab49051e..ec3fbae58d 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -130,6 +130,9 @@  typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
 
 typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
                                        int fd);
+
+typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
+
 typedef struct VhostOps {
     VhostBackendType backend_type;
     vhost_backend_init vhost_backend_init;
@@ -177,6 +180,7 @@  typedef struct VhostOps {
     vhost_get_device_id_op vhost_get_device_id;
     vhost_force_iommu_op vhost_force_iommu;
     vhost_set_config_call_op vhost_set_config_call;
+    vhost_reset_status_op vhost_reset_status;
 } VhostOps;
 
 int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 542e003101..28a52ddc78 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1132,14 +1132,23 @@  static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
     if (started) {
         memory_listener_register(&v->listener, &address_space_memory);
         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-    } else {
-        vhost_vdpa_reset_device(dev);
-        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
-                                   VIRTIO_CONFIG_S_DRIVER);
-        memory_listener_unregister(&v->listener);
+    }
 
-        return 0;
+    return 0;
+}
+
+static void vhost_vdpa_reset_status(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+        return;
     }
+
+    vhost_vdpa_reset_device(dev);
+    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                                VIRTIO_CONFIG_S_DRIVER);
+    memory_listener_unregister(&v->listener);
 }
 
 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1182,18 +1191,7 @@  static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                        struct vhost_vring_state *ring)
 {
     struct vhost_vdpa *v = dev->opaque;
-    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
-    /*
-     * vhost-vdpa devices does not support in-flight requests. Set all of them
-     * as available.
-     *
-     * TODO: This is ok for networking, but other kinds of devices might
-     * have problems with these retransmissions.
-     */
-    while (virtqueue_rewind(vq, 1)) {
-        continue;
-    }
     if (v->shadow_vqs_enabled) {
         /*
          * Device vring base was set at device start. SVQ base is handled by
@@ -1212,6 +1210,19 @@  static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
     int ret;
 
     if (v->shadow_vqs_enabled) {
+        VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
+
+        /*
+         * vhost-vdpa devices does not support in-flight requests. Set all of
+         * them as available.
+         *
+         * TODO: This is ok for networking, but other kinds of devices might
+         * have problems with these retransmissions.
+         */
+        while (virtqueue_rewind(vq, 1)) {
+            continue;
+        }
+
         ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
         return 0;
     }
@@ -1326,4 +1337,5 @@  const VhostOps vdpa_ops = {
         .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
         .vhost_force_iommu = vhost_vdpa_force_iommu,
         .vhost_set_config_call = vhost_vdpa_set_config_call,
+        .vhost_reset_status = vhost_vdpa_reset_status,
 };
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index eb8c4c378c..a266396576 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -2049,6 +2049,9 @@  void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
                              hdev->vqs + i,
                              hdev->vq_index + i);
     }
+    if (hdev->vhost_ops->vhost_reset_status) {
+        hdev->vhost_ops->vhost_reset_status(hdev);
+    }
 
     if (vhost_dev_has_iommu(hdev)) {
         if (hdev->vhost_ops->vhost_set_iotlb_callback) {