diff mbox series

[v4,09/15] vdpa: add vdpa net migration state notifier

Message ID 20230224155438.112797-10-eperezma@redhat.com
State New
Headers show
Series Dynamically switch to vhost shadow virtqueues at vdpa net migration | expand

Commit Message

Eugenio Perez Martin Feb. 24, 2023, 3:54 p.m. UTC
This allows net to restart the device backend to configure SVQ on it.

Ideally, these changes should not be net specific. However, the vdpa net
backend is the one with enough knowledge to configure everything, for
the following reasons:
* Queues might or might not need to be shadowed depending on their kind
  (control vs data).
* Queues need to share the same map translations (iova tree).

Because of that it is cleaner to restart the whole net backend and
configure again as expected, similar to how vhost-kernel moves between
userspace and passthrough.

If more kinds of devices need dynamic switching to SVQ, we can create a
callback struct like VhostOps and move most of the code there.
VhostOps cannot be reused since all vdpa backends share it, and
specializing it just for networking would be too heavy.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v4:
* Delete duplication of set shadow_data and shadow_vqs_enabled moving it
  to data / cvq net start functions.

v3:
* Check for migration state at vdpa device start to enable SVQ in data
  vqs.

v1 from RFC:
* Add TODO to use the resume operation in the future.
* Use migration_in_setup and migration_has_failed instead of a
  complicated switch case.
---
 net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

Comments

Jason Wang Feb. 27, 2023, 8:08 a.m. UTC | #1
在 2023/2/24 23:54, Eugenio Pérez 写道:
> This allows net to restart the device backend to configure SVQ on it.
>
> Ideally, these changes should not be net specific. However, the vdpa net
> backend is the one with enough knowledge to configure everything because
> of some reasons:
> * Queues might need to be shadowed or not depending on its kind (control
>    vs data).
> * Queues need to share the same map translations (iova tree).
>
> Because of that it is cleaner to restart the whole net backend and
> configure again as expected, similar to how vhost-kernel moves between
> userspace and passthrough.
>
> If more kinds of devices need dynamic switching to SVQ we can create a
> callback struct like VhostOps and move most of the code there.
> VhostOps cannot be reused since all vdpa backend share them, and to
> personalize just for networking would be too heavy.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v4:
> * Delete duplication of set shadow_data and shadow_vqs_enabled moving it
>    to data / cvq net start functions.
>
> v3:
> * Check for migration state at vdpa device start to enable SVQ in data
>    vqs.
>
> v1 from RFC:
> * Add TODO to use the resume operation in the future.
> * Use migration_in_setup and migration_has_failed instead of a
>    complicated switch case.
> ---
>   net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index b89c99066a..c5512ddf10 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -26,12 +26,15 @@
>   #include <err.h>
>   #include "standard-headers/linux/virtio_net.h"
>   #include "monitor/monitor.h"
> +#include "migration/migration.h"
> +#include "migration/misc.h"
>   #include "hw/virtio/vhost.h"
>   
>   /* Todo:need to add the multiqueue support here */
>   typedef struct VhostVDPAState {
>       NetClientState nc;
>       struct vhost_vdpa vhost_vdpa;
> +    Notifier migration_state;
>       VHostNetState *vhost_net;
>   
>       /* Control commands shadow buffers */
> @@ -239,10 +242,59 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
>       return DO_UPCAST(VhostVDPAState, nc, nc0);
>   }
>   
> +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> +{
> +    struct vhost_vdpa *v = &s->vhost_vdpa;
> +    VirtIONet *n;
> +    VirtIODevice *vdev;
> +    int data_queue_pairs, cvq, r;
> +
> +    /* We are only called on the first data vqs and only if x-svq is not set */
> +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> +        return;
> +    }
> +
> +    vdev = v->dev->vdev;
> +    n = VIRTIO_NET(vdev);
> +    if (!n->vhost_started) {
> +        return;
> +    }
> +
> +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> +                                  n->max_ncs - n->max_queue_pairs : 0;
> +    /*
> +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> +     * in the future and resume the device if read-only operations between
> +     * suspend and reset goes wrong.
> +     */
> +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> +
> +    /* Start will check migration setup_or_active to configure or not SVQ */
> +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> +    if (unlikely(r < 0)) {
> +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> +    }
> +}
> +
> +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> +{
> +    MigrationState *migration = data;
> +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> +                                     migration_state);
> +
> +    if (migration_in_setup(migration)) {
> +        vhost_vdpa_net_log_global_enable(s, true);
> +    } else if (migration_has_failed(migration)) {
> +        vhost_vdpa_net_log_global_enable(s, false);
> +    }
> +}
> +
>   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
>   {
>       struct vhost_vdpa *v = &s->vhost_vdpa;
>   
> +    add_migration_state_change_notifier(&s->migration_state);
>       if (v->shadow_vqs_enabled) {
>           v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
>                                              v->iova_range.last);
> @@ -256,6 +308,15 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
>   
>       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>   
> +    if (s->always_svq ||
> +        migration_is_setup_or_active(migrate_get_current()->state)) {
> +        v->shadow_vqs_enabled = true;
> +        v->shadow_data = true;
> +    } else {
> +        v->shadow_vqs_enabled = false;
> +        v->shadow_data = false;
> +    }
> +
>       if (v->index == 0) {
>           vhost_vdpa_net_data_start_first(s);
>           return 0;
> @@ -276,6 +337,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
>   
>       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>   
> +    if (s->vhost_vdpa.index == 0) {
> +        remove_migration_state_change_notifier(&s->migration_state);
> +    }


This should work, but I just realized that vhost supports
vhost_dev_set_log(). I wonder if it would be simpler to go that way.

Using vhost_virtqueue_set_addr(, enable_log = true)?

Thanks


> +
>       dev = s->vhost_vdpa.dev;
>       if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
>           g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> @@ -412,11 +477,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>       s = DO_UPCAST(VhostVDPAState, nc, nc);
>       v = &s->vhost_vdpa;
>   
> -    v->shadow_data = s->always_svq;
> +    s0 = vhost_vdpa_net_first_nc_vdpa(s);
> +    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
>       v->shadow_vqs_enabled = s->always_svq;
>       s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
>   
> -    if (s->always_svq) {
> +    if (s->vhost_vdpa.shadow_data) {
>           /* SVQ is already configured for all virtqueues */
>           goto out;
>       }
> @@ -473,7 +539,6 @@ out:
>           return 0;
>       }
>   
> -    s0 = vhost_vdpa_net_first_nc_vdpa(s);
>       if (s0->vhost_vdpa.iova_tree) {
>           /*
>            * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
> @@ -749,6 +814,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>       s->vhost_vdpa.device_fd = vdpa_device_fd;
>       s->vhost_vdpa.index = queue_pair_index;
>       s->always_svq = svq;
> +    s->migration_state.notify = vdpa_net_migration_state_notifier;
>       s->vhost_vdpa.shadow_vqs_enabled = svq;
>       s->vhost_vdpa.iova_range = iova_range;
>       s->vhost_vdpa.shadow_data = svq;
Eugenio Perez Martin March 1, 2023, 7:26 p.m. UTC | #2
On Mon, Feb 27, 2023 at 9:08 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/2/24 23:54, Eugenio Pérez 写道:
> > This allows net to restart the device backend to configure SVQ on it.
> >
> > Ideally, these changes should not be net specific. However, the vdpa net
> > backend is the one with enough knowledge to configure everything because
> > of some reasons:
> > * Queues might need to be shadowed or not depending on its kind (control
> >    vs data).
> > * Queues need to share the same map translations (iova tree).
> >
> > Because of that it is cleaner to restart the whole net backend and
> > configure again as expected, similar to how vhost-kernel moves between
> > userspace and passthrough.
> >
> > If more kinds of devices need dynamic switching to SVQ we can create a
> > callback struct like VhostOps and move most of the code there.
> > VhostOps cannot be reused since all vdpa backend share them, and to
> > personalize just for networking would be too heavy.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> > v4:
> > * Delete duplication of set shadow_data and shadow_vqs_enabled moving it
> >    to data / cvq net start functions.
> >
> > v3:
> > * Check for migration state at vdpa device start to enable SVQ in data
> >    vqs.
> >
> > v1 from RFC:
> > * Add TODO to use the resume operation in the future.
> > * Use migration_in_setup and migration_has_failed instead of a
> >    complicated switch case.
> > ---
> >   net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--
> >   1 file changed, 69 insertions(+), 3 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index b89c99066a..c5512ddf10 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -26,12 +26,15 @@
> >   #include <err.h>
> >   #include "standard-headers/linux/virtio_net.h"
> >   #include "monitor/monitor.h"
> > +#include "migration/migration.h"
> > +#include "migration/misc.h"
> >   #include "hw/virtio/vhost.h"
> >
> >   /* Todo:need to add the multiqueue support here */
> >   typedef struct VhostVDPAState {
> >       NetClientState nc;
> >       struct vhost_vdpa vhost_vdpa;
> > +    Notifier migration_state;
> >       VHostNetState *vhost_net;
> >
> >       /* Control commands shadow buffers */
> > @@ -239,10 +242,59 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
> >       return DO_UPCAST(VhostVDPAState, nc, nc0);
> >   }
> >
> > +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> > +{
> > +    struct vhost_vdpa *v = &s->vhost_vdpa;
> > +    VirtIONet *n;
> > +    VirtIODevice *vdev;
> > +    int data_queue_pairs, cvq, r;
> > +
> > +    /* We are only called on the first data vqs and only if x-svq is not set */
> > +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> > +        return;
> > +    }
> > +
> > +    vdev = v->dev->vdev;
> > +    n = VIRTIO_NET(vdev);
> > +    if (!n->vhost_started) {
> > +        return;
> > +    }
> > +
> > +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> > +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> > +                                  n->max_ncs - n->max_queue_pairs : 0;
> > +    /*
> > +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> > +     * in the future and resume the device if read-only operations between
> > +     * suspend and reset goes wrong.
> > +     */
> > +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> > +
> > +    /* Start will check migration setup_or_active to configure or not SVQ */
> > +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> > +    if (unlikely(r < 0)) {
> > +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> > +    }
> > +}
> > +
> > +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> > +{
> > +    MigrationState *migration = data;
> > +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> > +                                     migration_state);
> > +
> > +    if (migration_in_setup(migration)) {
> > +        vhost_vdpa_net_log_global_enable(s, true);
> > +    } else if (migration_has_failed(migration)) {
> > +        vhost_vdpa_net_log_global_enable(s, false);
> > +    }
> > +}
> > +
> >   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
> >   {
> >       struct vhost_vdpa *v = &s->vhost_vdpa;
> >
> > +    add_migration_state_change_notifier(&s->migration_state);
> >       if (v->shadow_vqs_enabled) {
> >           v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
> >                                              v->iova_range.last);
> > @@ -256,6 +308,15 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
> >
> >       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> > +    if (s->always_svq ||
> > +        migration_is_setup_or_active(migrate_get_current()->state)) {
> > +        v->shadow_vqs_enabled = true;
> > +        v->shadow_data = true;
> > +    } else {
> > +        v->shadow_vqs_enabled = false;
> > +        v->shadow_data = false;
> > +    }
> > +
> >       if (v->index == 0) {
> >           vhost_vdpa_net_data_start_first(s);
> >           return 0;
> > @@ -276,6 +337,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
> >
> >       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> > +    if (s->vhost_vdpa.index == 0) {
> > +        remove_migration_state_change_notifier(&s->migration_state);
> > +    }
>
>
> This should work but I just realize that vhost support
> vhost_dev_set_log(), I wonder if it would be simpler to go with that way.
>
> Using vhost_virtqueue_set_addr(, enable_log = true)?
>

We can do that but it has the same problem as with checking _F_LOG_ALL
in set_features:

1. We're tearing down a vhost device using a listener registered
against that device, at start / stop.
2. We need to traverse all the devices many times to first get all the
vqs state and then traverse them again to set them up properly.

My two ideas to solve the recursiveness of 1 are:
a. Duplicating vhost_dev_start / vhost_dev_stop at
vhost_vdpa_set_features / vhost_vdpa_set_vring_addr.

This has the same problem as all duplications: It will get out of sync
eventually. For example, the latest changes about configure interrupt
would need to be duplicated in this new call.

b. Add a new parameter to vhost_dev_start/stop to skip the
set_features / set_vring_address step.
Now that the virtio queue reset changes have exposed these functions
it is also possible to call them from vhost-vdpa.

Maybe we can store that parameter in vhost_vdpa so we don't call
vhost_dev_start / stop there instead of affecting all backends, but
the idea is the same.

For problem 2 I still do not have a solution. CVQ / MQ is out of
scope for this series, but I think it will bite us when we add it
(hopefully soon).

Thanks!

> Thanks
>
>
> > +
> >       dev = s->vhost_vdpa.dev;
> >       if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> >           g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> > @@ -412,11 +477,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> >       s = DO_UPCAST(VhostVDPAState, nc, nc);
> >       v = &s->vhost_vdpa;
> >
> > -    v->shadow_data = s->always_svq;
> > +    s0 = vhost_vdpa_net_first_nc_vdpa(s);
> > +    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
> >       v->shadow_vqs_enabled = s->always_svq;
> >       s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> >
> > -    if (s->always_svq) {
> > +    if (s->vhost_vdpa.shadow_data) {
> >           /* SVQ is already configured for all virtqueues */
> >           goto out;
> >       }
> > @@ -473,7 +539,6 @@ out:
> >           return 0;
> >       }
> >
> > -    s0 = vhost_vdpa_net_first_nc_vdpa(s);
> >       if (s0->vhost_vdpa.iova_tree) {
> >           /*
> >            * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
> > @@ -749,6 +814,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >       s->vhost_vdpa.device_fd = vdpa_device_fd;
> >       s->vhost_vdpa.index = queue_pair_index;
> >       s->always_svq = svq;
> > +    s->migration_state.notify = vdpa_net_migration_state_notifier;
> >       s->vhost_vdpa.shadow_vqs_enabled = svq;
> >       s->vhost_vdpa.iova_range = iova_range;
> >       s->vhost_vdpa.shadow_data = svq;
>
Jason Wang March 3, 2023, 3:34 a.m. UTC | #3
在 2023/3/2 03:26, Eugenio Perez Martin 写道:
> On Mon, Feb 27, 2023 at 9:08 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>> 在 2023/2/24 23:54, Eugenio Pérez 写道:
>>> This allows net to restart the device backend to configure SVQ on it.
>>>
>>> Ideally, these changes should not be net specific. However, the vdpa net
>>> backend is the one with enough knowledge to configure everything because
>>> of some reasons:
>>> * Queues might need to be shadowed or not depending on its kind (control
>>>     vs data).
>>> * Queues need to share the same map translations (iova tree).
>>>
>>> Because of that it is cleaner to restart the whole net backend and
>>> configure again as expected, similar to how vhost-kernel moves between
>>> userspace and passthrough.
>>>
>>> If more kinds of devices need dynamic switching to SVQ we can create a
>>> callback struct like VhostOps and move most of the code there.
>>> VhostOps cannot be reused since all vdpa backend share them, and to
>>> personalize just for networking would be too heavy.
>>>
>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>> ---
>>> v4:
>>> * Delete duplication of set shadow_data and shadow_vqs_enabled moving it
>>>     to data / cvq net start functions.
>>>
>>> v3:
>>> * Check for migration state at vdpa device start to enable SVQ in data
>>>     vqs.
>>>
>>> v1 from RFC:
>>> * Add TODO to use the resume operation in the future.
>>> * Use migration_in_setup and migration_has_failed instead of a
>>>     complicated switch case.
>>> ---
>>>    net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--
>>>    1 file changed, 69 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>> index b89c99066a..c5512ddf10 100644
>>> --- a/net/vhost-vdpa.c
>>> +++ b/net/vhost-vdpa.c
>>> @@ -26,12 +26,15 @@
>>>    #include <err.h>
>>>    #include "standard-headers/linux/virtio_net.h"
>>>    #include "monitor/monitor.h"
>>> +#include "migration/migration.h"
>>> +#include "migration/misc.h"
>>>    #include "hw/virtio/vhost.h"
>>>
>>>    /* Todo:need to add the multiqueue support here */
>>>    typedef struct VhostVDPAState {
>>>        NetClientState nc;
>>>        struct vhost_vdpa vhost_vdpa;
>>> +    Notifier migration_state;
>>>        VHostNetState *vhost_net;
>>>
>>>        /* Control commands shadow buffers */
>>> @@ -239,10 +242,59 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
>>>        return DO_UPCAST(VhostVDPAState, nc, nc0);
>>>    }
>>>
>>> +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
>>> +{
>>> +    struct vhost_vdpa *v = &s->vhost_vdpa;
>>> +    VirtIONet *n;
>>> +    VirtIODevice *vdev;
>>> +    int data_queue_pairs, cvq, r;
>>> +
>>> +    /* We are only called on the first data vqs and only if x-svq is not set */
>>> +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
>>> +        return;
>>> +    }
>>> +
>>> +    vdev = v->dev->vdev;
>>> +    n = VIRTIO_NET(vdev);
>>> +    if (!n->vhost_started) {
>>> +        return;
>>> +    }
>>> +
>>> +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
>>> +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
>>> +                                  n->max_ncs - n->max_queue_pairs : 0;
>>> +    /*
>>> +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
>>> +     * in the future and resume the device if read-only operations between
>>> +     * suspend and reset goes wrong.
>>> +     */
>>> +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
>>> +
>>> +    /* Start will check migration setup_or_active to configure or not SVQ */
>>> +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
>>> +    if (unlikely(r < 0)) {
>>> +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
>>> +    }
>>> +}
>>> +
>>> +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
>>> +{
>>> +    MigrationState *migration = data;
>>> +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
>>> +                                     migration_state);
>>> +
>>> +    if (migration_in_setup(migration)) {
>>> +        vhost_vdpa_net_log_global_enable(s, true);
>>> +    } else if (migration_has_failed(migration)) {
>>> +        vhost_vdpa_net_log_global_enable(s, false);
>>> +    }
>>> +}
>>> +
>>>    static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
>>>    {
>>>        struct vhost_vdpa *v = &s->vhost_vdpa;
>>>
>>> +    add_migration_state_change_notifier(&s->migration_state);
>>>        if (v->shadow_vqs_enabled) {
>>>            v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
>>>                                               v->iova_range.last);
>>> @@ -256,6 +308,15 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
>>>
>>>        assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>
>>> +    if (s->always_svq ||
>>> +        migration_is_setup_or_active(migrate_get_current()->state)) {
>>> +        v->shadow_vqs_enabled = true;
>>> +        v->shadow_data = true;
>>> +    } else {
>>> +        v->shadow_vqs_enabled = false;
>>> +        v->shadow_data = false;
>>> +    }
>>> +
>>>        if (v->index == 0) {
>>>            vhost_vdpa_net_data_start_first(s);
>>>            return 0;
>>> @@ -276,6 +337,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
>>>
>>>        assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>
>>> +    if (s->vhost_vdpa.index == 0) {
>>> +        remove_migration_state_change_notifier(&s->migration_state);
>>> +    }
>>
>> This should work but I just realize that vhost support
>> vhost_dev_set_log(), I wonder if it would be simpler to go with that way.
>>
>> Using vhost_virtqueue_set_addr(, enable_log = true)?
>>
> We can do that but it has the same problem as with checking _F_LOG_ALL
> in set_features:
>
> 1. We're tearing down a vhost device using a listener registered
> against that device, at start / stop.
> 2. We need to traverse all the devices many times to first get all the
> vqs state and then transverse them again to set them up properly.
>
> My two ideas to solve the recursiveness of 1 are:
> a. Duplicating vhost_dev_start / vhost_dev_stop at
> vhost_vdpa_set_features / vhost_vdpa_set_vring_addr.
>
> This has the same problem as all duplications: It will get out of sync
> eventually. For example, the latest changes about configure interrupt
> would need to be duplicated in this new call.
>
> b. Add a new parameter to vhost_dev_start/stop to skip the
> set_features / set_vring_address step.
> Now that the virtio queue reset changes have exposed these functions
> it is also possible to call them from vhost-vdpa.
>
> Maybe we can store that parameter in vhost_vdpa so we don't call
> vhost_dev_start / stop there instead of affecting all backends, but
> the idea is the same.
>
> For problem 2 I still do not have a solution. CVQ / MQ Is out of the
> scope for this series but I think it will bite us when we add it
> (hopefully soon).


Thanks for the clarification. I'd suggest documenting the above in the
changelog.


>
> Thanks!
>
>> Thanks
>>
>>
>>> +
>>>        dev = s->vhost_vdpa.dev;
>>>        if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
>>>            g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
>>> @@ -412,11 +477,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>        s = DO_UPCAST(VhostVDPAState, nc, nc);
>>>        v = &s->vhost_vdpa;
>>>
>>> -    v->shadow_data = s->always_svq;
>>> +    s0 = vhost_vdpa_net_first_nc_vdpa(s);
>>> +    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
>>>        v->shadow_vqs_enabled = s->always_svq;
>>>        s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
>>>
>>> -    if (s->always_svq) {
>>> +    if (s->vhost_vdpa.shadow_data) {
>>>            /* SVQ is already configured for all virtqueues */
>>>            goto out;
>>>        }
>>> @@ -473,7 +539,6 @@ out:
>>>            return 0;
>>>        }
>>>
>>> -    s0 = vhost_vdpa_net_first_nc_vdpa(s);
>>>        if (s0->vhost_vdpa.iova_tree) {
>>>            /*
>>>             * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
>>> @@ -749,6 +814,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>>>        s->vhost_vdpa.device_fd = vdpa_device_fd;
>>>        s->vhost_vdpa.index = queue_pair_index;
>>>        s->always_svq = svq;
>>> +    s->migration_state.notify = vdpa_net_migration_state_notifier;
>>>        s->vhost_vdpa.shadow_vqs_enabled = svq;
>>>        s->vhost_vdpa.iova_range = iova_range;
>>>        s->vhost_vdpa.shadow_data = svq;
Eugenio Perez Martin March 3, 2023, 8:42 a.m. UTC | #4
On Fri, Mar 3, 2023 at 4:34 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/3/2 03:26, Eugenio Perez Martin 写道:
> > On Mon, Feb 27, 2023 at 9:08 AM Jason Wang <jasowang@redhat.com> wrote:
> >>
> >> 在 2023/2/24 23:54, Eugenio Pérez 写道:
> >>> This allows net to restart the device backend to configure SVQ on it.
> >>>
> >>> Ideally, these changes should not be net specific. However, the vdpa net
> >>> backend is the one with enough knowledge to configure everything because
> >>> of some reasons:
> >>> * Queues might need to be shadowed or not depending on its kind (control
> >>>     vs data).
> >>> * Queues need to share the same map translations (iova tree).
> >>>
> >>> Because of that it is cleaner to restart the whole net backend and
> >>> configure again as expected, similar to how vhost-kernel moves between
> >>> userspace and passthrough.
> >>>
> >>> If more kinds of devices need dynamic switching to SVQ we can create a
> >>> callback struct like VhostOps and move most of the code there.
> >>> VhostOps cannot be reused since all vdpa backend share them, and to
> >>> personalize just for networking would be too heavy.
> >>>
> >>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> >>> ---
> >>> v4:
> >>> * Delete duplication of set shadow_data and shadow_vqs_enabled moving it
> >>>     to data / cvq net start functions.
> >>>
> >>> v3:
> >>> * Check for migration state at vdpa device start to enable SVQ in data
> >>>     vqs.
> >>>
> >>> v1 from RFC:
> >>> * Add TODO to use the resume operation in the future.
> >>> * Use migration_in_setup and migration_has_failed instead of a
> >>>     complicated switch case.
> >>> ---
> >>>    net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--
> >>>    1 file changed, 69 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> >>> index b89c99066a..c5512ddf10 100644
> >>> --- a/net/vhost-vdpa.c
> >>> +++ b/net/vhost-vdpa.c
> >>> @@ -26,12 +26,15 @@
> >>>    #include <err.h>
> >>>    #include "standard-headers/linux/virtio_net.h"
> >>>    #include "monitor/monitor.h"
> >>> +#include "migration/migration.h"
> >>> +#include "migration/misc.h"
> >>>    #include "hw/virtio/vhost.h"
> >>>
> >>>    /* Todo:need to add the multiqueue support here */
> >>>    typedef struct VhostVDPAState {
> >>>        NetClientState nc;
> >>>        struct vhost_vdpa vhost_vdpa;
> >>> +    Notifier migration_state;
> >>>        VHostNetState *vhost_net;
> >>>
> >>>        /* Control commands shadow buffers */
> >>> @@ -239,10 +242,59 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
> >>>        return DO_UPCAST(VhostVDPAState, nc, nc0);
> >>>    }
> >>>
> >>> +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> >>> +{
> >>> +    struct vhost_vdpa *v = &s->vhost_vdpa;
> >>> +    VirtIONet *n;
> >>> +    VirtIODevice *vdev;
> >>> +    int data_queue_pairs, cvq, r;
> >>> +
> >>> +    /* We are only called on the first data vqs and only if x-svq is not set */
> >>> +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    vdev = v->dev->vdev;
> >>> +    n = VIRTIO_NET(vdev);
> >>> +    if (!n->vhost_started) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> >>> +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> >>> +                                  n->max_ncs - n->max_queue_pairs : 0;
> >>> +    /*
> >>> +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> >>> +     * in the future and resume the device if read-only operations between
> >>> +     * suspend and reset goes wrong.
> >>> +     */
> >>> +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> >>> +
> >>> +    /* Start will check migration setup_or_active to configure or not SVQ */
> >>> +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> >>> +    if (unlikely(r < 0)) {
> >>> +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> >>> +    }
> >>> +}
> >>> +
> >>> +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> >>> +{
> >>> +    MigrationState *migration = data;
> >>> +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> >>> +                                     migration_state);
> >>> +
> >>> +    if (migration_in_setup(migration)) {
> >>> +        vhost_vdpa_net_log_global_enable(s, true);
> >>> +    } else if (migration_has_failed(migration)) {
> >>> +        vhost_vdpa_net_log_global_enable(s, false);
> >>> +    }
> >>> +}
> >>> +
> >>>    static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
> >>>    {
> >>>        struct vhost_vdpa *v = &s->vhost_vdpa;
> >>>
> >>> +    add_migration_state_change_notifier(&s->migration_state);
> >>>        if (v->shadow_vqs_enabled) {
> >>>            v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
> >>>                                               v->iova_range.last);
> >>> @@ -256,6 +308,15 @@ static int vhost_vdpa_net_data_start(NetClientState *nc)
> >>>
> >>>        assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >>>
> >>> +    if (s->always_svq ||
> >>> +        migration_is_setup_or_active(migrate_get_current()->state)) {
> >>> +        v->shadow_vqs_enabled = true;
> >>> +        v->shadow_data = true;
> >>> +    } else {
> >>> +        v->shadow_vqs_enabled = false;
> >>> +        v->shadow_data = false;
> >>> +    }
> >>> +
> >>>        if (v->index == 0) {
> >>>            vhost_vdpa_net_data_start_first(s);
> >>>            return 0;
> >>> @@ -276,6 +337,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
> >>>
> >>>        assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >>>
> >>> +    if (s->vhost_vdpa.index == 0) {
> >>> +        remove_migration_state_change_notifier(&s->migration_state);
> >>> +    }
> >>
> >> This should work but I just realize that vhost support
> >> vhost_dev_set_log(), I wonder if it would be simpler to go with that way.
> >>
> >> Using vhost_virtqueue_set_addr(, enable_log = true)?
> >>
> > We can do that but it has the same problem as with checking _F_LOG_ALL
> > in set_features:
> >
> > 1. We're tearing down a vhost device using a listener registered
> > against that device, at start / stop.
> > 2. We need to traverse all the devices many times to first get all the
> > vqs state and then traverse them again to set them up properly.
> >
> > My two ideas to solve the recursiveness of 1 are:
> > a. Duplicating vhost_dev_start / vhost_dev_stop at
> > vhost_vdpa_set_features / vhost_vdpa_set_vring_addr.
> >
> > This has the same problem as all duplications: It will get out of sync
> > eventually. For example, the latest changes about configure interrupt
> > would need to be duplicated in this new call.
> >
> > b. Add a new parameter to vhost_dev_start/stop to skip the
> > set_features / set_vring_address step.
> > Now that the virtio queue reset changes have exposed these functions
> > it is also possible to call them from vhost-vdpa.
> >
> > Maybe we can store that parameter in vhost_vdpa so we don't call
> > vhost_dev_start / stop there instead of affecting all backends, but
> > the idea is the same.
> >
> > For problem 2 I still do not have a solution. CVQ / MQ is out of
> > scope for this series, but I think it will bite us when we add it
> > (hopefully soon).
>
>
> Thanks for the clarification, I'd suggest documenting the above in the
> changelog.
>

Please let me know if you agree with the following commit message for this patch:

vdpa: add vdpa net migration state notifier

This allows net to restart the device backend to configure SVQ on it.

Ideally, these changes should not be net specific and they could be done
in:
* vhost_vdpa_set_features (with VHOST_F_LOG_ALL)
* vhost_vdpa_set_vring_addr (with .enable_log)
* vhost_vdpa_set_log_base.

However, the vdpa net backend is the one with enough knowledge to
configure everything, for the following reasons:
* Queues might need to be shadowed or not depending on their kind (control
  vs data).
* Queues need to share the same map translations (iova tree).

Also, there are other problems that may have solutions but complicate
the implementation at this stage:
* We're basically duplicating vhost_dev_start and vhost_dev_stop, and
  they could go out of sync.  If we want to reuse them, we need a way to
  skip some function calls to avoid recursion (e.g. vhost_ops ->
  vhost_set_features, vhost_set_vring_addr, ...).
* We need to traverse all vhost_dev of a given net device twice: once to
  stop and get the vq state, and once more after the reset to
  configure properties like address, fd, etc.

Because of that it is cleaner to restart the whole net backend and
configure again as expected, similar to how vhost-kernel moves between
userspace and passthrough.

If more kinds of devices need dynamic switching to SVQ we can:
* Create a callback struct like VhostOps and move most of the code
  there.  VhostOps cannot be reused since all vdpa backends share them,
  and customizing them just for networking would be too heavy.
* Add a parent struct or link all the vhost_vdpa or vhost_dev structs so
  we can traverse them.
---

Thanks!

>
> >
> > Thanks!
> >
> >> Thanks
> >>
> >>
> >>> +
> >>>        dev = s->vhost_vdpa.dev;
> >>>        if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> >>>            g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> >>> @@ -412,11 +477,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> >>>        s = DO_UPCAST(VhostVDPAState, nc, nc);
> >>>        v = &s->vhost_vdpa;
> >>>
> >>> -    v->shadow_data = s->always_svq;
> >>> +    s0 = vhost_vdpa_net_first_nc_vdpa(s);
> >>> +    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
> >>>        v->shadow_vqs_enabled = s->always_svq;
> >>>        s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> >>>
> >>> -    if (s->always_svq) {
> >>> +    if (s->vhost_vdpa.shadow_data) {
> >>>            /* SVQ is already configured for all virtqueues */
> >>>            goto out;
> >>>        }
> >>> @@ -473,7 +539,6 @@ out:
> >>>            return 0;
> >>>        }
> >>>
> >>> -    s0 = vhost_vdpa_net_first_nc_vdpa(s);
> >>>        if (s0->vhost_vdpa.iova_tree) {
> >>>            /*
> >>>             * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
> >>> @@ -749,6 +814,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >>>        s->vhost_vdpa.device_fd = vdpa_device_fd;
> >>>        s->vhost_vdpa.index = queue_pair_index;
> >>>        s->always_svq = svq;
> >>> +    s->migration_state.notify = vdpa_net_migration_state_notifier;
> >>>        s->vhost_vdpa.shadow_vqs_enabled = svq;
> >>>        s->vhost_vdpa.iova_range = iova_range;
> >>>        s->vhost_vdpa.shadow_data = svq;
>
vdpa: add vdpa net migration state notifier

This allows net to restart the device backend to configure SVQ on it.

Ideally, these changes should not be net specific and they could be done
in:
* vhost_vdpa_set_features (with VHOST_F_LOG_ALL)
* vhost_vdpa_set_vring_addr (with .enable_log)
* vhost_vdpa_set_log_base.

However, the vdpa net backend is the one with enough knowledge to
configure everything, for the following reasons:
* Queues might need to be shadowed or not depending on their kind (control
  vs data).
* Queues need to share the same map translations (iova tree).

Also, there are other problems that may have solutions but complicate
the implementation at this stage:
* We're basically duplicating vhost_dev_start and vhost_dev_stop, and
  they could go out of sync.  If we want to reuse them, we need a way to
  skip some function calls to avoid recursion (e.g. vhost_ops ->
  vhost_set_features, vhost_set_vring_addr, ...).
* We need to traverse all vhost_dev of a given net device twice: once to
  stop and get the vq state, and once more after the reset to
  configure properties like address, fd, etc.

Because of that it is cleaner to restart the whole net backend and
configure again as expected, similar to how vhost-kernel moves between
userspace and passthrough.

If more kinds of devices need dynamic switching to SVQ we can:
* Create a callback struct like VhostOps and move most of the code
  there.  VhostOps cannot be reused since all vdpa backends share them,
  and customizing them just for networking would be too heavy.
* Add a parent struct or link all the vhost_vdpa or vhost_dev structs so
  we can traverse them.
diff mbox series

Patch

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index b89c99066a..c5512ddf10 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,12 +26,15 @@ 
 #include <err.h>
 #include "standard-headers/linux/virtio_net.h"
 #include "monitor/monitor.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
 #include "hw/virtio/vhost.h"
 
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
+    Notifier migration_state;
     VHostNetState *vhost_net;
 
     /* Control commands shadow buffers */
@@ -239,10 +242,59 @@  static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
     return DO_UPCAST(VhostVDPAState, nc, nc0);
 }
 
+static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+    VirtIONet *n;
+    VirtIODevice *vdev;
+    int data_queue_pairs, cvq, r;
+
+    /* We are only called on the first data vqs and only if x-svq is not set */
+    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
+        return;
+    }
+
+    vdev = v->dev->vdev;
+    n = VIRTIO_NET(vdev);
+    if (!n->vhost_started) {
+        return;
+    }
+
+    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+                                  n->max_ncs - n->max_queue_pairs : 0;
+    /*
+     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
+     * in the future and resume the device if read-only operations between
+     * suspend and reset goes wrong.
+     */
+    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
+
+    /* Start will check migration setup_or_active to configure or not SVQ */
+    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
+    if (unlikely(r < 0)) {
+        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
+    }
+}
+
+static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *migration = data;
+    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
+                                     migration_state);
+
+    if (migration_in_setup(migration)) {
+        vhost_vdpa_net_log_global_enable(s, true);
+    } else if (migration_has_failed(migration)) {
+        vhost_vdpa_net_log_global_enable(s, false);
+    }
+}
+
 static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
 {
     struct vhost_vdpa *v = &s->vhost_vdpa;
 
+    add_migration_state_change_notifier(&s->migration_state);
     if (v->shadow_vqs_enabled) {
         v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
                                            v->iova_range.last);
@@ -256,6 +308,15 @@  static int vhost_vdpa_net_data_start(NetClientState *nc)
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
+    if (s->always_svq ||
+        migration_is_setup_or_active(migrate_get_current()->state)) {
+        v->shadow_vqs_enabled = true;
+        v->shadow_data = true;
+    } else {
+        v->shadow_vqs_enabled = false;
+        v->shadow_data = false;
+    }
+
     if (v->index == 0) {
         vhost_vdpa_net_data_start_first(s);
         return 0;
@@ -276,6 +337,10 @@  static void vhost_vdpa_net_client_stop(NetClientState *nc)
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
+    if (s->vhost_vdpa.index == 0) {
+        remove_migration_state_change_notifier(&s->migration_state);
+    }
+
     dev = s->vhost_vdpa.dev;
     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
         g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
@@ -412,11 +477,12 @@  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
     s = DO_UPCAST(VhostVDPAState, nc, nc);
     v = &s->vhost_vdpa;
 
-    v->shadow_data = s->always_svq;
+    s0 = vhost_vdpa_net_first_nc_vdpa(s);
+    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
     v->shadow_vqs_enabled = s->always_svq;
     s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
 
-    if (s->always_svq) {
+    if (s->vhost_vdpa.shadow_data) {
         /* SVQ is already configured for all virtqueues */
         goto out;
     }
@@ -473,7 +539,6 @@  out:
         return 0;
     }
 
-    s0 = vhost_vdpa_net_first_nc_vdpa(s);
     if (s0->vhost_vdpa.iova_tree) {
         /*
          * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
@@ -749,6 +814,7 @@  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
     s->always_svq = svq;
+    s->migration_state.notify = vdpa_net_migration_state_notifier;
     s->vhost_vdpa.shadow_vqs_enabled = svq;
     s->vhost_vdpa.iova_range = iova_range;
     s->vhost_vdpa.shadow_data = svq;