diff mbox series

[v2,07/13] vdpa: add vdpa net migration state notifier

Message ID 20230208094253.702672-8-eperezma@redhat.com
State New
Headers show
Series Dynamically switch to vhost shadow virtqueues at vdpa net migration | expand

Commit Message

Eugenio Perez Martin Feb. 8, 2023, 9:42 a.m. UTC
This allows net to restart the device backend to configure SVQ on it.

Ideally, these changes should not be net specific. However, the vdpa net
backend is the one with enough knowledge to configure everything because
of some reasons:
* Queues might need to be shadowed or not depending on its kind (control
  vs data).
* Queues need to share the same map translations (iova tree).

Because of that it is cleaner to restart the whole net backend and
configure again as expected, similar to how vhost-kernel moves between
userspace and passthrough.

If more kinds of devices need dynamic switching to SVQ we can create a
callback struct like VhostOps and move most of the code there.
VhostOps cannot be reused since all vdpa backend share them, and to
personalize just for networking would be too heavy.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v3:
* Add TODO to use the resume operation in the future.
* Use migration_in_setup and migration_has_failed instead of a
  complicated switch case.
---
 net/vhost-vdpa.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

Comments

Si-Wei Liu Feb. 13, 2023, 6:50 a.m. UTC | #1
On 2/8/2023 1:42 AM, Eugenio Pérez wrote:
> This allows net to restart the device backend to configure SVQ on it.
>
> Ideally, these changes should not be net specific. However, the vdpa net
> backend is the one with enough knowledge to configure everything because
> of some reasons:
> * Queues might need to be shadowed or not depending on its kind (control
>    vs data).
> * Queues need to share the same map translations (iova tree).
>
> Because of that it is cleaner to restart the whole net backend and
> configure again as expected, similar to how vhost-kernel moves between
> userspace and passthrough.
>
> If more kinds of devices need dynamic switching to SVQ we can create a
> callback struct like VhostOps and move most of the code there.
> VhostOps cannot be reused since all vdpa backend share them, and to
> personalize just for networking would be too heavy.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v3:
> * Add TODO to use the resume operation in the future.
> * Use migration_in_setup and migration_has_failed instead of a
>    complicated switch case.
> ---
>   net/vhost-vdpa.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 76 insertions(+)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index dd686b4514..bca13f97fd 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -26,12 +26,14 @@
>   #include <err.h>
>   #include "standard-headers/linux/virtio_net.h"
>   #include "monitor/monitor.h"
> +#include "migration/misc.h"
>   #include "hw/virtio/vhost.h"
>   
>   /* Todo:need to add the multiqueue support here */
>   typedef struct VhostVDPAState {
>       NetClientState nc;
>       struct vhost_vdpa vhost_vdpa;
> +    Notifier migration_state;
>       VHostNetState *vhost_net;
>   
>       /* Control commands shadow buffers */
> @@ -241,10 +243,79 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
>       return DO_UPCAST(VhostVDPAState, nc, nc0);
>   }
>   
> +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> +{
> +    struct vhost_vdpa *v = &s->vhost_vdpa;
> +    VirtIONet *n;
> +    VirtIODevice *vdev;
> +    int data_queue_pairs, cvq, r;
> +    NetClientState *peer;
> +
> +    /* We are only called on the first data vqs and only if x-svq is not set */
> +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> +        return;
> +    }
> +
> +    vdev = v->dev->vdev;
> +    n = VIRTIO_NET(vdev);
> +    if (!n->vhost_started) {
> +        return;
What if vhost gets started after migration is started, will svq still be 
(dynamically) enabled during vhost_dev_start()? I don't see relevant 
code to deal with it?

> +    }
> +
> +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> +                                  n->max_ncs - n->max_queue_pairs : 0;
> +    /*
> +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> +     * in the future and resume the device if read-only operations between
> +     * suspend and reset goes wrong.
> +     */
> +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> +
> +    peer = s->nc.peer;
> +    for (int i = 0; i < data_queue_pairs + cvq; i++) {
> +        VhostVDPAState *vdpa_state;
> +        NetClientState *nc;
> +
> +        if (i < data_queue_pairs) {
> +            nc = qemu_get_peer(peer, i);
> +        } else {
> +            nc = qemu_get_peer(peer, n->max_queue_pairs);
> +        }
> +
> +        vdpa_state = DO_UPCAST(VhostVDPAState, nc, nc);
> +        vdpa_state->vhost_vdpa.shadow_data = enable;
Don't get why shadow_data is set on cvq's vhost_vdpa? This may result in 
address space collision: data vq's iova getting improperly allocated on 
cvq's address space in vhost_vdpa_listener_region_{add,del}(). Noted 
currently there's an issue where guest VM's memory listener registration 
is always hooked to the last vq, which could be on the cvq in a 
different iova address space VHOST_VDPA_NET_CVQ_ASID.

Thanks,
-Siwei

> +
> +        if (i < data_queue_pairs) {
> +            /* Do not override CVQ shadow_vqs_enabled */
> +            vdpa_state->vhost_vdpa.shadow_vqs_enabled = enable;
> +        }
> +    }
> +
> +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> +    if (unlikely(r < 0)) {
> +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> +    }
> +}
> +
> +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> +{
> +    MigrationState *migration = data;
> +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> +                                     migration_state);
> +
> +    if (migration_in_setup(migration)) {
> +        vhost_vdpa_net_log_global_enable(s, true);
> +    } else if (migration_has_failed(migration)) {
> +        vhost_vdpa_net_log_global_enable(s, false);
> +    }
> +}
> +
>   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
>   {
>       struct vhost_vdpa *v = &s->vhost_vdpa;
>   
> +    add_migration_state_change_notifier(&s->migration_state);
>       if (v->shadow_vqs_enabled) {
>           v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
>                                              v->iova_range.last);
> @@ -278,6 +349,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
>   
>       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>   
> +    if (s->vhost_vdpa.index == 0) {
> +        remove_migration_state_change_notifier(&s->migration_state);
> +    }
> +
>       dev = s->vhost_vdpa.dev;
>       if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
>           g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> @@ -741,6 +816,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>       s->vhost_vdpa.device_fd = vdpa_device_fd;
>       s->vhost_vdpa.index = queue_pair_index;
>       s->always_svq = svq;
> +    s->migration_state.notify = vdpa_net_migration_state_notifier;
>       s->vhost_vdpa.shadow_vqs_enabled = svq;
>       s->vhost_vdpa.iova_range = iova_range;
>       s->vhost_vdpa.shadow_data = svq;
Eugenio Perez Martin Feb. 13, 2023, 3:51 p.m. UTC | #2
On Mon, Feb 13, 2023 at 7:51 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
> On 2/8/2023 1:42 AM, Eugenio Pérez wrote:
> > This allows net to restart the device backend to configure SVQ on it.
> >
> > Ideally, these changes should not be net specific. However, the vdpa net
> > backend is the one with enough knowledge to configure everything because
> > of some reasons:
> > * Queues might need to be shadowed or not depending on its kind (control
> >    vs data).
> > * Queues need to share the same map translations (iova tree).
> >
> > Because of that it is cleaner to restart the whole net backend and
> > configure again as expected, similar to how vhost-kernel moves between
> > userspace and passthrough.
> >
> > If more kinds of devices need dynamic switching to SVQ we can create a
> > callback struct like VhostOps and move most of the code there.
> > VhostOps cannot be reused since all vdpa backend share them, and to
> > personalize just for networking would be too heavy.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> > v3:
> > * Add TODO to use the resume operation in the future.
> > * Use migration_in_setup and migration_has_failed instead of a
> >    complicated switch case.
> > ---
> >   net/vhost-vdpa.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 76 insertions(+)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index dd686b4514..bca13f97fd 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -26,12 +26,14 @@
> >   #include <err.h>
> >   #include "standard-headers/linux/virtio_net.h"
> >   #include "monitor/monitor.h"
> > +#include "migration/misc.h"
> >   #include "hw/virtio/vhost.h"
> >
> >   /* Todo:need to add the multiqueue support here */
> >   typedef struct VhostVDPAState {
> >       NetClientState nc;
> >       struct vhost_vdpa vhost_vdpa;
> > +    Notifier migration_state;
> >       VHostNetState *vhost_net;
> >
> >       /* Control commands shadow buffers */
> > @@ -241,10 +243,79 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
> >       return DO_UPCAST(VhostVDPAState, nc, nc0);
> >   }
> >
> > +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> > +{
> > +    struct vhost_vdpa *v = &s->vhost_vdpa;
> > +    VirtIONet *n;
> > +    VirtIODevice *vdev;
> > +    int data_queue_pairs, cvq, r;
> > +    NetClientState *peer;
> > +
> > +    /* We are only called on the first data vqs and only if x-svq is not set */
> > +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> > +        return;
> > +    }
> > +
> > +    vdev = v->dev->vdev;
> > +    n = VIRTIO_NET(vdev);
> > +    if (!n->vhost_started) {
> > +        return;
> What if vhost gets started after migration is started, will svq still be
> (dynamically) enabled during vhost_dev_start()? I don't see relevant
> code to deal with it?
>

Good catch. v->shadow_vqs_enabled must change even if
!n->vhost_started. That should be the only code needed.

Also, migration listener must be registered from qemu startup, not on
device start.

> > +    }
> > +
> > +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> > +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> > +                                  n->max_ncs - n->max_queue_pairs : 0;
> > +    /*
> > +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> > +     * in the future and resume the device if read-only operations between
> > +     * suspend and reset goes wrong.
> > +     */
> > +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> > +
> > +    peer = s->nc.peer;
> > +    for (int i = 0; i < data_queue_pairs + cvq; i++) {
> > +        VhostVDPAState *vdpa_state;
> > +        NetClientState *nc;
> > +
> > +        if (i < data_queue_pairs) {
> > +            nc = qemu_get_peer(peer, i);
> > +        } else {
> > +            nc = qemu_get_peer(peer, n->max_queue_pairs);
> > +        }
> > +
> > +        vdpa_state = DO_UPCAST(VhostVDPAState, nc, nc);
> > +        vdpa_state->vhost_vdpa.shadow_data = enable;
> Don't get why shadow_data is set on cvq's vhost_vdpa? This may result in
> address space collision: data vq's iova getting improperly allocated on
> cvq's address space in vhost_vdpa_listener_region_{add,del}(). Noted
> currently there's an issue where guest VM's memory listener registration
> is always hooked to the last vq, which could be on the cvq in a
> different iova address space VHOST_VDPA_NET_CVQ_ASID.
>

Let me answer in reverse. guest VM's memory listener registration is
effectively always hooked to the last vq, that's why shadow_data is
needed.

In the past it was enough with v->shadow_vqs_enabled. However, since
the introduction of ASID support & CVQ tracking through it, the
listener (hooked at CVQ) needs to know if it should use iova tree or
not. That's why a separate variable shadow_data is needed.

That way, it may happen that cvq vhost_vdpa->shadow_vqs_enabled = true
but cvq vhost_vdpa->shadow_data = false.

Is that clearer?

Thanks!

> Thanks,
> -Siwei
>
> > +
> > +        if (i < data_queue_pairs) {
> > +            /* Do not override CVQ shadow_vqs_enabled */
> > +            vdpa_state->vhost_vdpa.shadow_vqs_enabled = enable;
> > +        }
> > +    }
> > +
> > +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> > +    if (unlikely(r < 0)) {
> > +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> > +    }
> > +}
> > +
> > +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> > +{
> > +    MigrationState *migration = data;
> > +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> > +                                     migration_state);
> > +
> > +    if (migration_in_setup(migration)) {
> > +        vhost_vdpa_net_log_global_enable(s, true);
> > +    } else if (migration_has_failed(migration)) {
> > +        vhost_vdpa_net_log_global_enable(s, false);
> > +    }
> > +}
> > +
> >   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
> >   {
> >       struct vhost_vdpa *v = &s->vhost_vdpa;
> >
> > +    add_migration_state_change_notifier(&s->migration_state);
> >       if (v->shadow_vqs_enabled) {
> >           v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
> >                                              v->iova_range.last);
> > @@ -278,6 +349,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
> >
> >       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> > +    if (s->vhost_vdpa.index == 0) {
> > +        remove_migration_state_change_notifier(&s->migration_state);
> > +    }
> > +
> >       dev = s->vhost_vdpa.dev;
> >       if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> >           g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> > @@ -741,6 +816,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >       s->vhost_vdpa.device_fd = vdpa_device_fd;
> >       s->vhost_vdpa.index = queue_pair_index;
> >       s->always_svq = svq;
> > +    s->migration_state.notify = vdpa_net_migration_state_notifier;
> >       s->vhost_vdpa.shadow_vqs_enabled = svq;
> >       s->vhost_vdpa.iova_range = iova_range;
> >       s->vhost_vdpa.shadow_data = svq;
>
Jason Wang Feb. 22, 2023, 3:55 a.m. UTC | #3
在 2023/2/8 17:42, Eugenio Pérez 写道:
> This allows net to restart the device backend to configure SVQ on it.
>
> Ideally, these changes should not be net specific. However, the vdpa net
> backend is the one with enough knowledge to configure everything because
> of some reasons:
> * Queues might need to be shadowed or not depending on its kind (control
>    vs data).
> * Queues need to share the same map translations (iova tree).
>
> Because of that it is cleaner to restart the whole net backend and
> configure again as expected, similar to how vhost-kernel moves between
> userspace and passthrough.
>
> If more kinds of devices need dynamic switching to SVQ we can create a
> callback struct like VhostOps and move most of the code there.
> VhostOps cannot be reused since all vdpa backend share them, and to
> personalize just for networking would be too heavy.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v3:
> * Add TODO to use the resume operation in the future.
> * Use migration_in_setup and migration_has_failed instead of a
>    complicated switch case.
> ---
>   net/vhost-vdpa.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 76 insertions(+)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index dd686b4514..bca13f97fd 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -26,12 +26,14 @@
>   #include <err.h>
>   #include "standard-headers/linux/virtio_net.h"
>   #include "monitor/monitor.h"
> +#include "migration/misc.h"
>   #include "hw/virtio/vhost.h"
>   
>   /* Todo:need to add the multiqueue support here */
>   typedef struct VhostVDPAState {
>       NetClientState nc;
>       struct vhost_vdpa vhost_vdpa;
> +    Notifier migration_state;
>       VHostNetState *vhost_net;
>   
>       /* Control commands shadow buffers */
> @@ -241,10 +243,79 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
>       return DO_UPCAST(VhostVDPAState, nc, nc0);
>   }
>   
> +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> +{
> +    struct vhost_vdpa *v = &s->vhost_vdpa;
> +    VirtIONet *n;
> +    VirtIODevice *vdev;
> +    int data_queue_pairs, cvq, r;
> +    NetClientState *peer;
> +
> +    /* We are only called on the first data vqs and only if x-svq is not set */
> +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> +        return;
> +    }
> +
> +    vdev = v->dev->vdev;
> +    n = VIRTIO_NET(vdev);


Let's tweak the code to move those initialization to the beginning of 
the function.


> +    if (!n->vhost_started) {
> +        return;
> +    }


What happens if the vhost is started during the live migration?


> +
> +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> +                                  n->max_ncs - n->max_queue_pairs : 0;
> +    /*
> +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> +     * in the future and resume the device if read-only operations between
> +     * suspend and reset goes wrong.
> +     */
> +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> +
> +    peer = s->nc.peer;
> +    for (int i = 0; i < data_queue_pairs + cvq; i++) {
> +        VhostVDPAState *vdpa_state;
> +        NetClientState *nc;
> +
> +        if (i < data_queue_pairs) {
> +            nc = qemu_get_peer(peer, i);
> +        } else {
> +            nc = qemu_get_peer(peer, n->max_queue_pairs);
> +        }
> +
> +        vdpa_state = DO_UPCAST(VhostVDPAState, nc, nc);
> +        vdpa_state->vhost_vdpa.shadow_data = enable;
> +
> +        if (i < data_queue_pairs) {
> +            /* Do not override CVQ shadow_vqs_enabled */
> +            vdpa_state->vhost_vdpa.shadow_vqs_enabled = enable;
> +        }


I wonder what happens if the number of queue pairs is changed during 
live migration? Should we assign all qps in this case?

Thanks


> +    }
> +
> +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> +    if (unlikely(r < 0)) {
> +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> +    }
> +}
> +
> +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> +{
> +    MigrationState *migration = data;
> +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> +                                     migration_state);
> +
> +    if (migration_in_setup(migration)) {
> +        vhost_vdpa_net_log_global_enable(s, true);
> +    } else if (migration_has_failed(migration)) {
> +        vhost_vdpa_net_log_global_enable(s, false);
> +    }
> +}
> +
>   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
>   {
>       struct vhost_vdpa *v = &s->vhost_vdpa;
>   
> +    add_migration_state_change_notifier(&s->migration_state);
>       if (v->shadow_vqs_enabled) {
>           v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
>                                              v->iova_range.last);
> @@ -278,6 +349,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
>   
>       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>   
> +    if (s->vhost_vdpa.index == 0) {
> +        remove_migration_state_change_notifier(&s->migration_state);
> +    }
> +
>       dev = s->vhost_vdpa.dev;
>       if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
>           g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> @@ -741,6 +816,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>       s->vhost_vdpa.device_fd = vdpa_device_fd;
>       s->vhost_vdpa.index = queue_pair_index;
>       s->always_svq = svq;
> +    s->migration_state.notify = vdpa_net_migration_state_notifier;
>       s->vhost_vdpa.shadow_vqs_enabled = svq;
>       s->vhost_vdpa.iova_range = iova_range;
>       s->vhost_vdpa.shadow_data = svq;
Eugenio Perez Martin Feb. 22, 2023, 7:23 a.m. UTC | #4
On Wed, Feb 22, 2023 at 4:56 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/2/8 17:42, Eugenio Pérez 写道:
> > This allows net to restart the device backend to configure SVQ on it.
> >
> > Ideally, these changes should not be net specific. However, the vdpa net
> > backend is the one with enough knowledge to configure everything because
> > of some reasons:
> > * Queues might need to be shadowed or not depending on its kind (control
> >    vs data).
> > * Queues need to share the same map translations (iova tree).
> >
> > Because of that it is cleaner to restart the whole net backend and
> > configure again as expected, similar to how vhost-kernel moves between
> > userspace and passthrough.
> >
> > If more kinds of devices need dynamic switching to SVQ we can create a
> > callback struct like VhostOps and move most of the code there.
> > VhostOps cannot be reused since all vdpa backend share them, and to
> > personalize just for networking would be too heavy.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> > v3:
> > * Add TODO to use the resume operation in the future.
> > * Use migration_in_setup and migration_has_failed instead of a
> >    complicated switch case.
> > ---
> >   net/vhost-vdpa.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 76 insertions(+)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index dd686b4514..bca13f97fd 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -26,12 +26,14 @@
> >   #include <err.h>
> >   #include "standard-headers/linux/virtio_net.h"
> >   #include "monitor/monitor.h"
> > +#include "migration/misc.h"
> >   #include "hw/virtio/vhost.h"
> >
> >   /* Todo:need to add the multiqueue support here */
> >   typedef struct VhostVDPAState {
> >       NetClientState nc;
> >       struct vhost_vdpa vhost_vdpa;
> > +    Notifier migration_state;
> >       VHostNetState *vhost_net;
> >
> >       /* Control commands shadow buffers */
> > @@ -241,10 +243,79 @@ static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
> >       return DO_UPCAST(VhostVDPAState, nc, nc0);
> >   }
> >
> > +static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
> > +{
> > +    struct vhost_vdpa *v = &s->vhost_vdpa;
> > +    VirtIONet *n;
> > +    VirtIODevice *vdev;
> > +    int data_queue_pairs, cvq, r;
> > +    NetClientState *peer;
> > +
> > +    /* We are only called on the first data vqs and only if x-svq is not set */
> > +    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
> > +        return;
> > +    }
> > +
> > +    vdev = v->dev->vdev;
> > +    n = VIRTIO_NET(vdev);
>
>
> Let's tweak the code to move those initialization to the beginning of
> the function.
>

Sure.

>
> > +    if (!n->vhost_started) {
> > +        return;
> > +    }
>
>
> What happens if the vhost is started during the live migration?
>

This is solved in v3 by also checking the migration state in
vhost_vdpa_net_data_start_first [1]. However, this created a few more
complications and more complex code, as Si-Wei points out.

Recent changes due to virtio reset make it easier to move all this
code to hw/virtio/vhost-vdpa.c, where different kinds of vDPA devices
can share the code. I'll send a new version that way.

>
> > +
> > +    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> > +    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
> > +                                  n->max_ncs - n->max_queue_pairs : 0;
> > +    /*
> > +     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
> > +     * in the future and resume the device if read-only operations between
> > +     * suspend and reset goes wrong.
> > +     */
> > +    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
> > +
> > +    peer = s->nc.peer;
> > +    for (int i = 0; i < data_queue_pairs + cvq; i++) {
> > +        VhostVDPAState *vdpa_state;
> > +        NetClientState *nc;
> > +
> > +        if (i < data_queue_pairs) {
> > +            nc = qemu_get_peer(peer, i);
> > +        } else {
> > +            nc = qemu_get_peer(peer, n->max_queue_pairs);
> > +        }
> > +
> > +        vdpa_state = DO_UPCAST(VhostVDPAState, nc, nc);
> > +        vdpa_state->vhost_vdpa.shadow_data = enable;
> > +
> > +        if (i < data_queue_pairs) {
> > +            /* Do not override CVQ shadow_vqs_enabled */
> > +            vdpa_state->vhost_vdpa.shadow_vqs_enabled = enable;
> > +        }
>
>
> I wonder what happens if the number of queue pairs is changed during
> live migration? Should we assign all qps in this case?
>

Migration is blocked if the device has CVQ feature in this series.

Thanks!

[1] https://patchwork.kernel.org/project/qemu-devel/patch/20230215173850.298832-9-eperezma@redhat.com/

> Thanks
>
>
> > +    }
> > +
> > +    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
> > +    if (unlikely(r < 0)) {
> > +        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
> > +    }
> > +}
> > +
> > +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
> > +{
> > +    MigrationState *migration = data;
> > +    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
> > +                                     migration_state);
> > +
> > +    if (migration_in_setup(migration)) {
> > +        vhost_vdpa_net_log_global_enable(s, true);
> > +    } else if (migration_has_failed(migration)) {
> > +        vhost_vdpa_net_log_global_enable(s, false);
> > +    }
> > +}
> > +
> >   static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
> >   {
> >       struct vhost_vdpa *v = &s->vhost_vdpa;
> >
> > +    add_migration_state_change_notifier(&s->migration_state);
> >       if (v->shadow_vqs_enabled) {
> >           v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
> >                                              v->iova_range.last);
> > @@ -278,6 +349,10 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc)
> >
> >       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> > +    if (s->vhost_vdpa.index == 0) {
> > +        remove_migration_state_change_notifier(&s->migration_state);
> > +    }
> > +
> >       dev = s->vhost_vdpa.dev;
> >       if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> >           g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> > @@ -741,6 +816,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >       s->vhost_vdpa.device_fd = vdpa_device_fd;
> >       s->vhost_vdpa.index = queue_pair_index;
> >       s->always_svq = svq;
> > +    s->migration_state.notify = vdpa_net_migration_state_notifier;
> >       s->vhost_vdpa.shadow_vqs_enabled = svq;
> >       s->vhost_vdpa.iova_range = iova_range;
> >       s->vhost_vdpa.shadow_data = svq;
>
diff mbox series

Patch

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index dd686b4514..bca13f97fd 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,12 +26,14 @@ 
 #include <err.h>
 #include "standard-headers/linux/virtio_net.h"
 #include "monitor/monitor.h"
+#include "migration/misc.h"
 #include "hw/virtio/vhost.h"
 
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
+    Notifier migration_state;
     VHostNetState *vhost_net;
 
     /* Control commands shadow buffers */
@@ -241,10 +243,79 @@  static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
     return DO_UPCAST(VhostVDPAState, nc, nc0);
 }
 
+static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
+{
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+    VirtIONet *n;
+    VirtIODevice *vdev;
+    int data_queue_pairs, cvq, r;
+    NetClientState *peer;
+
+    /* We are only called on the first data vqs and only if x-svq is not set */
+    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
+        return;
+    }
+
+    vdev = v->dev->vdev;
+    n = VIRTIO_NET(vdev);
+    if (!n->vhost_started) {
+        return;
+    }
+
+    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+                                  n->max_ncs - n->max_queue_pairs : 0;
+    /*
+     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
+     * in the future and resume the device if read-only operations between
+     * suspend and reset goes wrong.
+     */
+    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
+
+    peer = s->nc.peer;
+    for (int i = 0; i < data_queue_pairs + cvq; i++) {
+        VhostVDPAState *vdpa_state;
+        NetClientState *nc;
+
+        if (i < data_queue_pairs) {
+            nc = qemu_get_peer(peer, i);
+        } else {
+            nc = qemu_get_peer(peer, n->max_queue_pairs);
+        }
+
+        vdpa_state = DO_UPCAST(VhostVDPAState, nc, nc);
+        vdpa_state->vhost_vdpa.shadow_data = enable;
+
+        if (i < data_queue_pairs) {
+            /* Do not override CVQ shadow_vqs_enabled */
+            vdpa_state->vhost_vdpa.shadow_vqs_enabled = enable;
+        }
+    }
+
+    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
+    if (unlikely(r < 0)) {
+        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
+    }
+}
+
+static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *migration = data;
+    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
+                                     migration_state);
+
+    if (migration_in_setup(migration)) {
+        vhost_vdpa_net_log_global_enable(s, true);
+    } else if (migration_has_failed(migration)) {
+        vhost_vdpa_net_log_global_enable(s, false);
+    }
+}
+
 static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
 {
     struct vhost_vdpa *v = &s->vhost_vdpa;
 
+    add_migration_state_change_notifier(&s->migration_state);
     if (v->shadow_vqs_enabled) {
         v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
                                            v->iova_range.last);
@@ -278,6 +349,10 @@  static void vhost_vdpa_net_client_stop(NetClientState *nc)
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
+    if (s->vhost_vdpa.index == 0) {
+        remove_migration_state_change_notifier(&s->migration_state);
+    }
+
     dev = s->vhost_vdpa.dev;
     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
         g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
@@ -741,6 +816,7 @@  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
     s->always_svq = svq;
+    s->migration_state.notify = vdpa_net_migration_state_notifier;
     s->vhost_vdpa.shadow_vqs_enabled = svq;
     s->vhost_vdpa.iova_range = iova_range;
     s->vhost_vdpa.shadow_data = svq;