diff mbox series

[v3,10/10] vfio: unplug failover primary device before migration

Message ID 20191011112015.11785-11-jfreimann@redhat.com
State New
Headers show
Series add failover feature for assigned network devices | expand

Commit Message

Jens Freimann Oct. 11, 2019, 11:20 a.m. UTC
As usual block all vfio-pci devices from being migrated, but make an
exception for failover primary devices. This is achieved by setting
unmigratable to 0 but also add a migration blocker for all vfio-pci
devices except failover primary devices. These will be unplugged before
migration happens by the migration handler of the corresponding
virtio-net standby device.

Signed-off-by: Jens Freimann <jfreimann@redhat.com>
---
 hw/vfio/pci.c | 35 ++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h |  2 ++
 2 files changed, 36 insertions(+), 1 deletion(-)

Comments

Cornelia Huck Oct. 14, 2019, 10:05 a.m. UTC | #1
On Fri, 11 Oct 2019 13:20:15 +0200
Jens Freimann <jfreimann@redhat.com> wrote:

> As usual block all vfio-pci devices from being migrated, but make an
> exception for failover primary devices. This is achieved by setting
> unmigratable to 0 but also add a migration blocker for all vfio-pci
> devices except failover primary devices. These will be unplugged before
> migration happens by the migration handler of the corresponding
> virtio-net standby device.
> 
> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> ---
>  hw/vfio/pci.c | 35 ++++++++++++++++++++++++++++++++++-
>  hw/vfio/pci.h |  2 ++
>  2 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c5e6fe61cb..64cf8e07d9 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -40,6 +40,9 @@
>  #include "pci.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> +#include "migration/blocker.h"
> +#include "qemu/option.h"
> +#include "qemu/option_int.h"
>  
>  #define TYPE_VFIO_PCI "vfio-pci"
>  #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
> @@ -2698,6 +2701,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static int has_net_failover_arg(void *opaque, const char *name,
> +                           const char *value, Error **errp)
> +{
> +    return (strcmp(name, "net_failover_pair_id") == 0);
> +}
> +
>  static void vfio_realize(PCIDevice *pdev, Error **errp)
>  {
>      VFIOPCIDevice *vdev = PCI_VFIO(pdev);
> @@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      int groupid;
>      int i, ret;
>      bool is_mdev;
> +    uint16_t class_id;
> +
> +    if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
> +                         (void *) pdev->qdev.opts, &err) == 0) {
> +        error_setg(&vdev->migration_blocker,
> +                "VFIO device doesn't support migration");
> +        ret = migrate_add_blocker(vdev->migration_blocker, &err);
> +        if (err) {
> +            error_propagate(errp, err);
> +            error_free(vdev->migration_blocker);
> +        }

Do we really want to continue here if adding the migration blocker
failed? Previously, adding the vmsd with unmigratable=1 would have
prevented realization of the device if --only-migratable was specified;
I guess we should keep that behaviour and just bail out here?

> +    } else {
> +        pdev->qdev.allow_unplug_during_migration = true;
> +    }
>  
>      if (!vdev->vbasedev.sysfsdev) {
>          if (!(~vdev->host.domain || ~vdev->host.bus ||
> @@ -2812,6 +2835,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          goto error;
>      }
>  
> +    if (vdev->net_failover_pair_id != NULL) {
> +        class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
> +        if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
> +            error_setg(errp, "failover device is not an Ethernet device");
> +            goto error;
> +        }
> +    }
> +
>      /* vfio emulates a lot for us, but some bits need extra love */
>      vdev->emulated_config_bits = g_malloc0(vdev->config_size);
>  
> @@ -3110,6 +3141,8 @@ static Property vfio_pci_dev_properties[] = {
>                              display, ON_OFF_AUTO_OFF),
>      DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
>      DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
> +    DEFINE_PROP_STRING("net_failover_pair_id", VFIOPCIDevice,
> +            net_failover_pair_id),
>      DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
>                         intx.mmap_timeout, 1100),
>      DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
> @@ -3152,7 +3185,7 @@ static Property vfio_pci_dev_properties[] = {
>  
>  static const VMStateDescription vfio_pci_vmstate = {
>      .name = "vfio-pci",
> -    .unmigratable = 1,
> +    .unmigratable = 0,

Is this vmsd useful in any way with that change anymore?

>  };
>  

(...)
Alex Williamson Oct. 16, 2019, 1:52 a.m. UTC | #2
On Fri, 11 Oct 2019 13:20:15 +0200
Jens Freimann <jfreimann@redhat.com> wrote:

> As usual block all vfio-pci devices from being migrated, but make an
> exception for failover primary devices. This is achieved by setting
> unmigratable to 0 but also add a migration blocker for all vfio-pci
> devices except failover primary devices. These will be unplugged before
> migration happens by the migration handler of the corresponding
> virtio-net standby device.
> 
> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> ---
>  hw/vfio/pci.c | 35 ++++++++++++++++++++++++++++++++++-
>  hw/vfio/pci.h |  2 ++
>  2 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c5e6fe61cb..64cf8e07d9 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -40,6 +40,9 @@
>  #include "pci.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> +#include "migration/blocker.h"
> +#include "qemu/option.h"
> +#include "qemu/option_int.h"
>  
>  #define TYPE_VFIO_PCI "vfio-pci"
>  #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
> @@ -2698,6 +2701,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static int has_net_failover_arg(void *opaque, const char *name,
> +                           const char *value, Error **errp)
> +{
> +    return (strcmp(name, "net_failover_pair_id") == 0);
> +}
> +
>  static void vfio_realize(PCIDevice *pdev, Error **errp)
>  {
>      VFIOPCIDevice *vdev = PCI_VFIO(pdev);
> @@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      int groupid;
>      int i, ret;
>      bool is_mdev;
> +    uint16_t class_id;
> +
> +    if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
> +                         (void *) pdev->qdev.opts, &err) == 0) {

Why do we need a qemu_opt_foreach here versus testing
vdev->net_failover_pair_id as you do below or similar to how we test
sysfsdev immediately below this chunk?

> +        error_setg(&vdev->migration_blocker,
> +                "VFIO device doesn't support migration");
> +        ret = migrate_add_blocker(vdev->migration_blocker, &err);

Where's the migrate_del_blocker()/error_free() for any other realize
error or device removal?

> +        if (err) {
> +            error_propagate(errp, err);
> +            error_free(vdev->migration_blocker);
> +        }

As Connie noted, unclear if this aborts or continues without a
migration blocker, which would be bad.

> +    } else {
> +        pdev->qdev.allow_unplug_during_migration = true;
> +    }
>  
>      if (!vdev->vbasedev.sysfsdev) {
>          if (!(~vdev->host.domain || ~vdev->host.bus ||
> @@ -2812,6 +2835,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          goto error;
>      }
>  
> +    if (vdev->net_failover_pair_id != NULL) {
> +        class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
> +        if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
> +            error_setg(errp, "failover device is not an Ethernet device");
> +            goto error;
> +        }
> +    }

Not clear to me why we do this separate from setting up the migration
blocker or why we use a different mechanism to test for the property.

> +
>      /* vfio emulates a lot for us, but some bits need extra love */
>      vdev->emulated_config_bits = g_malloc0(vdev->config_size);
>  
> @@ -3110,6 +3141,8 @@ static Property vfio_pci_dev_properties[] = {
>                              display, ON_OFF_AUTO_OFF),
>      DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
>      DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
> +    DEFINE_PROP_STRING("net_failover_pair_id", VFIOPCIDevice,
> +            net_failover_pair_id),

Should this and the Ethernet class test be done in PCIDevice?  The
migration aspect is the only thing unique to vfio since we don't
otherwise support it, right?  For instance, I should be able to
setup an emulated NIC with this failover pair id too, right?  Thanks,

Alex

>      DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
>                         intx.mmap_timeout, 1100),
>      DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
> @@ -3152,7 +3185,7 @@ static Property vfio_pci_dev_properties[] = {
>  
>  static const VMStateDescription vfio_pci_vmstate = {
>      .name = "vfio-pci",
> -    .unmigratable = 1,
> +    .unmigratable = 0,
>  };
>  
>  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 834a90d646..da4417071a 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -134,6 +134,7 @@ typedef struct VFIOPCIDevice {
>      PCIHostDeviceAddress host;
>      EventNotifier err_notifier;
>      EventNotifier req_notifier;
> +    char *net_failover_pair_id;
>      int (*resetfn)(struct VFIOPCIDevice *);
>      uint32_t vendor_id;
>      uint32_t device_id;
> @@ -168,6 +169,7 @@ typedef struct VFIOPCIDevice {
>      bool no_vfio_ioeventfd;
>      bool enable_ramfb;
>      VFIODisplay *dpy;
> +    Error *migration_blocker;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
Jens Freimann Oct. 16, 2019, 8:18 p.m. UTC | #3
On Tue, Oct 15, 2019 at 07:52:12PM -0600, Alex Williamson wrote:
>On Fri, 11 Oct 2019 13:20:15 +0200
>Jens Freimann <jfreimann@redhat.com> wrote:
>
>> As usual block all vfio-pci devices from being migrated, but make an
>> exception for failover primary devices. This is achieved by setting
>> unmigratable to 0 but also add a migration blocker for all vfio-pci
>> devices except failover primary devices. These will be unplugged before
>> migration happens by the migration handler of the corresponding
>> virtio-net standby device.
>>
>> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
>> ---
>>  hw/vfio/pci.c | 35 ++++++++++++++++++++++++++++++++++-
>>  hw/vfio/pci.h |  2 ++
>>  2 files changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index c5e6fe61cb..64cf8e07d9 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -40,6 +40,9 @@
>>  #include "pci.h"
>>  #include "trace.h"
>>  #include "qapi/error.h"
>> +#include "migration/blocker.h"
>> +#include "qemu/option.h"
>> +#include "qemu/option_int.h"
>>
>>  #define TYPE_VFIO_PCI "vfio-pci"
>>  #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
>> @@ -2698,6 +2701,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>      vdev->req_enabled = false;
>>  }
>>
>> +static int has_net_failover_arg(void *opaque, const char *name,
>> +                           const char *value, Error **errp)
>> +{
>> +    return (strcmp(name, "net_failover_pair_id") == 0);
>> +}
>> +
>>  static void vfio_realize(PCIDevice *pdev, Error **errp)
>>  {
>>      VFIOPCIDevice *vdev = PCI_VFIO(pdev);
>> @@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>      int groupid;
>>      int i, ret;
>>      bool is_mdev;
>> +    uint16_t class_id;
>> +
>> +    if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
>> +                         (void *) pdev->qdev.opts, &err) == 0) {
>
>Why do we need a qemu_opt_foreach here versus testing
>vdev->net_failover_pair_id as you do below or similar to how we test
>sysfsdev immediately below this chunk?

We don't need it, I will change it and move it to where we check for
the PCI class.
>
>> +        error_setg(&vdev->migration_blocker,
>> +                "VFIO device doesn't support migration");
>> +        ret = migrate_add_blocker(vdev->migration_blocker, &err);
>
>Where's the migrate_del_blocker()/error_free() for any other realize
>error or device removal?
>
>> +        if (err) {
>> +            error_propagate(errp, err);
>> +            error_free(vdev->migration_blocker);
>> +        }
>
>As Connie noted, unclear if this aborts or continues without a
>migration blocker, which would be bad.

It aborts in my test. PCI realize propagates it further and eventually
it leads to aborting qemu.

It looks like this now:

     if (!pdev->net_failover_pair_id) {
          error_setg(&vdev->migration_blocker,
                  "VFIO device doesn't support migration");
          ret = migrate_add_blocker(vdev->migration_blocker, &err);
          if (err) {
              error_propagate(errp, err);
          } else {
              error_propagate(errp, vdev->migration_blocker);
          }
          goto error;
      } else {
          pdev->qdev.allow_unplug_during_migration = true;
      }

>> +    } else {
>> +        pdev->qdev.allow_unplug_during_migration = true;
>> +    }
>>
>>      if (!vdev->vbasedev.sysfsdev) {
>>          if (!(~vdev->host.domain || ~vdev->host.bus ||
>> @@ -2812,6 +2835,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>          goto error;
>>      }
>>
>> +    if (vdev->net_failover_pair_id != NULL) {
>> +        class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
>> +        if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
>> +            error_setg(errp, "failover device is not an Ethernet device");
>> +            goto error;
>> +        }
>> +    }
>
>Not clear to me why we do this separate from setting up the migration
>blocker or why we use a different mechanism to test for the property.

I'm moving this check to hw/pci/pci.c as you suggested.

>> +
>>      /* vfio emulates a lot for us, but some bits need extra love */
>>      vdev->emulated_config_bits = g_malloc0(vdev->config_size);
>>
>> @@ -3110,6 +3141,8 @@ static Property vfio_pci_dev_properties[] = {
>>                              display, ON_OFF_AUTO_OFF),
>>      DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
>>      DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
>> +    DEFINE_PROP_STRING("net_failover_pair_id", VFIOPCIDevice,
>> +            net_failover_pair_id),
>
>Should this and the Ethernet class test be done in PCIDevice?  The
>migration aspect is the only thing unique to vfio since we don't
>otherwise support it, right?  For instance, I should be able to
>setup an emulated NIC with this failover pair id too, right?  Thanks,

Yes, we can do it in PCIDevice. Using it with an emulated device.
It wouldn't make sense for production but could make sense for
testing purposes.

Thanks for the review!

regards,
Jens
Alex Williamson Oct. 17, 2019, 12:39 a.m. UTC | #4
On Wed, 16 Oct 2019 22:18:47 +0200
Jens Freimann <jfreimann@redhat.com> wrote:

> On Tue, Oct 15, 2019 at 07:52:12PM -0600, Alex Williamson wrote:
> >On Fri, 11 Oct 2019 13:20:15 +0200
> >Jens Freimann <jfreimann@redhat.com> wrote:
> >  
> >> As usual block all vfio-pci devices from being migrated, but make an
> >> exception for failover primary devices. This is achieved by setting
> >> unmigratable to 0 but also add a migration blocker for all vfio-pci
> >> devices except failover primary devices. These will be unplugged before
> >> migration happens by the migration handler of the corresponding
> >> virtio-net standby device.
> >>
> >> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> >> ---
> >>  hw/vfio/pci.c | 35 ++++++++++++++++++++++++++++++++++-
> >>  hw/vfio/pci.h |  2 ++
> >>  2 files changed, 36 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index c5e6fe61cb..64cf8e07d9 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -40,6 +40,9 @@
> >>  #include "pci.h"
> >>  #include "trace.h"
> >>  #include "qapi/error.h"
> >> +#include "migration/blocker.h"
> >> +#include "qemu/option.h"
> >> +#include "qemu/option_int.h"
> >>
> >>  #define TYPE_VFIO_PCI "vfio-pci"
> >>  #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
> >> @@ -2698,6 +2701,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
> >>      vdev->req_enabled = false;
> >>  }
> >>
> >> +static int has_net_failover_arg(void *opaque, const char *name,
> >> +                           const char *value, Error **errp)
> >> +{
> >> +    return (strcmp(name, "net_failover_pair_id") == 0);
> >> +}
> >> +
> >>  static void vfio_realize(PCIDevice *pdev, Error **errp)
> >>  {
> >>      VFIOPCIDevice *vdev = PCI_VFIO(pdev);
> >> @@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> >>      int groupid;
> >>      int i, ret;
> >>      bool is_mdev;
> >> +    uint16_t class_id;
> >> +
> >> +    if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
> >> +                         (void *) pdev->qdev.opts, &err) == 0) {  
> >
> >Why do we need a qemu_opt_foreach here versus testing
> >vdev->net_failover_pair_id as you do below or similar to how we test
> >sysfsdev immediately below this chunk?  
> 
> We don't need it, I will change it and move it to where we check for
> the PCI class.
> >  
> >> +        error_setg(&vdev->migration_blocker,
> >> +                "VFIO device doesn't support migration");
> >> +        ret = migrate_add_blocker(vdev->migration_blocker, &err);  
> >
> >Where's the migrate_del_blocker()/error_free() for any other realize
> >error or device removal?
> >  
> >> +        if (err) {
> >> +            error_propagate(errp, err);
> >> +            error_free(vdev->migration_blocker);
> >> +        }  
> >
> >As Connie noted, unclear if this aborts or continues without a
> >migration blocker, which would be bad.  
> 
> It aborts in my test. PCI realize propagates it further and eventually
> it leads to aborting qemu.
> 
> It looks like this now:
> 
>      if (!pdev->net_failover_pair_id) {
>           error_setg(&vdev->migration_blocker,
>                   "VFIO device doesn't support migration");
>           ret = migrate_add_blocker(vdev->migration_blocker, &err);
>           if (err) {
>               error_propagate(errp, err);
>           } else {
>               error_propagate(errp, vdev->migration_blocker);
>           }
>           goto error;

This unconditionally goes to error when we don't have a failover pair
set :-\

I suspect we don't want any sort of error propagate in the success
case, the migration_blocker pre-defines the error when the migration is
blocked, right?  Thanks,

Alex

>       } else {
>           pdev->qdev.allow_unplug_during_migration = true;
>       }
> 
> >> +    } else {
> >> +        pdev->qdev.allow_unplug_during_migration = true;
> >> +    }
> >>
> >>      if (!vdev->vbasedev.sysfsdev) {
> >>          if (!(~vdev->host.domain || ~vdev->host.bus ||
> >> @@ -2812,6 +2835,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> >>          goto error;
> >>      }
> >>
> >> +    if (vdev->net_failover_pair_id != NULL) {
> >> +        class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
> >> +        if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
> >> +            error_setg(errp, "failover device is not an Ethernet device");
> >> +            goto error;
> >> +        }
> >> +    }  
> >
> >Not clear to me why we do this separate from setting up the migration
> >blocker or why we use a different mechanism to test for the property.  
> 
> I'm moving this check to hw/pci/pci.c as you suggested.
> 
> >> +
> >>      /* vfio emulates a lot for us, but some bits need extra love */
> >>      vdev->emulated_config_bits = g_malloc0(vdev->config_size);
> >>
> >> @@ -3110,6 +3141,8 @@ static Property vfio_pci_dev_properties[] = {
> >>                              display, ON_OFF_AUTO_OFF),
> >>      DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
> >>      DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
> >> +    DEFINE_PROP_STRING("net_failover_pair_id", VFIOPCIDevice,
> >> +            net_failover_pair_id),  
> >
> >Should this and the Ethernet class test be done in PCIDevice?  The
> >migration aspect is the only thing unique to vfio since we don't
> >otherwise support it, right?  For instance, I should be able to
> >setup an emulated NIC with this failover pair id too, right?  Thanks,  
> 
> Yes, we can do it in PCIDevice. Using it with an emulated device.
> It wouldn't make sense for production but could make sense for
> testing purposes.
> 
> Thanks for the review!
> 
> regards,
> Jens
Jens Freimann Oct. 17, 2019, 7:45 a.m. UTC | #5
On Wed, Oct 16, 2019 at 06:39:58PM -0600, Alex Williamson wrote:
>On Wed, 16 Oct 2019 22:18:47 +0200
>Jens Freimann <jfreimann@redhat.com> wrote:
>
>> On Tue, Oct 15, 2019 at 07:52:12PM -0600, Alex Williamson wrote:
>> >On Fri, 11 Oct 2019 13:20:15 +0200
>> >Jens Freimann <jfreimann@redhat.com> wrote:
>> >
>> >> As usual block all vfio-pci devices from being migrated, but make an
>> >> exception for failover primary devices. This is achieved by setting
>> >> unmigratable to 0 but also add a migration blocker for all vfio-pci
>> >> devices except failover primary devices. These will be unplugged before
>> >> migration happens by the migration handler of the corresponding
>> >> virtio-net standby device.
>> >>
>> >> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
>> >> ---
>> >>  hw/vfio/pci.c | 35 ++++++++++++++++++++++++++++++++++-
>> >>  hw/vfio/pci.h |  2 ++
>> >>  2 files changed, 36 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> >> index c5e6fe61cb..64cf8e07d9 100644
>> >> --- a/hw/vfio/pci.c
>> >> +++ b/hw/vfio/pci.c
>> >> @@ -40,6 +40,9 @@
>> >>  #include "pci.h"
>> >>  #include "trace.h"
>> >>  #include "qapi/error.h"
>> >> +#include "migration/blocker.h"
>> >> +#include "qemu/option.h"
>> >> +#include "qemu/option_int.h"
>> >>
>> >>  #define TYPE_VFIO_PCI "vfio-pci"
>> >>  #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
>> >> @@ -2698,6 +2701,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>> >>      vdev->req_enabled = false;
>> >>  }
>> >>
>> >> +static int has_net_failover_arg(void *opaque, const char *name,
>> >> +                           const char *value, Error **errp)
>> >> +{
>> >> +    return (strcmp(name, "net_failover_pair_id") == 0);
>> >> +}
>> >> +
>> >>  static void vfio_realize(PCIDevice *pdev, Error **errp)
>> >>  {
>> >>      VFIOPCIDevice *vdev = PCI_VFIO(pdev);
>> >> @@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>> >>      int groupid;
>> >>      int i, ret;
>> >>      bool is_mdev;
>> >> +    uint16_t class_id;
>> >> +
>> >> +    if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
>> >> +                         (void *) pdev->qdev.opts, &err) == 0) {
>> >
>> >Why do we need a qemu_opt_foreach here versus testing
>> >vdev->net_failover_pair_id as you do below or similar to how we test
>> >sysfsdev immediately below this chunk?
>>
>> We don't need it, I will change it and move it to where we check for
>> the PCI class.
>> >
>> >> +        error_setg(&vdev->migration_blocker,
>> >> +                "VFIO device doesn't support migration");
>> >> +        ret = migrate_add_blocker(vdev->migration_blocker, &err);
>> >
>> >Where's the migrate_del_blocker()/error_free() for any other realize
>> >error or device removal?
>> >
>> >> +        if (err) {
>> >> +            error_propagate(errp, err);
>> >> +            error_free(vdev->migration_blocker);
>> >> +        }
>> >
>> >As Connie noted, unclear if this aborts or continues without a
>> >migration blocker, which would be bad.
>>
>> It aborts in my test. PCI realize propagates it further and eventually
>> it leads to aborting qemu.
>>
>> It looks like this now:
>>
>>      if (!pdev->net_failover_pair_id) {
>>           error_setg(&vdev->migration_blocker,
>>                   "VFIO device doesn't support migration");
>>           ret = migrate_add_blocker(vdev->migration_blocker, &err);
>>           if (err) {
>>               error_propagate(errp, err);
>>           } else {
>>               error_propagate(errp, vdev->migration_blocker);
>>           }
>>           goto error;
>
>This unconditionally goes to error when we don't have a failover pair
>set :-\
>
>I suspect we don't want any sort of error propagate in the success
>case, the migration_blocker pre-defines the error when the migration is
>blocked, right?  Thanks,

I shouldn't code or send mails late at night :) I'll fix it in v4

Thanks!

regards,
Jens
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c5e6fe61cb..64cf8e07d9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -40,6 +40,9 @@ 
 #include "pci.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/blocker.h"
+#include "qemu/option.h"
+#include "qemu/option_int.h"
 
 #define TYPE_VFIO_PCI "vfio-pci"
 #define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
@@ -2698,6 +2701,12 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static int has_net_failover_arg(void *opaque, const char *name,
+                           const char *value, Error **errp)
+{
+    return (strcmp(name, "net_failover_pair_id") == 0);
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
@@ -2710,6 +2719,20 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     int groupid;
     int i, ret;
     bool is_mdev;
+    uint16_t class_id;
+
+    if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
+                         (void *) pdev->qdev.opts, &err) == 0) {
+        error_setg(&vdev->migration_blocker,
+                "VFIO device doesn't support migration");
+        ret = migrate_add_blocker(vdev->migration_blocker, &err);
+        if (err) {
+            error_propagate(errp, err);
+            error_free(vdev->migration_blocker);
+        }
+    } else {
+        pdev->qdev.allow_unplug_during_migration = true;
+    }
 
     if (!vdev->vbasedev.sysfsdev) {
         if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2812,6 +2835,14 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    if (vdev->net_failover_pair_id != NULL) {
+        class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
+        if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
+            error_setg(errp, "failover device is not an Ethernet device");
+            goto error;
+        }
+    }
+
     /* vfio emulates a lot for us, but some bits need extra love */
     vdev->emulated_config_bits = g_malloc0(vdev->config_size);
 
@@ -3110,6 +3141,8 @@  static Property vfio_pci_dev_properties[] = {
                             display, ON_OFF_AUTO_OFF),
     DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
     DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
+    DEFINE_PROP_STRING("net_failover_pair_id", VFIOPCIDevice,
+            net_failover_pair_id),
     DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
                        intx.mmap_timeout, 1100),
     DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
@@ -3152,7 +3185,7 @@  static Property vfio_pci_dev_properties[] = {
 
 static const VMStateDescription vfio_pci_vmstate = {
     .name = "vfio-pci",
-    .unmigratable = 1,
+    .unmigratable = 0,
 };
 
 static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 834a90d646..da4417071a 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -134,6 +134,7 @@  typedef struct VFIOPCIDevice {
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
+    char *net_failover_pair_id;
     int (*resetfn)(struct VFIOPCIDevice *);
     uint32_t vendor_id;
     uint32_t device_id;
@@ -168,6 +169,7 @@  typedef struct VFIOPCIDevice {
     bool no_vfio_ioeventfd;
     bool enable_ramfb;
     VFIODisplay *dpy;
+    Error *migration_blocker;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);