diff mbox series

[v4,5/5] vfio/migration: Refactor and fix print of "Migration disabled"

Message ID 20230629084042.86502-6-zhenzhong.duan@intel.com
State New
Headers show
Series VFIO migration related refactor and bug fix | expand

Commit Message

Duan, Zhenzhong June 29, 2023, 8:40 a.m. UTC
This patch refactors vfio_migration_realize() and its dependent code
as follows:

1. It's redundant in vfio_migration_realize() to register multiple blockers,
   e.g: vIOMMU blocker can be refactored as per device blocker.
2. Change vfio_viommu_preset() to be only a per device checker.
3. Remove global vIOMMU blocker related stuff, e.g:
   giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
   and vfio_migration_finalize()
4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
   vfio_block_migration() and vfio_viommu_preset() to return bool type.
5. Print "Migration disabled" depending on enable_migration property
   and print it as warning instead of error which is overkill.

migrate_add_blocker() returns 0 when successfully adding the migration blocker.
However, the caller of vfio_migration_realize() considers that migration was
blocked when the latter returned an error. What matters for migration is that
the blocker is added in core migration, so this cleans up usability such that
user sees "Migration disabled" when any of the vfio migration blockers are active
and it's not intentionally forced by user with enable-migration=off.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/vfio/common.c              | 66 +++++++----------------------------
 hw/vfio/migration.c           | 30 +++++++++-------
 hw/vfio/pci.c                 |  4 +--
 include/hw/vfio/vfio-common.h |  7 ++--
 4 files changed, 36 insertions(+), 71 deletions(-)

Comments

Joao Martins June 29, 2023, 12:44 p.m. UTC | #1
On 29/06/2023 09:40, Zhenzhong Duan wrote:
> This patch refactors vfio_migration_realize() and its dependend code
> as follows:
> 
> 1. It's redundant in vfio_migration_realize() to registers multiple blockers,
>    e.g: vIOMMU blocker can be refactored as per device blocker.
> 2. Change vfio_viommu_preset() to be only a per device checker.
> 3. Remove global vIOMMU blocker related stuff, e.g:
>    giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
>    and vfio_migration_finalize()
> 4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
>    vfio_block_migration() and vfio_viommu_preset() to return bool type.
> 5. Print "Migration disabled" depending on enable_migration property
>    and print it as warning instead of error which is overkill.
> 
I am not entirely sure we need to keep "Migration disabled". Perhaps we should
just downgrade from error to warning and always use the same error messages.

> migrate_add_blocker() returns 0 when successfully adding the migration blocker.
> However, the caller of vfio_migration_realize() considers that migration was
> blocked when the latter returned an error. What matters for migration is that
> the blocker is added in core migration, so this cleans up usability such that
> user sees "Migrate disabled" when any of the vfio migration blockers are active
> and it's not intentionally forced by user with enable-migration=off.
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  hw/vfio/common.c              | 66 +++++++----------------------------
>  hw/vfio/migration.c           | 30 +++++++++-------
>  hw/vfio/pci.c                 |  4 +--
>  include/hw/vfio/vfio-common.h |  7 ++--
>  4 files changed, 36 insertions(+), 71 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 77e2ee0e5c6e..c80ecb1da53f 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -362,7 +362,6 @@ bool vfio_mig_active(void)
>  }
>  
>  static Error *multiple_devices_migration_blocker;
> -static Error *giommu_migration_blocker;
>  
>  static unsigned int vfio_migratable_device_num(void)
>  {
> @@ -381,19 +380,19 @@ static unsigned int vfio_migratable_device_num(void)
>      return device_num;
>  }
>  
> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>  {
>      int ret;
>  
>      if (multiple_devices_migration_blocker ||
>          vfio_migratable_device_num() <= 1) {
> -        return 0;
> +        return true;
>      }
>  
>      if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>          error_setg(errp, "Migration is currently not supported with multiple "
>                           "VFIO devices");
> -        return -EINVAL;
> +        return false;
>      }
>  
>      error_setg(&multiple_devices_migration_blocker,
> @@ -403,9 +402,15 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>      if (ret < 0) {
>          error_free(multiple_devices_migration_blocker);
>          multiple_devices_migration_blocker = NULL;
> +    } else {
> +        /*
> +         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
> +         * in vfio_migration_realize().
> +         */
> +        warn_report("Migration disabled, not support multiple VFIO devices");
>      }
>  

Perhaps you could stash the previous error message and use it in
warn_report_err() to consolidate the error messages, e.g.

bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
{
    Error *err = NULL;

    if (multiple_devices_migration_blocker ||
        vfio_migratable_device_num() <= 1) {
        return true;
    }

    error_setg(&err, "%s: Migration is currently not supported with multiple "
                     "VFIO devices", vbasedev->name);

    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
        error_propagate(errp, err);
        return -EINVAL;
    }

    ...
    if (ret < 0) {
    } else {
        /* Warns only on ON_OFF_AUTO_AUTO case */
        warn_report_err(err);
    }
}

> -    return ret;
> +    return !ret;
>  }
>  
>  void vfio_unblock_multiple_devices_migration(void)
> @@ -420,55 +425,10 @@ void vfio_unblock_multiple_devices_migration(void)
>      multiple_devices_migration_blocker = NULL;
>  }
>  
> -static bool vfio_viommu_preset(void)
> +/* Block migration with a vIOMMU */

I meant in the previous version to put the comment on top of the caller, not on
the definition. But with the new code structure from Avihai the error message
further below... it will look a bit redundant.

> +bool vfio_viommu_preset(VFIODevice *vbasedev)
>  {
> -    VFIOAddressSpace *space;
> -
> -    QLIST_FOREACH(space, &vfio_address_spaces, list) {
> -        if (space->as != &address_space_memory) {
> -            return true;
> -        }
> -    }
> -
> -    return false;
> -}
> -
> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp)
> -{
> -    int ret;
> -
> -    if (giommu_migration_blocker ||
> -        !vfio_viommu_preset()) {
> -        return 0;
> -    }
> -
> -    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
> -        error_setg(errp,
> -                   "Migration is currently not supported with vIOMMU enabled");
> -        return -EINVAL;
> -    }
> -
> -    error_setg(&giommu_migration_blocker,
> -               "Migration is currently not supported with vIOMMU enabled");
> -    ret = migrate_add_blocker(giommu_migration_blocker, errp);
> -    if (ret < 0) {
> -        error_free(giommu_migration_blocker);
> -        giommu_migration_blocker = NULL;
> -    }
> -
> -    return ret;
> -}
> -
> -void vfio_migration_finalize(void)
> -{
> -    if (!giommu_migration_blocker ||
> -        vfio_viommu_preset()) {
> -        return;
> -    }
> -
> -    migrate_del_blocker(giommu_migration_blocker);
> -    error_free(giommu_migration_blocker);
> -    giommu_migration_blocker = NULL;
> +    return vbasedev->group->container->space->as != &address_space_memory;
>  }
>  

nice consolidation

>  static void vfio_set_migration_error(int err)
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 1db7d52ab2c1..84036e5cfc01 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -802,13 +802,13 @@ static int vfio_migration_init(VFIODevice *vbasedev)
>      return 0;
>  }
>  
> -static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
> +static bool vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>  {
>      int ret;
>  
>      if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>          error_propagate(errp, err);
> -        return -EINVAL;
> +        return false;
>      }
>  
>      vbasedev->migration_blocker = error_copy(err);
> @@ -818,9 +818,11 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>      if (ret < 0) {
>          error_free(vbasedev->migration_blocker);
>          vbasedev->migration_blocker = NULL;
> +    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
> +        warn_report("%s: Migration disabled", vbasedev->name);
>      }
>  
Perhaps you can use the local error to expand on why migration was disabled, e.g.

	warn_report_err(err);

> -    return ret;
> +    return !ret;
>  }
>  
>  /* ---------------------------------------------------------------------- */
> @@ -835,7 +837,12 @@ void vfio_reset_bytes_transferred(void)
>      bytes_transferred = 0;
>  }
>  
> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> +/*
> + * Return true when either migration initialized or blocker registered.
> + * Currently only return false when adding blocker fails which will
> + * de-register vfio device.
> + */
> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>  {
>      Error *err = NULL;
>      int ret;
> @@ -873,18 +880,17 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>                      vbasedev->name);
>      }
>  
> -    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
> -    if (ret) {
> -        return ret;
> +    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
> +        return false;
>      }
>  
> -    ret = vfio_block_giommu_migration(vbasedev, errp);
> -    if (ret) {
> -        return ret;
> +    if (vfio_viommu_preset(vbasedev)) {

The /* Block migration with a vIOMMU */

Would go above, but I don't think we need it anymore ...

> +        error_setg(&err, "%s: Migration is currently not supported "
> +                   "with vIOMMU enabled", vbasedev->name);
> +        return vfio_block_migration(vbasedev, err, errp);

... as the error message when placed here makes it obvious. So the comment I
suggested won't add much. Unless others disagree.

>      }
>  
> -    trace_vfio_migration_realize(vbasedev->name);
> -    return 0;
> +    return true;
>  }
>  
I think somewhere in this function we should have vfio_migration_exit() being called
behind a label or elsewhere from vfio_migration_realize (...)

>  void vfio_migration_exit(VFIODevice *vbasedev)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index dc69d3031b24..184d08568154 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3209,7 +3209,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>      if (!pdev->failover_pair_id) {
>          ret = vfio_migration_realize(vbasedev, errp);
>          if (ret) {
> -            error_report("%s: Migration disabled", vbasedev->name);
> +            trace_vfio_migration_realize(vbasedev->name);
> +        } else {
>              goto out_vfio_migration;
>          }
>      }

(...) Which then voids the need for this change. Perhaps your previous patch
(4/5) could come after this refactor patch instead ... where you would fix the
unwinding error path inside vfio_migration_realize() as opposed to
vfio_realize().

> @@ -3257,7 +3258,6 @@ static void vfio_instance_finalize(Object *obj)
>       */
>      vfio_put_device(vdev);
>      vfio_put_group(group);
> -    vfio_migration_finalize();
>  }
>  
>  static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 93429b9abba0..3c18572322fc 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>  extern VFIOGroupList vfio_group_list;
>  
>  bool vfio_mig_active(void);
> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
>  void vfio_unblock_multiple_devices_migration(void);
> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp);
> +bool vfio_viommu_preset(VFIODevice *vbasedev);
>  int64_t vfio_mig_bytes_transferred(void);
>  void vfio_reset_bytes_transferred(void);
>  
> @@ -252,8 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
>  int vfio_spapr_remove_window(VFIOContainer *container,
>                               hwaddr offset_within_address_space);
>  
> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>  void vfio_migration_exit(VFIODevice *vbasedev);
> -void vfio_migration_finalize(void);
>  
>  #endif /* HW_VFIO_VFIO_COMMON_H */
Avihai Horon June 29, 2023, 3:20 p.m. UTC | #2
On 29/06/2023 15:44, Joao Martins wrote:
> External email: Use caution opening links or attachments
>
>
> On 29/06/2023 09:40, Zhenzhong Duan wrote:
>> This patch refactors vfio_migration_realize() and its dependend code
>> as follows:
>>
>> 1. It's redundant in vfio_migration_realize() to registers multiple blockers,
>>     e.g: vIOMMU blocker can be refactored as per device blocker.
>> 2. Change vfio_viommu_preset() to be only a per device checker.
>> 3. Remove global vIOMMU blocker related stuff, e.g:
>>     giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
>>     and vfio_migration_finalize()
>> 4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
>>     vfio_block_migration() and vfio_viommu_preset() to return bool type.
>> 5. Print "Migration disabled" depending on enable_migration property
>>     and print it as warning instead of error which is overkill.
>>
> I am not enterily sure we need to keep "Migration disabled". Perhaps we should
> just derisk from error to warning and use always the same error messages.
>
>> migrate_add_blocker() returns 0 when successfully adding the migration blocker.
>> However, the caller of vfio_migration_realize() considers that migration was
>> blocked when the latter returned an error. What matters for migration is that
>> the blocker is added in core migration, so this cleans up usability such that
>> user sees "Migrate disabled" when any of the vfio migration blockers are active
>> and it's not intentionally forced by user with enable-migration=off.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   hw/vfio/common.c              | 66 +++++++----------------------------
>>   hw/vfio/migration.c           | 30 +++++++++-------
>>   hw/vfio/pci.c                 |  4 +--
>>   include/hw/vfio/vfio-common.h |  7 ++--
>>   4 files changed, 36 insertions(+), 71 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 77e2ee0e5c6e..c80ecb1da53f 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -362,7 +362,6 @@ bool vfio_mig_active(void)
>>   }
>>
>>   static Error *multiple_devices_migration_blocker;
>> -static Error *giommu_migration_blocker;
>>
>>   static unsigned int vfio_migratable_device_num(void)
>>   {
>> @@ -381,19 +380,19 @@ static unsigned int vfio_migratable_device_num(void)
>>       return device_num;
>>   }
>>
>> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>>   {
>>       int ret;
>>
>>       if (multiple_devices_migration_blocker ||
>>           vfio_migratable_device_num() <= 1) {
>> -        return 0;
>> +        return true;
>>       }
>>
>>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>>           error_setg(errp, "Migration is currently not supported with multiple "
>>                            "VFIO devices");
>> -        return -EINVAL;
>> +        return false;
>>       }
>>
>>       error_setg(&multiple_devices_migration_blocker,
>> @@ -403,9 +402,15 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>>       if (ret < 0) {
>>           error_free(multiple_devices_migration_blocker);
>>           multiple_devices_migration_blocker = NULL;
>> +    } else {
>> +        /*
>> +         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
>> +         * in vfio_migration_realize().
>> +         */
>> +        warn_report("Migration disabled, not support multiple VFIO devices");
>>       }
>>
> Perhaps you could stash the previous error message and use it in the
> warn_report_error to consolidate the error messages e.g.
>
> bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
> {
>      Error *err = NULL;
>
>      if (multiple_devices_migration_blocker ||
>          vfio_migratable_device_num() <= 1) {
>          return true;
>      }
>
>      error_setg(&err, "%s: Migration is currently not supported with multiple "
>                       "VFIO devices", vbasedev->name);
>
>      if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>          error_propagate(errp, err);
>          return -EINVAL;
>      }
>
>      ...
>      if (ret < 0) {
>      } else {
>          /* Warns only on ON_OFF_AUTO_AUTO case */
>          warn_report_err(err);

I'm not sure this warning is needed.
If I remember correctly, I think Alex didn't want migration 
error/warning messages to be logged in the AUTO case.

>      }
> }
>
>> -    return ret;
>> +    return !ret;
>>   }
>>
>>   void vfio_unblock_multiple_devices_migration(void)
>> @@ -420,55 +425,10 @@ void vfio_unblock_multiple_devices_migration(void)
>>       multiple_devices_migration_blocker = NULL;
>>   }
>>
>> -static bool vfio_viommu_preset(void)
>> +/* Block migration with a vIOMMU */
> I meant in the previous version to put the comment on top of the caller, not on
> the definition. But with the new code structure from Avihai the error message
> further below... it will look a bit redundant.
>
>> +bool vfio_viommu_preset(VFIODevice *vbasedev)
>>   {
>> -    VFIOAddressSpace *space;
>> -
>> -    QLIST_FOREACH(space, &vfio_address_spaces, list) {
>> -        if (space->as != &address_space_memory) {
>> -            return true;
>> -        }
>> -    }
>> -
>> -    return false;
>> -}
>> -
>> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp)
>> -{
>> -    int ret;
>> -
>> -    if (giommu_migration_blocker ||
>> -        !vfio_viommu_preset()) {
>> -        return 0;
>> -    }
>> -
>> -    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>> -        error_setg(errp,
>> -                   "Migration is currently not supported with vIOMMU enabled");
>> -        return -EINVAL;
>> -    }
>> -
>> -    error_setg(&giommu_migration_blocker,
>> -               "Migration is currently not supported with vIOMMU enabled");
>> -    ret = migrate_add_blocker(giommu_migration_blocker, errp);
>> -    if (ret < 0) {
>> -        error_free(giommu_migration_blocker);
>> -        giommu_migration_blocker = NULL;
>> -    }
>> -
>> -    return ret;
>> -}
>> -
>> -void vfio_migration_finalize(void)
>> -{
>> -    if (!giommu_migration_blocker ||
>> -        vfio_viommu_preset()) {
>> -        return;
>> -    }
>> -
>> -    migrate_del_blocker(giommu_migration_blocker);
>> -    error_free(giommu_migration_blocker);
>> -    giommu_migration_blocker = NULL;
>> +    return vbasedev->group->container->space->as != &address_space_memory;
>>   }
>>
> nice consolidation
>
>>   static void vfio_set_migration_error(int err)
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 1db7d52ab2c1..84036e5cfc01 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -802,13 +802,13 @@ static int vfio_migration_init(VFIODevice *vbasedev)
>>       return 0;
>>   }
>>
>> -static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>> +static bool vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>>   {
>>       int ret;
>>
>>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>>           error_propagate(errp, err);
>> -        return -EINVAL;
>> +        return false;
>>       }
>>
>>       vbasedev->migration_blocker = error_copy(err);
>> @@ -818,9 +818,11 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>>       if (ret < 0) {
>>           error_free(vbasedev->migration_blocker);
>>           vbasedev->migration_blocker = NULL;
>> +    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
>> +        warn_report("%s: Migration disabled", vbasedev->name);
>>       }
>>
> Perhaps you can use the the local error to expand on why migration was disabled e.g.
>
>          warn_report_err(err);

Same here.

Thanks.

>
>> -    return ret;
>> +    return !ret;
>>   }
>>
>>   /* ---------------------------------------------------------------------- */
>> @@ -835,7 +837,12 @@ void vfio_reset_bytes_transferred(void)
>>       bytes_transferred = 0;
>>   }
>>
>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>> +/*
>> + * Return true when either migration initialized or blocker registered.
>> + * Currently only return false when adding blocker fails which will
>> + * de-register vfio device.
>> + */
>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>   {
>>       Error *err = NULL;
>>       int ret;
>> @@ -873,18 +880,17 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>                       vbasedev->name);
>>       }
>>
>> -    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
>> -    if (ret) {
>> -        return ret;
>> +    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
>> +        return false;
>>       }
>>
>> -    ret = vfio_block_giommu_migration(vbasedev, errp);
>> -    if (ret) {
>> -        return ret;
>> +    if (vfio_viommu_preset(vbasedev)) {
> The /* Block migration with a vIOMMU */
>
> Would go above, but I don't think we need it anymore ...
>
>> +        error_setg(&err, "%s: Migration is currently not supported "
>> +                   "with vIOMMU enabled", vbasedev->name);
>> +        return vfio_block_migration(vbasedev, err, errp);
> ... as the error message when placed here makes it obvious. So the comment I
> suggested won't add much. Unless others disagree.
>
>>       }
>>
>> -    trace_vfio_migration_realize(vbasedev->name);
>> -    return 0;
>> +    return true;
>>   }
>>
> I think somewhere in function we should have vfio_migration_exit() being called
> behind a label or elsewhere from vfio_migration_realize (...)
>
>>   void vfio_migration_exit(VFIODevice *vbasedev)
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index dc69d3031b24..184d08568154 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3209,7 +3209,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>       if (!pdev->failover_pair_id) {
>>           ret = vfio_migration_realize(vbasedev, errp);
>>           if (ret) {
>> -            error_report("%s: Migration disabled", vbasedev->name);
>> +            trace_vfio_migration_realize(vbasedev->name);
>> +        } else {
>>               goto out_vfio_migration;
>>           }
>>       }
> (...) Which then void the need for this change. Perhaps your previous patch
> (4/5) could come after this refactor patch instead ... where you would fix the
> unwinding error path inside the vfio_migration_realize() as opposed to
> vfio_realize().
>
>> @@ -3257,7 +3258,6 @@ static void vfio_instance_finalize(Object *obj)
>>        */
>>       vfio_put_device(vdev);
>>       vfio_put_group(group);
>> -    vfio_migration_finalize();
>>   }
>>
>>   static void vfio_exitfn(PCIDevice *pdev)
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 93429b9abba0..3c18572322fc 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>>   extern VFIOGroupList vfio_group_list;
>>
>>   bool vfio_mig_active(void);
>> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
>> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
>>   void vfio_unblock_multiple_devices_migration(void);
>> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp);
>> +bool vfio_viommu_preset(VFIODevice *vbasedev);
>>   int64_t vfio_mig_bytes_transferred(void);
>>   void vfio_reset_bytes_transferred(void);
>>
>> @@ -252,8 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
>>   int vfio_spapr_remove_window(VFIOContainer *container,
>>                                hwaddr offset_within_address_space);
>>
>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>   void vfio_migration_exit(VFIODevice *vbasedev);
>> -void vfio_migration_finalize(void);
>>
>>   #endif /* HW_VFIO_VFIO_COMMON_H */
Joao Martins June 29, 2023, 3:42 p.m. UTC | #3
On 29/06/2023 16:20, Avihai Horon wrote:
> On 29/06/2023 15:44, Joao Martins wrote:
>> On 29/06/2023 09:40, Zhenzhong Duan wrote:
>>> This patch refactors vfio_migration_realize() and its dependend code
>>> as follows:
>>>
>>> 1. It's redundant in vfio_migration_realize() to registers multiple blockers,
>>>     e.g: vIOMMU blocker can be refactored as per device blocker.
>>> 2. Change vfio_viommu_preset() to be only a per device checker.
>>> 3. Remove global vIOMMU blocker related stuff, e.g:
>>>     giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
>>>     and vfio_migration_finalize()
>>> 4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
>>>     vfio_block_migration() and vfio_viommu_preset() to return bool type.
>>> 5. Print "Migration disabled" depending on enable_migration property
>>>     and print it as warning instead of error which is overkill.
>>>
>> I am not enterily sure we need to keep "Migration disabled". Perhaps we should
>> just derisk from error to warning and use always the same error messages.
>>
>>> migrate_add_blocker() returns 0 when successfully adding the migration blocker.
>>> However, the caller of vfio_migration_realize() considers that migration was
>>> blocked when the latter returned an error. What matters for migration is that
>>> the blocker is added in core migration, so this cleans up usability such that
>>> user sees "Migrate disabled" when any of the vfio migration blockers are active
>>> and it's not intentionally forced by user with enable-migration=off.
>>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>   hw/vfio/common.c              | 66 +++++++----------------------------
>>>   hw/vfio/migration.c           | 30 +++++++++-------
>>>   hw/vfio/pci.c                 |  4 +--
>>>   include/hw/vfio/vfio-common.h |  7 ++--
>>>   4 files changed, 36 insertions(+), 71 deletions(-)
>>>
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index 77e2ee0e5c6e..c80ecb1da53f 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -362,7 +362,6 @@ bool vfio_mig_active(void)
>>>   }
>>>
>>>   static Error *multiple_devices_migration_blocker;
>>> -static Error *giommu_migration_blocker;
>>>
>>>   static unsigned int vfio_migratable_device_num(void)
>>>   {
>>> @@ -381,19 +380,19 @@ static unsigned int vfio_migratable_device_num(void)
>>>       return device_num;
>>>   }
>>>
>>> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>>> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>>>   {
>>>       int ret;
>>>
>>>       if (multiple_devices_migration_blocker ||
>>>           vfio_migratable_device_num() <= 1) {
>>> -        return 0;
>>> +        return true;
>>>       }
>>>
>>>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>>>           error_setg(errp, "Migration is currently not supported with multiple "
>>>                            "VFIO devices");
>>> -        return -EINVAL;
>>> +        return false;
>>>       }
>>>
>>>       error_setg(&multiple_devices_migration_blocker,
>>> @@ -403,9 +402,15 @@ int vfio_block_multiple_devices_migration(VFIODevice
>>> *vbasedev, Error **errp)
>>>       if (ret < 0) {
>>>           error_free(multiple_devices_migration_blocker);
>>>           multiple_devices_migration_blocker = NULL;
>>> +    } else {
>>> +        /*
>>> +         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
>>> +         * in vfio_migration_realize().
>>> +         */
>>> +        warn_report("Migration disabled, not support multiple VFIO devices");
>>>       }
>>>
>> Perhaps you could stash the previous error message and use it in the
>> warn_report_error to consolidate the error messages e.g.
>>
>> bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>> {
>>      Error *err = NULL;
>>
>>      if (multiple_devices_migration_blocker ||
>>          vfio_migratable_device_num() <= 1) {
>>          return true;
>>      }
>>
>>      error_setg(&err, "%s: Migration is currently not supported with multiple "
>>                       "VFIO devices", vbasedev->name);
>>
>>      if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>>          error_propagate(errp, err);
>>          return -EINVAL;
>>      }
>>
>>      ...
>>      if (ret < 0) {
>>      } else {
>>          /* Warns only on ON_OFF_AUTO_AUTO case */
>>          warn_report_err(err);
> 
> I'm not sure this warning is needed.
> If I remember correctly, I think Alex didn't want migration error/warning
> messages to be logged in the AUTO case.
> 

Hmm, ok, I missed this from the previous discussions.

So today there are migration warnings in the current code (even in the AUTO
case). If we want them removed, then this patch would just remove the
"Migration disabled" message altogether (in the two places we commented on).

The rest of the cases already propagate the error, I think. And in the AUTO case
migration will always be blocked, with the same messages printed elsewhere.

>>      }
>> }
>>
>>> -    return ret;
>>> +    return !ret;
>>>   }
>>>
>>>   void vfio_unblock_multiple_devices_migration(void)
>>> @@ -420,55 +425,10 @@ void vfio_unblock_multiple_devices_migration(void)
>>>       multiple_devices_migration_blocker = NULL;
>>>   }
>>>
>>> -static bool vfio_viommu_preset(void)
>>> +/* Block migration with a vIOMMU */
>> I meant in the previous version to put the comment on top of the caller, not on
>> the definition. But with the new code structure from Avihai the error message
>> further below... it will look a bit redundant.
>>
>>> +bool vfio_viommu_preset(VFIODevice *vbasedev)
>>>   {
>>> -    VFIOAddressSpace *space;
>>> -
>>> -    QLIST_FOREACH(space, &vfio_address_spaces, list) {
>>> -        if (space->as != &address_space_memory) {
>>> -            return true;
>>> -        }
>>> -    }
>>> -
>>> -    return false;
>>> -}
>>> -
>>> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp)
>>> -{
>>> -    int ret;
>>> -
>>> -    if (giommu_migration_blocker ||
>>> -        !vfio_viommu_preset()) {
>>> -        return 0;
>>> -    }
>>> -
>>> -    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>>> -        error_setg(errp,
>>> -                   "Migration is currently not supported with vIOMMU enabled");
>>> -        return -EINVAL;
>>> -    }
>>> -
>>> -    error_setg(&giommu_migration_blocker,
>>> -               "Migration is currently not supported with vIOMMU enabled");
>>> -    ret = migrate_add_blocker(giommu_migration_blocker, errp);
>>> -    if (ret < 0) {
>>> -        error_free(giommu_migration_blocker);
>>> -        giommu_migration_blocker = NULL;
>>> -    }
>>> -
>>> -    return ret;
>>> -}
>>> -
>>> -void vfio_migration_finalize(void)
>>> -{
>>> -    if (!giommu_migration_blocker ||
>>> -        vfio_viommu_preset()) {
>>> -        return;
>>> -    }
>>> -
>>> -    migrate_del_blocker(giommu_migration_blocker);
>>> -    error_free(giommu_migration_blocker);
>>> -    giommu_migration_blocker = NULL;
>>> +    return vbasedev->group->container->space->as != &address_space_memory;
>>>   }
>>>
>> nice consolidation
>>
>>>   static void vfio_set_migration_error(int err)
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index 1db7d52ab2c1..84036e5cfc01 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
>>> @@ -802,13 +802,13 @@ static int vfio_migration_init(VFIODevice *vbasedev)
>>>       return 0;
>>>   }
>>>
>>> -static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>>> +static bool vfio_block_migration(VFIODevice *vbasedev, Error *err, Error
>>> **errp)
>>>   {
>>>       int ret;
>>>
>>>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>>>           error_propagate(errp, err);
>>> -        return -EINVAL;
>>> +        return false;
>>>       }
>>>
>>>       vbasedev->migration_blocker = error_copy(err);
>>> @@ -818,9 +818,11 @@ static int vfio_block_migration(VFIODevice *vbasedev,
>>> Error *err, Error **errp)
>>>       if (ret < 0) {
>>>           error_free(vbasedev->migration_blocker);
>>>           vbasedev->migration_blocker = NULL;
>>> +    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
>>> +        warn_report("%s: Migration disabled", vbasedev->name);
>>>       }
>>>
>> Perhaps you can use the local error to expand on why migration was
>> disabled e.g.
>>
>>          warn_report_err(err);
> 
> Same here.
> 
> Thanks.
> 
>>
>>> -    return ret;
>>> +    return !ret;
>>>   }
>>>
>>>   /* ---------------------------------------------------------------------- */
>>> @@ -835,7 +837,12 @@ void vfio_reset_bytes_transferred(void)
>>>       bytes_transferred = 0;
>>>   }
>>>
>>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>> +/*
>>> + * Return true when either migration initialized or blocker registered.
>>> + * Currently only return false when adding blocker fails which will
>>> + * de-register vfio device.
>>> + */
>>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>>>   {
>>>       Error *err = NULL;
>>>       int ret;
>>> @@ -873,18 +880,17 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error
>>> **errp)
>>>                       vbasedev->name);
>>>       }
>>>
>>> -    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
>>> -    if (ret) {
>>> -        return ret;
>>> +    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
>>> +        return false;
>>>       }
>>>
>>> -    ret = vfio_block_giommu_migration(vbasedev, errp);
>>> -    if (ret) {
>>> -        return ret;
>>> +    if (vfio_viommu_preset(vbasedev)) {
>> The /* Block migration with a vIOMMU */
>>
>> Would go above, but I don't think we need it anymore ...
>>
>>> +        error_setg(&err, "%s: Migration is currently not supported "
>>> +                   "with vIOMMU enabled", vbasedev->name);
>>> +        return vfio_block_migration(vbasedev, err, errp);
>> ... as the error message when placed here makes it obvious. So the comment I
>> suggested won't add much. Unless others disagree.
>>
>>>       }
>>>
>>> -    trace_vfio_migration_realize(vbasedev->name);
>>> -    return 0;
>>> +    return true;
>>>   }
>>>
>> I think somewhere in function we should have vfio_migration_exit() being called
>> behind a label or elsewhere from vfio_migration_realize (...)
>>
>>>   void vfio_migration_exit(VFIODevice *vbasedev)
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index dc69d3031b24..184d08568154 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -3209,7 +3209,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>>       if (!pdev->failover_pair_id) {
>>>           ret = vfio_migration_realize(vbasedev, errp);
>>>           if (ret) {
>>> -            error_report("%s: Migration disabled", vbasedev->name);
>>> +            trace_vfio_migration_realize(vbasedev->name);
>>> +        } else {
>>>               goto out_vfio_migration;
>>>           }
>>>       }
>> (...) Which then void the need for this change. Perhaps your previous patch
>> (4/5) could come after this refactor patch instead ... where you would fix the
>> unwinding error path inside the vfio_migration_realize() as opposed to
>> vfio_realize().
>>
>>> @@ -3257,7 +3258,6 @@ static void vfio_instance_finalize(Object *obj)
>>>        */
>>>       vfio_put_device(vdev);
>>>       vfio_put_group(group);
>>> -    vfio_migration_finalize();
>>>   }
>>>
>>>   static void vfio_exitfn(PCIDevice *pdev)
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 93429b9abba0..3c18572322fc 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>>>   extern VFIOGroupList vfio_group_list;
>>>
>>>   bool vfio_mig_active(void);
>>> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
>>> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
>>>   void vfio_unblock_multiple_devices_migration(void);
>>> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp);
>>> +bool vfio_viommu_preset(VFIODevice *vbasedev);
>>>   int64_t vfio_mig_bytes_transferred(void);
>>>   void vfio_reset_bytes_transferred(void);
>>>
>>> @@ -252,8 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
>>>   int vfio_spapr_remove_window(VFIOContainer *container,
>>>                                hwaddr offset_within_address_space);
>>>
>>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>>>   void vfio_migration_exit(VFIODevice *vbasedev);
>>> -void vfio_migration_finalize(void);
>>>
>>>   #endif /* HW_VFIO_VFIO_COMMON_H */
Cédric Le Goater June 29, 2023, 4:40 p.m. UTC | #4
Hello Zhenzhong,

On 6/29/23 10:40, Zhenzhong Duan wrote:
> This patch refactors vfio_migration_realize() and its dependend code
> as follows:
> 
> 1. It's redundant in vfio_migration_realize() to registers multiple blockers,
>     e.g: vIOMMU blocker can be refactored as per device blocker.
> 2. Change vfio_viommu_preset() to be only a per device checker.
> 3. Remove global vIOMMU blocker related stuff, e.g:
>     giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
>     and vfio_migration_finalize()
> 4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
>     vfio_block_migration() and vfio_viommu_preset() to return bool type.
> 5. Print "Migration disabled" depending on enable_migration property
>     and print it as warning instead of error which is overkill.


We are close to soft freeze and these combo patches adding various fixes
all at once are difficult to evaluate.

Please split this patch into multiple ones to ease the review. Maybe
start with the int -> bool conversion of the return values. It should
remove some noise.

Thanks,

C.

> migrate_add_blocker() returns 0 when successfully adding the migration blocker.
> However, the caller of vfio_migration_realize() considers that migration was
> blocked when the latter returned an error. What matters for migration is that
> the blocker is added in core migration, so this cleans up usability such that
> user sees "Migrate disabled" when any of the vfio migration blockers are active
> and it's not intentionally forced by user with enable-migration=off.
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/vfio/common.c              | 66 +++++++----------------------------
>   hw/vfio/migration.c           | 30 +++++++++-------
>   hw/vfio/pci.c                 |  4 +--
>   include/hw/vfio/vfio-common.h |  7 ++--
>   4 files changed, 36 insertions(+), 71 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 77e2ee0e5c6e..c80ecb1da53f 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -362,7 +362,6 @@ bool vfio_mig_active(void)
>   }
>   
>   static Error *multiple_devices_migration_blocker;
> -static Error *giommu_migration_blocker;
>   
>   static unsigned int vfio_migratable_device_num(void)
>   {
> @@ -381,19 +380,19 @@ static unsigned int vfio_migratable_device_num(void)
>       return device_num;
>   }
>   
> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>   {
>       int ret;
>   
>       if (multiple_devices_migration_blocker ||
>           vfio_migratable_device_num() <= 1) {
> -        return 0;
> +        return true;
>       }
>   
>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>           error_setg(errp, "Migration is currently not supported with multiple "
>                            "VFIO devices");
> -        return -EINVAL;
> +        return false;
>       }
>   
>       error_setg(&multiple_devices_migration_blocker,
> @@ -403,9 +402,15 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
>       if (ret < 0) {
>           error_free(multiple_devices_migration_blocker);
>           multiple_devices_migration_blocker = NULL;
> +    } else {
> +        /*
> +         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
> +         * in vfio_migration_realize().
> +         */
> +        warn_report("Migration disabled, not support multiple VFIO devices");
>       }
>   
> -    return ret;
> +    return !ret;
>   }
>   
>   void vfio_unblock_multiple_devices_migration(void)
> @@ -420,55 +425,10 @@ void vfio_unblock_multiple_devices_migration(void)
>       multiple_devices_migration_blocker = NULL;
>   }
>   
> -static bool vfio_viommu_preset(void)
> +/* Block migration with a vIOMMU */
> +bool vfio_viommu_preset(VFIODevice *vbasedev)
>   {
> -    VFIOAddressSpace *space;
> -
> -    QLIST_FOREACH(space, &vfio_address_spaces, list) {
> -        if (space->as != &address_space_memory) {
> -            return true;
> -        }
> -    }
> -
> -    return false;
> -}
> -
> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp)
> -{
> -    int ret;
> -
> -    if (giommu_migration_blocker ||
> -        !vfio_viommu_preset()) {
> -        return 0;
> -    }
> -
> -    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
> -        error_setg(errp,
> -                   "Migration is currently not supported with vIOMMU enabled");
> -        return -EINVAL;
> -    }
> -
> -    error_setg(&giommu_migration_blocker,
> -               "Migration is currently not supported with vIOMMU enabled");
> -    ret = migrate_add_blocker(giommu_migration_blocker, errp);
> -    if (ret < 0) {
> -        error_free(giommu_migration_blocker);
> -        giommu_migration_blocker = NULL;
> -    }
> -
> -    return ret;
> -}
> -
> -void vfio_migration_finalize(void)
> -{
> -    if (!giommu_migration_blocker ||
> -        vfio_viommu_preset()) {
> -        return;
> -    }
> -
> -    migrate_del_blocker(giommu_migration_blocker);
> -    error_free(giommu_migration_blocker);
> -    giommu_migration_blocker = NULL;
> +    return vbasedev->group->container->space->as != &address_space_memory;
>   }
>   
>   static void vfio_set_migration_error(int err)
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 1db7d52ab2c1..84036e5cfc01 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -802,13 +802,13 @@ static int vfio_migration_init(VFIODevice *vbasedev)
>       return 0;
>   }
>   
> -static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
> +static bool vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>   {
>       int ret;
>   
>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>           error_propagate(errp, err);
> -        return -EINVAL;
> +        return false;
>       }
>   
>       vbasedev->migration_blocker = error_copy(err);
> @@ -818,9 +818,11 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
>       if (ret < 0) {
>           error_free(vbasedev->migration_blocker);
>           vbasedev->migration_blocker = NULL;
> +    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
> +        warn_report("%s: Migration disabled", vbasedev->name);
>       }
>   
> -    return ret;
> +    return !ret;
>   }
>   
>   /* ---------------------------------------------------------------------- */
> @@ -835,7 +837,12 @@ void vfio_reset_bytes_transferred(void)
>       bytes_transferred = 0;
>   }
>   
> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> +/*
> + * Return true when either migration initialized or blocker registered.
> + * Currently only return false when adding blocker fails which will
> + * de-register vfio device.
> + */
> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>   {
>       Error *err = NULL;
>       int ret;
> @@ -873,18 +880,17 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>                       vbasedev->name);
>       }
>   
> -    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
> -    if (ret) {
> -        return ret;
> +    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
> +        return false;
>       }
>   
> -    ret = vfio_block_giommu_migration(vbasedev, errp);
> -    if (ret) {
> -        return ret;
> +    if (vfio_viommu_preset(vbasedev)) {
> +        error_setg(&err, "%s: Migration is currently not supported "
> +                   "with vIOMMU enabled", vbasedev->name);
> +        return vfio_block_migration(vbasedev, err, errp);
>       }
>   
> -    trace_vfio_migration_realize(vbasedev->name);
> -    return 0;
> +    return true;
>   }
>   
>   void vfio_migration_exit(VFIODevice *vbasedev)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index dc69d3031b24..184d08568154 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3209,7 +3209,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       if (!pdev->failover_pair_id) {
>           ret = vfio_migration_realize(vbasedev, errp);
>           if (ret) {
> -            error_report("%s: Migration disabled", vbasedev->name);
> +            trace_vfio_migration_realize(vbasedev->name);
> +        } else {
>               goto out_vfio_migration;
>           }
>       }
> @@ -3257,7 +3258,6 @@ static void vfio_instance_finalize(Object *obj)
>        */
>       vfio_put_device(vdev);
>       vfio_put_group(group);
> -    vfio_migration_finalize();
>   }
>   
>   static void vfio_exitfn(PCIDevice *pdev)
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 93429b9abba0..3c18572322fc 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>   extern VFIOGroupList vfio_group_list;
>   
>   bool vfio_mig_active(void);
> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
>   void vfio_unblock_multiple_devices_migration(void);
> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp);
> +bool vfio_viommu_preset(VFIODevice *vbasedev);
>   int64_t vfio_mig_bytes_transferred(void);
>   void vfio_reset_bytes_transferred(void);
>   
> @@ -252,8 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
>   int vfio_spapr_remove_window(VFIOContainer *container,
>                                hwaddr offset_within_address_space);
>   
> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
>   void vfio_migration_exit(VFIODevice *vbasedev);
> -void vfio_migration_finalize(void);
>   
>   #endif /* HW_VFIO_VFIO_COMMON_H */
Alex Williamson June 29, 2023, 10:12 p.m. UTC | #5
On Thu, 29 Jun 2023 16:42:23 +0100
Joao Martins <joao.m.martins@oracle.com> wrote:

> On 29/06/2023 16:20, Avihai Horon wrote:
> > On 29/06/2023 15:44, Joao Martins wrote:  
> >> On 29/06/2023 09:40, Zhenzhong Duan wrote:  
> >>> This patch refactors vfio_migration_realize() and its dependend code
> >>> as follows:
> >>>
> >>> 1. It's redundant in vfio_migration_realize() to registers multiple blockers,
> >>>     e.g: vIOMMU blocker can be refactored as per device blocker.
> >>> 2. Change vfio_viommu_preset() to be only a per device checker.
> >>> 3. Remove global vIOMMU blocker related stuff, e.g:
> >>>     giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
> >>>     and vfio_migration_finalize()
> >>> 4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
> >>>     vfio_block_migration() and vfio_viommu_preset() to return bool type.
> >>> 5. Print "Migration disabled" depending on enable_migration property
> >>>     and print it as warning instead of error which is overkill.
> >>>  
> >> I am not entirely sure we need to keep "Migration disabled". Perhaps we should
> >> just derisk from error to warning and use always the same error messages.
> >>  
> >>> migrate_add_blocker() returns 0 when successfully adding the migration blocker.
> >>> However, the caller of vfio_migration_realize() considers that migration was
> >>> blocked when the latter returned an error. What matters for migration is that
> >>> the blocker is added in core migration, so this cleans up usability such that
> >>> user sees "Migrate disabled" when any of the vfio migration blockers are active
> >>> and it's not intentionally forced by user with enable-migration=off.
> >>>
> >>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >>> ---
> >>>   hw/vfio/common.c              | 66 +++++++----------------------------
> >>>   hw/vfio/migration.c           | 30 +++++++++-------
> >>>   hw/vfio/pci.c                 |  4 +--
> >>>   include/hw/vfio/vfio-common.h |  7 ++--
> >>>   4 files changed, 36 insertions(+), 71 deletions(-)
> >>>
> >>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>> index 77e2ee0e5c6e..c80ecb1da53f 100644
> >>> --- a/hw/vfio/common.c
> >>> +++ b/hw/vfio/common.c
> >>> @@ -362,7 +362,6 @@ bool vfio_mig_active(void)
> >>>   }
> >>>
> >>>   static Error *multiple_devices_migration_blocker;
> >>> -static Error *giommu_migration_blocker;
> >>>
> >>>   static unsigned int vfio_migratable_device_num(void)
> >>>   {
> >>> @@ -381,19 +380,19 @@ static unsigned int vfio_migratable_device_num(void)
> >>>       return device_num;
> >>>   }
> >>>
> >>> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
> >>> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
> >>>   {
> >>>       int ret;
> >>>
> >>>       if (multiple_devices_migration_blocker ||
> >>>           vfio_migratable_device_num() <= 1) {
> >>> -        return 0;
> >>> +        return true;
> >>>       }
> >>>
> >>>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
> >>>           error_setg(errp, "Migration is currently not supported with multiple "
> >>>                            "VFIO devices");
> >>> -        return -EINVAL;
> >>> +        return false;
> >>>       }
> >>>
> >>>       error_setg(&multiple_devices_migration_blocker,
> >>> @@ -403,9 +402,15 @@ int vfio_block_multiple_devices_migration(VFIODevice
> >>> *vbasedev, Error **errp)
> >>>       if (ret < 0) {
> >>>           error_free(multiple_devices_migration_blocker);
> >>>           multiple_devices_migration_blocker = NULL;
> >>> +    } else {
> >>> +        /*
> >>> +         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
> >>> +         * in vfio_migration_realize().
> >>> +         */
> >>> +        warn_report("Migration disabled, not support multiple VFIO devices");
> >>>       }
> >>>  
> >> Perhaps you could stash the previous error message and use it in the
> >> warn_report_error to consolidate the error messages e.g.
> >>
> >> bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
> >> {
> >>      Error *err = NULL;
> >>
> >>      if (multiple_devices_migration_blocker ||
> >>          vfio_migratable_device_num() <= 1) {
> >>          return true;
> >>      }
> >>
> >>      error_setg(&err, "%s: Migration is currently not supported with multiple "
> >>                       "VFIO devices", vbasedev->name);
> >>
> >>      if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
> >>          error_propagate(errp, err);
> >>          return -EINVAL;
> >>      }
> >>
> >>      ...
> >>      if (ret < 0) {
> >>      } else {
> >>          /* Warns only on ON_OFF_AUTO_AUTO case */
> >>          warn_report_err(err);  
> > 
> > I'm not sure this warning is needed.
> > If I remember correctly, I think Alex didn't want migration error/warning
> > messages to be logged in the AUTO case.

Correct.

> Hmm, ok, I missed this from the previous discussions.
> 
> So today there are migration warnings in the current code. (even in the AUTO
> case). So if we want them removed, then this patch would then just remove the
> "Migration disabled" all together (in the two places we commented).
> 
> The rest of the cases already propagate the error I think. And the AUTO case
> will always be blocked migration and see the same printed messages elsewhere.

I tested this with Avihai's series and saw the correct logging, at
least for a device that does not support migration.

In AUTO mode, we should only ever see errors or warnings if the device
supports migration and an error or incompatibility occurs while further
probing or configuring it.  Lack of support for migration should only
ever generate an error or warning when using enable_migration=on or the
global -only-migratable flag.

As I understood Avihai's patch, we're populating the Error pointer, but
we only ever propagate that error in the above cases.  Thanks,

Alex

> >>      }
> >> }
> >>  
> >>> -    return ret;
> >>> +    return !ret;
> >>>   }
> >>>
> >>>   void vfio_unblock_multiple_devices_migration(void)
> >>> @@ -420,55 +425,10 @@ void vfio_unblock_multiple_devices_migration(void)
> >>>       multiple_devices_migration_blocker = NULL;
> >>>   }
> >>>
> >>> -static bool vfio_viommu_preset(void)
> >>> +/* Block migration with a vIOMMU */  
> >> I meant in the previous version to put the comment on top of the caller, not on
> >> the definition. But with the new code structure from Avihai the error message
> >> further below... it will look a bit redundant.
> >>  
> >>> +bool vfio_viommu_preset(VFIODevice *vbasedev)
> >>>   {
> >>> -    VFIOAddressSpace *space;
> >>> -
> >>> -    QLIST_FOREACH(space, &vfio_address_spaces, list) {
> >>> -        if (space->as != &address_space_memory) {
> >>> -            return true;
> >>> -        }
> >>> -    }
> >>> -
> >>> -    return false;
> >>> -}
> >>> -
> >>> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp)
> >>> -{
> >>> -    int ret;
> >>> -
> >>> -    if (giommu_migration_blocker ||
> >>> -        !vfio_viommu_preset()) {
> >>> -        return 0;
> >>> -    }
> >>> -
> >>> -    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
> >>> -        error_setg(errp,
> >>> -                   "Migration is currently not supported with vIOMMU enabled");
> >>> -        return -EINVAL;
> >>> -    }
> >>> -
> >>> -    error_setg(&giommu_migration_blocker,
> >>> -               "Migration is currently not supported with vIOMMU enabled");
> >>> -    ret = migrate_add_blocker(giommu_migration_blocker, errp);
> >>> -    if (ret < 0) {
> >>> -        error_free(giommu_migration_blocker);
> >>> -        giommu_migration_blocker = NULL;
> >>> -    }
> >>> -
> >>> -    return ret;
> >>> -}
> >>> -
> >>> -void vfio_migration_finalize(void)
> >>> -{
> >>> -    if (!giommu_migration_blocker ||
> >>> -        vfio_viommu_preset()) {
> >>> -        return;
> >>> -    }
> >>> -
> >>> -    migrate_del_blocker(giommu_migration_blocker);
> >>> -    error_free(giommu_migration_blocker);
> >>> -    giommu_migration_blocker = NULL;
> >>> +    return vbasedev->group->container->space->as != &address_space_memory;
> >>>   }
> >>>  
> >> nice consolidation
> >>  
> >>>   static void vfio_set_migration_error(int err)
> >>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> >>> index 1db7d52ab2c1..84036e5cfc01 100644
> >>> --- a/hw/vfio/migration.c
> >>> +++ b/hw/vfio/migration.c
> >>> @@ -802,13 +802,13 @@ static int vfio_migration_init(VFIODevice *vbasedev)
> >>>       return 0;
> >>>   }
> >>>
> >>> -static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
> >>> +static bool vfio_block_migration(VFIODevice *vbasedev, Error *err, Error
> >>> **errp)
> >>>   {
> >>>       int ret;
> >>>
> >>>       if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
> >>>           error_propagate(errp, err);
> >>> -        return -EINVAL;
> >>> +        return false;
> >>>       }
> >>>
> >>>       vbasedev->migration_blocker = error_copy(err);
> >>> @@ -818,9 +818,11 @@ static int vfio_block_migration(VFIODevice *vbasedev,
> >>> Error *err, Error **errp)
> >>>       if (ret < 0) {
> >>>           error_free(vbasedev->migration_blocker);
> >>>           vbasedev->migration_blocker = NULL;
> >>> +    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
> >>> +        warn_report("%s: Migration disabled", vbasedev->name);
> >>>       }
> >>>  
> >> Perhaps you can use the local error to expand on why migration was
> >> disabled e.g.
> >>
> >>          warn_report_err(err);  
> > 
> > Same here.
> > 
> > Thanks.
> >   
> >>  
> >>> -    return ret;
> >>> +    return !ret;
> >>>   }
> >>>
> >>>   /* ---------------------------------------------------------------------- */
> >>> @@ -835,7 +837,12 @@ void vfio_reset_bytes_transferred(void)
> >>>       bytes_transferred = 0;
> >>>   }
> >>>
> >>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> >>> +/*
> >>> + * Return true when either migration initialized or blocker registered.
> >>> + * Currently only return false when adding blocker fails which will
> >>> + * de-register vfio device.
> >>> + */
> >>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
> >>>   {
> >>>       Error *err = NULL;
> >>>       int ret;
> >>> @@ -873,18 +880,17 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error
> >>> **errp)
> >>>                       vbasedev->name);
> >>>       }
> >>>
> >>> -    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
> >>> -    if (ret) {
> >>> -        return ret;
> >>> +    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
> >>> +        return false;
> >>>       }
> >>>
> >>> -    ret = vfio_block_giommu_migration(vbasedev, errp);
> >>> -    if (ret) {
> >>> -        return ret;
> >>> +    if (vfio_viommu_preset(vbasedev)) {  
> >> The /* Block migration with a vIOMMU */
> >>
> >> Would go above, but I don't think we need it anymore ...
> >>  
> >>> +        error_setg(&err, "%s: Migration is currently not supported "
> >>> +                   "with vIOMMU enabled", vbasedev->name);
> >>> +        return vfio_block_migration(vbasedev, err, errp);  
> >> ... as the error message when placed here makes it obvious. So the comment I
> >> suggested won't add much. Unless others disagree.
> >>  
> >>>       }
> >>>
> >>> -    trace_vfio_migration_realize(vbasedev->name);
> >>> -    return 0;
> >>> +    return true;
> >>>   }
> >>>  
> >> I think somewhere in function we should have vfio_migration_exit() being called
> >> behind a label or elsewhere from vfio_migration_realize (...)
> >>  
> >>>   void vfio_migration_exit(VFIODevice *vbasedev)
> >>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >>> index dc69d3031b24..184d08568154 100644
> >>> --- a/hw/vfio/pci.c
> >>> +++ b/hw/vfio/pci.c
> >>> @@ -3209,7 +3209,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> >>>       if (!pdev->failover_pair_id) {
> >>>           ret = vfio_migration_realize(vbasedev, errp);
> >>>           if (ret) {
> >>> -            error_report("%s: Migration disabled", vbasedev->name);
> >>> +            trace_vfio_migration_realize(vbasedev->name);
> >>> +        } else {
> >>>               goto out_vfio_migration;
> >>>           }
> >>>       }  
> >> (...) Which then void the need for this change. Perhaps your previous patch
> >> (4/5) could come after this refactor patch instead ... where you would fix the
> >> unwinding error path inside the vfio_migration_realize() as opposed to
> >> vfio_realize().
> >>  
> >>> @@ -3257,7 +3258,6 @@ static void vfio_instance_finalize(Object *obj)
> >>>        */
> >>>       vfio_put_device(vdev);
> >>>       vfio_put_group(group);
> >>> -    vfio_migration_finalize();
> >>>   }
> >>>
> >>>   static void vfio_exitfn(PCIDevice *pdev)
> >>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >>> index 93429b9abba0..3c18572322fc 100644
> >>> --- a/include/hw/vfio/vfio-common.h
> >>> +++ b/include/hw/vfio/vfio-common.h
> >>> @@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
> >>>   extern VFIOGroupList vfio_group_list;
> >>>
> >>>   bool vfio_mig_active(void);
> >>> -int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
> >>> +bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
> >>>   void vfio_unblock_multiple_devices_migration(void);
> >>> -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp);
> >>> +bool vfio_viommu_preset(VFIODevice *vbasedev);
> >>>   int64_t vfio_mig_bytes_transferred(void);
> >>>   void vfio_reset_bytes_transferred(void);
> >>>
> >>> @@ -252,8 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
> >>>   int vfio_spapr_remove_window(VFIOContainer *container,
> >>>                                hwaddr offset_within_address_space);
> >>>
> >>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> >>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
> >>>   void vfio_migration_exit(VFIODevice *vbasedev);
> >>> -void vfio_migration_finalize(void);
> >>>
> >>>   #endif /* HW_VFIO_VFIO_COMMON_H */
Duan, Zhenzhong June 30, 2023, 1:38 a.m. UTC | #6
>-----Original Message-----
>From: Alex Williamson <alex.williamson@redhat.com>
>Subject: Re: [PATCH v4 5/5] vfio/migration: Refactor and fix print of "Migration
>disabled"
>
>On Thu, 29 Jun 2023 16:42:23 +0100
>Joao Martins <joao.m.martins@oracle.com> wrote:
>
>> On 29/06/2023 16:20, Avihai Horon wrote:
>> > On 29/06/2023 15:44, Joao Martins wrote:
>> >> On 29/06/2023 09:40, Zhenzhong Duan wrote:
...
>> >>> @@ -403,9 +402,15 @@ int
>> >>> vfio_block_multiple_devices_migration(VFIODevice
>> >>> *vbasedev, Error **errp)
>> >>>       if (ret < 0) {
>> >>>           error_free(multiple_devices_migration_blocker);
>> >>>           multiple_devices_migration_blocker = NULL;
>> >>> +    } else {
>> >>> +        /*
>> >>> +         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
>> >>> +         * in vfio_migration_realize().
>> >>> +         */
>> >>> +        warn_report("Migration disabled, not support multiple
>> >>> +VFIO devices");
>> >>>       }
>> >>>
>> >> Perhaps you could stash the previous error message and use it in
>> >> the warn_report_error to consolidate the error messages e.g.
>> >>
>> >> bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev,
>> >> Error **errp) {
>> >>      Error *err = NULL;
>> >>
>> >>      if (multiple_devices_migration_blocker ||
>> >>          vfio_migratable_device_num() <= 1) {
>> >>          return true;
>> >>      }
>> >>
>> >>      error_setg(&err, "%s: Migration is currently not supported with
>multiple "
>> >>                       "VFIO devices", vbasedev->name);
>> >>
>> >>      if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
>> >>          error_propagate(errp, err);
>> >>          return -EINVAL;
>> >>      }
>> >>
>> >>      ...
>> >>      if (ret < 0) {
>> >>      } else {
>> >>          /* Warns only on ON_OFF_AUTO_AUTO case */
>> >>          warn_report_err(err);
>> >
>> > I'm not sure this warning is needed.
>> > If I remember correctly, I think Alex didn't want migration
>> > error/warning messages to be logged in the AUTO case.
>
>Correct.
>
>> Hmm, ok, I missed this from the previous discussions.
>>
>> So today there are migration warnings in the current code. (even in
>> the AUTO case). So if we want them removed, then this patch would then
>> just remove the "Migration disabled" altogether (in the two places we
>commented).
>>
>> The rest of the cases already propagate the error I think. And the
>> AUTO case will always be blocked migration and see the same printed
>messages elsewhere.
>
>I tested this with Avihai's series and saw the correct logging, at least for a
>device that does not support migration.
>
>In AUTO mode, we should only ever see errors or warnings if the device
>supports migration and an error or incompatibility occurs while further
>probing or configuring it.  Lack of support for migration should only ever
>generate an error or warning when using enable_migration=on or the global -
>only-migratable flag.
Will remove the "Migration disabled" print in both places.

>
>As I understood Avihai's patch, we're populating the Error pointer, but we
>only ever propagate that error in the above cases.  Thanks,
>
>Alex
>
...
>> >>> +818,11 @@ static int vfio_block_migration(VFIODevice *vbasedev,
>> >>> Error *err, Error **errp)
>> >>>       if (ret < 0) {
>> >>>           error_free(vbasedev->migration_blocker);
>> >>>           vbasedev->migration_blocker = NULL;
>> >>> +    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
>> >>> +        warn_report("%s: Migration disabled", vbasedev->name);
>> >>>       }
>> >>>
>> >> Perhaps you can use the local error to expand on why migration
>> >> was disabled e.g.
>> >>
>> >>          warn_report_err(err);
>> >
>> > Same here.
>> >
>> > Thanks.
>> >
>> >>
>> >>> -    return ret;
>> >>> +    return !ret;
>> >>>   }
>> >>>
>> >>>   /*
>> >>> ------------------------------------------------------------------
>> >>> ---- */ @@ -835,7 +837,12 @@ void
>> >>> vfio_reset_bytes_transferred(void)
>> >>>       bytes_transferred = 0;
>> >>>   }
>> >>>
>> >>> -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>> >>> +/*
>> >>> + * Return true when either migration initialized or blocker registered.
>> >>> + * Currently only return false when adding blocker fails which
>> >>> +will
>> >>> + * de-register vfio device.
>> >>> + */
>> >>> +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
>> >>>   {
>> >>>       Error *err = NULL;
>> >>>       int ret;
>> >>> @@ -873,18 +880,17 @@ int vfio_migration_realize(VFIODevice
>> >>> *vbasedev, Error
>> >>> **errp)
>> >>>                       vbasedev->name);
>> >>>       }
>> >>>
>> >>> -    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
>> >>> -    if (ret) {
>> >>> -        return ret;
>> >>> +    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
>> >>> +        return false;
>> >>>       }
>> >>>
>> >>> -    ret = vfio_block_giommu_migration(vbasedev, errp);
>> >>> -    if (ret) {
>> >>> -        return ret;
>> >>> +    if (vfio_viommu_preset(vbasedev)) {
>> >> The /* Block migration with a vIOMMU */
>> >>
>> >> Would go above, but I don't think we need it anymore ...
Will remove it.

>> >>
>> >>> +        error_setg(&err, "%s: Migration is currently not supported "
>> >>> +                   "with vIOMMU enabled", vbasedev->name);
>> >>> +        return vfio_block_migration(vbasedev, err, errp);
>> >> ... as the error message when placed here makes it obvious. So the
>> >> comment I suggested won't add much. Unless others disagree.
>> >>
>> >>>       }
>> >>>
>> >>> -    trace_vfio_migration_realize(vbasedev->name);
>> >>> -    return 0;
>> >>> +    return true;
>> >>>   }
>> >>>
>> >> I think somewhere in the function we should have vfio_migration_exit()
>> >> being called behind a label or elsewhere from
>> >> vfio_migration_realize (...)
>> >>
>> >>>   void vfio_migration_exit(VFIODevice *vbasedev) diff --git
>> >>> a/hw/vfio/pci.c b/hw/vfio/pci.c index dc69d3031b24..184d08568154
>> >>> 100644
>> >>> --- a/hw/vfio/pci.c
>> >>> +++ b/hw/vfio/pci.c
>> >>> @@ -3209,7 +3209,8 @@ static void vfio_realize(PCIDevice *pdev,
>> >>> Error **errp)
>> >>>       if (!pdev->failover_pair_id) {
>> >>>           ret = vfio_migration_realize(vbasedev, errp);
>> >>>           if (ret) {
>> >>> -            error_report("%s: Migration disabled",
>> >>> vbasedev->name);
>> >>> +            trace_vfio_migration_realize(vbasedev->name);
>> >>> +        } else {
>> >>>               goto out_vfio_migration;
>> >>>           }
>> >>>       }
>> >> (...) Which then voids the need for this change. Perhaps your
>> >> previous patch
>> >> (4/5) could come after this refactor patch instead ... where you
>> >> would fix the unwinding error path inside the
>> >> vfio_migration_realize() as opposed to vfio_realize().
Sure, will fix.

Thanks
Zhenzhong
Duan, Zhenzhong June 30, 2023, 1:40 a.m. UTC | #7
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Friday, June 30, 2023 12:40 AM
>Subject: Re: [PATCH v4 5/5] vfio/migration: Refactor and fix print of "Migration
>disabled"
>
>Hello Zhenzhong,
>
>On 6/29/23 10:40, Zhenzhong Duan wrote:
>> This patch refactors vfio_migration_realize() and its dependent code
>> as follows:
>>
>> 1. It's redundant in vfio_migration_realize() to registers multiple blockers,
>>     e.g: vIOMMU blocker can be refactored as per device blocker.
>> 2. Change vfio_viommu_preset() to be only a per device checker.
>> 3. Remove global vIOMMU blocker related stuff, e.g:
>>     giommu_migration_blocker, vfio_[block|unblock]_giommu_migration()
>>     and vfio_migration_finalize()
>> 4. Change vfio_migration_realize(), vfio_block_multiple_devices_migration()
>>     vfio_block_migration() and vfio_viommu_preset() to return bool type.
>> 5. Print "Migration disabled" depending on enable_migration property
>>     and print it as warning instead of error which is overkill.
>
>
>We are close to soft freeze and these combo patches adding various fixes all
>at once are difficult to evaluate.
>
>Please split this patch into multiple ones to ease the review.  Maybe start with
>the int -> bool conversion of the return values. It should remove some noise.
Good suggestion! Will do.

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 77e2ee0e5c6e..c80ecb1da53f 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -362,7 +362,6 @@  bool vfio_mig_active(void)
 }
 
 static Error *multiple_devices_migration_blocker;
-static Error *giommu_migration_blocker;
 
 static unsigned int vfio_migratable_device_num(void)
 {
@@ -381,19 +380,19 @@  static unsigned int vfio_migratable_device_num(void)
     return device_num;
 }
 
-int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
+bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
 {
     int ret;
 
     if (multiple_devices_migration_blocker ||
         vfio_migratable_device_num() <= 1) {
-        return 0;
+        return true;
     }
 
     if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
         error_setg(errp, "Migration is currently not supported with multiple "
                          "VFIO devices");
-        return -EINVAL;
+        return false;
     }
 
     error_setg(&multiple_devices_migration_blocker,
@@ -403,9 +402,15 @@  int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
     if (ret < 0) {
         error_free(multiple_devices_migration_blocker);
         multiple_devices_migration_blocker = NULL;
+    } else {
+        /*
+         * Only ON_OFF_AUTO_AUTO case, ON_OFF_AUTO_OFF is checked
+         * in vfio_migration_realize().
+         */
+        warn_report("Migration disabled, not support multiple VFIO devices");
     }
 
-    return ret;
+    return !ret;
 }
 
 void vfio_unblock_multiple_devices_migration(void)
@@ -420,55 +425,10 @@  void vfio_unblock_multiple_devices_migration(void)
     multiple_devices_migration_blocker = NULL;
 }
 
-static bool vfio_viommu_preset(void)
+/* Block migration with a vIOMMU */
+bool vfio_viommu_preset(VFIODevice *vbasedev)
 {
-    VFIOAddressSpace *space;
-
-    QLIST_FOREACH(space, &vfio_address_spaces, list) {
-        if (space->as != &address_space_memory) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp)
-{
-    int ret;
-
-    if (giommu_migration_blocker ||
-        !vfio_viommu_preset()) {
-        return 0;
-    }
-
-    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
-        error_setg(errp,
-                   "Migration is currently not supported with vIOMMU enabled");
-        return -EINVAL;
-    }
-
-    error_setg(&giommu_migration_blocker,
-               "Migration is currently not supported with vIOMMU enabled");
-    ret = migrate_add_blocker(giommu_migration_blocker, errp);
-    if (ret < 0) {
-        error_free(giommu_migration_blocker);
-        giommu_migration_blocker = NULL;
-    }
-
-    return ret;
-}
-
-void vfio_migration_finalize(void)
-{
-    if (!giommu_migration_blocker ||
-        vfio_viommu_preset()) {
-        return;
-    }
-
-    migrate_del_blocker(giommu_migration_blocker);
-    error_free(giommu_migration_blocker);
-    giommu_migration_blocker = NULL;
+    return vbasedev->group->container->space->as != &address_space_memory;
 }
 
 static void vfio_set_migration_error(int err)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 1db7d52ab2c1..84036e5cfc01 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -802,13 +802,13 @@  static int vfio_migration_init(VFIODevice *vbasedev)
     return 0;
 }
 
-static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
+static bool vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
 {
     int ret;
 
     if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
         error_propagate(errp, err);
-        return -EINVAL;
+        return false;
     }
 
     vbasedev->migration_blocker = error_copy(err);
@@ -818,9 +818,11 @@  static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
     if (ret < 0) {
         error_free(vbasedev->migration_blocker);
         vbasedev->migration_blocker = NULL;
+    } else if (vbasedev->enable_migration != ON_OFF_AUTO_OFF) {
+        warn_report("%s: Migration disabled", vbasedev->name);
     }
 
-    return ret;
+    return !ret;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -835,7 +837,12 @@  void vfio_reset_bytes_transferred(void)
     bytes_transferred = 0;
 }
 
-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
+/*
+ * Return true when either migration initialized or blocker registered.
+ * Currently only return false when adding blocker fails which will
+ * de-register vfio device.
+ */
+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
 {
     Error *err = NULL;
     int ret;
@@ -873,18 +880,17 @@  int vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
                     vbasedev->name);
     }
 
-    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
-    if (ret) {
-        return ret;
+    if (!vfio_block_multiple_devices_migration(vbasedev, errp)) {
+        return false;
     }
 
-    ret = vfio_block_giommu_migration(vbasedev, errp);
-    if (ret) {
-        return ret;
+    if (vfio_viommu_preset(vbasedev)) {
+        error_setg(&err, "%s: Migration is currently not supported "
+                   "with vIOMMU enabled", vbasedev->name);
+        return vfio_block_migration(vbasedev, err, errp);
     }
 
-    trace_vfio_migration_realize(vbasedev->name);
-    return 0;
+    return true;
 }
 
 void vfio_migration_exit(VFIODevice *vbasedev)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index dc69d3031b24..184d08568154 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3209,7 +3209,8 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     if (!pdev->failover_pair_id) {
         ret = vfio_migration_realize(vbasedev, errp);
         if (ret) {
-            error_report("%s: Migration disabled", vbasedev->name);
+            trace_vfio_migration_realize(vbasedev->name);
+        } else {
             goto out_vfio_migration;
         }
     }
@@ -3257,7 +3258,6 @@  static void vfio_instance_finalize(Object *obj)
      */
     vfio_put_device(vdev);
     vfio_put_group(group);
-    vfio_migration_finalize();
 }
 
 static void vfio_exitfn(PCIDevice *pdev)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 93429b9abba0..3c18572322fc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -225,9 +225,9 @@  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
 extern VFIOGroupList vfio_group_list;
 
 bool vfio_mig_active(void);
-int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
+bool vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
 void vfio_unblock_multiple_devices_migration(void);
-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp);
+bool vfio_viommu_preset(VFIODevice *vbasedev);
 int64_t vfio_mig_bytes_transferred(void);
 void vfio_reset_bytes_transferred(void);
 
@@ -252,8 +252,7 @@  int vfio_spapr_create_window(VFIOContainer *container,
 int vfio_spapr_remove_window(VFIOContainer *container,
                              hwaddr offset_within_address_space);
 
-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
 void vfio_migration_exit(VFIODevice *vbasedev);
-void vfio_migration_finalize(void);
 
 #endif /* HW_VFIO_VFIO_COMMON_H */