diff mbox series

[v6,11/21] vfio/pci: Make vfio cdev pre-openable by passing a file handle

Message ID 20231114100955.1961974-12-zhenzhong.duan@intel.com
State New
Headers show
Series vfio: Adopt iommufd | expand

Commit Message

Duan, Zhenzhong Nov. 14, 2023, 10:09 a.m. UTC
This gives management tools like libvirt a chance to open the vfio
cdev with privilege and pass FD to qemu. This way qemu never needs
to have privilege to open a VFIO or iommu cdev node.

Together with the earlier support of pre-opening /dev/iommu device,
now we have full support of passing a vfio device to unprivileged
qemu by management tool. This mode is no more considered for the
legacy backend. So let's remove the "TODO" comment.

Add helper functions vfio_device_set_fd() and vfio_device_get_name()
to set fd and get device name, they will also be used by other vfio
devices.

There is no easy way to check if a device is mdev with FD passing,
so fail the x-balloon-allowed check unconditionally in this case.

There is also no easy way to get BDF as name with FD passing, so
we fake a name by VFIO_FD[fd].

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v6: simplify CONFIG_IOMMUFD checking code
    introduce a helper vfio_device_set_fd

 include/hw/vfio/vfio-common.h |  3 +++
 hw/vfio/helpers.c             | 44 +++++++++++++++++++++++++++++++++++
 hw/vfio/iommufd.c             | 12 ++++++----
 hw/vfio/pci.c                 | 28 ++++++++++++----------
 4 files changed, 71 insertions(+), 16 deletions(-)

Comments

Cédric Le Goater Nov. 14, 2023, 2:08 p.m. UTC | #1
On 11/14/23 11:09, Zhenzhong Duan wrote:
> This gives management tools like libvirt a chance to open the vfio
> cdev with privilege and pass FD to qemu. This way qemu never needs
> to have privilege to open a VFIO or iommu cdev node.
> 
> Together with the earlier support of pre-opening /dev/iommu device,
> now we have full support of passing a vfio device to unprivileged
> qemu by management tool. This mode is no more considered for the
> legacy backend. So let's remove the "TODO" comment.
> 
> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
> to set fd and get device name, they will also be used by other vfio
> devices.
> 
> There is no easy way to check if a device is mdev with FD passing,
> so fail the x-balloon-allowed check unconditionally in this case.
> 
> There is also no easy way to get BDF as name with FD passing, so
> we fake a name by VFIO_FD[fd].
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> v6: simplify CONFIG_IOMMUFD checking code
>      introduce a helper vfio_device_set_fd
> 
>   include/hw/vfio/vfio-common.h |  3 +++
>   hw/vfio/helpers.c             | 44 +++++++++++++++++++++++++++++++++++
>   hw/vfio/iommufd.c             | 12 ++++++----
>   hw/vfio/pci.c                 | 28 ++++++++++++----------
>   4 files changed, 71 insertions(+), 16 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3dac5c167e..567e5f7bea 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
>                                       hwaddr size);
>   int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
>                                    uint64_t size, ram_addr_t ram_addr);
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>   #endif /* HW_VFIO_VFIO_COMMON_H */
> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
> index 168847e7c5..986ef1095a 100644
> --- a/hw/vfio/helpers.c
> +++ b/hw/vfio/helpers.c
> @@ -20,6 +20,7 @@
>    */
>   
>   #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */

Unused now. I removed it.


Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.

>   #include <sys/ioctl.h>
>   
>   #include "hw/vfio/vfio-common.h"
> @@ -27,6 +28,7 @@
>   #include "trace.h"
>   #include "qapi/error.h"
>   #include "qemu/error-report.h"
> +#include "monitor/monitor.h"
>   
>   /*
>    * Common VFIO interrupt disable
> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>   
>       return ret;
>   }
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
> +{
> +    struct stat st;
> +
> +    if (vbasedev->fd < 0) {
> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
> +            error_setg_errno(errp, errno, "no such host device");
> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> +            return -errno;
> +        }
> +        /* User may specify a name, e.g: VFIO platform device */
> +        if (!vbasedev->name) {
> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> +        }
> +    } else {
> +        if (!vbasedev->iommufd) {
> +            error_setg(errp, "Use FD passing only with iommufd backend");
> +            return -EINVAL;
> +        }
> +        /*
> +         * Give a name with fd so any function printing out vbasedev->name
> +         * will not break.
> +         */
> +        if (!vbasedev->name) {
> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
> +{
> +    int fd = monitor_fd_param(monitor_cur(), str, errp);
> +
> +    if (fd < 0) {
> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
> +        return;
> +    }
> +    vbasedev->fd = fd;
> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 3eec428162..e08a217057 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>       uint32_t ioas_id;
>       Error *err = NULL;
>   
> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> -    if (devfd < 0) {
> -        return devfd;
> +    if (vbasedev->fd < 0) {
> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> +        if (devfd < 0) {
> +            return devfd;
> +        }
> +        vbasedev->fd = devfd;
> +    } else {
> +        devfd = vbasedev->fd;
>       }
> -    vbasedev->fd = devfd;
>   
>       ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
>       if (ret) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c5984b0598..b23b492cce 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       VFIODevice *vbasedev = &vdev->vbasedev;
>       char *tmp, *subsys;
>       Error *err = NULL;
> -    struct stat st;
>       int i, ret;
>       bool is_mdev;
>       char uuid[UUID_STR_LEN];
>       char *name;
>   
> -    if (!vbasedev->sysfsdev) {
> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>           if (!(~vdev->host.domain || ~vdev->host.bus ||
>                 ~vdev->host.slot || ~vdev->host.function)) {
>               error_setg(errp, "No provided host device");
>               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
> +#ifdef CONFIG_IOMMUFD
> +                              "or -device vfio-pci,fd=DEVICE_FD "
> +#endif
>                                 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>               return;
>           }
> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>                               vdev->host.slot, vdev->host.function);
>       }
>   
> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
> -        error_setg_errno(errp, errno, "no such host device");
> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> +    if (vfio_device_get_name(vbasedev, errp)) {
>           return;
>       }
> -
> -    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>       vbasedev->ops = &vfio_pci_ops;
>       vbasedev->type = VFIO_DEVICE_TYPE_PCI;
>       vbasedev->dev = DEVICE(vdev);
> @@ -3330,6 +3328,7 @@ static void vfio_instance_init(Object *obj)
>       vdev->host.bus = ~0U;
>       vdev->host.slot = ~0U;
>       vdev->host.function = ~0U;
> +    vdev->vbasedev.fd = -1;
>   
>       vdev->nv_gpudirect_clique = 0xFF;
>   
> @@ -3383,11 +3382,6 @@ static Property vfio_pci_dev_properties[] = {
>                                      qdev_prop_nv_gpudirect_clique, uint8_t),
>       DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
>                                   OFF_AUTOPCIBAR_OFF),
> -    /*
> -     * TODO - support passed fds... is this necessary?
> -     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
> -     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
> -     */
>   #ifdef CONFIG_IOMMUFD
>       DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
>                        TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
> @@ -3395,6 +3389,13 @@ static Property vfio_pci_dev_properties[] = {
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> +#ifdef CONFIG_IOMMUFD
> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
> +{
> +    vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
> +}
> +#endif
> +
>   static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>   {
>       DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -3402,6 +3403,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>   
>       dc->reset = vfio_pci_reset;
>       device_class_set_props(dc, vfio_pci_dev_properties);
> +#ifdef CONFIG_IOMMUFD
> +    object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
> +#endif
>       dc->desc = "VFIO-based PCI device assignment";
>       set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>       pdc->realize = vfio_realize;
Philippe Mathieu-Daudé Nov. 15, 2023, 12:09 p.m. UTC | #2
Hi Zhenzhong,

On 14/11/23 11:09, Zhenzhong Duan wrote:
> This gives management tools like libvirt a chance to open the vfio
> cdev with privilege and pass FD to qemu. This way qemu never needs
> to have privilege to open a VFIO or iommu cdev node.
> 
> Together with the earlier support of pre-opening /dev/iommu device,
> now we have full support of passing a vfio device to unprivileged
> qemu by management tool. This mode is no more considered for the
> legacy backend. So let's remove the "TODO" comment.
> 
> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
> to set fd and get device name, they will also be used by other vfio
> devices.
> 
> There is no easy way to check if a device is mdev with FD passing,
> so fail the x-balloon-allowed check unconditionally in this case.
> 
> There is also no easy way to get BDF as name with FD passing, so
> we fake a name by VFIO_FD[fd].
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> v6: simplify CONFIG_IOMMUFD checking code
>      introduce a helper vfio_device_set_fd
> 
>   include/hw/vfio/vfio-common.h |  3 +++
>   hw/vfio/helpers.c             | 44 +++++++++++++++++++++++++++++++++++
>   hw/vfio/iommufd.c             | 12 ++++++----
>   hw/vfio/pci.c                 | 28 ++++++++++++----------
>   4 files changed, 71 insertions(+), 16 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3dac5c167e..567e5f7bea 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
>                                       hwaddr size);
>   int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
>                                    uint64_t size, ram_addr_t ram_addr);
> +

Please add bare documentation:

   /* Returns 0 on success, or a negative errno. */

> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);

Functions taking an Error** param should return a boolean, so:

   /* Return: true on success, else false setting @errp with error. */

> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>   #endif /* HW_VFIO_VFIO_COMMON_H */


> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>   
>       return ret;
>   }
> +
> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
> +{
> +    struct stat st;
> +
> +    if (vbasedev->fd < 0) {
> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
> +            error_setg_errno(errp, errno, "no such host device");
> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> +            return -errno;
> +        }
> +        /* User may specify a name, e.g: VFIO platform device */
> +        if (!vbasedev->name) {
> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
> +        }
> +    } else {
> +        if (!vbasedev->iommufd) {
> +            error_setg(errp, "Use FD passing only with iommufd backend");
> +            return -EINVAL;
> +        }
> +        /*
> +         * Give a name with fd so any function printing out vbasedev->name
> +         * will not break.
> +         */
> +        if (!vbasedev->name) {
> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)

    bool vfio_device_set_fd(..., Error **errp)

> +{
> +    int fd = monitor_fd_param(monitor_cur(), str, errp);
> +
> +    if (fd < 0) {
> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
> +        return;

            return false;

> +    }
> +    vbasedev->fd = fd;

        return true;

> +}
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 3eec428162..e08a217057 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>       uint32_t ioas_id;
>       Error *err = NULL;
>   
> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> -    if (devfd < 0) {
> -        return devfd;
> +    if (vbasedev->fd < 0) {
> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
> +        if (devfd < 0) {
> +            return devfd;
> +        }
> +        vbasedev->fd = devfd;
> +    } else {
> +        devfd = vbasedev->fd;
>       }
> -    vbasedev->fd = devfd;
>   
>       ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
>       if (ret) {
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c5984b0598..b23b492cce 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>       VFIODevice *vbasedev = &vdev->vbasedev;
>       char *tmp, *subsys;
>       Error *err = NULL;
> -    struct stat st;
>       int i, ret;
>       bool is_mdev;
>       char uuid[UUID_STR_LEN];
>       char *name;
>   
> -    if (!vbasedev->sysfsdev) {
> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>           if (!(~vdev->host.domain || ~vdev->host.bus ||
>                 ~vdev->host.slot || ~vdev->host.function)) {
>               error_setg(errp, "No provided host device");
>               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
> +#ifdef CONFIG_IOMMUFD
> +                              "or -device vfio-pci,fd=DEVICE_FD "
> +#endif
>                                 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>               return;
>           }
> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>                               vdev->host.slot, vdev->host.function);
>       }
>   
> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
> -        error_setg_errno(errp, errno, "no such host device");
> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
> +    if (vfio_device_get_name(vbasedev, errp)) {

Clearer as:

        if (vfio_device_get_name(vbasedev, errp) < 0) {

>           return;
>       }

Regards,

Phil.
Cédric Le Goater Nov. 15, 2023, 1:05 p.m. UTC | #3
On 11/15/23 13:09, Philippe Mathieu-Daudé wrote:
> Hi Zhenzhong,
> 
> On 14/11/23 11:09, Zhenzhong Duan wrote:
>> This gives management tools like libvirt a chance to open the vfio
>> cdev with privilege and pass FD to qemu. This way qemu never needs
>> to have privilege to open a VFIO or iommu cdev node.
>>
>> Together with the earlier support of pre-opening /dev/iommu device,
>> now we have full support of passing a vfio device to unprivileged
>> qemu by management tool. This mode is no more considered for the
>> legacy backend. So let's remove the "TODO" comment.
>>
>> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
>> to set fd and get device name, they will also be used by other vfio
>> devices.
>>
>> There is no easy way to check if a device is mdev with FD passing,
>> so fail the x-balloon-allowed check unconditionally in this case.
>>
>> There is also no easy way to get BDF as name with FD passing, so
>> we fake a name by VFIO_FD[fd].
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> v6: simplify CONFIG_IOMMUFD checking code
>>      introduce a helper vfio_device_set_fd
>>
>>   include/hw/vfio/vfio-common.h |  3 +++
>>   hw/vfio/helpers.c             | 44 +++++++++++++++++++++++++++++++++++
>>   hw/vfio/iommufd.c             | 12 ++++++----
>>   hw/vfio/pci.c                 | 28 ++++++++++++----------
>>   4 files changed, 71 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 3dac5c167e..567e5f7bea 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -251,4 +251,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
>>                                       hwaddr size);
>>   int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
>>                                    uint64_t size, ram_addr_t ram_addr);
>> +
> 
> Please add bare documentation:
> 
>    /* Returns 0 on success, or a negative errno. */
> 
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> 
> Functions taking an Error** param should return a boolean, so:
> 
>    /* Return: true on success, else false setting @errp with error. */
> 
>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>>   #endif /* HW_VFIO_VFIO_COMMON_H */
> 
> 
>> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
>>       return ret;
>>   }
>> +
>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>> +{
>> +    struct stat st;
>> +
>> +    if (vbasedev->fd < 0) {
>> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
>> +            error_setg_errno(errp, errno, "no such host device");
>> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> +            return -errno;
>> +        }
>> +        /* User may specify a name, e.g: VFIO platform device */
>> +        if (!vbasedev->name) {
>> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>> +        }
>> +    } else {
>> +        if (!vbasedev->iommufd) {
>> +            error_setg(errp, "Use FD passing only with iommufd backend");
>> +            return -EINVAL;
>> +        }
>> +        /*
>> +         * Give a name with fd so any function printing out vbasedev->name
>> +         * will not break.
>> +         */
>> +        if (!vbasedev->name) {
>> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
> 
>     bool vfio_device_set_fd(..., Error **errp)
> 
>> +{
>> +    int fd = monitor_fd_param(monitor_cur(), str, errp);
>> +
>> +    if (fd < 0) {
>> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
>> +        return;
> 
>             return false;
> 
>> +    }
>> +    vbasedev->fd = fd;
> 
>         return true;

If we had a QOM base device object, vfio_device_set_fd() would be passed
directly to object_class_property_add_str() which expects a :

   void (*set)(Object *, const char *, Error **)

I think it is fine to keep as it is. We might have a QOM base device object
one day ! Minor anyway.

Thanks,

C.


> 
>> +}
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 3eec428162..e08a217057 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>>       uint32_t ioas_id;
>>       Error *err = NULL;
>> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> -    if (devfd < 0) {
>> -        return devfd;
>> +    if (vbasedev->fd < 0) {
>> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>> +        if (devfd < 0) {
>> +            return devfd;
>> +        }
>> +        vbasedev->fd = devfd;
>> +    } else {
>> +        devfd = vbasedev->fd;
>>       }
>> -    vbasedev->fd = devfd;
>>       ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
>>       if (ret) {
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index c5984b0598..b23b492cce 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>       VFIODevice *vbasedev = &vdev->vbasedev;
>>       char *tmp, *subsys;
>>       Error *err = NULL;
>> -    struct stat st;
>>       int i, ret;
>>       bool is_mdev;
>>       char uuid[UUID_STR_LEN];
>>       char *name;
>> -    if (!vbasedev->sysfsdev) {
>> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>           if (!(~vdev->host.domain || ~vdev->host.bus ||
>>                 ~vdev->host.slot || ~vdev->host.function)) {
>>               error_setg(errp, "No provided host device");
>>               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>> +#ifdef CONFIG_IOMMUFD
>> +                              "or -device vfio-pci,fd=DEVICE_FD "
>> +#endif
>>                                 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>               return;
>>           }
>> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>                               vdev->host.slot, vdev->host.function);
>>       }
>> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
>> -        error_setg_errno(errp, errno, "no such host device");
>> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>> +    if (vfio_device_get_name(vbasedev, errp)) {
> 
> Clearer as:
> 
>         if (vfio_device_get_name(vbasedev, errp) < 0) {
> 
>>           return;
>>       }
> 
> Regards,
> 
> Phil.
>
Duan, Zhenzhong Nov. 16, 2023, 2:15 a.m. UTC | #4
Hi Philippe,

>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Wednesday, November 15, 2023 9:05 PM
>Subject: Re: [PATCH v6 11/21] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>On 11/15/23 13:09, Philippe Mathieu-Daudé wrote:
>> Hi Zhenzhong,
>>
>> On 14/11/23 11:09, Zhenzhong Duan wrote:
>>> This gives management tools like libvirt a chance to open the vfio
>>> cdev with privilege and pass FD to qemu. This way qemu never needs
>>> to have privilege to open a VFIO or iommu cdev node.
>>>
>>> Together with the earlier support of pre-opening /dev/iommu device,
>>> now we have full support of passing a vfio device to unprivileged
>>> qemu by management tool. This mode is no more considered for the
>>> legacy backend. So let's remove the "TODO" comment.
>>>
>>> Add helper functions vfio_device_set_fd() and vfio_device_get_name()
>>> to set fd and get device name, they will also be used by other vfio
>>> devices.
>>>
>>> There is no easy way to check if a device is mdev with FD passing,
>>> so fail the x-balloon-allowed check unconditionally in this case.
>>>
>>> There is also no easy way to get BDF as name with FD passing, so
>>> we fake a name by VFIO_FD[fd].
>>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> v6: simplify CONFIG_IOMMUFD checking code
>>>      introduce a helper vfio_device_set_fd
>>>
>>>   include/hw/vfio/vfio-common.h |  3 +++
>>>   hw/vfio/helpers.c             | 44 +++++++++++++++++++++++++++++++++++
>>>   hw/vfio/iommufd.c             | 12 ++++++----
>>>   hw/vfio/pci.c                 | 28 ++++++++++++----------
>>>   4 files changed, 71 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index 3dac5c167e..567e5f7bea 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -251,4 +251,7 @@ int
>vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
>>>                                       hwaddr size);
>>>   int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
>>>                                    uint64_t size, ram_addr_t ram_addr);
>>> +
>>
>> Please add bare documentation:
>>
>>    /* Returns 0 on success, or a negative errno. */
>>
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);

Will do, I'd like to wait a few days to collect more suggested changes and RB,
Then send all these updates to Cédric in once before he pushes this series to vfio-next.

>>
>> Functions taking an Error** param should return a boolean, so:
>>
>>    /* Return: true on success, else false setting @errp with error. */
>>
>>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
>>>   #endif /* HW_VFIO_VFIO_COMMON_H */
>>
>>
>>> @@ -609,3 +611,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int
>region, uint16_t cap_type)
>>>       return ret;
>>>   }
>>> +
>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
>>> +{
>>> +    struct stat st;
>>> +
>>> +    if (vbasedev->fd < 0) {
>>> +        if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> +            error_setg_errno(errp, errno, "no such host device");
>>> +            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> +            return -errno;
>>> +        }
>>> +        /* User may specify a name, e.g: VFIO platform device */
>>> +        if (!vbasedev->name) {
>>> +            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
>>> +        }
>>> +    } else {
>>> +        if (!vbasedev->iommufd) {
>>> +            error_setg(errp, "Use FD passing only with iommufd backend");
>>> +            return -EINVAL;
>>> +        }
>>> +        /*
>>> +         * Give a name with fd so any function printing out vbasedev->name
>>> +         * will not break.
>>> +         */
>>> +        if (!vbasedev->name) {
>>> +            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
>>
>>     bool vfio_device_set_fd(..., Error **errp)
>>
>>> +{
>>> +    int fd = monitor_fd_param(monitor_cur(), str, errp);
>>> +
>>> +    if (fd < 0) {
>>> +        error_prepend(errp, "Could not parse remote object fd %s:", str);
>>> +        return;
>>
>>             return false;
>>
>>> +    }
>>> +    vbasedev->fd = fd;
>>
>>         return true;
>
>If we had a QOM base device object, vfio_device_set_fd() would be passed
>directly to object_class_property_add_str() which expects a :
>
>   void (*set)(Object *, const char *, Error **)
>
>I think it is fine to keep as it is. We might have a QOM base device object
>one day ! Minor anyway.
>
>Thanks,
>
>C.
>
>
>>
>>> +}
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 3eec428162..e08a217057 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -326,11 +326,15 @@ static int iommufd_cdev_attach(const char *name,
>VFIODevice *vbasedev,
>>>       uint32_t ioas_id;
>>>       Error *err = NULL;
>>> -    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> -    if (devfd < 0) {
>>> -        return devfd;
>>> +    if (vbasedev->fd < 0) {
>>> +        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
>>> +        if (devfd < 0) {
>>> +            return devfd;
>>> +        }
>>> +        vbasedev->fd = devfd;
>>> +    } else {
>>> +        devfd = vbasedev->fd;
>>>       }
>>> -    vbasedev->fd = devfd;
>>>       ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
>>>       if (ret) {
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index c5984b0598..b23b492cce 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -2944,17 +2944,19 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>>       VFIODevice *vbasedev = &vdev->vbasedev;
>>>       char *tmp, *subsys;
>>>       Error *err = NULL;
>>> -    struct stat st;
>>>       int i, ret;
>>>       bool is_mdev;
>>>       char uuid[UUID_STR_LEN];
>>>       char *name;
>>> -    if (!vbasedev->sysfsdev) {
>>> +    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
>>>           if (!(~vdev->host.domain || ~vdev->host.bus ||
>>>                 ~vdev->host.slot || ~vdev->host.function)) {
>>>               error_setg(errp, "No provided host device");
>>>               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
>>> +#ifdef CONFIG_IOMMUFD
>>> +                              "or -device vfio-pci,fd=DEVICE_FD "
>>> +#endif
>>>                                 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
>>>               return;
>>>           }
>>> @@ -2964,13 +2966,9 @@ static void vfio_realize(PCIDevice *pdev, Error
>**errp)
>>>                               vdev->host.slot, vdev->host.function);
>>>       }
>>> -    if (stat(vbasedev->sysfsdev, &st) < 0) {
>>> -        error_setg_errno(errp, errno, "no such host device");
>>> -        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
>>> +    if (vfio_device_get_name(vbasedev, errp)) {
>>
>> Clearer as:
>>
>>         if (vfio_device_get_name(vbasedev, errp) < 0) {
>>
>>>           return;
>>>       }

Will do.

Thanks
Zhenzhong
Cédric Le Goater Nov. 16, 2023, 7:25 a.m. UTC | #5
>>> Please add bare documentation:
>>>
>>>     /* Returns 0 on success, or a negative errno. */
>>>
>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
> 
> Will do, I'd like to wait a few days to collect more suggested changes and RB,
> Then send all these updates to Cédric in once before he pushes this series to vfio-next.

Yep. Could you respin a v7 with all the comments on v6 ? I will
then apply directly on vfio-next.

Please wait for Eric to finish looking at the platform part.

Thanks,

C.
Duan, Zhenzhong Nov. 16, 2023, 7:43 a.m. UTC | #6
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Thursday, November 16, 2023 3:25 PM
>Subject: Re: [PATCH v6 11/21] vfio/pci: Make vfio cdev pre-openable by passing a
>file handle
>
>>>> Please add bare documentation:
>>>>
>>>>     /* Returns 0 on success, or a negative errno. */
>>>>
>>>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
>>
>> Will do, I'd like to wait a few days to collect more suggested changes and RB,
>> Then send all these updates to Cédric in once before he pushes this series to
>vfio-next.
>
>Yep. Could you respin a v7 with all the comments on v6 ? I will
>then apply directly on vfio-next.

Sure.

>
>Please wait for Eric to finish looking at the platform part.

Sure.

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3dac5c167e..567e5f7bea 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -251,4 +251,7 @@  int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
                                     hwaddr size);
 int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
                                  uint64_t size, ram_addr_t ram_addr);
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
 #endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 168847e7c5..986ef1095a 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -20,6 +20,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
 #include <sys/ioctl.h>
 
 #include "hw/vfio/vfio-common.h"
@@ -27,6 +28,7 @@ 
 #include "trace.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "monitor/monitor.h"
 
 /*
  * Common VFIO interrupt disable
@@ -609,3 +611,45 @@  bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
 
     return ret;
 }
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
+{
+    struct stat st;
+
+    if (vbasedev->fd < 0) {
+        if (stat(vbasedev->sysfsdev, &st) < 0) {
+            error_setg_errno(errp, errno, "no such host device");
+            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+            return -errno;
+        }
+        /* User may specify a name, e.g: VFIO platform device */
+        if (!vbasedev->name) {
+            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+        }
+    } else {
+        if (!vbasedev->iommufd) {
+            error_setg(errp, "Use FD passing only with iommufd backend");
+            return -EINVAL;
+        }
+        /*
+         * Give a name with fd so any function printing out vbasedev->name
+         * will not break.
+         */
+        if (!vbasedev->name) {
+            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+        }
+    }
+
+    return 0;
+}
+
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+    int fd = monitor_fd_param(monitor_cur(), str, errp);
+
+    if (fd < 0) {
+        error_prepend(errp, "Could not parse remote object fd %s:", str);
+        return;
+    }
+    vbasedev->fd = fd;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 3eec428162..e08a217057 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -326,11 +326,15 @@  static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
     uint32_t ioas_id;
     Error *err = NULL;
 
-    devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
-    if (devfd < 0) {
-        return devfd;
+    if (vbasedev->fd < 0) {
+        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
+        if (devfd < 0) {
+            return devfd;
+        }
+        vbasedev->fd = devfd;
+    } else {
+        devfd = vbasedev->fd;
     }
-    vbasedev->fd = devfd;
 
     ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
     if (ret) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c5984b0598..b23b492cce 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2944,17 +2944,19 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     VFIODevice *vbasedev = &vdev->vbasedev;
     char *tmp, *subsys;
     Error *err = NULL;
-    struct stat st;
     int i, ret;
     bool is_mdev;
     char uuid[UUID_STR_LEN];
     char *name;
 
-    if (!vbasedev->sysfsdev) {
+    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
         if (!(~vdev->host.domain || ~vdev->host.bus ||
               ~vdev->host.slot || ~vdev->host.function)) {
             error_setg(errp, "No provided host device");
             error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
+#ifdef CONFIG_IOMMUFD
+                              "or -device vfio-pci,fd=DEVICE_FD "
+#endif
                               "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
             return;
         }
@@ -2964,13 +2966,9 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
                             vdev->host.slot, vdev->host.function);
     }
 
-    if (stat(vbasedev->sysfsdev, &st) < 0) {
-        error_setg_errno(errp, errno, "no such host device");
-        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+    if (vfio_device_get_name(vbasedev, errp)) {
         return;
     }
-
-    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
     vbasedev->ops = &vfio_pci_ops;
     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
     vbasedev->dev = DEVICE(vdev);
@@ -3330,6 +3328,7 @@  static void vfio_instance_init(Object *obj)
     vdev->host.bus = ~0U;
     vdev->host.slot = ~0U;
     vdev->host.function = ~0U;
+    vdev->vbasedev.fd = -1;
 
     vdev->nv_gpudirect_clique = 0xFF;
 
@@ -3383,11 +3382,6 @@  static Property vfio_pci_dev_properties[] = {
                                    qdev_prop_nv_gpudirect_clique, uint8_t),
     DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
                                 OFF_AUTOPCIBAR_OFF),
-    /*
-     * TODO - support passed fds... is this necessary?
-     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
-     */
 #ifdef CONFIG_IOMMUFD
     DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
                      TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
@@ -3395,6 +3389,13 @@  static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+#ifdef CONFIG_IOMMUFD
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
+{
+    vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
+}
+#endif
+
 static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3402,6 +3403,9 @@  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
 
     dc->reset = vfio_pci_reset;
     device_class_set_props(dc, vfio_pci_dev_properties);
+#ifdef CONFIG_IOMMUFD
+    object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
+#endif
     dc->desc = "VFIO-based PCI device assignment";
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     pdc->realize = vfio_realize;