diff mbox

[v14,08/13] vfio: add check host bus reset is support or not

Message ID d3b4848742510cb233f17e2ba94acde0550db3c9.1447748073.git.chen.fan.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

Cao jin Nov. 17, 2015, 8:41 a.m. UTC
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Once all vfio devices have been initialized, we should check every
AER-enabled device for conflicts with other devices. For each one, get
the hot reset info to obtain the list of affected devices.  Every
affected device must be assigned to the VM and reside on or below the
same bus. We should also test all of the non-AER vfio-pci devices on or
below the target bus to verify that they have a reset mechanism.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 hw/vfio/pci.h |   1 +
 2 files changed, 230 insertions(+), 7 deletions(-)

Comments

Alex Williamson Dec. 17, 2015, 8:32 p.m. UTC | #1
On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> when init vfio devices done, we should test all the devices supported
> aer whether conflict with others. For each one, get the hot reset
> info for the affected device list.  For each affected device, all
> should attach to the VM and on/below the same bus. also, we should test
> all of the non-AER supporting vfio-pci devices on or below the target
> bus to verify they have a reset mechanism.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  hw/vfio/pci.h |   1 +
>  2 files changed, 230 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index d00b0e4..6926dcc 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> +                                     PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot);
> +}
> +
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (vfio_pci_host_slot_match(host1, host2) &&
> +            host1->function == host2->function);
> +}
> +
> +struct VFIODeviceFind {
> +    PCIDevice *pdev;
> +    bool found;
> +};
> +
> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
> +                                      void *opaque)
> +{
> +    DeviceState *dev = DEVICE(pdev);
> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> +    VFIOPCIDevice *vdev;
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        if (!dc->reset) {
> +            goto found;
> +        }
> +        return;
> +    }
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !vdev->vbasedev.reset_works) {
> +        goto found;
> +    }
> +
> +    return;
> +found:
> +    find->pdev = pdev;
> +    find->found = true;
> +}
> +
> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
> +{
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (pdev == find->pdev) {
> +        find->found = true;
> +    }
> +}
> +
> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    struct VFIODeviceFind find;
> +    int ret, i;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        goto out;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "depends on group %d which is not owned.",
> +                         vdev->vbasedev.name, devices[i].group_id);
> +            ret = -1;
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on/blow the bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, &tmp->host)) {
> +                PCIDevice *pci = PCI_DEVICE(tmp);
> +
> +                /*
> +                 * For multifunction device, due to vfio driver signal all
> +                 * functions under the upstream link of the end point. here
> +                 * we validate all functions whether enable AER.
> +                 */
> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> +                    error_report("vfio: Cannot enable AER for device %s, on same slot"
> +                                 " the dependent device %s which does not enable AER.",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }

It took me a while to understand this code block, so I've updated the
comment to read as follows:

                /*
                 * AER errors may be broadcast to all functions of a multi-
                 * function endpoint.  If any of those sibling functions are
                 * also assigned, they need to have AER enabled or else an
                 * error may continue to cause a vm_stop condition.  IOW,
                 * AER setup of this function would be pointless.
                 */

Does that match your intention?

> +
> +                find.pdev = pci;
> +                find.found = false;
> +                pci_for_each_device(bus, pci_bus_num(bus),
> +                                    device_find, &find);
> +                if (!find.found) {
> +                    error_report("vfio: Cannot enable AER for device %s, "
> +                                 "the dependent device %s is not under the same bus",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */
> +        if (!found) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "the dependent device %04x:%02x:%02x.%x "
> +                         "is not assigned to VM.",
> +                         vdev->vbasedev.name, host.domain, host.bus,
> +                         host.slot, host.function);
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * Check the all pci devices on or below the target bus
> +     * have a reset mechanism at least.
> +     */
> +    find.pdev = NULL;
> +    find.found = false;
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        vfio_check_device_noreset, &find);
> +    if (find.found) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                     "the affected device %s does not have a reset mechanism.",
> +                     vdev->vbasedev.name, find.pdev->name);
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    ret = 0;
> +out:
> +    g_free(info);
> +    return ret;
> +}
> +
> +static int vfio_check_devices_host_bus_reset(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +                vfio_check_host_bus_reset(vdev)) {
> +                return -1;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> -                                PCIHostDeviceAddress *host2)
> -{
> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> -            host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> +    int ret;
> +
> +    ret = vfio_check_devices_host_bus_reset();
> +    if (ret) {
> +        exit(1);
> +    }
> +}
> +
> +static Notifier machine_notifier = {
> +    .notify = vfio_pci_machine_done_notify,
> +};
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
>  static void register_vfio_pci_dev_type(void)
>  {
>      type_register_static(&vfio_pci_dev_info);
> +    /*
> +     * Register notifier when machine init is done, since we need
> +     * check the configration manner after all vfio device are inited.
> +     */
> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>  }
>  
>  type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 48c1f69..59ae194 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"
chenfan Dec. 18, 2015, 1:14 a.m. UTC | #2
On 12/18/2015 04:32 AM, Alex Williamson wrote:
> On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> when init vfio devices done, we should test all the devices supported
>> aer whether conflict with others. For each one, get the hot reset
>> info for the affected device list.  For each affected device, all
>> should attach to the VM and on/below the same bus. also, we should test
>> all of the non-AER supporting vfio-pci devices on or below the target
>> bus to verify they have a reset mechanism.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>   hw/vfio/pci.h |   1 +
>>   2 files changed, 230 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index d00b0e4..6926dcc 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>>       return 0;
>>   }
>>   
>> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
>> +                                     PCIHostDeviceAddress *host2)
>> +{
>> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> +            host1->slot == host2->slot);
>> +}
>> +
>> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> +                                PCIHostDeviceAddress *host2)
>> +{
>> +    return (vfio_pci_host_slot_match(host1, host2) &&
>> +            host1->function == host2->function);
>> +}
>> +
>> +struct VFIODeviceFind {
>> +    PCIDevice *pdev;
>> +    bool found;
>> +};
>> +
>> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
>> +                                      void *opaque)
>> +{
>> +    DeviceState *dev = DEVICE(pdev);
>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>> +    VFIOPCIDevice *vdev;
>> +    struct VFIODeviceFind *find = opaque;
>> +
>> +    if (find->found) {
>> +        return;
>> +    }
>> +
>> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +        if (!dc->reset) {
>> +            goto found;
>> +        }
>> +        return;
>> +    }
>> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
>> +        !vdev->vbasedev.reset_works) {
>> +        goto found;
>> +    }
>> +
>> +    return;
>> +found:
>> +    find->pdev = pdev;
>> +    find->found = true;
>> +}
>> +
>> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
>> +{
>> +    struct VFIODeviceFind *find = opaque;
>> +
>> +    if (find->found) {
>> +        return;
>> +    }
>> +
>> +    if (pdev == find->pdev) {
>> +        find->found = true;
>> +    }
>> +}
>> +
>> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>> +{
>> +    PCIBus *bus = vdev->pdev.bus;
>> +    struct vfio_pci_hot_reset_info *info = NULL;
>> +    struct vfio_pci_dependent_device *devices;
>> +    VFIOGroup *group;
>> +    struct VFIODeviceFind find;
>> +    int ret, i;
>> +
>> +    ret = vfio_get_hot_reset_info(vdev, &info);
>> +    if (ret) {
>> +        error_report("vfio: Cannot enable AER for device %s,"
>> +                     " device does not support hot reset.",
>> +                     vdev->vbasedev.name);
>> +        goto out;
>> +    }
>> +
>> +    /* List all affected devices by bus reset */
>> +    devices = &info->devices[0];
>> +
>> +    /* Verify that we have all the groups required */
>> +    for (i = 0; i < info->count; i++) {
>> +        PCIHostDeviceAddress host;
>> +        VFIOPCIDevice *tmp;
>> +        VFIODevice *vbasedev_iter;
>> +        bool found = false;
>> +
>> +        host.domain = devices[i].segment;
>> +        host.bus = devices[i].bus;
>> +        host.slot = PCI_SLOT(devices[i].devfn);
>> +        host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +        /* Skip the current device */
>> +        if (vfio_pci_host_match(&host, &vdev->host)) {
>> +            continue;
>> +        }
>> +
>> +        /* Ensure we own the group of the affected device */
>> +        QLIST_FOREACH(group, &vfio_group_list, next) {
>> +            if (group->groupid == devices[i].group_id) {
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!group) {
>> +            error_report("vfio: Cannot enable AER for device %s, "
>> +                         "depends on group %d which is not owned.",
>> +                         vdev->vbasedev.name, devices[i].group_id);
>> +            ret = -1;
>> +            goto out;
>> +        }
>> +
>> +        /* Ensure affected devices for reset on/blow the bus */
>> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>> +            if (vfio_pci_host_match(&host, &tmp->host)) {
>> +                PCIDevice *pci = PCI_DEVICE(tmp);
>> +
>> +                /*
>> +                 * For multifunction device, due to vfio driver signal all
>> +                 * functions under the upstream link of the end point. here
>> +                 * we validate all functions whether enable AER.
>> +                 */
>> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
>> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
>> +                    error_report("vfio: Cannot enable AER for device %s, on same slot"
>> +                                 " the dependent device %s which does not enable AER.",
>> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    ret = -1;
>> +                    goto out;
>> +                }
> It took me a while to understand this code block, so I've updated the
> comment to read as follows:
>
>                  /*
>                   * AER errors may be broadcast to all functions of a multi-
>                   * function endpoint.  If any of those sibling functions are
>                   * also assigned, they need to have AER enabled or else an
>                   * error may continue to cause a vm_stop condition.  IOW,
>                   * AER setup of this function would be pointless.
>                   */
>
> Does that match your intention?
Yes, that is what the code means. Your explanation is much better; I will
update the comment in the next version.

Thanks,
Chen

>
>> +
>> +                find.pdev = pci;
>> +                find.found = false;
>> +                pci_for_each_device(bus, pci_bus_num(bus),
>> +                                    device_find, &find);
>> +                if (!find.found) {
>> +                    error_report("vfio: Cannot enable AER for device %s, "
>> +                                 "the dependent device %s is not under the same bus",
>> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    ret = -1;
>> +                    goto out;
>> +                }
>> +                found = true;
>> +                break;
>> +            }
>> +        }
>> +
>> +        /* Ensure all affected devices assigned to VM */
>> +        if (!found) {
>> +            error_report("vfio: Cannot enable AER for device %s, "
>> +                         "the dependent device %04x:%02x:%02x.%x "
>> +                         "is not assigned to VM.",
>> +                         vdev->vbasedev.name, host.domain, host.bus,
>> +                         host.slot, host.function);
>> +            ret = -1;
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Check the all pci devices on or below the target bus
>> +     * have a reset mechanism at least.
>> +     */
>> +    find.pdev = NULL;
>> +    find.found = false;
>> +    pci_for_each_device(bus, pci_bus_num(bus),
>> +                        vfio_check_device_noreset, &find);
>> +    if (find.found) {
>> +        error_report("vfio: Cannot enable AER for device %s, "
>> +                     "the affected device %s does not have a reset mechanism.",
>> +                     vdev->vbasedev.name, find.pdev->name);
>> +        ret = -1;
>> +        goto out;
>> +    }
>> +
>> +    ret = 0;
>> +out:
>> +    g_free(info);
>> +    return ret;
>> +}
>> +
>> +static int vfio_check_devices_host_bus_reset(void)
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +
>> +    /* Check All vfio-pci devices if have bus reset capability */
>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
>> +                vfio_check_host_bus_reset(vdev)) {
>> +                return -1;
>> +            }
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>>       vfio_intx_enable(vdev);
>>   }
>>   
>> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> -                                PCIHostDeviceAddress *host2)
>> -{
>> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> -            host1->slot == host2->slot && host1->function == host2->function);
>> -}
>> -
>>   static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>>   {
>>       VFIOGroup *group;
>> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>       vdev->req_enabled = false;
>>   }
>>   
>> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
>> +{
>> +    int ret;
>> +
>> +    ret = vfio_check_devices_host_bus_reset();
>> +    if (ret) {
>> +        exit(1);
>> +    }
>> +}
>> +
>> +static Notifier machine_notifier = {
>> +    .notify = vfio_pci_machine_done_notify,
>> +};
>> +
>>   static int vfio_initfn(PCIDevice *pdev)
>>   {
>>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
>>   static void register_vfio_pci_dev_type(void)
>>   {
>>       type_register_static(&vfio_pci_dev_info);
>> +    /*
>> +     * Register notifier when machine init is done, since we need
>> +     * check the configration manner after all vfio device are inited.
>> +     */
>> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>>   }
>>   
>>   type_init(register_vfio_pci_dev_type)
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 48c1f69..59ae194 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -15,6 +15,7 @@
>>   #include "qemu-common.h"
>>   #include "exec/memory.h"
>>   #include "hw/pci/pci.h"
>> +#include "hw/pci/pci_bus.h"
>>   #include "hw/pci/pci_bridge.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "qemu/event_notifier.h"
>
>
> .
>
Michael S. Tsirkin Dec. 24, 2015, 2:32 p.m. UTC | #3
On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> when init vfio devices done, we should test all the devices supported
> aer whether conflict with others. For each one, get the hot reset
> info for the affected device list.  For each affected device, all
> should attach to the VM and on/below the same bus. also, we should test
> all of the non-AER supporting vfio-pci devices on or below the target
> bus to verify they have a reset mechanism.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  hw/vfio/pci.h |   1 +
>  2 files changed, 230 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index d00b0e4..6926dcc 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> +                                     PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot);
> +}
> +
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (vfio_pci_host_slot_match(host1, host2) &&
> +            host1->function == host2->function);
> +}
> +
> +struct VFIODeviceFind {
> +    PCIDevice *pdev;
> +    bool found;
> +};
> +
> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
> +                                      void *opaque)
> +{
> +    DeviceState *dev = DEVICE(pdev);
> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> +    VFIOPCIDevice *vdev;
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        if (!dc->reset) {
> +            goto found;
> +        }
> +        return;
> +    }
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !vdev->vbasedev.reset_works) {
> +        goto found;
> +    }
> +
> +    return;
> +found:
> +    find->pdev = pdev;
> +    find->found = true;
> +}
> +
> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
> +{
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (pdev == find->pdev) {
> +        find->found = true;
> +    }
> +}
> +
> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    struct VFIODeviceFind find;
> +    int ret, i;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        goto out;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "depends on group %d which is not owned.",
> +                         vdev->vbasedev.name, devices[i].group_id);
> +            ret = -1;
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on/blow the bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, &tmp->host)) {
> +                PCIDevice *pci = PCI_DEVICE(tmp);
> +
> +                /*
> +                 * For multifunction device, due to vfio driver signal all
> +                 * functions under the upstream link of the end point. here
> +                 * we validate all functions whether enable AER.
> +                 */
> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> +                    error_report("vfio: Cannot enable AER for device %s, on same slot"
> +                                 " the dependent device %s which does not enable AER.",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +
> +                find.pdev = pci;
> +                find.found = false;
> +                pci_for_each_device(bus, pci_bus_num(bus),
> +                                    device_find, &find);
> +                if (!find.found) {
> +                    error_report("vfio: Cannot enable AER for device %s, "
> +                                 "the dependent device %s is not under the same bus",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */

I am puzzled.
Doesn't the kernel enforce this already?
If not, it's a security problem.
If so, why does userspace need to check this?

> +        if (!found) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "the dependent device %04x:%02x:%02x.%x "
> +                         "is not assigned to VM.",
> +                         vdev->vbasedev.name, host.domain, host.bus,
> +                         host.slot, host.function);
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * Check the all pci devices on or below the target bus
> +     * have a reset mechanism at least.
> +     */
> +    find.pdev = NULL;
> +    find.found = false;
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        vfio_check_device_noreset, &find);
> +    if (find.found) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                     "the affected device %s does not have a reset mechanism.",
> +                     vdev->vbasedev.name, find.pdev->name);
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    ret = 0;
> +out:
> +    g_free(info);
> +    return ret;
> +}
> +
> +static int vfio_check_devices_host_bus_reset(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +                vfio_check_host_bus_reset(vdev)) {
> +                return -1;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> -                                PCIHostDeviceAddress *host2)
> -{
> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> -            host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> +    int ret;
> +
> +    ret = vfio_check_devices_host_bus_reset();
> +    if (ret) {
> +        exit(1);
> +    }
> +}
> +
> +static Notifier machine_notifier = {
> +    .notify = vfio_pci_machine_done_notify,
> +};
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
>  static void register_vfio_pci_dev_type(void)
>  {
>      type_register_static(&vfio_pci_dev_info);
> +    /*
> +     * Register a notifier for when machine init is done, since we need to
> +     * check the configuration after all vfio devices have been initialized.
> +     */
> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>  }
>  
>  type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 48c1f69..59ae194 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"
> -- 
> 1.9.3
> 
>
Alex Williamson Dec. 24, 2015, 5:47 p.m. UTC | #4
On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > 
> > when init vfio devices done, we should test all the devices
> > supported
> > aer whether conflict with others. For each one, get the hot reset
> > info for the affected device list.  For each affected device, all
> > should attach to the VM and on/below the same bus. also, we should
> > test
> > all of the non-AER supporting vfio-pci devices on or below the
> > target
> > bus to verify they have a reset mechanism.
> > 
> > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > ---
> >  hw/vfio/pci.c | 236
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  hw/vfio/pci.h |   1 +
> >  2 files changed, 230 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index d00b0e4..6926dcc 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice
> > *vdev, uint8_t pos)
> >      return 0;
> >  }
> >  
> > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> > +                                     PCIHostDeviceAddress *host2)
> > +{
> > +    return (host1->domain == host2->domain && host1->bus == host2-
> > >bus &&
> > +            host1->slot == host2->slot);
> > +}
> > +
> > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > +                                PCIHostDeviceAddress *host2)
> > +{
> > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > +            host1->function == host2->function);
> > +}
> > +
> > +struct VFIODeviceFind {
> > +    PCIDevice *pdev;
> > +    bool found;
> > +};
> > +
> > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > *pdev,
> > +                                      void *opaque)
> > +{
> > +    DeviceState *dev = DEVICE(pdev);
> > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > +    VFIOPCIDevice *vdev;
> > +    struct VFIODeviceFind *find = opaque;
> > +
> > +    if (find->found) {
> > +        return;
> > +    }
> > +
> > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > +        if (!dc->reset) {
> > +            goto found;
> > +        }
> > +        return;
> > +    }
> > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > +        !vdev->vbasedev.reset_works) {
> > +        goto found;
> > +    }
> > +
> > +    return;
> > +found:
> > +    find->pdev = pdev;
> > +    find->found = true;
> > +}
> > +
> > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > *opaque)
> > +{
> > +    struct VFIODeviceFind *find = opaque;
> > +
> > +    if (find->found) {
> > +        return;
> > +    }
> > +
> > +    if (pdev == find->pdev) {
> > +        find->found = true;
> > +    }
> > +}
> > +
> > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > +{
> > +    PCIBus *bus = vdev->pdev.bus;
> > +    struct vfio_pci_hot_reset_info *info = NULL;
> > +    struct vfio_pci_dependent_device *devices;
> > +    VFIOGroup *group;
> > +    struct VFIODeviceFind find;
> > +    int ret, i;
> > +
> > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > +    if (ret) {
> > +        error_report("vfio: Cannot enable AER for device %s,"
> > +                     " device does not support hot reset.",
> > +                     vdev->vbasedev.name);
> > +        goto out;
> > +    }
> > +
> > +    /* List all affected devices by bus reset */
> > +    devices = &info->devices[0];
> > +
> > +    /* Verify that we have all the groups required */
> > +    for (i = 0; i < info->count; i++) {
> > +        PCIHostDeviceAddress host;
> > +        VFIOPCIDevice *tmp;
> > +        VFIODevice *vbasedev_iter;
> > +        bool found = false;
> > +
> > +        host.domain = devices[i].segment;
> > +        host.bus = devices[i].bus;
> > +        host.slot = PCI_SLOT(devices[i].devfn);
> > +        host.function = PCI_FUNC(devices[i].devfn);
> > +
> > +        /* Skip the current device */
> > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > +            continue;
> > +        }
> > +
> > +        /* Ensure we own the group of the affected device */
> > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > +            if (group->groupid == devices[i].group_id) {
> > +                break;
> > +            }
> > +        }
> > +
> > +        if (!group) {
> > +            error_report("vfio: Cannot enable AER for device %s, "
> > +                         "depends on group %d which is not
> > owned.",
> > +                         vdev->vbasedev.name,
> > devices[i].group_id);
> > +            ret = -1;
> > +            goto out;
> > +        }
> > +
> > +        /* Ensure affected devices for reset on/blow the bus */
> > +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > +                continue;
> > +            }
> > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > vbasedev);
> > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > +
> > +                /*
> > +                 * For multifunction device, due to vfio driver
> > signal all
> > +                 * functions under the upstream link of the end
> > point. here
> > +                 * we validate all functions whether enable AER.
> > +                 */
> > +                if (vfio_pci_host_slot_match(&vdev->host, &tmp-
> > >host) &&
> > +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> > +                    error_report("vfio: Cannot enable AER for
> > device %s, on same slot"
> > +                                 " the dependent device %s which
> > does not enable AER.",
> > +                                 vdev->vbasedev.name, tmp-
> > >vbasedev.name);
> > +                    ret = -1;
> > +                    goto out;
> > +                }
> > +
> > +                find.pdev = pci;
> > +                find.found = false;
> > +                pci_for_each_device(bus, pci_bus_num(bus),
> > +                                    device_find, &find);
> > +                if (!find.found) {
> > +                    error_report("vfio: Cannot enable AER for
> > device %s, "
> > +                                 "the dependent device %s is not
> > under the same bus",
> > +                                 vdev->vbasedev.name, tmp-
> > >vbasedev.name);
> > +                    ret = -1;
> > +                    goto out;
> > +                }
> > +                found = true;
> > +                break;
> > +            }
> > +        }
> > +
> > +        /* Ensure all affected devices assigned to VM */
> 
> I am puzzled.
> Does not kernel enforce this already?
> If not it's a security problem.
> If yes why does userspace need to check this?

DMA isolation and bus level isolation are separate concepts.  Each
function of a multi-function device can have DMA isolation, but a user
needs to own all of the functions affected by a bus reset in order to
perform one.  An AER configuration can only be created if the user can
translate a guest bus reset into a host bus reset and therefore needs
to test whether it has the permissions to do so.  I believe over the
course of reviews we've also added some simplifying constraints around
this to reduce the problem set, things like all the groups being
assigned rather than just owned by the user.  However, I believe the
kernel is sound in how it provides security for bus resets.  Thanks,

Alex
Michael S. Tsirkin Dec. 24, 2015, 6:06 p.m. UTC | #5
On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > 
> > > when init vfio devices done, we should test all the devices
> > > supported
> > > aer whether conflict with others. For each one, get the hot reset
> > > info for the affected device list.  For each affected device, all
> > > should attach to the VM and on/below the same bus. also, we should
> > > test
> > > all of the non-AER supporting vfio-pci devices on or below the
> > > target
> > > bus to verify they have a reset mechanism.
> > > 
> > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > ---
> > >  hw/vfio/pci.c | 236
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > >  hw/vfio/pci.h |   1 +
> > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > index d00b0e4..6926dcc 100644
> > > --- a/hw/vfio/pci.c
> > > +++ b/hw/vfio/pci.c
> > > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice
> > > *vdev, uint8_t pos)
> > >      return 0;
> > >  }
> > >  
> > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> > > +                                     PCIHostDeviceAddress *host2)
> > > +{
> > > +    return (host1->domain == host2->domain && host1->bus == host2-
> > > >bus &&
> > > +            host1->slot == host2->slot);
> > > +}
> > > +
> > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > > +                                PCIHostDeviceAddress *host2)
> > > +{
> > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > +            host1->function == host2->function);
> > > +}
> > > +
> > > +struct VFIODeviceFind {
> > > +    PCIDevice *pdev;
> > > +    bool found;
> > > +};
> > > +
> > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > > *pdev,
> > > +                                      void *opaque)
> > > +{
> > > +    DeviceState *dev = DEVICE(pdev);
> > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > +    VFIOPCIDevice *vdev;
> > > +    struct VFIODeviceFind *find = opaque;
> > > +
> > > +    if (find->found) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > +        if (!dc->reset) {
> > > +            goto found;
> > > +        }
> > > +        return;
> > > +    }
> > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > +        !vdev->vbasedev.reset_works) {
> > > +        goto found;
> > > +    }
> > > +
> > > +    return;
> > > +found:
> > > +    find->pdev = pdev;
> > > +    find->found = true;
> > > +}
> > > +
> > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > *opaque)
> > > +{
> > > +    struct VFIODeviceFind *find = opaque;
> > > +
> > > +    if (find->found) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (pdev == find->pdev) {
> > > +        find->found = true;
> > > +    }
> > > +}
> > > +
> > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > +{
> > > +    PCIBus *bus = vdev->pdev.bus;
> > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > +    struct vfio_pci_dependent_device *devices;
> > > +    VFIOGroup *group;
> > > +    struct VFIODeviceFind find;
> > > +    int ret, i;
> > > +
> > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > +    if (ret) {
> > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > +                     " device does not support hot reset.",
> > > +                     vdev->vbasedev.name);
> > > +        goto out;
> > > +    }
> > > +
> > > +    /* List all affected devices by bus reset */
> > > +    devices = &info->devices[0];
> > > +
> > > +    /* Verify that we have all the groups required */
> > > +    for (i = 0; i < info->count; i++) {
> > > +        PCIHostDeviceAddress host;
> > > +        VFIOPCIDevice *tmp;
> > > +        VFIODevice *vbasedev_iter;
> > > +        bool found = false;
> > > +
> > > +        host.domain = devices[i].segment;
> > > +        host.bus = devices[i].bus;
> > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > +
> > > +        /* Skip the current device */
> > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > +            continue;
> > > +        }
> > > +
> > > +        /* Ensure we own the group of the affected device */
> > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > +            if (group->groupid == devices[i].group_id) {
> > > +                break;
> > > +            }
> > > +        }
> > > +
> > > +        if (!group) {
> > > +            error_report("vfio: Cannot enable AER for device %s, "
> > > +                         "depends on group %d which is not
> > > owned.",
> > > +                         vdev->vbasedev.name,
> > > devices[i].group_id);
> > > +            ret = -1;
> > > +            goto out;
> > > +        }
> > > +
> > > +        /* Ensure affected devices for reset on/blow the bus */
> > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> > > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > > +                continue;
> > > +            }
> > > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > > vbasedev);
> > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > +
> > > +                /*
> > > +                 * For multifunction device, due to vfio driver
> > > signal all
> > > +                 * functions under the upstream link of the end
> > > point. here
> > > +                 * we validate all functions whether enable AER.
> > > +                 */
> > > +                if (vfio_pci_host_slot_match(&vdev->host, &tmp-
> > > >host) &&
> > > +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> > > +                    error_report("vfio: Cannot enable AER for
> > > device %s, on same slot"
> > > +                                 " the dependent device %s which
> > > does not enable AER.",
> > > +                                 vdev->vbasedev.name, tmp-
> > > >vbasedev.name);
> > > +                    ret = -1;
> > > +                    goto out;
> > > +                }
> > > +
> > > +                find.pdev = pci;
> > > +                find.found = false;
> > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > +                                    device_find, &find);
> > > +                if (!find.found) {
> > > +                    error_report("vfio: Cannot enable AER for
> > > device %s, "
> > > +                                 "the dependent device %s is not
> > > under the same bus",
> > > +                                 vdev->vbasedev.name, tmp-
> > > >vbasedev.name);
> > > +                    ret = -1;
> > > +                    goto out;
> > > +                }
> > > +                found = true;
> > > +                break;
> > > +            }
> > > +        }
> > > +
> > > +        /* Ensure all affected devices assigned to VM */
> > 
> > I am puzzled.
> > Does not kernel enforce this already?
> > If not it's a security problem.
> > If yes why does userspace need to check this?
> 
> DMA isolation and bus level isolation are separate concepts.  Each
> function of a multi-function device can have DMA isolation, but a user
> needs to own all of the functions affected by a bus reset in order to
> perform one.  An AER configuration can only be created if the user can
> translate a guest bus reset into a host bus reset and therefore needs
> to test whether it has the permissions to do so.  I believe over the
> course of reviews we've also added some simplifying constraints around
> this to reduce the problem set, things like all the groups being
> assigned rather than just owned by the user.  However, I believe the
> kernel is sound in how it provides security for bus resets.  Thanks,
> 
> Alex

Yes, sounds good.

So how about just trying to do bus reset at setup time?
If kernel allows this, we know it is safe ...
Alex Williamson Dec. 24, 2015, 6:20 p.m. UTC | #6
On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > 
> > > > when init vfio devices done, we should test all the devices
> > > > supported
> > > > aer whether conflict with others. For each one, get the hot
> > > > reset
> > > > info for the affected device list.  For each affected device,
> > > > all
> > > > should attach to the VM and on/below the same bus. also, we
> > > > should
> > > > test
> > > > all of the non-AER supporting vfio-pci devices on or below the
> > > > target
> > > > bus to verify they have a reset mechanism.
> > > > 
> > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > ---
> > > >  hw/vfio/pci.c | 236
> > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > >  hw/vfio/pci.h |   1 +
> > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > index d00b0e4..6926dcc 100644
> > > > --- a/hw/vfio/pci.c
> > > > +++ b/hw/vfio/pci.c
> > > > @@ -1806,6 +1806,216 @@ static int
> > > > vfio_add_std_cap(VFIOPCIDevice
> > > > *vdev, uint8_t pos)
> > > >      return 0;
> > > >  }
> > > >  
> > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > *host1,
> > > > +                                     PCIHostDeviceAddress
> > > > *host2)
> > > > +{
> > > > +    return (host1->domain == host2->domain && host1->bus ==
> > > > host2-
> > > > > bus &&
> > > > +            host1->slot == host2->slot);
> > > > +}
> > > > +
> > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > > > +                                PCIHostDeviceAddress *host2)
> > > > +{
> > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > +            host1->function == host2->function);
> > > > +}
> > > > +
> > > > +struct VFIODeviceFind {
> > > > +    PCIDevice *pdev;
> > > > +    bool found;
> > > > +};
> > > > +
> > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > > > *pdev,
> > > > +                                      void *opaque)
> > > > +{
> > > > +    DeviceState *dev = DEVICE(pdev);
> > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > +    VFIOPCIDevice *vdev;
> > > > +    struct VFIODeviceFind *find = opaque;
> > > > +
> > > > +    if (find->found) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > +        if (!dc->reset) {
> > > > +            goto found;
> > > > +        }
> > > > +        return;
> > > > +    }
> > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > +        !vdev->vbasedev.reset_works) {
> > > > +        goto found;
> > > > +    }
> > > > +
> > > > +    return;
> > > > +found:
> > > > +    find->pdev = pdev;
> > > > +    find->found = true;
> > > > +}
> > > > +
> > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > *opaque)
> > > > +{
> > > > +    struct VFIODeviceFind *find = opaque;
> > > > +
> > > > +    if (find->found) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (pdev == find->pdev) {
> > > > +        find->found = true;
> > > > +    }
> > > > +}
> > > > +
> > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > +{
> > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > +    struct vfio_pci_dependent_device *devices;
> > > > +    VFIOGroup *group;
> > > > +    struct VFIODeviceFind find;
> > > > +    int ret, i;
> > > > +
> > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > +    if (ret) {
> > > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > > +                     " device does not support hot reset.",
> > > > +                     vdev->vbasedev.name);
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    /* List all affected devices by bus reset */
> > > > +    devices = &info->devices[0];
> > > > +
> > > > +    /* Verify that we have all the groups required */
> > > > +    for (i = 0; i < info->count; i++) {
> > > > +        PCIHostDeviceAddress host;
> > > > +        VFIOPCIDevice *tmp;
> > > > +        VFIODevice *vbasedev_iter;
> > > > +        bool found = false;
> > > > +
> > > > +        host.domain = devices[i].segment;
> > > > +        host.bus = devices[i].bus;
> > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > +
> > > > +        /* Skip the current device */
> > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        /* Ensure we own the group of the affected device */
> > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > +            if (group->groupid == devices[i].group_id) {
> > > > +                break;
> > > > +            }
> > > > +        }
> > > > +
> > > > +        if (!group) {
> > > > +            error_report("vfio: Cannot enable AER for device
> > > > %s, "
> > > > +                         "depends on group %d which is not
> > > > owned.",
> > > > +                         vdev->vbasedev.name,
> > > > devices[i].group_id);
> > > > +            ret = -1;
> > > > +            goto out;
> > > > +        }
> > > > +
> > > > +        /* Ensure affected devices for reset on/blow the bus
> > > > */
> > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > next) {
> > > > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > > > +                continue;
> > > > +            }
> > > > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > > > vbasedev);
> > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > +
> > > > +                /*
> > > > +                 * For multifunction device, due to vfio
> > > > driver
> > > > signal all
> > > > +                 * functions under the upstream link of the
> > > > end
> > > > point. here
> > > > +                 * we validate all functions whether enable
> > > > AER.
> > > > +                 */
> > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > &tmp-
> > > > > host) &&
> > > > +                    !(tmp->features &
> > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > +                    error_report("vfio: Cannot enable AER for
> > > > device %s, on same slot"
> > > > +                                 " the dependent device %s
> > > > which
> > > > does not enable AER.",
> > > > +                                 vdev->vbasedev.name, tmp-
> > > > > vbasedev.name);
> > > > +                    ret = -1;
> > > > +                    goto out;
> > > > +                }
> > > > +
> > > > +                find.pdev = pci;
> > > > +                find.found = false;
> > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > +                                    device_find, &find);
> > > > +                if (!find.found) {
> > > > +                    error_report("vfio: Cannot enable AER for
> > > > device %s, "
> > > > +                                 "the dependent device %s is
> > > > not
> > > > under the same bus",
> > > > +                                 vdev->vbasedev.name, tmp-
> > > > > vbasedev.name);
> > > > +                    ret = -1;
> > > > +                    goto out;
> > > > +                }
> > > > +                found = true;
> > > > +                break;
> > > > +            }
> > > > +        }
> > > > +
> > > > +        /* Ensure all affected devices assigned to VM */
> > > 
> > > I am puzzled.
> > > Does not kernel enforce this already?
> > > If not it's a security problem.
> > > If yes why does userspace need to check this?
> > 
> > DMA isolation and bus level isolation are separate concepts.  Each
> > function of a multi-function device can have DMA isolation, but a
> > user
> > needs to own all of the functions affected by a bus reset in order
> > to
> > perform one.  An AER configuration can only be created if the user
> > can
> > translate a guest bus reset into a host bus reset and therefore
> > needs
> > to test whether it has the permissions to do so.  I believe over
> > the
> > course of reviews we've also added some simplifying constraints
> > around
> > this to reduce the problem set, things like all the groups being
> > assigned rather than just owned by the user.  However, I believe
> > the
> > kernel is sound in how it provides security for bus resets.
> >  Thanks,
> > 
> > Alex
> 
> Yes, sounds good.
> 
> So how about just trying to do bus reset at setup time?
> If kernel allows this, we know it is safe ...

The host may support hotplug, so what's possible at setup time may not be
possible when an error occurs.  It's unlikely, but worth considering, I
think.  Thanks,

Alex
Michael S. Tsirkin Dec. 24, 2015, 6:23 p.m. UTC | #7
On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > 
> > > > > when init vfio devices done, we should test all the devices
> > > > > supported
> > > > > aer whether conflict with others. For each one, get the hot
> > > > > reset
> > > > > info for the affected device list.  For each affected device,
> > > > > all
> > > > > should attach to the VM and on/below the same bus. also, we
> > > > > should
> > > > > test
> > > > > all of the non-AER supporting vfio-pci devices on or below the
> > > > > target
> > > > > bus to verify they have a reset mechanism.
> > > > > 
> > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > ---
> > > > >  hw/vfio/pci.c | 236
> > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > >  hw/vfio/pci.h |   1 +
> > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > 
> > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > index d00b0e4..6926dcc 100644
> > > > > --- a/hw/vfio/pci.c
> > > > > +++ b/hw/vfio/pci.c
> > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > *vdev, uint8_t pos)
> > > > >      return 0;
> > > > >  }
> > > > >  
> > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > *host1,
> > > > > +                                     PCIHostDeviceAddress
> > > > > *host2)
> > > > > +{
> > > > > +    return (host1->domain == host2->domain && host1->bus ==
> > > > > host2-
> > > > > > bus &&
> > > > > +            host1->slot == host2->slot);
> > > > > +}
> > > > > +
> > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > > > > +                                PCIHostDeviceAddress *host2)
> > > > > +{
> > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > +            host1->function == host2->function);
> > > > > +}
> > > > > +
> > > > > +struct VFIODeviceFind {
> > > > > +    PCIDevice *pdev;
> > > > > +    bool found;
> > > > > +};
> > > > > +
> > > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > > > > *pdev,
> > > > > +                                      void *opaque)
> > > > > +{
> > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > +    VFIOPCIDevice *vdev;
> > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > +
> > > > > +    if (find->found) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > +        if (!dc->reset) {
> > > > > +            goto found;
> > > > > +        }
> > > > > +        return;
> > > > > +    }
> > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > +        !vdev->vbasedev.reset_works) {
> > > > > +        goto found;
> > > > > +    }
> > > > > +
> > > > > +    return;
> > > > > +found:
> > > > > +    find->pdev = pdev;
> > > > > +    find->found = true;
> > > > > +}
> > > > > +
> > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > *opaque)
> > > > > +{
> > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > +
> > > > > +    if (find->found) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    if (pdev == find->pdev) {
> > > > > +        find->found = true;
> > > > > +    }
> > > > > +}
> > > > > +
> > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > +{
> > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > +    VFIOGroup *group;
> > > > > +    struct VFIODeviceFind find;
> > > > > +    int ret, i;
> > > > > +
> > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > +    if (ret) {
> > > > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > > > +                     " device does not support hot reset.",
> > > > > +                     vdev->vbasedev.name);
> > > > > +        goto out;
> > > > > +    }
> > > > > +
> > > > > +    /* List all affected devices by bus reset */
> > > > > +    devices = &info->devices[0];
> > > > > +
> > > > > +    /* Verify that we have all the groups required */
> > > > > +    for (i = 0; i < info->count; i++) {
> > > > > +        PCIHostDeviceAddress host;
> > > > > +        VFIOPCIDevice *tmp;
> > > > > +        VFIODevice *vbasedev_iter;
> > > > > +        bool found = false;
> > > > > +
> > > > > +        host.domain = devices[i].segment;
> > > > > +        host.bus = devices[i].bus;
> > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > +
> > > > > +        /* Skip the current device */
> > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > +            continue;
> > > > > +        }
> > > > > +
> > > > > +        /* Ensure we own the group of the affected device */
> > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > +                break;
> > > > > +            }
> > > > > +        }
> > > > > +
> > > > > +        if (!group) {
> > > > > +            error_report("vfio: Cannot enable AER for device
> > > > > %s, "
> > > > > +                         "depends on group %d which is not
> > > > > owned.",
> > > > > +                         vdev->vbasedev.name,
> > > > > devices[i].group_id);
> > > > > +            ret = -1;
> > > > > +            goto out;
> > > > > +        }
> > > > > +
> > > > > +        /* Ensure affected devices for reset on/blow the bus
> > > > > */
> > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > next) {
> > > > > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > > > > +                continue;
> > > > > +            }
> > > > > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > > > > vbasedev);
> > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > +
> > > > > +                /*
> > > > > +                 * For multifunction device, due to vfio
> > > > > driver
> > > > > signal all
> > > > > +                 * functions under the upstream link of the
> > > > > end
> > > > > point. here
> > > > > +                 * we validate all functions whether enable
> > > > > AER.
> > > > > +                 */
> > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > &tmp-
> > > > > > host) &&
> > > > > +                    !(tmp->features &
> > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > +                    error_report("vfio: Cannot enable AER for
> > > > > device %s, on same slot"
> > > > > +                                 " the dependent device %s
> > > > > which
> > > > > does not enable AER.",
> > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > vbasedev.name);
> > > > > +                    ret = -1;
> > > > > +                    goto out;
> > > > > +                }
> > > > > +
> > > > > +                find.pdev = pci;
> > > > > +                find.found = false;
> > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > +                                    device_find, &find);
> > > > > +                if (!find.found) {
> > > > > +                    error_report("vfio: Cannot enable AER for
> > > > > device %s, "
> > > > > +                                 "the dependent device %s is
> > > > > not
> > > > > under the same bus",
> > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > vbasedev.name);
> > > > > +                    ret = -1;
> > > > > +                    goto out;
> > > > > +                }
> > > > > +                found = true;
> > > > > +                break;
> > > > > +            }
> > > > > +        }
> > > > > +
> > > > > +        /* Ensure all affected devices assigned to VM */
> > > > 
> > > > I am puzzled.
> > > > Does not kernel enforce this already?
> > > > If not it's a security problem.
> > > > If yes why does userspace need to check this?
> > > 
> > > DMA isolation and bus level isolation are separate concepts.  Each
> > > function of a multi-function device can have DMA isolation, but a
> > > user
> > > needs to own all of the functions affected by a bus reset in order
> > > to
> > > perform one.  An AER configuration can only be created if the user
> > > can
> > > translate a guest bus reset into a host bus reset and therefore
> > > needs
> > > to test whether it has the permissions to do so.  I believe over
> > > the
> > > course of reviews we've also added some simplifying constraints
> > > around
> > > this to reduce the problem set, things like all the groups being
> > > assigned rather than just owned by the user.  However, I believe
> > > the
> > > kernel is sound in how it provides security for bus resets.
> > >  Thanks,
> > > 
> > > Alex
> > 
> > Yes, sounds good.
> > 
> > So how about just trying to do bus reset at setup time?
> > If kernel allows this, we know it is safe ...
> 
> The host may support hotplug, what's possible at setup time may not be
> possible when an error occurs.

How does this patch help solve this problem?

> It's unlikely, but worth considering I
> think.

I suspect vfio will have to solve this in kernel
(e.g. automatically add all new devices in the same group
wrt reset).

>  Thanks,
> 
> Alex
Alex Williamson Dec. 24, 2015, 6:41 p.m. UTC | #8
On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote:
> On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > 
> > > > > > when init vfio devices done, we should test all the devices
> > > > > > supported
> > > > > > aer whether conflict with others. For each one, get the hot
> > > > > > reset
> > > > > > info for the affected device list.  For each affected
> > > > > > device,
> > > > > > all
> > > > > > should attach to the VM and on/below the same bus. also, we
> > > > > > should
> > > > > > test
> > > > > > all of the non-AER supporting vfio-pci devices on or below
> > > > > > the
> > > > > > target
> > > > > > bus to verify they have a reset mechanism.
> > > > > > 
> > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > ---
> > > > > >  hw/vfio/pci.c | 236
> > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > > >  hw/vfio/pci.h |   1 +
> > > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > > 
> > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > > index d00b0e4..6926dcc 100644
> > > > > > --- a/hw/vfio/pci.c
> > > > > > +++ b/hw/vfio/pci.c
> > > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > > *vdev, uint8_t pos)
> > > > > >      return 0;
> > > > > >  }
> > > > > >  
> > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > > *host1,
> > > > > > +                                     PCIHostDeviceAddress
> > > > > > *host2)
> > > > > > +{
> > > > > > +    return (host1->domain == host2->domain && host1->bus
> > > > > > ==
> > > > > > host2-
> > > > > > > bus &&
> > > > > > +            host1->slot == host2->slot);
> > > > > > +}
> > > > > > +
> > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress
> > > > > > *host1,
> > > > > > +                                PCIHostDeviceAddress
> > > > > > *host2)
> > > > > > +{
> > > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > > +            host1->function == host2->function);
> > > > > > +}
> > > > > > +
> > > > > > +struct VFIODeviceFind {
> > > > > > +    PCIDevice *pdev;
> > > > > > +    bool found;
> > > > > > +};
> > > > > > +
> > > > > > +static void vfio_check_device_noreset(PCIBus *bus,
> > > > > > PCIDevice
> > > > > > *pdev,
> > > > > > +                                      void *opaque)
> > > > > > +{
> > > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > > +    VFIOPCIDevice *vdev;
> > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > +
> > > > > > +    if (find->found) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > > +        if (!dc->reset) {
> > > > > > +            goto found;
> > > > > > +        }
> > > > > > +        return;
> > > > > > +    }
> > > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > > +        !vdev->vbasedev.reset_works) {
> > > > > > +        goto found;
> > > > > > +    }
> > > > > > +
> > > > > > +    return;
> > > > > > +found:
> > > > > > +    find->pdev = pdev;
> > > > > > +    find->found = true;
> > > > > > +}
> > > > > > +
> > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > > *opaque)
> > > > > > +{
> > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > +
> > > > > > +    if (find->found) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (pdev == find->pdev) {
> > > > > > +        find->found = true;
> > > > > > +    }
> > > > > > +}
> > > > > > +
> > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > > +{
> > > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > > +    VFIOGroup *group;
> > > > > > +    struct VFIODeviceFind find;
> > > > > > +    int ret, i;
> > > > > > +
> > > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > > +    if (ret) {
> > > > > > +        error_report("vfio: Cannot enable AER for device
> > > > > > %s,"
> > > > > > +                     " device does not support hot
> > > > > > reset.",
> > > > > > +                     vdev->vbasedev.name);
> > > > > > +        goto out;
> > > > > > +    }
> > > > > > +
> > > > > > +    /* List all affected devices by bus reset */
> > > > > > +    devices = &info->devices[0];
> > > > > > +
> > > > > > +    /* Verify that we have all the groups required */
> > > > > > +    for (i = 0; i < info->count; i++) {
> > > > > > +        PCIHostDeviceAddress host;
> > > > > > +        VFIOPCIDevice *tmp;
> > > > > > +        VFIODevice *vbasedev_iter;
> > > > > > +        bool found = false;
> > > > > > +
> > > > > > +        host.domain = devices[i].segment;
> > > > > > +        host.bus = devices[i].bus;
> > > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > > +
> > > > > > +        /* Skip the current device */
> > > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > > +            continue;
> > > > > > +        }
> > > > > > +
> > > > > > +        /* Ensure we own the group of the affected device
> > > > > > */
> > > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > > +                break;
> > > > > > +            }
> > > > > > +        }
> > > > > > +
> > > > > > +        if (!group) {
> > > > > > +            error_report("vfio: Cannot enable AER for
> > > > > > device
> > > > > > %s, "
> > > > > > +                         "depends on group %d which is not
> > > > > > owned.",
> > > > > > +                         vdev->vbasedev.name,
> > > > > > devices[i].group_id);
> > > > > > +            ret = -1;
> > > > > > +            goto out;
> > > > > > +        }
> > > > > > +
> > > > > > +        /* Ensure affected devices for reset on/blow the
> > > > > > bus
> > > > > > */
> > > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > > next) {
> > > > > > +            if (vbasedev_iter->type !=
> > > > > > VFIO_DEVICE_TYPE_PCI) {
> > > > > > +                continue;
> > > > > > +            }
> > > > > > +            tmp = container_of(vbasedev_iter,
> > > > > > VFIOPCIDevice,
> > > > > > vbasedev);
> > > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > > +
> > > > > > +                /*
> > > > > > +                 * For multifunction device, due to vfio
> > > > > > driver
> > > > > > signal all
> > > > > > +                 * functions under the upstream link of
> > > > > > the
> > > > > > end
> > > > > > point. here
> > > > > > +                 * we validate all functions whether
> > > > > > enable
> > > > > > AER.
> > > > > > +                 */
> > > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > > &tmp-
> > > > > > > host) &&
> > > > > > +                    !(tmp->features &
> > > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > for
> > > > > > device %s, on same slot"
> > > > > > +                                 " the dependent device %s
> > > > > > which
> > > > > > does not enable AER.",
> > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > vbasedev.name);
> > > > > > +                    ret = -1;
> > > > > > +                    goto out;
> > > > > > +                }
> > > > > > +
> > > > > > +                find.pdev = pci;
> > > > > > +                find.found = false;
> > > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > > +                                    device_find, &find);
> > > > > > +                if (!find.found) {
> > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > for
> > > > > > device %s, "
> > > > > > +                                 "the dependent device %s
> > > > > > is
> > > > > > not
> > > > > > under the same bus",
> > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > vbasedev.name);
> > > > > > +                    ret = -1;
> > > > > > +                    goto out;
> > > > > > +                }
> > > > > > +                found = true;
> > > > > > +                break;
> > > > > > +            }
> > > > > > +        }
> > > > > > +
> > > > > > +        /* Ensure all affected devices assigned to VM */
> > > > > 
> > > > > I am puzzled.
> > > > > Does not kernel enforce this already?
> > > > > If not it's a security problem.
> > > > > If yes why does userspace need to check this?
> > > > 
> > > > DMA isolation and bus level isolation are separate concepts.
> > > >  Each
> > > > function of a multi-function device can have DMA isolation, but
> > > > a
> > > > user
> > > > needs to own all of the functions affected by a bus reset in
> > > > order
> > > > to
> > > > perform one.  An AER configuration can only be created if the
> > > > user
> > > > can
> > > > translate a guest bus reset into a host bus reset and therefore
> > > > needs
> > > > to test whether it has the permissions to do so.  I believe
> > > > over
> > > > the
> > > > course of reviews we've also added some simplifying constraints
> > > > around
> > > > this to reduce the problem set, things like all the groups
> > > > being
> > > > assigned rather than just owned by the user.  However, I
> > > > believe
> > > > the
> > > > kernel is sound in how it provides security for bus resets.
> > > >  Thanks,
> > > > 
> > > > Alex
> > > 
> > > Yes, sounds good.
> > > 
> > > So how about just trying to do bus reset at setup time?
> > > If kernel allows this, we know it is safe ...
> > 
> > The host may support hotplug, what's possible at setup time may not
> > be
> > possible when an error occurs.
> 
> How does this patch help solve this problem?

I believe there's a patch in this series that re-tests on the
occurrence of an error, before injecting the AER into the guest.

> > It's unlikely, but worth considering I
> > think.
> 
> I suspect vfio will have to solve this in kernel
> (e.g. automatically add all new devices in the same group
> wrt reset).

Nope, the user simply loses their ability to reset the bus if they
don't own all the groups at the time they attempt to do a bus reset.
 Mixing bus isolation and DMA isolation would cause a mess of groups.
Michael S. Tsirkin Dec. 24, 2015, 7:42 p.m. UTC | #9
On Thu, Dec 24, 2015 at 11:41:15AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> > > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > 
> > > > > > > when init vfio devices done, we should test all the devices
> > > > > > > supported
> > > > > > > aer whether conflict with others. For each one, get the hot
> > > > > > > reset
> > > > > > > info for the affected device list.  For each affected
> > > > > > > device,
> > > > > > > all
> > > > > > > should attach to the VM and on/below the same bus. also, we
> > > > > > > should
> > > > > > > test
> > > > > > > all of the non-AER supporting vfio-pci devices on or below
> > > > > > > the
> > > > > > > target
> > > > > > > bus to verify they have a reset mechanism.
> > > > > > > 
> > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > ---
> > > > > > >  hw/vfio/pci.c | 236
> > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > > > >  hw/vfio/pci.h |   1 +
> > > > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > > > index d00b0e4..6926dcc 100644
> > > > > > > --- a/hw/vfio/pci.c
> > > > > > > +++ b/hw/vfio/pci.c
> > > > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > > > *vdev, uint8_t pos)
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > > > *host1,
> > > > > > > +                                     PCIHostDeviceAddress
> > > > > > > *host2)
> > > > > > > +{
> > > > > > > +    return (host1->domain == host2->domain && host1->bus
> > > > > > > ==
> > > > > > > host2-
> > > > > > > > bus &&
> > > > > > > +            host1->slot == host2->slot);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress
> > > > > > > *host1,
> > > > > > > +                                PCIHostDeviceAddress
> > > > > > > *host2)
> > > > > > > +{
> > > > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > > > +            host1->function == host2->function);
> > > > > > > +}
> > > > > > > +
> > > > > > > +struct VFIODeviceFind {
> > > > > > > +    PCIDevice *pdev;
> > > > > > > +    bool found;
> > > > > > > +};
> > > > > > > +
> > > > > > > +static void vfio_check_device_noreset(PCIBus *bus,
> > > > > > > PCIDevice
> > > > > > > *pdev,
> > > > > > > +                                      void *opaque)
> > > > > > > +{
> > > > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > > > +    VFIOPCIDevice *vdev;
> > > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > > +
> > > > > > > +    if (find->found) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > > > +        if (!dc->reset) {
> > > > > > > +            goto found;
> > > > > > > +        }
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > > > +        !vdev->vbasedev.reset_works) {
> > > > > > > +        goto found;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return;
> > > > > > > +found:
> > > > > > > +    find->pdev = pdev;
> > > > > > > +    find->found = true;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > > > *opaque)
> > > > > > > +{
> > > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > > +
> > > > > > > +    if (find->found) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (pdev == find->pdev) {
> > > > > > > +        find->found = true;
> > > > > > > +    }
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > > > +{
> > > > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > > > +    VFIOGroup *group;
> > > > > > > +    struct VFIODeviceFind find;
> > > > > > > +    int ret, i;
> > > > > > > +
> > > > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > > > +    if (ret) {
> > > > > > > +        error_report("vfio: Cannot enable AER for device
> > > > > > > %s,"
> > > > > > > +                     " device does not support hot
> > > > > > > reset.",
> > > > > > > +                     vdev->vbasedev.name);
> > > > > > > +        goto out;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    /* List all affected devices by bus reset */
> > > > > > > +    devices = &info->devices[0];
> > > > > > > +
> > > > > > > +    /* Verify that we have all the groups required */
> > > > > > > +    for (i = 0; i < info->count; i++) {
> > > > > > > +        PCIHostDeviceAddress host;
> > > > > > > +        VFIOPCIDevice *tmp;
> > > > > > > +        VFIODevice *vbasedev_iter;
> > > > > > > +        bool found = false;
> > > > > > > +
> > > > > > > +        host.domain = devices[i].segment;
> > > > > > > +        host.bus = devices[i].bus;
> > > > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > > > +
> > > > > > > +        /* Skip the current device */
> > > > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > > > +            continue;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure we own the group of the affected device
> > > > > > > */
> > > > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > > > +                break;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        if (!group) {
> > > > > > > +            error_report("vfio: Cannot enable AER for
> > > > > > > device
> > > > > > > %s, "
> > > > > > > +                         "depends on group %d which is not
> > > > > > > owned.",
> > > > > > > +                         vdev->vbasedev.name,
> > > > > > > devices[i].group_id);
> > > > > > > +            ret = -1;
> > > > > > > +            goto out;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure affected devices for reset on/blow the
> > > > > > > bus
> > > > > > > */
> > > > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > > > next) {
> > > > > > > +            if (vbasedev_iter->type !=
> > > > > > > VFIO_DEVICE_TYPE_PCI) {
> > > > > > > +                continue;
> > > > > > > +            }
> > > > > > > +            tmp = container_of(vbasedev_iter,
> > > > > > > VFIOPCIDevice,
> > > > > > > vbasedev);
> > > > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > > > +
> > > > > > > +                /*
> > > > > > > +                 * For multifunction device, due to vfio
> > > > > > > driver
> > > > > > > signal all
> > > > > > > +                 * functions under the upstream link of
> > > > > > > the
> > > > > > > end
> > > > > > > point. here
> > > > > > > +                 * we validate all functions whether
> > > > > > > enable
> > > > > > > AER.
> > > > > > > +                 */
> > > > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > > > &tmp-
> > > > > > > > host) &&
> > > > > > > +                    !(tmp->features &
> > > > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > > for
> > > > > > > device %s, on same slot"
> > > > > > > +                                 " the dependent device %s
> > > > > > > which
> > > > > > > does not enable AER.",
> > > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > > vbasedev.name);
> > > > > > > +                    ret = -1;
> > > > > > > +                    goto out;
> > > > > > > +                }
> > > > > > > +
> > > > > > > +                find.pdev = pci;
> > > > > > > +                find.found = false;
> > > > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > > > +                                    device_find, &find);
> > > > > > > +                if (!find.found) {
> > > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > > for
> > > > > > > device %s, "
> > > > > > > +                                 "the dependent device %s
> > > > > > > is
> > > > > > > not
> > > > > > > under the same bus",
> > > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > > vbasedev.name);
> > > > > > > +                    ret = -1;
> > > > > > > +                    goto out;
> > > > > > > +                }
> > > > > > > +                found = true;
> > > > > > > +                break;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure all affected devices assigned to VM */
> > > > > > 
> > > > > > I am puzzled.
> > > > > > Does not kernel enforce this already?
> > > > > > If not it's a security problem.
> > > > > > If yes why does userspace need to check this?
> > > > > 
> > > > > DMA isolation and bus level isolation are separate concepts.
> > > > >  Each
> > > > > function of a multi-function device can have DMA isolation, but
> > > > > a
> > > > > user
> > > > > needs to own all of the functions affected by a bus reset in
> > > > > order
> > > > > to
> > > > > perform one.  An AER configuration can only be created if the
> > > > > user
> > > > > can
> > > > > translate a guest bus reset into a host bus reset and therefore
> > > > > needs
> > > > > to test whether it has the permissions to do so.  I believe
> > > > > over
> > > > > the
> > > > > course of reviews we've also added some simplifying constraints
> > > > > around
> > > > > this to reduce the problem set, things like all the groups
> > > > > being
> > > > > assigned rather than just owned by the user.  However, I
> > > > > believe
> > > > > the
> > > > > kernel is sound in how it provides security for bus resets.
> > > > >  Thanks,
> > > > > 
> > > > > Alex
> > > > 
> > > > Yes, sounds good.
> > > > 
> > > > So how about just trying to do bus reset at setup time?
> > > > If kernel allows this, we know it is safe ...
> > > 
> > > The host may support hotplug, what's possible at setup time may not
> > > be
> > > possible when an error occurs.
> > 
> > How does this patch help solve this problem?
> 
> I believe there's a patch in this series that re-tests on the
> occurrence of an error, before injecting the AER into the guest.

Doesn't seem robust.  What if hotplug happens right after error is
injected?

> > > It's unlikely, but worth considering I
> > > think.
> > 
> > I suspect vfio will have to solve this in kernel
> > (e.g. automatically add all new devices in the same group
> > wrt reset).
> 
> Nope, the user simply loses their ability to reset the bus if they
> don't own all the groups at the time they attempt to do a bus reset.

Hmm, this is sub-optimal.
Assume I hot-plug a device behind a bus.
I fully intend to pass it through to a VM
where all other devices are, but before I
manage to do this, an error triggers.

>  Mixing bus isolation and DMA isolation would cause a mess of groups.

Not sure how what I said implies this.

I merely suggested that if vfio takes over bus reset
it should take over handling hotplug as well,
so devices added on this bus are automatically
prevented from being used by anyone except
the same VM, making it safe to reset them.
diff mbox

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d00b0e4..6926dcc 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1806,6 +1806,216 @@  static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
+                                     PCIHostDeviceAddress *host2)
+{
+    return (host1->domain == host2->domain && host1->bus == host2->bus &&
+            host1->slot == host2->slot);
+}
+
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+                                PCIHostDeviceAddress *host2)
+{
+    return (vfio_pci_host_slot_match(host1, host2) &&
+            host1->function == host2->function);
+}
+
+struct VFIODeviceFind {
+    PCIDevice *pdev;
+    bool found;
+};
+
+static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
+                                      void *opaque)
+{
+    DeviceState *dev = DEVICE(pdev);
+    DeviceClass *dc = DEVICE_GET_CLASS(dev);
+    VFIOPCIDevice *vdev;
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+        if (!dc->reset) {
+            goto found;
+        }
+        return;
+    }
+    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !vdev->vbasedev.reset_works) {
+        goto found;
+    }
+
+    return;
+found:
+    find->pdev = pdev;
+    find->found = true;
+}
+
+static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
+{
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    if (pdev == find->pdev) {
+        find->found = true;
+    }
+}
+
+static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
+{
+    PCIBus *bus = vdev->pdev.bus;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    struct VFIODeviceFind find;
+    int ret, i;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        error_report("vfio: Cannot enable AER for device %s,"
+                     " device does not support hot reset.",
+                     vdev->vbasedev.name);
+        goto out;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+        bool found = false;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            error_report("vfio: Cannot enable AER for device %s, "
+                         "depends on group %d which is not owned.",
+                         vdev->vbasedev.name, devices[i].group_id);
+            ret = -1;
+            goto out;
+        }
+
+        /* Ensure affected devices for reset on/blow the bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, &tmp->host)) {
+                PCIDevice *pci = PCI_DEVICE(tmp);
+
+                /*
+                 * For multifunction device, due to vfio driver signal all
+                 * functions under the upstream link of the end point. here
+                 * we validate all functions whether enable AER.
+                 */
+                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
+                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
+                    error_report("vfio: Cannot enable AER for device %s, on same slot"
+                                 " the dependent device %s which does not enable AER.",
+                                 vdev->vbasedev.name, tmp->vbasedev.name);
+                    ret = -1;
+                    goto out;
+                }
+
+                find.pdev = pci;
+                find.found = false;
+                pci_for_each_device(bus, pci_bus_num(bus),
+                                    device_find, &find);
+                if (!find.found) {
+                    error_report("vfio: Cannot enable AER for device %s, "
+                                 "the dependent device %s is not under the same bus",
+                                 vdev->vbasedev.name, tmp->vbasedev.name);
+                    ret = -1;
+                    goto out;
+                }
+                found = true;
+                break;
+            }
+        }
+
+        /* Ensure all affected devices assigned to VM */
+        if (!found) {
+            error_report("vfio: Cannot enable AER for device %s, "
+                         "the dependent device %04x:%02x:%02x.%x "
+                         "is not assigned to VM.",
+                         vdev->vbasedev.name, host.domain, host.bus,
+                         host.slot, host.function);
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /*
+     * Check the all pci devices on or below the target bus
+     * have a reset mechanism at least.
+     */
+    find.pdev = NULL;
+    find.found = false;
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        vfio_check_device_noreset, &find);
+    if (find.found) {
+        error_report("vfio: Cannot enable AER for device %s, "
+                     "the affected device %s does not have a reset mechanism.",
+                     vdev->vbasedev.name, find.pdev->name);
+        ret = -1;
+        goto out;
+    }
+
+    ret = 0;
+out:
+    g_free(info);
+    return ret;
+}
+
+static int vfio_check_devices_host_bus_reset(void)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+
+    /* Check All vfio-pci devices if have bus reset capability */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+                vfio_check_host_bus_reset(vdev)) {
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -1983,13 +2193,6 @@  static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
-                                PCIHostDeviceAddress *host2)
-{
-    return (host1->domain == host2->domain && host1->bus == host2->bus &&
-            host1->slot == host2->slot && host1->function == host2->function);
-}
-
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
@@ -2495,6 +2698,20 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
+{
+    int ret;
+
+    ret = vfio_check_devices_host_bus_reset();
+    if (ret) {
+        exit(1);
+    }
+}
+
+static Notifier machine_notifier = {
+    .notify = vfio_pci_machine_done_notify,
+};
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2841,6 +3058,11 @@  static const TypeInfo vfio_pci_dev_info = {
 static void register_vfio_pci_dev_type(void)
 {
     type_register_static(&vfio_pci_dev_info);
+    /*
+     * Register notifier when machine init is done, since we need
+     * check the configration manner after all vfio device are inited.
+     */
+    qemu_add_machine_init_done_notifier(&machine_notifier);
 }
 
 type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 48c1f69..59ae194 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@ 
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"