diff mbox series

[RFC,v7,26/26] vfio/pci: Implement return_page_response page response callback

Message ID 20201116181349.11908-27-eric.auger@redhat.com
State New
Headers show
Series vSMMUv3/pSMMUv3 2 stage VFIO integration | expand

Commit Message

Eric Auger Nov. 16, 2020, 6:13 p.m. UTC
This patch implements the page response path. The
response is written into the page response ring buffer and then
the header's head index is updated. This path is not used
by this series. It is introduced here as a POC for vSVA/ARM
integration.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 hw/vfio/pci.h |   2 +
 hw/vfio/pci.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)

Comments

Shameerali Kolothum Thodi Feb. 18, 2021, 10:19 a.m. UTC | #1
Hi Eric,

> -----Original Message-----
> From: Eric Auger [mailto:eric.auger@redhat.com]
> Sent: 16 November 2020 18:14
> To: eric.auger.pro@gmail.com; eric.auger@redhat.com;
> qemu-devel@nongnu.org; qemu-arm@nongnu.org;
> alex.williamson@redhat.com
> Cc: peter.maydell@linaro.org; jean-philippe@linaro.org; peterx@redhat.com;
> jacob.jun.pan@linux.intel.com; yi.l.liu@intel.com; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; tn@semihalf.com;
> nicoleotsuka@gmail.com; yuzenghui <yuzenghui@huawei.com>;
> zhangfei.gao@gmail.com; vivek.gautam@arm.com
> Subject: [RFC v7 26/26] vfio/pci: Implement return_page_response page
> response callback
> 
> This patch implements the page response path. The
> response s written into the page response ring buffer and then
> update header's head index is updated. This path is not used
> by this series. It is introduced here as a POC for vSVA/ARM
> integration.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> ---
>  hw/vfio/pci.h |   2 +
>  hw/vfio/pci.c | 121
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 123 insertions(+)
> 
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 350e9e9005..ce0472611e 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -147,6 +147,8 @@ struct VFIOPCIDevice {
>      VFIOPCIExtIRQ *ext_irqs;
>      VFIORegion dma_fault_region;
>      uint32_t fault_tail_index;
> +    VFIORegion dma_fault_response_region;
> +    uint32_t fault_response_head_index;
>      int (*resetfn)(struct VFIOPCIDevice *);
>      uint32_t vendor_id;
>      uint32_t device_id;
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 4e3495bb60..797acd9c73 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2631,6 +2631,61 @@ out:
>      g_free(fault_region_info);
>  }
> 
> +static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error
> **errp)
> +{
> +    struct vfio_region_info *fault_region_info = NULL;
> +    struct vfio_region_info_cap_fault *cap_fault;
> +    VFIODevice *vbasedev = &vdev->vbasedev;
> +    struct vfio_info_cap_header *hdr;
> +    char *fault_region_name;
> +    int ret;
> +
> +    ret = vfio_get_dev_region_info(&vdev->vbasedev,
> +                                   VFIO_REGION_TYPE_NESTED,
> +
> VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE,
> +                                   &fault_region_info);
> +    if (ret) {
> +        goto out;
> +    }
> +
> +    hdr = vfio_get_region_info_cap(fault_region_info,
> +
> VFIO_REGION_INFO_CAP_DMA_FAULT);

VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE ? 

> +    if (!hdr) {
> +        error_setg(errp, "failed to retrieve DMA FAULT RESPONSE
> capability");
> +        goto out;
> +    }
> +    cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
> +                             header);
> +    if (cap_fault->version != 1) {
> +        error_setg(errp, "Unsupported DMA FAULT RESPONSE API
> version %d",
> +                   cap_fault->version);
> +        goto out;
> +    }
> +
> +    fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d",
> +                                        vbasedev->name,
> +                                        fault_region_info->index);
> +
> +    ret = vfio_region_setup(OBJECT(vdev), vbasedev,
> +                            &vdev->dma_fault_response_region,
> +                            fault_region_info->index,
> +                            fault_region_name);
> +    g_free(fault_region_name);
> +    if (ret) {
> +        error_setg_errno(errp, -ret,
> +                         "failed to set up the DMA FAULT RESPONSE
> region %d",
> +                         fault_region_info->index);
> +        goto out;
> +    }
> +
> +    ret = vfio_region_mmap(&vdev->dma_fault_response_region);
> +    if (ret) {
> +        error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT
> RESPONSE queue");
> +    }
> +out:
> +    g_free(fault_region_info);
> +}
> +
>  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>  {
>      VFIODevice *vbasedev = &vdev->vbasedev;
> @@ -2706,6 +2761,12 @@ static void vfio_populate_device(VFIOPCIDevice
> *vdev, Error **errp)
>          return;
>      }
> 
> +    vfio_init_fault_response_regions(vdev, &err);
> +    if (err) {
> +        error_propagate(errp, err);
> +        return;
> +    }
> +
>      irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
> 
>      ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
> @@ -2884,8 +2945,68 @@ static int vfio_iommu_set_pasid_table(PCIBus
> *bus, int32_t devfn,
>      return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
>  }
> 
> +static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn,
> +                                           IOMMUPageResponse
> *resp)
> +{
> +    PCIDevice *pdev = bus->devices[devfn];
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    struct iommu_page_response *response = &resp->resp;
> +    struct vfio_region_dma_fault_response header;
> +    struct iommu_page_response *queue;
> +    char *queue_buffer = NULL;
> +    ssize_t bytes;
> +
> +    if (!vdev->dma_fault_response_region.mem) {
> +        return -EINVAL;
> +    }
> +
> +    /* read the header */
> +    bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
> +                  vdev->dma_fault_response_region.fd_offset);
> +    if (bytes != sizeof(header)) {
> +        error_report("%s unable to read the fault region header (0x%lx)",
> +                     __func__, bytes);
> +        return -1;
> +    }
> +
> +    /* Normally the fault queue is mmapped */
> +    queue = (struct iommu_page_response
> *)vdev->dma_fault_response_region.mmaps[0].mmap;
> +    if (!queue) {
> +        size_t queue_size = header.nb_entries * header.entry_size;
> +
> +        error_report("%s: fault queue not mmapped: slower fault handling",
> +                     vdev->vbasedev.name);
> +
> +        queue_buffer = g_malloc(queue_size);
> +        bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
> +                      vdev->dma_fault_response_region.fd_offset +
> header.offset);
> +        if (bytes != queue_size) {
> +            error_report("%s unable to read the fault queue (0x%lx)",
> +                         __func__, bytes);
> +            return -1;
> +        }
> +
> +        queue = (struct iommu_page_response *)queue_buffer;
> +    }
> +    /* deposit the new response in the queue and increment the head */
> +    memcpy(queue + header.head, response, header.entry_size);
> +
> +    vdev->fault_response_head_index =
> +        (vdev->fault_response_head_index + 1) % header.nb_entries;
> +    bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index,
> 4,
> +                   vdev->dma_fault_response_region.fd_offset);
> +    if (bytes != 4) {
> +        error_report("%s unable to write the fault response region head
> index (0x%lx)",
> +                     __func__, bytes);
> +    }
> +    g_free(queue_buffer);
> +
> +    return 0;
> +}
> +
>  static PCIPASIDOps vfio_pci_pasid_ops = {
>      .set_pasid_table = vfio_iommu_set_pasid_table,
> +    .return_page_response = vfio_iommu_return_page_response,
>  };
> 
>  static void vfio_dma_fault_notifier_handler(void *opaque)

Also, I just noted that this patch breaks the dev hot add/del functionality.
device_add works fine but device_del is not removing the dev cleanly.

The below one fixes it. Please check.

Thanks,
Shameer

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 797acd9c73..92c1d48316 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3470,6 +3470,7 @@ static void vfio_instance_finalize(Object *obj)
     vfio_display_finalize(vdev);
     vfio_bars_finalize(vdev);
     vfio_region_finalize(&vdev->dma_fault_region);
+    vfio_region_finalize(&vdev->dma_fault_response_region);
     g_free(vdev->emulated_config_bits);
     g_free(vdev->rom);
     /*
@@ -3491,6 +3492,7 @@ static void vfio_exitfn(PCIDevice *pdev)
     vfio_unregister_err_notifier(vdev);
     vfio_unregister_ext_irq_notifiers(vdev);
     vfio_region_exit(&vdev->dma_fault_region);
+    vfio_region_exit(&vdev->dma_fault_response_region);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     if (vdev->irqchip_change_notifier.notify) {
         kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_not
Eric Auger Feb. 18, 2021, 10:41 a.m. UTC | #2
Hi Shameer,

On 2/18/21 11:19 AM, Shameerali Kolothum Thodi wrote:
> Hi Eric,
> 
>> -----Original Message-----
>> From: Eric Auger [mailto:eric.auger@redhat.com]
>> Sent: 16 November 2020 18:14
>> To: eric.auger.pro@gmail.com; eric.auger@redhat.com;
>> qemu-devel@nongnu.org; qemu-arm@nongnu.org;
>> alex.williamson@redhat.com
>> Cc: peter.maydell@linaro.org; jean-philippe@linaro.org; peterx@redhat.com;
>> jacob.jun.pan@linux.intel.com; yi.l.liu@intel.com; Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@huawei.com>; tn@semihalf.com;
>> nicoleotsuka@gmail.com; yuzenghui <yuzenghui@huawei.com>;
>> zhangfei.gao@gmail.com; vivek.gautam@arm.com
>> Subject: [RFC v7 26/26] vfio/pci: Implement return_page_response page
>> response callback
>>
>> This patch implements the page response path. The
>> response s written into the page response ring buffer and then
>> update header's head index is updated. This path is not used
>> by this series. It is introduced here as a POC for vSVA/ARM
>> integration.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>> ---
>>  hw/vfio/pci.h |   2 +
>>  hw/vfio/pci.c | 121
>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 123 insertions(+)
>>
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 350e9e9005..ce0472611e 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -147,6 +147,8 @@ struct VFIOPCIDevice {
>>      VFIOPCIExtIRQ *ext_irqs;
>>      VFIORegion dma_fault_region;
>>      uint32_t fault_tail_index;
>> +    VFIORegion dma_fault_response_region;
>> +    uint32_t fault_response_head_index;
>>      int (*resetfn)(struct VFIOPCIDevice *);
>>      uint32_t vendor_id;
>>      uint32_t device_id;
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 4e3495bb60..797acd9c73 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2631,6 +2631,61 @@ out:
>>      g_free(fault_region_info);
>>  }
>>
>> +static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error
>> **errp)
>> +{
>> +    struct vfio_region_info *fault_region_info = NULL;
>> +    struct vfio_region_info_cap_fault *cap_fault;
>> +    VFIODevice *vbasedev = &vdev->vbasedev;
>> +    struct vfio_info_cap_header *hdr;
>> +    char *fault_region_name;
>> +    int ret;
>> +
>> +    ret = vfio_get_dev_region_info(&vdev->vbasedev,
>> +                                   VFIO_REGION_TYPE_NESTED,
>> +
>> VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE,
>> +                                   &fault_region_info);
>> +    if (ret) {
>> +        goto out;
>> +    }
>> +
>> +    hdr = vfio_get_region_info_cap(fault_region_info,
>> +
>> VFIO_REGION_INFO_CAP_DMA_FAULT);
> 
> VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE ? 
yes!
> 
>> +    if (!hdr) {
>> +        error_setg(errp, "failed to retrieve DMA FAULT RESPONSE
>> capability");
>> +        goto out;
>> +    }
>> +    cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
>> +                             header);
>> +    if (cap_fault->version != 1) {
>> +        error_setg(errp, "Unsupported DMA FAULT RESPONSE API
>> version %d",
>> +                   cap_fault->version);
>> +        goto out;
>> +    }
>> +
>> +    fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d",
>> +                                        vbasedev->name,
>> +                                        fault_region_info->index);
>> +
>> +    ret = vfio_region_setup(OBJECT(vdev), vbasedev,
>> +                            &vdev->dma_fault_response_region,
>> +                            fault_region_info->index,
>> +                            fault_region_name);
>> +    g_free(fault_region_name);
>> +    if (ret) {
>> +        error_setg_errno(errp, -ret,
>> +                         "failed to set up the DMA FAULT RESPONSE
>> region %d",
>> +                         fault_region_info->index);
>> +        goto out;
>> +    }
>> +
>> +    ret = vfio_region_mmap(&vdev->dma_fault_response_region);
>> +    if (ret) {
>> +        error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT
>> RESPONSE queue");
>> +    }
>> +out:
>> +    g_free(fault_region_info);
>> +}
>> +
>>  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>>  {
>>      VFIODevice *vbasedev = &vdev->vbasedev;
>> @@ -2706,6 +2761,12 @@ static void vfio_populate_device(VFIOPCIDevice
>> *vdev, Error **errp)
>>          return;
>>      }
>>
>> +    vfio_init_fault_response_regions(vdev, &err);
>> +    if (err) {
>> +        error_propagate(errp, err);
>> +        return;
>> +    }
>> +
>>      irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
>>
>>      ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
>> @@ -2884,8 +2945,68 @@ static int vfio_iommu_set_pasid_table(PCIBus
>> *bus, int32_t devfn,
>>      return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
>>  }
>>
>> +static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn,
>> +                                           IOMMUPageResponse
>> *resp)
>> +{
>> +    PCIDevice *pdev = bus->devices[devfn];
>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +    struct iommu_page_response *response = &resp->resp;
>> +    struct vfio_region_dma_fault_response header;
>> +    struct iommu_page_response *queue;
>> +    char *queue_buffer = NULL;
>> +    ssize_t bytes;
>> +
>> +    if (!vdev->dma_fault_response_region.mem) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    /* read the header */
>> +    bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
>> +                  vdev->dma_fault_response_region.fd_offset);
>> +    if (bytes != sizeof(header)) {
>> +        error_report("%s unable to read the fault region header (0x%lx)",
>> +                     __func__, bytes);
>> +        return -1;
>> +    }
>> +
>> +    /* Normally the fault queue is mmapped */
>> +    queue = (struct iommu_page_response
>> *)vdev->dma_fault_response_region.mmaps[0].mmap;
>> +    if (!queue) {
>> +        size_t queue_size = header.nb_entries * header.entry_size;
>> +
>> +        error_report("%s: fault queue not mmapped: slower fault handling",
>> +                     vdev->vbasedev.name);
>> +
>> +        queue_buffer = g_malloc(queue_size);
>> +        bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
>> +                      vdev->dma_fault_response_region.fd_offset +
>> header.offset);
>> +        if (bytes != queue_size) {
>> +            error_report("%s unable to read the fault queue (0x%lx)",
>> +                         __func__, bytes);
>> +            return -1;
>> +        }
>> +
>> +        queue = (struct iommu_page_response *)queue_buffer;
>> +    }
>> +    /* deposit the new response in the queue and increment the head */
>> +    memcpy(queue + header.head, response, header.entry_size);
>> +
>> +    vdev->fault_response_head_index =
>> +        (vdev->fault_response_head_index + 1) % header.nb_entries;
>> +    bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index,
>> 4,
>> +                   vdev->dma_fault_response_region.fd_offset);
>> +    if (bytes != 4) {
>> +        error_report("%s unable to write the fault response region head
>> index (0x%lx)",
>> +                     __func__, bytes);
>> +    }
>> +    g_free(queue_buffer);
>> +
>> +    return 0;
>> +}
>> +
>>  static PCIPASIDOps vfio_pci_pasid_ops = {
>>      .set_pasid_table = vfio_iommu_set_pasid_table,
>> +    .return_page_response = vfio_iommu_return_page_response,
>>  };
>>
>>  static void vfio_dma_fault_notifier_handler(void *opaque)
> 
> Also, I just noted that this patch breaks the dev hot add/del functionality.
> device_add works fine but device_del is not removing the dev cleanly.
Thank you for reporting this!

The test matrix becomes bigger and bigger :-( I need to write some
avocado-vt tests or the like.

I am currently working on the respin. At the moment I am investigating
the DPDK issue that you reported, which I was able to reproduce.

I intend to rebase on top of Jean-Philippe's
[PATCH v12 00/10] iommu: I/O page faults for SMMUv3

Is that good enough for your SVA integration, or do you want me to
prepare a rebase on some extended code?

Thanks

Eric
> 
> The below one fixes it. Please check.
> 
> Thanks,
> Shameer
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 797acd9c73..92c1d48316 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3470,6 +3470,7 @@ static void vfio_instance_finalize(Object *obj)
>      vfio_display_finalize(vdev);
>      vfio_bars_finalize(vdev);
>      vfio_region_finalize(&vdev->dma_fault_region);
> +    vfio_region_finalize(&vdev->dma_fault_response_region);
>      g_free(vdev->emulated_config_bits);
>      g_free(vdev->rom);
>      /*
> @@ -3491,6 +3492,7 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_err_notifier(vdev);
>      vfio_unregister_ext_irq_notifiers(vdev);
>      vfio_region_exit(&vdev->dma_fault_region);
> +    vfio_region_exit(&vdev->dma_fault_response_region);
>      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
>      if (vdev->irqchip_change_notifier.notify) {
>          kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_not
> 
> 
>
Shameerali Kolothum Thodi Feb. 18, 2021, 11:46 a.m. UTC | #3
Hi Eric,

> -----Original Message-----
> From: Auger Eric [mailto:eric.auger@redhat.com]
> Sent: 18 February 2021 10:42
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> eric.auger.pro@gmail.com; qemu-devel@nongnu.org; qemu-arm@nongnu.org;
> alex.williamson@redhat.com
> Cc: peter.maydell@linaro.org; jacob.jun.pan@linux.intel.com; Zengtao (B)
> <prime.zeng@hisilicon.com>; jean-philippe@linaro.org; tn@semihalf.com;
> peterx@redhat.com; nicoleotsuka@gmail.com; vivek.gautam@arm.com;
> yi.l.liu@intel.com; zhangfei.gao@gmail.com; yuzenghui
> <yuzenghui@huawei.com>; qubingbing <qubingbing@hisilicon.com>
> Subject: Re: [RFC v7 26/26] vfio/pci: Implement return_page_response page
> response callback
> 
[...]

> > Also, I just noted that this patch breaks the dev hot add/del functionality.
> > device_add works fine but device_del is not removing the dev cleanly.Thank
> you for reporting this!
> 
> The test matrix becomes bigger and bigger :-( I Need to write some
> avocado-vt tests or alike.
> 
> I am currently working on the respin. At the moment I investigate the
> DPDK issue that you reported and I was able to reproduce.

Ok. Good to know that it is reproducible.

> I intend to rebase on top of Jean-Philippe's
> [PATCH v12 00/10] iommu: I/O page faults for SMMUv3
> 
> Is that good enough for your SVA integration or do you want I prepare a
> rebase on some extended code?

Could you please try to base it on https://jpbrucker.net/git/linux/log/?h=sva/current

I think that has the latest from Jean-Philippe and will be easy to add
uacce/zip specific patches to test SVA/vSVA.

Thanks,
Shameer

 
> Thanks
> 
> Eric
> >
> > The below one fixes it. Please check.
> >
> > Thanks,
> > Shameer
> >
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 797acd9c73..92c1d48316 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -3470,6 +3470,7 @@ static void vfio_instance_finalize(Object *obj)
> >      vfio_display_finalize(vdev);
> >      vfio_bars_finalize(vdev);
> >      vfio_region_finalize(&vdev->dma_fault_region);
> > +    vfio_region_finalize(&vdev->dma_fault_response_region);
> >      g_free(vdev->emulated_config_bits);
> >      g_free(vdev->rom);
> >      /*
> > @@ -3491,6 +3492,7 @@ static void vfio_exitfn(PCIDevice *pdev)
> >      vfio_unregister_err_notifier(vdev);
> >      vfio_unregister_ext_irq_notifiers(vdev);
> >      vfio_region_exit(&vdev->dma_fault_region);
> > +    vfio_region_exit(&vdev->dma_fault_response_region);
> >      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
> >      if (vdev->irqchip_change_notifier.notify) {
> >
> kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_not
> >
> >
> >
Eric Auger Feb. 18, 2021, 1:32 p.m. UTC | #4
Hi Shameer,

On 2/18/21 12:46 PM, Shameerali Kolothum Thodi wrote:
> 
> Hi Eric,
> 
>> -----Original Message-----
>> From: Auger Eric [mailto:eric.auger@redhat.com]
>> Sent: 18 February 2021 10:42
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> eric.auger.pro@gmail.com; qemu-devel@nongnu.org; qemu-arm@nongnu.org;
>> alex.williamson@redhat.com
>> Cc: peter.maydell@linaro.org; jacob.jun.pan@linux.intel.com; Zengtao (B)
>> <prime.zeng@hisilicon.com>; jean-philippe@linaro.org; tn@semihalf.com;
>> peterx@redhat.com; nicoleotsuka@gmail.com; vivek.gautam@arm.com;
>> yi.l.liu@intel.com; zhangfei.gao@gmail.com; yuzenghui
>> <yuzenghui@huawei.com>; qubingbing <qubingbing@hisilicon.com>
>> Subject: Re: [RFC v7 26/26] vfio/pci: Implement return_page_response page
>> response callback
>>
> [...]
> 
>>> Also, I just noted that this patch breaks the dev hot add/del functionality.
>>> device_add works fine but device_del is not removing the dev cleanly.Thank
>> you for reporting this!
>>
>> The test matrix becomes bigger and bigger :-( I Need to write some
>> avocado-vt tests or alike.
>>
>> I am currently working on the respin. At the moment I investigate the
>> DPDK issue that you reported and I was able to reproduce.
> 
> Ok. Good to know that it is reproducible.
> 
>> I intend to rebase on top of Jean-Philippe's
>> [PATCH v12 00/10] iommu: I/O page faults for SMMUv3
>>
>> Is that good enough for your SVA integration or do you want I prepare a
>> rebase on some extended code?
> 
> Could you please try to base it on https://jpbrucker.net/git/linux/log/?h=sva/current

OK. At least I will provide a branch.

Eric
> 
> I think that has the latest from Jean-Philippe and will be easy to add
> uacce/zip specific patches to test SVA/vSVA.
> 
> Thanks,
> Shameer
> 
>  
>> Thanks
>>
>> Eric
>>>
>>> The below one fixes it. Please check.
>>>
>>> Thanks,
>>> Shameer
>>>
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index 797acd9c73..92c1d48316 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -3470,6 +3470,7 @@ static void vfio_instance_finalize(Object *obj)
>>>      vfio_display_finalize(vdev);
>>>      vfio_bars_finalize(vdev);
>>>      vfio_region_finalize(&vdev->dma_fault_region);
>>> +    vfio_region_finalize(&vdev->dma_fault_response_region);
>>>      g_free(vdev->emulated_config_bits);
>>>      g_free(vdev->rom);
>>>      /*
>>> @@ -3491,6 +3492,7 @@ static void vfio_exitfn(PCIDevice *pdev)
>>>      vfio_unregister_err_notifier(vdev);
>>>      vfio_unregister_ext_irq_notifiers(vdev);
>>>      vfio_region_exit(&vdev->dma_fault_region);
>>> +    vfio_region_exit(&vdev->dma_fault_response_region);
>>>      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
>>>      if (vdev->irqchip_change_notifier.notify) {
>>>
>> kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_not
>>>
>>>
>>>
>
Eric Auger Feb. 24, 2021, 1:43 p.m. UTC | #5
Hi Shameer,

On 2/18/21 2:32 PM, Auger Eric wrote:
> Hi Shameer,
> 
> On 2/18/21 12:46 PM, Shameerali Kolothum Thodi wrote:
>>
>> Hi Eric,
>>
>>> -----Original Message-----
>>> From: Auger Eric [mailto:eric.auger@redhat.com]
>>> Sent: 18 February 2021 10:42
>>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>>> eric.auger.pro@gmail.com; qemu-devel@nongnu.org; qemu-arm@nongnu.org;
>>> alex.williamson@redhat.com
>>> Cc: peter.maydell@linaro.org; jacob.jun.pan@linux.intel.com; Zengtao (B)
>>> <prime.zeng@hisilicon.com>; jean-philippe@linaro.org; tn@semihalf.com;
>>> peterx@redhat.com; nicoleotsuka@gmail.com; vivek.gautam@arm.com;
>>> yi.l.liu@intel.com; zhangfei.gao@gmail.com; yuzenghui
>>> <yuzenghui@huawei.com>; qubingbing <qubingbing@hisilicon.com>
>>> Subject: Re: [RFC v7 26/26] vfio/pci: Implement return_page_response page
>>> response callback
>>>
>> [...]
>>
>>>> Also, I just noted that this patch breaks the dev hot add/del functionality.
>>>> device_add works fine but device_del is not removing the dev cleanly.Thank
>>> you for reporting this!
>>>
>>> The test matrix becomes bigger and bigger :-( I Need to write some
>>> avocado-vt tests or alike.
>>>
>>> I am currently working on the respin. At the moment I investigate the
>>> DPDK issue that you reported and I was able to reproduce.
>>
>> Ok. Good to know that it is reproducible.
>>
>>> I intend to rebase on top of Jean-Philippe's
>>> [PATCH v12 00/10] iommu: I/O page faults for SMMUv3
>>>
>>> Is that good enough for your SVA integration or do you want I prepare a
>>> rebase on some extended code?
>>
>> Could you please try to base it on https://jpbrucker.net/git/linux/log/?h=sva/current
> 
> OK. At least I will provide a branch.

I sent the respin on top of master branch + Jean-Philippe's
[PATCH v12 00/10] iommu: I/O page faults for SMMUv3,
because I thought it made more sense to post on master + some nearly
"ready to go" stuff.

Nevertheless I will do my best to prepare asap a branch based on Jean's
sva/current branch (based on 5.11-rc5)

Thanks

Eric



> 
> Eric
>>
>> I think that has the latest from Jean-Philippe and will be easy to add
>> uacce/zip specific patches to test SVA/vSVA.
>>
>> Thanks,
>> Shameer
>>
>>  
>>> Thanks
>>>
>>> Eric
>>>>
>>>> The below one fixes it. Please check.
>>>>
>>>> Thanks,
>>>> Shameer
>>>>
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index 797acd9c73..92c1d48316 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -3470,6 +3470,7 @@ static void vfio_instance_finalize(Object *obj)
>>>>      vfio_display_finalize(vdev);
>>>>      vfio_bars_finalize(vdev);
>>>>      vfio_region_finalize(&vdev->dma_fault_region);
>>>> +    vfio_region_finalize(&vdev->dma_fault_response_region);
>>>>      g_free(vdev->emulated_config_bits);
>>>>      g_free(vdev->rom);
>>>>      /*
>>>> @@ -3491,6 +3492,7 @@ static void vfio_exitfn(PCIDevice *pdev)
>>>>      vfio_unregister_err_notifier(vdev);
>>>>      vfio_unregister_ext_irq_notifiers(vdev);
>>>>      vfio_region_exit(&vdev->dma_fault_region);
>>>> +    vfio_region_exit(&vdev->dma_fault_response_region);
>>>>      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
>>>>      if (vdev->irqchip_change_notifier.notify) {
>>>>
>>> kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_not
>>>>
>>>>
>>>>
>>
Shameerali Kolothum Thodi Feb. 24, 2021, 3:38 p.m. UTC | #6
Hi Eric,

> -----Original Message-----
> From: Auger Eric [mailto:eric.auger@redhat.com]
> Sent: 24 February 2021 13:44
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> eric.auger.pro@gmail.com; qemu-devel@nongnu.org; qemu-arm@nongnu.org;
> alex.williamson@redhat.com
> Cc: peter.maydell@linaro.org; jacob.jun.pan@linux.intel.com;
> zhangfei.gao@gmail.com; jean-philippe@linaro.org; tn@semihalf.com;
> peterx@redhat.com; nicoleotsuka@gmail.com; vivek.gautam@arm.com;
> yi.l.liu@intel.com; Zengtao (B) <prime.zeng@hisilicon.com>; yuzenghui
> <yuzenghui@huawei.com>; qubingbing <qubingbing@hisilicon.com>
> Subject: Re: [RFC v7 26/26] vfio/pci: Implement return_page_response page
> response callback
> 
> Hi Shameer,
[...]
 
> I sent the respin on top of master branch + Jean-Philippe's
> [PATCH v12 00/10] iommu: I/O page faults for SMMUv3.
> because I thought it makes more sense to post on master + some nearly
> "ready to go" stuff.

Yes. I see that. Thanks for the respin. Will take a look at this soon.

> 
> Nevertheless I will do my best to prepare asap a branch based on Jean's
> sva/current branch (based on 5.11-rc5)

Ok.

Cheers,
Shameer
diff mbox series

Patch

diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 350e9e9005..ce0472611e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -147,6 +147,8 @@  struct VFIOPCIDevice {
     VFIOPCIExtIRQ *ext_irqs;
     VFIORegion dma_fault_region;
     uint32_t fault_tail_index;
+    VFIORegion dma_fault_response_region;
+    uint32_t fault_response_head_index;
     int (*resetfn)(struct VFIOPCIDevice *);
     uint32_t vendor_id;
     uint32_t device_id;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 4e3495bb60..797acd9c73 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2631,6 +2631,61 @@  out:
     g_free(fault_region_info);
 }
 
+static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error **errp)
+{
+    struct vfio_region_info *fault_region_info = NULL;
+    struct vfio_region_info_cap_fault *cap_fault;
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    struct vfio_info_cap_header *hdr;
+    char *fault_region_name;
+    int ret;
+
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                                   VFIO_REGION_TYPE_NESTED,
+                                   VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE,
+                                   &fault_region_info);
+    if (ret) {
+        goto out;
+    }
+
+    hdr = vfio_get_region_info_cap(fault_region_info,
+                                   VFIO_REGION_INFO_CAP_DMA_FAULT);
+    if (!hdr) {
+        error_setg(errp, "failed to retrieve DMA FAULT RESPONSE capability");
+        goto out;
+    }
+    cap_fault = container_of(hdr, struct vfio_region_info_cap_fault,
+                             header);
+    if (cap_fault->version != 1) {
+        error_setg(errp, "Unsupported DMA FAULT RESPONSE API version %d",
+                   cap_fault->version);
+        goto out;
+    }
+
+    fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d",
+                                        vbasedev->name,
+                                        fault_region_info->index);
+
+    ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+                            &vdev->dma_fault_response_region,
+                            fault_region_info->index,
+                            fault_region_name);
+    g_free(fault_region_name);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "failed to set up the DMA FAULT RESPONSE region %d",
+                         fault_region_info->index);
+        goto out;
+    }
+
+    ret = vfio_region_mmap(&vdev->dma_fault_response_region);
+    if (ret) {
+        error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT RESPONSE queue");
+    }
+out:
+    g_free(fault_region_info);
+}
+
 static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2706,6 +2761,12 @@  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
         return;
     }
 
+    vfio_init_fault_response_regions(vdev, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
     irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
 
     ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
@@ -2884,8 +2945,68 @@  static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn,
     return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info);
 }
 
+static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn,
+                                           IOMMUPageResponse *resp)
+{
+    PCIDevice *pdev = bus->devices[devfn];
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    struct iommu_page_response *response = &resp->resp;
+    struct vfio_region_dma_fault_response header;
+    struct iommu_page_response *queue;
+    char *queue_buffer = NULL;
+    ssize_t bytes;
+
+    if (!vdev->dma_fault_response_region.mem) {
+        return -EINVAL;
+    }
+
+    /* read the header */
+    bytes = pread(vdev->vbasedev.fd, &header, sizeof(header),
+                  vdev->dma_fault_response_region.fd_offset);
+    if (bytes != sizeof(header)) {
+        error_report("%s unable to read the fault region header (0x%lx)",
+                     __func__, bytes);
+        return -1;
+    }
+
+    /* Normally the fault queue is mmapped */
+    queue = (struct iommu_page_response *)vdev->dma_fault_response_region.mmaps[0].mmap;
+    if (!queue) {
+        size_t queue_size = header.nb_entries * header.entry_size;
+
+        error_report("%s: fault queue not mmapped: slower fault handling",
+                     vdev->vbasedev.name);
+
+        queue_buffer = g_malloc(queue_size);
+        bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size,
+                      vdev->dma_fault_response_region.fd_offset + header.offset);
+        if (bytes != queue_size) {
+            error_report("%s unable to read the fault queue (0x%lx)",
+                         __func__, bytes);
+            return -1;
+        }
+
+        queue = (struct iommu_page_response *)queue_buffer;
+    }
+    /* deposit the new response in the queue and increment the head */
+    memcpy(queue + header.head, response, header.entry_size);
+
+    vdev->fault_response_head_index =
+        (vdev->fault_response_head_index + 1) % header.nb_entries;
+    bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index, 4,
+                   vdev->dma_fault_response_region.fd_offset);
+    if (bytes != 4) {
+        error_report("%s unable to write the fault response region head index (0x%lx)",
+                     __func__, bytes);
+    }
+    g_free(queue_buffer);
+
+    return 0;
+}
+
 static PCIPASIDOps vfio_pci_pasid_ops = {
     .set_pasid_table = vfio_iommu_set_pasid_table,
+    .return_page_response = vfio_iommu_return_page_response,
 };
 
 static void vfio_dma_fault_notifier_handler(void *opaque)