[v13,2/2] vhost-vdpa: add support for vIOMMU

Message ID 20230208025722.2683024-3-lulu@redhat.com
State New
Series vhost-vdpa: add support for vIOMMU

Commit Message

Cindy Lu Feb. 8, 2023, 2:57 a.m. UTC
1. Add support for vIOMMU.
Add new functions to handle IOMMU memory regions:
- during iommu_region_add, register a dedicated IOMMU notifier
  and store it in a list of all such notifiers.
- during iommu_region_del, find the matching IOMMU notifier in the
  list, unregister it, and remove it from the list.
- since SVQ does not support the IOMMU yet, add a check in
  vhost_vdpa_dev_start: if SVQ and the IOMMU are enabled at the
  same time, the function returns failure.

2. Skip the iova_max check in vhost_vdpa_listener_skipped_section()
when the MR is an IOMMU, and perform it in vhost_vdpa_iommu_map_notify()
instead, where the size actually mapped to the kernel is known.

Verified with the vp_vdpa and vdpa_sim_net drivers.

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 hw/virtio/vhost-vdpa.c         | 173 ++++++++++++++++++++++++++++++---
 include/hw/virtio/vhost-vdpa.h |  11 +++
 2 files changed, 173 insertions(+), 11 deletions(-)
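
In brief, the new IOMMU notifier turns each guest IOTLB event into a
vhost-vdpa DMA map or unmap. A condensed sketch of that flow, simplified
from the full patch below (batching, error handling, and the device-range
check elided):

    static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n,
                                            IOMMUTLBEntry *iotlb)
    {
        struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
        struct vhost_vdpa *v = iommu->dev;
        /* translate the IOTLB entry into an IOVA in the device's space */
        hwaddr iova = iotlb->iova + iommu->iommu_offset;

        if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
            void *vaddr;
            bool read_only;

            /* resolve the guest target to a host virtual address */
            if (memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
                vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   iotlb->addr_mask + 1, vaddr, read_only);
            }
        } else {
            vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                                 iotlb->addr_mask + 1);
        }
    }

One notifier of this kind is registered per IOMMU memory region section
and kept on v->iommu_list so that iommu_region_del can find and
unregister it later.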

Comments

Jason Wang March 6, 2023, 3:36 a.m. UTC | #1
On 2023/2/8 10:57, Cindy Lu wrote:
> 1.Add support for vIOMMU.
> Add the new function to deal with iommu MR.
> - during iommu_region_add register a specific IOMMU notifier,
>    and store all notifiers in a list.
> - during iommu_region_del, compare and delete the IOMMU notifier from the list
> - since the SVQ not support iommu yet, add the check for IOMMU
>    in vhost_vdpa_dev_start, if the SVQ and IOMMU enable at the same time
>    function will return fail.
>
> 2.Skip the check in vhost_vdpa_listener_skipped_section() while
> MR is IOMMU, Move this check to  vhost_vdpa_iommu_map_notify()


This needs some tweaking as well; it's better not to repeat what is done
in the code but to explain why you need this change. More can be found at:

https://docs.kernel.org/process/submitting-patches.html#describe-your-changes


>
> Verified in vp_vdpa and vdpa_sim_net driver
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>   hw/virtio/vhost-vdpa.c         | 173 ++++++++++++++++++++++++++++++---
>   include/hw/virtio/vhost-vdpa.h |  11 +++
>   2 files changed, 173 insertions(+), 11 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 542e003101..46f676ab71 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -26,6 +26,7 @@
>   #include "cpu.h"
>   #include "trace.h"
>   #include "qapi/error.h"
> +#include "hw/virtio/virtio-access.h"
>   
>   /*
>    * Return one past the end of the end of section. Be careful with uint64_t
> @@ -60,15 +61,22 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
>                        iova_min, section->offset_within_address_space);
>           return true;
>       }
> +    /*
> +     * While using vIOMMU, Sometimes the section will be larger than iova_max
> +     * but the memory that  actually mapping is smaller, So skip the check
> +     * here. Will add the check in vhost_vdpa_iommu_map_notify,
> +     *There is the real size that maps to the kernel


Please tweak the comment; it has issues of whitespace, capitalization,
and punctuation marks.


> +     */
>   
> -    llend = vhost_vdpa_section_end(section);
> -    if (int128_gt(llend, int128_make64(iova_max))) {
> -        error_report("RAM section out of device range (max=0x%" PRIx64
> -                     ", end addr=0x%" PRIx64 ")",
> -                     iova_max, int128_get64(llend));
> -        return true;
> +    if (!memory_region_is_iommu(section->mr)) {


Not related to this patch, but should we exclude non-RAM regions here as
well?


> +        llend = vhost_vdpa_section_end(section);
> +        if (int128_gt(llend, int128_make64(iova_max))) {
> +            error_report("RAM section out of device range (max=0x%" PRIx64
> +                         ", end addr=0x%" PRIx64 ")",
> +                         iova_max, int128_get64(llend));
> +            return true;
> +        }
>       }
> -
>       return false;
>   }
>   
> @@ -185,6 +193,118 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
>       v->iotlb_batch_begin_sent = false;
>   }
>   
> +static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +{
> +    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
> +
> +    hwaddr iova = iotlb->iova + iommu->iommu_offset;
> +    struct vhost_vdpa *v = iommu->dev;
> +    void *vaddr;
> +    int ret;
> +    Int128 llend;
> +
> +    if (iotlb->target_as != &address_space_memory) {
> +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> +        return;
> +    }
> +    RCU_READ_LOCK_GUARD();
> +    /* check if RAM section out of device range */
> +    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
> +    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
> +        error_report("RAM section out of device range (max=0x%" PRIx64
> +                     ", end addr=0x%" PRIx64 ")",
> +                     v->iova_range.last, int128_get64(llend));
> +        return;


Can you meet this condition? If yes, should we crop instead of fail here?


> +    }
> +
> +    vhost_vdpa_iotlb_batch_begin_once(v);


Where do we send the VHOST_IOTLB_BATCH_END message, or do we even need 
any batching here?


> +
> +    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> +        bool read_only;
> +
> +        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
> +            return;
> +        }
> +
> +        ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> +                                 iotlb->addr_mask + 1, vaddr, read_only);
> +        if (ret) {
> +            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
> +                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
> +                         v, iova, iotlb->addr_mask + 1, vaddr, ret);
> +        }
> +    } else {
> +        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> +                                   iotlb->addr_mask + 1);
> +        if (ret) {
> +            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
> +                         "0x%" HWADDR_PRIx ") = %d (%m)",
> +                         v, iova, iotlb->addr_mask + 1, ret);
> +        }
> +    }
> +}
> +
> +static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
> +                                        MemoryRegionSection *section)
> +{
> +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> +
> +    struct vdpa_iommu *iommu;
> +    Int128 end;
> +    int iommu_idx;
> +    IOMMUMemoryRegion *iommu_mr;
> +    int ret;
> +
> +    iommu_mr = IOMMU_MEMORY_REGION(section->mr);
> +
> +    iommu = g_malloc0(sizeof(*iommu));
> +    end = int128_add(int128_make64(section->offset_within_region),
> +                     section->size);
> +    end = int128_sub(end, int128_one());
> +    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
> +                                                   MEMTXATTRS_UNSPECIFIED);
> +    iommu->iommu_mr = iommu_mr;
> +    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
> +                        IOMMU_NOTIFIER_IOTLB_EVENTS,
> +                        section->offset_within_region,
> +                        int128_get64(end),
> +                        iommu_idx);
> +    iommu->iommu_offset = section->offset_within_address_space -
> +                          section->offset_within_region;
> +    iommu->dev = v;
> +
> +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> +    if (ret) {
> +        g_free(iommu);
> +        return;
> +    }
> +
> +    QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next);
> +    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);
> +
> +    return;
> +}
> +
> +static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
> +                                        MemoryRegionSection *section)
> +{
> +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> +
> +    struct vdpa_iommu *iommu;
> +
> +    QLIST_FOREACH(iommu, &v->iommu_list, iommu_next)
> +    {
> +        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
> +            iommu->n.start == section->offset_within_region) {
> +            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
> +            QLIST_REMOVE(iommu, iommu_next);
> +            g_free(iommu);
> +            break;
> +        }
> +    }
> +}
> +
>   static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>                                              MemoryRegionSection *section)
>   {
> @@ -199,6 +319,10 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>                                               v->iova_range.last)) {
>           return;
>       }
> +    if (memory_region_is_iommu(section->mr)) {
> +        vhost_vdpa_iommu_region_add(listener, section);
> +        return;
> +    }
>   
>       if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>                    (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> @@ -278,6 +402,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>                                               v->iova_range.last)) {
>           return;
>       }
> +    if (memory_region_is_iommu(section->mr)) {
> +        vhost_vdpa_iommu_region_del(listener, section);
> +    }
>   
>       if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
>                    (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> @@ -288,7 +415,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>       iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>       llend = vhost_vdpa_section_end(section);
>   
> -    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
> +    trace_vhost_vdpa_listener_region_del(v, iova,
> +        int128_get64(int128_sub(llend, int128_one())));


Seems like an independent fix?


>   
>       if (int128_ge(int128_make64(iova), llend)) {
>           return;
> @@ -315,10 +443,28 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>           vhost_iova_tree_remove(v->iova_tree, *result);
>       }
>       vhost_vdpa_iotlb_batch_begin_once(v);
> +    /*
> +     * The unmap ioctl doesn't accept a full 64-bit. need to check it
> +     */
> +    if (int128_eq(llsize, int128_2_64())) {
> +        llsize = int128_rshift(llsize, 1);
> +        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> +                                   int128_get64(llsize));
> +
> +        if (ret) {
> +            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
> +                         "0x%" HWADDR_PRIx ") = %d (%m)",
> +                         v, iova, int128_get64(llsize), ret);
> +        }
> +        iova += int128_get64(llsize);
> +    }
>       ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
>                                  int128_get64(llsize));
> +
>       if (ret) {
> -        error_report("vhost_vdpa dma unmap error!");
> +        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
> +                     "0x%" HWADDR_PRIx ") = %d (%m)",
> +                     v, iova, int128_get64(llsize), ret);
>       }


Let's use a separate patch to fix this full 64-bit issue.


>   
>       memory_region_unref(section->mr);
> @@ -578,7 +724,6 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
>       v = dev->opaque;
>       trace_vhost_vdpa_cleanup(dev, v);
>       vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
> -    memory_listener_unregister(&v->listener);


Any reason for not unregistering listener here?

Thanks


>       vhost_vdpa_svq_cleanup(dev);
>   
>       dev->opaque = NULL;
> @@ -1130,7 +1275,13 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>       }
>   
>       if (started) {
> -        memory_listener_register(&v->listener, &address_space_memory);
> +        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
> +            error_report("the SVQ can not wortk in with IOMMU enable, please "
> +                         "disable IOMMU and try again");
> +            return -1;
> +        }
> +        memory_listener_register(&v->listener, dev->vdev->dma_as);
> +
>           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>       } else {
>           vhost_vdpa_reset_device(dev);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 7997f09a8d..54cede86dc 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -49,6 +49,8 @@ typedef struct vhost_vdpa {
>       void *shadow_vq_ops_opaque;
>       struct vhost_dev *dev;
>       VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> +    QLIST_HEAD(, vdpa_iommu) iommu_list;
> +    IOMMUNotifier n;
>   } VhostVDPA;
>   
>   int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range);
> @@ -58,4 +60,13 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
>   int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
>                            hwaddr size);
>   
> +typedef struct vdpa_iommu {
> +    struct vhost_vdpa *dev;
> +    IOMMUMemoryRegion *iommu_mr;
> +    hwaddr iommu_offset;
> +    IOMMUNotifier n;
> +    QLIST_ENTRY(vdpa_iommu) iommu_next;
> +} VDPAIOMMUState;
> +
> +
>   #endif
Cindy Lu March 15, 2023, 5:38 a.m. UTC | #2
On Mon, Mar 6, 2023 at 11:36 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2023/2/8 10:57, Cindy Lu wrote:
> > 1.Add support for vIOMMU.
> > Add the new function to deal with iommu MR.
> > - during iommu_region_add register a specific IOMMU notifier,
> >    and store all notifiers in a list.
> > - during iommu_region_del, compare and delete the IOMMU notifier from the list
> > - since the SVQ not support iommu yet, add the check for IOMMU
> >    in vhost_vdpa_dev_start, if the SVQ and IOMMU enable at the same time
> >    function will return fail.
> >
> > 2.Skip the check in vhost_vdpa_listener_skipped_section() while
> > MR is IOMMU, Move this check to  vhost_vdpa_iommu_map_notify()
>
>
> This needs some tweaking as well; it's better not to repeat what is done
> in the code but to explain why you need this change. More can be found at:
>
> https://docs.kernel.org/process/submitting-patches.html#describe-your-changes
>
sure, will change this
>
> >
> > Verified in vp_vdpa and vdpa_sim_net driver
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >   hw/virtio/vhost-vdpa.c         | 173 ++++++++++++++++++++++++++++++---
> >   include/hw/virtio/vhost-vdpa.h |  11 +++
> >   2 files changed, 173 insertions(+), 11 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 542e003101..46f676ab71 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -26,6 +26,7 @@
> >   #include "cpu.h"
> >   #include "trace.h"
> >   #include "qapi/error.h"
> > +#include "hw/virtio/virtio-access.h"
> >
> >   /*
> >    * Return one past the end of the end of section. Be careful with uint64_t
> > @@ -60,15 +61,22 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
> >                        iova_min, section->offset_within_address_space);
> >           return true;
> >       }
> > +    /*
> > +     * While using vIOMMU, Sometimes the section will be larger than iova_max
> > +     * but the memory that  actually mapping is smaller, So skip the check
> > +     * here. Will add the check-in vhost_vdpa_iommu_map_notify,
> > +     *There is the real size that maps to the kernel
>
>
> Please tweak the comment; it has issues of whitespace, capitalization,
> and punctuation marks.
>
sure will change this
>
> > +     */
> >
> > -    llend = vhost_vdpa_section_end(section);
> > -    if (int128_gt(llend, int128_make64(iova_max))) {
> > -        error_report("RAM section out of device range (max=0x%" PRIx64
> > -                     ", end addr=0x%" PRIx64 ")",
> > -                     iova_max, int128_get64(llend));
> > -        return true;
> > +    if (!memory_region_is_iommu(section->mr)) {
>
>
> Not related to this patch, but should we exclude non-RAM regions here as
> well?
>
Sure, will add this check
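
For illustration only, a hedged sketch of what such a check could look
like; memory_region_is_ram() is the existing QEMU predicate, but whether
the follow-up version does exactly this is an assumption:

    /* hypothetical addition to vhost_vdpa_listener_skipped_section() */
    if (!memory_region_is_ram(section->mr) &&
        !memory_region_is_iommu(section->mr)) {
        return true;    /* skip sections that are neither RAM nor IOMMU */
    }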

>
> > +        llend = vhost_vdpa_section_end(section);
> > +        if (int128_gt(llend, int128_make64(iova_max))) {
> > +            error_report("RAM section out of device range (max=0x%" PRIx64
> > +                         ", end addr=0x%" PRIx64 ")",
> > +                         iova_max, int128_get64(llend));
> > +            return true;
> > +        }
> >       }
> > -
> >       return false;
> >   }
> >
> > @@ -185,6 +193,118 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
> >       v->iotlb_batch_begin_sent = false;
> >   }
> >
> > +static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > +{
> > +    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
> > +
> > +    hwaddr iova = iotlb->iova + iommu->iommu_offset;
> > +    struct vhost_vdpa *v = iommu->dev;
> > +    void *vaddr;
> > +    int ret;
> > +    Int128 llend;
> > +
> > +    if (iotlb->target_as != &address_space_memory) {
> > +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> > +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> > +        return;
> > +    }
> > +    RCU_READ_LOCK_GUARD();
> > +    /* check if RAM section out of device range */
> > +    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
> > +    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
> > +        error_report("RAM section out of device range (max=0x%" PRIx64
> > +                     ", end addr=0x%" PRIx64 ")",
> > +                     v->iova_range.last, int128_get64(llend));
> > +        return;
>
>
> Can you meet this condition? If yes, should we crop instead of fail here?
>
Based on my testing, we never hit this condition, so I just put an
error report here.

>
> > +    }
> > +
> > +    vhost_vdpa_iotlb_batch_begin_once(v);
>
>
> Where do we send the VHOST_IOTLB_BATCH_END message, or do we even need
> any batching here?
>
The VHOST_IOTLB_BATCH_END message is sent by
vhost_vdpa_listener_commit. Because we use only one vhost-vdpa memory
listener, and non-IOMMU mode also needs it, we still need to send the
batch begin here. Based on my testing, after the notify function is
called, the listener commit function is also called, so it works well
in this situation.
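
For readers following the thread, this is roughly how the begin/end pair
works in hw/virtio/vhost-vdpa.c; a paraphrase from memory, so the details
may differ from the exact tree this series targets:

    static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
    {
        if (v->iotlb_batch_begin_sent) {
            return;                 /* a batch is already open */
        }
        /* writes a vhost_msg_v2 with iotlb.type = VHOST_IOTLB_BATCH_BEGIN */
        vhost_vdpa_listener_begin_batch(v);
        v->iotlb_batch_begin_sent = true;
    }

    static void vhost_vdpa_listener_commit(MemoryListener *listener)
    {
        struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa,
                                            listener);
        struct vhost_msg_v2 msg = {};

        if (!v->iotlb_batch_begin_sent) {
            return;                 /* nothing to close */
        }
        msg.type = v->msg_type;
        msg.iotlb.type = VHOST_IOTLB_BATCH_END;
        if (write(v->device_fd, &msg, sizeof(msg)) != sizeof(msg)) {
            error_report("failed to write BATCH_END, fd=%d (%m)",
                         v->device_fd);
        }
        v->iotlb_batch_begin_sent = false;
    }

So an update sent from the IOMMU notifier opens the batch, and the
listener's commit hook closes it once the memory transaction completes.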
>
> > +
> > +    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> > +        bool read_only;
> > +
> > +        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
> > +            return;
> > +        }
> > +
> > +        ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> > +                                 iotlb->addr_mask + 1, vaddr, read_only);
> > +        if (ret) {
> > +            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
> > +                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
> > +                         v, iova, iotlb->addr_mask + 1, vaddr, ret);
> > +        }
> > +    } else {
> > +        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> > +                                   iotlb->addr_mask + 1);
> > +        if (ret) {
> > +            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
> > +                         "0x%" HWADDR_PRIx ") = %d (%m)",
> > +                         v, iova, iotlb->addr_mask + 1, ret);
> > +        }
> > +    }
> > +}
> > +
> > +static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
> > +                                        MemoryRegionSection *section)
> > +{
> > +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> > +
> > +    struct vdpa_iommu *iommu;
> > +    Int128 end;
> > +    int iommu_idx;
> > +    IOMMUMemoryRegion *iommu_mr;
> > +    int ret;
> > +
> > +    iommu_mr = IOMMU_MEMORY_REGION(section->mr);
> > +
> > +    iommu = g_malloc0(sizeof(*iommu));
> > +    end = int128_add(int128_make64(section->offset_within_region),
> > +                     section->size);
> > +    end = int128_sub(end, int128_one());
> > +    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
> > +                                                   MEMTXATTRS_UNSPECIFIED);
> > +    iommu->iommu_mr = iommu_mr;
> > +    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
> > +                        IOMMU_NOTIFIER_IOTLB_EVENTS,
> > +                        section->offset_within_region,
> > +                        int128_get64(end),
> > +                        iommu_idx);
> > +    iommu->iommu_offset = section->offset_within_address_space -
> > +                          section->offset_within_region;
> > +    iommu->dev = v;
> > +
> > +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> > +    if (ret) {
> > +        g_free(iommu);
> > +        return;
> > +    }
> > +
> > +    QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next);
> > +    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);
> > +
> > +    return;
> > +}
> > +
> > +static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
> > +                                        MemoryRegionSection *section)
> > +{
> > +    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
> > +
> > +    struct vdpa_iommu *iommu;
> > +
> > +    QLIST_FOREACH(iommu, &v->iommu_list, iommu_next)
> > +    {
> > +        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
> > +            iommu->n.start == section->offset_within_region) {
> > +            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
> > +            QLIST_REMOVE(iommu, iommu_next);
> > +            g_free(iommu);
> > +            break;
> > +        }
> > +    }
> > +}
> > +
> >   static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> >                                              MemoryRegionSection *section)
> >   {
> > @@ -199,6 +319,10 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> >                                               v->iova_range.last)) {
> >           return;
> >       }
> > +    if (memory_region_is_iommu(section->mr)) {
> > +        vhost_vdpa_iommu_region_add(listener, section);
> > +        return;
> > +    }
> >
> >       if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> >                    (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> > @@ -278,6 +402,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >                                               v->iova_range.last)) {
> >           return;
> >       }
> > +    if (memory_region_is_iommu(section->mr)) {
> > +        vhost_vdpa_iommu_region_del(listener, section);
> > +    }
> >
> >       if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
> >                    (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> > @@ -288,7 +415,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >       iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> >       llend = vhost_vdpa_section_end(section);
> >
> > -    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
> > +    trace_vhost_vdpa_listener_region_del(v, iova,
> > +        int128_get64(int128_sub(llend, int128_one())));
>
>
> Seems like an independent fix?
>
sure, will move this to another patch
>
> >
> >       if (int128_ge(int128_make64(iova), llend)) {
> >           return;
> > @@ -315,10 +443,28 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
> >           vhost_iova_tree_remove(v->iova_tree, *result);
> >       }
> >       vhost_vdpa_iotlb_batch_begin_once(v);
> > +    /*
> > +     * The unmap ioctl doesn't accept a full 64-bit. need to check it
> > +     */
> > +    if (int128_eq(llsize, int128_2_64())) {
> > +        llsize = int128_rshift(llsize, 1);
> > +        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> > +                                   int128_get64(llsize));
> > +
> > +        if (ret) {
> > +            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
> > +                         "0x%" HWADDR_PRIx ") = %d (%m)",
> > +                         v, iova, int128_get64(llsize), ret);
> > +        }
> > +        iova += int128_get64(llsize);
> > +    }
> >       ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
> >                                  int128_get64(llsize));
> > +
> >       if (ret) {
> > -        error_report("vhost_vdpa dma unmap error!");
> > +        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
> > +                     "0x%" HWADDR_PRIx ") = %d (%m)",
> > +                     v, iova, int128_get64(llsize), ret);
> >       }
>
>
> Let's use a separate patch to fix this full 64-bit issue.
>
sure will do
>
> >
> >       memory_region_unref(section->mr);
> > @@ -578,7 +724,6 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
> >       v = dev->opaque;
> >       trace_vhost_vdpa_cleanup(dev, v);
> >       vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
> > -    memory_listener_unregister(&v->listener);
>
>
> Any reason for not unregistering listener here?
>
sure, will fix this
> Thanks
>
>
> >       vhost_vdpa_svq_cleanup(dev);
> >
> >       dev->opaque = NULL;
> > @@ -1130,7 +1275,13 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> >       }
> >
> >       if (started) {
> > -        memory_listener_register(&v->listener, &address_space_memory);
> > +        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
> > +            error_report("the SVQ can not wortk in with IOMMU enable, please "
> > +                         "disable IOMMU and try again");
> > +            return -1;
> > +        }
> > +        memory_listener_register(&v->listener, dev->vdev->dma_as);
> > +
> >           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> >       } else {
> >           vhost_vdpa_reset_device(dev);
> > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> > index 7997f09a8d..54cede86dc 100644
> > --- a/include/hw/virtio/vhost-vdpa.h
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -49,6 +49,8 @@ typedef struct vhost_vdpa {
> >       void *shadow_vq_ops_opaque;
> >       struct vhost_dev *dev;
> >       VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> > +    QLIST_HEAD(, vdpa_iommu) iommu_list;
> > +    IOMMUNotifier n;
> >   } VhostVDPA;
> >
> >   int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range);
> > @@ -58,4 +60,13 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
> >   int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
> >                            hwaddr size);
> >
> > +typedef struct vdpa_iommu {
> > +    struct vhost_vdpa *dev;
> > +    IOMMUMemoryRegion *iommu_mr;
> > +    hwaddr iommu_offset;
> > +    IOMMUNotifier n;
> > +    QLIST_ENTRY(vdpa_iommu) iommu_next;
> > +} VDPAIOMMUState;
> > +
> > +
> >   #endif
>

Patch

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 542e003101..46f676ab71 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -26,6 +26,7 @@ 
 #include "cpu.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "hw/virtio/virtio-access.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -60,15 +61,22 @@  static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                      iova_min, section->offset_within_address_space);
         return true;
     }
+    /*
+     * While using vIOMMU, Sometimes the section will be larger than iova_max
+     * but the memory that  actually mapping is smaller, So skip the check
+     * here. Will add the check in vhost_vdpa_iommu_map_notify,
+     *There is the real size that maps to the kernel
+     */
 
-    llend = vhost_vdpa_section_end(section);
-    if (int128_gt(llend, int128_make64(iova_max))) {
-        error_report("RAM section out of device range (max=0x%" PRIx64
-                     ", end addr=0x%" PRIx64 ")",
-                     iova_max, int128_get64(llend));
-        return true;
+    if (!memory_region_is_iommu(section->mr)) {
+        llend = vhost_vdpa_section_end(section);
+        if (int128_gt(llend, int128_make64(iova_max))) {
+            error_report("RAM section out of device range (max=0x%" PRIx64
+                         ", end addr=0x%" PRIx64 ")",
+                         iova_max, int128_get64(llend));
+            return true;
+        }
     }
-
     return false;
 }
 
@@ -185,6 +193,118 @@  static void vhost_vdpa_listener_commit(MemoryListener *listener)
     v->iotlb_batch_begin_sent = false;
 }
 
+static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
+
+    hwaddr iova = iotlb->iova + iommu->iommu_offset;
+    struct vhost_vdpa *v = iommu->dev;
+    void *vaddr;
+    int ret;
+    Int128 llend;
+
+    if (iotlb->target_as != &address_space_memory) {
+        error_report("Wrong target AS \"%s\", only system memory is allowed",
+                     iotlb->target_as->name ? iotlb->target_as->name : "none");
+        return;
+    }
+    RCU_READ_LOCK_GUARD();
+    /* check if RAM section out of device range */
+    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
+    if (int128_gt(llend, int128_make64(v->iova_range.last))) {
+        error_report("RAM section out of device range (max=0x%" PRIx64
+                     ", end addr=0x%" PRIx64 ")",
+                     v->iova_range.last, int128_get64(llend));
+        return;
+    }
+
+    vhost_vdpa_iotlb_batch_begin_once(v);
+
+    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
+        bool read_only;
+
+        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
+            return;
+        }
+
+        ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+                                 iotlb->addr_mask + 1, vaddr, read_only);
+        if (ret) {
+            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
+                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
+                         v, iova, iotlb->addr_mask + 1, vaddr, ret);
+        }
+    } else {
+        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+                                   iotlb->addr_mask + 1);
+        if (ret) {
+            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+                         "0x%" HWADDR_PRIx ") = %d (%m)",
+                         v, iova, iotlb->addr_mask + 1, ret);
+        }
+    }
+}
+
+static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
+                                        MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+    struct vdpa_iommu *iommu;
+    Int128 end;
+    int iommu_idx;
+    IOMMUMemoryRegion *iommu_mr;
+    int ret;
+
+    iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+
+    iommu = g_malloc0(sizeof(*iommu));
+    end = int128_add(int128_make64(section->offset_within_region),
+                     section->size);
+    end = int128_sub(end, int128_one());
+    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+                                                   MEMTXATTRS_UNSPECIFIED);
+    iommu->iommu_mr = iommu_mr;
+    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
+                        IOMMU_NOTIFIER_IOTLB_EVENTS,
+                        section->offset_within_region,
+                        int128_get64(end),
+                        iommu_idx);
+    iommu->iommu_offset = section->offset_within_address_space -
+                          section->offset_within_region;
+    iommu->dev = v;
+
+    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
+    if (ret) {
+        g_free(iommu);
+        return;
+    }
+
+    QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next);
+    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);
+
+    return;
+}
+
+static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
+                                        MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+    struct vdpa_iommu *iommu;
+
+    QLIST_FOREACH(iommu, &v->iommu_list, iommu_next)
+    {
+        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
+            iommu->n.start == section->offset_within_region) {
+            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
+            QLIST_REMOVE(iommu, iommu_next);
+            g_free(iommu);
+            break;
+        }
+    }
+}
+
 static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                            MemoryRegionSection *section)
 {
@@ -199,6 +319,10 @@  static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                             v->iova_range.last)) {
         return;
     }
+    if (memory_region_is_iommu(section->mr)) {
+        vhost_vdpa_iommu_region_add(listener, section);
+        return;
+    }
 
     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
@@ -278,6 +402,9 @@  static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                             v->iova_range.last)) {
         return;
     }
+    if (memory_region_is_iommu(section->mr)) {
+        vhost_vdpa_iommu_region_del(listener, section);
+    }
 
     if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                  (section->offset_within_region & ~TARGET_PAGE_MASK))) {
@@ -288,7 +415,8 @@  static void vhost_vdpa_listener_region_del(MemoryListener *listener,
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
     llend = vhost_vdpa_section_end(section);
 
-    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
+    trace_vhost_vdpa_listener_region_del(v, iova,
+        int128_get64(int128_sub(llend, int128_one())));
 
     if (int128_ge(int128_make64(iova), llend)) {
         return;
@@ -315,10 +443,28 @@  static void vhost_vdpa_listener_region_del(MemoryListener *listener,
         vhost_iova_tree_remove(v->iova_tree, *result);
     }
     vhost_vdpa_iotlb_batch_begin_once(v);
+    /*
+     * The unmap ioctl can't take a full 2^64 size; split the span in two
+     */
+    if (int128_eq(llsize, int128_2_64())) {
+        llsize = int128_rshift(llsize, 1);
+        ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+                                   int128_get64(llsize));
+
+        if (ret) {
+            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+                         "0x%" HWADDR_PRIx ") = %d (%m)",
+                         v, iova, int128_get64(llsize), ret);
+        }
+        iova += int128_get64(llsize);
+    }
     ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                                int128_get64(llsize));
+
     if (ret) {
-        error_report("vhost_vdpa dma unmap error!");
+        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+                     "0x%" HWADDR_PRIx ") = %d (%m)",
+                     v, iova, int128_get64(llsize), ret);
     }
 
     memory_region_unref(section->mr);
@@ -578,7 +724,6 @@  static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     v = dev->opaque;
     trace_vhost_vdpa_cleanup(dev, v);
     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
-    memory_listener_unregister(&v->listener);
     vhost_vdpa_svq_cleanup(dev);
 
     dev->opaque = NULL;
@@ -1130,7 +1275,13 @@  static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
     }
 
     if (started) {
-        memory_listener_register(&v->listener, &address_space_memory);
+        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
+            error_report("SVQ can not work with vIOMMU enabled, please "
+                         "disable the vIOMMU and try again");
+            return -1;
+        }
+        memory_listener_register(&v->listener, dev->vdev->dma_as);
+
         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
     } else {
         vhost_vdpa_reset_device(dev);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7997f09a8d..54cede86dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -49,6 +49,8 @@  typedef struct vhost_vdpa {
     void *shadow_vq_ops_opaque;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
+    QLIST_HEAD(, vdpa_iommu) iommu_list;
+    IOMMUNotifier n;
 } VhostVDPA;
 
 int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range);
@@ -58,4 +60,13 @@  int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
 int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                          hwaddr size);
 
+typedef struct vdpa_iommu {
+    struct vhost_vdpa *dev;
+    IOMMUMemoryRegion *iommu_mr;
+    hwaddr iommu_offset;
+    IOMMUNotifier n;
+    QLIST_ENTRY(vdpa_iommu) iommu_next;
+} VDPAIOMMUState;
+
+
 #endif