diff mbox

[v7,02/17] vfio: introduce vfio_get_vaddr()

Message ID 1486456099-7345-3-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu Feb. 7, 2017, 8:28 a.m. UTC
A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
the operation is unmap, but it won't hurt much.

One thing to mention is that we need the RCU read lock to protect the
whole translation and map/unmap procedure.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/vfio/common.c | 65 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 20 deletions(-)

Comments

David Gibson Feb. 10, 2017, 1:12 a.m. UTC | #1
On Tue, Feb 07, 2017 at 04:28:04PM +0800, Peter Xu wrote:
> A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
> the operation is unmap, but it won't hurt much.
> 
> One thing to mention is that we need the RCU read lock to protect the
> whole translation and map/unmap procedure.
> 
> Acked-by: Alex Williamson <alex.williamson@redhat.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Peter Xu <peterx@redhat.com>

So, I know I reviewed this already, but looking again I'm confused.

I'm not sure how the original code ever worked: if this is an unmap
(perm == IOMMU_NONE), then I wouldn't even expect
iotlb->translated_addr to have a valid value, but we're passing it to
address_space_translate() and failing if it it doesn't give us
sensible results.

> ---
>  hw/vfio/common.c | 65 +++++++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 45 insertions(+), 20 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 174f351..42c4790 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -294,54 +294,79 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>             section->offset_within_address_space & (1ULL << 63);
>  }
>  
> -static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +/* Called with rcu_read_lock held.  */
> +static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
> +                           bool *read_only)
>  {
> -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> -    VFIOContainer *container = giommu->container;
> -    hwaddr iova = iotlb->iova + giommu->iommu_offset;
>      MemoryRegion *mr;
>      hwaddr xlat;
>      hwaddr len = iotlb->addr_mask + 1;
> -    void *vaddr;
> -    int ret;
> -
> -    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> -                                iova, iova + iotlb->addr_mask);
> -
> -    if (iotlb->target_as != &address_space_memory) {
> -        error_report("Wrong target AS \"%s\", only system memory is allowed",
> -                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> -        return;
> -    }
> +    bool writable = iotlb->perm & IOMMU_WO;
>  
>      /*
>       * The IOMMU TLB entry we have just covers translation through
>       * this IOMMU to its immediate target.  We need to translate
>       * it the rest of the way through to memory.
>       */
> -    rcu_read_lock();
>      mr = address_space_translate(&address_space_memory,
>                                   iotlb->translated_addr,
> -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> +                                 &xlat, &len, writable);
>      if (!memory_region_is_ram(mr)) {
>          error_report("iommu map to non memory area %"HWADDR_PRIx"",
>                       xlat);
> -        goto out;
> +        return false;
>      }
> +
>      /*
>       * Translation truncates length to the IOMMU page size,
>       * check that it did not truncate too much.
>       */
>      if (len & iotlb->addr_mask) {
>          error_report("iommu has granularity incompatible with target AS");
> +        return false;
> +    }
> +
> +    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +    *read_only = !writable || mr->readonly;
> +
> +    return true;
> +}
> +
> +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +{
> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> +    VFIOContainer *container = giommu->container;
> +    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> +    bool read_only;
> +    void *vaddr;
> +    int ret;
> +
> +    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> +                                iova, iova + iotlb->addr_mask);
> +
> +    if (iotlb->target_as != &address_space_memory) {
> +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> +        return;
> +    }
> +
> +    rcu_read_lock();
> +
> +    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
>          goto out;
>      }
>  
>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +        /*
> +         * vaddr is only valid until rcu_read_unlock(). But after
> +         * vfio_dma_map has set up the mapping the pages will be
> +         * pinned by the kernel. This makes sure that the RAM backend
> +         * of vaddr will always be there, even if the memory object is
> +         * destroyed and its backing memory munmap-ed.
> +         */
>          ret = vfio_dma_map(container, iova,
>                             iotlb->addr_mask + 1, vaddr,
> -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> +                           read_only);
>          if (ret) {
>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
Peter Xu Feb. 10, 2017, 5:50 a.m. UTC | #2
On Fri, Feb 10, 2017 at 12:12:22PM +1100, David Gibson wrote:
> On Tue, Feb 07, 2017 at 04:28:04PM +0800, Peter Xu wrote:
> > A cleanup for vfio_iommu_map_notify(). Now we will fetch vaddr even if
> > the operation is unmap, but it won't hurt much.
> > 
> > One thing to mention is that we need the RCU read lock to protect the
> > whole translation and map/unmap procedure.
> > 
> > Acked-by: Alex Williamson <alex.williamson@redhat.com>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> 
> So, I know I reviewed this already, but looking again I'm confused.
> 
> I'm not sure how the original code ever worked: if this is an unmap
> (perm == IOMMU_NONE), then I wouldn't even expect
> iotlb->translated_addr to have a valid value, but we're passing it to
> address_space_translate() and failing if it it doesn't give us
> sensible results.

Hmm, right.

Looks like it is just because we have accidentally inited
iotlb->translated_addr in all the callers of
memory_region_notify_iommu (one is put_tce_emu(), the other one is
rpcit_service_call()). If so, patch 3 (maybe, along with this one)
would be more essential imho to make sure we don't have such an
assumption.

Thanks,

-- peterx
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 174f351..42c4790 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -294,54 +294,79 @@  static bool vfio_listener_skipped_section(MemoryRegionSection *section)
            section->offset_within_address_space & (1ULL << 63);
 }
 
-static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+/* Called with rcu_read_lock held.  */
+static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
+                           bool *read_only)
 {
-    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
-    VFIOContainer *container = giommu->container;
-    hwaddr iova = iotlb->iova + giommu->iommu_offset;
     MemoryRegion *mr;
     hwaddr xlat;
     hwaddr len = iotlb->addr_mask + 1;
-    void *vaddr;
-    int ret;
-
-    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
-                                iova, iova + iotlb->addr_mask);
-
-    if (iotlb->target_as != &address_space_memory) {
-        error_report("Wrong target AS \"%s\", only system memory is allowed",
-                     iotlb->target_as->name ? iotlb->target_as->name : "none");
-        return;
-    }
+    bool writable = iotlb->perm & IOMMU_WO;
 
     /*
      * The IOMMU TLB entry we have just covers translation through
      * this IOMMU to its immediate target.  We need to translate
      * it the rest of the way through to memory.
      */
-    rcu_read_lock();
     mr = address_space_translate(&address_space_memory,
                                  iotlb->translated_addr,
-                                 &xlat, &len, iotlb->perm & IOMMU_WO);
+                                 &xlat, &len, writable);
     if (!memory_region_is_ram(mr)) {
         error_report("iommu map to non memory area %"HWADDR_PRIx"",
                      xlat);
-        goto out;
+        return false;
     }
+
     /*
      * Translation truncates length to the IOMMU page size,
      * check that it did not truncate too much.
      */
     if (len & iotlb->addr_mask) {
         error_report("iommu has granularity incompatible with target AS");
+        return false;
+    }
+
+    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+    *read_only = !writable || mr->readonly;
+
+    return true;
+}
+
+static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+    VFIOContainer *container = giommu->container;
+    hwaddr iova = iotlb->iova + giommu->iommu_offset;
+    bool read_only;
+    void *vaddr;
+    int ret;
+
+    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
+                                iova, iova + iotlb->addr_mask);
+
+    if (iotlb->target_as != &address_space_memory) {
+        error_report("Wrong target AS \"%s\", only system memory is allowed",
+                     iotlb->target_as->name ? iotlb->target_as->name : "none");
+        return;
+    }
+
+    rcu_read_lock();
+
+    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
         goto out;
     }
 
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        vaddr = memory_region_get_ram_ptr(mr) + xlat;
+        /*
+         * vaddr is only valid until rcu_read_unlock(). But after
+         * vfio_dma_map has set up the mapping the pages will be
+         * pinned by the kernel. This makes sure that the RAM backend
+         * of vaddr will always be there, even if the memory object is
+         * destroyed and its backing memory munmap-ed.
+         */
         ret = vfio_dma_map(container, iova,
                            iotlb->addr_mask + 1, vaddr,
-                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
+                           read_only);
         if (ret) {
             error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx", %p) = %d (%m)",