diff mbox

[v5,02/18] vfio: introduce vfio_get_vaddr()

Message ID 1485253571-19058-3-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu Jan. 24, 2017, 10:25 a.m. UTC
A cleanup for vfio_iommu_map_notify(). Should have no functional change,
just to make the function shorter and easier to understand.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/vfio/common.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 20 deletions(-)

Comments

Alex Williamson Jan. 24, 2017, 4:29 p.m. UTC | #1
On Tue, 24 Jan 2017 18:25:55 +0800
Peter Xu <peterx@redhat.com> wrote:

> A cleanup for vfio_iommu_map_notify(). Should have no functional change,
> just to make the function shorter and easier to understand.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/vfio/common.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 38 insertions(+), 20 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 174f351..ce55dff 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -294,25 +294,14 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>             section->offset_within_address_space & (1ULL << 63);
>  }
>  
> -static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
> +                           bool *read_only)
>  {
> -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> -    VFIOContainer *container = giommu->container;
> -    hwaddr iova = iotlb->iova + giommu->iommu_offset;
>      MemoryRegion *mr;
>      hwaddr xlat;
>      hwaddr len = iotlb->addr_mask + 1;
> -    void *vaddr;
> -    int ret;
> -
> -    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> -                                iova, iova + iotlb->addr_mask);
> -
> -    if (iotlb->target_as != &address_space_memory) {
> -        error_report("Wrong target AS \"%s\", only system memory is allowed",
> -                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> -        return;
> -    }
> +    bool ret = false;
> +    bool writable = iotlb->perm & IOMMU_WO;
>  
>      /*
>       * The IOMMU TLB entry we have just covers translation through
> @@ -322,12 +311,13 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>      rcu_read_lock();
>      mr = address_space_translate(&address_space_memory,
>                                   iotlb->translated_addr,
> -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> +                                 &xlat, &len, writable);
>      if (!memory_region_is_ram(mr)) {
>          error_report("iommu map to non memory area %"HWADDR_PRIx"",
>                       xlat);
>          goto out;
>      }
> +
>      /*
>       * Translation truncates length to the IOMMU page size,
>       * check that it did not truncate too much.
> @@ -337,11 +327,41 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>          goto out;
>      }
>  
> +    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +    *read_only = !writable || mr->readonly;
> +    ret = true;
> +
> +out:
> +    rcu_read_unlock();
> +    return ret;
> +}
> +
> +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +{
> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> +    VFIOContainer *container = giommu->container;
> +    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> +    bool read_only;
> +    void *vaddr;
> +    int ret;
> +
> +    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> +                                iova, iova + iotlb->addr_mask);
> +
> +    if (iotlb->target_as != &address_space_memory) {
> +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> +        return;
> +    }
> +
> +    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
> +        return;
> +    }
> +
>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
>          ret = vfio_dma_map(container, iova,
>                             iotlb->addr_mask + 1, vaddr,
> -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> +                           read_only);
>          if (ret) {
>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> @@ -357,8 +377,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>                           iotlb->addr_mask + 1, ret);
>          }
>      }
> -out:
> -    rcu_read_unlock();

The comment from v4 still needs input from Paolo, is it valid to make
use of vaddr (based on address_space_translate ->
memory_region_get_ram_ptr) outside of the rcu read lock or could future
BQL reduction efforts allow this to race?

>  }
>  
>  static void vfio_listener_region_add(MemoryListener *listener,
Peter Xu Jan. 25, 2017, 4:32 a.m. UTC | #2
On Tue, Jan 24, 2017 at 09:29:05AM -0700, Alex Williamson wrote:
> On Tue, 24 Jan 2017 18:25:55 +0800
> Peter Xu <peterx@redhat.com> wrote:
> 
> > A cleanup for vfio_iommu_map_notify(). Should have no functional change,
> > just to make the function shorter and easier to understand.
> > 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  hw/vfio/common.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
> >  1 file changed, 38 insertions(+), 20 deletions(-)
> > 
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index 174f351..ce55dff 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -294,25 +294,14 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
> >             section->offset_within_address_space & (1ULL << 63);
> >  }
> >  
> > -static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > +static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
> > +                           bool *read_only)
> >  {
> > -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> > -    VFIOContainer *container = giommu->container;
> > -    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> >      MemoryRegion *mr;
> >      hwaddr xlat;
> >      hwaddr len = iotlb->addr_mask + 1;
> > -    void *vaddr;
> > -    int ret;
> > -
> > -    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> > -                                iova, iova + iotlb->addr_mask);
> > -
> > -    if (iotlb->target_as != &address_space_memory) {
> > -        error_report("Wrong target AS \"%s\", only system memory is allowed",
> > -                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> > -        return;
> > -    }
> > +    bool ret = false;
> > +    bool writable = iotlb->perm & IOMMU_WO;
> >  
> >      /*
> >       * The IOMMU TLB entry we have just covers translation through
> > @@ -322,12 +311,13 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >      rcu_read_lock();
> >      mr = address_space_translate(&address_space_memory,
> >                                   iotlb->translated_addr,
> > -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> > +                                 &xlat, &len, writable);
> >      if (!memory_region_is_ram(mr)) {
> >          error_report("iommu map to non memory area %"HWADDR_PRIx"",
> >                       xlat);
> >          goto out;
> >      }
> > +
> >      /*
> >       * Translation truncates length to the IOMMU page size,
> >       * check that it did not truncate too much.
> > @@ -337,11 +327,41 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >          goto out;
> >      }
> >  
> > +    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> > +    *read_only = !writable || mr->readonly;
> > +    ret = true;
> > +
> > +out:
> > +    rcu_read_unlock();
> > +    return ret;
> > +}
> > +
> > +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > +{
> > +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> > +    VFIOContainer *container = giommu->container;
> > +    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> > +    bool read_only;
> > +    void *vaddr;
> > +    int ret;
> > +
> > +    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> > +                                iova, iova + iotlb->addr_mask);
> > +
> > +    if (iotlb->target_as != &address_space_memory) {
> > +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> > +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> > +        return;
> > +    }
> > +
> > +    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
> > +        return;
> > +    }
> > +
> >      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> > -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> >          ret = vfio_dma_map(container, iova,
> >                             iotlb->addr_mask + 1, vaddr,
> > -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> > +                           read_only);
> >          if (ret) {
> >              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> >                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> > @@ -357,8 +377,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >                           iotlb->addr_mask + 1, ret);
> >          }
> >      }
> > -out:
> > -    rcu_read_unlock();
> 
> The comment from v4 still needs input from Paolo, is it valid to make
> use of vaddr (based on address_space_translate ->
> memory_region_get_ram_ptr) outside of the rcu read lock or could future
> BQL reduction efforts allow this to race?

Yes we should continue the discussion. Sorry for the inconvenience and
misunderstanding (I was only trying to provide a non-RFC version of
the series, in case Michael would like to pick up any of the cleanup
patches).

I have a similar question as well above - IIUC the RCU read lock
protects us from losing the references of memory objects, however
in our case even after we release the lock, we are still using the
backend ram (vaddr) since we have set it up inside kernel to build up
the IO page table. After that, the kernel/device should be able to
write to addresses of that backend ram any time. Is there possibly a
window that kernel/device may write to that backend ram even after
guest destroyed the memory objects (e.g., by memory unplug?) and its
backend ram?

Thanks,

-- peterx
Alex Williamson Jan. 25, 2017, 4:43 p.m. UTC | #3
On Wed, 25 Jan 2017 12:32:21 +0800
Peter Xu <peterx@redhat.com> wrote:

> On Tue, Jan 24, 2017 at 09:29:05AM -0700, Alex Williamson wrote:
> > On Tue, 24 Jan 2017 18:25:55 +0800
> > Peter Xu <peterx@redhat.com> wrote:
> >   
> > > A cleanup for vfio_iommu_map_notify(). Should have no functional change,
> > > just to make the function shorter and easier to understand.
> > > 
> > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > ---
> > >  hw/vfio/common.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
> > >  1 file changed, 38 insertions(+), 20 deletions(-)
> > > 
> > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > > index 174f351..ce55dff 100644
> > > --- a/hw/vfio/common.c
> > > +++ b/hw/vfio/common.c
> > > @@ -294,25 +294,14 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
> > >             section->offset_within_address_space & (1ULL << 63);
> > >  }
> > >  
> > > -static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > > +static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
> > > +                           bool *read_only)
> > >  {
> > > -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> > > -    VFIOContainer *container = giommu->container;
> > > -    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> > >      MemoryRegion *mr;
> > >      hwaddr xlat;
> > >      hwaddr len = iotlb->addr_mask + 1;
> > > -    void *vaddr;
> > > -    int ret;
> > > -
> > > -    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> > > -                                iova, iova + iotlb->addr_mask);
> > > -
> > > -    if (iotlb->target_as != &address_space_memory) {
> > > -        error_report("Wrong target AS \"%s\", only system memory is allowed",
> > > -                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> > > -        return;
> > > -    }
> > > +    bool ret = false;
> > > +    bool writable = iotlb->perm & IOMMU_WO;
> > >  
> > >      /*
> > >       * The IOMMU TLB entry we have just covers translation through
> > > @@ -322,12 +311,13 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > >      rcu_read_lock();
> > >      mr = address_space_translate(&address_space_memory,
> > >                                   iotlb->translated_addr,
> > > -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> > > +                                 &xlat, &len, writable);
> > >      if (!memory_region_is_ram(mr)) {
> > >          error_report("iommu map to non memory area %"HWADDR_PRIx"",
> > >                       xlat);
> > >          goto out;
> > >      }
> > > +
> > >      /*
> > >       * Translation truncates length to the IOMMU page size,
> > >       * check that it did not truncate too much.
> > > @@ -337,11 +327,41 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > >          goto out;
> > >      }
> > >  
> > > +    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> > > +    *read_only = !writable || mr->readonly;
> > > +    ret = true;
> > > +
> > > +out:
> > > +    rcu_read_unlock();
> > > +    return ret;
> > > +}
> > > +
> > > +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > > +{
> > > +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> > > +    VFIOContainer *container = giommu->container;
> > > +    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> > > +    bool read_only;
> > > +    void *vaddr;
> > > +    int ret;
> > > +
> > > +    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> > > +                                iova, iova + iotlb->addr_mask);
> > > +
> > > +    if (iotlb->target_as != &address_space_memory) {
> > > +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> > > +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> > > +        return;
> > > +    }
> > > +
> > > +    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
> > > +        return;
> > > +    }
> > > +
> > >      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> > > -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> > >          ret = vfio_dma_map(container, iova,
> > >                             iotlb->addr_mask + 1, vaddr,
> > > -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> > > +                           read_only);
> > >          if (ret) {
> > >              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> > >                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> > > @@ -357,8 +377,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > >                           iotlb->addr_mask + 1, ret);
> > >          }
> > >      }
> > > -out:
> > > -    rcu_read_unlock();  
> > 
> > The comment from v4 still needs input from Paolo, is it valid to make
> > use of vaddr (based on address_space_translate ->
> > memory_region_get_ram_ptr) outside of the rcu read lock or could future
> > BQL reduction efforts allow this to race?  
> 
> Yes we should continue the discussion. Sorry for the inconvenient and
> misunderstanding (I was only trying to provide a non-rfc version of
> the series, in case Michael would like to pick up some cleanup any of
> the patches).
> 
> I have similar question as well above - IIUC the RCU read lock
> protects us from not losing the references of memory objects, however
> in our case even after we release the lock, we are still using the
> backend ram (vaddr) since we have set it up inside kernel to build up
> the IO page table. After that, the kernel/device should be able to
> write to addresses of that backend ram any time. Is there possibly a
> window that kernel/device may write to that backend ram even after
> guest destroyed the memory objects (e.g., by memory unplug?) and its
> backend ram?

If the memory object is destroyed, we better get an unmap, but my
question is whether releasing the rcu read lock before the map ioctl
means that the object could be destroyed between the release and ioctl
and now we have map and unmap code paths racing each other.  I suspect
that QEMU is not nearly this asynchronous and we're all gated on the
BQL, but if the intent of previous commits was to standardize around
the rcu lock so that we can work to get rid of the BQL, I don't want to
make a change explicitly counter to that.  Thanks,

Alex
Paolo Bonzini Jan. 25, 2017, 5:11 p.m. UTC | #4
On 24/01/2017 17:29, Alex Williamson wrote:
> On Tue, 24 Jan 2017 18:25:55 +0800
> Peter Xu <peterx@redhat.com> wrote:
> 
>> A cleanup for vfio_iommu_map_notify(). Should have no functional change,
>> just to make the function shorter and easier to understand.
>>
>> Signed-off-by: Peter Xu <peterx@redhat.com>
>> ---
>>  hw/vfio/common.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
>>  1 file changed, 38 insertions(+), 20 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 174f351..ce55dff 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -294,25 +294,14 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>>             section->offset_within_address_space & (1ULL << 63);
>>  }
>>  
>> -static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>> +static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
>> +                           bool *read_only)
>>  {
>> -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
>> -    VFIOContainer *container = giommu->container;
>> -    hwaddr iova = iotlb->iova + giommu->iommu_offset;
>>      MemoryRegion *mr;
>>      hwaddr xlat;
>>      hwaddr len = iotlb->addr_mask + 1;
>> -    void *vaddr;
>> -    int ret;
>> -
>> -    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
>> -                                iova, iova + iotlb->addr_mask);
>> -
>> -    if (iotlb->target_as != &address_space_memory) {
>> -        error_report("Wrong target AS \"%s\", only system memory is allowed",
>> -                     iotlb->target_as->name ? iotlb->target_as->name : "none");
>> -        return;
>> -    }
>> +    bool ret = false;
>> +    bool writable = iotlb->perm & IOMMU_WO;
>>  
>>      /*
>>       * The IOMMU TLB entry we have just covers translation through
>> @@ -322,12 +311,13 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>>      rcu_read_lock();
>>      mr = address_space_translate(&address_space_memory,
>>                                   iotlb->translated_addr,
>> -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
>> +                                 &xlat, &len, writable);
>>      if (!memory_region_is_ram(mr)) {
>>          error_report("iommu map to non memory area %"HWADDR_PRIx"",
>>                       xlat);
>>          goto out;
>>      }
>> +
>>      /*
>>       * Translation truncates length to the IOMMU page size,
>>       * check that it did not truncate too much.
>> @@ -337,11 +327,41 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>>          goto out;
>>      }
>>  
>> +    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
>> +    *read_only = !writable || mr->readonly;
>> +    ret = true;
>> +
>> +out:
>> +    rcu_read_unlock();
>> +    return ret;
>> +}
>> +
>> +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>> +{
>> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
>> +    VFIOContainer *container = giommu->container;
>> +    hwaddr iova = iotlb->iova + giommu->iommu_offset;
>> +    bool read_only;
>> +    void *vaddr;
>> +    int ret;
>> +
>> +    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
>> +                                iova, iova + iotlb->addr_mask);
>> +
>> +    if (iotlb->target_as != &address_space_memory) {
>> +        error_report("Wrong target AS \"%s\", only system memory is allowed",
>> +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
>> +        return;
>> +    }
>> +
>> +    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
>> +        return;
>> +    }
>> +
>>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
>> -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
>>          ret = vfio_dma_map(container, iova,
>>                             iotlb->addr_mask + 1, vaddr,
>> -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
>> +                           read_only);
>>          if (ret) {
>>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
>> @@ -357,8 +377,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>>                           iotlb->addr_mask + 1, ret);
>>          }
>>      }
>> -out:
>> -    rcu_read_unlock();
> 
> The comment from v4 still needs input from Paolo, is it valid to make
> use of vaddr (based on address_space_translate ->
> memory_region_get_ram_ptr) outside of the rcu read lock or could future
> BQL reduction efforts allow this to race?

You need to keep a reference to the MemoryRegion if you do
rcu_read_unlock.  But it's simpler to call vfio_get_vaddr within
rcu_read_lock, and keep the lock/unlock in vfio_iommu_map_notify.

You probably should also put a comment about why VFIO does *not* need to
keep a reference between vfio_dma_map and vfio_dma_unmap (which doesn't
sound easy to do either).  Would any well-behaved guest invalidate the
IOMMU page tables before a memory hot-unplug?

Paolo

>>  }
>>  
>>  static void vfio_listener_region_add(MemoryListener *listener,
> 
> 
>
Paolo Bonzini Jan. 25, 2017, 5:16 p.m. UTC | #5
On 25/01/2017 17:43, Alex Williamson wrote:
> On Wed, 25 Jan 2017 12:32:21 +0800
> Peter Xu <peterx@redhat.com> wrote:
>> I have similar question as well above - IIUC the RCU read lock
>> protects us from not losing the references of memory objects, however
>> in our case even after we release the lock, we are still using the
>> backend ram (vaddr) since we have set it up inside kernel to build up
>> the IO page table. After that, the kernel/device should be able to
>> write to addresses of that backend ram any time.

I don't think that's what happens.  As far as I understand, VFIO pins
the pages corresponding to vaddr, not vaddr itself.  The memory backend
is mmap-ed memory; when you hot-unplug it the munmap releases the VMA
and loses the connection between QEMU's virtual address space and the
pages.  However, the pages stay pinned and stay mapped into VFIO's own
IOMMU page tables.

So if a guest does a memory hot-unplug without IOMMU unmap, it would
keep on seeing the content of the hot-unplugged memory, and the host
could not release the pages, but the guest cannot overwrite QEMU data
structures.

Paolo
Alex Williamson Jan. 25, 2017, 5:36 p.m. UTC | #6
On Wed, 25 Jan 2017 18:11:37 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 24/01/2017 17:29, Alex Williamson wrote:
> > On Tue, 24 Jan 2017 18:25:55 +0800
> > Peter Xu <peterx@redhat.com> wrote:
> >   
> >> A cleanup for vfio_iommu_map_notify(). Should have no functional change,
> >> just to make the function shorter and easier to understand.
> >>
> >> Signed-off-by: Peter Xu <peterx@redhat.com>
> >> ---
> >>  hw/vfio/common.c | 58 +++++++++++++++++++++++++++++++++++++-------------------
> >>  1 file changed, 38 insertions(+), 20 deletions(-)
> >>
> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >> index 174f351..ce55dff 100644
> >> --- a/hw/vfio/common.c
> >> +++ b/hw/vfio/common.c
> >> @@ -294,25 +294,14 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
> >>             section->offset_within_address_space & (1ULL << 63);
> >>  }
> >>  
> >> -static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >> +static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
> >> +                           bool *read_only)
> >>  {
> >> -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> >> -    VFIOContainer *container = giommu->container;
> >> -    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> >>      MemoryRegion *mr;
> >>      hwaddr xlat;
> >>      hwaddr len = iotlb->addr_mask + 1;
> >> -    void *vaddr;
> >> -    int ret;
> >> -
> >> -    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> >> -                                iova, iova + iotlb->addr_mask);
> >> -
> >> -    if (iotlb->target_as != &address_space_memory) {
> >> -        error_report("Wrong target AS \"%s\", only system memory is allowed",
> >> -                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> >> -        return;
> >> -    }
> >> +    bool ret = false;
> >> +    bool writable = iotlb->perm & IOMMU_WO;
> >>  
> >>      /*
> >>       * The IOMMU TLB entry we have just covers translation through
> >> @@ -322,12 +311,13 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >>      rcu_read_lock();
> >>      mr = address_space_translate(&address_space_memory,
> >>                                   iotlb->translated_addr,
> >> -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> >> +                                 &xlat, &len, writable);
> >>      if (!memory_region_is_ram(mr)) {
> >>          error_report("iommu map to non memory area %"HWADDR_PRIx"",
> >>                       xlat);
> >>          goto out;
> >>      }
> >> +
> >>      /*
> >>       * Translation truncates length to the IOMMU page size,
> >>       * check that it did not truncate too much.
> >> @@ -337,11 +327,41 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >>          goto out;
> >>      }
> >>  
> >> +    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> >> +    *read_only = !writable || mr->readonly;
> >> +    ret = true;
> >> +
> >> +out:
> >> +    rcu_read_unlock();
> >> +    return ret;
> >> +}
> >> +
> >> +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >> +{
> >> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> >> +    VFIOContainer *container = giommu->container;
> >> +    hwaddr iova = iotlb->iova + giommu->iommu_offset;
> >> +    bool read_only;
> >> +    void *vaddr;
> >> +    int ret;
> >> +
> >> +    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
> >> +                                iova, iova + iotlb->addr_mask);
> >> +
> >> +    if (iotlb->target_as != &address_space_memory) {
> >> +        error_report("Wrong target AS \"%s\", only system memory is allowed",
> >> +                     iotlb->target_as->name ? iotlb->target_as->name : "none");
> >> +        return;
> >> +    }
> >> +
> >> +    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
> >> +        return;
> >> +    }
> >> +
> >>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> >> -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> >>          ret = vfio_dma_map(container, iova,
> >>                             iotlb->addr_mask + 1, vaddr,
> >> -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> >> +                           read_only);
> >>          if (ret) {
> >>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> >>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> >> @@ -357,8 +377,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >>                           iotlb->addr_mask + 1, ret);
> >>          }
> >>      }
> >> -out:
> >> -    rcu_read_unlock();  
> > 
> > The comment from v4 still needs input from Paolo, is it valid to make
> > use of vaddr (based on address_space_translate ->
> > memory_region_get_ram_ptr) outside of the rcu read lock or could future
> > BQL reduction efforts allow this to race?  
> 
> You need to keep a reference to the MemoryRegion if you do
> rcu_read_unlock.  But it's simpler to call vfio_get_vaddr within
> rcu_read_lock, and keep the lock/unlock in vfio_iommu_map_notify.

Right, a memory_region_{un}ref() would be another option.
 
> You probably should also put a comment about why VFIO does *not* need to
> keep a reference between vfio_dma_map and vfio_dma_unmap (which doesn't
> sound easy to do either).  Would any well-behaved guest invalidate the
> IOMMU page tables before a memory hot-unplug?

Hmm, we do take a reference in vfio_listener_region_add(), but this is
of course to the iommu region not to the RAM region we're translating.
In the non-vIOMMU case we would be holding a reference to the memory
region backing a DMA mapping.  I would expect a well behaved guest to
evacuate DMA mappings targeting a hotplug memory region before it gets
ejected, but how much do we want to rely on well behaved guests.
Perhaps we should be taking a reference for each mapping entry, though
this makes Peter's plans to invalidate the entire address space much
more difficult.  Thanks,

Alex
Paolo Bonzini Jan. 25, 2017, 5:40 p.m. UTC | #7
On 25/01/2017 18:36, Alex Williamson wrote:
>> You probably should also put a comment about why VFIO does *not* need to
>> keep a reference between vfio_dma_map and vfio_dma_unmap (which doesn't
>> sound easy to do either).  Would any well-behaved guest invalidate the
>> IOMMU page tables before a memory hot-unplug?
>
> Hmm, we do take a reference in vfio_listener_region_add(), but this is
> of course to the iommu region not to the RAM region we're translating.
> In the non-vIOMMU case we would be holding a reference to the memory
> region backing a DMA mapping.  I would expect a well behaved guest to
> evacuate DMA mappings targeting a hotplug memory region before it gets
> ejected, but how much do we want to rely on well behaved guests.

It depends on what happens if they aren't.  I think it's fine (see other
message), but taking a reference for each mapping entry isn't so easy
because the unmap case doesn't know the old memory region.

Paolo


> Perhaps we should be taking a reference for each mapping entry, though
> this makes Peter's plans to invalidate the entire address space much
> more difficult.  Thanks,
Alex Williamson Jan. 25, 2017, 6:36 p.m. UTC | #8
On Wed, 25 Jan 2017 18:40:56 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 25/01/2017 18:36, Alex Williamson wrote:
> >> You probably should also put a comment about why VFIO does *not* need to
> >> keep a reference between vfio_dma_map and vfio_dma_unmap (which doesn't
> >> sound easy to do either).  Would any well-behaved guest invalidate the
> >> IOMMU page tables before a memory hot-unplug?  
> >
> > Hmm, we do take a reference in vfio_listener_region_add(), but this is
> > of course to the iommu region not to the RAM region we're translating.
> > In the non-vIOMMU case we would be holding a reference to the memory
> > region backing a DMA mapping.  I would expect a well behaved guest to
> > evacuate DMA mappings targeting a hotplug memory region before it gets
> > ejected, but how much do we want to rely on well behaved guests.  
> 
> It depends of what happens if they aren't.  I think it's fine (see other
> message), but taking a reference for each mapping entry isn't so easy
> because the unmap case doesn't know the old memory region.

If we held a reference to the memory region from the mapping path and
walk the IOMMU page table to generate the unmap, then we really should
get to the same original memory region, right?  The vfio iommu notifier
should only be mapping native page sizes of the IOMMU, 4k/2M/1G.  The
problem is that it's a lot of overhead to flush the entire address
space that way vs the single invalidation Peter is trying to enable
here.  It's actually similar to how the type1 iommu works in the kernel
though, we can unmap by iova because we ask the iommu for the iova->pfn
translation in order to unpin the page.

I do agree with your description in the other message about how things
would work for a memory hot-unplug w/o unmap though, which does seem to
imply that we don't need that reference.  Thanks,

Alex
Paolo Bonzini Jan. 25, 2017, 7:42 p.m. UTC | #9
On 25/01/2017 19:36, Alex Williamson wrote:
>> It depends of what happens if they aren't.  I think it's fine (see other
>> message), but taking a reference for each mapping entry isn't so easy
>> because the unmap case doesn't know the old memory region.
> If we held a reference to the memory region from the mapping path and
> walk the IOMMU page table to generate the unmap, then we really should
> get to the same original memory region, right?  The vfio iommu notifier
> should only be mapping native page sizes of the IOMMU, 4k/2M/1G.  The
> problem is that it's a lot of overhead to flush the entire address
> space that way vs the single invalidation Peter is trying to enable
> here.  It's actually similar to how the type1 iommu works in the kernel
> though, we can unmap by iova because we ask the iommu for the iova->pfn
> translation in order to unpin the page.

But in the kernel you can trust the IOMMU page tables because you build
them, here instead it's the guest's page tables that you'd walk, right?
You cannot trust the guest.

Paolo
Alex Williamson Jan. 25, 2017, 8:09 p.m. UTC | #10
On Wed, 25 Jan 2017 20:42:19 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 25/01/2017 19:36, Alex Williamson wrote:
> >> It depends of what happens if they aren't.  I think it's fine (see other
> >> message), but taking a reference for each mapping entry isn't so easy
> >> because the unmap case doesn't know the old memory region.  
> > If we held a reference to the memory region from the mapping path and
> > walk the IOMMU page table to generate the unmap, then we really should
> > get to the same original memory region, right?  The vfio iommu notifier
> > should only be mapping native page sizes of the IOMMU, 4k/2M/1G.  The
> > problem is that it's a lot of overhead to flush the entire address
> > space that way vs the single invalidation Peter is trying to enable
> > here.  It's actually similar to how the type1 iommu works in the kernel
> > though, we can unmap by iova because we ask the iommu for the iova->pfn
> > translation in order to unpin the page.  
> 
> But in the kernel you can trust the IOMMU page tables because you build
> them, here instead it's the guest's page tables that you'd walk, right?
> You cannot trust the guest.

Yes, you're right, we're not shadowing the vt-d page tables, we're
working on the explicit invalidation model.  So there could be
anything, or nothing in the page tables when we go to try to lookup the
unref.  So clearly taking that reference without a shadow page table
would be the wrong approach.  Thanks,

Alex
Peter Xu Jan. 26, 2017, 6:26 a.m. UTC | #11
On Wed, Jan 25, 2017 at 06:16:44PM +0100, Paolo Bonzini wrote:
> 
> 
> On 25/01/2017 17:43, Alex Williamson wrote:
> > On Wed, 25 Jan 2017 12:32:21 +0800
> > Peter Xu <peterx@redhat.com> wrote:
> >> I have similar question as well above - IIUC the RCU read lock
> >> protects us from not losing the references of memory objects, however
> >> in our case even after we release the lock, we are still using the
> >> backend ram (vaddr) since we have set it up inside kernel to build up
> >> the IO page table. After that, the kernel/device should be able to
> >> write to addresses of that backend ram any time.
> 
> I don't think that's what happens.  As far as I understand, VFIO pins
> the pages corresponding to vaddr, not vaddr itself.  The memory backend
> is mmap-ed memory; when you hot-unplug it the munmap releases the VMA
> and loses the connection between QEMU's virtual address space and the
> pages.  However, the pages stay pinned and stay mapped into VFIO's own
> IOMMU page tables.
> 
> So if a guest does a memory hot-unplug without IOMMU unmap, it would
> keep on seeing the content of the hot-unplugged memory, and the host
> could not release the pages, but the guest cannot overwrite QEMU data
> structures.

Sounds reasonable. I forgot that these pages stay pinned unless there is
an explicit unmap.

Thanks Paolo. :)

-- peterx
Peter Xu Jan. 26, 2017, 6:46 a.m. UTC | #12
On Wed, Jan 25, 2017 at 01:09:47PM -0700, Alex Williamson wrote:
> On Wed, 25 Jan 2017 20:42:19 +0100
> Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> > On 25/01/2017 19:36, Alex Williamson wrote:
> > >> It depends of what happens if they aren't.  I think it's fine (see other
> > >> message), but taking a reference for each mapping entry isn't so easy
> > >> because the unmap case doesn't know the old memory region.  
> > > If we held a reference to the memory region from the mapping path and
> > > walk the IOMMU page table to generate the unmap, then we really should
> > > get to the same original memory region, right?  The vfio iommu notifier
> > > should only be mapping native page sizes of the IOMMU, 4k/2M/1G.  The
> > > problem is that it's a lot of overhead to flush the entire address
> > > space that way vs the single invalidation Peter is trying to enable
> > > here.  It's actually similar to how the type1 iommu works in the kernel
> > > though, we can unmap by iova because we ask the iommu for the iova->pfn
> > > translation in order to unpin the page.  
> > 
> > But in the kernel you can trust the IOMMU page tables because you build
> > them, here instead it's the guest's page tables that you'd walk, right?
> > You cannot trust the guest.
> 
> Yes, you're right, we're not shadowing the vt-d page tables, we're
> working on the explicit invalidation model.  So there could be
> anything, or nothing in the page tables when we go to try to lookup the
> unref.  So clearly taking that reference without a shadow page table
> would be the wrong approach.  Thanks,

IIUC from the above discussion, moving the rcu read lock/unlock out of
vfio_get_vaddr() would be the nicest approach here.

Thanks to you both on helping verify and confirm the problem!

-- peterx
Peter Xu Jan. 26, 2017, 7:12 a.m. UTC | #13
On Wed, Jan 25, 2017 at 06:11:37PM +0100, Paolo Bonzini wrote:

[...]

> > The comment from v4 still needs input from Paolo, is it valid to make
> > use of vaddr (based on address_space_translate ->
> > memory_region_get_ram_ptr) outside of the rcu read lock or could future
> > BQL reduction efforts allow this to race?
> 
> You need to keep a reference to the MemoryRegion if you do
> rcu_read_unlock.  But it's simpler to call vfio_get_vaddr within
> rcu_read_lock, and keep the lock/unlock in vfio_iommu_map_notify.
> 
> You probably should also put a comment about why VFIO does *not* need to
> keep a reference between vfio_dma_map and vfio_dma_unmap (which doesn't
> sound easy to do either).

How about this one?

    /*
     * Here, we need to have the lock not only for vfio_get_vaddr(),
     * but also needs to make sure that the vaddr will be valid for
     * further operations.
     *
     * When we map new pages, we need the lock to make sure that vaddr
     * is valid along the way we build up the IO page table (via
     * vfio_dma_map()). Then, as long as the mapping is set up, we can
     * unlock since those pages will be pinned in kernel (which makes
     * sure that the RAM backend of vaddr will always be there, even
     * if the memory object is destroyed and RAM released).
     *
     * For unmapping case, we don't really need the protection since
     * the pages are in all cases locked in kernel, so we'll probably
     * be safe even without the lock. However, it won't hurt we have
     * the lock as well here.
     */

Thanks,

-- peterx
Paolo Bonzini Jan. 26, 2017, 10:55 a.m. UTC | #14
On 26/01/2017 08:12, Peter Xu wrote:
> 
>     /*
>      * Here, we need to have the lock not only for vfio_get_vaddr(),
>      * but also needs to make sure that the vaddr will be valid for
>      * further operations.
>      *
>      * When we map new pages, we need the lock to make sure that vaddr
>      * is valid along the way we build up the IO page table (via
>      * vfio_dma_map()). Then, as long as the mapping is set up, we can
>      * unlock since those pages will be pinned in kernel (which makes
>      * sure that the RAM backend of vaddr will always be there, even
>      * if the memory object is destroyed and RAM released).
>      *
>      * For unmapping case, we don't really need the protection since
>      * the pages are in all cases locked in kernel, so we'll probably
>      * be safe even without the lock. However, it won't hurt we have
>      * the lock as well here.
>      */

Even simpler, just before the definition of vfio_get_vaddr:

/* Called with rcu_read_lock held.  */

and just before the vfio_dma_map call:

    /* vaddr is only valid until rcu_read_unlock().  But after
     * vfio_dma_map has set up the mapping the pages will be pinned
     * by the kernel.  This makes sure that the RAM backend of vaddr
     * will always be there, even if the memory object is destroyed
     * and its backing memory munmap-ed.
     */

I'm not sure that you can get rid of the lock for the unmapping case.
Better remove that part of the comment.

Thanks,

Paolo
Peter Xu Jan. 26, 2017, 12:01 p.m. UTC | #15
On Thu, Jan 26, 2017 at 11:55:22AM +0100, Paolo Bonzini wrote:
> 
> 
> On 26/01/2017 08:12, Peter Xu wrote:
> > 
> >     /*
> >      * Here, we need to have the lock not only for vfio_get_vaddr(),
> >      * but also needs to make sure that the vaddr will be valid for
> >      * further operations.
> >      *
> >      * When we map new pages, we need the lock to make sure that vaddr
> >      * is valid along the way we build up the IO page table (via
> >      * vfio_dma_map()). Then, as long as the mapping is set up, we can
> >      * unlock since those pages will be pinned in kernel (which makes
> >      * sure that the RAM backend of vaddr will always be there, even
> >      * if the memory object is destroyed and RAM released).
> >      *
> >      * For unmapping case, we don't really need the protection since
> >      * the pages are in all cases locked in kernel, so we'll probably
> >      * be safe even without the lock. However, it won't hurt we have
> >      * the lock as well here.
> >      */
> 
> Even simpler, just before the definition of vfio_get_vaddr:
> 
> /* Called with rcu_read_lock held.  */
> 
> and just before the vfio_dma_map call:
> 
>     /* vaddr is only valid until rcu_read_unlock().  But after
>      * vfio_dma_map has set up the mapping the pages will be pinned
>      * by the kernel.  This makes sure that the RAM backend of vaddr
>      * will always be there, even if the memory object is destroyed
>      * and its backing memory munmap-ed.
>      */
> 
> I'm not sure that you can get rid of the lock for the unmapping case.
> Better remove that part of the comment.

Sure. Let me take yours. Thanks!

-- peterx
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 174f351..ce55dff 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -294,25 +294,14 @@  static bool vfio_listener_skipped_section(MemoryRegionSection *section)
            section->offset_within_address_space & (1ULL << 63);
 }
 
-static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
+                           bool *read_only)
 {
-    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
-    VFIOContainer *container = giommu->container;
-    hwaddr iova = iotlb->iova + giommu->iommu_offset;
     MemoryRegion *mr;
     hwaddr xlat;
     hwaddr len = iotlb->addr_mask + 1;
-    void *vaddr;
-    int ret;
-
-    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
-                                iova, iova + iotlb->addr_mask);
-
-    if (iotlb->target_as != &address_space_memory) {
-        error_report("Wrong target AS \"%s\", only system memory is allowed",
-                     iotlb->target_as->name ? iotlb->target_as->name : "none");
-        return;
-    }
+    bool ret = false;
+    bool writable = iotlb->perm & IOMMU_WO;
 
     /*
      * The IOMMU TLB entry we have just covers translation through
@@ -322,12 +311,13 @@  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     rcu_read_lock();
     mr = address_space_translate(&address_space_memory,
                                  iotlb->translated_addr,
-                                 &xlat, &len, iotlb->perm & IOMMU_WO);
+                                 &xlat, &len, writable);
     if (!memory_region_is_ram(mr)) {
         error_report("iommu map to non memory area %"HWADDR_PRIx"",
                      xlat);
         goto out;
     }
+
     /*
      * Translation truncates length to the IOMMU page size,
      * check that it did not truncate too much.
@@ -337,11 +327,41 @@  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
         goto out;
     }
 
+    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+    *read_only = !writable || mr->readonly;
+    ret = true;
+
+out:
+    rcu_read_unlock();
+    return ret;
+}
+
+static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+    VFIOContainer *container = giommu->container;
+    hwaddr iova = iotlb->iova + giommu->iommu_offset;
+    bool read_only;
+    void *vaddr;
+    int ret;
+
+    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
+                                iova, iova + iotlb->addr_mask);
+
+    if (iotlb->target_as != &address_space_memory) {
+        error_report("Wrong target AS \"%s\", only system memory is allowed",
+                     iotlb->target_as->name ? iotlb->target_as->name : "none");
+        return;
+    }
+
+    if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
+        return;
+    }
+
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        vaddr = memory_region_get_ram_ptr(mr) + xlat;
         ret = vfio_dma_map(container, iova,
                            iotlb->addr_mask + 1, vaddr,
-                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
+                           read_only);
         if (ret) {
             error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx", %p) = %d (%m)",
@@ -357,8 +377,6 @@  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
                          iotlb->addr_mask + 1, ret);
         }
     }
-out:
-    rcu_read_unlock();
 }
 
 static void vfio_listener_region_add(MemoryListener *listener,