diff mbox series

[v5,1/2] vfio: move function vfio_get_xlat_addr() to memory.c

Message ID 20221030043555.1333070-2-lulu@redhat.com
State New
Headers show
Series vhost-vdpa: add support for vIOMMU | expand

Commit Message

Cindy Lu Oct. 30, 2022, 4:35 a.m. UTC
- Move the function vfio_get_xlat_addr() to softmmu/memory.c and
  rename it to memory_get_xlat_addr(), so that it can be used by
  other devices, such as vDPA devices.
- Add a new bool argument to this function that reports whether the
  memory is backed by a discard manager, so that each device can issue
  its own warning.

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 hw/vfio/common.c      | 163 +++++++++++++++++++-----------------------
 include/exec/memory.h |   4 ++
 softmmu/memory.c      |  69 ++++++++++++++++++
 3 files changed, 146 insertions(+), 90 deletions(-)

Comments

Alex Williamson Oct. 30, 2022, 5:13 a.m. UTC | #1
On Sun, 30 Oct 2022 12:35:54 +0800
Cindy Lu <lulu@redhat.com> wrote:

> - Move the function vfio_get_xlat_addr to softmmu/memory.c, and
>   change the name to memory_get_xlat_addr(). So we can use this
>   function on other devices, such as vDPA device.
> - Add a new bool arg in this function, which shows whether the memory is
>   backed by a discard manager. So the device can have its own warning.
> 
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  hw/vfio/common.c      | 163 +++++++++++++++++++-----------------------
>  include/exec/memory.h |   4 ++
>  softmmu/memory.c      |  69 ++++++++++++++++++
>  3 files changed, 146 insertions(+), 90 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index ace9562a9b..e958a4435f 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -574,92 +574,6 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
>             section->offset_within_address_space & (1ULL << 63);
>  }
>  
> -/* Called with rcu_read_lock held.  */
> -static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> -                               ram_addr_t *ram_addr, bool *read_only)
> -{
> -    MemoryRegion *mr;
> -    hwaddr xlat;
> -    hwaddr len = iotlb->addr_mask + 1;
> -    bool writable = iotlb->perm & IOMMU_WO;
> -
> -    /*
> -     * The IOMMU TLB entry we have just covers translation through
> -     * this IOMMU to its immediate target.  We need to translate
> -     * it the rest of the way through to memory.
> -     */
> -    mr = address_space_translate(&address_space_memory,
> -                                 iotlb->translated_addr,
> -                                 &xlat, &len, writable,
> -                                 MEMTXATTRS_UNSPECIFIED);
> -    if (!memory_region_is_ram(mr)) {
> -        error_report("iommu map to non memory area %"HWADDR_PRIx"",
> -                     xlat);
> -        return false;
> -    } else if (memory_region_has_ram_discard_manager(mr)) {
> -        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
> -        MemoryRegionSection tmp = {
> -            .mr = mr,
> -            .offset_within_region = xlat,
> -            .size = int128_make64(len),
> -        };
> -
> -        /*
> -         * Malicious VMs can map memory into the IOMMU, which is expected
> -         * to remain discarded. vfio will pin all pages, populating memory.
> -         * Disallow that. vmstate priorities make sure any RamDiscardManager
> -         * were already restored before IOMMUs are restored.
> -         */
> -        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
> -            error_report("iommu map to discarded memory (e.g., unplugged via"
> -                         " virtio-mem): %"HWADDR_PRIx"",
> -                         iotlb->translated_addr);
> -            return false;
> -        }
> -
> -        /*
> -         * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
> -         * pages will remain pinned inside vfio until unmapped, resulting in a
> -         * higher memory consumption than expected. If memory would get
> -         * populated again later, there would be an inconsistency between pages
> -         * pinned by vfio and pages seen by QEMU. This is the case until
> -         * unmapped from the IOMMU (e.g., during device reset).
> -         *
> -         * With malicious guests, we really only care about pinning more memory
> -         * than expected. RLIMIT_MEMLOCK set for the user/process can never be
> -         * exceeded and can be used to mitigate this problem.
> -         */
> -        warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
> -                         " RAM (e.g., virtio-mem) works, however, malicious"
> -                         " guests can trigger pinning of more memory than"
> -                         " intended via an IOMMU. It's possible to mitigate "
> -                         " by setting/adjusting RLIMIT_MEMLOCK.");
> -    }
> -
> -    /*
> -     * Translation truncates length to the IOMMU page size,
> -     * check that it did not truncate too much.
> -     */
> -    if (len & iotlb->addr_mask) {
> -        error_report("iommu has granularity incompatible with target AS");
> -        return false;
> -    }
> -
> -    if (vaddr) {
> -        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> -    }
> -
> -    if (ram_addr) {
> -        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
> -    }
> -
> -    if (read_only) {
> -        *read_only = !writable || mr->readonly;
> -    }
> -
> -    return true;
> -}
> -
>  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>  {
>      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> @@ -681,10 +595,46 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>  
>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
>          bool read_only;
> -
> -        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
> +        bool mr_has_discard_manager;
> +
> +        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only,
> +                                  &mr_has_discard_manager)) {
> +            if (mr_has_discard_manager) {
> +                /*
> +                 * Malicious VMs can map memory into the IOMMU, which is
> +                 * expected to remain discarded. vfio will pin all pages,
> +                 * populating memory. Disallow that. vmstate priorities make
> +                 * sure any RamDiscardManager were already restored before
> +                 * IOMMUs are restored.
> +                 */
> +
> +                error_report(
> +                    "iommu map to discarded memory (e.g., unplugged via"
> +                    " virtio-mem): %" HWADDR_PRIx "",
> +                    iotlb->translated_addr);
> +            }
>              goto out;
>          }
> +        if (mr_has_discard_manager) {
> +            /*
> +             * Malicious VMs might trigger discarding of IOMMU-mapped memory.
> +             * The pages will remain pinned inside vfio until unmapped,
> +             * resulting in a higher memory consumption than expected. If memory
> +             * would get populated again later, there would be an inconsistency
> +             * between pages pinned by vfio and pages seen by QEMU. This is the
> +             * case until unmapped from the IOMMU (e.g., during device reset).
> +             *
> +             * With malicious guests, we really only care about pinning more
> +             * memory than expected. RLIMIT_MEMLOCK set for the user/process can
> +             * never be exceeded and can be used to mitigate this problem.
> +             */
> +            warn_report_once(
> +                "Using vfio with vIOMMUs and coordinated discarding of"
> +                " RAM (e.g., virtio-mem) works, however, malicious"
> +                " guests can trigger pinning of more memory than"
> +                " intended via an IOMMU. It's possible to mitigate "
> +                " by setting/adjusting RLIMIT_MEMLOCK.");
> +        }
>          /*
>           * vaddr is only valid until rcu_read_unlock(). But after
>           * vfio_dma_map has set up the mapping the pages will be
> @@ -1349,6 +1299,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>      VFIOContainer *container = giommu->container;
>      hwaddr iova = iotlb->iova + giommu->iommu_offset;
>      ram_addr_t translated_addr;
> +    bool mr_has_discard_manager;
>  
>      trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
>  
> @@ -1359,9 +1310,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>      }
>  
>      rcu_read_lock();
> -    if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
> +    if (memory_get_xlat_addr(iotlb, NULL, &translated_addr, NULL,
> +                             &mr_has_discard_manager)) {
>          int ret;
> -
>          ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
>                                      translated_addr);
>          if (ret) {
> @@ -1370,6 +1321,38 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>                           container, iova,
>                           iotlb->addr_mask + 1, ret);
>          }
> +        if (mr_has_discard_manager) {
> +            /*
> +             * Malicious VMs might trigger discarding of IOMMU-mapped memory.
> +             * The pages will remain pinned inside vfio until unmapped,
> +             * resulting in a higher memory consumption than expected. If memory
> +             * would get populated again later, there would be an inconsistency
> +             * between pages pinned by vfio and pages seen by QEMU. This is the
> +             * case until unmapped from the IOMMU (e.g., during device reset).
> +             *
> +             * With malicious guests, we really only care about pinning more
> +             * memory than expected. RLIMIT_MEMLOCK set for the user/process can
> +             * never be exceeded and can be used to mitigate this problem.
> +             */
> +            warn_report_once(
> +                "Using vfio with vIOMMUs and coordinated discarding of"
> +                " RAM (e.g., virtio-mem) works, however, malicious"
> +                " guests can trigger pinning of more memory than"
> +                " intended via an IOMMU. It's possible to mitigate "
> +                " by setting/adjusting RLIMIT_MEMLOCK.");
> +        }
> +    } else {
> +        /*
> +         * Malicious VMs can map memory into the IOMMU, which is expected
> +         * to remain discarded. vfio will pin all pages, populating memory.
> +         * Disallow that. vmstate priorities make sure any RamDiscardManager
> +         * were already restored before IOMMUs are restored.
> +         */
> +        if (mr_has_discard_manager) {
> +            error_report("iommu map to discarded memory (e.g., unplugged via"
> +                         " virtio-mem): %" HWADDR_PRIx "",
> +                         iotlb->translated_addr);
> +        }


Clearly vfio needs its own wrapper for this function rather than open
coding two identical comments and error reports.  I'm not sure why the
main function dropped the error report for the unpopulated discard
memory region when it triggers error reports for other bogus cases.
Thanks,

Alex

>      }
>      rcu_read_unlock();
>  }
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index bfb1de8eea..ed8b1e8e0e 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -713,6 +713,10 @@ void ram_discard_manager_register_listener(RamDiscardManager *rdm,
>  void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
>                                               RamDiscardListener *rdl);
>  
> +bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> +                          ram_addr_t *ram_addr, bool *read_only,
> +                          bool *mr_discard_populated);
> +
>  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
>  typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
>  
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 7ba2048836..b6ac5161e2 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -33,6 +33,7 @@
>  #include "qemu/accel.h"
>  #include "hw/boards.h"
>  #include "migration/vmstate.h"
> +#include "exec/address-spaces.h"
>  
>  //#define DEBUG_UNASSIGNED
>  
> @@ -2121,6 +2122,74 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
>      rdmc->unregister_listener(rdm, rdl);
>  }
>  
> +/* Called with rcu_read_lock held.  */
> +bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> +                          ram_addr_t *ram_addr, bool *read_only,
> +                          bool *mr_has_discard_manager)
> +{
> +    MemoryRegion *mr;
> +    hwaddr xlat;
> +    hwaddr len = iotlb->addr_mask + 1;
> +    bool writable = iotlb->perm & IOMMU_WO;
> +
> +    if (mr_has_discard_manager) {
> +        *mr_has_discard_manager = false;
> +    }
> +    /*
> +     * The IOMMU TLB entry we have just covers translation through
> +     * this IOMMU to its immediate target.  We need to translate
> +     * it the rest of the way through to memory.
> +     */
> +    mr = address_space_translate(&address_space_memory, iotlb->translated_addr,
> +                                 &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
> +    if (!memory_region_is_ram(mr)) {
> +        error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat);
> +        return false;
> +    } else if (memory_region_has_ram_discard_manager(mr)) {
> +        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
> +        MemoryRegionSection tmp = {
> +            .mr = mr,
> +            .offset_within_region = xlat,
> +            .size = int128_make64(len),
> +        };
> +        if (mr_has_discard_manager) {
> +            *mr_has_discard_manager = true;
> +        }
> +        /*
> +         * Malicious VMs can map memory into the IOMMU, which is expected
> +         * to remain discarded. vfio will pin all pages, populating memory.
> +         * Disallow that. vmstate priorities make sure any RamDiscardManager
> +         * were already restored before IOMMUs are restored.
> +         */
> +        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
> +            return false;
> +        }
> +    }
> +
> +    /*
> +     * Translation truncates length to the IOMMU page size,
> +     * check that it did not truncate too much.
> +     */
> +    if (len & iotlb->addr_mask) {
> +        error_report("iommu has granularity incompatible with target AS");
> +        return false;
> +    }
> +
> +    if (vaddr) {
> +        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +    }
> +
> +    if (ram_addr) {
> +        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
> +    }
> +
> +    if (read_only) {
> +        *read_only = !writable || mr->readonly;
> +    }
> +
> +    return true;
> +}
> +
>  void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
>  {
>      uint8_t mask = 1 << client;
Cindy Lu Oct. 30, 2022, 5:21 a.m. UTC | #2
On Sun, 30 Oct 2022 at 13:14, Alex Williamson
<alex.williamson@redhat.com> wrote:
>
> On Sun, 30 Oct 2022 12:35:54 +0800
> Cindy Lu <lulu@redhat.com> wrote:
>
> > - Move the function vfio_get_xlat_addr to softmmu/memory.c, and
> >   change the name to memory_get_xlat_addr(). So we can use this
> >   function on other devices, such as vDPA device.
> > - Add a new bool arg in this function, which shows whether the memory is
> >   backed by a discard manager. So the device can have its own warning.
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
> > ---
> >  hw/vfio/common.c      | 163 +++++++++++++++++++-----------------------
> >  include/exec/memory.h |   4 ++
> >  softmmu/memory.c      |  69 ++++++++++++++++++
> >  3 files changed, 146 insertions(+), 90 deletions(-)
> >
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index ace9562a9b..e958a4435f 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -574,92 +574,6 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
> >             section->offset_within_address_space & (1ULL << 63);
> >  }
> >
> > -/* Called with rcu_read_lock held.  */
> > -static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> > -                               ram_addr_t *ram_addr, bool *read_only)
> > -{
> > -    MemoryRegion *mr;
> > -    hwaddr xlat;
> > -    hwaddr len = iotlb->addr_mask + 1;
> > -    bool writable = iotlb->perm & IOMMU_WO;
> > -
> > -    /*
> > -     * The IOMMU TLB entry we have just covers translation through
> > -     * this IOMMU to its immediate target.  We need to translate
> > -     * it the rest of the way through to memory.
> > -     */
> > -    mr = address_space_translate(&address_space_memory,
> > -                                 iotlb->translated_addr,
> > -                                 &xlat, &len, writable,
> > -                                 MEMTXATTRS_UNSPECIFIED);
> > -    if (!memory_region_is_ram(mr)) {
> > -        error_report("iommu map to non memory area %"HWADDR_PRIx"",
> > -                     xlat);
> > -        return false;
> > -    } else if (memory_region_has_ram_discard_manager(mr)) {
> > -        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
> > -        MemoryRegionSection tmp = {
> > -            .mr = mr,
> > -            .offset_within_region = xlat,
> > -            .size = int128_make64(len),
> > -        };
> > -
> > -        /*
> > -         * Malicious VMs can map memory into the IOMMU, which is expected
> > -         * to remain discarded. vfio will pin all pages, populating memory.
> > -         * Disallow that. vmstate priorities make sure any RamDiscardManager
> > -         * were already restored before IOMMUs are restored.
> > -         */
> > -        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
> > -            error_report("iommu map to discarded memory (e.g., unplugged via"
> > -                         " virtio-mem): %"HWADDR_PRIx"",
> > -                         iotlb->translated_addr);
> > -            return false;
> > -        }
> > -
> > -        /*
> > -         * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
> > -         * pages will remain pinned inside vfio until unmapped, resulting in a
> > -         * higher memory consumption than expected. If memory would get
> > -         * populated again later, there would be an inconsistency between pages
> > -         * pinned by vfio and pages seen by QEMU. This is the case until
> > -         * unmapped from the IOMMU (e.g., during device reset).
> > -         *
> > -         * With malicious guests, we really only care about pinning more memory
> > -         * than expected. RLIMIT_MEMLOCK set for the user/process can never be
> > -         * exceeded and can be used to mitigate this problem.
> > -         */
> > -        warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
> > -                         " RAM (e.g., virtio-mem) works, however, malicious"
> > -                         " guests can trigger pinning of more memory than"
> > -                         " intended via an IOMMU. It's possible to mitigate "
> > -                         " by setting/adjusting RLIMIT_MEMLOCK.");
> > -    }
> > -
> > -    /*
> > -     * Translation truncates length to the IOMMU page size,
> > -     * check that it did not truncate too much.
> > -     */
> > -    if (len & iotlb->addr_mask) {
> > -        error_report("iommu has granularity incompatible with target AS");
> > -        return false;
> > -    }
> > -
> > -    if (vaddr) {
> > -        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> > -    }
> > -
> > -    if (ram_addr) {
> > -        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
> > -    }
> > -
> > -    if (read_only) {
> > -        *read_only = !writable || mr->readonly;
> > -    }
> > -
> > -    return true;
> > -}
> > -
> >  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >  {
> >      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> > @@ -681,10 +595,46 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >
> >      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> >          bool read_only;
> > -
> > -        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
> > +        bool mr_has_discard_manager;
> > +
> > +        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only,
> > +                                  &mr_has_discard_manager)) {
> > +            if (mr_has_discard_manager) {
> > +                /*
> > +                 * Malicious VMs can map memory into the IOMMU, which is
> > +                 * expected to remain discarded. vfio will pin all pages,
> > +                 * populating memory. Disallow that. vmstate priorities make
> > +                 * sure any RamDiscardManager were already restored before
> > +                 * IOMMUs are restored.
> > +                 */
> > +
> > +                error_report(
> > +                    "iommu map to discarded memory (e.g., unplugged via"
> > +                    " virtio-mem): %" HWADDR_PRIx "",
> > +                    iotlb->translated_addr);
> > +            }
> >              goto out;
> >          }
> > +        if (mr_has_discard_manager) {
> > +            /*
> > +             * Malicious VMs might trigger discarding of IOMMU-mapped memory.
> > +             * The pages will remain pinned inside vfio until unmapped,
> > +             * resulting in a higher memory consumption than expected. If memory
> > +             * would get populated again later, there would be an inconsistency
> > +             * between pages pinned by vfio and pages seen by QEMU. This is the
> > +             * case until unmapped from the IOMMU (e.g., during device reset).
> > +             *
> > +             * With malicious guests, we really only care about pinning more
> > +             * memory than expected. RLIMIT_MEMLOCK set for the user/process can
> > +             * never be exceeded and can be used to mitigate this problem.
> > +             */
> > +            warn_report_once(
> > +                "Using vfio with vIOMMUs and coordinated discarding of"
> > +                " RAM (e.g., virtio-mem) works, however, malicious"
> > +                " guests can trigger pinning of more memory than"
> > +                " intended via an IOMMU. It's possible to mitigate "
> > +                " by setting/adjusting RLIMIT_MEMLOCK.");
> > +        }
> >          /*
> >           * vaddr is only valid until rcu_read_unlock(). But after
> >           * vfio_dma_map has set up the mapping the pages will be
> > @@ -1349,6 +1299,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >      VFIOContainer *container = giommu->container;
> >      hwaddr iova = iotlb->iova + giommu->iommu_offset;
> >      ram_addr_t translated_addr;
> > +    bool mr_has_discard_manager;
> >
> >      trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
> >
> > @@ -1359,9 +1310,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >      }
> >
> >      rcu_read_lock();
> > -    if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
> > +    if (memory_get_xlat_addr(iotlb, NULL, &translated_addr, NULL,
> > +                             &mr_has_discard_manager)) {
> >          int ret;
> > -
> >          ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
> >                                      translated_addr);
> >          if (ret) {
> > @@ -1370,6 +1321,38 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >                           container, iova,
> >                           iotlb->addr_mask + 1, ret);
> >          }
> > +        if (mr_has_discard_manager) {
> > +            /*
> > +             * Malicious VMs might trigger discarding of IOMMU-mapped memory.
> > +             * The pages will remain pinned inside vfio until unmapped,
> > +             * resulting in a higher memory consumption than expected. If memory
> > +             * would get populated again later, there would be an inconsistency
> > +             * between pages pinned by vfio and pages seen by QEMU. This is the
> > +             * case until unmapped from the IOMMU (e.g., during device reset).
> > +             *
> > +             * With malicious guests, we really only care about pinning more
> > +             * memory than expected. RLIMIT_MEMLOCK set for the user/process can
> > +             * never be exceeded and can be used to mitigate this problem.
> > +             */
> > +            warn_report_once(
> > +                "Using vfio with vIOMMUs and coordinated discarding of"
> > +                " RAM (e.g., virtio-mem) works, however, malicious"
> > +                " guests can trigger pinning of more memory than"
> > +                " intended via an IOMMU. It's possible to mitigate "
> > +                " by setting/adjusting RLIMIT_MEMLOCK.");
> > +        }
> > +    } else {
> > +        /*
> > +         * Malicious VMs can map memory into the IOMMU, which is expected
> > +         * to remain discarded. vfio will pin all pages, populating memory.
> > +         * Disallow that. vmstate priorities make sure any RamDiscardManager
> > +         * were already restored before IOMMUs are restored.
> > +         */
> > +        if (mr_has_discard_manager) {
> > +            error_report("iommu map to discarded memory (e.g., unplugged via"
> > +                         " virtio-mem): %" HWADDR_PRIx "",
> > +                         iotlb->translated_addr);
> > +        }
>
>
> Clearly vfio needs its own wrapper for this function rather than open
> coding two identical comments and error reports.  I'm not sure why the
> main function dropped the error report for the unpopulated discard
> memory region when it triggers error reports for other bogus cases.
> Thanks,
>
> Alex
>
I was wondering whether other devices might want to have their own
error messages, so I moved it out of the main function. It makes sense
to keep it as it was in the old version.
I will send a new version soon. Thanks, Alex.
Thanks
Cindy
> >      }
> >      rcu_read_unlock();
> >  }
> > diff --git a/include/exec/memory.h b/include/exec/memory.h
> > index bfb1de8eea..ed8b1e8e0e 100644
> > --- a/include/exec/memory.h
> > +++ b/include/exec/memory.h
> > @@ -713,6 +713,10 @@ void ram_discard_manager_register_listener(RamDiscardManager *rdm,
> >  void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
> >                                               RamDiscardListener *rdl);
> >
> > +bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> > +                          ram_addr_t *ram_addr, bool *read_only,
> > +                          bool *mr_discard_populated);
> > +
> >  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
> >  typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
> >
> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index 7ba2048836..b6ac5161e2 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -33,6 +33,7 @@
> >  #include "qemu/accel.h"
> >  #include "hw/boards.h"
> >  #include "migration/vmstate.h"
> > +#include "exec/address-spaces.h"
> >
> >  //#define DEBUG_UNASSIGNED
> >
> > @@ -2121,6 +2122,74 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
> >      rdmc->unregister_listener(rdm, rdl);
> >  }
> >
> > +/* Called with rcu_read_lock held.  */
> > +bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> > +                          ram_addr_t *ram_addr, bool *read_only,
> > +                          bool *mr_has_discard_manager)
> > +{
> > +    MemoryRegion *mr;
> > +    hwaddr xlat;
> > +    hwaddr len = iotlb->addr_mask + 1;
> > +    bool writable = iotlb->perm & IOMMU_WO;
> > +
> > +    if (mr_has_discard_manager) {
> > +        *mr_has_discard_manager = false;
> > +    }
> > +    /*
> > +     * The IOMMU TLB entry we have just covers translation through
> > +     * this IOMMU to its immediate target.  We need to translate
> > +     * it the rest of the way through to memory.
> > +     */
> > +    mr = address_space_translate(&address_space_memory, iotlb->translated_addr,
> > +                                 &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
> > +    if (!memory_region_is_ram(mr)) {
> > +        error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat);
> > +        return false;
> > +    } else if (memory_region_has_ram_discard_manager(mr)) {
> > +        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
> > +        MemoryRegionSection tmp = {
> > +            .mr = mr,
> > +            .offset_within_region = xlat,
> > +            .size = int128_make64(len),
> > +        };
> > +        if (mr_has_discard_manager) {
> > +            *mr_has_discard_manager = true;
> > +        }
> > +        /*
> > +         * Malicious VMs can map memory into the IOMMU, which is expected
> > +         * to remain discarded. vfio will pin all pages, populating memory.
> > +         * Disallow that. vmstate priorities make sure any RamDiscardManager
> > +         * were already restored before IOMMUs are restored.
> > +         */
> > +        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
> > +            return false;
> > +        }
> > +    }
> > +
> > +    /*
> > +     * Translation truncates length to the IOMMU page size,
> > +     * check that it did not truncate too much.
> > +     */
> > +    if (len & iotlb->addr_mask) {
> > +        error_report("iommu has granularity incompatible with target AS");
> > +        return false;
> > +    }
> > +
> > +    if (vaddr) {
> > +        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
> > +    }
> > +
> > +    if (ram_addr) {
> > +        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
> > +    }
> > +
> > +    if (read_only) {
> > +        *read_only = !writable || mr->readonly;
> > +    }
> > +
> > +    return true;
> > +}
> > +
> >  void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
> >  {
> >      uint8_t mask = 1 << client;
>
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ace9562a9b..e958a4435f 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -574,92 +574,6 @@  static bool vfio_listener_skipped_section(MemoryRegionSection *section)
            section->offset_within_address_space & (1ULL << 63);
 }
 
-/* Called with rcu_read_lock held.  */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-                               ram_addr_t *ram_addr, bool *read_only)
-{
-    MemoryRegion *mr;
-    hwaddr xlat;
-    hwaddr len = iotlb->addr_mask + 1;
-    bool writable = iotlb->perm & IOMMU_WO;
-
-    /*
-     * The IOMMU TLB entry we have just covers translation through
-     * this IOMMU to its immediate target.  We need to translate
-     * it the rest of the way through to memory.
-     */
-    mr = address_space_translate(&address_space_memory,
-                                 iotlb->translated_addr,
-                                 &xlat, &len, writable,
-                                 MEMTXATTRS_UNSPECIFIED);
-    if (!memory_region_is_ram(mr)) {
-        error_report("iommu map to non memory area %"HWADDR_PRIx"",
-                     xlat);
-        return false;
-    } else if (memory_region_has_ram_discard_manager(mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
-        MemoryRegionSection tmp = {
-            .mr = mr,
-            .offset_within_region = xlat,
-            .size = int128_make64(len),
-        };
-
-        /*
-         * Malicious VMs can map memory into the IOMMU, which is expected
-         * to remain discarded. vfio will pin all pages, populating memory.
-         * Disallow that. vmstate priorities make sure any RamDiscardManager
-         * were already restored before IOMMUs are restored.
-         */
-        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
-            error_report("iommu map to discarded memory (e.g., unplugged via"
-                         " virtio-mem): %"HWADDR_PRIx"",
-                         iotlb->translated_addr);
-            return false;
-        }
-
-        /*
-         * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
-         * pages will remain pinned inside vfio until unmapped, resulting in a
-         * higher memory consumption than expected. If memory would get
-         * populated again later, there would be an inconsistency between pages
-         * pinned by vfio and pages seen by QEMU. This is the case until
-         * unmapped from the IOMMU (e.g., during device reset).
-         *
-         * With malicious guests, we really only care about pinning more memory
-         * than expected. RLIMIT_MEMLOCK set for the user/process can never be
-         * exceeded and can be used to mitigate this problem.
-         */
-        warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
-                         " RAM (e.g., virtio-mem) works, however, malicious"
-                         " guests can trigger pinning of more memory than"
-                         " intended via an IOMMU. It's possible to mitigate "
-                         " by setting/adjusting RLIMIT_MEMLOCK.");
-    }
-
-    /*
-     * Translation truncates length to the IOMMU page size,
-     * check that it did not truncate too much.
-     */
-    if (len & iotlb->addr_mask) {
-        error_report("iommu has granularity incompatible with target AS");
-        return false;
-    }
-
-    if (vaddr) {
-        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
-    }
-
-    if (ram_addr) {
-        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
-    }
-
-    if (read_only) {
-        *read_only = !writable || mr->readonly;
-    }
-
-    return true;
-}
-
 static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 {
     VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
@@ -681,10 +595,46 @@  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
         bool read_only;
-
-        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
+        bool mr_has_discard_manager;
+
+        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only,
+                                  &mr_has_discard_manager)) {
+            if (mr_has_discard_manager) {
+                /*
+                 * Malicious VMs can map memory into the IOMMU, which is
+                 * expected to remain discarded. vfio will pin all pages,
+                 * populating memory. Disallow that. vmstate priorities make
+                 * sure any RamDiscardManager were already restored before
+                 * IOMMUs are restored.
+                 */
+
+                error_report(
+                    "iommu map to discarded memory (e.g., unplugged via"
+                    " virtio-mem): %" HWADDR_PRIx "",
+                    iotlb->translated_addr);
+            }
             goto out;
         }
+        if (mr_has_discard_manager) {
+            /*
+             * Malicious VMs might trigger discarding of IOMMU-mapped memory.
+             * The pages will remain pinned inside vfio until unmapped,
+             * resulting in a higher memory consumption than expected. If memory
+             * would get populated again later, there would be an inconsistency
+             * between pages pinned by vfio and pages seen by QEMU. This is the
+             * case until unmapped from the IOMMU (e.g., during device reset).
+             *
+             * With malicious guests, we really only care about pinning more
+             * memory than expected. RLIMIT_MEMLOCK set for the user/process can
+             * never be exceeded and can be used to mitigate this problem.
+             */
+            warn_report_once(
+                "Using vfio with vIOMMUs and coordinated discarding of"
+                " RAM (e.g., virtio-mem) works, however, malicious"
+                " guests can trigger pinning of more memory than"
+                " intended via an IOMMU. It's possible to mitigate "
+                " by setting/adjusting RLIMIT_MEMLOCK.");
+        }
         /*
          * vaddr is only valid until rcu_read_unlock(). But after
          * vfio_dma_map has set up the mapping the pages will be
@@ -1349,6 +1299,7 @@  static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     VFIOContainer *container = giommu->container;
     hwaddr iova = iotlb->iova + giommu->iommu_offset;
     ram_addr_t translated_addr;
+    bool mr_has_discard_manager;
 
     trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
 
@@ -1359,9 +1310,9 @@  static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     }
 
     rcu_read_lock();
-    if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
+    if (memory_get_xlat_addr(iotlb, NULL, &translated_addr, NULL,
+                             &mr_has_discard_manager)) {
         int ret;
-
         ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
                                     translated_addr);
         if (ret) {
@@ -1370,6 +1321,38 @@  static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
                          container, iova,
                          iotlb->addr_mask + 1, ret);
         }
+        if (mr_has_discard_manager) {
+            /*
+             * Malicious VMs might trigger discarding of IOMMU-mapped memory.
+             * The pages will remain pinned inside vfio until unmapped,
+             * resulting in a higher memory consumption than expected. If memory
+             * would get populated again later, there would be an inconsistency
+             * between pages pinned by vfio and pages seen by QEMU. This is the
+             * case until unmapped from the IOMMU (e.g., during device reset).
+             *
+             * With malicious guests, we really only care about pinning more
+             * memory than expected. RLIMIT_MEMLOCK set for the user/process can
+             * never be exceeded and can be used to mitigate this problem.
+             */
+            warn_report_once(
+                "Using vfio with vIOMMUs and coordinated discarding of"
+                " RAM (e.g., virtio-mem) works, however, malicious"
+                " guests can trigger pinning of more memory than"
+                " intended via an IOMMU. It's possible to mitigate "
+                " by setting/adjusting RLIMIT_MEMLOCK.");
+        }
+    } else {
+        /*
+         * Malicious VMs can map memory into the IOMMU, which is expected
+         * to remain discarded. vfio will pin all pages, populating memory.
+         * Disallow that. vmstate priorities make sure any RamDiscardManager
+         * were already restored before IOMMUs are restored.
+         */
+        if (mr_has_discard_manager) {
+            error_report("iommu map to discarded memory (e.g., unplugged via"
+                         " virtio-mem): %" HWADDR_PRIx "",
+                         iotlb->translated_addr);
+        }
     }
     rcu_read_unlock();
 }
diff --git a/include/exec/memory.h b/include/exec/memory.h
index bfb1de8eea..ed8b1e8e0e 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -713,6 +713,10 @@  void ram_discard_manager_register_listener(RamDiscardManager *rdm,
 void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
                                              RamDiscardListener *rdl);
 
+bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
+                          ram_addr_t *ram_addr, bool *read_only,
+                          bool *mr_has_discard_manager);
+
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
 
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7ba2048836..b6ac5161e2 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -33,6 +33,7 @@ 
 #include "qemu/accel.h"
 #include "hw/boards.h"
 #include "migration/vmstate.h"
+#include "exec/address-spaces.h"
 
 //#define DEBUG_UNASSIGNED
 
@@ -2121,6 +2122,74 @@  void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
     rdmc->unregister_listener(rdm, rdl);
 }
 
+/* Called with rcu_read_lock held.  */
+bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
+                          ram_addr_t *ram_addr, bool *read_only,
+                          bool *mr_has_discard_manager)
+{
+    MemoryRegion *mr;
+    hwaddr xlat;
+    hwaddr len = iotlb->addr_mask + 1;
+    bool writable = iotlb->perm & IOMMU_WO;
+
+    if (mr_has_discard_manager) {
+        *mr_has_discard_manager = false;
+    }
+    /*
+     * The IOMMU TLB entry we have just covers translation through
+     * this IOMMU to its immediate target.  We need to translate
+     * it the rest of the way through to memory.
+     */
+    mr = address_space_translate(&address_space_memory, iotlb->translated_addr,
+                                 &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
+    if (!memory_region_is_ram(mr)) {
+        error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat);
+        return false;
+    } else if (memory_region_has_ram_discard_manager(mr)) {
+        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
+        MemoryRegionSection tmp = {
+            .mr = mr,
+            .offset_within_region = xlat,
+            .size = int128_make64(len),
+        };
+        if (mr_has_discard_manager) {
+            *mr_has_discard_manager = true;
+        }
+        /*
+         * Malicious VMs can map memory into the IOMMU, which is expected
+         * to remain discarded. vfio will pin all pages, populating memory.
+         * Disallow that. vmstate priorities make sure any RamDiscardManager
+         * were already restored before IOMMUs are restored.
+         */
+        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
+            return false;
+        }
+    }
+
+    /*
+     * Translation truncates length to the IOMMU page size,
+     * check that it did not truncate too much.
+     */
+    if (len & iotlb->addr_mask) {
+        error_report("iommu has granularity incompatible with target AS");
+        return false;
+    }
+
+    if (vaddr) {
+        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+    }
+
+    if (ram_addr) {
+        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
+    }
+
+    if (read_only) {
+        *read_only = !writable || mr->readonly;
+    }
+
+    return true;
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;