diff mbox

[v4,10/10] vhost: iommu: cache static mapping if there is

Message ID 1495163989-9994-11-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu May 19, 2017, 3:19 a.m. UTC
This patch pre-heats the vhost IOTLB cache when passthrough mode is enabled.

Sometimes, even if the user specified iommu_platform for vhost devices,
the IOMMU might still be disabled. One case is passthrough mode in the
VT-d implementation. We can detect this by observing iommu_list: if it's
empty, it means IOMMU translation is disabled, so we can actually
pre-heat the translation (it'll be a static mapping then) by first
invalidating the whole IOTLB, then caching the existing memory ranges
into the vhost backend IOTLB using a 1:1 mapping.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/virtio/trace-events |  4 ++++
 hw/virtio/vhost.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

Comments

Michael S. Tsirkin May 19, 2017, 4:55 p.m. UTC | #1
On Fri, May 19, 2017 at 11:19:49AM +0800, Peter Xu wrote:
> This patch pre-heat vhost iotlb cache when passthrough mode enabled.
> 
> Sometimes, even if user specified iommu_platform for vhost devices,
> IOMMU might still be disabled. One case is passthrough mode in VT-d
> implementation. We can detect this by observing iommu_list. If it's
> empty, it means IOMMU translation is disabled, then we can actually
> pre-heat the translation (it'll be static mapping then) by first
> invalidating all IOTLB, then cache existing memory ranges into vhost
> backend iotlb using 1:1 mapping.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>

I don't really understand. Is this a performance optimization?
Can you post some #s please?

Also, if it's PT, can't we bypass iommu altogether? That would be
even faster ...

> ---
>  hw/virtio/trace-events |  4 ++++
>  hw/virtio/vhost.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 53 insertions(+)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 1f7a7c1..54dcbb3 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
>  virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
>  virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
>  virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
> +
> +# hw/virtio/vhost.c
> +vhost_iommu_commit(void) ""
> +vhost_iommu_static_preheat(void) ""
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 03a46a7..8069135 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -27,6 +27,7 @@
>  #include "hw/virtio/virtio-access.h"
>  #include "migration/blocker.h"
>  #include "sysemu/dma.h"
> +#include "trace.h"
>  
>  /* enabled until disconnected backend stabilizes */
>  #define _VHOST_DEBUG 1
> @@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>      }
>  }
>  
> +static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
> +{
> +    return !QLIST_EMPTY(&dev->iommu_list);
> +}
> +
>  static void vhost_iommu_region_add(MemoryListener *listener,
>                                     MemoryRegionSection *section)
>  {
> @@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
>      }
>  }
>  
> +static void vhost_iommu_commit(MemoryListener *listener)
> +{
> +    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> +                                         iommu_listener);
> +    struct vhost_memory_region *r;
> +    int i;
> +
> +    trace_vhost_iommu_commit();
> +
> +    if (!vhost_iommu_mr_enabled(dev)) {
> +        /*
> +        * This means iommu_platform is enabled, however iommu memory
> +        * region is disabled, e.g., when device passthrough is setup.
> +        * Then, no translation is needed any more.
> +        *
> +        * Let's first invalidate the whole IOTLB, then pre-heat the
> +        * static mapping by looping over vhost memory ranges.
> +        */
> +
> +        if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
> +                                                          UINT64_MAX)) {
> +            error_report("%s: flush existing IOTLB failed", __func__);
> +            return;
> +        }
> +
> +        for (i = 0; i < dev->mem->nregions; i++) {
> +            r = &dev->mem->regions[i];
> +            /* Vhost regions are writable RAM, so IOMMU_RW suites. */
> +            if (dev->vhost_ops->vhost_update_device_iotlb(dev,
> +                                                          r->guest_phys_addr,
> +                                                          r->userspace_addr,
> +                                                          r->memory_size,
> +                                                          IOMMU_RW)) {
> +                error_report("%s: pre-heat static mapping failed", __func__);
> +                return;
> +            }
> +        }
> +
> +        trace_vhost_iommu_static_preheat();
> +    }
> +}
> +
>  static void vhost_region_nop(MemoryListener *listener,
>                               MemoryRegionSection *section)
>  {
> @@ -1298,6 +1346,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>      hdev->iommu_listener = (MemoryListener) {
>          .region_add = vhost_iommu_region_add,
>          .region_del = vhost_iommu_region_del,
> +        .commit = vhost_iommu_commit,
>      };
>  
>      if (hdev->migration_blocker == NULL) {
> -- 
> 2.7.4
Jason Wang May 22, 2017, 2:30 a.m. UTC | #2
On 2017年05月20日 00:55, Michael S. Tsirkin wrote:
> On Fri, May 19, 2017 at 11:19:49AM +0800, Peter Xu wrote:
>> This patch pre-heat vhost iotlb cache when passthrough mode enabled.
>>
>> Sometimes, even if user specified iommu_platform for vhost devices,
>> IOMMU might still be disabled. One case is passthrough mode in VT-d
>> implementation. We can detect this by observing iommu_list. If it's
>> empty, it means IOMMU translation is disabled, then we can actually
>> pre-heat the translation (it'll be static mapping then) by first
>> invalidating all IOTLB, then cache existing memory ranges into vhost
>> backend iotlb using 1:1 mapping.
>>
>> Signed-off-by: Peter Xu<peterx@redhat.com>
> I don't really understand. Is this a performance optimization?
> Can you post some #s please?
>
> Also, if it's PT, can't we bypass iommu altogether?

The problem is that, since the device could be moved between domains, we 
need a new notifier to tell vhost to enable or disable IOMMU_PLATFORM.

> That would be
> even faster ...
>

It should be the same (except for the first access in non-CM mode): we pass 
and use vhost_memory_regions just as we do for the non-IOMMU case.

Thanks
Peter Xu May 22, 2017, 2:42 a.m. UTC | #3
On Fri, May 19, 2017 at 07:55:26PM +0300, Michael S. Tsirkin wrote:
> On Fri, May 19, 2017 at 11:19:49AM +0800, Peter Xu wrote:
> > This patch pre-heat vhost iotlb cache when passthrough mode enabled.
> > 
> > Sometimes, even if user specified iommu_platform for vhost devices,
> > IOMMU might still be disabled. One case is passthrough mode in VT-d
> > implementation. We can detect this by observing iommu_list. If it's
> > empty, it means IOMMU translation is disabled, then we can actually
> > pre-heat the translation (it'll be static mapping then) by first
> > invalidating all IOTLB, then cache existing memory ranges into vhost
> > backend iotlb using 1:1 mapping.
> > 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> 
> I don't really understand. Is this a performance optimization?
> Can you post some #s please?

Yes, it is. Vhost can work even without this patch, but it should be
faster with it.

As mentioned in the commit message and the comment below [1], this patch
pre-heats the cache for vhost. Currently the number of cache entries
depends on the system memory ranges (dev->mem->nregions), and it should
be far smaller than vhost's cache capacity (currently statically defined
as max_iotlb_entries=2048 in the kernel). With the current patch, these
cache entries can cover the whole possible DMA range that PT mode
would allow, so we won't have any cache misses then.

For the comments, do you have any better suggestion besides commit
message and [1]?

> 
> Also, if it's PT, can't we bypass iommu altogether? That would be
> even faster ...

Yes, but I don't yet know a good way to do it... Any suggestion is
welcomed as well.

Btw, do you have any comment on other patches besides this one? Since
this patch can really be isolated from the whole PT support series.

Thanks,

> 
> > ---
> >  hw/virtio/trace-events |  4 ++++
> >  hw/virtio/vhost.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 53 insertions(+)
> > 
> > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > index 1f7a7c1..54dcbb3 100644
> > --- a/hw/virtio/trace-events
> > +++ b/hw/virtio/trace-events
> > @@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
> >  virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
> >  virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
> >  virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
> > +
> > +# hw/virtio/vhost.c
> > +vhost_iommu_commit(void) ""
> > +vhost_iommu_static_preheat(void) ""
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 03a46a7..8069135 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -27,6 +27,7 @@
> >  #include "hw/virtio/virtio-access.h"
> >  #include "migration/blocker.h"
> >  #include "sysemu/dma.h"
> > +#include "trace.h"
> >  
> >  /* enabled until disconnected backend stabilizes */
> >  #define _VHOST_DEBUG 1
> > @@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> >      }
> >  }
> >  
> > +static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
> > +{
> > +    return !QLIST_EMPTY(&dev->iommu_list);
> > +}
> > +
> >  static void vhost_iommu_region_add(MemoryListener *listener,
> >                                     MemoryRegionSection *section)
> >  {
> > @@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
> >      }
> >  }
> >  
> > +static void vhost_iommu_commit(MemoryListener *listener)
> > +{
> > +    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> > +                                         iommu_listener);
> > +    struct vhost_memory_region *r;
> > +    int i;
> > +
> > +    trace_vhost_iommu_commit();
> > +
> > +    if (!vhost_iommu_mr_enabled(dev)) {
> > +        /*
> > +        * This means iommu_platform is enabled, however iommu memory
> > +        * region is disabled, e.g., when device passthrough is setup.
> > +        * Then, no translation is needed any more.
> > +        *
> > +        * Let's first invalidate the whole IOTLB, then pre-heat the
> > +        * static mapping by looping over vhost memory ranges.
> > +        */

[1]

> > +
> > +        if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
> > +                                                          UINT64_MAX)) {
> > +            error_report("%s: flush existing IOTLB failed", __func__);
> > +            return;
> > +        }
> > +
> > +        for (i = 0; i < dev->mem->nregions; i++) {
> > +            r = &dev->mem->regions[i];
> > +            /* Vhost regions are writable RAM, so IOMMU_RW suites. */
> > +            if (dev->vhost_ops->vhost_update_device_iotlb(dev,
> > +                                                          r->guest_phys_addr,
> > +                                                          r->userspace_addr,
> > +                                                          r->memory_size,
> > +                                                          IOMMU_RW)) {
> > +                error_report("%s: pre-heat static mapping failed", __func__);
> > +                return;
> > +            }
> > +        }
> > +
> > +        trace_vhost_iommu_static_preheat();
> > +    }
> > +}
> > +
> >  static void vhost_region_nop(MemoryListener *listener,
> >                               MemoryRegionSection *section)
> >  {
> > @@ -1298,6 +1346,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
> >      hdev->iommu_listener = (MemoryListener) {
> >          .region_add = vhost_iommu_region_add,
> >          .region_del = vhost_iommu_region_del,
> > +        .commit = vhost_iommu_commit,
> >      };
> >  
> >      if (hdev->migration_blocker == NULL) {
> > -- 
> > 2.7.4
Michael S. Tsirkin May 25, 2017, 6:14 p.m. UTC | #4
On Mon, May 22, 2017 at 10:42:00AM +0800, Peter Xu wrote:
> On Fri, May 19, 2017 at 07:55:26PM +0300, Michael S. Tsirkin wrote:
> > On Fri, May 19, 2017 at 11:19:49AM +0800, Peter Xu wrote:
> > > This patch pre-heat vhost iotlb cache when passthrough mode enabled.
> > > 
> > > Sometimes, even if user specified iommu_platform for vhost devices,
> > > IOMMU might still be disabled. One case is passthrough mode in VT-d
> > > implementation. We can detect this by observing iommu_list. If it's
> > > empty, it means IOMMU translation is disabled, then we can actually
> > > pre-heat the translation (it'll be static mapping then) by first
> > > invalidating all IOTLB, then cache existing memory ranges into vhost
> > > backend iotlb using 1:1 mapping.
> > > 
> > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > 
> > I don't really understand. Is this a performance optimization?
> > Can you post some #s please?
> 
> Yes, it is. Vhost can work even without this patch, but it should be
> faster when with this patch.

You'll have to include perf testing numbers then.

> As mentioned in the commit message and below comment [1], this patch
> pre-heat the cache for vhost. Currently the cache entries depends on
> the system memory ranges (dev->mem->nregions), and it should be far
> smaller than vhost's cache count (currently it is statically defined
> as max_iotlb_entries=2048 in kernel). If with current patch, these
> cache entries can cover the whole possible DMA ranges that PT mode
> would allow, so we won't have any cache miss then.
> 
> For the comments, do you have any better suggestion besides commit
> message and [1]?
> 
> > 
> > Also, if it's PT, can't we bypass iommu altogether? That would be
> > even faster ...
> 
> Yes, but I don't yet know a good way to do it... Any suggestion is
> welcomed as well.
> 
> Btw, do you have any comment on other patches besides this one? Since
> this patch can really be isolated from the whole PT support series.
> 
> Thanks,

I've applied the rest of the series.

> > 
> > > ---
> > >  hw/virtio/trace-events |  4 ++++
> > >  hw/virtio/vhost.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 53 insertions(+)
> > > 
> > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > index 1f7a7c1..54dcbb3 100644
> > > --- a/hw/virtio/trace-events
> > > +++ b/hw/virtio/trace-events
> > > @@ -24,3 +24,7 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
> > >  virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
> > >  virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
> > >  virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
> > > +
> > > +# hw/virtio/vhost.c
> > > +vhost_iommu_commit(void) ""
> > > +vhost_iommu_static_preheat(void) ""
> > > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > > index 03a46a7..8069135 100644
> > > --- a/hw/virtio/vhost.c
> > > +++ b/hw/virtio/vhost.c
> > > @@ -27,6 +27,7 @@
> > >  #include "hw/virtio/virtio-access.h"
> > >  #include "migration/blocker.h"
> > >  #include "sysemu/dma.h"
> > > +#include "trace.h"
> > >  
> > >  /* enabled until disconnected backend stabilizes */
> > >  #define _VHOST_DEBUG 1
> > > @@ -730,6 +731,11 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> > >      }
> > >  }
> > >  
> > > +static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
> > > +{
> > > +    return !QLIST_EMPTY(&dev->iommu_list);
> > > +}
> > > +
> > >  static void vhost_iommu_region_add(MemoryListener *listener,
> > >                                     MemoryRegionSection *section)
> > >  {
> > > @@ -782,6 +788,48 @@ static void vhost_iommu_region_del(MemoryListener *listener,
> > >      }
> > >  }
> > >  
> > > +static void vhost_iommu_commit(MemoryListener *listener)
> > > +{
> > > +    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> > > +                                         iommu_listener);
> > > +    struct vhost_memory_region *r;
> > > +    int i;
> > > +
> > > +    trace_vhost_iommu_commit();
> > > +
> > > +    if (!vhost_iommu_mr_enabled(dev)) {
> > > +        /*
> > > +        * This means iommu_platform is enabled, however iommu memory
> > > +        * region is disabled, e.g., when device passthrough is setup.
> > > +        * Then, no translation is needed any more.
> > > +        *
> > > +        * Let's first invalidate the whole IOTLB, then pre-heat the
> > > +        * static mapping by looping over vhost memory ranges.
> > > +        */
> 
> [1]
> 
> > > +
> > > +        if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
> > > +                                                          UINT64_MAX)) {
> > > +            error_report("%s: flush existing IOTLB failed", __func__);
> > > +            return;
> > > +        }
> > > +
> > > +        for (i = 0; i < dev->mem->nregions; i++) {
> > > +            r = &dev->mem->regions[i];
> > > +            /* Vhost regions are writable RAM, so IOMMU_RW suites. */
> > > +            if (dev->vhost_ops->vhost_update_device_iotlb(dev,
> > > +                                                          r->guest_phys_addr,
> > > +                                                          r->userspace_addr,
> > > +                                                          r->memory_size,
> > > +                                                          IOMMU_RW)) {
> > > +                error_report("%s: pre-heat static mapping failed", __func__);
> > > +                return;
> > > +            }
> > > +        }
> > > +
> > > +        trace_vhost_iommu_static_preheat();
> > > +    }
> > > +}
> > > +
> > >  static void vhost_region_nop(MemoryListener *listener,
> > >                               MemoryRegionSection *section)
> > >  {
> > > @@ -1298,6 +1346,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
> > >      hdev->iommu_listener = (MemoryListener) {
> > >          .region_add = vhost_iommu_region_add,
> > >          .region_del = vhost_iommu_region_del,
> > > +        .commit = vhost_iommu_commit,
> > >      };
> > >  
> > >      if (hdev->migration_blocker == NULL) {
> > > -- 
> > > 2.7.4
> 
> -- 
> Peter Xu
Peter Xu May 29, 2017, 4:29 a.m. UTC | #5
On Thu, May 25, 2017 at 09:14:55PM +0300, Michael S. Tsirkin wrote:
> On Mon, May 22, 2017 at 10:42:00AM +0800, Peter Xu wrote:
> > On Fri, May 19, 2017 at 07:55:26PM +0300, Michael S. Tsirkin wrote:
> > > On Fri, May 19, 2017 at 11:19:49AM +0800, Peter Xu wrote:
> > > > This patch pre-heat vhost iotlb cache when passthrough mode enabled.
> > > > 
> > > > Sometimes, even if user specified iommu_platform for vhost devices,
> > > > IOMMU might still be disabled. One case is passthrough mode in VT-d
> > > > implementation. We can detect this by observing iommu_list. If it's
> > > > empty, it means IOMMU translation is disabled, then we can actually
> > > > pre-heat the translation (it'll be static mapping then) by first
> > > > invalidating all IOTLB, then cache existing memory ranges into vhost
> > > > backend iotlb using 1:1 mapping.
> > > > 
> > > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > 
> > > I don't really understand. Is this a performance optimization?
> > > Can you post some #s please?
> > 
> > Yes, it is. Vhost can work even without this patch, but it should be
> > faster when with this patch.
> 
> You'll have to include perf testing numbers then.

My mistake not to have compared the numbers before; it just seemed so
obvious to me that this patch would help.

However, after some simple streaming tests, it turns out that this patch
didn't boost performance at all. I added some traces in the vhost kernel
to see what happened; please see below.

Without this patch, boot with iommu=pt, I see IOTLB cache insertion
like this:

vhost_process_iotlb_msg: iotlb new: 1 (0x17879b240-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 2 (0x17879d240-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 3 (0x17879a000-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 4 (0x178570000-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 5 (0x178532606-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 6 (0x177bad0e2-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 7 (0x1768560e2-0x17fffffff)

(Note: we can see the pattern is (ADDR-0x17fffffff), while ADDR is
 increasing, finally the range will cover all addresses that vhost
 needs for DMA, then we won't have cache miss, and 0x17fffffff is the
 upper limit for a 4G memory guest)

While after this patch (this is expected):

vhost_process_iotlb_msg: iotlb new: 1 (0x100000000-0x17fffffff)
vhost_process_iotlb_msg: iotlb new: 2 (0x0-0x9ffff)
vhost_process_iotlb_msg: iotlb new: 3 (0xc0000-0x7fffffff)

(Note: this is one entry per RAM memory region)

So that explains why performance didn't really change even before
applying this patch: currently, when iommu=pt is on,
address_space_get_iotlb_entry() can return an IOTLB entry that is bigger
than the page size (if you look at the code, plen decides the page mask,
and plen is only limited by memory region sizes when PT is enabled). So
by the 7th cache-miss IOTLB request, the range is big enough to cover
the rest of the DMA addresses.

My preference is that we still apply this patch even if there is no
performance gain in the simple streaming test. Reasons:

- the old code's good performance depends on the implementation of
  address_space_get_iotlb_entry(), which may change in the future

- after applying the patch, we are 100% sure that we won't see a cache
  miss, while we cannot guarantee that without it. Without the patch,
  we may still encounter cache misses (e.g., accessing an address
  <0x1768560e2 after the 7th cache miss in the above test), which can
  cause that cache-missed IO to be delayed.

>
> > As mentioned in the commit message and below comment [1], this patch
> > pre-heat the cache for vhost. Currently the cache entries depends on
> > the system memory ranges (dev->mem->nregions), and it should be far
> > smaller than vhost's cache count (currently it is statically defined
> > as max_iotlb_entries=2048 in kernel). If with current patch, these
> > cache entries can cover the whole possible DMA ranges that PT mode
> > would allow, so we won't have any cache miss then.
> > 
> > For the comments, do you have any better suggestion besides commit
> > message and [1]?
> > 
> > > 
> > > Also, if it's PT, can't we bypass iommu altogether? That would be
> > > even faster ...
> > 
> > Yes, but I don't yet know a good way to do it... Any suggestion is
> > welcomed as well.
> > 
> > Btw, do you have any comment on other patches besides this one? Since
> > this patch can really be isolated from the whole PT support series.
> > 
> > Thanks,
> 
> I've applied the rest of the series.

Thank you very much.
diff mbox

Patch

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 1f7a7c1..54dcbb3 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -24,3 +24,7 @@  virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g
 virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
 virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
 virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
+
+# hw/virtio/vhost.c
+vhost_iommu_commit(void) ""
+vhost_iommu_static_preheat(void) ""
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 03a46a7..8069135 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -27,6 +27,7 @@ 
 #include "hw/virtio/virtio-access.h"
 #include "migration/blocker.h"
 #include "sysemu/dma.h"
+#include "trace.h"
 
 /* enabled until disconnected backend stabilizes */
 #define _VHOST_DEBUG 1
@@ -730,6 +731,11 @@  static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     }
 }
 
+static bool vhost_iommu_mr_enabled(struct vhost_dev *dev)
+{
+    return !QLIST_EMPTY(&dev->iommu_list);
+}
+
 static void vhost_iommu_region_add(MemoryListener *listener,
                                    MemoryRegionSection *section)
 {
@@ -782,6 +788,48 @@  static void vhost_iommu_region_del(MemoryListener *listener,
     }
 }
 
+static void vhost_iommu_commit(MemoryListener *listener)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         iommu_listener);
+    struct vhost_memory_region *r;
+    int i;
+
+    trace_vhost_iommu_commit();
+
+    if (!vhost_iommu_mr_enabled(dev)) {
+        /*
+        * This means iommu_platform is enabled, however iommu memory
+        * region is disabled, e.g., when device passthrough is setup.
+        * Then, no translation is needed any more.
+        *
+        * Let's first invalidate the whole IOTLB, then pre-heat the
+        * static mapping by looping over vhost memory ranges.
+        */
+
+        if (dev->vhost_ops->vhost_invalidate_device_iotlb(dev, 0,
+                                                          UINT64_MAX)) {
+            error_report("%s: flush existing IOTLB failed", __func__);
+            return;
+        }
+
+        for (i = 0; i < dev->mem->nregions; i++) {
+            r = &dev->mem->regions[i];
+            /* Vhost regions are writable RAM, so IOMMU_RW suites. */
+            if (dev->vhost_ops->vhost_update_device_iotlb(dev,
+                                                          r->guest_phys_addr,
+                                                          r->userspace_addr,
+                                                          r->memory_size,
+                                                          IOMMU_RW)) {
+                error_report("%s: pre-heat static mapping failed", __func__);
+                return;
+            }
+        }
+
+        trace_vhost_iommu_static_preheat();
+    }
+}
+
 static void vhost_region_nop(MemoryListener *listener,
                              MemoryRegionSection *section)
 {
@@ -1298,6 +1346,7 @@  int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
     hdev->iommu_listener = (MemoryListener) {
         .region_add = vhost_iommu_region_add,
         .region_del = vhost_iommu_region_del,
+        .commit = vhost_iommu_commit,
     };
 
     if (hdev->migration_blocker == NULL) {