diff mbox series

[v3,12/12] intel-iommu: new sync_shadow_page_table

Message ID 20180517085927.24925-13-peterx@redhat.com
State New
Headers show
Series intel-iommu: nested vIOMMU, cleanups, bug fixes | expand

Commit Message

Peter Xu May 17, 2018, 8:59 a.m. UTC
Firstly, introduce the sync_shadow_page_table() helper to resync the
whole shadow page table of an IOMMU address space.  Meanwhile, when we
receive a domain invalidation or similar request (for example, context
entry invalidations, global invalidations, ...), we should not really
run the replay logic; instead, we can now use the new sync shadow page
table API to resync the whole shadow page table.

There will be two major differences:

1. We don't unmap-all before walking the page table, we just sync.  The
   global unmap-all can create a very small window in which the page
   table is invalid or incomplete

2. We only walk the page table once now (while replay can be triggered
   multiple times depending on how many notifiers there are)

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

Comments

Michael S. Tsirkin May 17, 2018, 9:06 p.m. UTC | #1
On Thu, May 17, 2018 at 04:59:27PM +0800, Peter Xu wrote:
> Firstly, introduce the sync_shadow_page_table() helper to resync the
> whole shadow page table of an IOMMU address space.  Meanwhile, when we
> receive domain invalidation or similar requests (for example, context
> entry invalidations, global invalidations, ...), we should not really
> run the replay logic, instead we can now use the new sync shadow page
> table API to resync the whole shadow page table.
> 
> There will be two major differences:
> 
> 1. We don't unmap-all before walking the page table, we just sync.  The
>    global unmap-all can create a very small window that the page table
>    is invalid or incomplete

The implication is that with vfio, device might stop working
without this change.


> 2. We only walk the page table once now (while replay can be triggered
>    multiple times depending on how many notifiers there are)
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/i386/intel_iommu.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index a1a2a009c1..fbb2f763f0 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1065,6 +1065,11 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
>      return vtd_page_walk(&ce_cache, addr, addr + size, &info);
>  }
>  
> +static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
> +{
> +    return vtd_sync_shadow_page_table_range(vtd_as, NULL, 0, UINT64_MAX);
> +}
> +
>  /*
>   * Fetch translation type for specific device. Returns <0 if error
>   * happens, otherwise return the shifted type to check against
> @@ -1397,7 +1402,7 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
>      VTDAddressSpace *vtd_as;
>  
>      QLIST_FOREACH(vtd_as, &s->notifiers_list, next) {
> -        memory_region_iommu_replay_all(&vtd_as->iommu);
> +        vtd_sync_shadow_page_table(vtd_as);
>      }
>  }
>  
> @@ -1470,14 +1475,13 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>                  vtd_switch_address_space(vtd_as);
>                  /*
>                   * So a device is moving out of (or moving into) a
> -                 * domain, a replay() suites here to notify all the
> -                 * IOMMU_NOTIFIER_MAP registers about this change.
> +                 * domain, resync the shadow page table.
>                   * This won't bring bad even if we have no such
>                   * notifier registered - the IOMMU notification
>                   * framework will skip MAP notifications if that
>                   * happened.
>                   */
> -                memory_region_iommu_replay_all(&vtd_as->iommu);
> +                vtd_sync_shadow_page_table(vtd_as);
>              }
>          }
>      }
> @@ -1535,7 +1539,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
>          if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>                                        vtd_as->devfn, &ce) &&
>              domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
> -            memory_region_iommu_replay_all(&vtd_as->iommu);
> +            vtd_sync_shadow_page_table(vtd_as);
>          }
>      }
>  }
> -- 
> 2.17.0
Peter Xu May 18, 2018, 6:22 a.m. UTC | #2
On Fri, May 18, 2018 at 12:06:14AM +0300, Michael S. Tsirkin wrote:
> On Thu, May 17, 2018 at 04:59:27PM +0800, Peter Xu wrote:
> > Firstly, introduce the sync_shadow_page_table() helper to resync the
> > whole shadow page table of an IOMMU address space.  Meanwhile, when we
> > receive domain invalidation or similar requests (for example, context
> > entry invalidations, global invalidations, ...), we should not really
> > run the replay logic, instead we can now use the new sync shadow page
> > table API to resync the whole shadow page table.
> > 
> > There will be two major differences:
> > 
> > 1. We don't unmap-all before walking the page table, we just sync.  The
> >    global unmap-all can create a very small window that the page table
> >    is invalid or incomplete
> 
> The implication is that with vfio, device might stop working
> without this change.

I guess it was working before.  However, patch 9 changed the page
walking to be stateful since we now have the IOVA tree (previously it
was stateless), so we might encounter the scp DMA error after patch 9.
I still kept patches 9-12 separate since logically they are isolated.
However, if you want, we can also squash patches 9-12 into one single
(but a bit huge) patch.

Thanks,
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a1a2a009c1..fbb2f763f0 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1065,6 +1065,11 @@  static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
     return vtd_page_walk(&ce_cache, addr, addr + size, &info);
 }
 
+static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
+{
+    return vtd_sync_shadow_page_table_range(vtd_as, NULL, 0, UINT64_MAX);
+}
+
 /*
  * Fetch translation type for specific device. Returns <0 if error
  * happens, otherwise return the shifted type to check against
@@ -1397,7 +1402,7 @@  static void vtd_iommu_replay_all(IntelIOMMUState *s)
     VTDAddressSpace *vtd_as;
 
     QLIST_FOREACH(vtd_as, &s->notifiers_list, next) {
-        memory_region_iommu_replay_all(&vtd_as->iommu);
+        vtd_sync_shadow_page_table(vtd_as);
     }
 }
 
@@ -1470,14 +1475,13 @@  static void vtd_context_device_invalidate(IntelIOMMUState *s,
                 vtd_switch_address_space(vtd_as);
                 /*
                  * So a device is moving out of (or moving into) a
-                 * domain, a replay() suites here to notify all the
-                 * IOMMU_NOTIFIER_MAP registers about this change.
+                 * domain, resync the shadow page table.
                  * This won't bring bad even if we have no such
                  * notifier registered - the IOMMU notification
                  * framework will skip MAP notifications if that
                  * happened.
                  */
-                memory_region_iommu_replay_all(&vtd_as->iommu);
+                vtd_sync_shadow_page_table(vtd_as);
             }
         }
     }
@@ -1535,7 +1539,7 @@  static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
         if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                       vtd_as->devfn, &ce) &&
             domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
-            memory_region_iommu_replay_all(&vtd_as->iommu);
+            vtd_sync_shadow_page_table(vtd_as);
         }
     }
 }