diff mbox

[RFC,06/10] vfio: Allow hotplug of containers onto existing guest IOMMU mappings

Message ID 1442495357-26547-7-git-send-email-david@gibson.dropbear.id.au
State New
Headers show

Commit Message

David Gibson Sept. 17, 2015, 1:09 p.m. UTC
At present the memory listener used by vfio to keep host IOMMU mappings
in sync with the guest memory image assumes that if a guest IOMMU
appears, then it has no existing mappings.

This may not be true if a VFIO device is hotplugged onto a guest bus
which didn't previously include a VFIO device, and which has existing
guest IOMMU mappings.

Therefore, use the memory_region_register_iommu_notifier_replay()
function in order to fix this case, replaying existing guest IOMMU
mappings, bringing the host IOMMU into sync with the guest IOMMU.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/vfio/common.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

Comments

Alex Williamson Sept. 17, 2015, 4:54 p.m. UTC | #1
On Thu, 2015-09-17 at 23:09 +1000, David Gibson wrote:
> At present the memory listener used by vfio to keep host IOMMU mappings
> in sync with the guest memory image assumes that if a guest IOMMU
> appears, then it has no existing mappings.
> 
> This may not be true if a VFIO device is hotplugged onto a guest bus
> which didn't previously include a VFIO device, and which has existing
> guest IOMMU mappings.
> 
> Therefore, use the memory_region_register_iommu_notifier_replay()
> function in order to fix this case, replaying existing guest IOMMU
> mappings, bringing the host IOMMU into sync with the guest IOMMU.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/vfio/common.c | 34 +++++++++++++++++++---------------
>  1 file changed, 19 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index daaac48..543c38e 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -312,6 +312,22 @@ out:
>      rcu_read_unlock();
>  }
>  
> +static hwaddr vfio_container_granularity(VFIOContainer *container)
> +{
> +    uint64_t pgsize;
> +
> +    assert(container->iommu_data.iova_pgsizes);

return (hwaddr)1 << (ffsl(container->iommu_data.iova_pgsizes) - 1);

?

> +
> +    /* Find the smallest page size supported by the IOMMU */
> +    for (pgsize = 1; pgsize; pgsize <<= 1) {
> +        if (pgsize & container->iommu_data.iova_pgsizes) {
> +            return pgsize;
> +        }
> +    }
> +    /* Can't happen */
> +    assert(0);
> +}
> +
>  static void vfio_listener_region_add(MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> @@ -371,26 +387,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
>           * would be the right place to wire that up (tell the KVM
>           * device emulation the VFIO iommu handles to use).
>           */
> -        /*
> -         * This assumes that the guest IOMMU is empty of
> -         * mappings at this point.
> -         *
> -         * One way of doing this is:
> -         * 1. Avoid sharing IOMMUs between emulated devices or different
> -         * IOMMU groups.
> -         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
> -         * there are some mappings in IOMMU.
> -         *
> -         * VFIO on SPAPR does that. Other IOMMU models may do that different,
> -         * they must make sure there are no existing mappings or
> -         * loop through existing mappings to map them into VFIO.
> -         */
>          giommu = g_malloc0(sizeof(*giommu));
>          giommu->iommu = section->mr;
>          giommu->container = container;
>          giommu->n.notify = vfio_iommu_map_notify;
>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> -        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +
> +        memory_region_register_iommu_notifier_replay(giommu->iommu, &giommu->n,
> +            vfio_container_granularity(container), false);
>  
>          return;
>      }
David Gibson Sept. 17, 2015, 11:31 p.m. UTC | #2
On Thu, Sep 17, 2015 at 10:54:24AM -0600, Alex Williamson wrote:
> On Thu, 2015-09-17 at 23:09 +1000, David Gibson wrote:
> > At present the memory listener used by vfio to keep host IOMMU mappings
> > in sync with the guest memory image assumes that if a guest IOMMU
> > appears, then it has no existing mappings.
> > 
> > This may not be true if a VFIO device is hotplugged onto a guest bus
> > which didn't previously include a VFIO device, and which has existing
> > guest IOMMU mappings.
> > 
> > Therefore, use the memory_region_register_iommu_notifier_replay()
> > function in order to fix this case, replaying existing guest IOMMU
> > mappings, bringing the host IOMMU into sync with the guest IOMMU.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/vfio/common.c | 34 +++++++++++++++++++---------------
> >  1 file changed, 19 insertions(+), 15 deletions(-)
> > 
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index daaac48..543c38e 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -312,6 +312,22 @@ out:
> >      rcu_read_unlock();
> >  }
> >  
> > +static hwaddr vfio_container_granularity(VFIOContainer *container)
> > +{
> > +    uint64_t pgsize;
> > +
> > +    assert(container->iommu_data.iova_pgsizes);
> 
> return (hwaddr)1 << (ffsl(container->iommu_data.iova_pgsizes) - 1);

Ah, yes, that should work.  I didn't do it that way mostly because I
tend to confuse myself when I try to remember exactly how ffs
semantics work.
Thomas Huth Sept. 23, 2015, 11:02 a.m. UTC | #3
On 18/09/15 01:31, David Gibson wrote:
> On Thu, Sep 17, 2015 at 10:54:24AM -0600, Alex Williamson wrote:
>> On Thu, 2015-09-17 at 23:09 +1000, David Gibson wrote:
>>> At present the memory listener used by vfio to keep host IOMMU mappings
>>> in sync with the guest memory image assumes that if a guest IOMMU
>>> appears, then it has no existing mappings.
>>>
>>> This may not be true if a VFIO device is hotplugged onto a guest bus
>>> which didn't previously include a VFIO device, and which has existing
>>> guest IOMMU mappings.
>>>
>>> Therefore, use the memory_region_register_iommu_notifier_replay()
>>> function in order to fix this case, replaying existing guest IOMMU
>>> mappings, bringing the host IOMMU into sync with the guest IOMMU.
>>>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> ---
>>>  hw/vfio/common.c | 34 +++++++++++++++++++---------------
>>>  1 file changed, 19 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index daaac48..543c38e 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -312,6 +312,22 @@ out:
>>>      rcu_read_unlock();
>>>  }
>>>  
>>> +static hwaddr vfio_container_granularity(VFIOContainer *container)
>>> +{
>>> +    uint64_t pgsize;
>>> +
>>> +    assert(container->iommu_data.iova_pgsizes);
>>
>> return (hwaddr)1 << (ffsl(container->iommu_data.iova_pgsizes) - 1);
> 
> Ah, yes, that should work.  I didn't do it that way mostly because I
> tend to confuse myself when I try to remember exactly how ffs
> semantics work.

Maybe use ffsll instead of ffsl, in case the code ever runs on a 32-bit
host instead of a 64-bit host ?

 Thomas
Laurent Vivier Sept. 23, 2015, 6:44 p.m. UTC | #4
On 17/09/2015 15:09, David Gibson wrote:
> At present the memory listener used by vfio to keep host IOMMU mappings
> in sync with the guest memory image assumes that if a guest IOMMU
> appears, then it has no existing mappings.
> 
> This may not be true if a VFIO device is hotplugged onto a guest bus
> which didn't previously include a VFIO device, and which has existing
> guest IOMMU mappings.
> 
> Therefore, use the memory_region_register_iommu_notifier_replay()
> function in order to fix this case, replaying existing guest IOMMU
> mappings, bringing the host IOMMU into sync with the guest IOMMU.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/vfio/common.c | 34 +++++++++++++++++++---------------
>  1 file changed, 19 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index daaac48..543c38e 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -312,6 +312,22 @@ out:
>      rcu_read_unlock();
>  }
>  
> +static hwaddr vfio_container_granularity(VFIOContainer *container)
> +{
> +    uint64_t pgsize;
> +
> +    assert(container->iommu_data.iova_pgsizes);
> +
> +    /* Find the smallest page size supported by the IOMMU */
> +    for (pgsize = 1; pgsize; pgsize <<= 1) {
> +        if (pgsize & container->iommu_data.iova_pgsizes) {
> +            return pgsize;
> +        }
> +    }

Perhaps we can use gcc builtin ?

	return 1 << ctz64(container->iommu_data.iova_pgsizes);

> +    /* Can't happen */
> +    assert(0);
> +}
> +
>  static void vfio_listener_region_add(MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> @@ -371,26 +387,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
>           * would be the right place to wire that up (tell the KVM
>           * device emulation the VFIO iommu handles to use).
>           */
> -        /*
> -         * This assumes that the guest IOMMU is empty of
> -         * mappings at this point.
> -         *
> -         * One way of doing this is:
> -         * 1. Avoid sharing IOMMUs between emulated devices or different
> -         * IOMMU groups.
> -         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
> -         * there are some mappings in IOMMU.
> -         *
> -         * VFIO on SPAPR does that. Other IOMMU models may do that different,
> -         * they must make sure there are no existing mappings or
> -         * loop through existing mappings to map them into VFIO.
> -         */
>          giommu = g_malloc0(sizeof(*giommu));
>          giommu->iommu = section->mr;
>          giommu->container = container;
>          giommu->n.notify = vfio_iommu_map_notify;
>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> -        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +
> +        memory_region_register_iommu_notifier_replay(giommu->iommu, &giommu->n,
> +            vfio_container_granularity(container), false);
>  
>          return;
>      }
>
David Gibson Sept. 23, 2015, 11:50 p.m. UTC | #5
On Wed, Sep 23, 2015 at 01:02:32PM +0200, Thomas Huth wrote:
> On 18/09/15 01:31, David Gibson wrote:
> > On Thu, Sep 17, 2015 at 10:54:24AM -0600, Alex Williamson wrote:
> >> On Thu, 2015-09-17 at 23:09 +1000, David Gibson wrote:
> >>> At present the memory listener used by vfio to keep host IOMMU mappings
> >>> in sync with the guest memory image assumes that if a guest IOMMU
> >>> appears, then it has no existing mappings.
> >>>
> >>> This may not be true if a VFIO device is hotplugged onto a guest bus
> >>> which didn't previously include a VFIO device, and which has existing
> >>> guest IOMMU mappings.
> >>>
> >>> Therefore, use the memory_region_register_iommu_notifier_replay()
> >>> function in order to fix this case, replaying existing guest IOMMU
> >>> mappings, bringing the host IOMMU into sync with the guest IOMMU.
> >>>
> >>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>> ---
> >>>  hw/vfio/common.c | 34 +++++++++++++++++++---------------
> >>>  1 file changed, 19 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>> index daaac48..543c38e 100644
> >>> --- a/hw/vfio/common.c
> >>> +++ b/hw/vfio/common.c
> >>> @@ -312,6 +312,22 @@ out:
> >>>      rcu_read_unlock();
> >>>  }
> >>>  
> >>> +static hwaddr vfio_container_granularity(VFIOContainer *container)
> >>> +{
> >>> +    uint64_t pgsize;
> >>> +
> >>> +    assert(container->iommu_data.iova_pgsizes);
> >>
> >> return (hwaddr)1 << (ffsl(container->iommu_data.iova_pgsizes) - 1);
> > 
> > Ah, yes, that should work.  I didn't do it that way mostly because I
> > tend to confuse myself when I try to remember exactly how ffs
> > semantics work.
> 
> Maybe use ffsll instead of ffsl, in case the code ever runs on a 32-bit
> host instead of a 64-bit host ?

Already done :)
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index daaac48..543c38e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -312,6 +312,22 @@  out:
     rcu_read_unlock();
 }
 
+static hwaddr vfio_container_granularity(VFIOContainer *container)
+{
+    uint64_t pgsize;
+
+    assert(container->iommu_data.iova_pgsizes);
+
+    /* Find the smallest page size supported by the IOMMU */
+    for (pgsize = 1; pgsize; pgsize <<= 1) {
+        if (pgsize & container->iommu_data.iova_pgsizes) {
+            return pgsize;
+        }
+    }
+    /* Can't happen */
+    assert(0);
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
@@ -371,26 +387,14 @@  static void vfio_listener_region_add(MemoryListener *listener,
          * would be the right place to wire that up (tell the KVM
          * device emulation the VFIO iommu handles to use).
          */
-        /*
-         * This assumes that the guest IOMMU is empty of
-         * mappings at this point.
-         *
-         * One way of doing this is:
-         * 1. Avoid sharing IOMMUs between emulated devices or different
-         * IOMMU groups.
-         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
-         * there are some mappings in IOMMU.
-         *
-         * VFIO on SPAPR does that. Other IOMMU models may do that different,
-         * they must make sure there are no existing mappings or
-         * loop through existing mappings to map them into VFIO.
-         */
         giommu = g_malloc0(sizeof(*giommu));
         giommu->iommu = section->mr;
         giommu->container = container;
         giommu->n.notify = vfio_iommu_map_notify;
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
-        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+
+        memory_region_register_iommu_notifier_replay(giommu->iommu, &giommu->n,
+            vfio_container_granularity(container), false);
 
         return;
     }