
[v7,3/5] virtio-iommu: Call iommu notifier for attach/detach

Message ID 20200313074811.27175-4-bbhushan2@marvell.com
State: New
Series: virtio-iommu: VFIO integration

Commit Message

Bharat Bhushan March 13, 2020, 7:48 a.m. UTC
IOMMU notifiers are called when a device is attached to
or detached from an address space.
This is needed for VFIO.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
---
 hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
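
For context, the new g_tree_foreach() callbacks in this patch rely on the
notify helpers already present in hw/virtio/virtio-iommu.c. Reconstructed
from the call sites in the diff below (the exact field initialization is an
assumption), the map helper looks roughly like:

static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr paddr, hwaddr size)
{
    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = virt_start,
        .translated_addr = paddr,
        .addr_mask = size - 1,      /* assumes size is a power of two */
        .perm = IOMMU_RW,
    };

    memory_region_notify_iommu(mr, 0, entry);
}

virtio_iommu_notify_unmap() is analogous, with perm = IOMMU_NONE and no
translated address.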

Comments

Eric Auger March 13, 2020, 2:41 p.m. UTC | #1
Hi Bharat

On 3/13/20 8:48 AM, Bharat Bhushan wrote:
> iommu-notifier are called when a device is attached
IOMMU notifiers
> or detached to as address-space.
> This is needed for VFIO.
and vhost for detach
> 
> Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
> ---
>  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index e51344a53e..2006f72901 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
>      uint32_t id;
>      VirtIOIOMMUDomain *domain;
>      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
> +    VirtIOIOMMU *viommu;
This needs special care on post-load. When migrating the EPs, only the id
is migrated. On post-load you need to set viommu as it is done for the
domain. Migration is allowed with vhost.
>  } VirtIOIOMMUEndpoint;
>  
>  typedef struct VirtIOIOMMUInterval {
> @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
>      memory_region_notify_iommu(mr, 0, entry);
>  }
>  
> +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
> +                                           gpointer data)
> +{
> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> +
> +    virtio_iommu_notify_unmap(mr, interval->low,
> +                              interval->high - interval->low + 1);
> +
> +    return false;
> +}
> +
> +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
> +                                         gpointer data)
> +{
> +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> +
> +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
> +                            interval->high - interval->low + 1);
> +
> +    return false;
> +}
> +
>  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
>  {
> +    VirtioIOMMUNotifierNode *node;
> +    VirtIOIOMMU *s = ep->viommu;
> +    VirtIOIOMMUDomain *domain = ep->domain;
> +
> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> +        if (ep->id == node->iommu_dev->devfn) {
> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
> +                           &node->iommu_dev->iommu_mr);
I understand this should do the job for domain removal.
> +        }
> +    }
> +
>      if (!ep->domain) {
>          return;
>      }
> @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>      }
>      ep = g_malloc0(sizeof(*ep));
>      ep->id = ep_id;
> +    ep->viommu = s;
>      trace_virtio_iommu_get_endpoint(ep_id);
>      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
>      return ep;
> @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>  {
>      uint32_t domain_id = le32_to_cpu(req->domain);
>      uint32_t ep_id = le32_to_cpu(req->endpoint);
> +    VirtioIOMMUNotifierNode *node;
>      VirtIOIOMMUDomain *domain;
>      VirtIOIOMMUEndpoint *ep;
>  
> @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>  
>      ep->domain = domain;
>  
> +    /* Replay existing address space mappings on the associated memory region */
maybe use the "domain" terminology here.
> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> +        if (ep_id == node->iommu_dev->devfn) {
> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
> +                           &node->iommu_dev->iommu_mr);
> +        }
> +    }
> +
>      return VIRTIO_IOMMU_S_OK;
>  }
>  
> 
Thanks

Eric
Bharat Bhushan March 16, 2020, 6:41 a.m. UTC | #2
Hi Eric,

On Fri, Mar 13, 2020 at 8:11 PM Auger Eric <eric.auger@redhat.com> wrote:
>
> Hi Bharat
>
> On 3/13/20 8:48 AM, Bharat Bhushan wrote:
> > iommu-notifier are called when a device is attached
> IOMMU notifiers
> > or detached to as address-space.
> > This is needed for VFIO.
> and vhost for detach
> >
> > Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
> > ---
> >  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 47 insertions(+)
> >
> > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> > index e51344a53e..2006f72901 100644
> > --- a/hw/virtio/virtio-iommu.c
> > +++ b/hw/virtio/virtio-iommu.c
> > @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
> >      uint32_t id;
> >      VirtIOIOMMUDomain *domain;
> >      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
> > +    VirtIOIOMMU *viommu;
> This needs specal care on post-load. When migrating the EPs, only the id
> is migrated. On post-load you need to set viommu as it is done for
> domain. migration is allowed with vhost.

ok, I have not tried vhost/migration. The change below sets viommu when
reconstructing the endpoint.

@@ -984,6 +973,7 @@ static gboolean reconstruct_endpoints(gpointer
key, gpointer value,

     QLIST_FOREACH(iter, &d->endpoint_list, next) {
         iter->domain = d;
+       iter->viommu = s;
         g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
     }
     return false; /* continue the domain traversal */
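
For readability, here is the whole callback with the added line,
reconstructed from the fragment above (parameter names and casts are
assumptions):

static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        iter->domain = d;
        iter->viommu = s;    /* the added line */
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}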

> >  } VirtIOIOMMUEndpoint;
> >
> >  typedef struct VirtIOIOMMUInterval {
> > @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
> >      memory_region_notify_iommu(mr, 0, entry);
> >  }
> >
> > +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
> > +                                           gpointer data)
> > +{
> > +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> > +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> > +
> > +    virtio_iommu_notify_unmap(mr, interval->low,
> > +                              interval->high - interval->low + 1);
> > +
> > +    return false;
> > +}
> > +
> > +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
> > +                                         gpointer data)
> > +{
> > +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
> > +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> > +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> > +
> > +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
> > +                            interval->high - interval->low + 1);
> > +
> > +    return false;
> > +}
> > +
> >  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> >  {
> > +    VirtioIOMMUNotifierNode *node;
> > +    VirtIOIOMMU *s = ep->viommu;
> > +    VirtIOIOMMUDomain *domain = ep->domain;
> > +
> > +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> > +        if (ep->id == node->iommu_dev->devfn) {
> > +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
> > +                           &node->iommu_dev->iommu_mr);
> I understand this should fo the job for domain removal

I did not get the comment; are you saying we should do this on domain removal?

> > +        }
> > +    }
> > +
> >      if (!ep->domain) {
> >          return;
> >      }
> > @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> >      }
> >      ep = g_malloc0(sizeof(*ep));
> >      ep->id = ep_id;
> > +    ep->viommu = s;
> >      trace_virtio_iommu_get_endpoint(ep_id);
> >      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
> >      return ep;
> > @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> >  {
> >      uint32_t domain_id = le32_to_cpu(req->domain);
> >      uint32_t ep_id = le32_to_cpu(req->endpoint);
> > +    VirtioIOMMUNotifierNode *node;
> >      VirtIOIOMMUDomain *domain;
> >      VirtIOIOMMUEndpoint *ep;
> >
> > @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> >
> >      ep->domain = domain;
> >
> > +    /* Replay existing address space mappings on the associated memory region */
> maybe use the "domain" terminology here.

ok,

Thanks
-Bharat

> > +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> > +        if (ep_id == node->iommu_dev->devfn) {
> > +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
> > +                           &node->iommu_dev->iommu_mr);
> > +        }
> > +    }
> > +
> >      return VIRTIO_IOMMU_S_OK;
> >  }
> >
> >
> Thanks
>
> Eric
>
Eric Auger March 16, 2020, 7:32 a.m. UTC | #3
Hi Bharat,

On 3/16/20 7:41 AM, Bharat Bhushan wrote:
> Hi Eric,
> 
> On Fri, Mar 13, 2020 at 8:11 PM Auger Eric <eric.auger@redhat.com> wrote:
>>
>> Hi Bharat
>>
>> On 3/13/20 8:48 AM, Bharat Bhushan wrote:
>>> iommu-notifier are called when a device is attached
>> IOMMU notifiers
>>> or detached to as address-space.
>>> This is needed for VFIO.
>> and vhost for detach
>>>
>>> Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
>>> ---
>>>  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 47 insertions(+)
>>>
>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>> index e51344a53e..2006f72901 100644
>>> --- a/hw/virtio/virtio-iommu.c
>>> +++ b/hw/virtio/virtio-iommu.c
>>> @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
>>>      uint32_t id;
>>>      VirtIOIOMMUDomain *domain;
>>>      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
>>> +    VirtIOIOMMU *viommu;
>> This needs specal care on post-load. When migrating the EPs, only the id
>> is migrated. On post-load you need to set viommu as it is done for
>> domain. migration is allowed with vhost.
> 
> ok, I have not tried vhost/migration. Below change set viommu when
> reconstructing endpoint.


Yes I think this should be OK.

In the end I gave the series a try with vhost/VFIO. With vhost it works
(not with a recent kernel though, but the issue may be related to the kernel).
With VFIO however it does not work for me.

First issue is: your guest can use 4K pages and your host can use 64KB
pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
a way to pass the host settings to the VIRTIO-IOMMU device.
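
For reference, a minimal sketch of the failing host-side call (the function
name and error handling are illustrative): the VFIO type1 backend rejects an
iova/size that is not aligned to a host-supported IOMMU page size, so a
4K-aligned guest mapping fails on a 64K host:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Map one guest IOVA range through a VFIO container. With a 64KB host
 * IOMMU granule, a 4KB-aligned iova/size gets -EINVAL back. */
static int vfio_map(int container, uint64_t iova, void *vaddr, uint64_t size)
{
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
        .vaddr = (uintptr_t)vaddr,
        .iova  = iova,
        .size  = size,
    };

    return ioctl(container, VFIO_IOMMU_MAP_DMA, &map) ? -errno : 0;
}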

Even with 64KB pages, it did not work for me. I obviously do not have the
storm of VFIO_DMA_MAP failures, but I have some, most probably due to
some wrong notifications somewhere. I will try to investigate on my side.

Did you test with VFIO on your side?

Thanks

Eric
> 
> @@ -984,6 +973,7 @@ static gboolean reconstruct_endpoints(gpointer
> key, gpointer value,
> 
>      QLIST_FOREACH(iter, &d->endpoint_list, next) {
>          iter->domain = d;
> +       iter->viommu = s;
>          g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
>      }
>      return false; /* continue the domain traversal */
> 
>>>  } VirtIOIOMMUEndpoint;
>>>
>>>  typedef struct VirtIOIOMMUInterval {
>>> @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
>>>      memory_region_notify_iommu(mr, 0, entry);
>>>  }
>>>
>>> +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
>>> +                                           gpointer data)
>>> +{
>>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
>>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
>>> +
>>> +    virtio_iommu_notify_unmap(mr, interval->low,
>>> +                              interval->high - interval->low + 1);
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
>>> +                                         gpointer data)
>>> +{
>>> +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
>>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
>>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
>>> +
>>> +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
>>> +                            interval->high - interval->low + 1);
>>> +
>>> +    return false;
>>> +}
>>> +
>>>  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
>>>  {
>>> +    VirtioIOMMUNotifierNode *node;
>>> +    VirtIOIOMMU *s = ep->viommu;
>>> +    VirtIOIOMMUDomain *domain = ep->domain;
>>> +
>>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
>>> +        if (ep->id == node->iommu_dev->devfn) {
>>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
>>> +                           &node->iommu_dev->iommu_mr);
>> I understand this should fo the job for domain removal
> 
> did not get the comment, are you saying we should do this on domain removal?
see my reply on 2/5

Note the above code should be moved after the check of !ep->domain below
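
A sketch of that reordering (not the actual v8 code) — moving the early
return ahead of the notifier walk avoids dereferencing a NULL
domain->mappings for an endpoint that is not attached:

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtioIOMMUNotifierNode *node;
    VirtIOIOMMU *s = ep->viommu;
    VirtIOIOMMUDomain *domain;

    if (!ep->domain) {
        return;
    }
    domain = ep->domain;

    QLIST_FOREACH(node, &s->notifiers_list, next) {
        if (ep->id == node->iommu_dev->devfn) {
            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
                           &node->iommu_dev->iommu_mr);
        }
    }
    /* ... rest unchanged: remove the endpoint from the domain ... */
}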
> 
>>> +        }
>>> +    }
>>> +
>>>      if (!ep->domain) {
>>>          return;
>>>      }
>>> @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>      }
>>>      ep = g_malloc0(sizeof(*ep));
>>>      ep->id = ep_id;
>>> +    ep->viommu = s;
>>>      trace_virtio_iommu_get_endpoint(ep_id);
>>>      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
>>>      return ep;
>>> @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>>>  {
>>>      uint32_t domain_id = le32_to_cpu(req->domain);
>>>      uint32_t ep_id = le32_to_cpu(req->endpoint);
>>> +    VirtioIOMMUNotifierNode *node;
>>>      VirtIOIOMMUDomain *domain;
>>>      VirtIOIOMMUEndpoint *ep;
>>>
>>> @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>>>
>>>      ep->domain = domain;
>>>
>>> +    /* Replay existing address space mappings on the associated memory region */
>> maybe use the "domain" terminology here.
> 
> ok,
> 
> Thanks
> -Bharat
> 
>>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
>>> +        if (ep_id == node->iommu_dev->devfn) {
>>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
>>> +                           &node->iommu_dev->iommu_mr);
>>> +        }
>>> +    }
>>> +
>>>      return VIRTIO_IOMMU_S_OK;
>>>  }
>>>
>>>
>> Thanks
>>
>> Eric
>>
>
Bharat Bhushan March 16, 2020, 7:45 a.m. UTC | #4
Hi Eric,

On Mon, Mar 16, 2020 at 1:02 PM Auger Eric <eric.auger@redhat.com> wrote:
>
> Hi Bharat,
>
> On 3/16/20 7:41 AM, Bharat Bhushan wrote:
> > Hi Eric,
> >
> > On Fri, Mar 13, 2020 at 8:11 PM Auger Eric <eric.auger@redhat.com> wrote:
> >>
> >> Hi Bharat
> >>
> >> On 3/13/20 8:48 AM, Bharat Bhushan wrote:
> >>> iommu-notifier are called when a device is attached
> >> IOMMU notifiers
> >>> or detached to as address-space.
> >>> This is needed for VFIO.
> >> and vhost for detach
> >>>
> >>> Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
> >>> ---
> >>>  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
> >>>  1 file changed, 47 insertions(+)
> >>>
> >>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> >>> index e51344a53e..2006f72901 100644
> >>> --- a/hw/virtio/virtio-iommu.c
> >>> +++ b/hw/virtio/virtio-iommu.c
> >>> @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
> >>>      uint32_t id;
> >>>      VirtIOIOMMUDomain *domain;
> >>>      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
> >>> +    VirtIOIOMMU *viommu;
> >> This needs specal care on post-load. When migrating the EPs, only the id
> >> is migrated. On post-load you need to set viommu as it is done for
> >> domain. migration is allowed with vhost.
> >
> > ok, I have not tried vhost/migration. Below change set viommu when
> > reconstructing endpoint.
>
>
> Yes I think this should be OK.
>
> By the end I did the series a try with vhost/vfio. with vhost it works
> (not with recent kernel though, but the issue may be related to kernel).
> With VFIO however it does not for me.
>
> First issue is: your guest can use 4K page and your host can use 64KB
> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> a way to pass the host settings to the VIRTIO-IOMMU device.
>
> Even with 64KB pages, it did not work for me. I have obviously not the
> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> some wrong notifications somewhere. I will try to investigate on my side.
>
> Did you test with VFIO on your side?

I did not try with different page sizes, only tested with 4K page size.

Yes it works; I tested with two n/w devices assigned to the VM, and both
interfaces work.

First I will try with 64k page size.

Thanks
-Bharat

>
> Thanks
>
> Eric
> >
> > @@ -984,6 +973,7 @@ static gboolean reconstruct_endpoints(gpointer
> > key, gpointer value,
> >
> >      QLIST_FOREACH(iter, &d->endpoint_list, next) {
> >          iter->domain = d;
> > +       iter->viommu = s;
> >          g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
> >      }
> >      return false; /* continue the domain traversal */
> >
> >>>  } VirtIOIOMMUEndpoint;
> >>>
> >>>  typedef struct VirtIOIOMMUInterval {
> >>> @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
> >>>      memory_region_notify_iommu(mr, 0, entry);
> >>>  }
> >>>
> >>> +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
> >>> +                                           gpointer data)
> >>> +{
> >>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> >>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> >>> +
> >>> +    virtio_iommu_notify_unmap(mr, interval->low,
> >>> +                              interval->high - interval->low + 1);
> >>> +
> >>> +    return false;
> >>> +}
> >>> +
> >>> +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
> >>> +                                         gpointer data)
> >>> +{
> >>> +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
> >>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> >>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> >>> +
> >>> +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
> >>> +                            interval->high - interval->low + 1);
> >>> +
> >>> +    return false;
> >>> +}
> >>> +
> >>>  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> >>>  {
> >>> +    VirtioIOMMUNotifierNode *node;
> >>> +    VirtIOIOMMU *s = ep->viommu;
> >>> +    VirtIOIOMMUDomain *domain = ep->domain;
> >>> +
> >>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> >>> +        if (ep->id == node->iommu_dev->devfn) {
> >>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
> >>> +                           &node->iommu_dev->iommu_mr);
> >> I understand this should fo the job for domain removal
> >
> > did not get the comment, are you saying we should do this on domain removal?
> see my reply on 2/5
>
> Note the above code should be moved after the check of !ep->domain below

ohh yes, will move

Thanks
-Bharat

> >
> >>> +        }
> >>> +    }
> >>> +
> >>>      if (!ep->domain) {
> >>>          return;
> >>>      }
> >>> @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> >>>      }
> >>>      ep = g_malloc0(sizeof(*ep));
> >>>      ep->id = ep_id;
> >>> +    ep->viommu = s;
> >>>      trace_virtio_iommu_get_endpoint(ep_id);
> >>>      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
> >>>      return ep;
> >>> @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> >>>  {
> >>>      uint32_t domain_id = le32_to_cpu(req->domain);
> >>>      uint32_t ep_id = le32_to_cpu(req->endpoint);
> >>> +    VirtioIOMMUNotifierNode *node;
> >>>      VirtIOIOMMUDomain *domain;
> >>>      VirtIOIOMMUEndpoint *ep;
> >>>
> >>> @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> >>>
> >>>      ep->domain = domain;
> >>>
> >>> +    /* Replay existing address space mappings on the associated memory region */
> >> maybe use the "domain" terminology here.
> >
> > ok,
> >
> > Thanks
> > -Bharat
> >
> >>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> >>> +        if (ep_id == node->iommu_dev->devfn) {
> >>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
> >>> +                           &node->iommu_dev->iommu_mr);
> >>> +        }
> >>> +    }
> >>> +
> >>>      return VIRTIO_IOMMU_S_OK;
> >>>  }
> >>>
> >>>
> >> Thanks
> >>
> >> Eric
> >>
> >
>
Bharat Bhushan March 16, 2020, 8:58 a.m. UTC | #5
Hi Eric,

On Mon, Mar 16, 2020 at 1:15 PM Bharat Bhushan <bharatb.linux@gmail.com> wrote:
>
> Hi Eric,
>
> On Mon, Mar 16, 2020 at 1:02 PM Auger Eric <eric.auger@redhat.com> wrote:
> >
> > Hi Bharat,
> >
> > On 3/16/20 7:41 AM, Bharat Bhushan wrote:
> > > Hi Eric,
> > >
> > > On Fri, Mar 13, 2020 at 8:11 PM Auger Eric <eric.auger@redhat.com> wrote:
> > >>
> > >> Hi Bharat
> > >>
> > >> On 3/13/20 8:48 AM, Bharat Bhushan wrote:
> > >>> iommu-notifier are called when a device is attached
> > >> IOMMU notifiers
> > >>> or detached to as address-space.
> > >>> This is needed for VFIO.
> > >> and vhost for detach
> > >>>
> > >>> Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
> > >>> ---
> > >>>  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
> > >>>  1 file changed, 47 insertions(+)
> > >>>
> > >>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> > >>> index e51344a53e..2006f72901 100644
> > >>> --- a/hw/virtio/virtio-iommu.c
> > >>> +++ b/hw/virtio/virtio-iommu.c
> > >>> @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
> > >>>      uint32_t id;
> > >>>      VirtIOIOMMUDomain *domain;
> > >>>      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
> > >>> +    VirtIOIOMMU *viommu;
> > >> This needs specal care on post-load. When migrating the EPs, only the id
> > >> is migrated. On post-load you need to set viommu as it is done for
> > >> domain. migration is allowed with vhost.
> > >
> > > ok, I have not tried vhost/migration. Below change set viommu when
> > > reconstructing endpoint.
> >
> >
> > Yes I think this should be OK.
> >
> > By the end I did the series a try with vhost/vfio. with vhost it works
> > (not with recent kernel though, but the issue may be related to kernel).
> > With VFIO however it does not for me.
> >
> > First issue is: your guest can use 4K page and your host can use 64KB
> > pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > a way to pass the host settings to the VIRTIO-IOMMU device.
> >
> > Even with 64KB pages, it did not work for me. I have obviously not the
> > storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > some wrong notifications somewhere. I will try to investigate on my side.
> >
> > Did you test with VFIO on your side?
>
> I did not tried with different page sizes, only tested with 4K page size.
>
> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
>
> First I will try with 64k page size.

64K page size does not work for me as well.

I think we are not passing the correct page_size_mask here
(config.page_size_mask is set to TARGET_PAGE_MASK, which is
0xfffffffffffff000).

We need to set this correctly as per the host page size, correct?
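
For reference, the realize function currently hard-codes the target granule
(s->config.page_size_mask = TARGET_PAGE_MASK). A stopgap, sketched only —
the cleaner fix discussed below is to query VFIO — would derive the mask
from the host page size:

/* Advertise the host granule instead of the target's.
 * qemu_real_host_page_size is QEMU's cached host page size. */
s->config.page_size_mask = ~((uint64_t)qemu_real_host_page_size - 1);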

Thanks
-Bharat

>
> Thanks
> -Bharat
>
> >
> > Thanks
> >
> > Eric
> > >
> > > @@ -984,6 +973,7 @@ static gboolean reconstruct_endpoints(gpointer
> > > key, gpointer value,
> > >
> > >      QLIST_FOREACH(iter, &d->endpoint_list, next) {
> > >          iter->domain = d;
> > > +       iter->viommu = s;
> > >          g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
> > >      }
> > >      return false; /* continue the domain traversal */
> > >
> > >>>  } VirtIOIOMMUEndpoint;
> > >>>
> > >>>  typedef struct VirtIOIOMMUInterval {
> > >>> @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
> > >>>      memory_region_notify_iommu(mr, 0, entry);
> > >>>  }
> > >>>
> > >>> +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
> > >>> +                                           gpointer data)
> > >>> +{
> > >>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> > >>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> > >>> +
> > >>> +    virtio_iommu_notify_unmap(mr, interval->low,
> > >>> +                              interval->high - interval->low + 1);
> > >>> +
> > >>> +    return false;
> > >>> +}
> > >>> +
> > >>> +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
> > >>> +                                         gpointer data)
> > >>> +{
> > >>> +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
> > >>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> > >>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> > >>> +
> > >>> +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
> > >>> +                            interval->high - interval->low + 1);
> > >>> +
> > >>> +    return false;
> > >>> +}
> > >>> +
> > >>>  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> > >>>  {
> > >>> +    VirtioIOMMUNotifierNode *node;
> > >>> +    VirtIOIOMMU *s = ep->viommu;
> > >>> +    VirtIOIOMMUDomain *domain = ep->domain;
> > >>> +
> > >>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> > >>> +        if (ep->id == node->iommu_dev->devfn) {
> > >>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
> > >>> +                           &node->iommu_dev->iommu_mr);
> > >> I understand this should fo the job for domain removal
> > >
> > > did not get the comment, are you saying we should do this on domain removal?
> > see my reply on 2/5
> >
> > Note the above code should be moved after the check of !ep->domain below
>
> ohh yes, will move
>
> Thanks
> -Bharat
>
> > >
> > >>> +        }
> > >>> +    }
> > >>> +
> > >>>      if (!ep->domain) {
> > >>>          return;
> > >>>      }
> > >>> @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> > >>>      }
> > >>>      ep = g_malloc0(sizeof(*ep));
> > >>>      ep->id = ep_id;
> > >>> +    ep->viommu = s;
> > >>>      trace_virtio_iommu_get_endpoint(ep_id);
> > >>>      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
> > >>>      return ep;
> > >>> @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> > >>>  {
> > >>>      uint32_t domain_id = le32_to_cpu(req->domain);
> > >>>      uint32_t ep_id = le32_to_cpu(req->endpoint);
> > >>> +    VirtioIOMMUNotifierNode *node;
> > >>>      VirtIOIOMMUDomain *domain;
> > >>>      VirtIOIOMMUEndpoint *ep;
> > >>>
> > >>> @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> > >>>
> > >>>      ep->domain = domain;
> > >>>
> > >>> +    /* Replay existing address space mappings on the associated memory region */
> > >> maybe use the "domain" terminology here.
> > >
> > > ok,
> > >
> > > Thanks
> > > -Bharat
> > >
> > >>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> > >>> +        if (ep_id == node->iommu_dev->devfn) {
> > >>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
> > >>> +                           &node->iommu_dev->iommu_mr);
> > >>> +        }
> > >>> +    }
> > >>> +
> > >>>      return VIRTIO_IOMMU_S_OK;
> > >>>  }
> > >>>
> > >>>
> > >> Thanks
> > >>
> > >> Eric
> > >>
> > >
> >
Eric Auger March 16, 2020, 9:04 a.m. UTC | #6
Hi Bharat,

On 3/16/20 9:58 AM, Bharat Bhushan wrote:
> Hi Eric,
> 
> On Mon, Mar 16, 2020 at 1:15 PM Bharat Bhushan <bharatb.linux@gmail.com> wrote:
>>
>> Hi Eric,
>>
>> On Mon, Mar 16, 2020 at 1:02 PM Auger Eric <eric.auger@redhat.com> wrote:
>>>
>>> Hi Bharat,
>>>
>>> On 3/16/20 7:41 AM, Bharat Bhushan wrote:
>>>> Hi Eric,
>>>>
>>>> On Fri, Mar 13, 2020 at 8:11 PM Auger Eric <eric.auger@redhat.com> wrote:
>>>>>
>>>>> Hi Bharat
>>>>>
>>>>> On 3/13/20 8:48 AM, Bharat Bhushan wrote:
>>>>>> iommu-notifier are called when a device is attached
>>>>> IOMMU notifiers
>>>>>> or detached to as address-space.
>>>>>> This is needed for VFIO.
>>>>> and vhost for detach
>>>>>>
>>>>>> Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
>>>>>> ---
>>>>>>  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 47 insertions(+)
>>>>>>
>>>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>>>>> index e51344a53e..2006f72901 100644
>>>>>> --- a/hw/virtio/virtio-iommu.c
>>>>>> +++ b/hw/virtio/virtio-iommu.c
>>>>>> @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
>>>>>>      uint32_t id;
>>>>>>      VirtIOIOMMUDomain *domain;
>>>>>>      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
>>>>>> +    VirtIOIOMMU *viommu;
>>>>> This needs specal care on post-load. When migrating the EPs, only the id
>>>>> is migrated. On post-load you need to set viommu as it is done for
>>>>> domain. migration is allowed with vhost.
>>>>
>>>> ok, I have not tried vhost/migration. Below change set viommu when
>>>> reconstructing endpoint.
>>>
>>>
>>> Yes I think this should be OK.
>>>
>>> By the end I did the series a try with vhost/vfio. with vhost it works
>>> (not with recent kernel though, but the issue may be related to kernel).
>>> With VFIO however it does not for me.
>>>
>>> First issue is: your guest can use 4K page and your host can use 64KB
>>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
>>> a way to pass the host settings to the VIRTIO-IOMMU device.
>>>
>>> Even with 64KB pages, it did not work for me. I have obviously not the
>>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
>>> some wrong notifications somewhere. I will try to investigate on my side.
>>>
>>> Did you test with VFIO on your side?
>>
>> I did not tried with different page sizes, only tested with 4K page size.
>>
>> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
>>
>> First I will try with 64k page size.
> 
> 64K page size does not work for me as well,
> 
> I think we are not passing correct page_size_mask here
> (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> 0xfffffffffffff000))
I guess you mean with guest using 4K and host using 64K.
> 
> We need to set this correctly as per host page size, correct?
Yes that's correct. We need to put in place a control path to retrieve
the page settings on the host through VFIO to inform the virtio-iommu device.

Besides this issue, did you try with 64kB on host and guest?

Thanks

Eric
> 
> Thanks
> -Bharat
> 
>>
>> Thanks
>> -Bharat
>>
>>>
>>> Thanks
>>>
>>> Eric
>>>>
>>>> @@ -984,6 +973,7 @@ static gboolean reconstruct_endpoints(gpointer
>>>> key, gpointer value,
>>>>
>>>>      QLIST_FOREACH(iter, &d->endpoint_list, next) {
>>>>          iter->domain = d;
>>>> +       iter->viommu = s;
>>>>          g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
>>>>      }
>>>>      return false; /* continue the domain traversal */
>>>>
>>>>>>  } VirtIOIOMMUEndpoint;
>>>>>>
>>>>>>  typedef struct VirtIOIOMMUInterval {
>>>>>> @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
>>>>>>      memory_region_notify_iommu(mr, 0, entry);
>>>>>>  }
>>>>>>
>>>>>> +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
>>>>>> +                                           gpointer data)
>>>>>> +{
>>>>>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
>>>>>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
>>>>>> +
>>>>>> +    virtio_iommu_notify_unmap(mr, interval->low,
>>>>>> +                              interval->high - interval->low + 1);
>>>>>> +
>>>>>> +    return false;
>>>>>> +}
>>>>>> +
>>>>>> +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
>>>>>> +                                         gpointer data)
>>>>>> +{
>>>>>> +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
>>>>>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
>>>>>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
>>>>>> +
>>>>>> +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
>>>>>> +                            interval->high - interval->low + 1);
>>>>>> +
>>>>>> +    return false;
>>>>>> +}
>>>>>> +
>>>>>>  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
>>>>>>  {
>>>>>> +    VirtioIOMMUNotifierNode *node;
>>>>>> +    VirtIOIOMMU *s = ep->viommu;
>>>>>> +    VirtIOIOMMUDomain *domain = ep->domain;
>>>>>> +
>>>>>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
>>>>>> +        if (ep->id == node->iommu_dev->devfn) {
>>>>>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
>>>>>> +                           &node->iommu_dev->iommu_mr);
>>>>> I understand this should fo the job for domain removal
>>>>
>>>> did not get the comment, are you saying we should do this on domain removal?
>>> see my reply on 2/5
>>>
>>> Note the above code should be moved after the check of !ep->domain below
>>
>> ohh yes, will move
>>
>> Thanks
>> -Bharat
>>
>>>>
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>>      if (!ep->domain) {
>>>>>>          return;
>>>>>>      }
>>>>>> @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>>>>>>      }
>>>>>>      ep = g_malloc0(sizeof(*ep));
>>>>>>      ep->id = ep_id;
>>>>>> +    ep->viommu = s;
>>>>>>      trace_virtio_iommu_get_endpoint(ep_id);
>>>>>>      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
>>>>>>      return ep;
>>>>>> @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>>>>>>  {
>>>>>>      uint32_t domain_id = le32_to_cpu(req->domain);
>>>>>>      uint32_t ep_id = le32_to_cpu(req->endpoint);
>>>>>> +    VirtioIOMMUNotifierNode *node;
>>>>>>      VirtIOIOMMUDomain *domain;
>>>>>>      VirtIOIOMMUEndpoint *ep;
>>>>>>
>>>>>> @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>>>>>>
>>>>>>      ep->domain = domain;
>>>>>>
>>>>>> +    /* Replay existing address space mappings on the associated memory region */
>>>>> maybe use the "domain" terminology here.
>>>>
>>>> ok,
>>>>
>>>> Thanks
>>>> -Bharat
>>>>
>>>>>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
>>>>>> +        if (ep_id == node->iommu_dev->devfn) {
>>>>>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
>>>>>> +                           &node->iommu_dev->iommu_mr);
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>>      return VIRTIO_IOMMU_S_OK;
>>>>>>  }
>>>>>>
>>>>>>
>>>>> Thanks
>>>>>
>>>>> Eric
>>>>>
>>>>
>>>
>
Bharat Bhushan March 16, 2020, 9:10 a.m. UTC | #7
Hi Eric,

On Mon, Mar 16, 2020 at 2:35 PM Auger Eric <eric.auger@redhat.com> wrote:
>
> Hi Bharat,
>
> On 3/16/20 9:58 AM, Bharat Bhushan wrote:
> > Hi Eric,
> >
> > On Mon, Mar 16, 2020 at 1:15 PM Bharat Bhushan <bharatb.linux@gmail.com> wrote:
> >>
> >> Hi Eric,
> >>
> >> On Mon, Mar 16, 2020 at 1:02 PM Auger Eric <eric.auger@redhat.com> wrote:
> >>>
> >>> Hi Bharat,
> >>>
> >>> On 3/16/20 7:41 AM, Bharat Bhushan wrote:
> >>>> Hi Eric,
> >>>>
> >>>> On Fri, Mar 13, 2020 at 8:11 PM Auger Eric <eric.auger@redhat.com> wrote:
> >>>>>
> >>>>> Hi Bharat
> >>>>>
> >>>>> On 3/13/20 8:48 AM, Bharat Bhushan wrote:
> >>>>>> iommu-notifier are called when a device is attached
> >>>>> IOMMU notifiers
> >>>>>> or detached to as address-space.
> >>>>>> This is needed for VFIO.
> >>>>> and vhost for detach
> >>>>>>
> >>>>>> Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
> >>>>>> ---
> >>>>>>  hw/virtio/virtio-iommu.c | 47 ++++++++++++++++++++++++++++++++++++++++
> >>>>>>  1 file changed, 47 insertions(+)
> >>>>>>
> >>>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> >>>>>> index e51344a53e..2006f72901 100644
> >>>>>> --- a/hw/virtio/virtio-iommu.c
> >>>>>> +++ b/hw/virtio/virtio-iommu.c
> >>>>>> @@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUEndpoint {
> >>>>>>      uint32_t id;
> >>>>>>      VirtIOIOMMUDomain *domain;
> >>>>>>      QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
> >>>>>> +    VirtIOIOMMU *viommu;
> >>>>> This needs specal care on post-load. When migrating the EPs, only the id
> >>>>> is migrated. On post-load you need to set viommu as it is done for
> >>>>> domain. migration is allowed with vhost.
> >>>>
> >>>> ok, I have not tried vhost/migration. Below change set viommu when
> >>>> reconstructing endpoint.
> >>>
> >>>
> >>> Yes I think this should be OK.
> >>>
> >>> By the end I did the series a try with vhost/vfio. with vhost it works
> >>> (not with recent kernel though, but the issue may be related to kernel).
> >>> With VFIO however it does not for me.
> >>>
> >>> First issue is: your guest can use 4K page and your host can use 64KB
> >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> >>>
> >>> Even with 64KB pages, it did not work for me. I have obviously not the
> >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> >>> some wrong notifications somewhere. I will try to investigate on my side.
> >>>
> >>> Did you test with VFIO on your side?
> >>
> >> I did not tried with different page sizes, only tested with 4K page size.
> >>
> >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> >>
> >> First I will try with 64k page size.
> >
> > 64K page size does not work for me as well,
> >
> > I think we are not passing correct page_size_mask here
> > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > 0xfffffffffffff000))
> I guess you mean with guest using 4K and host using 64K.
> >
> > We need to set this correctly as per host page size, correct?
> Yes that's correct. We need to put in place a control path to retrieve
> the page settings on host through VFIO to inform the virtio-iommu device.
>
> Besides this issue, did you try with 64kB on host and guest?

I tried the following:
  - 4k host and 4k guest: works with the v7 version
  - 64k host and 64k guest: does not work with v7;
    hard-coding config.page_size_mask to 0xffffffffffff0000 makes it work

Thanks
-Bharat

>
> Thanks
>
> Eric
> >
> > Thanks
> > -Bharat
> >
> >>
> >> Thanks
> >> -Bharat
> >>
> >>>
> >>> Thanks
> >>>
> >>> Eric
> >>>>
> >>>> @@ -984,6 +973,7 @@ static gboolean reconstruct_endpoints(gpointer
> >>>> key, gpointer value,
> >>>>
> >>>>      QLIST_FOREACH(iter, &d->endpoint_list, next) {
> >>>>          iter->domain = d;
> >>>> +       iter->viommu = s;
> >>>>          g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
> >>>>      }
> >>>>      return false; /* continue the domain traversal */
> >>>>
> >>>>>>  } VirtIOIOMMUEndpoint;
> >>>>>>
> >>>>>>  typedef struct VirtIOIOMMUInterval {
> >>>>>> @@ -155,8 +156,44 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
> >>>>>>      memory_region_notify_iommu(mr, 0, entry);
> >>>>>>  }
> >>>>>>
> >>>>>> +static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
> >>>>>> +                                           gpointer data)
> >>>>>> +{
> >>>>>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> >>>>>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> >>>>>> +
> >>>>>> +    virtio_iommu_notify_unmap(mr, interval->low,
> >>>>>> +                              interval->high - interval->low + 1);
> >>>>>> +
> >>>>>> +    return false;
> >>>>>> +}
> >>>>>> +
> >>>>>> +static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
> >>>>>> +                                         gpointer data)
> >>>>>> +{
> >>>>>> +    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
> >>>>>> +    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
> >>>>>> +    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
> >>>>>> +
> >>>>>> +    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
> >>>>>> +                            interval->high - interval->low + 1);
> >>>>>> +
> >>>>>> +    return false;
> >>>>>> +}
> >>>>>> +
> >>>>>>  static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
> >>>>>>  {
> >>>>>> +    VirtioIOMMUNotifierNode *node;
> >>>>>> +    VirtIOIOMMU *s = ep->viommu;
> >>>>>> +    VirtIOIOMMUDomain *domain = ep->domain;
> >>>>>> +
> >>>>>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> >>>>>> +        if (ep->id == node->iommu_dev->devfn) {
> >>>>>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
> >>>>>> +                           &node->iommu_dev->iommu_mr);
> >>>>> I understand this should fo the job for domain removal
> >>>>
> >>>> did not get the comment, are you saying we should do this on domain removal?
> >>> see my reply on 2/5
> >>>
> >>> Note the above code should be moved after the check of !ep->domain below
> >>
> >> ohh yes, will move
> >>
> >> Thanks
> >> -Bharat
> >>
> >>>>
> >>>>>> +        }
> >>>>>> +    }
> >>>>>> +
> >>>>>>      if (!ep->domain) {
> >>>>>>          return;
> >>>>>>      }
> >>>>>> @@ -178,6 +215,7 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
> >>>>>>      }
> >>>>>>      ep = g_malloc0(sizeof(*ep));
> >>>>>>      ep->id = ep_id;
> >>>>>> +    ep->viommu = s;
> >>>>>>      trace_virtio_iommu_get_endpoint(ep_id);
> >>>>>>      g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
> >>>>>>      return ep;
> >>>>>> @@ -272,6 +310,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> >>>>>>  {
> >>>>>>      uint32_t domain_id = le32_to_cpu(req->domain);
> >>>>>>      uint32_t ep_id = le32_to_cpu(req->endpoint);
> >>>>>> +    VirtioIOMMUNotifierNode *node;
> >>>>>>      VirtIOIOMMUDomain *domain;
> >>>>>>      VirtIOIOMMUEndpoint *ep;
> >>>>>>
> >>>>>> @@ -299,6 +338,14 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> >>>>>>
> >>>>>>      ep->domain = domain;
> >>>>>>
> >>>>>> +    /* Replay existing address space mappings on the associated memory region */
> >>>>> maybe use the "domain" terminology here.
> >>>>
> >>>> ok,
> >>>>
> >>>> Thanks
> >>>> -Bharat
> >>>>
> >>>>>> +    QLIST_FOREACH(node, &s->notifiers_list, next) {
> >>>>>> +        if (ep_id == node->iommu_dev->devfn) {
> >>>>>> +            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
> >>>>>> +                           &node->iommu_dev->iommu_mr);
> >>>>>> +        }
> >>>>>> +    }
> >>>>>> +
> >>>>>>      return VIRTIO_IOMMU_S_OK;
> >>>>>>  }
> >>>>>>
> >>>>>>
> >>>>> Thanks
> >>>>>
> >>>>> Eric
> >>>>>
> >>>>
> >>>
> >
>
Jean-Philippe Brucker March 16, 2020, 10:11 a.m. UTC | #8
Hi Bharat,

Could you Cc me on your next posting?  Unfortunately I don't have much
hardware for testing this at the moment, but I might be able to help a
little on the review.

On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > >>>
> > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > >>>
> > >>> Did you test with VFIO on your side?
> > >>
> > >> I did not tried with different page sizes, only tested with 4K page size.
> > >>
> > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > >>
> > >> First I will try with 64k page size.
> > >
> > > 64K page size does not work for me as well,
> > >
> > > I think we are not passing correct page_size_mask here
> > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > 0xfffffffffffff000))
> > I guess you mean with guest using 4K and host using 64K.
> > >
> > > We need to set this correctly as per host page size, correct?
> > Yes that's correct. We need to put in place a control path to retrieve
> > the page settings on host through VFIO to inform the virtio-iommu device.
> >
> > Besides this issue, did you try with 64kB on host and guest?
> 
> I tried Followings
>   - 4k host and 4k guest  - it works with v7 version
>   - 64k host and 64k guest - it does not work with v7
>     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works

You might get this from the iova_pgsize bitmap returned by
VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
is the usual problem of aggregating consistent properties, but I'm
guessing using the host page size as a granule here is safe enough. 

If it is a problem, we can add a PROBE property for page size mask,
allowing to define per-endpoint page masks. I have kernel patches
somewhere to do just that.
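
A sketch of what that query could look like (single-container case;
aggregating several containers would AND the masks):

#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Read the host IOMMU page-size bitmap for one VFIO container.
 * Bit N set means page size (1 << N) is supported. */
static uint64_t host_iova_pgsizes(int container)
{
    struct vfio_iommu_type1_info info = { .argsz = sizeof(info) };

    if (ioctl(container, VFIO_IOMMU_GET_INFO, &info) < 0 ||
        !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
        return 0;
    }
    return info.iova_pgsizes;
}

The virtio-iommu device could then restrict what it advertises, e.g.
config.page_size_mask &= host_iova_pgsizes(container);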

Thanks,
Jean
Bharat Bhushan March 17, 2020, 7:10 a.m. UTC | #9
Hi Jean,

On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
<jean-philippe@linaro.org> wrote:
>
> Hi Bharat,
>
> Could you Cc me on your next posting?  Unfortunately I don't have much
> hardware for testing this at the moment, but I might be able to help a
> little on the review.
>
> On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > > >>>
> > > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > > >>>
> > > >>> Did you test with VFIO on your side?
> > > >>
> > > >> I did not tried with different page sizes, only tested with 4K page size.
> > > >>
> > > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > > >>
> > > >> First I will try with 64k page size.
> > > >
> > > > 64K page size does not work for me as well,
> > > >
> > > > I think we are not passing correct page_size_mask here
> > > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > > 0xfffffffffffff000))
> > > I guess you mean with guest using 4K and host using 64K.
> > > >
> > > > We need to set this correctly as per host page size, correct?
> > > Yes that's correct. We need to put in place a control path to retrieve
> > > the page settings on host through VFIO to inform the virtio-iommu device.
> > >
> > > Besides this issue, did you try with 64kB on host and guest?
> >
> > I tried Followings
> >   - 4k host and 4k guest  - it works with v7 version
> >   - 64k host and 64k guest - it does not work with v7
> >     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
>
> You might get this from the iova_pgsize bitmap returned by
> VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
> is the usual problem of aggregating consistent properties, but I'm
> guessing using the host page size as a granule here is safe enough.
>
> If it is a problem, we can add a PROBE property for page size mask,
> allowing to define per-endpoint page masks. I have kernel patches
> somewhere to do just that.

I do not see that we need a page size mask per endpoint.

Meanwhile, I am trying to understand what "page-size-mask" the guest will
work with:

- 4K page size host and 4K page size guest
  config.page_size_mask = 0xfffffffffffff000 will work

- 64K page size host and 64K page size guest
  config.page_size_mask = 0xffffffffffff0000 will work

- 64K page size host and 4K page size guest
   1) config.page_size_mask = 0xfffffffffffff000 will also not work, as
VFIO in the host expects iova and size to be aligned to 64K (PAGE_SIZE in
the host)
   2) config.page_size_mask = 0xffffffffffff0000 will not work either:
iova initialization (in the guest) expects the minimum page size
supported by the h/w to be equal to 4K (PAGE_SIZE in the guest)
       Should we look to relax this in the iova allocation code?
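
As a sanity check, the masks above follow directly from the granule,
mask = ~(granule - 1):

static uint64_t page_size_mask(uint64_t granule)
{
    return ~(granule - 1);   /* every bit at or above the granule */
}

page_size_mask(0x1000)  == 0xfffffffffffff000   (4K granule)
page_size_mask(0x10000) == 0xffffffffffff0000   (64K granule)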

Thanks
-Bharat


>
> Thanks,
> Jean
Eric Auger March 17, 2020, 8:25 a.m. UTC | #10
Hi Bharat,

On 3/17/20 8:10 AM, Bharat Bhushan wrote:
> Hi Jean,
> 
> On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> <jean-philippe@linaro.org> wrote:
>>
>> Hi Bharat,
>>
>> Could you Cc me on your next posting?  Unfortunately I don't have much
>> hardware for testing this at the moment, but I might be able to help a
>> little on the review.
>>
>> On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
>>>>>>> First issue is: your guest can use 4K page and your host can use 64KB
>>>>>>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
>>>>>>> a way to pass the host settings to the VIRTIO-IOMMU device.
>>>>>>>
>>>>>>> Even with 64KB pages, it did not work for me. I have obviously not the
>>>>>>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
>>>>>>> some wrong notifications somewhere. I will try to investigate on my side.
>>>>>>>
>>>>>>> Did you test with VFIO on your side?
>>>>>>
>>>>>> I did not tried with different page sizes, only tested with 4K page size.
>>>>>>
>>>>>> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
>>>>>>
>>>>>> First I will try with 64k page size.
>>>>>
>>>>> 64K page size does not work for me as well,
>>>>>
>>>>> I think we are not passing correct page_size_mask here
>>>>> (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
>>>>> 0xfffffffffffff000))
>>>> I guess you mean with guest using 4K and host using 64K.
>>>>>
>>>>> We need to set this correctly as per host page size, correct?
>>>> Yes that's correct. We need to put in place a control path to retrieve
>>>> the page settings on host through VFIO to inform the virtio-iommu device.
>>>>
>>>> Besides this issue, did you try with 64kB on host and guest?
>>>
>>> I tried Followings
>>>   - 4k host and 4k guest  - it works with v7 version
>>>   - 64k host and 64k guest - it does not work with v7
>>>     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
>>
>> You might get this from the iova_pgsize bitmap returned by
>> VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
>> is the usual problem of aggregating consistent properties, but I'm
>> guessing using the host page size as a granule here is safe enough.
>>
>> If it is a problem, we can add a PROBE property for page size mask,
>> allowing to define per-endpoint page masks. I have kernel patches
>> somewhere to do just that.
> 
> I do not see we need page size mask per endpoint.
The physical devices can be protected by different physical IOMMUs, and
they may have different page size support.
> 
> While I am trying to understand what "page-size-mask" guest will work with
> 
> - 4K page size host and 4k page size guest
>   config.page_size_mask = 0xffffffffffff000 will work
> 
> - 64K page size host and 64k page size guest
>   config.page_size_mask = 0xfffffffffff0000 will work
I guess not all the page sizes should be exposed by the virtio-iommu
device, only 4K and 64K.

If the host supports 4K, we should expose page sizes of 4K and bigger.
If the host supports 64K, we should expose page sizes of 64K and bigger.

The guest will be forced to use what is exposed and that should work.

What is missing is a way to retrieve the host supported page size
bitmask. I can try to help you on that if you want to.

Maybe we should first try to upstream vhost support and then VFIO?

Thanks

Eric
> 
> - 64K page size host and 4k page size guest
>    1) config.page_size_mask = 0xffffffffffff000 will also not work as
> VFIO in host expect iova and size to be aligned to 64k (PAGE_SIZE in
> host)
>    2) config.page_size_mask = 0xfffffffffff0000 will not work, iova
> initialization (in guest) expect minimum page-size supported by h/w to
> be equal to 4k (PAGE_SIZE in guest)
>        Should we look to relax this in iova allocation code?
> 
> Thanks
> -Bharat
> 
> 
>>
>> Thanks,
>> Jean
>
Jean-Philippe Brucker March 17, 2020, 8:53 a.m. UTC | #11
On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> Hi Jean,
> 
> On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> <jean-philippe@linaro.org> wrote:
> >
> > Hi Bharat,
> >
> > Could you Cc me on your next posting?  Unfortunately I don't have much
> > hardware for testing this at the moment, but I might be able to help a
> > little on the review.
> >
> > On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > > > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > > > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > > > >>>
> > > > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > > > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > > > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > > > >>>
> > > > >>> Did you test with VFIO on your side?
> > > > >>
> > > > >> I did not tried with different page sizes, only tested with 4K page size.
> > > > >>
> > > > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > > > >>
> > > > >> First I will try with 64k page size.
> > > > >
> > > > > 64K page size does not work for me as well,
> > > > >
> > > > > I think we are not passing correct page_size_mask here
> > > > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > > > 0xfffffffffffff000))
> > > > I guess you mean with guest using 4K and host using 64K.
> > > > >
> > > > > We need to set this correctly as per host page size, correct?
> > > > Yes that's correct. We need to put in place a control path to retrieve
> > > > the page settings on host through VFIO to inform the virtio-iommu device.
> > > >
> > > > Besides this issue, did you try with 64kB on host and guest?
> > >
> > > I tried Followings
> > >   - 4k host and 4k guest  - it works with v7 version
> > >   - 64k host and 64k guest - it does not work with v7
> > >     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
> >
> > You might get this from the iova_pgsize bitmap returned by
> > VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
> > is the usual problem of aggregating consistent properties, but I'm
> > guessing using the host page size as a granule here is safe enough.
> >
> > If it is a problem, we can add a PROBE property for page size mask,
> > allowing to define per-endpoint page masks. I have kernel patches
> > somewhere to do just that.
> 
> I do not see we need page size mask per endpoint.
> 
> While I am trying to understand what "page-size-mask" guest will work with
> 
> - 4K page size host and 4k page size guest
>   config.page_size_mask = 0xffffffffffff000 will work
> 
> - 64K page size host and 64k page size guest
>   config.page_size_mask = 0xfffffffffff0000 will work
> 
> - 64K page size host and 4k page size guest
>    1) config.page_size_mask = 0xffffffffffff000 will also not work as
> VFIO in host expect iova and size to be aligned to 64k (PAGE_SIZE in
> host)
>    2) config.page_size_mask = 0xfffffffffff0000 will not work, iova
> initialization (in guest) expect minimum page-size supported by h/w to
> be equal to 4k (PAGE_SIZE in guest)
>        Should we look to relax this in iova allocation code?

Oh right, that's not great. Maybe the BUG_ON() can be removed, I'll ask on
the list.
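
For reference, the check in question, from the Linux iova allocator
(quoted from memory, so treat as approximate):

/* drivers/iommu/iova.c, init_iova_domain(): the IOVA granule may not
 * exceed the CPU page size -- a 64K IOMMU granule trips this in a 4K
 * guest kernel. */
BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));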

In the meantime, a 64k granule is the right value to advertise to the guest
in this case. Did you try a 64k guest with a 4k host?

Thanks,
Jean
Bharat Bhushan March 17, 2020, 9:16 a.m. UTC | #12
Hi Jean,

On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
<jean-philippe@linaro.org> wrote:
>
> On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> > Hi Jean,
> >
> > On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> > <jean-philippe@linaro.org> wrote:
> > >
> > > Hi Bharat,
> > >
> > > Could you Cc me on your next posting?  Unfortunately I don't have much
> > > hardware for testing this at the moment, but I might be able to help a
> > > little on the review.
> > >
> > > On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > > > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > > > > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > > > > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > > > > >>>
> > > > > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > > > > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > > > > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > > > > >>>
> > > > > >>> Did you test with VFIO on your side?
> > > > > >>
> > > > > >> I did not tried with different page sizes, only tested with 4K page size.
> > > > > >>
> > > > > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > > > > >>
> > > > > >> First I will try with 64k page size.
> > > > > >
> > > > > > 64K page size does not work for me as well,
> > > > > >
> > > > > > I think we are not passing correct page_size_mask here
> > > > > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > > > > 0xfffffffffffff000))
> > > > > I guess you mean with guest using 4K and host using 64K.
> > > > > >
> > > > > > We need to set this correctly as per host page size, correct?
> > > > > Yes that's correct. We need to put in place a control path to retrieve
> > > > > the page settings on host through VFIO to inform the virtio-iommu device.
> > > > >
> > > > > Besides this issue, did you try with 64kB on host and guest?
> > > >
> > > > I tried Followings
> > > >   - 4k host and 4k guest  - it works with v7 version
> > > >   - 64k host and 64k guest - it does not work with v7
> > > >     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
> > >
> > > You might get this from the iova_pgsize bitmap returned by
> > > VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
> > > is the usual problem of aggregating consistent properties, but I'm
> > > guessing using the host page size as a granule here is safe enough.
> > >
> > > If it is a problem, we can add a PROBE property for page size mask,
> > > allowing to define per-endpoint page masks. I have kernel patches
> > > somewhere to do just that.
> >
> > I do not see we need page size mask per endpoint.
> >
> > While I am trying to understand what "page-size-mask" guest will work with
> >
> > - 4K page size host and 4k page size guest
> >   config.page_size_mask = 0xffffffffffff000 will work
> >
> > - 64K page size host and 64k page size guest
> >   config.page_size_mask = 0xfffffffffff0000 will work
> >
> > - 64K page size host and 4k page size guest
> >    1) config.page_size_mask = 0xffffffffffff000 will also not work as
> > VFIO in host expect iova and size to be aligned to 64k (PAGE_SIZE in
> > host)
> >    2) config.page_size_mask = 0xfffffffffff0000 will not work, iova
> > initialization (in guest) expect minimum page-size supported by h/w to
> > be equal to 4k (PAGE_SIZE in guest)
> >        Should we look to relax this in iova allocation code?
>
> Oh right, that's not great. Maybe the BUG_ON() can be removed, I'll ask on
> the list.

Yes, the BUG_ON in iova_init.
I tried removing it and it worked, but I have not analyzed the side effects.

>
> In the meantime, 64k granule is the right value to advertise to the guest
> in this case.
> Did you try 64k guest 4k host?

No, I will try.

Thanks
-Bharat

>
> Thanks,
> Jean
Jean-Philippe Brucker March 17, 2020, 3:59 p.m. UTC | #13
On Tue, Mar 17, 2020 at 02:46:55PM +0530, Bharat Bhushan wrote:
> Hi Jean,
> 
> On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
> <jean-philippe@linaro.org> wrote:
> >
> > On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> > > Hi Jean,
> > >
> > > On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> > > <jean-philippe@linaro.org> wrote:
> > > >
> > > > Hi Bharat,
> > > >
> > > > Could you Cc me on your next posting?  Unfortunately I don't have much
> > > > hardware for testing this at the moment, but I might be able to help a
> > > > little on the review.
> > > >
> > > > On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > > > > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > > > > > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > > > > > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > > > > > >>>
> > > > > > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > > > > > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > > > > > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > > > > > >>>
> > > > > > >>> Did you test with VFIO on your side?
> > > > > > >>
> > > > > > >> I did not tried with different page sizes, only tested with 4K page size.
> > > > > > >>
> > > > > > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > > > > > >>
> > > > > > >> First I will try with 64k page size.
> > > > > > >
> > > > > > > 64K page size does not work for me as well,
> > > > > > >
> > > > > > > I think we are not passing correct page_size_mask here
> > > > > > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > > > > > 0xfffffffffffff000))
> > > > > > I guess you mean with guest using 4K and host using 64K.
> > > > > > >
> > > > > > > We need to set this correctly as per host page size, correct?
> > > > > > Yes that's correct. We need to put in place a control path to retrieve
> > > > > > the page settings on host through VFIO to inform the virtio-iommu device.
> > > > > >
> > > > > > Besides this issue, did you try with 64kB on host and guest?
> > > > >
> > > > > I tried Followings
> > > > >   - 4k host and 4k guest  - it works with v7 version
> > > > >   - 64k host and 64k guest - it does not work with v7
> > > > >     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
> > > >
> > > > You might get this from the iova_pgsize bitmap returned by
> > > > VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
> > > > is the usual problem of aggregating consistent properties, but I'm
> > > > guessing using the host page size as a granule here is safe enough.
> > > >
> > > > If it is a problem, we can add a PROBE property for page size mask,
> > > > allowing to define per-endpoint page masks. I have kernel patches
> > > > somewhere to do just that.
> > >
> > > I do not see we need page size mask per endpoint.
> > >
> > > While I am trying to understand what "page-size-mask" guest will work with
> > >
> > > - 4K page size host and 4k page size guest
> > >   config.page_size_mask = 0xffffffffffff000 will work
> > >
> > > - 64K page size host and 64k page size guest
> > >   config.page_size_mask = 0xfffffffffff0000 will work
> > >
> > > - 64K page size host and 4k page size guest
> > >    1) config.page_size_mask = 0xffffffffffff000 will also not work as
> > > VFIO in host expect iova and size to be aligned to 64k (PAGE_SIZE in
> > > host)
> > >    2) config.page_size_mask = 0xfffffffffff0000 will not work, iova
> > > initialization (in guest) expect minimum page-size supported by h/w to
> > > be equal to 4k (PAGE_SIZE in guest)
> > >        Should we look to relax this in iova allocation code?
> >
> > Oh right, that's not great. Maybe the BUG_ON() can be removed, I'll ask on
> > the list.
> 
> yes, the BUG_ON in iova_init.
> I tried with removing same and it worked, but not analyzed side effects.

It might break the assumption from device drivers that mapping a page is
safe. For example they call alloc_page() followed by dma_map_page(). In
our situation dma-iommu.c will oblige and create one 64k mapping to one 4k
physical page. As a result the endpoint can access the neighbouring 60k of
memory.
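
To illustrate that pattern (hypothetical driver code, error handling
elided):

        struct page *page = alloc_page(GFP_KERNEL); /* one 4k page */
        dma_addr_t iova = dma_map_page(dev, page, 0, PAGE_SIZE,
                                       DMA_TO_DEVICE);
        /*
         * With a 64k IOMMU granule, dma-iommu.c aligns the request up
         * to the granule, so this single mapping exposes the whole 64k
         * region around the 4k page to the endpoint.
         */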

This isn't too terrible. After all, even when the page sizes match, device
drivers can call dma_map_single() on sub-page buffers, which will also let
the endpoint access a whole page. The solution, if you don't trust the
endpoint, is to use bounce buffers.

But I suspect it's not as simple as removing the BUG_ON(), we'll need to
go over dma-iommu.c first. And it seems like assigning endpoints to guest
userspace won't work either in this config. In vfio_dma_do_map():

        mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;

        WARN_ON(mask & PAGE_MASK);

If I read this correctly, the WARN will trigger in a 4k guest under a 64k
host, right?  So maybe we can just say that this config isn't supported,
unless it's an important use-case for virtio-iommu?
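
(Concretely: with a 64k granule the lowest set bit of
vfio_pgsize_bitmap() is bit 16, so mask = 0xffff, while PAGE_MASK in a
4k guest is ~0xfff; mask & PAGE_MASK = 0xf000, non-zero, and the WARN
fires.)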

Thanks,
Jean
Bharat Bhushan March 18, 2020, 10:17 a.m. UTC | #14
Hi Jean,

On Tue, Mar 17, 2020 at 9:29 PM Jean-Philippe Brucker
<jean-philippe@linaro.org> wrote:
>
> On Tue, Mar 17, 2020 at 02:46:55PM +0530, Bharat Bhushan wrote:
> > Hi Jean,
> >
> > On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
> > <jean-philippe@linaro.org> wrote:
> > >
> > > On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> > > > Hi Jean,
> > > >
> > > > On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> > > > <jean-philippe@linaro.org> wrote:
> > > > >
> > > > > Hi Bharat,
> > > > >
> > > > > Could you Cc me on your next posting?  Unfortunately I don't have much
> > > > > hardware for testing this at the moment, but I might be able to help a
> > > > > little on the review.
> > > > >
> > > > > On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > > > > > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > > > > > > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > > > > > > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > > > > > > >>>
> > > > > > > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > > > > > > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > > > > > > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > > > > > > >>>
> > > > > > > >>> Did you test with VFIO on your side?
> > > > > > > >>
> > > > > > > >> I did not tried with different page sizes, only tested with 4K page size.
> > > > > > > >>
> > > > > > > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > > > > > > >>
> > > > > > > >> First I will try with 64k page size.
> > > > > > > >
> > > > > > > > 64K page size does not work for me as well,
> > > > > > > >
> > > > > > > > I think we are not passing correct page_size_mask here
> > > > > > > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > > > > > > 0xfffffffffffff000))
> > > > > > > I guess you mean with guest using 4K and host using 64K.
> > > > > > > >
> > > > > > > > We need to set this correctly as per host page size, correct?
> > > > > > > Yes that's correct. We need to put in place a control path to retrieve
> > > > > > > the page settings on host through VFIO to inform the virtio-iommu device.
> > > > > > >
> > > > > > > Besides this issue, did you try with 64kB on host and guest?
> > > > > >
> > > > > > I tried Followings
> > > > > >   - 4k host and 4k guest  - it works with v7 version
> > > > > >   - 64k host and 64k guest - it does not work with v7
> > > > > >     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
> > > > >
> > > > > You might get this from the iova_pgsize bitmap returned by
> > > > > VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
> > > > > is the usual problem of aggregating consistent properties, but I'm
> > > > > guessing using the host page size as a granule here is safe enough.
> > > > >
> > > > > If it is a problem, we can add a PROBE property for page size mask,
> > > > > allowing to define per-endpoint page masks. I have kernel patches
> > > > > somewhere to do just that.
> > > >
> > > > I do not see we need page size mask per endpoint.
> > > >
> > > > While I am trying to understand what "page-size-mask" guest will work with
> > > >
> > > > - 4K page size host and 4k page size guest
> > > >   config.page_size_mask = 0xffffffffffff000 will work
> > > >
> > > > - 64K page size host and 64k page size guest
> > > >   config.page_size_mask = 0xfffffffffff0000 will work
> > > >
> > > > - 64K page size host and 4k page size guest
> > > >    1) config.page_size_mask = 0xffffffffffff000 will also not work as
> > > > VFIO in host expect iova and size to be aligned to 64k (PAGE_SIZE in
> > > > host)
> > > >    2) config.page_size_mask = 0xfffffffffff0000 will not work, iova
> > > > initialization (in guest) expect minimum page-size supported by h/w to
> > > > be equal to 4k (PAGE_SIZE in guest)
> > > >        Should we look to relax this in iova allocation code?
> > >
> > > Oh right, that's not great. Maybe the BUG_ON() can be removed, I'll ask on
> > > the list.
> >
> > yes, the BUG_ON in iova_init.
> > I tried with removing same and it worked, but not analyzed side effects.
>
> It might break the assumption from device drivers that mapping a page is
> safe. For example they call alloc_page() followed by dma_map_page(). In
> our situation dma-iommu.c will oblige and create one 64k mapping to one 4k
> physical page. As a result the endpoint can access the neighbouring 60k of
> memory.
>
> This isn't too terrible. After all, even when the page sizes match, device
> drivers can call dma_map_single() on sub-page buffers, which will also let
> the endpoint access a whole page. The solution, if you don't trust the
> endpoint, is to use bounce buffers.
>
> But I suspect it's not as simple as removing the BUG_ON(), we'll need to
> go over dma-iommu.c first. And it seems like assigning endpoints to guest
> userspace won't work either in this config. In vfio_dma_do_map():
>
>         mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
>
>         WARN_ON(mask & PAGE_MASK);

Yes, agreed.

>
> If I read this correctly the WARN will trigger in a 4k guest under 64k
> host, right?  So maybe we can just say that this config isn't supported,
> unless it's an important use-case for virtio-iommu?

I sent the v8 version of the patch, and with that a guest and host with
the same page size should work.
I have not yet analyzed how to mark a 4k guest on a 64k host as an
unsupported configuration; I will analyze that and send a patch.

Thanks
-Bharat

>
> Thanks,
> Jean
>
Jean-Philippe Brucker March 18, 2020, 11:17 a.m. UTC | #15
On Wed, Mar 18, 2020 at 03:47:44PM +0530, Bharat Bhushan wrote:
> Hi Jean,
> 
> On Tue, Mar 17, 2020 at 9:29 PM Jean-Philippe Brucker
> <jean-philippe@linaro.org> wrote:
> >
> > On Tue, Mar 17, 2020 at 02:46:55PM +0530, Bharat Bhushan wrote:
> > > Hi Jean,
> > >
> > > On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
> > > <jean-philippe@linaro.org> wrote:
> > > >
> > > > On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> > > > > Hi Jean,
> > > > >
> > > > > On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> > > > > <jean-philippe@linaro.org> wrote:
> > > > > >
> > > > > > Hi Bharat,
> > > > > >
> > > > > > Could you Cc me on your next posting?  Unfortunately I don't have much
> > > > > > hardware for testing this at the moment, but I might be able to help a
> > > > > > little on the review.
> > > > > >
> > > > > > On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > > > > > > >>> First issue is: your guest can use 4K page and your host can use 64KB
> > > > > > > > >>> pages. In that case VFIO_DMA_MAP will fail with -EINVAL. We must devise
> > > > > > > > >>> a way to pass the host settings to the VIRTIO-IOMMU device.
> > > > > > > > >>>
> > > > > > > > >>> Even with 64KB pages, it did not work for me. I have obviously not the
> > > > > > > > >>> storm of VFIO_DMA_MAP failures but I have some, most probably due to
> > > > > > > > >>> some wrong notifications somewhere. I will try to investigate on my side.
> > > > > > > > >>>
> > > > > > > > >>> Did you test with VFIO on your side?
> > > > > > > > >>
> > > > > > > > >> I did not tried with different page sizes, only tested with 4K page size.
> > > > > > > > >>
> > > > > > > > >> Yes it works, I tested with two n/w device assigned to VM, both interfaces works
> > > > > > > > >>
> > > > > > > > >> First I will try with 64k page size.
> > > > > > > > >
> > > > > > > > > 64K page size does not work for me as well,
> > > > > > > > >
> > > > > > > > > I think we are not passing correct page_size_mask here
> > > > > > > > > (config.page_size_mask is set to TARGET_PAGE_MASK ( which is
> > > > > > > > > 0xfffffffffffff000))
> > > > > > > > I guess you mean with guest using 4K and host using 64K.
> > > > > > > > >
> > > > > > > > > We need to set this correctly as per host page size, correct?
> > > > > > > > Yes that's correct. We need to put in place a control path to retrieve
> > > > > > > > the page settings on host through VFIO to inform the virtio-iommu device.
> > > > > > > >
> > > > > > > > Besides this issue, did you try with 64kB on host and guest?
> > > > > > >
> > > > > > > I tried Followings
> > > > > > >   - 4k host and 4k guest  - it works with v7 version
> > > > > > >   - 64k host and 64k guest - it does not work with v7
> > > > > > >     hard-coded config.page_size_mask to 0xffffffffffff0000 and it works
> > > > > >
> > > > > > You might get this from the iova_pgsize bitmap returned by
> > > > > > VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is global so there
> > > > > > is the usual problem of aggregating consistent properties, but I'm
> > > > > > guessing using the host page size as a granule here is safe enough.
> > > > > >
> > > > > > If it is a problem, we can add a PROBE property for page size mask,
> > > > > > allowing to define per-endpoint page masks. I have kernel patches
> > > > > > somewhere to do just that.
> > > > >
> > > > > I do not see we need page size mask per endpoint.
> > > > >
> > > > > While I am trying to understand what "page-size-mask" guest will work with
> > > > >
> > > > > - 4K page size host and 4k page size guest
> > > > >   config.page_size_mask = 0xffffffffffff000 will work
> > > > >
> > > > > - 64K page size host and 64k page size guest
> > > > >   config.page_size_mask = 0xfffffffffff0000 will work
> > > > >
> > > > > - 64K page size host and 4k page size guest
> > > > >    1) config.page_size_mask = 0xffffffffffff000 will also not work as
> > > > > VFIO in host expect iova and size to be aligned to 64k (PAGE_SIZE in
> > > > > host)
> > > > >    2) config.page_size_mask = 0xfffffffffff0000 will not work, iova
> > > > > initialization (in guest) expect minimum page-size supported by h/w to
> > > > > be equal to 4k (PAGE_SIZE in guest)
> > > > >        Should we look to relax this in iova allocation code?
> > > >
> > > > Oh right, that's not great. Maybe the BUG_ON() can be removed, I'll ask on
> > > > the list.
> > >
> > > yes, the BUG_ON in iova_init.
> > > I tried with removing same and it worked, but not analyzed side effects.
> >
> > It might break the assumption from device drivers that mapping a page is
> > safe. For example they call alloc_page() followed by dma_map_page(). In
> > our situation dma-iommu.c will oblige and create one 64k mapping to one 4k
> > physical page. As a result the endpoint can access the neighbouring 60k of
> > memory.
> >
> > This isn't too terrible. After all, even when the page sizes match, device
> > drivers can call dma_map_single() on sub-page buffers, which will also let
> > the endpoint access a whole page. The solution, if you don't trust the
> > endpoint, is to use bounce buffers.
> >
> > But I suspect it's not as simple as removing the BUG_ON(), we'll need to
> > go over dma-iommu.c first. And it seems like assigning endpoints to guest
> > userspace won't work either in this config. In vfio_dma_do_map():
> >
> >         mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
> >
> >         WARN_ON(mask & PAGE_MASK);
> 
> Yes, Agree
> 
> >
> > If I read this correctly the WARN will trigger in a 4k guest under 64k
> > host, right?  So maybe we can just say that this config isn't supported,
> > unless it's an important use-case for virtio-iommu?
> 
> I sent v8 version of patch and with that guest and host with same page
> size should work.
> While i have not yet added analyzed how to mark 4k guest and 64k host
> as un-supported configuration, will analyze and send patch.

I don't think there is anything to do for QEMU, it's Linux that doesn't
support the configuration. We could add something like the attached patch,
in the virtio-iommu driver, to abort more gracefully than with a BUG_ON().

Thanks,
Jean
From 1fa08800c94f7ad6720b7e6fe26a65ed3d6ce715 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 18 Mar 2020 11:59:19 +0100
Subject: [PATCH] iommu/virtio: Reject IOMMU page granule larger than PAGE_SIZE

We don't currently support IOMMUs with a page granule larger than the
system page size. Currently the IOVA allocator has a BUG_ON() in this
case, and VFIO has a WARN_ON().

It might be possible to remove these obstacles if necessary. If the host
uses 64kB pages and the guest uses 4kB, then a device driver calling
alloc_page() followed by dma_map_page() will create a 64kB mapping for a
4kB physical page, allowing the endpoint to access the neighbouring 60kB
of memory. This problem could be worked around with bounce buffers.

For the moment, rather than triggering the IOVA BUG_ON() on mismatched
page sizes, abort the virtio-iommu probe with an error message.

Reported-by: Bharat Bhushan <bbhushan2@marvell.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/virtio-iommu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 6d4e3c2a2ddb..80d5d8f621ab 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -998,6 +998,7 @@ static int viommu_probe(struct virtio_device *vdev)
 	struct device *parent_dev = vdev->dev.parent;
 	struct viommu_dev *viommu = NULL;
 	struct device *dev = &vdev->dev;
+	unsigned long viommu_page_size;
 	u64 input_start = 0;
 	u64 input_end = -1UL;
 	int ret;
@@ -1028,6 +1029,14 @@ static int viommu_probe(struct virtio_device *vdev)
 		goto err_free_vqs;
 	}
 
+	viommu_page_size = 1UL << __ffs(viommu->pgsize_bitmap);
+	if (viommu_page_size > PAGE_SIZE) {
+		dev_err(dev, "granule 0x%lx larger than system page size 0x%lx\n",
+			viommu_page_size, PAGE_SIZE);
+		ret = -EINVAL;
+		goto err_free_vqs;
+	}
+
 	viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE;
 	viommu->last_domain = ~0U;
Bharat Bhushan March 18, 2020, 11:20 a.m. UTC | #16
> -----Original Message-----
> From: Jean-Philippe Brucker <jean-philippe@linaro.org>
> Sent: Wednesday, March 18, 2020 4:48 PM
> To: Bharat Bhushan <bharatb.linux@gmail.com>
> Cc: Auger Eric <eric.auger@redhat.com>; Peter Maydell
> <peter.maydell@linaro.org>; kevin.tian@intel.com; Tomasz Nowicki [C]
> <tnowicki@marvell.com>; mst@redhat.com; drjones@redhat.com;
> peterx@redhat.com; qemu-devel@nongnu.org; alex.williamson@redhat.com;
> qemu-arm@nongnu.org; Bharat Bhushan <bbhushan2@marvell.com>;
> linuc.decode@gmail.com; eric.auger.pro@gmail.com
> Subject: [EXT] Re: [PATCH v7 3/5] virtio-iommu: Call iommu notifier for
> attach/detach
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Wed, Mar 18, 2020 at 03:47:44PM +0530, Bharat Bhushan wrote:
> > Hi Jean,
> >
> > On Tue, Mar 17, 2020 at 9:29 PM Jean-Philippe Brucker
> > <jean-philippe@linaro.org> wrote:
> > >
> > > On Tue, Mar 17, 2020 at 02:46:55PM +0530, Bharat Bhushan wrote:
> > > > Hi Jean,
> > > >
> > > > On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
> > > > <jean-philippe@linaro.org> wrote:
> > > > >
> > > > > On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> > > > > > Hi Jean,
> > > > > >
> > > > > > On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> > > > > > <jean-philippe@linaro.org> wrote:
> > > > > > >
> > > > > > > Hi Bharat,
> > > > > > >
> > > > > > > Could you Cc me on your next posting?  Unfortunately I don't
> > > > > > > have much hardware for testing this at the moment, but I
> > > > > > > might be able to help a little on the review.
> > > > > > >
> > > > > > > On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> > > > > > > > > >>> First issue is: your guest can use 4K page and your
> > > > > > > > > >>> host can use 64KB pages. In that case VFIO_DMA_MAP
> > > > > > > > > >>> will fail with -EINVAL. We must devise a way to pass the host
> settings to the VIRTIO-IOMMU device.
> > > > > > > > > >>>
> > > > > > > > > >>> Even with 64KB pages, it did not work for me. I have
> > > > > > > > > >>> obviously not the storm of VFIO_DMA_MAP failures but
> > > > > > > > > >>> I have some, most probably due to some wrong notifications
> somewhere. I will try to investigate on my side.
> > > > > > > > > >>>
> > > > > > > > > >>> Did you test with VFIO on your side?
> > > > > > > > > >>
> > > > > > > > > >> I did not tried with different page sizes, only tested with 4K page
> size.
> > > > > > > > > >>
> > > > > > > > > >> Yes it works, I tested with two n/w device assigned
> > > > > > > > > >> to VM, both interfaces works
> > > > > > > > > >>
> > > > > > > > > >> First I will try with 64k page size.
> > > > > > > > > >
> > > > > > > > > > 64K page size does not work for me as well,
> > > > > > > > > >
> > > > > > > > > > I think we are not passing correct page_size_mask here
> > > > > > > > > > (config.page_size_mask is set to TARGET_PAGE_MASK (
> > > > > > > > > > which is
> > > > > > > > > > 0xfffffffffffff000))
> > > > > > > > > I guess you mean with guest using 4K and host using 64K.
> > > > > > > > > >
> > > > > > > > > > We need to set this correctly as per host page size, correct?
> > > > > > > > > Yes that's correct. We need to put in place a control
> > > > > > > > > path to retrieve the page settings on host through VFIO to inform the
> virtio-iommu device.
> > > > > > > > >
> > > > > > > > > Besides this issue, did you try with 64kB on host and guest?
> > > > > > > >
> > > > > > > > I tried Followings
> > > > > > > >   - 4k host and 4k guest  - it works with v7 version
> > > > > > > >   - 64k host and 64k guest - it does not work with v7
> > > > > > > >     hard-coded config.page_size_mask to 0xffffffffffff0000
> > > > > > > > and it works
> > > > > > >
> > > > > > > You might get this from the iova_pgsize bitmap returned by
> > > > > > > VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is
> > > > > > > global so there is the usual problem of aggregating
> > > > > > > consistent properties, but I'm guessing using the host page size as a
> granule here is safe enough.
> > > > > > >
> > > > > > > If it is a problem, we can add a PROBE property for page
> > > > > > > size mask, allowing to define per-endpoint page masks. I
> > > > > > > have kernel patches somewhere to do just that.
> > > > > >
> > > > > > I do not see we need page size mask per endpoint.
> > > > > >
> > > > > > While I am trying to understand what "page-size-mask" guest
> > > > > > will work with
> > > > > >
> > > > > > - 4K page size host and 4k page size guest
> > > > > >   config.page_size_mask = 0xffffffffffff000 will work
> > > > > >
> > > > > > - 64K page size host and 64k page size guest
> > > > > >   config.page_size_mask = 0xfffffffffff0000 will work
> > > > > >
> > > > > > - 64K page size host and 4k page size guest
> > > > > >    1) config.page_size_mask = 0xffffffffffff000 will also not
> > > > > > work as VFIO in host expect iova and size to be aligned to 64k
> > > > > > (PAGE_SIZE in
> > > > > > host)
> > > > > >    2) config.page_size_mask = 0xfffffffffff0000 will not work,
> > > > > > iova initialization (in guest) expect minimum page-size
> > > > > > supported by h/w to be equal to 4k (PAGE_SIZE in guest)
> > > > > >        Should we look to relax this in iova allocation code?
> > > > >
> > > > > Oh right, that's not great. Maybe the BUG_ON() can be removed,
> > > > > I'll ask on the list.
> > > >
> > > > yes, the BUG_ON in iova_init.
> > > > I tried with removing same and it worked, but not analyzed side effects.
> > >
> > > It might break the assumption from device drivers that mapping a
> > > page is safe. For example they call alloc_page() followed by
> > > dma_map_page(). In our situation dma-iommu.c will oblige and create
> > > one 64k mapping to one 4k physical page. As a result the endpoint
> > > can access the neighbouring 60k of memory.
> > >
> > > This isn't too terrible. After all, even when the page sizes match,
> > > device drivers can call dma_map_single() on sub-page buffers, which
> > > will also let the endpoint access a whole page. The solution, if you
> > > don't trust the endpoint, is to use bounce buffers.
> > >
> > > But I suspect it's not as simple as removing the BUG_ON(), we'll
> > > need to go over dma-iommu.c first. And it seems like assigning
> > > endpoints to guest userspace won't work either in this config. In
> vfio_dma_do_map():
> > >
> > >         mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) -
> > > 1;
> > >
> > >         WARN_ON(mask & PAGE_MASK);
> >
> > Yes, Agree
> >
> > >
> > > If I read this correctly the WARN will trigger in a 4k guest under
> > > 64k host, right?  So maybe we can just say that this config isn't
> > > supported, unless it's an important use-case for virtio-iommu?
> >
> > I sent v8 version of patch and with that guest and host with same page
> > size should work.
> > While i have not yet added analyzed how to mark 4k guest and 64k host
> > as un-supported configuration, will analyze and send patch.
> 
> I don't think there is anything to do for QEMU, it's Linux that doesn't support the
> configuration. We could add something like the attached patch, in the virtio-
> iommu driver, to abort more gracefully than with a BUG_ON().

Yes, agreed, we need a change on the Linux side.

Thanks
-Bharat

> 
> Thanks,
> Jean
Eric Auger March 18, 2020, 11:42 a.m. UTC | #17
Hi Jean,

On 3/18/20 12:20 PM, Bharat Bhushan wrote:
> 
> 
>> -----Original Message-----
>> From: Jean-Philippe Brucker <jean-philippe@linaro.org>
>> Sent: Wednesday, March 18, 2020 4:48 PM
>> To: Bharat Bhushan <bharatb.linux@gmail.com>
>> Cc: Auger Eric <eric.auger@redhat.com>; Peter Maydell
>> <peter.maydell@linaro.org>; kevin.tian@intel.com; Tomasz Nowicki [C]
>> <tnowicki@marvell.com>; mst@redhat.com; drjones@redhat.com;
>> peterx@redhat.com; qemu-devel@nongnu.org; alex.williamson@redhat.com;
>> qemu-arm@nongnu.org; Bharat Bhushan <bbhushan2@marvell.com>;
>> linuc.decode@gmail.com; eric.auger.pro@gmail.com
>> Subject: [EXT] Re: [PATCH v7 3/5] virtio-iommu: Call iommu notifier for
>> attach/detach
>>
>> External Email
>>
>> ----------------------------------------------------------------------
>> On Wed, Mar 18, 2020 at 03:47:44PM +0530, Bharat Bhushan wrote:
>>> Hi Jean,
>>>
>>> On Tue, Mar 17, 2020 at 9:29 PM Jean-Philippe Brucker
>>> <jean-philippe@linaro.org> wrote:
>>>>
>>>> On Tue, Mar 17, 2020 at 02:46:55PM +0530, Bharat Bhushan wrote:
>>>>> Hi Jean,
>>>>>
>>>>> On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
>>>>> <jean-philippe@linaro.org> wrote:
>>>>>>
>>>>>> On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
>>>>>>> Hi Jean,
>>>>>>>
>>>>>>> On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
>>>>>>> <jean-philippe@linaro.org> wrote:
>>>>>>>>
>>>>>>>> Hi Bharat,
>>>>>>>>
>>>>>>>> Could you Cc me on your next posting?  Unfortunately I don't
>>>>>>>> have much hardware for testing this at the moment, but I
>>>>>>>> might be able to help a little on the review.
>>>>>>>>
>>>>>>>> On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
>>>>>>>>>>>>> First issue is: your guest can use 4K page and your
>>>>>>>>>>>>> host can use 64KB pages. In that case VFIO_DMA_MAP
>>>>>>>>>>>>> will fail with -EINVAL. We must devise a way to pass the host
>> settings to the VIRTIO-IOMMU device.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Even with 64KB pages, it did not work for me. I have
>>>>>>>>>>>>> obviously not the storm of VFIO_DMA_MAP failures but
>>>>>>>>>>>>> I have some, most probably due to some wrong notifications
>> somewhere. I will try to investigate on my side.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Did you test with VFIO on your side?
>>>>>>>>>>>>
>>>>>>>>>>>> I did not tried with different page sizes, only tested with 4K page
>> size.
>>>>>>>>>>>>
>>>>>>>>>>>> Yes it works, I tested with two n/w device assigned
>>>>>>>>>>>> to VM, both interfaces works
>>>>>>>>>>>>
>>>>>>>>>>>> First I will try with 64k page size.
>>>>>>>>>>>
>>>>>>>>>>> 64K page size does not work for me as well,
>>>>>>>>>>>
>>>>>>>>>>> I think we are not passing correct page_size_mask here
>>>>>>>>>>> (config.page_size_mask is set to TARGET_PAGE_MASK (
>>>>>>>>>>> which is
>>>>>>>>>>> 0xfffffffffffff000))
>>>>>>>>>> I guess you mean with guest using 4K and host using 64K.
>>>>>>>>>>>
>>>>>>>>>>> We need to set this correctly as per host page size, correct?
>>>>>>>>>> Yes that's correct. We need to put in place a control
>>>>>>>>>> path to retrieve the page settings on host through VFIO to inform the
>> virtio-iommu device.
>>>>>>>>>>
>>>>>>>>>> Besides this issue, did you try with 64kB on host and guest?
>>>>>>>>>
>>>>>>>>> I tried Followings
>>>>>>>>>   - 4k host and 4k guest  - it works with v7 version
>>>>>>>>>   - 64k host and 64k guest - it does not work with v7
>>>>>>>>>     hard-coded config.page_size_mask to 0xffffffffffff0000
>>>>>>>>> and it works
>>>>>>>>
>>>>>>>> You might get this from the iova_pgsize bitmap returned by
>>>>>>>> VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is
>>>>>>>> global so there is the usual problem of aggregating
>>>>>>>> consistent properties, but I'm guessing using the host page size as a
>> granule here is safe enough.
>>>>>>>>
>>>>>>>> If it is a problem, we can add a PROBE property for page
>>>>>>>> size mask, allowing to define per-endpoint page masks. I
>>>>>>>> have kernel patches somewhere to do just that.
>>>>>>>
>>>>>>> I do not see we need page size mask per endpoint.
>>>>>>>
>>>>>>> While I am trying to understand what "page-size-mask" guest
>>>>>>> will work with
>>>>>>>
>>>>>>> - 4K page size host and 4k page size guest
>>>>>>>   config.page_size_mask = 0xffffffffffff000 will work
>>>>>>>
>>>>>>> - 64K page size host and 64k page size guest
>>>>>>>   config.page_size_mask = 0xfffffffffff0000 will work
>>>>>>>
>>>>>>> - 64K page size host and 4k page size guest
>>>>>>>    1) config.page_size_mask = 0xffffffffffff000 will also not
>>>>>>> work as VFIO in host expect iova and size to be aligned to 64k
>>>>>>> (PAGE_SIZE in
>>>>>>> host)
>>>>>>>    2) config.page_size_mask = 0xfffffffffff0000 will not work,
>>>>>>> iova initialization (in guest) expect minimum page-size
>>>>>>> supported by h/w to be equal to 4k (PAGE_SIZE in guest)
>>>>>>>        Should we look to relax this in iova allocation code?
>>>>>>
>>>>>> Oh right, that's not great. Maybe the BUG_ON() can be removed,
>>>>>> I'll ask on the list.
>>>>>
>>>>> yes, the BUG_ON in iova_init.
So you mean in init_iova_domain()?

I see the BUG_ON was introduced in commit
0fb5fe874c42942e16c450ae05da453e13a1c09e ("iommu: Make IOVA domain page
size explicit"), but was it meant to remain?
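
For reference, the check that commit added, which as far as I can see is
still in init_iova_domain() (drivers/iommu/iova.c) today, is:

        /*
         * IOVA granularity will normally be equal to the smallest
         * supported IOMMU page size; both *must* be powers of two, so
         * enforce that, and prevent CPUs from ever again conjuring
         * allocations smaller than a page.
         */
        BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));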

Logically, when we later allocate buffer IOVAs for DMA accesses,
shouldn't it be possible to use the actual granule set at init() time and
make sure the allocated size is properly aligned?
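
(For what it's worth, dma-iommu.c does seem to align allocations to the
IOVA granule already, e.g. in __iommu_dma_map(), if I read it right:

        size = iova_align(iovad, size + iova_offset(iovad, phys));

so perhaps the BUG_ON is the only real obstacle on the allocation side.)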

Reading the commit message, it explicitly says that "the systems may contain
heterogeneous IOMMUs supporting differing minimum page sizes, which may
also not be common with the CPU page size".

Thanks

Eric


>>>>> I tried with removing same and it worked, but not analyzed side effects.
>>>>
>>>> It might break the assumption from device drivers that mapping a
>>>> page is safe. For example they call alloc_page() followed by
>>>> dma_map_page(). In our situation dma-iommu.c will oblige and create
>>>> one 64k mapping to one 4k physical page. As a result the endpoint
>>>> can access the neighbouring 60k of memory.
>>>>
>>>> This isn't too terrible. After all, even when the page sizes match,
>>>> device drivers can call dma_map_single() on sub-page buffers, which
>>>> will also let the endpoint access a whole page. The solution, if you
>>>> don't trust the endpoint, is to use bounce buffers.
>>>>
>>>> But I suspect it's not as simple as removing the BUG_ON(), we'll
>>>> need to go over dma-iommu.c first. And it seems like assigning
>>>> endpoints to guest userspace won't work either in this config. In
>> vfio_dma_do_map():
>>>>
>>>>         mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) -
>>>> 1;
>>>>
>>>>         WARN_ON(mask & PAGE_MASK);
>>>
>>> Yes, Agree
>>>
>>>>
>>>> If I read this correctly the WARN will trigger in a 4k guest under
>>>> 64k host, right?  So maybe we can just say that this config isn't
>>>> supported, unless it's an important use-case for virtio-iommu?
>>>
>>> I sent v8 version of patch and with that guest and host with same page
>>> size should work.
>>> While i have not yet added analyzed how to mark 4k guest and 64k host
>>> as un-supported configuration, will analyze and send patch.
>>
>> I don't think there is anything to do for QEMU, it's Linux that doesn't support the
>> configuration. We could add something like the attached patch, in the virtio-
>> iommu driver, to abort more gracefully than with a BUG_ON().
> 
> Yes agree, we need to have change in Linux side.
> 
> Thanks
> -Bharat
> 
>>
>> Thanks,
>> Jean
>
Jean-Philippe Brucker March 18, 2020, noon UTC | #18
On Wed, Mar 18, 2020 at 12:42:25PM +0100, Auger Eric wrote:
> Hi Jean,
> 
> On 3/18/20 12:20 PM, Bharat Bhushan wrote:
> > 
> > 
> >> -----Original Message-----
> >> From: Jean-Philippe Brucker <jean-philippe@linaro.org>
> >> Sent: Wednesday, March 18, 2020 4:48 PM
> >> To: Bharat Bhushan <bharatb.linux@gmail.com>
> >> Cc: Auger Eric <eric.auger@redhat.com>; Peter Maydell
> >> <peter.maydell@linaro.org>; kevin.tian@intel.com; Tomasz Nowicki [C]
> >> <tnowicki@marvell.com>; mst@redhat.com; drjones@redhat.com;
> >> peterx@redhat.com; qemu-devel@nongnu.org; alex.williamson@redhat.com;
> >> qemu-arm@nongnu.org; Bharat Bhushan <bbhushan2@marvell.com>;
> >> linuc.decode@gmail.com; eric.auger.pro@gmail.com
> >> Subject: [EXT] Re: [PATCH v7 3/5] virtio-iommu: Call iommu notifier for
> >> attach/detach
> >>
> >> External Email
> >>
> >> ----------------------------------------------------------------------
> >> On Wed, Mar 18, 2020 at 03:47:44PM +0530, Bharat Bhushan wrote:
> >>> Hi Jean,
> >>>
> >>> On Tue, Mar 17, 2020 at 9:29 PM Jean-Philippe Brucker
> >>> <jean-philippe@linaro.org> wrote:
> >>>>
> >>>> On Tue, Mar 17, 2020 at 02:46:55PM +0530, Bharat Bhushan wrote:
> >>>>> Hi Jean,
> >>>>>
> >>>>> On Tue, Mar 17, 2020 at 2:23 PM Jean-Philippe Brucker
> >>>>> <jean-philippe@linaro.org> wrote:
> >>>>>>
> >>>>>> On Tue, Mar 17, 2020 at 12:40:39PM +0530, Bharat Bhushan wrote:
> >>>>>>> Hi Jean,
> >>>>>>>
> >>>>>>> On Mon, Mar 16, 2020 at 3:41 PM Jean-Philippe Brucker
> >>>>>>> <jean-philippe@linaro.org> wrote:
> >>>>>>>>
> >>>>>>>> Hi Bharat,
> >>>>>>>>
> >>>>>>>> Could you Cc me on your next posting?  Unfortunately I don't
> >>>>>>>> have much hardware for testing this at the moment, but I
> >>>>>>>> might be able to help a little on the review.
> >>>>>>>>
> >>>>>>>> On Mon, Mar 16, 2020 at 02:40:00PM +0530, Bharat Bhushan wrote:
> >>>>>>>>>>>>> First issue is: your guest can use 4K page and your
> >>>>>>>>>>>>> host can use 64KB pages. In that case VFIO_DMA_MAP
> >>>>>>>>>>>>> will fail with -EINVAL. We must devise a way to pass the host
> >> settings to the VIRTIO-IOMMU device.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Even with 64KB pages, it did not work for me. I have
> >>>>>>>>>>>>> obviously not the storm of VFIO_DMA_MAP failures but
> >>>>>>>>>>>>> I have some, most probably due to some wrong notifications
> >> somewhere. I will try to investigate on my side.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Did you test with VFIO on your side?
> >>>>>>>>>>>>
> >>>>>>>>>>>> I did not tried with different page sizes, only tested with 4K page
> >> size.
> >>>>>>>>>>>>
> >>>>>>>>>>>> Yes it works, I tested with two n/w device assigned
> >>>>>>>>>>>> to VM, both interfaces works
> >>>>>>>>>>>>
> >>>>>>>>>>>> First I will try with 64k page size.
> >>>>>>>>>>>
> >>>>>>>>>>> 64K page size does not work for me as well,
> >>>>>>>>>>>
> >>>>>>>>>>> I think we are not passing correct page_size_mask here
> >>>>>>>>>>> (config.page_size_mask is set to TARGET_PAGE_MASK (
> >>>>>>>>>>> which is
> >>>>>>>>>>> 0xfffffffffffff000))
> >>>>>>>>>> I guess you mean with guest using 4K and host using 64K.
> >>>>>>>>>>>
> >>>>>>>>>>> We need to set this correctly as per host page size, correct?
> >>>>>>>>>> Yes that's correct. We need to put in place a control
> >>>>>>>>>> path to retrieve the page settings on host through VFIO to inform the
> >> virtio-iommu device.
> >>>>>>>>>>
> >>>>>>>>>> Besides this issue, did you try with 64kB on host and guest?
> >>>>>>>>>
> >>>>>>>>> I tried Followings
> >>>>>>>>>   - 4k host and 4k guest  - it works with v7 version
> >>>>>>>>>   - 64k host and 64k guest - it does not work with v7
> >>>>>>>>>     hard-coded config.page_size_mask to 0xffffffffffff0000
> >>>>>>>>> and it works
> >>>>>>>>
> >>>>>>>> You might get this from the iova_pgsize bitmap returned by
> >>>>>>>> VFIO_IOMMU_GET_INFO. The virtio config.page_size_mask is
> >>>>>>>> global so there is the usual problem of aggregating
> >>>>>>>> consistent properties, but I'm guessing using the host page size as a
> >> granule here is safe enough.
> >>>>>>>>
> >>>>>>>> If it is a problem, we can add a PROBE property for page
> >>>>>>>> size mask, allowing to define per-endpoint page masks. I
> >>>>>>>> have kernel patches somewhere to do just that.
> >>>>>>>
> >>>>>>> I do not see we need page size mask per endpoint.
> >>>>>>>
> >>>>>>> While I am trying to understand what "page-size-mask" guest
> >>>>>>> will work with
> >>>>>>>
> >>>>>>> - 4K page size host and 4k page size guest
> >>>>>>>   config.page_size_mask = 0xffffffffffff000 will work
> >>>>>>>
> >>>>>>> - 64K page size host and 64k page size guest
> >>>>>>>   config.page_size_mask = 0xfffffffffff0000 will work
> >>>>>>>
> >>>>>>> - 64K page size host and 4k page size guest
> >>>>>>>    1) config.page_size_mask = 0xffffffffffff000 will also not
> >>>>>>> work as VFIO in host expect iova and size to be aligned to 64k
> >>>>>>> (PAGE_SIZE in
> >>>>>>> host)
> >>>>>>>    2) config.page_size_mask = 0xfffffffffff0000 will not work,
> >>>>>>> iova initialization (in guest) expect minimum page-size
> >>>>>>> supported by h/w to be equal to 4k (PAGE_SIZE in guest)
> >>>>>>>        Should we look to relax this in iova allocation code?
> >>>>>>
> >>>>>> Oh right, that's not great. Maybe the BUG_ON() can be removed,
> >>>>>> I'll ask on the list.
> >>>>>
> >>>>> yes, the BUG_ON in iova_init.
> So you mean in init_iova_domain()?
> 
> I see the BUG_ON was introduced in
> 0fb5fe874c42942e16c450ae05da453e13a1c09e "iommu: Make IOVA domain page
> size explicit" but was it meant to remain?

No, I don't think iova.c is the problem; we could as well remove the
BUG_ON(). My concern now is more with dma-iommu.c and VFIO, which use
PAGE_SIZE in some places and could have ingrained assumptions about
iommu_pgsize <= PAGE_SIZE. We need a thorough audit of these drivers
before relaxing this. I started with dma-iommu yesterday but gave up after
seeing the VFIO WARN_ON.

I just sent the patch for virtio-iommu to the IOMMU list; we can continue
the discussion there.

Thanks,
Jean

> 
> Logically when we allocate buffer IOVAs for DMA accesses, later on,
> shouldn't it be possible to use the actual granule set on init() and
> make sure the allocated size is properly aligned.
> 
> Reading the commit msg it explicitly says that "the systems may contain
> heterogeneous IOMMUs supporting differing minimum page sizes, which may
> also not be common with the CPU page size".
> 
> Thanks
> 
> Eric
> 
> 
> >>>>> I tried with removing same and it worked, but not analyzed side effects.
> >>>>
> >>>> It might break the assumption from device drivers that mapping a
> >>>> page is safe. For example they call alloc_page() followed by
> >>>> dma_map_page(). In our situation dma-iommu.c will oblige and create
> >>>> one 64k mapping to one 4k physical page. As a result the endpoint
> >>>> can access the neighbouring 60k of memory.
> >>>>
> >>>> This isn't too terrible. After all, even when the page sizes match,
> >>>> device drivers can call dma_map_single() on sub-page buffers, which
> >>>> will also let the endpoint access a whole page. The solution, if you
> >>>> don't trust the endpoint, is to use bounce buffers.
> >>>>
> >>>> But I suspect it's not as simple as removing the BUG_ON(), we'll
> >>>> need to go over dma-iommu.c first. And it seems like assigning
> >>>> endpoints to guest userspace won't work either in this config. In
> >> vfio_dma_do_map():
> >>>>
> >>>>         mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) -
> >>>> 1;
> >>>>
> >>>>         WARN_ON(mask & PAGE_MASK);
> >>>
> >>> Yes, Agree
> >>>
> >>>>
> >>>> If I read this correctly the WARN will trigger in a 4k guest under
> >>>> 64k host, right?  So maybe we can just say that this config isn't
> >>>> supported, unless it's an important use-case for virtio-iommu?
> >>>
> >>> I sent v8 version of patch and with that guest and host with same page
> >>> size should work.
> >>> While i have not yet added analyzed how to mark 4k guest and 64k host
> >>> as un-supported configuration, will analyze and send patch.
> >>
> >> I don't think there is anything to do for QEMU, it's Linux that doesn't support the
> >> configuration. We could add something like the attached patch, in the virtio-
> >> iommu driver, to abort more gracefully than with a BUG_ON().
> > 
> > Yes agree, we need to have change in Linux side.
> > 
> > Thanks
> > -Bharat
> > 
> >>
> >> Thanks,
> >> Jean
> > 
>
diff mbox series

Patch

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index e51344a53e..2006f72901 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -49,6 +49,7 @@  typedef struct VirtIOIOMMUEndpoint {
     uint32_t id;
     VirtIOIOMMUDomain *domain;
     QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
+    VirtIOIOMMU *viommu;
 } VirtIOIOMMUEndpoint;
 
 typedef struct VirtIOIOMMUInterval {
@@ -155,8 +156,44 @@  static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr iova,
     memory_region_notify_iommu(mr, 0, entry);
 }
 
+static gboolean virtio_iommu_mapping_unmap(gpointer key, gpointer value,
+                                           gpointer data)
+{
+    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
+    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
+
+    virtio_iommu_notify_unmap(mr, interval->low,
+                              interval->high - interval->low + 1);
+
+    return false;
+}
+
+static gboolean virtio_iommu_mapping_map(gpointer key, gpointer value,
+                                         gpointer data)
+{
+    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
+    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
+    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
+
+    virtio_iommu_notify_map(mr, interval->low, mapping->phys_addr,
+                            interval->high - interval->low + 1);
+
+    return false;
+}
+
 static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
 {
+    VirtioIOMMUNotifierNode *node;
+    VirtIOIOMMU *s = ep->viommu;
+    VirtIOIOMMUDomain *domain = ep->domain;
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        if (ep->id == node->iommu_dev->devfn) {
+            g_tree_foreach(domain->mappings, virtio_iommu_mapping_unmap,
+                           &node->iommu_dev->iommu_mr);
+        }
+    }
+
     if (!ep->domain) {
         return;
     }
@@ -178,6 +215,7 @@  static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
     }
     ep = g_malloc0(sizeof(*ep));
     ep->id = ep_id;
+    ep->viommu = s;
     trace_virtio_iommu_get_endpoint(ep_id);
     g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
     return ep;
@@ -272,6 +310,7 @@  static int virtio_iommu_attach(VirtIOIOMMU *s,
 {
     uint32_t domain_id = le32_to_cpu(req->domain);
     uint32_t ep_id = le32_to_cpu(req->endpoint);
+    VirtioIOMMUNotifierNode *node;
     VirtIOIOMMUDomain *domain;
     VirtIOIOMMUEndpoint *ep;
 
@@ -299,6 +338,14 @@  static int virtio_iommu_attach(VirtIOIOMMU *s,
 
     ep->domain = domain;
 
+    /* Replay existing address space mappings on the associated memory region */
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        if (ep_id == node->iommu_dev->devfn) {
+            g_tree_foreach(domain->mappings, virtio_iommu_mapping_map,
+                           &node->iommu_dev->iommu_mr);
+        }
+    }
+
     return VIRTIO_IOMMU_S_OK;
 }