diff mbox series

[RFC,v1,03/18] hw/pci: introduce PCIPASIDOps to PCIDevice

Message ID 1562324511-2910-4-git-send-email-yi.l.liu@intel.com
State New
Headers show
Series intel_iommu: expose Shared Virtual Addressing to VM | expand

Commit Message

Yi Liu July 5, 2019, 11:01 a.m. UTC
This patch introduces PCIPASIDOps for PASID related operations in
future usage like virt-SVA. Related discussions can be found in
below links.

https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00078.html
https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00940.html

So far, to setup virt-SVA for assigned SVA capable device, needs to
configure host translation structures for specific pasid. (e.g. bind
guest page table to host and enable nested translation in host).
Besides, vIOMMU emulator needs to forward guest's cache invalidation
to host since host nested translation is enabled. e.g. on VT-d, guest
owns 1st level translation table, thus cache invalidation for 1st
level should be propagated to host.

This patch adds two functions: alloc_pasid and free_pasid to support
guest pasid allocation and free. The implementations of the callbacks
would be device passthru modules. Like vfio.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
---
 hw/pci/pci.c         | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/pci/pci.h | 14 ++++++++++++++
 2 files changed, 64 insertions(+)

Comments

Peter Xu July 9, 2019, 2:12 a.m. UTC | #1
On Fri, Jul 05, 2019 at 07:01:36PM +0800, Liu Yi L wrote:
> +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
> +{
> +    assert(ops && !dev->pasid_ops);
> +    dev->pasid_ops = ops;
> +}
> +
> +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn)

Name should be "pci_device_is_pasid_ops_set".  Or maybe you can simply
drop this function because as long as you check it in helper functions
like [1] below always then it seems even unecessary.

> +{
> +    PCIDevice *dev;
> +
> +    if (!bus) {
> +        return false;
> +    }
> +
> +    dev = bus->devices[devfn];
> +    return !!(dev && dev->pasid_ops);
> +}
> +
> +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
> +                                   uint32_t min_pasid, uint32_t max_pasid)

From VT-d spec I see that the virtual command "allocate pasid" does
not have bdf information so it's global, but here we've got bus/devfn.
I'm curious is that reserved for ARM or some other arch?

> +{
> +    PCIDevice *dev;
> +
> +    if (!bus) {
> +        return -1;
> +    }
> +
> +    dev = bus->devices[devfn];
> +    if (dev && dev->pasid_ops && dev->pasid_ops->alloc_pasid) {

[1]

> +        return dev->pasid_ops->alloc_pasid(bus, devfn, min_pasid, max_pasid);
> +    }
> +    return -1;
> +}
> +
> +int pci_device_request_pasid_free(PCIBus *bus, int32_t devfn,
> +                                  uint32_t pasid)
> +{
> +    PCIDevice *dev;
> +
> +    if (!bus) {
> +        return -1;
> +    }
> +
> +    dev = bus->devices[devfn];
> +    if (dev && dev->pasid_ops && dev->pasid_ops->free_pasid) {
> +        return dev->pasid_ops->free_pasid(bus, devfn, pasid);
> +    }
> +    return -1;
> +}
> +
>  static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>  {
>      Range *range = opaque;
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index d082707..16e5b8e 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -262,6 +262,13 @@ struct PCIReqIDCache {
>  };
>  typedef struct PCIReqIDCache PCIReqIDCache;
>  
> +typedef struct PCIPASIDOps PCIPASIDOps;
> +struct PCIPASIDOps {
> +    int (*alloc_pasid)(PCIBus *bus, int32_t devfn,
> +                         uint32_t min_pasid, uint32_t max_pasid);
> +    int (*free_pasid)(PCIBus *bus, int32_t devfn, uint32_t pasid);
> +};
> +
>  struct PCIDevice {
>      DeviceState qdev;
>  
> @@ -351,6 +358,7 @@ struct PCIDevice {
>      MSIVectorUseNotifier msix_vector_use_notifier;
>      MSIVectorReleaseNotifier msix_vector_release_notifier;
>      MSIVectorPollNotifier msix_vector_poll_notifier;
> +    PCIPASIDOps *pasid_ops;
>  };
>  
>  void pci_register_bar(PCIDevice *pci_dev, int region_num,
> @@ -484,6 +492,12 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>  void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>  
> +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops);
> +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn);
> +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
> +                                   uint32_t min_pasid, uint32_t max_pasid);
> +int pci_device_request_pasid_free(PCIBus *bus, int32_t devfn, uint32_t pasid);
> +
>  static inline void
>  pci_set_byte(uint8_t *config, uint8_t val)
>  {
> -- 
> 2.7.4
> 

Regards,
Eric Auger July 9, 2019, 10:41 a.m. UTC | #2
Hi Liu, Peter,

On 7/9/19 4:12 AM, Peter Xu wrote:
> On Fri, Jul 05, 2019 at 07:01:36PM +0800, Liu Yi L wrote:
>> +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
>> +{
>> +    assert(ops && !dev->pasid_ops);
>> +    dev->pasid_ops = ops;
>> +}
>> +
>> +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn)
> 
> Name should be "pci_device_is_pasid_ops_set".  Or maybe you can simply
> drop this function because as long as you check it in helper functions
> like [1] below always then it seems even unecessary.

I think we need such query to know whether the PCI device needs to be
notified. This is somehow equivalent to the flags we had before but less
precise as we cannot query whether a specific callback is implemented.

Thanks

Eric
> 
>> +{
>> +    PCIDevice *dev;
>> +
>> +    if (!bus) {
>> +        return false;
>> +    }
>> +
>> +    dev = bus->devices[devfn];
>> +    return !!(dev && dev->pasid_ops);
>> +}
>> +
>> +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
>> +                                   uint32_t min_pasid, uint32_t max_pasid)
> 
> From VT-d spec I see that the virtual command "allocate pasid" does
> not have bdf information so it's global, but here we've got bus/devfn.
> I'm curious is that reserved for ARM or some other arch?
> 
>> +{
>> +    PCIDevice *dev;
>> +
>> +    if (!bus) {
>> +        return -1;
>> +    }
>> +
>> +    dev = bus->devices[devfn];
>> +    if (dev && dev->pasid_ops && dev->pasid_ops->alloc_pasid) {
> 
> [1]
> 
>> +        return dev->pasid_ops->alloc_pasid(bus, devfn, min_pasid, max_pasid);
>> +    }
>> +    return -1;
>> +}
>> +
>> +int pci_device_request_pasid_free(PCIBus *bus, int32_t devfn,
>> +                                  uint32_t pasid)
>> +{
>> +    PCIDevice *dev;
>> +
>> +    if (!bus) {
>> +        return -1;
>> +    }
>> +
>> +    dev = bus->devices[devfn];
>> +    if (dev && dev->pasid_ops && dev->pasid_ops->free_pasid) {
>> +        return dev->pasid_ops->free_pasid(bus, devfn, pasid);
>> +    }
>> +    return -1;
>> +}
>> +
>>  static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>  {
>>      Range *range = opaque;
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index d082707..16e5b8e 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -262,6 +262,13 @@ struct PCIReqIDCache {
>>  };
>>  typedef struct PCIReqIDCache PCIReqIDCache;
>>  
>> +typedef struct PCIPASIDOps PCIPASIDOps;
>> +struct PCIPASIDOps {
>> +    int (*alloc_pasid)(PCIBus *bus, int32_t devfn,
>> +                         uint32_t min_pasid, uint32_t max_pasid);
>> +    int (*free_pasid)(PCIBus *bus, int32_t devfn, uint32_t pasid);
>> +};
>> +
>>  struct PCIDevice {
>>      DeviceState qdev;
>>  
>> @@ -351,6 +358,7 @@ struct PCIDevice {
>>      MSIVectorUseNotifier msix_vector_use_notifier;
>>      MSIVectorReleaseNotifier msix_vector_release_notifier;
>>      MSIVectorPollNotifier msix_vector_poll_notifier;
>> +    PCIPASIDOps *pasid_ops;
>>  };
>>  
>>  void pci_register_bar(PCIDevice *pci_dev, int region_num,
>> @@ -484,6 +492,12 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
>>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>  void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>>  
>> +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops);
>> +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn);
>> +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
>> +                                   uint32_t min_pasid, uint32_t max_pasid);
>> +int pci_device_request_pasid_free(PCIBus *bus, int32_t devfn, uint32_t pasid);
>> +
>>  static inline void
>>  pci_set_byte(uint8_t *config, uint8_t val)
>>  {
>> -- 
>> 2.7.4
>>
> 
> Regards,
>
Yi Liu July 10, 2019, 11:08 a.m. UTC | #3
> From: Peter Xu [mailto:zhexu@redhat.com]
> Sent: Tuesday, July 9, 2019 10:12 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v1 03/18] hw/pci: introduce PCIPASIDOps to PCIDevice
> 
> On Fri, Jul 05, 2019 at 07:01:36PM +0800, Liu Yi L wrote:
> > +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
> > +{
> > +    assert(ops && !dev->pasid_ops);
> > +    dev->pasid_ops = ops;
> > +}
> > +
> > +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn)
> 
> Name should be "pci_device_is_pasid_ops_set".  Or maybe you can simply
> drop this function because as long as you check it in helper functions
> like [1] below always then it seems even unecessary.

yes, the name should be "pci_device_is_pasid_ops_set". I noticed your
comments on the necessity in another, let's talk in that thread. :-)

> > +{
> > +    PCIDevice *dev;
> > +
> > +    if (!bus) {
> > +        return false;
> > +    }
> > +
> > +    dev = bus->devices[devfn];
> > +    return !!(dev && dev->pasid_ops);
> > +}
> > +
> > +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
> > +                                   uint32_t min_pasid, uint32_t max_pasid)
> 
> From VT-d spec I see that the virtual command "allocate pasid" does
> not have bdf information so it's global, but here we've got bus/devfn.
> I'm curious is that reserved for ARM or some other arch?

You are right. VT-d spec doesn’t have bdf info. But we need to pass the
allocation request via vfio. So this function has bdf info. In vIOMMU side,
it should select a vfio-pci device and invoke this callback when it wants to
request PASID alloc/free.

> > +{
> > +    PCIDevice *dev;
> > +
> > +    if (!bus) {
> > +        return -1;
> > +    }
> > +
> > +    dev = bus->devices[devfn];
> > +    if (dev && dev->pasid_ops && dev->pasid_ops->alloc_pasid) {
> 
> [1]
> 
> > +        return dev->pasid_ops->alloc_pasid(bus, devfn, min_pasid, max_pasid);

Thanks,
Yi Liu
David Gibson July 11, 2019, 3:51 a.m. UTC | #4
On Wed, Jul 10, 2019 at 11:08:15AM +0000, Liu, Yi L wrote:
> > From: Peter Xu [mailto:zhexu@redhat.com]
> > Sent: Tuesday, July 9, 2019 10:12 AM
> > To: Liu, Yi L <yi.l.liu@intel.com>
> > Subject: Re: [RFC v1 03/18] hw/pci: introduce PCIPASIDOps to PCIDevice
> > 
> > On Fri, Jul 05, 2019 at 07:01:36PM +0800, Liu Yi L wrote:
> > > +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
> > > +{
> > > +    assert(ops && !dev->pasid_ops);
> > > +    dev->pasid_ops = ops;
> > > +}
> > > +
> > > +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn)
> > 
> > Name should be "pci_device_is_pasid_ops_set".  Or maybe you can simply
> > drop this function because as long as you check it in helper functions
> > like [1] below always then it seems even unecessary.
> 
> yes, the name should be "pci_device_is_pasid_ops_set". I noticed your
> comments on the necessity in another, let's talk in that thread. :-)
> 
> > > +{
> > > +    PCIDevice *dev;
> > > +
> > > +    if (!bus) {
> > > +        return false;
> > > +    }
> > > +
> > > +    dev = bus->devices[devfn];
> > > +    return !!(dev && dev->pasid_ops);
> > > +}
> > > +
> > > +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
> > > +                                   uint32_t min_pasid, uint32_t max_pasid)
> > 
> > From VT-d spec I see that the virtual command "allocate pasid" does
> > not have bdf information so it's global, but here we've got bus/devfn.
> > I'm curious is that reserved for ARM or some other arch?
> 
> You are right. VT-d spec doesn’t have bdf info. But we need to pass the
> allocation request via vfio. So this function has bdf info. In vIOMMU side,
> it should select a vfio-pci device and invoke this callback when it wants to
> request PASID alloc/free.

That doesn't seem conceptually right.  IIUC, the pasids "belong" to a
sort of SVM context.  It seems to be the alloc should be on that
object - and that object would already have some connection to any
relevant vfio containers.  At the vfio level this seems like it should
be a container operation rather than a device operation.

> > > +{
> > > +    PCIDevice *dev;
> > > +
> > > +    if (!bus) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    dev = bus->devices[devfn];
> > > +    if (dev && dev->pasid_ops && dev->pasid_ops->alloc_pasid) {
> > 
> > [1]
> > 
> > > +        return dev->pasid_ops->alloc_pasid(bus, devfn, min_pasid, max_pasid);
> 
> Thanks,
> Yi Liu
Yi Liu July 11, 2019, 7:13 a.m. UTC | #5
> From: david@gibson.dropbear.id.au [mailto:david@gibson.dropbear.id.au]
> Sent: Thursday, July 11, 2019 11:52 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v1 03/18] hw/pci: introduce PCIPASIDOps to PCIDevice
> 
> On Wed, Jul 10, 2019 at 11:08:15AM +0000, Liu, Yi L wrote:
> > > From: Peter Xu [mailto:zhexu@redhat.com]
> > > Sent: Tuesday, July 9, 2019 10:12 AM
> > > To: Liu, Yi L <yi.l.liu@intel.com>
> > > Subject: Re: [RFC v1 03/18] hw/pci: introduce PCIPASIDOps to PCIDevice
> > >
> > > On Fri, Jul 05, 2019 at 07:01:36PM +0800, Liu Yi L wrote:
> > > > +void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
> > > > +{
> > > > +    assert(ops && !dev->pasid_ops);
> > > > +    dev->pasid_ops = ops;
> > > > +}
> > > > +
> > > > +bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn)
> > >
> > > Name should be "pci_device_is_pasid_ops_set".  Or maybe you can simply
> > > drop this function because as long as you check it in helper functions
> > > like [1] below always then it seems even unecessary.
> >
> > yes, the name should be "pci_device_is_pasid_ops_set". I noticed your
> > comments on the necessity in another, let's talk in that thread. :-)
> >
> > > > +{
> > > > +    PCIDevice *dev;
> > > > +
> > > > +    if (!bus) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    dev = bus->devices[devfn];
> > > > +    return !!(dev && dev->pasid_ops);
> > > > +}
> > > > +
> > > > +int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
> > > > +                                   uint32_t min_pasid, uint32_t max_pasid)
> > >
> > > From VT-d spec I see that the virtual command "allocate pasid" does
> > > not have bdf information so it's global, but here we've got bus/devfn.
> > > I'm curious is that reserved for ARM or some other arch?
> >
> > You are right. VT-d spec doesn’t have bdf info. But we need to pass the
> > allocation request via vfio. So this function has bdf info. In vIOMMU side,
> > it should select a vfio-pci device and invoke this callback when it wants to
> > request PASID alloc/free.
> 
> That doesn't seem conceptually right.  IIUC, the pasids "belong" to a
> sort of SVM context.  It seems to be the alloc should be on that
> object - and that object would already have some connection to any
> relevant vfio containers.  At the vfio level this seems like it should
> be a container operation rather than a device operation.

Hi David,

Yeah, I agree it should finally be a container operation. Actually, in the
callback implementation, it is a container operation. May refer to the
implementation in below patch. :-)

[RFC v1 05/18] vfio/pci: add pasid alloc/free implementation

Thanks,
Yi Liu

> > > > +{
> > > > +    PCIDevice *dev;
> > > > +
> > > > +    if (!bus) {
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    dev = bus->devices[devfn];
> > > > +    if (dev && dev->pasid_ops && dev->pasid_ops->alloc_pasid) {
> > >
> > > [1]
> > >
> > > > +        return dev->pasid_ops->alloc_pasid(bus, devfn, min_pasid, max_pasid);
> >
> > Thanks,
> > Yi Liu
> 
> --
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
diff mbox series

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 8076a80..710f9e9 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2626,6 +2626,56 @@  void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
     bus->iommu_opaque = opaque;
 }
 
+void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops)
+{
+    assert(ops && !dev->pasid_ops);
+    dev->pasid_ops = ops;
+}
+
+bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn)
+{
+    PCIDevice *dev;
+
+    if (!bus) {
+        return false;
+    }
+
+    dev = bus->devices[devfn];
+    return !!(dev && dev->pasid_ops);
+}
+
+int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
+                                   uint32_t min_pasid, uint32_t max_pasid)
+{
+    PCIDevice *dev;
+
+    if (!bus) {
+        return -1;
+    }
+
+    dev = bus->devices[devfn];
+    if (dev && dev->pasid_ops && dev->pasid_ops->alloc_pasid) {
+        return dev->pasid_ops->alloc_pasid(bus, devfn, min_pasid, max_pasid);
+    }
+    return -1;
+}
+
+int pci_device_request_pasid_free(PCIBus *bus, int32_t devfn,
+                                  uint32_t pasid)
+{
+    PCIDevice *dev;
+
+    if (!bus) {
+        return -1;
+    }
+
+    dev = bus->devices[devfn];
+    if (dev && dev->pasid_ops && dev->pasid_ops->free_pasid) {
+        return dev->pasid_ops->free_pasid(bus, devfn, pasid);
+    }
+    return -1;
+}
+
 static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
 {
     Range *range = opaque;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d082707..16e5b8e 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -262,6 +262,13 @@  struct PCIReqIDCache {
 };
 typedef struct PCIReqIDCache PCIReqIDCache;
 
+typedef struct PCIPASIDOps PCIPASIDOps;
+struct PCIPASIDOps {
+    int (*alloc_pasid)(PCIBus *bus, int32_t devfn,
+                         uint32_t min_pasid, uint32_t max_pasid);
+    int (*free_pasid)(PCIBus *bus, int32_t devfn, uint32_t pasid);
+};
+
 struct PCIDevice {
     DeviceState qdev;
 
@@ -351,6 +358,7 @@  struct PCIDevice {
     MSIVectorUseNotifier msix_vector_use_notifier;
     MSIVectorReleaseNotifier msix_vector_release_notifier;
     MSIVectorPollNotifier msix_vector_poll_notifier;
+    PCIPASIDOps *pasid_ops;
 };
 
 void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -484,6 +492,12 @@  typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
 void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
 
+void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops);
+bool pci_device_is_ops_set(PCIBus *bus, int32_t devfn);
+int pci_device_request_pasid_alloc(PCIBus *bus, int32_t devfn,
+                                   uint32_t min_pasid, uint32_t max_pasid);
+int pci_device_request_pasid_free(PCIBus *bus, int32_t devfn, uint32_t pasid);
+
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
 {