diff mbox series

[v4,01/15] hw/pci: Add a pci_setup_iommu_ops() helper

Message ID 20230622214845.3980-2-joao.m.martins@oracle.com
State New
Headers show
Series vfio: VFIO migration support with vIOMMU | expand

Commit Message

Joao Martins June 22, 2023, 9:48 p.m. UTC
From: Yi Liu <yi.l.liu@intel.com>

Add a pci_setup_iommu_ops() that uses a newly added structure
(PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
an address space for a PCI device in vendor specific way.

In preparation to expand to supplying vIOMMU attributes, add a
alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
For now the PCIIOMMUOps just defines the address_space, but it will
be extended to have another callback.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
[joao: Massage commit message and subject, and make it a complementary
rather than changing every single consumer of pci_setup_iommu()]
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
---
 include/hw/pci/pci.h     |  7 +++++++
 include/hw/pci/pci_bus.h |  1 +
 hw/pci/pci.c             | 26 +++++++++++++++++++++++---
 3 files changed, 31 insertions(+), 3 deletions(-)

Comments

Cédric Le Goater Oct. 2, 2023, 3:12 p.m. UTC | #1
Hello Joao,

On 6/22/23 23:48, Joao Martins wrote:
> From: Yi Liu <yi.l.liu@intel.com>
> 
> Add a pci_setup_iommu_ops() that uses a newly added structure
> (PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
> that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
> an address space for a PCI device in vendor specific way.
> 
> In preparation to expand to supplying vIOMMU attributes, add a
> alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
> For now the PCIIOMMUOps just defines the address_space, but it will
> be extended to have another callback.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> [joao: Massage commit message and subject, and make it a complementary
> rather than changing every single consumer of pci_setup_iommu()]
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
> ---
>   include/hw/pci/pci.h     |  7 +++++++
>   include/hw/pci/pci_bus.h |  1 +
>   hw/pci/pci.c             | 26 +++++++++++++++++++++++---
>   3 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index e6d0574a2999..f59aef5a329a 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -368,6 +368,13 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
>   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>   void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>   
> +typedef struct PCIIOMMUOps PCIIOMMUOps;
> +struct PCIIOMMUOps {
> +    AddressSpace * (*get_address_space)(PCIBus *bus,
> +                                void *opaque, int32_t devfn);
> +};
> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void *opaque);
> +

I think you should first convert all PHBs to PCIIOMMUOps to avoid all the
tests as below and adapt pci_setup_iommu_ops() with the new parameter.

Thanks,

C.

>   pcibus_t pci_bar_address(PCIDevice *d,
>                            int reg, uint8_t type, pcibus_t size);
>   
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 56531759578f..fb770b236d69 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -35,6 +35,7 @@ struct PCIBus {
>       enum PCIBusFlags flags;
>       PCIIOMMUFunc iommu_fn;
>       void *iommu_opaque;
> +    const PCIIOMMUOps *iommu_ops;
>       uint8_t devfn_min;
>       uint32_t slot_reserved_mask;
>       pci_set_irq_fn set_irq;
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index bf38905b7dc0..4e32c09e81d6 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -2639,7 +2639,15 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>       PCIBus *iommu_bus = bus;
>       uint8_t devfn = dev->devfn;
>   
> -    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
> +    /*
> +     * get_address_space() callback is mandatory when iommu uses
> +     * pci_setup_iommu_ops(), so needs to ensure its presence in
> +     * the iommu_bus search.
> +     */
> +    while (iommu_bus &&
> +           !(iommu_bus->iommu_fn ||
> +            (iommu_bus->iommu_ops && iommu_bus->iommu_ops->get_address_space)) &&
> +           iommu_bus->parent_dev) {
>           PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
>   
>           /*
> @@ -2678,8 +2686,14 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>   
>           iommu_bus = parent_bus;
>       }
> -    if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
> -        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus) {
> +        if (iommu_bus->iommu_fn) {
> +           return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
> +        } else if (iommu_bus->iommu_ops &&
> +                   iommu_bus->iommu_ops->get_address_space) {
> +           return iommu_bus->iommu_ops->get_address_space(bus,
> +                                           iommu_bus->iommu_opaque, devfn);
> +        }
>       }
>       return &address_space_memory;
>   }
> @@ -2690,6 +2704,12 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
>       bus->iommu_opaque = opaque;
>   }
>   
> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
> +{
> +    bus->iommu_ops = ops;
> +    bus->iommu_opaque = opaque;
> +}
> +
>   static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>   {
>       Range *range = opaque;
Joao Martins Oct. 6, 2023, 8:38 a.m. UTC | #2
On 02/10/2023 16:12, Cédric Le Goater wrote:
> Hello Joao,
> 
> On 6/22/23 23:48, Joao Martins wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Add a pci_setup_iommu_ops() that uses a newly added structure
>> (PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
>> that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
>> an address space for a PCI device in vendor specific way.
>>
>> In preparation to expand to supplying vIOMMU attributes, add a
>> alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
>> For now the PCIIOMMUOps just defines the address_space, but it will
>> be extended to have another callback.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> [joao: Massage commit message and subject, and make it a complementary
>> rather than changing every single consumer of pci_setup_iommu()]
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
>> ---
>>   include/hw/pci/pci.h     |  7 +++++++
>>   include/hw/pci/pci_bus.h |  1 +
>>   hw/pci/pci.c             | 26 +++++++++++++++++++++++---
>>   3 files changed, 31 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index e6d0574a2999..f59aef5a329a 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -368,6 +368,13 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *,
>> int);
>>   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>   void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>>   +typedef struct PCIIOMMUOps PCIIOMMUOps;
>> +struct PCIIOMMUOps {
>> +    AddressSpace * (*get_address_space)(PCIBus *bus,
>> +                                void *opaque, int32_t devfn);
>> +};
>> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void
>> *opaque);
>> +
> 
> I think you should first convert all PHBs to PCIIOMMUOps to avoid all the
> tests as below and adapt pci_setup_iommu_ops() with the new parameter.
> 

OK, that's Yi's original patch:

https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/

I went with this one is that 1) it might take eons to get every single IOMMU
maintainer ack; and 2) it would allow each IOMMU to move at its own speed
specially as I can't test most of the other ones. essentially iterative, rather
than invasive change? Does that make sense?

	Joao
Eric Auger Oct. 6, 2023, 8:45 a.m. UTC | #3
Hi Joao,

On 6/22/23 23:48, Joao Martins wrote:
> From: Yi Liu <yi.l.liu@intel.com>
> 
> Add a pci_setup_iommu_ops() that uses a newly added structure
> (PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
> that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
> an address space for a PCI device in vendor specific way.
double 'an'
> 
> In preparation to expand to supplying vIOMMU attributes, add a
> alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
> For now the PCIIOMMUOps just defines the address_space, but it will
> be extended to have another callback.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> [joao: Massage commit message and subject, and make it a complementary
> rather than changing every single consumer of pci_setup_iommu()]
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
> ---
>  include/hw/pci/pci.h     |  7 +++++++
>  include/hw/pci/pci_bus.h |  1 +
>  hw/pci/pci.c             | 26 +++++++++++++++++++++++---
>  3 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index e6d0574a2999..f59aef5a329a 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -368,6 +368,13 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>  void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>  
> +typedef struct PCIIOMMUOps PCIIOMMUOps;
> +struct PCIIOMMUOps {
> +    AddressSpace * (*get_address_space)(PCIBus *bus,
> +                                void *opaque, int32_t devfn);
> +};
> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void *opaque);
> +
>  pcibus_t pci_bar_address(PCIDevice *d,
>                           int reg, uint8_t type, pcibus_t size);
>  
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 56531759578f..fb770b236d69 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -35,6 +35,7 @@ struct PCIBus {
>      enum PCIBusFlags flags;
>      PCIIOMMUFunc iommu_fn;
>      void *iommu_opaque;
> +    const PCIIOMMUOps *iommu_ops;
>      uint8_t devfn_min;
>      uint32_t slot_reserved_mask;
>      pci_set_irq_fn set_irq;
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index bf38905b7dc0..4e32c09e81d6 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -2639,7 +2639,15 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>      PCIBus *iommu_bus = bus;
>      uint8_t devfn = dev->devfn;
>  
> -    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
> +    /*
> +     * get_address_space() callback is mandatory when iommu uses
> +     * pci_setup_iommu_ops(), so needs to ensure its presence in
> +     * the iommu_bus search.
the fact that it is mandatory should also be documented along with the
PCIIOMMUOps struct definition and enforced at  pci_setup_iommu_ops()
removing the need for checking iommu_bus->iommu_ops->get_address_space
> +     */
> +    while (iommu_bus &&
> +           !(iommu_bus->iommu_fn ||
> +            (iommu_bus->iommu_ops && iommu_bus->iommu_ops->get_address_space)) &&
> +           iommu_bus->parent_dev) {
>          PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
>  
>          /*
> @@ -2678,8 +2686,14 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>  
>          iommu_bus = parent_bus;
>      }
> -    if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
> -        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus) {
> +        if (iommu_bus->iommu_fn) {
> +           return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
> +        } else if (iommu_bus->iommu_ops &&
> +                   iommu_bus->iommu_ops->get_address_space) {
> +           return iommu_bus->iommu_ops->get_address_space(bus,
> +                                           iommu_bus->iommu_opaque, devfn);
> +        }
>      }
>      return &address_space_memory;
>  }
> @@ -2690,6 +2704,12 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
>      bus->iommu_opaque = opaque;
>  }
>  
> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
> +{
> +    bus->iommu_ops = ops;
> +    bus->iommu_opaque = opaque;
maybe assert if both iommu_fn and iommu_ops are set to make sure the
conversion is not done partially? If I understand it correctly either of
those 2 ops should be set and not both.
> +}
> +
>  static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>  {
>      Range *range = opaque;

Thanks

Eric
Cédric Le Goater Oct. 6, 2023, 8:50 a.m. UTC | #4
On 10/6/23 10:38, Joao Martins wrote:
> On 02/10/2023 16:12, Cédric Le Goater wrote:
>> Hello Joao,
>>
>> On 6/22/23 23:48, Joao Martins wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> Add a pci_setup_iommu_ops() that uses a newly added structure
>>> (PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
>>> that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
>>> an address space for a PCI device in vendor specific way.
>>>
>>> In preparation to expand to supplying vIOMMU attributes, add a
>>> alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
>>> For now the PCIIOMMUOps just defines the address_space, but it will
>>> be extended to have another callback.
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> [joao: Massage commit message and subject, and make it a complementary
>>> rather than changing every single consumer of pci_setup_iommu()]
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> ---
>>> v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
>>> ---
>>>    include/hw/pci/pci.h     |  7 +++++++
>>>    include/hw/pci/pci_bus.h |  1 +
>>>    hw/pci/pci.c             | 26 +++++++++++++++++++++++---
>>>    3 files changed, 31 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>> index e6d0574a2999..f59aef5a329a 100644
>>> --- a/include/hw/pci/pci.h
>>> +++ b/include/hw/pci/pci.h
>>> @@ -368,6 +368,13 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *,
>>> int);
>>>    AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>>    void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>>>    +typedef struct PCIIOMMUOps PCIIOMMUOps;
>>> +struct PCIIOMMUOps {
>>> +    AddressSpace * (*get_address_space)(PCIBus *bus,
>>> +                                void *opaque, int32_t devfn);
>>> +};
>>> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void
>>> *opaque);
>>> +
>>
>> I think you should first convert all PHBs to PCIIOMMUOps to avoid all the
>> tests as below and adapt pci_setup_iommu_ops() with the new parameter.
>>
> 
> OK, that's Yi's original patch:
> 
> https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
> 
> I went with this one is that 1) it might take eons to get every single IOMMU
> maintainer ack; and 2) it would allow each IOMMU to move at its own speed
> specially as I can't test most of the other ones. essentially iterative, rather
> than invasive change? Does that make sense?

I think it is ok to make global changes to replace a function by a struct
of ops. This is not major (unless the extra indirection has a major perf
impact on some platforms). Getting acks from everyone will be difficult
since some PHBs are orphans.

C.
Joao Martins Oct. 6, 2023, 11:03 a.m. UTC | #5
On 06/10/2023 09:45, Eric Auger wrote:
> Hi Joao,
> 
> On 6/22/23 23:48, Joao Martins wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Add a pci_setup_iommu_ops() that uses a newly added structure
>> (PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
>> that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
>> an address space for a PCI device in vendor specific way.
> double 'an'

OK

>>
>> In preparation to expand to supplying vIOMMU attributes, add a
>> alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
>> For now the PCIIOMMUOps just defines the address_space, but it will
>> be extended to have another callback.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> [joao: Massage commit message and subject, and make it a complementary
>> rather than changing every single consumer of pci_setup_iommu()]
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>> v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
>> ---
>>  include/hw/pci/pci.h     |  7 +++++++
>>  include/hw/pci/pci_bus.h |  1 +
>>  hw/pci/pci.c             | 26 +++++++++++++++++++++++---
>>  3 files changed, 31 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index e6d0574a2999..f59aef5a329a 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -368,6 +368,13 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
>>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>  void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>>  
>> +typedef struct PCIIOMMUOps PCIIOMMUOps;
>> +struct PCIIOMMUOps {
>> +    AddressSpace * (*get_address_space)(PCIBus *bus,
>> +                                void *opaque, int32_t devfn);
>> +};
>> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void *opaque);
>> +
>>  pcibus_t pci_bar_address(PCIDevice *d,
>>                           int reg, uint8_t type, pcibus_t size);
>>  
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 56531759578f..fb770b236d69 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -35,6 +35,7 @@ struct PCIBus {
>>      enum PCIBusFlags flags;
>>      PCIIOMMUFunc iommu_fn;
>>      void *iommu_opaque;
>> +    const PCIIOMMUOps *iommu_ops;
>>      uint8_t devfn_min;
>>      uint32_t slot_reserved_mask;
>>      pci_set_irq_fn set_irq;
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index bf38905b7dc0..4e32c09e81d6 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -2639,7 +2639,15 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>      PCIBus *iommu_bus = bus;
>>      uint8_t devfn = dev->devfn;
>>  
>> -    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
>> +    /*
>> +     * get_address_space() callback is mandatory when iommu uses
>> +     * pci_setup_iommu_ops(), so needs to ensure its presence in
>> +     * the iommu_bus search.
> the fact that it is mandatory should also be documented along with the
> PCIIOMMUOps struct definition and enforced at  pci_setup_iommu_ops()
> removing the need for checking iommu_bus->iommu_ops->get_address_space

OK

>> +     */
>> +    while (iommu_bus &&
>> +           !(iommu_bus->iommu_fn ||
>> +            (iommu_bus->iommu_ops && iommu_bus->iommu_ops->get_address_space)) &&
>> +           iommu_bus->parent_dev) {
>>          PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
>>  
>>          /*
>> @@ -2678,8 +2686,14 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>  
>>          iommu_bus = parent_bus;
>>      }
>> -    if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
>> -        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus) {
>> +        if (iommu_bus->iommu_fn) {
>> +           return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
>> +        } else if (iommu_bus->iommu_ops &&
>> +                   iommu_bus->iommu_ops->get_address_space) {
>> +           return iommu_bus->iommu_ops->get_address_space(bus,
>> +                                           iommu_bus->iommu_opaque, devfn);
>> +        }
>>      }
>>      return &address_space_memory;
>>  }
>> @@ -2690,6 +2704,12 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
>>      bus->iommu_opaque = opaque;
>>  }
>>  
>> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
>> +{
>> +    bus->iommu_ops = ops;
>> +    bus->iommu_opaque = opaque;
> maybe assert if both iommu_fn and iommu_ops are set to make sure the
> conversion is not done partially? If I understand it correctly either of
> those 2 ops should be set and not both.

Good idea.
Joao Martins Oct. 6, 2023, 11:06 a.m. UTC | #6
On 06/10/2023 09:50, Cédric Le Goater wrote:
> On 10/6/23 10:38, Joao Martins wrote:
>> On 02/10/2023 16:12, Cédric Le Goater wrote:
>>> Hello Joao,
>>>
>>> On 6/22/23 23:48, Joao Martins wrote:
>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>
>>>> Add a pci_setup_iommu_ops() that uses a newly added structure
>>>> (PCIIOMMUOps) instead of using PCIIOMMUFunc. The old pci_setup_iommu()
>>>> that uses PCIIOMMUFunc is still kept for other IOMMUs to get an
>>>> an address space for a PCI device in vendor specific way.
>>>>
>>>> In preparation to expand to supplying vIOMMU attributes, add a
>>>> alternate helper pci_setup_iommu_ops() to setup the PCI device IOMMU.
>>>> For now the PCIIOMMUOps just defines the address_space, but it will
>>>> be extended to have another callback.
>>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> [joao: Massage commit message and subject, and make it a complementary
>>>> rather than changing every single consumer of pci_setup_iommu()]
>>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>>> ---
>>>> v1: https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
>>>> ---
>>>>    include/hw/pci/pci.h     |  7 +++++++
>>>>    include/hw/pci/pci_bus.h |  1 +
>>>>    hw/pci/pci.c             | 26 +++++++++++++++++++++++---
>>>>    3 files changed, 31 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>>> index e6d0574a2999..f59aef5a329a 100644
>>>> --- a/include/hw/pci/pci.h
>>>> +++ b/include/hw/pci/pci.h
>>>> @@ -368,6 +368,13 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *,
>>>> int);
>>>>    AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>>>    void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
>>>>    +typedef struct PCIIOMMUOps PCIIOMMUOps;
>>>> +struct PCIIOMMUOps {
>>>> +    AddressSpace * (*get_address_space)(PCIBus *bus,
>>>> +                                void *opaque, int32_t devfn);
>>>> +};
>>>> +void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void
>>>> *opaque);
>>>> +
>>>
>>> I think you should first convert all PHBs to PCIIOMMUOps to avoid all the
>>> tests as below and adapt pci_setup_iommu_ops() with the new parameter.
>>>
>>
>> OK, that's Yi's original patch:
>>
>> https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
>>
>> I went with this one is that 1) it might take eons to get every single IOMMU
>> maintainer ack; and 2) it would allow each IOMMU to move at its own speed
>> specially as I can't test most of the other ones. essentially iterative, rather
>> than invasive change? Does that make sense?
> 
> I think it is ok to make global changes to replace a function by a struct
> of ops. This is not major (unless the extra indirection has a major perf
> impact on some platforms). 

It should be a mechanical change. As the pci_setup_iommu_ops() should be
functionally equivalent to pci_setup_iommu() [...]

> Getting acks from everyone will be difficult
> since some PHBs are orphans.

[...] This is what gets me a bit hesitant
Cédric Le Goater Oct. 6, 2023, 5:09 p.m. UTC | #7
Hello Joao,

>>>> I think you should first convert all PHBs to PCIIOMMUOps to avoid all the
>>>> tests as below and adapt pci_setup_iommu_ops() with the new parameter.
>>>>
>>>
>>> OK, that's Yi's original patch:
>>>
>>> https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/
>>>
>>> I went with this one is that 1) it might take eons to get every single IOMMU
>>> maintainer ack; and 2) it would allow each IOMMU to move at its own speed
>>> specially as I can't test most of the other ones. essentially iterative, rather
>>> than invasive change? Does that make sense?
>>
>> I think it is ok to make global changes to replace a function by a struct
>> of ops. This is not major (unless the extra indirection has a major perf
>> impact on some platforms).
> 
> It should be a mechanical change. As the pci_setup_iommu_ops() should be
> functionally equivalent to pci_setup_iommu() [...]

Thanks for going back to the previous proposal.

>> Getting acks from everyone will be difficultsince some PHBs are orphans.
> 
> [...] This is what gets me a bit hesitant

orphans shouldn't be an issue, nor the PPC emulated machines. We will see
what other maintainers have to say.

Thanks,

C.
Joao Martins Oct. 6, 2023, 5:59 p.m. UTC | #8
On 06/10/2023 18:09, Cédric Le Goater wrote:
>>> Getting acks from everyone will be difficultsince some PHBs are orphans.
>>
>> [...] This is what gets me a bit hesitant
> 
> orphans shouldn't be an issue, nor the PPC emulated machines. We will see
> what other maintainers have to say.

How about this as a compromise: to have a separate patch at the end of the
series that converts every other PHB? This way the rest can iterate, while we
await maintainers feedback without potentially blocking everything else.

Also, one other patch I'll add to this series at the end is this one:

https://lore.kernel.org/qemu-devel/20230908120521.50903-1-joao.m.martins@oracle.com/

This way the vIOMMU series is a complete thing for old and new guests, as
opposed to just new.

	Joao
Cédric Le Goater Oct. 9, 2023, 1:01 p.m. UTC | #9
On 10/6/23 19:59, Joao Martins wrote:
> On 06/10/2023 18:09, Cédric Le Goater wrote:
>>>> Getting acks from everyone will be difficultsince some PHBs are orphans.
>>>
>>> [...] This is what gets me a bit hesitant
>>
>> orphans shouldn't be an issue, nor the PPC emulated machines. We will see
>> what other maintainers have to say.
> 
> How about this as a compromise: to have a separate patch at the end of the
> series that converts every other PHB? This way the rest can iterate, while we
> await maintainers feedback without potentially blocking everything else.

In patch [1], impacted files are :

* PCIIOMMUFunc -> PCIIOMMUOps change in models

   hw/ppc/spapr_pci.c       (David)  R-b
   hw/i386/intel_iommu.c    (Peter Xu) R-b
   hw/arm/smmu-common.c     (Eric)
   hw/virtio/virtio-iommu.c (Eric)
   hw/pci-host/pnv_phb3.c   (Cédric)
   hw/pci-host/pnv_phb4.c   (Cédric)
   hw/pci-host/designware.c (Peter M.)
   hw/pci-host/prep.c       (Hervé)
   hw/pci-host/sabre.c      (Mark)
   hw/s390x/s390-pci-bus.c  (Thomas)
   hw/alpha/typhoon.c       (Richard)
   hw/hppa/dino.c           (Richard)

* Common PCI

   hw/pci/pci.c             (Michael)
   include/hw/pci/pci.h     (Michael)
   include/hw/pci/pci_bus.h (Michael)

* Orphans
  
   hw/i386/amd_iommu.c      (Orphan)
   hw/ppc/ppc440_pcix.c     (Orphan)
   hw/pci-host/ppce500.c    (Orphan)

I will add my R-b on the PPC parts I maintain. The rest doesn't seem it
would raise issues and if so, it should be quick to have since the change
is simple.

[1] https://lore.kernel.org/all/20210302203827.437645-5-yi.l.liu@intel.com/

> 
> Also, one other patch I'll add to this series at the end is this one:
> 
> https://lore.kernel.org/qemu-devel/20230908120521.50903-1-joao.m.martins@oracle.com/
> 
> This way the vIOMMU series is a complete thing for old and new guests, as
> opposed to just new.

Ok good. Let's have it.

Thanks,

C.
diff mbox series

Patch

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6d0574a2999..f59aef5a329a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -368,6 +368,13 @@  typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
 void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
 
+typedef struct PCIIOMMUOps PCIIOMMUOps;
+struct PCIIOMMUOps {
+    AddressSpace * (*get_address_space)(PCIBus *bus,
+                                void *opaque, int32_t devfn);
+};
+void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *iommu_ops, void *opaque);
+
 pcibus_t pci_bar_address(PCIDevice *d,
                          int reg, uint8_t type, pcibus_t size);
 
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 56531759578f..fb770b236d69 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -35,6 +35,7 @@  struct PCIBus {
     enum PCIBusFlags flags;
     PCIIOMMUFunc iommu_fn;
     void *iommu_opaque;
+    const PCIIOMMUOps *iommu_ops;
     uint8_t devfn_min;
     uint32_t slot_reserved_mask;
     pci_set_irq_fn set_irq;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bf38905b7dc0..4e32c09e81d6 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2639,7 +2639,15 @@  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
     PCIBus *iommu_bus = bus;
     uint8_t devfn = dev->devfn;
 
-    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
+    /*
+     * get_address_space() callback is mandatory when iommu uses
+     * pci_setup_iommu_ops(), so needs to ensure its presence in
+     * the iommu_bus search.
+     */
+    while (iommu_bus &&
+           !(iommu_bus->iommu_fn ||
+            (iommu_bus->iommu_ops && iommu_bus->iommu_ops->get_address_space)) &&
+           iommu_bus->parent_dev) {
         PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
 
         /*
@@ -2678,8 +2686,14 @@  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
 
         iommu_bus = parent_bus;
     }
-    if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
-        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
+    if (!pci_bus_bypass_iommu(bus) && iommu_bus) {
+        if (iommu_bus->iommu_fn) {
+           return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
+        } else if (iommu_bus->iommu_ops &&
+                   iommu_bus->iommu_ops->get_address_space) {
+           return iommu_bus->iommu_ops->get_address_space(bus,
+                                           iommu_bus->iommu_opaque, devfn);
+        }
     }
     return &address_space_memory;
 }
@@ -2690,6 +2704,12 @@  void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
     bus->iommu_opaque = opaque;
 }
 
+void pci_setup_iommu_ops(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
+{
+    bus->iommu_ops = ops;
+    bus->iommu_opaque = opaque;
+}
+
 static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
 {
     Range *range = opaque;