Patchwork [2/7] iommu: add api to get iommu_domain of a device

login
register
mail settings
Submitter Bharat Bhushan
Date Sept. 19, 2013, 7:29 a.m.
Message ID <1379575763-2091-3-git-send-email-Bharat.Bhushan@freescale.com>
Download mbox | patch
Permalink /patch/275892/
State Not Applicable
Headers show

Comments

Bharat Bhushan - Sept. 19, 2013, 7:29 a.m.
This API returns the iommu domain to which the device is attached.
The iommu_domain is required for making API calls related to the iommu.
Follow-up patches use this API to learn the iommu mapping.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 drivers/iommu/iommu.c |   10 ++++++++++
 include/linux/iommu.h |    7 +++++++
 2 files changed, 17 insertions(+), 0 deletions(-)
Alex Williamson - Sept. 25, 2013, 4:45 p.m.
On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> This api return the iommu domain to which the device is attached.
> The iommu_domain is required for making API calls related to iommu.
> Follow up patches which use this API to know iommu maping.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>  drivers/iommu/iommu.c |   10 ++++++++++
>  include/linux/iommu.h |    7 +++++++
>  2 files changed, 17 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index fbe9ca7..6ac5f50 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_detach_device);
>  
> +struct iommu_domain *iommu_get_dev_domain(struct device *dev)
> +{
> +	struct iommu_ops *ops = dev->bus->iommu_ops;
> +
> +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> +		return NULL;
> +
> +	return ops->get_dev_iommu_domain(dev);
> +}
> +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);

What prevents this from racing iommu_domain_free()?  There's no
references acquired, so there's no reason for the caller to assume the
pointer is valid.

>  /*
>   * IOMMU groups are really the natrual working unit of the IOMMU, but
>   * the IOMMU API works on domains and devices.  Bridge that gap by
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 7ea319e..fa046bd 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -127,6 +127,7 @@ struct iommu_ops {
>  	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
>  	/* Get the numer of window per domain */
>  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> +	struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
>  
>  	unsigned long pgsize_bitmap;
>  };
> @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
>  				      phys_addr_t offset, u64 size,
>  				      int prot);
>  extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
> +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
>  /**
>   * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
>   * @domain: the iommu domain where the fault has happened
> @@ -284,6 +286,11 @@ static inline void iommu_domain_window_disable(struct iommu_domain *domain,
>  {
>  }
>  
> +static inline struct iommu_domain *iommu_get_dev_domain(struct device *dev)
> +{
> +	return NULL;
> +}
> +
>  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
>  {
>  	return 0;
Bharat Bhushan - Oct. 4, 2013, 9:54 a.m.
> -----Original Message-----
> From: linux-pci-owner@vger.kernel.org [mailto:linux-pci-owner@vger.kernel.org]
> On Behalf Of Alex Williamson
> Sent: Wednesday, September 25, 2013 10:16 PM
> To: Bhushan Bharat-R65777
> Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > This api return the iommu domain to which the device is attached.
> > The iommu_domain is required for making API calls related to iommu.
> > Follow up patches which use this API to know iommu maping.
> >
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > ---
> >  drivers/iommu/iommu.c |   10 ++++++++++
> >  include/linux/iommu.h |    7 +++++++
> >  2 files changed, 17 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > fbe9ca7..6ac5f50 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > *domain, struct device *dev)  }
> > EXPORT_SYMBOL_GPL(iommu_detach_device);
> >
> > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > +
> > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > +		return NULL;
> > +
> > +	return ops->get_dev_iommu_domain(dev); }
> > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> 
> What prevents this from racing iommu_domain_free()?  There's no references
> acquired, so there's no reason for the caller to assume the pointer is valid.

Sorry for late query, somehow this email went into a folder and escaped;

Just to be sure: there is no lock on the generic "struct iommu_domain", but the IP-specific structure (like the FSL domain) linked via iommu_domain->priv has a lock, so we need to handle this race in the FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?

Thanks
-Bharat

> 
> >  /*
> >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> >   * the IOMMU API works on domains and devices.  Bridge that gap by
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > 7ea319e..fa046bd 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -127,6 +127,7 @@ struct iommu_ops {
> >  	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
> >  	/* Get the numer of window per domain */
> >  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> > +	struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
> >
> >  	unsigned long pgsize_bitmap;
> >  };
> > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct iommu_domain
> *domain, u32 wnd_nr,
> >  				      phys_addr_t offset, u64 size,
> >  				      int prot);
> >  extern void iommu_domain_window_disable(struct iommu_domain *domain,
> > u32 wnd_nr);
> > +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
> >  /**
> >   * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
> >   * @domain: the iommu domain where the fault has happened @@ -284,6
> > +286,11 @@ static inline void iommu_domain_window_disable(struct
> > iommu_domain *domain,  {  }
> >
> > +static inline struct iommu_domain *iommu_get_dev_domain(struct device
> > +*dev) {
> > +	return NULL;
> > +}
> > +
> >  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
> > *domain, dma_addr_t iova)  {
> >  	return 0;
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
Bharat Bhushan - Oct. 4, 2013, 10:42 a.m.
> -----Original Message-----
> From: Bhushan Bharat-R65777
> Sent: Friday, October 04, 2013 3:24 PM
> To: 'Alex Williamson'
> Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> 
> 
> > -----Original Message-----
> > From: linux-pci-owner@vger.kernel.org
> > [mailto:linux-pci-owner@vger.kernel.org]
> > On Behalf Of Alex Williamson
> > Sent: Wednesday, September 25, 2013 10:16 PM
> > To: Bhushan Bharat-R65777
> > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
> > Bhushan Bharat-R65777
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > device
> >
> > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > This api return the iommu domain to which the device is attached.
> > > The iommu_domain is required for making API calls related to iommu.
> > > Follow up patches which use this API to know iommu maping.
> > >
> > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > > ---
> > >  drivers/iommu/iommu.c |   10 ++++++++++
> > >  include/linux/iommu.h |    7 +++++++
> > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > > fbe9ca7..6ac5f50 100644
> > > --- a/drivers/iommu/iommu.c
> > > +++ b/drivers/iommu/iommu.c
> > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > > *domain, struct device *dev)  }
> > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > >
> > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > +
> > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > +		return NULL;
> > > +
> > > +	return ops->get_dev_iommu_domain(dev); }
> > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> >
> > What prevents this from racing iommu_domain_free()?  There's no
> > references acquired, so there's no reason for the caller to assume the pointer
> is valid.
> 
> Sorry for late query, somehow this email went into a folder and escaped;
> 
> Just to be sure, there is not lock at generic "struct iommu_domain", but IP
> specific structure (link FSL domain) linked in iommu_domain->priv have a lock,
> so we need to ensure this race in FSL iommu code (say
> drivers/iommu/fsl_pamu_domain.c), right?

Thinking about this further, there are more problems here:
 - The MSI subsystem will call iommu_get_dev_domain(), which will take a lock, find the domain pointer, release the lock, and return the domain
 - Now suppose the domain is freed
 - Then the MSI subsystem tries to do work on the domain (like get_attribute/set_attribute etc) ???

So can we make iommu_get_dev_domain() return the domain with the lock held, and have iommu_put_dev_domain() release the lock? And iommu_get_dev_domain() must always be followed by iommu_put_dev_domain().

Thanks
-Bharat

> 
> Thanks
> -Bharat
> 
> >
> > >  /*
> > >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> > >   * the IOMMU API works on domains and devices.  Bridge that gap by
> > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > 7ea319e..fa046bd 100644
> > > --- a/include/linux/iommu.h
> > > +++ b/include/linux/iommu.h
> > > @@ -127,6 +127,7 @@ struct iommu_ops {
> > >  	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
> > >  	/* Get the numer of window per domain */
> > >  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> > > +	struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
> > >
> > >  	unsigned long pgsize_bitmap;
> > >  };
> > > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
> > > iommu_domain
> > *domain, u32 wnd_nr,
> > >  				      phys_addr_t offset, u64 size,
> > >  				      int prot);
> > >  extern void iommu_domain_window_disable(struct iommu_domain
> > > *domain,
> > > u32 wnd_nr);
> > > +extern struct iommu_domain *iommu_get_dev_domain(struct device
> > > +*dev);
> > >  /**
> > >   * report_iommu_fault() - report about an IOMMU fault to the IOMMU
> framework
> > >   * @domain: the iommu domain where the fault has happened @@ -284,6
> > > +286,11 @@ static inline void iommu_domain_window_disable(struct
> > > iommu_domain *domain,  {  }
> > >
> > > +static inline struct iommu_domain *iommu_get_dev_domain(struct
> > > +device
> > > +*dev) {
> > > +	return NULL;
> > > +}
> > > +
> > >  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
> > > *domain, dma_addr_t iova)  {
> > >  	return 0;
> >
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci"
> > in the body of a message to majordomo@vger.kernel.org More majordomo
> > info at http://vger.kernel.org/majordomo-info.html
Alex Williamson - Oct. 4, 2013, 3:45 p.m.
On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> 
> > -----Original Message-----
> > From: linux-pci-owner@vger.kernel.org [mailto:linux-pci-owner@vger.kernel.org]
> > On Behalf Of Alex Williamson
> > Sent: Wednesday, September 25, 2013 10:16 PM
> > To: Bhushan Bharat-R65777
> > Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> > pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > foundation.org; Bhushan Bharat-R65777
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > This api return the iommu domain to which the device is attached.
> > > The iommu_domain is required for making API calls related to iommu.
> > > Follow up patches which use this API to know iommu maping.
> > >
> > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > > ---
> > >  drivers/iommu/iommu.c |   10 ++++++++++
> > >  include/linux/iommu.h |    7 +++++++
> > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > > fbe9ca7..6ac5f50 100644
> > > --- a/drivers/iommu/iommu.c
> > > +++ b/drivers/iommu/iommu.c
> > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > > *domain, struct device *dev)  }
> > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > >
> > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > +
> > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > +		return NULL;
> > > +
> > > +	return ops->get_dev_iommu_domain(dev); }
> > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > 
> > What prevents this from racing iommu_domain_free()?  There's no references
> > acquired, so there's no reason for the caller to assume the pointer is valid.
> 
> Sorry for late query, somehow this email went into a folder and escaped;
> 
> Just to be sure, there is not lock at generic "struct iommu_domain", but IP specific structure (link FSL domain) linked in iommu_domain->priv have a lock, so we need to ensure this race in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?

No, it's not sufficient to make sure that your use of the interface is
race free.  The interface itself needs to be designed so that it's
difficult to use incorrectly.  That's not the case here.  This is a
backdoor to get the iommu domain from the iommu driver regardless of who
is using it or how.  The iommu domain is created and managed by vfio, so
shouldn't we be looking at how to do this through vfio?  It seems like
you'd want to use your device to get a vfio group reference, from which
you could do something with the vfio external user interface and get the
iommu domain reference.  Thanks,

Alex
 
> > >  /*
> > >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> > >   * the IOMMU API works on domains and devices.  Bridge that gap by
> > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > 7ea319e..fa046bd 100644
> > > --- a/include/linux/iommu.h
> > > +++ b/include/linux/iommu.h
> > > @@ -127,6 +127,7 @@ struct iommu_ops {
> > >  	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
> > >  	/* Get the numer of window per domain */
> > >  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> > > +	struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
> > >
> > >  	unsigned long pgsize_bitmap;
> > >  };
> > > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct iommu_domain
> > *domain, u32 wnd_nr,
> > >  				      phys_addr_t offset, u64 size,
> > >  				      int prot);
> > >  extern void iommu_domain_window_disable(struct iommu_domain *domain,
> > > u32 wnd_nr);
> > > +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
> > >  /**
> > >   * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
> > >   * @domain: the iommu domain where the fault has happened @@ -284,6
> > > +286,11 @@ static inline void iommu_domain_window_disable(struct
> > > iommu_domain *domain,  {  }
> > >
> > > +static inline struct iommu_domain *iommu_get_dev_domain(struct device
> > > +*dev) {
> > > +	return NULL;
> > > +}
> > > +
> > >  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
> > > *domain, dma_addr_t iova)  {
> > >  	return 0;
> > 
> > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body
> > of a message to majordomo@vger.kernel.org More majordomo info at
> > http://vger.kernel.org/majordomo-info.html
>
Bharat Bhushan - Oct. 4, 2013, 4:47 p.m.
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Friday, October 04, 2013 9:15 PM
> To: Bhushan Bharat-R65777
> Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> >
> > > -----Original Message-----
> > > From: linux-pci-owner@vger.kernel.org
> > > [mailto:linux-pci-owner@vger.kernel.org]
> > > On Behalf Of Alex Williamson
> > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
> > > Bhushan Bharat-R65777
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > This api return the iommu domain to which the device is attached.
> > > > The iommu_domain is required for making API calls related to iommu.
> > > > Follow up patches which use this API to know iommu maping.
> > > >
> > > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > > > ---
> > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > >  include/linux/iommu.h |    7 +++++++
> > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > >
> > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > > > fbe9ca7..6ac5f50 100644
> > > > --- a/drivers/iommu/iommu.c
> > > > +++ b/drivers/iommu/iommu.c
> > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > > > *domain, struct device *dev)  }
> > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > >
> > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > +
> > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > > +		return NULL;
> > > > +
> > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > >
> > > What prevents this from racing iommu_domain_free()?  There's no
> > > references acquired, so there's no reason for the caller to assume the
> pointer is valid.
> >
> > Sorry for late query, somehow this email went into a folder and
> > escaped;
> >
> > Just to be sure, there is not lock at generic "struct iommu_domain", but IP
> specific structure (link FSL domain) linked in iommu_domain->priv have a lock,
> so we need to ensure this race in FSL iommu code (say
> drivers/iommu/fsl_pamu_domain.c), right?
> 
> No, it's not sufficient to make sure that your use of the interface is race
> free.  The interface itself needs to be designed so that it's difficult to use
> incorrectly.

So we can define iommu_get_dev_domain()/iommu_put_dev_domain(); iommu_get_dev_domain() will return the domain with the lock held, and iommu_put_dev_domain() will release the lock. And iommu_get_dev_domain() must always be followed by iommu_put_dev_domain().


> That's not the case here.  This is a backdoor to get the iommu
> domain from the iommu driver regardless of who is using it or how.  The iommu
> domain is created and managed by vfio, so shouldn't we be looking at how to do
> this through vfio?

Let me first describe what we are doing here:
During initialization:-
 - vfio talks to MSI system to know the MSI-page and size
 - vfio then interacts with iommu to map the MSI-page in iommu (IOVA is decided by userspace and physical address is the MSI-page)
 - So the IOVA subwindow mapping is created in iommu and yes VFIO know about this mapping.

Now do SET_IRQ(MSI/MSIX) ioctl:
 - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set MSI address/data in device.
 - So in current implementation (this patchset) msi-subsystem gets the IOVA from iommu via this defined interface.
 - Are you saying that rather than getting this from iommu, we should get this from vfio? What difference does this make?

Thanks
-Bharat

> It seems like you'd want to use your device to get a vfio
> group reference, from which you could do something with the vfio external user
> interface and get the iommu domain reference.  Thanks,
> 
> Alex
> 
> > > >  /*
> > > >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> > > >   * the IOMMU API works on domains and devices.  Bridge that gap
> > > > by diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > > > index 7ea319e..fa046bd 100644
> > > > --- a/include/linux/iommu.h
> > > > +++ b/include/linux/iommu.h
> > > > @@ -127,6 +127,7 @@ struct iommu_ops {
> > > >  	int (*domain_set_windows)(struct iommu_domain *domain, u32
> w_count);
> > > >  	/* Get the numer of window per domain */
> > > >  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> > > > +	struct iommu_domain *(*get_dev_iommu_domain)(struct device
> > > > +*dev);
> > > >
> > > >  	unsigned long pgsize_bitmap;
> > > >  };
> > > > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
> > > > iommu_domain
> > > *domain, u32 wnd_nr,
> > > >  				      phys_addr_t offset, u64 size,
> > > >  				      int prot);
> > > >  extern void iommu_domain_window_disable(struct iommu_domain
> > > > *domain,
> > > > u32 wnd_nr);
> > > > +extern struct iommu_domain *iommu_get_dev_domain(struct device
> > > > +*dev);
> > > >  /**
> > > >   * report_iommu_fault() - report about an IOMMU fault to the IOMMU
> framework
> > > >   * @domain: the iommu domain where the fault has happened @@
> > > > -284,6
> > > > +286,11 @@ static inline void iommu_domain_window_disable(struct
> > > > iommu_domain *domain,  {  }
> > > >
> > > > +static inline struct iommu_domain *iommu_get_dev_domain(struct
> > > > +device
> > > > +*dev) {
> > > > +	return NULL;
> > > > +}
> > > > +
> > > >  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
> > > > *domain, dma_addr_t iova)  {
> > > >  	return 0;
> > >
> > >
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-pci"
> > > in the body of a message to majordomo@vger.kernel.org More majordomo
> > > info at http://vger.kernel.org/majordomo-info.html
> >
> 
> 
>
Alex Williamson - Oct. 4, 2013, 5:12 p.m.
On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, October 04, 2013 9:15 PM
> > To: Bhushan Bharat-R65777
> > Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> > pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > foundation.org
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> > >
> > > > -----Original Message-----
> > > > From: linux-pci-owner@vger.kernel.org
> > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > On Behalf Of Alex Williamson
> > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > To: Bhushan Bharat-R65777
> > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
> > > > Bhushan Bharat-R65777
> > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > device
> > > >
> > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > This api return the iommu domain to which the device is attached.
> > > > > The iommu_domain is required for making API calls related to iommu.
> > > > > Follow up patches which use this API to know iommu maping.
> > > > >
> > > > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > > > > ---
> > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > >  include/linux/iommu.h |    7 +++++++
> > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > >
> > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > > > > fbe9ca7..6ac5f50 100644
> > > > > --- a/drivers/iommu/iommu.c
> > > > > +++ b/drivers/iommu/iommu.c
> > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > > > > *domain, struct device *dev)  }
> > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > >
> > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > +
> > > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > > > +		return NULL;
> > > > > +
> > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > >
> > > > What prevents this from racing iommu_domain_free()?  There's no
> > > > references acquired, so there's no reason for the caller to assume the
> > pointer is valid.
> > >
> > > Sorry for late query, somehow this email went into a folder and
> > > escaped;
> > >
> > > Just to be sure, there is not lock at generic "struct iommu_domain", but IP
> > specific structure (link FSL domain) linked in iommu_domain->priv have a lock,
> > so we need to ensure this race in FSL iommu code (say
> > drivers/iommu/fsl_pamu_domain.c), right?
> > 
> > No, it's not sufficient to make sure that your use of the interface is race
> > free.  The interface itself needs to be designed so that it's difficult to use
> > incorrectly.
> 
> So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> iommu_get_dev_domain() will return domain with the lock held, and
> iommu_put_dev_domain() will release the lock? And
> iommu_get_dev_domain() must always be followed by
> iommu_get_dev_domain().

What lock?  get/put are generally used for reference counting, not
locking in the kernel.

> > That's not the case here.  This is a backdoor to get the iommu
> > domain from the iommu driver regardless of who is using it or how.  The iommu
> > domain is created and managed by vfio, so shouldn't we be looking at how to do
> > this through vfio?
> 
> Let me first describe what we are doing here:
> During initialization:-
>  - vfio talks to MSI system to know the MSI-page and size
>  - vfio then interacts with iommu to map the MSI-page in iommu (IOVA is decided by userspace and physical address is the MSI-page)
>  - So the IOVA subwindow mapping is created in iommu and yes VFIO know about this mapping.
> 
> Now do SET_IRQ(MSI/MSIX) ioctl:
>  - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set MSI address/data in device.
>  - So in current implementation (this patchset) msi-subsystem gets the IOVA from iommu via this defined interface.
>  - Are you saying that rather than getting this from iommu, we should get this from vfio? What difference does this make?

Yes, you just said above that vfio knows the msi to iova mapping, so why
go outside of vfio to find it later?  The difference is one case you can
have a proper reference to data structures to make sure the pointer you
get back actually has meaning at the time you're using it vs the code
here where you're defining an API that returns a meaningless value
because you can't check or enforce that an arbitrary caller is using it
correctly.  It's not maintainable.  Thanks,

Alex

> > It seems like you'd want to use your device to get a vfio
> > group reference, from which you could do something with the vfio external user
> > interface and get the iommu domain reference.  Thanks,
> > 
> > Alex
> > 
> > > > >  /*
> > > > >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> > > > >   * the IOMMU API works on domains and devices.  Bridge that gap
> > > > > by diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > > > > index 7ea319e..fa046bd 100644
> > > > > --- a/include/linux/iommu.h
> > > > > +++ b/include/linux/iommu.h
> > > > > @@ -127,6 +127,7 @@ struct iommu_ops {
> > > > >  	int (*domain_set_windows)(struct iommu_domain *domain, u32
> > w_count);
> > > > >  	/* Get the numer of window per domain */
> > > > >  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> > > > > +	struct iommu_domain *(*get_dev_iommu_domain)(struct device
> > > > > +*dev);
> > > > >
> > > > >  	unsigned long pgsize_bitmap;
> > > > >  };
> > > > > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
> > > > > iommu_domain
> > > > *domain, u32 wnd_nr,
> > > > >  				      phys_addr_t offset, u64 size,
> > > > >  				      int prot);
> > > > >  extern void iommu_domain_window_disable(struct iommu_domain
> > > > > *domain,
> > > > > u32 wnd_nr);
> > > > > +extern struct iommu_domain *iommu_get_dev_domain(struct device
> > > > > +*dev);
> > > > >  /**
> > > > >   * report_iommu_fault() - report about an IOMMU fault to the IOMMU
> > framework
> > > > >   * @domain: the iommu domain where the fault has happened @@
> > > > > -284,6
> > > > > +286,11 @@ static inline void iommu_domain_window_disable(struct
> > > > > iommu_domain *domain,  {  }
> > > > >
> > > > > +static inline struct iommu_domain *iommu_get_dev_domain(struct
> > > > > +device
> > > > > +*dev) {
> > > > > +	return NULL;
> > > > > +}
> > > > > +
> > > > >  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
> > > > > *domain, dma_addr_t iova)  {
> > > > >  	return 0;
> > > >
> > > >
> > > >
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-pci"
> > > > in the body of a message to majordomo@vger.kernel.org More majordomo
> > > > info at http://vger.kernel.org/majordomo-info.html
> > >
> > 
> > 
> > 
>
Bharat Bhushan - Oct. 4, 2013, 5:23 p.m.
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Friday, October 04, 2013 10:43 PM
> To: Bhushan Bharat-R65777
> Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> >
> > > -----Original Message-----
> > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > Sent: Friday, October 04, 2013 9:15 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: linux-pci-owner@vger.kernel.org
> > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > On Behalf Of Alex Williamson
> > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org; Bhushan Bharat-R65777
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > This api return the iommu domain to which the device is attached.
> > > > > > The iommu_domain is required for making API calls related to iommu.
> > > > > > Follow up patches which use this API to know iommu maping.
> > > > > >
> > > > > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > > > > > ---
> > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > index
> > > > > > fbe9ca7..6ac5f50 100644
> > > > > > --- a/drivers/iommu/iommu.c
> > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > >
> > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > +
> > > > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > > > > +		return NULL;
> > > > > > +
> > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > >
> > > > > What prevents this from racing iommu_domain_free()?  There's no
> > > > > references acquired, so there's no reason for the caller to
> > > > > assume the
> > > pointer is valid.
> > > >
> > > > Sorry for late query, somehow this email went into a folder and
> > > > escaped;
> > > >
> > > > Just to be sure, there is not lock at generic "struct
> > > > iommu_domain", but IP
> > > specific structure (link FSL domain) linked in iommu_domain->priv
> > > have a lock, so we need to ensure this race in FSL iommu code (say
> > > drivers/iommu/fsl_pamu_domain.c), right?
> > >
> > > No, it's not sufficient to make sure that your use of the interface
> > > is race free.  The interface itself needs to be designed so that
> > > it's difficult to use incorrectly.
> >
> > So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> > iommu_get_dev_domain() will return domain with the lock held, and
> > iommu_put_dev_domain() will release the lock? And
> > iommu_get_dev_domain() must always be followed by
> > iommu_put_dev_domain().
> 
> What lock?  get/put are generally used for reference counting, not locking in
> the kernel.
> 
> > > That's not the case here.  This is a backdoor to get the iommu
> > > domain from the iommu driver regardless of who is using it or how.
> > > The iommu domain is created and managed by vfio, so shouldn't we be
> > > looking at how to do this through vfio?
> >
> > Let me first describe what we are doing here:
> > During initialization:-
> >  - vfio talks to MSI system to know the MSI-page and size
> >  - vfio then interacts with iommu to map the MSI-page in iommu (IOVA
> > is decided by userspace and physical address is the MSI-page)
> >  - So the IOVA subwindow mapping is created in iommu and yes VFIO know about
> this mapping.
> >
> > Now do SET_IRQ(MSI/MSIX) ioctl:
> >  - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set
> MSI address/data in device.
> >  - So in current implementation (this patchset) msi-subsystem gets the IOVA
> from iommu via this defined interface.
> >  - Are you saying that rather than getting this from iommu, we should get this
> from vfio? What difference does this make?
> 
> Yes, you just said above that vfio knows the msi to iova mapping, so why go
> outside of vfio to find it later?  The difference is one case you can have a
> proper reference to data structures to make sure the pointer you get back
> actually has meaning at the time you're using it vs the code here where you're
> defining an API that returns a meaningless value

With FSL-PAMU we will always get consistent data from the iommu or the vfio data structure.

> because you can't check or
> enforce that an arbitrary caller is using it correctly.

I am not sure what an arbitrary caller is? The pdev is known to vfio, so vfio will only call pci_enable_msix()/pci_enable_msi_block() for this pdev. If any other code makes the call, then some other unexpected thing is happening in the system, no?


>  It's not maintainable.
> Thanks,

I do not have any issue with this as well; can you also describe the type of API you are envisioning?
I can think of defining some functions in vfio.c/vfio_iommu*.c, making them global and declaring them in include/linux/vfio.h,
and including <linux/vfio.h> in the caller file (arch/powerpc/kernel/msi.c).

Thanks
-Bharat
> 
> Alex
> 
> > > It seems like you'd want to use your device to get a vfio group
> > > reference, from which you could do something with the vfio external
> > > user interface and get the iommu domain reference.  Thanks,
> > >
> > > Alex
> > >
> > > > > >  /*
> > > > > >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> > > > > >   * the IOMMU API works on domains and devices.  Bridge that
> > > > > > gap by diff --git a/include/linux/iommu.h
> > > > > > b/include/linux/iommu.h index 7ea319e..fa046bd 100644
> > > > > > --- a/include/linux/iommu.h
> > > > > > +++ b/include/linux/iommu.h
> > > > > > @@ -127,6 +127,7 @@ struct iommu_ops {
> > > > > >  	int (*domain_set_windows)(struct iommu_domain *domain, u32
> > > w_count);
> > > > > >  	/* Get the numer of window per domain */
> > > > > >  	u32 (*domain_get_windows)(struct iommu_domain *domain);
> > > > > > +	struct iommu_domain *(*get_dev_iommu_domain)(struct device
> > > > > > +*dev);
> > > > > >
> > > > > >  	unsigned long pgsize_bitmap;  }; @@ -190,6 +191,7 @@ extern
> > > > > > int iommu_domain_window_enable(struct iommu_domain
> > > > > *domain, u32 wnd_nr,
> > > > > >  				      phys_addr_t offset, u64 size,
> > > > > >  				      int prot);
> > > > > >  extern void iommu_domain_window_disable(struct iommu_domain
> > > > > > *domain,
> > > > > > u32 wnd_nr);
> > > > > > +extern struct iommu_domain *iommu_get_dev_domain(struct
> > > > > > +device *dev);
> > > > > >  /**
> > > > > >   * report_iommu_fault() - report about an IOMMU fault to the
> > > > > > IOMMU
> > > framework
> > > > > >   * @domain: the iommu domain where the fault has happened @@
> > > > > > -284,6
> > > > > > +286,11 @@ static inline void
> > > > > > +iommu_domain_window_disable(struct
> > > > > > iommu_domain *domain,  {  }
> > > > > >
> > > > > > +static inline struct iommu_domain
> > > > > > +*iommu_get_dev_domain(struct device
> > > > > > +*dev) {
> > > > > > +	return NULL;
> > > > > > +}
> > > > > > +
> > > > > >  static inline phys_addr_t iommu_iova_to_phys(struct
> > > > > > iommu_domain *domain, dma_addr_t iova)  {
> > > > > >  	return 0;
> > > > >
> > > > >
> > > > >
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-pci"
> > > > > in the body of a message to majordomo@vger.kernel.org More
> > > > > majordomo info at http://vger.kernel.org/majordomo-info.html
> > > >
> > >
> > >
> > >
> >
> 
> 
>
Alex Williamson - Oct. 4, 2013, 6:12 p.m.
On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, October 04, 2013 10:43 PM
> > To: Bhushan Bharat-R65777
> > Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> > pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > foundation.org
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> > >
> > > > -----Original Message-----
> > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > To: Bhushan Bharat-R65777
> > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > device
> > > >
> > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > On Behalf Of Alex Williamson
> > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > To: Bhushan Bharat-R65777
> > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > foundation.org; Bhushan Bharat-R65777
> > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > > device
> > > > > >
> > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > This api return the iommu domain to which the device is attached.
> > > > > > > The iommu_domain is required for making API calls related to iommu.
> > > > > > > Follow up patches which use this API to know iommu maping.
> > > > > > >
> > > > > > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > > > > > > ---
> > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > > index
> > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > >
> > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > +
> > > > > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > > > > > +		return NULL;
> > > > > > > +
> > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > >
> > > > > > What prevents this from racing iommu_domain_free()?  There's no
> > > > > > references acquired, so there's no reason for the caller to
> > > > > > assume the
> > > > pointer is valid.
> > > > >
> > > > > Sorry for late query, somehow this email went into a folder and
> > > > > escaped;
> > > > >
> > > > > Just to be sure, there is not lock at generic "struct
> > > > > iommu_domain", but IP
> > > > specific structure (link FSL domain) linked in iommu_domain->priv
> > > > have a lock, so we need to ensure this race in FSL iommu code (say
> > > > drivers/iommu/fsl_pamu_domain.c), right?
> > > >
> > > > No, it's not sufficient to make sure that your use of the interface
> > > > is race free.  The interface itself needs to be designed so that
> > > > it's difficult to use incorrectly.
> > >
> > > So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> > > iommu_get_dev_domain() will return domain with the lock held, and
> > > iommu_put_dev_domain() will release the lock? And
> > > iommu_get_dev_domain() must always be followed by
> > > iommu_get_dev_domain().
> > 
> > What lock?  get/put are generally used for reference counting, not locking in
> > the kernel.
> > 
> > > > That's not the case here.  This is a backdoor to get the iommu
> > > > domain from the iommu driver regardless of who is using it or how.
> > > > The iommu domain is created and managed by vfio, so shouldn't we be
> > > > looking at how to do this through vfio?
> > >
> > > Let me first describe what we are doing here:
> > > During initialization:-
> > >  - vfio talks to MSI system to know the MSI-page and size
> > >  - vfio then interacts with iommu to map the MSI-page in iommu (IOVA
> > > is decided by userspace and physical address is the MSI-page)
> > >  - So the IOVA subwindow mapping is created in iommu and yes VFIO know about
> > this mapping.
> > >
> > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > >  - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set
> > MSI address/data in device.
> > >  - So in current implementation (this patchset) msi-subsystem gets the IOVA
> > from iommu via this defined interface.
> > >  - Are you saying that rather than getting this from iommu, we should get this
> > from vfio? What difference does this make?
> > 
> > Yes, you just said above that vfio knows the msi to iova mapping, so why go
> > outside of vfio to find it later?  The difference is one case you can have a
> > proper reference to data structures to make sure the pointer you get back
> > actually has meaning at the time you're using it vs the code here where you're
> > defining an API that returns a meaningless value
> 
> With FSL-PAMU we will always get consistant data from iommu or vfio-data structure.

Great, but you're trying to add a generic API to the IOMMU subsystem
that's difficult to use correctly.  The fact that you use it correctly
does not justify the API.

> > because you can't check or
> > enforce that an arbitrary caller is using it correctly.
> 
> I am not sure what is arbitrary caller? pdev is known to vfio, so vfio
> will only make pci_enable_msix()/pci_enable_msi_block() for this pdev.
> If anyother code makes then it is some other unexpectedly thing
> happening in system, no?

What's proposed here is a generic IOMMU API.  Anybody can call this.
What if the host SCSI driver decides to go get the iommu domain for its
device (or any other device)?  Does that fit your usage model?

> >  It's not maintainable.
> > Thanks,
> 
> I do not have any issue with this as well, can you also describe the type of API you are envisioning;
> I can think of defining some function in vfio.c/vfio_iommu*.c, make them global and declare then in include/Linux/vfio.h
> And include <Linux/vfio.h> in caller file (arch/powerpc/kernel/msi.c)

Do you really want module dependencies between vfio and your core kernel
MSI setup?  Look at the vfio external user interface that we've already
defined.  That allows other components of the kernel to get a proper
reference to a vfio group.  From there you can work out how to get what
you want.  Another alternative is that vfio could register an MSI to
IOVA mapping with architecture code when the mapping is created.  The
MSI setup path could then do a lookup in architecture code for the
mapping.  You could even store the MSI to IOVA mapping in VFIO and
create an interface where SET_IRQ passes that mapping into setup code.
Thanks,

Alex
Bharat Bhushan - Oct. 7, 2013, 5:46 a.m.
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Friday, October 04, 2013 11:42 PM
> To: Bhushan Bharat-R65777
> Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> >
> > > -----Original Message-----
> > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > Sent: Friday, October 04, 2013 10:43 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > On Behalf Of Alex Williamson
> > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > To: Bhushan Bharat-R65777
> > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > foundation.org; Bhushan Bharat-R65777
> > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > of a device
> > > > > > >
> > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > This api return the iommu domain to which the device is attached.
> > > > > > > > The iommu_domain is required for making API calls related to
> iommu.
> > > > > > > > Follow up patches which use this API to know iommu maping.
> > > > > > > >
> > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > ---
> > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > > > index
> > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > >
> > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > +
> > > > > > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain ==
> NULL))
> > > > > > > > +		return NULL;
> > > > > > > > +
> > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > >
> > > > > > > What prevents this from racing iommu_domain_free()?  There's
> > > > > > > no references acquired, so there's no reason for the caller
> > > > > > > to assume the
> > > > > pointer is valid.
> > > > > >
> > > > > > Sorry for late query, somehow this email went into a folder
> > > > > > and escaped;
> > > > > >
> > > > > > Just to be sure, there is not lock at generic "struct
> > > > > > iommu_domain", but IP
> > > > > specific structure (link FSL domain) linked in
> > > > > iommu_domain->priv have a lock, so we need to ensure this race
> > > > > in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?
> > > > >
> > > > > No, it's not sufficient to make sure that your use of the
> > > > > interface is race free.  The interface itself needs to be
> > > > > designed so that it's difficult to use incorrectly.
> > > >
> > > > So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > iommu_get_dev_domain() will return domain with the lock held, and
> > > > iommu_put_dev_domain() will release the lock? And
> > > > iommu_get_dev_domain() must always be followed by
> > > > iommu_get_dev_domain().
> > >
> > > What lock?  get/put are generally used for reference counting, not
> > > locking in the kernel.
> > >
> > > > > That's not the case here.  This is a backdoor to get the iommu
> > > > > domain from the iommu driver regardless of who is using it or how.
> > > > > The iommu domain is created and managed by vfio, so shouldn't we
> > > > > be looking at how to do this through vfio?
> > > >
> > > > Let me first describe what we are doing here:
> > > > During initialization:-
> > > >  - vfio talks to MSI system to know the MSI-page and size
> > > >  - vfio then interacts with iommu to map the MSI-page in iommu
> > > > (IOVA is decided by userspace and physical address is the
> > > > MSI-page)
> > > >  - So the IOVA subwindow mapping is created in iommu and yes VFIO
> > > > know about
> > > this mapping.
> > > >
> > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which is
> > > > supposed to set
> > > MSI address/data in device.
> > > >  - So in current implementation (this patchset) msi-subsystem gets
> > > > the IOVA
> > > from iommu via this defined interface.
> > > >  - Are you saying that rather than getting this from iommu, we
> > > > should get this
> > > from vfio? What difference does this make?
> > >
> > > Yes, you just said above that vfio knows the msi to iova mapping, so
> > > why go outside of vfio to find it later?  The difference is one case
> > > you can have a proper reference to data structures to make sure the
> > > pointer you get back actually has meaning at the time you're using
> > > it vs the code here where you're defining an API that returns a
> > > meaningless value
> >
> > With FSL-PAMU we will always get consistant data from iommu or vfio-data
> structure.
> 
> Great, but you're trying to add a generic API to the IOMMU subsystem that's
> difficult to use correctly.  The fact that you use it correctly does not justify
> the API.
> 
> > > because you can't check or
> > > enforce that an arbitrary caller is using it correctly.
> >
> > I am not sure what is arbitrary caller? pdev is known to vfio, so vfio
> > will only make pci_enable_msix()/pci_enable_msi_block() for this pdev.
> > If anyother code makes then it is some other unexpectedly thing
> > happening in system, no?
> 
> What's proposed here is a generic IOMMU API.  Anybody can call this.
> What if the host SCSI driver decides to go get the iommu domain for it's device
> (or any other device)?  Does that fit your usage model?
> 
> > >  It's not maintainable.
> > > Thanks,
> >
> > I do not have any issue with this as well, can you also describe the
> > type of API you are envisioning; I can think of defining some function
> > in vfio.c/vfio_iommu*.c, make them global and declare then in
> > include/Linux/vfio.h And include <Linux/vfio.h> in caller file
> > (arch/powerpc/kernel/msi.c)
> 
> Do you really want module dependencies between vfio and your core kernel MSI
> setup?  Look at the vfio external user interface that we've already defined.
> That allows other components of the kernel to get a proper reference to a vfio
> group.  From there you can work out how to get what you want.  Another
> alternative is that vfio could register an MSI to IOVA mapping with architecture
> code when the mapping is created.  The MSI setup path could then do a lookup in
> architecture code for the mapping.  You could even store the MSI to IOVA mapping
> in VFIO and create an interface where SET_IRQ passes that mapping into setup
> code.

Ok, what I want is to get the IOVA associated with a physical address (the physical address of the MSI bank).
And currently I do not see a way to know the IOVA of a physical address other than doing all of this: getting the domain and then searching through all of the iommu windows of that domain.

What if we add an iommu API which can return the IOVA mapping of a physical address? The current use case is setting up MSIs for an aperture-type IOMMU; also, getting a phys_to_iova() mapping is independent of VFIO. Your thoughts?

Thanks
-Bharat

> Thanks,
> 
> Alex
>
Alex Williamson - Oct. 8, 2013, 3:13 a.m.
On Mon, 2013-10-07 at 05:46 +0000, Bhushan Bharat-R65777 wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, October 04, 2013 11:42 PM
> > To: Bhushan Bharat-R65777
> > Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> > pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > foundation.org
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> > >
> > > > -----Original Message-----
> > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > Sent: Friday, October 04, 2013 10:43 PM
> > > > To: Bhushan Bharat-R65777
> > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > device
> > > >
> > > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > > To: Bhushan Bharat-R65777
> > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > foundation.org
> > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > > device
> > > > > >
> > > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > > On Behalf Of Alex Williamson
> > > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > > foundation.org; Bhushan Bharat-R65777
> > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > > of a device
> > > > > > > >
> > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > > This api return the iommu domain to which the device is attached.
> > > > > > > > > The iommu_domain is required for making API calls related to
> > iommu.
> > > > > > > > > Follow up patches which use this API to know iommu maping.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > > > > index
> > > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > > >
> > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > > +
> > > > > > > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain ==
> > NULL))
> > > > > > > > > +		return NULL;
> > > > > > > > > +
> > > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > > >
> > > > > > > > What prevents this from racing iommu_domain_free()?  There's
> > > > > > > > no references acquired, so there's no reason for the caller
> > > > > > > > to assume the
> > > > > > pointer is valid.
> > > > > > >
> > > > > > > Sorry for late query, somehow this email went into a folder
> > > > > > > and escaped;
> > > > > > >
> > > > > > > Just to be sure, there is not lock at generic "struct
> > > > > > > iommu_domain", but IP
> > > > > > specific structure (link FSL domain) linked in
> > > > > > iommu_domain->priv have a lock, so we need to ensure this race
> > > > > > in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?
> > > > > >
> > > > > > No, it's not sufficient to make sure that your use of the
> > > > > > interface is race free.  The interface itself needs to be
> > > > > > designed so that it's difficult to use incorrectly.
> > > > >
> > > > > So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > > iommu_get_dev_domain() will return domain with the lock held, and
> > > > > iommu_put_dev_domain() will release the lock? And
> > > > > iommu_get_dev_domain() must always be followed by
> > > > > iommu_get_dev_domain().
> > > >
> > > > What lock?  get/put are generally used for reference counting, not
> > > > locking in the kernel.
> > > >
> > > > > > That's not the case here.  This is a backdoor to get the iommu
> > > > > > domain from the iommu driver regardless of who is using it or how.
> > > > > > The iommu domain is created and managed by vfio, so shouldn't we
> > > > > > be looking at how to do this through vfio?
> > > > >
> > > > > Let me first describe what we are doing here:
> > > > > During initialization:-
> > > > >  - vfio talks to MSI system to know the MSI-page and size
> > > > >  - vfio then interacts with iommu to map the MSI-page in iommu
> > > > > (IOVA is decided by userspace and physical address is the
> > > > > MSI-page)
> > > > >  - So the IOVA subwindow mapping is created in iommu and yes VFIO
> > > > > know about
> > > > this mapping.
> > > > >
> > > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which is
> > > > > supposed to set
> > > > MSI address/data in device.
> > > > >  - So in current implementation (this patchset) msi-subsystem gets
> > > > > the IOVA
> > > > from iommu via this defined interface.
> > > > >  - Are you saying that rather than getting this from iommu, we
> > > > > should get this
> > > > from vfio? What difference does this make?
> > > >
> > > > Yes, you just said above that vfio knows the msi to iova mapping, so
> > > > why go outside of vfio to find it later?  The difference is one case
> > > > you can have a proper reference to data structures to make sure the
> > > > pointer you get back actually has meaning at the time you're using
> > > > it vs the code here where you're defining an API that returns a
> > > > meaningless value
> > >
> > > With FSL-PAMU we will always get consistant data from iommu or vfio-data
> > structure.
> > 
> > Great, but you're trying to add a generic API to the IOMMU subsystem that's
> > difficult to use correctly.  The fact that you use it correctly does not justify
> > the API.
> > 
> > > > because you can't check or
> > > > enforce that an arbitrary caller is using it correctly.
> > >
> > > I am not sure what is arbitrary caller? pdev is known to vfio, so vfio
> > > will only make pci_enable_msix()/pci_enable_msi_block() for this pdev.
> > > If anyother code makes then it is some other unexpectedly thing
> > > happening in system, no?
> > 
> > What's proposed here is a generic IOMMU API.  Anybody can call this.
> > What if the host SCSI driver decides to go get the iommu domain for it's device
> > (or any other device)?  Does that fit your usage model?
> > 
> > > >  It's not maintainable.
> > > > Thanks,
> > >
> > > I do not have any issue with this as well, can you also describe the
> > > type of API you are envisioning; I can think of defining some function
> > > in vfio.c/vfio_iommu*.c, make them global and declare then in
> > > include/Linux/vfio.h And include <Linux/vfio.h> in caller file
> > > (arch/powerpc/kernel/msi.c)
> > 
> > Do you really want module dependencies between vfio and your core kernel MSI
> > setup?  Look at the vfio external user interface that we've already defined.
> > That allows other components of the kernel to get a proper reference to a vfio
> > group.  From there you can work out how to get what you want.  Another
> > alternative is that vfio could register an MSI to IOVA mapping with architecture
> > code when the mapping is created.  The MSI setup path could then do a lookup in
> > architecture code for the mapping.  You could even store the MSI to IOVA mapping
> > in VFIO and create an interface where SET_IRQ passes that mapping into setup
> > code.
> 
> Ok, What I want is to get IOVA associated with a physical address
> (physical address of MSI-bank).
> And currently I do not see a way to know IOVA of a physical address
> and doing all this domain get and then search through all of
> iommu-windows of that domain.
> 
> What if we add an iommu-API which can return the IOVA mapping of a
> physical address. Current use case is setting up MSI's for aperture
> type of IOMMU also getting a phys_to_iova() mapping is independent of
> VFIO, your thought?

A physical address can be mapped to multiple IOVAs, so the interface
seems flawed by design.  It also has the same problem as above, it's a
backdoor that can be called asynchronous to the owner of the domain, so
what reason is there to believe the result?  It just replaces an
iommu_domain pointer with an IOVA.  VFIO knows this mapping, so why are
we trying to go behind its back and ask the IOMMU?  Thanks,

Alex
Bharat Bhushan - Oct. 8, 2013, 3:42 a.m.
> > > Do you really want module dependencies between vfio and your core
> > > kernel MSI setup?  Look at the vfio external user interface that we've
> already defined.
> > > That allows other components of the kernel to get a proper reference
> > > to a vfio group.  From there you can work out how to get what you
> > > want.  Another alternative is that vfio could register an MSI to
> > > IOVA mapping with architecture code when the mapping is created.
> > > The MSI setup path could then do a lookup in architecture code for
> > > the mapping.  You could even store the MSI to IOVA mapping in VFIO
> > > and create an interface where SET_IRQ passes that mapping into setup code.
> >
> > Ok, What I want is to get IOVA associated with a physical address
> > (physical address of MSI-bank).
> > And currently I do not see a way to know IOVA of a physical address
> > and doing all this domain get and then search through all of
> > iommu-windows of that domain.
> >
> > What if we add an iommu-API which can return the IOVA mapping of a
> > physical address. Current use case is setting up MSI's for aperture
> > type of IOMMU also getting a phys_to_iova() mapping is independent of
> > VFIO, your thought?
> 
> A physical address can be mapped to multiple IOVAs, so the interface seems
> flawed by design.  It also has the same problem as above, it's a backdoor that
> can be called asynchronous to the owner of the domain, so what reason is there
> to believe the result?  It just replaces an iommu_domain pointer with an IOVA.
> VFIO knows this mapping, so why are we trying to go behind its back and ask the
> IOMMU?
IOMMU is the final place where the mapping is created, so maybe today it is calling on behalf of VFIO; tomorrow it could be for normal Linux or some other interface. But I am fine to talk directly to vfio and will not try to solve a problem which does not exist today.

The MSI subsystem knows the pdev (pci device) and the physical address; what interface will it use to get the IOVA from VFIO?

Thanks
-Bharat

>  Thanks,
> 
> Alex
>
Sethi Varun-B16395 - Oct. 10, 2013, 8:09 p.m.
> -----Original Message-----
> From: iommu-bounces@lists.linux-foundation.org [mailto:iommu-
> bounces@lists.linux-foundation.org] On Behalf Of Alex Williamson
> Sent: Tuesday, October 08, 2013 8:43 AM
> To: Bhushan Bharat-R65777
> Cc: agraf@suse.de; Wood Scott-B07421; linux-pci@vger.kernel.org;
> galak@kernel.crashing.org; linux-kernel@vger.kernel.org;
> iommu@lists.linux-foundation.org; benh@kernel.crashing.org; linuxppc-
> dev@lists.ozlabs.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Mon, 2013-10-07 at 05:46 +0000, Bhushan Bharat-R65777 wrote:
> >
> > > -----Original Message-----
> > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > Sent: Friday, October 04, 2013 11:42 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > Sent: Friday, October 04, 2013 10:43 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > > > To: Bhushan Bharat-R65777
> > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > foundation.org
> > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > of a device
> > > > > > >
> > > > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777
> wrote:
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > > > On Behalf Of Alex Williamson
> > > > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > > galak@kernel.crashing.org; linux-
> > > > > > > > > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> > > > > > > > > linux- pci@vger.kernel.org; agraf@suse.de; Wood
> > > > > > > > > Scott-B07421; iommu@lists.linux- foundation.org; Bhushan
> > > > > > > > > Bharat-R65777
> > > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get
> > > > > > > > > iommu_domain of a device
> > > > > > > > >
> > > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > > > This api return the iommu domain to which the device is
> attached.
> > > > > > > > > > The iommu_domain is required for making API calls
> > > > > > > > > > related to
> > > iommu.
> > > > > > > > > > Follow up patches which use this API to know iommu
> maping.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/iommu/iommu.c
> > > > > > > > > > b/drivers/iommu/iommu.c index
> > > > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > > > >
> > > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct
> device *dev) {
> > > > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > > > +
> > > > > > > > > > +	if (unlikely(ops == NULL ||
> > > > > > > > > > +ops->get_dev_iommu_domain ==
> > > NULL))
> > > > > > > > > > +		return NULL;
> > > > > > > > > > +
> > > > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > > > >
> > > > > > > > > What prevents this from racing iommu_domain_free()?
> > > > > > > > > There's no references acquired, so there's no reason for
> > > > > > > > > the caller to assume the
> > > > > > > pointer is valid.
> > > > > > > >
> > > > > > > > Sorry for late query, somehow this email went into a
> > > > > > > > folder and escaped;
> > > > > > > >
> > > > > > > > Just to be sure, there is not lock at generic "struct
> > > > > > > > iommu_domain", but IP
> > > > > > > specific structure (link FSL domain) linked in
> > > > > > > iommu_domain->priv have a lock, so we need to ensure this
> > > > > > > race in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c),
> right?
> > > > > > >
> > > > > > > No, it's not sufficient to make sure that your use of the
> > > > > > > interface is race free.  The interface itself needs to be
> > > > > > > designed so that it's difficult to use incorrectly.
> > > > > >
> > > > > > So we can define
> > > > > > iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > > > iommu_get_dev_domain() will return domain with the lock held,
> > > > > > and
> > > > > > iommu_put_dev_domain() will release the lock? And
> > > > > > iommu_get_dev_domain() must always be followed by
> > > > > > iommu_get_dev_domain().
> > > > >
> > > > > What lock?  get/put are generally used for reference counting,
> > > > > not locking in the kernel.
> > > > >
> > > > > > > That's not the case here.  This is a backdoor to get the
> > > > > > > iommu domain from the iommu driver regardless of who is using
> it or how.
> > > > > > > The iommu domain is created and managed by vfio, so
> > > > > > > shouldn't we be looking at how to do this through vfio?
> > > > > >
> > > > > > Let me first describe what we are doing here:
> > > > > > During initialization:-
> > > > > >  - vfio talks to MSI system to know the MSI-page and size
> > > > > >  - vfio then interacts with iommu to map the MSI-page in iommu
> > > > > > (IOVA is decided by userspace and physical address is the
> > > > > > MSI-page)
> > > > > >  - So the IOVA subwindow mapping is created in iommu and yes
> > > > > > VFIO know about
> > > > > this mapping.
> > > > > >
> > > > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which is
> > > > > > supposed to set
> > > > > MSI address/data in device.
> > > > > >  - So in current implementation (this patchset) msi-subsystem
> > > > > > gets the IOVA
> > > > > from iommu via this defined interface.
> > > > > >  - Are you saying that rather than getting this from iommu, we
> > > > > > should get this
> > > > > from vfio? What difference does this make?
> > > > >
> > > > > Yes, you just said above that vfio knows the msi to iova
> > > > > mapping, so why go outside of vfio to find it later?  The
> > > > > difference is one case you can have a proper reference to data
> > > > > structures to make sure the pointer you get back actually has
> > > > > meaning at the time you're using it vs the code here where
> > > > > you're defining an API that returns a meaningless value
> > > >
> > > > With FSL-PAMU we will always get consistant data from iommu or
> > > > vfio-data
> > > structure.
> > >
> > > Great, but you're trying to add a generic API to the IOMMU subsystem
> > > that's difficult to use correctly.  The fact that you use it
> > > correctly does not justify the API.
> > >
> > > > > because you can't check or
> > > > > enforce that an arbitrary caller is using it correctly.
> > > >
> > > > I am not sure what is arbitrary caller? pdev is known to vfio, so
> > > > vfio will only make pci_enable_msix()/pci_enable_msi_block() for
> this pdev.
> > > > If anyother code makes then it is some other unexpectedly thing
> > > > happening in system, no?
> > >
> > > What's proposed here is a generic IOMMU API.  Anybody can call this.
> > > What if the host SCSI driver decides to go get the iommu domain for
> > > it's device (or any other device)?  Does that fit your usage model?
> > >
> > > > >  It's not maintainable.
> > > > > Thanks,
> > > >
> > > > I do not have any issue with this as well, can you also describe
> > > > the type of API you are envisioning; I can think of defining some
> > > > function in vfio.c/vfio_iommu*.c, make them global and declare
> > > > then in include/Linux/vfio.h And include <Linux/vfio.h> in caller
> > > > file
> > > > (arch/powerpc/kernel/msi.c)
> > >
> > > Do you really want module dependencies between vfio and your core
> > > kernel MSI setup?  Look at the vfio external user interface that
> we've already defined.
> > > That allows other components of the kernel to get a proper reference
> > > to a vfio group.  From there you can work out how to get what you
> > > want.  Another alternative is that vfio could register an MSI to
> > > IOVA mapping with architecture code when the mapping is created.
> > > The MSI setup path could then do a lookup in architecture code for
> > > the mapping.  You could even store the MSI to IOVA mapping in VFIO
> > > and create an interface where SET_IRQ passes that mapping into setup
> code.
[Sethi Varun-B16395] What you are suggesting is that the MSI setup path queries the vfio subsystem for the mapping, rather than directly querying the iommu subsystem. So, say we add an interface for getting the MSI to IOVA mapping in the msi setup path — wouldn't this again be specific to vfio? I reckon that this interface would again be a ppc machine-specific interface.

-Varun
Alex Williamson - Oct. 10, 2013, 8:41 p.m.
On Thu, 2013-10-10 at 20:09 +0000, Sethi Varun-B16395 wrote:
> 
> > -----Original Message-----
> > From: iommu-bounces@lists.linux-foundation.org [mailto:iommu-
> > bounces@lists.linux-foundation.org] On Behalf Of Alex Williamson
> > Sent: Tuesday, October 08, 2013 8:43 AM
> > To: Bhushan Bharat-R65777
> > Cc: agraf@suse.de; Wood Scott-B07421; linux-pci@vger.kernel.org;
> > galak@kernel.crashing.org; linux-kernel@vger.kernel.org;
> > iommu@lists.linux-foundation.org; benh@kernel.crashing.org; linuxppc-
> > dev@lists.ozlabs.org
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Mon, 2013-10-07 at 05:46 +0000, Bhushan Bharat-R65777 wrote:
> > >
> > > > -----Original Message-----
> > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > Sent: Friday, October 04, 2013 11:42 PM
> > > > To: Bhushan Bharat-R65777
> > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > device
> > > >
> > > > On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > Sent: Friday, October 04, 2013 10:43 PM
> > > > > > To: Bhushan Bharat-R65777
> > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > foundation.org
> > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > > device
> > > > > >
> > > > > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > > foundation.org
> > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > > of a device
> > > > > > > >
> > > > > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777
> > wrote:
> > > > > > > > >
> > > > > > > > > > -----Original Message-----
> > > > > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > > > > On Behalf Of Alex Williamson
> > > > > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > > > galak@kernel.crashing.org; linux-
> > > > > > > > > > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> > > > > > > > > > linux- pci@vger.kernel.org; agraf@suse.de; Wood
> > > > > > > > > > Scott-B07421; iommu@lists.linux- foundation.org; Bhushan
> > > > > > > > > > Bharat-R65777
> > > > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get
> > > > > > > > > > iommu_domain of a device
> > > > > > > > > >
> > > > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > > > > This api return the iommu domain to which the device is
> > attached.
> > > > > > > > > > > The iommu_domain is required for making API calls
> > > > > > > > > > > related to
> > > > iommu.
> > > > > > > > > > > Follow up patches which use this API to know iommu
> > maping.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/iommu/iommu.c
> > > > > > > > > > > b/drivers/iommu/iommu.c index
> > > > > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > > > > >
> > > > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct
> > device *dev) {
> > > > > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > > > > +
> > > > > > > > > > > +	if (unlikely(ops == NULL ||
> > > > > > > > > > > +ops->get_dev_iommu_domain ==
> > > > NULL))
> > > > > > > > > > > +		return NULL;
> > > > > > > > > > > +
> > > > > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > > > > >
> > > > > > > > > > What prevents this from racing iommu_domain_free()?
> > > > > > > > > > There's no references acquired, so there's no reason for
> > > > > > > > > > the caller to assume the
> > > > > > > > pointer is valid.
> > > > > > > > >
> > > > > > > > > Sorry for late query, somehow this email went into a
> > > > > > > > > folder and escaped;
> > > > > > > > >
> > > > > > > > > Just to be sure, there is not lock at generic "struct
> > > > > > > > > iommu_domain", but IP
> > > > > > > > specific structure (link FSL domain) linked in
> > > > > > > > iommu_domain->priv have a lock, so we need to ensure this
> > > > > > > > race in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c),
> > right?
> > > > > > > >
> > > > > > > > No, it's not sufficient to make sure that your use of the
> > > > > > > > interface is race free.  The interface itself needs to be
> > > > > > > > designed so that it's difficult to use incorrectly.
> > > > > > >
> > > > > > > So we can define
> > > > > > > iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > > > > iommu_get_dev_domain() will return domain with the lock held,
> > > > > > > and
> > > > > > > iommu_put_dev_domain() will release the lock? And
> > > > > > > iommu_get_dev_domain() must always be followed by
> > > > > > > iommu_get_dev_domain().
> > > > > >
> > > > > > What lock?  get/put are generally used for reference counting,
> > > > > > not locking in the kernel.
> > > > > >
> > > > > > > > That's not the case here.  This is a backdoor to get the
> > > > > > > > iommu domain from the iommu driver regardless of who is using
> > it or how.
> > > > > > > > The iommu domain is created and managed by vfio, so
> > > > > > > > shouldn't we be looking at how to do this through vfio?
> > > > > > >
> > > > > > > Let me first describe what we are doing here:
> > > > > > > During initialization:-
> > > > > > >  - vfio talks to MSI system to know the MSI-page and size
> > > > > > >  - vfio then interacts with iommu to map the MSI-page in iommu
> > > > > > > (IOVA is decided by userspace and physical address is the
> > > > > > > MSI-page)
> > > > > > >  - So the IOVA subwindow mapping is created in iommu and yes
> > > > > > > VFIO know about
> > > > > > this mapping.
> > > > > > >
> > > > > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > > > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which is
> > > > > > > supposed to set
> > > > > > MSI address/data in device.
> > > > > > >  - So in current implementation (this patchset) msi-subsystem
> > > > > > > gets the IOVA
> > > > > > from iommu via this defined interface.
> > > > > > >  - Are you saying that rather than getting this from iommu, we
> > > > > > > should get this
> > > > > > from vfio? What difference does this make?
> > > > > >
> > > > > > Yes, you just said above that vfio knows the msi to iova
> > > > > > mapping, so why go outside of vfio to find it later?  The
> > > > > > difference is one case you can have a proper reference to data
> > > > > > structures to make sure the pointer you get back actually has
> > > > > > meaning at the time you're using it vs the code here where
> > > > > > you're defining an API that returns a meaningless value
> > > > >
> > > > > With FSL-PAMU we will always get consistant data from iommu or
> > > > > vfio-data
> > > > structure.
> > > >
> > > > Great, but you're trying to add a generic API to the IOMMU subsystem
> > > > that's difficult to use correctly.  The fact that you use it
> > > > correctly does not justify the API.
> > > >
> > > > > > because you can't check or
> > > > > > enforce that an arbitrary caller is using it correctly.
> > > > >
> > > > > I am not sure what is arbitrary caller? pdev is known to vfio, so
> > > > > vfio will only make pci_enable_msix()/pci_enable_msi_block() for
> > this pdev.
> > > > > If anyother code makes then it is some other unexpectedly thing
> > > > > happening in system, no?
> > > >
> > > > What's proposed here is a generic IOMMU API.  Anybody can call this.
> > > > What if the host SCSI driver decides to go get the iommu domain for
> > > > it's device (or any other device)?  Does that fit your usage model?
> > > >
> > > > > >  It's not maintainable.
> > > > > > Thanks,
> > > > >
> > > > > I do not have any issue with this as well, can you also describe
> > > > > the type of API you are envisioning; I can think of defining some
> > > > > function in vfio.c/vfio_iommu*.c, make them global and declare
> > > > > then in include/Linux/vfio.h And include <Linux/vfio.h> in caller
> > > > > file
> > > > > (arch/powerpc/kernel/msi.c)
> > > >
> > > > Do you really want module dependencies between vfio and your core
> > > > kernel MSI setup?  Look at the vfio external user interface that
> > we've already defined.
> > > > That allows other components of the kernel to get a proper reference
> > > > to a vfio group.  From there you can work out how to get what you
> > > > want.  Another alternative is that vfio could register an MSI to
> > > > IOVA mapping with architecture code when the mapping is created.
> > > > The MSI setup path could then do a lookup in architecture code for
> > > > the mapping.  You could even store the MSI to IOVA mapping in VFIO
> > > > and create an interface where SET_IRQ passes that mapping into setup
> > code.
> [Sethi Varun-B16395] What you are suggesting is that the MSI setup
> path queries the vfio subsystem for the mapping, rather than directly
> querying the iommu subsystem. So, say if we add an interface for
> getting MSI to IOVA mapping in the msi setup path, wouldn't this again
> be specific to vfio? I reckon that this interface would again ppc
> machine specific interface.

Sure, if this is a generic MSI setup path then clearly vfio should not
be involved.  But by that same notion, if this is a generic MSI setup
path, how can the proposed solutions guarantee that the iommu_domain or
iova returned is still valid in all cases?  It's racy.  If the caller
trying to setup MSI has the information needed, why doesn't it pass it
in as part of the setup?  Thanks,

Alex
Sethi Varun-B16395 - Oct. 14, 2013, 12:58 p.m.
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Friday, October 11, 2013 2:12 AM
> To: Sethi Varun-B16395
> Cc: Bhushan Bharat-R65777; agraf@suse.de; Wood Scott-B07421; linux-
> pci@vger.kernel.org; galak@kernel.crashing.org; linux-
> kernel@vger.kernel.org; iommu@lists.linux-foundation.org;
> benh@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Thu, 2013-10-10 at 20:09 +0000, Sethi Varun-B16395 wrote:
> >
> > > -----Original Message-----
> > > From: iommu-bounces@lists.linux-foundation.org [mailto:iommu-
> > > bounces@lists.linux-foundation.org] On Behalf Of Alex Williamson
> > > Sent: Tuesday, October 08, 2013 8:43 AM
> > > To: Bhushan Bharat-R65777
> > > Cc: agraf@suse.de; Wood Scott-B07421; linux-pci@vger.kernel.org;
> > > galak@kernel.crashing.org; linux-kernel@vger.kernel.org;
> > > iommu@lists.linux-foundation.org; benh@kernel.crashing.org;
> > > linuxppc- dev@lists.ozlabs.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Mon, 2013-10-07 at 05:46 +0000, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -----Original Message-----
> > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > Sent: Friday, October 04, 2013 11:42 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > > Sent: Friday, October 04, 2013 10:43 PM
> > > > > > > To: Bhushan Bharat-R65777
> > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > foundation.org
> > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > of a device
> > > > > > >
> > > > > > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777
> wrote:
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Alex Williamson
> > > > > > > > > [mailto:alex.williamson@redhat.com]
> > > > > > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > > galak@kernel.crashing.org; linux-
> > > > > > > > > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> > > > > > > > > linux- pci@vger.kernel.org; agraf@suse.de; Wood
> > > > > > > > > Scott-B07421; iommu@lists.linux- foundation.org
> > > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get
> > > > > > > > > iommu_domain of a device
> > > > > > > > >
> > > > > > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777
> > > wrote:
> > > > > > > > > >
> > > > > > > > > > > -----Original Message-----
> > > > > > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > > > > > On Behalf Of Alex Williamson
> > > > > > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > > > > galak@kernel.crashing.org; linux-
> > > > > > > > > > > kernel@vger.kernel.org;
> > > > > > > > > > > linuxppc-dev@lists.ozlabs.org;
> > > > > > > > > > > linux- pci@vger.kernel.org; agraf@suse.de; Wood
> > > > > > > > > > > Scott-B07421; iommu@lists.linux- foundation.org;
> > > > > > > > > > > Bhushan
> > > > > > > > > > > Bharat-R65777
> > > > > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get
> > > > > > > > > > > iommu_domain of a device
> > > > > > > > > > >
> > > > > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan
> wrote:
> > > > > > > > > > > > This api return the iommu domain to which the
> > > > > > > > > > > > device is
> > > attached.
> > > > > > > > > > > > The iommu_domain is required for making API calls
> > > > > > > > > > > > related to
> > > > > iommu.
> > > > > > > > > > > > Follow up patches which use this API to know iommu
> > > mapping.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > > > > > ---
> > > > > > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/iommu/iommu.c
> > > > > > > > > > > > b/drivers/iommu/iommu.c index
> > > > > > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > > > > > @@ -696,6 +696,16 @@ void
> > > > > > > > > > > > iommu_detach_device(struct iommu_domain *domain,
> > > > > > > > > > > > struct device *dev)  }
> > > > > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > > > > > >
> > > > > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct
> > > device *dev) {
> > > > > > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	if (unlikely(ops == NULL ||
> > > > > > > > > > > > +ops->get_dev_iommu_domain ==
> > > > > NULL))
> > > > > > > > > > > > +		return NULL;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > > > > > >
> > > > > > > > > > > What prevents this from racing iommu_domain_free()?
> > > > > > > > > > > There's no references acquired, so there's no reason
> > > > > > > > > > > for the caller to assume the
> > > > > > > > > pointer is valid.
> > > > > > > > > >
> > > > > > > > > > Sorry for late query, somehow this email went into a
> > > > > > > > > > folder and escaped;
> > > > > > > > > >
> > > > > > > > > > Just to be sure, there is no lock at the generic "struct
> > > > > > > > > > iommu_domain", but IP
> > > > > > > > > specific structure (like FSL domain) linked in
> > > > > > > > > iommu_domain->priv have a lock, so we need to ensure
> > > > > > > > > this race in FSL iommu code (say
> > > > > > > > > drivers/iommu/fsl_pamu_domain.c),
> > > right?
> > > > > > > > >
> > > > > > > > > No, it's not sufficient to make sure that your use of
> > > > > > > > > the interface is race free.  The interface itself needs
> > > > > > > > > to be designed so that it's difficult to use incorrectly.
> > > > > > > >
> > > > > > > > So we can define
> > > > > > > > iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > > > > > iommu_get_dev_domain() will return domain with the lock
> > > > > > > > held, and
> > > > > > > > iommu_put_dev_domain() will release the lock? And
> > > > > > > > iommu_get_dev_domain() must always be followed by
> > > > > > > > iommu_put_dev_domain().
> > > > > > >
> > > > > > > What lock?  get/put are generally used for reference
> > > > > > > counting, not locking in the kernel.
> > > > > > >
> > > > > > > > > That's not the case here.  This is a backdoor to get the
> > > > > > > > > iommu domain from the iommu driver regardless of who is
> > > > > > > > > using
> > > it or how.
> > > > > > > > > The iommu domain is created and managed by vfio, so
> > > > > > > > > shouldn't we be looking at how to do this through vfio?
> > > > > > > >
> > > > > > > > Let me first describe what we are doing here:
> > > > > > > > During initialization:-
> > > > > > > >  - vfio talks to MSI system to know the MSI-page and size
> > > > > > > >  - vfio then interacts with iommu to map the MSI-page in
> > > > > > > > iommu (IOVA is decided by userspace and physical address
> > > > > > > > is the
> > > > > > > > MSI-page)
> > > > > > > >  - So the IOVA subwindow mapping is created in iommu and
> > > > > > > > yes VFIO know about
> > > > > > > this mapping.
> > > > > > > >
> > > > > > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > > > > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which
> > > > > > > > is supposed to set
> > > > > > > MSI address/data in device.
> > > > > > > >  - So in current implementation (this patchset)
> > > > > > > > msi-subsystem gets the IOVA
> > > > > > > from iommu via this defined interface.
> > > > > > > >  - Are you saying that rather than getting this from
> > > > > > > > iommu, we should get this
> > > > > > > from vfio? What difference does this make?
> > > > > > >
> > > > > > > Yes, you just said above that vfio knows the msi to iova
> > > > > > > mapping, so why go outside of vfio to find it later?  The
> > > > > > > difference is one case you can have a proper reference to
> > > > > > > data structures to make sure the pointer you get back
> > > > > > > actually has meaning at the time you're using it vs the code
> > > > > > > here where you're defining an API that returns a meaningless
> > > > > > > value
> > > > > >
> > > > > > With FSL-PAMU we will always get consistent data from iommu or
> > > > > > vfio-data
> > > > > structure.
> > > > >
> > > > > Great, but you're trying to add a generic API to the IOMMU
> > > > > subsystem that's difficult to use correctly.  The fact that you
> > > > > use it correctly does not justify the API.
> > > > >
> > > > > > > because you can't check or
> > > > > > > enforce that an arbitrary caller is using it correctly.
> > > > > >
> > > > > > I am not sure what is arbitrary caller? pdev is known to vfio,
> > > > > > so vfio will only make
> > > > > > pci_enable_msix()/pci_enable_msi_block() for
> > > this pdev.
> > > > > > If any other code makes the call then it is some other unexpected
> > > > > > thing happening in the system, no?
> > > > >
> > > > > What's proposed here is a generic IOMMU API.  Anybody can call
> this.
> > > > > What if the host SCSI driver decides to go get the iommu domain
> > > > > for it's device (or any other device)?  Does that fit your usage
> model?
> > > > >
> > > > > > >  It's not maintainable.
> > > > > > > Thanks,
> > > > > >
> > > > > > I do not have any issue with this as well, can you also
> > > > > > describe the type of API you are envisioning; I can think of
> > > > > > defining some function in vfio.c/vfio_iommu*.c, make them
> > > > > > global and declare then in include/Linux/vfio.h And include
> > > > > > <Linux/vfio.h> in caller file
> > > > > > (arch/powerpc/kernel/msi.c)
> > > > >
> > > > > Do you really want module dependencies between vfio and your
> > > > > core kernel MSI setup?  Look at the vfio external user interface
> > > > > that
> > > we've already defined.
> > > > > That allows other components of the kernel to get a proper
> > > > > reference to a vfio group.  From there you can work out how to
> > > > > get what you want.  Another alternative is that vfio could
> > > > > register an MSI to IOVA mapping with architecture code when the
> mapping is created.
> > > > > The MSI setup path could then do a lookup in architecture code
> > > > > for the mapping.  You could even store the MSI to IOVA mapping
> > > > > in VFIO and create an interface where SET_IRQ passes that
> > > > > mapping into setup
> > > code.
> > [Sethi Varun-B16395] What you are suggesting is that the MSI setup
> > path queries the vfio subsystem for the mapping, rather than directly
> > querying the iommu subsystem. So, say if we add an interface for
> > getting MSI to IOVA mapping in the msi setup path, wouldn't this again
> > be specific to vfio? I reckon that this interface would again ppc
> > machine specific interface.
> 
> Sure, if this is a generic MSI setup path then clearly vfio should not be
> involved.  But by that same notion, if this is a generic MSI setup path,
> how can the proposed solutions guarantee that the iommu_domain or iova
> returned is still valid in all cases?  It's racy.  If the caller trying
> to setup MSI has the information needed, why doesn't it pass it in as
> part of the setup?  Thanks,
[Sethi Varun-B16395] Agreed, the proposed interface is not clean. But, we still need an interface through which MSI driver can call in to the vfio layer.

-Varun
Alex Williamson - Oct. 14, 2013, 2:20 p.m.
On Mon, 2013-10-14 at 12:58 +0000, Sethi Varun-B16395 wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, October 11, 2013 2:12 AM
> > To: Sethi Varun-B16395
> > Cc: Bhushan Bharat-R65777; agraf@suse.de; Wood Scott-B07421; linux-
> > pci@vger.kernel.org; galak@kernel.crashing.org; linux-
> > kernel@vger.kernel.org; iommu@lists.linux-foundation.org;
> > benh@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Thu, 2013-10-10 at 20:09 +0000, Sethi Varun-B16395 wrote:
> > >
> > > > -----Original Message-----
> > > > From: iommu-bounces@lists.linux-foundation.org [mailto:iommu-
> > > > bounces@lists.linux-foundation.org] On Behalf Of Alex Williamson
> > > > Sent: Tuesday, October 08, 2013 8:43 AM
> > > > To: Bhushan Bharat-R65777
> > > > Cc: agraf@suse.de; Wood Scott-B07421; linux-pci@vger.kernel.org;
> > > > galak@kernel.crashing.org; linux-kernel@vger.kernel.org;
> > > > iommu@lists.linux-foundation.org; benh@kernel.crashing.org;
> > > > linuxppc- dev@lists.ozlabs.org
> > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > device
> > > >
> > > > On Mon, 2013-10-07 at 05:46 +0000, Bhushan Bharat-R65777 wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > Sent: Friday, October 04, 2013 11:42 PM
> > > > > > To: Bhushan Bharat-R65777
> > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > foundation.org
> > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > > device
> > > > > >
> > > > > > On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > > > Sent: Friday, October 04, 2013 10:43 PM
> > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > > foundation.org
> > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > > of a device
> > > > > > > >
> > > > > > > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777
> > wrote:
> > > > > > > > >
> > > > > > > > > > -----Original Message-----
> > > > > > > > > > From: Alex Williamson
> > > > > > > > > > [mailto:alex.williamson@redhat.com]
> > > > > > > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > > > galak@kernel.crashing.org; linux-
> > > > > > > > > > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> > > > > > > > > > linux- pci@vger.kernel.org; agraf@suse.de; Wood
> > > > > > > > > > Scott-B07421; iommu@lists.linux- foundation.org
> > > > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get
> > > > > > > > > > iommu_domain of a device
> > > > > > > > > >
> > > > > > > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777
> > > > wrote:
> > > > > > > > > > >
> > > > > > > > > > > > -----Original Message-----
> > > > > > > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > > > > > > On Behalf Of Alex Williamson
> > > > > > > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > > > > > galak@kernel.crashing.org; linux-
> > > > > > > > > > > > kernel@vger.kernel.org;
> > > > > > > > > > > > linuxppc-dev@lists.ozlabs.org;
> > > > > > > > > > > > linux- pci@vger.kernel.org; agraf@suse.de; Wood
> > > > > > > > > > > > Scott-B07421; iommu@lists.linux- foundation.org;
> > > > > > > > > > > > Bhushan
> > > > > > > > > > > > Bharat-R65777
> > > > > > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get
> > > > > > > > > > > > iommu_domain of a device
> > > > > > > > > > > >
> > > > > > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan
> > wrote:
> > > > > > > > > > > > > This api return the iommu domain to which the
> > > > > > > > > > > > > device is
> > > > attached.
> > > > > > > > > > > > > The iommu_domain is required for making API calls
> > > > > > > > > > > > > related to
> > > > > > iommu.
> > > > > > > > > > > > > Follow up patches which use this API to know iommu
> > > > mapping.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/iommu/iommu.c
> > > > > > > > > > > > > b/drivers/iommu/iommu.c index
> > > > > > > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > > > > > > @@ -696,6 +696,16 @@ void
> > > > > > > > > > > > > iommu_detach_device(struct iommu_domain *domain,
> > > > > > > > > > > > > struct device *dev)  }
> > > > > > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > > > > > > >
> > > > > > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct
> > > > device *dev) {
> > > > > > > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	if (unlikely(ops == NULL ||
> > > > > > > > > > > > > +ops->get_dev_iommu_domain ==
> > > > > > NULL))
> > > > > > > > > > > > > +		return NULL;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > > > > > > >
> > > > > > > > > > > > What prevents this from racing iommu_domain_free()?
> > > > > > > > > > > > There's no references acquired, so there's no reason
> > > > > > > > > > > > for the caller to assume the
> > > > > > > > > > pointer is valid.
> > > > > > > > > > >
> > > > > > > > > > > Sorry for late query, somehow this email went into a
> > > > > > > > > > > folder and escaped;
> > > > > > > > > > >
> > > > > > > > > > > Just to be sure, there is no lock at the generic "struct
> > > > > > > > > > > iommu_domain", but IP
> > > > > > > > > > specific structure (like FSL domain) linked in
> > > > > > > > > > iommu_domain->priv have a lock, so we need to ensure
> > > > > > > > > > this race in FSL iommu code (say
> > > > > > > > > > drivers/iommu/fsl_pamu_domain.c),
> > > > right?
> > > > > > > > > >
> > > > > > > > > > No, it's not sufficient to make sure that your use of
> > > > > > > > > > the interface is race free.  The interface itself needs
> > > > > > > > > > to be designed so that it's difficult to use incorrectly.
> > > > > > > > >
> > > > > > > > > So we can define
> > > > > > > > > iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > > > > > > iommu_get_dev_domain() will return domain with the lock
> > > > > > > > > held, and
> > > > > > > > > iommu_put_dev_domain() will release the lock? And
> > > > > > > > > iommu_get_dev_domain() must always be followed by
> > > > > > > > > iommu_put_dev_domain().
> > > > > > > >
> > > > > > > > What lock?  get/put are generally used for reference
> > > > > > > > counting, not locking in the kernel.
> > > > > > > >
> > > > > > > > > > That's not the case here.  This is a backdoor to get the
> > > > > > > > > > iommu domain from the iommu driver regardless of who is
> > > > > > > > > > using
> > > > it or how.
> > > > > > > > > > The iommu domain is created and managed by vfio, so
> > > > > > > > > > shouldn't we be looking at how to do this through vfio?
> > > > > > > > >
> > > > > > > > > Let me first describe what we are doing here:
> > > > > > > > > During initialization:-
> > > > > > > > >  - vfio talks to MSI system to know the MSI-page and size
> > > > > > > > >  - vfio then interacts with iommu to map the MSI-page in
> > > > > > > > > iommu (IOVA is decided by userspace and physical address
> > > > > > > > > is the
> > > > > > > > > MSI-page)
> > > > > > > > >  - So the IOVA subwindow mapping is created in iommu and
> > > > > > > > > yes VFIO know about
> > > > > > > > this mapping.
> > > > > > > > >
> > > > > > > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > > > > > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which
> > > > > > > > > is supposed to set
> > > > > > > > MSI address/data in device.
> > > > > > > > >  - So in current implementation (this patchset)
> > > > > > > > > msi-subsystem gets the IOVA
> > > > > > > > from iommu via this defined interface.
> > > > > > > > >  - Are you saying that rather than getting this from
> > > > > > > > > iommu, we should get this
> > > > > > > > from vfio? What difference does this make?
> > > > > > > >
> > > > > > > > Yes, you just said above that vfio knows the msi to iova
> > > > > > > > mapping, so why go outside of vfio to find it later?  The
> > > > > > > > difference is one case you can have a proper reference to
> > > > > > > > data structures to make sure the pointer you get back
> > > > > > > > actually has meaning at the time you're using it vs the code
> > > > > > > > here where you're defining an API that returns a meaningless
> > > > > > > > value
> > > > > > >
> > > > > > > With FSL-PAMU we will always get consistent data from iommu or
> > > > > > > vfio-data
> > > > > > structure.
> > > > > >
> > > > > > Great, but you're trying to add a generic API to the IOMMU
> > > > > > subsystem that's difficult to use correctly.  The fact that you
> > > > > > use it correctly does not justify the API.
> > > > > >
> > > > > > > > because you can't check or
> > > > > > > > enforce that an arbitrary caller is using it correctly.
> > > > > > >
> > > > > > > I am not sure what is arbitrary caller? pdev is known to vfio,
> > > > > > > so vfio will only make
> > > > > > > pci_enable_msix()/pci_enable_msi_block() for
> > > > this pdev.
> > > > > > > If any other code makes the call then it is some other unexpected
> > > > > > > thing happening in the system, no?
> > > > > >
> > > > > > What's proposed here is a generic IOMMU API.  Anybody can call
> > this.
> > > > > > What if the host SCSI driver decides to go get the iommu domain
> > > > > > for it's device (or any other device)?  Does that fit your usage
> > model?
> > > > > >
> > > > > > > >  It's not maintainable.
> > > > > > > > Thanks,
> > > > > > >
> > > > > > > I do not have any issue with this as well, can you also
> > > > > > > describe the type of API you are envisioning; I can think of
> > > > > > > defining some function in vfio.c/vfio_iommu*.c, make them
> > > > > > > global and declare then in include/Linux/vfio.h And include
> > > > > > > <Linux/vfio.h> in caller file
> > > > > > > (arch/powerpc/kernel/msi.c)
> > > > > >
> > > > > > Do you really want module dependencies between vfio and your
> > > > > > core kernel MSI setup?  Look at the vfio external user interface
> > > > > > that
> > > > we've already defined.
> > > > > > That allows other components of the kernel to get a proper
> > > > > > reference to a vfio group.  From there you can work out how to
> > > > > > get what you want.  Another alternative is that vfio could
> > > > > > register an MSI to IOVA mapping with architecture code when the
> > mapping is created.
> > > > > > The MSI setup path could then do a lookup in architecture code
> > > > > > for the mapping.  You could even store the MSI to IOVA mapping
> > > > > > in VFIO and create an interface where SET_IRQ passes that
> > > > > > mapping into setup
> > > > code.
> > > [Sethi Varun-B16395] What you are suggesting is that the MSI setup
> > > path queries the vfio subsystem for the mapping, rather than directly
> > > querying the iommu subsystem. So, say if we add an interface for
> > > getting MSI to IOVA mapping in the msi setup path, wouldn't this again
> > > be specific to vfio? I reckon that this interface would again ppc
> > > machine specific interface.
> > 
> > Sure, if this is a generic MSI setup path then clearly vfio should not be
> > involved.  But by that same notion, if this is a generic MSI setup path,
> > how can the proposed solutions guarantee that the iommu_domain or iova
> > returned is still valid in all cases?  It's racy.  If the caller trying
> > to setup MSI has the information needed, why doesn't it pass it in as
> > part of the setup?  Thanks,
> [Sethi Varun-B16395] Agreed, the proposed interface is not clean. But, we still need an interface through which MSI driver can call in to the vfio layer.

Or one that allows vfio to pass in the iova when the interrupt is being
setup.  I'm afraid any kind of reverse interface where MSI calls back
into vfio is going to be racy.  Thanks,

Alex

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index fbe9ca7..6ac5f50 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -696,6 +696,16 @@  void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_detach_device);
 
+struct iommu_domain *iommu_get_dev_domain(struct device *dev)
+{
+	struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
+		return NULL;
+
+	return ops->get_dev_iommu_domain(dev);
+}
+EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 /*
  * IOMMU groups are really the natrual working unit of the IOMMU, but
  * the IOMMU API works on domains and devices.  Bridge that gap by
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ea319e..fa046bd 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -127,6 +127,7 @@  struct iommu_ops {
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
 	/* Get the numer of window per domain */
 	u32 (*domain_get_windows)(struct iommu_domain *domain);
+	struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
 
 	unsigned long pgsize_bitmap;
 };
@@ -190,6 +191,7 @@  extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
 				      phys_addr_t offset, u64 size,
 				      int prot);
 extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
+extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
  * @domain: the iommu domain where the fault has happened
@@ -284,6 +286,11 @@  static inline void iommu_domain_window_disable(struct iommu_domain *domain,
 {
 }
 
+static inline struct iommu_domain *iommu_get_dev_domain(struct device *dev)
+{
+	return NULL;
+}
+
 static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
 	return 0;