[v5,01/14] iommu: Add dma ownership management interfaces

Message ID 20220104015644.2294354-2-baolu.lu@linux.intel.com
State New
Series Fix BUG_ON in vfio_iommu_group_notifier()

Commit Message

Baolu Lu Jan. 4, 2022, 1:56 a.m. UTC
Multiple devices may be placed in the same IOMMU group because they
cannot be isolated from each other. These devices must either be
entirely under kernel control or userspace control, never a mixture.

This adds dma ownership management in iommu core and exposes several
interfaces for the device drivers and the device userspace assignment
framework (i.e. vfio), so that any conflict between user and kernel
controlled DMA could be detected at the beginning.

The device driver oriented interfaces are,

	int iommu_device_use_dma_api(struct device *dev);
	void iommu_device_unuse_dma_api(struct device *dev);

Devices under kernel drivers control must call iommu_device_use_dma_api()
before driver probes. The driver binding process must be aborted if it
returns failure.
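
Purely as an illustration (the real call sites are added to the bus
code by later patches in this series, and the function below is made
up for the example), a bus's probe path would use these interfaces
roughly like this:

	static int example_bus_probe(struct device *dev)
	{
		int ret;

		/* Claim kernel DMA (default domain) ownership before probing. */
		ret = iommu_device_use_dma_api(dev);
		if (ret)
			return ret;	/* ownership conflict, abort binding */

		ret = dev->driver->probe ? dev->driver->probe(dev) : 0;
		if (ret)
			iommu_device_unuse_dma_api(dev);

		return ret;
	}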

The vfio oriented interfaces are,

	int iommu_group_set_dma_owner(struct iommu_group *group,
				      void *owner);
	void iommu_group_release_dma_owner(struct iommu_group *group);
	bool iommu_group_dma_owner_claimed(struct iommu_group *group);

The device userspace assignment must be disallowed if the set dma owner
interface returns failure.
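
Again only as a sketch (vfio's actual call sites come later in this
series; the owner cookie and function names below are illustrative),
the assignment framework would claim and release a group roughly like
this:

	static int example_assign_group_to_user(struct iommu_group *group,
						void *owner_cookie)
	{
		int ret;

		/* Refuse the assignment if DMA ownership is already claimed. */
		ret = iommu_group_set_dma_owner(group, owner_cookie);
		if (ret)
			return ret;

		/* ... the group's devices can now be handed to userspace ... */
		return 0;
	}

	static void example_unassign_group(struct iommu_group *group)
	{
		/* Re-attaches the default domain once the last owner is gone. */
		iommu_group_release_dma_owner(group);
	}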

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 include/linux/iommu.h |  31 ++++++++
 drivers/iommu/iommu.c | 161 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 189 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Jan. 4, 2022, 10:08 a.m. UTC | #1
On Tue, Jan 04, 2022 at 09:56:31AM +0800, Lu Baolu wrote:
> Multiple devices may be placed in the same IOMMU group because they
> cannot be isolated from each other. These devices must either be
> entirely under kernel control or userspace control, never a mixture.
> 
> This adds dma ownership management in iommu core and exposes several
> interfaces for the device drivers and the device userspace assignment
> framework (i.e. vfio), so that any conflict between user and kernel
> controlled DMA could be detected at the beginning.
> 
> The device driver oriented interfaces are,
> 
> 	int iommu_device_use_dma_api(struct device *dev);
> 	void iommu_device_unuse_dma_api(struct device *dev);
> 
> Devices under kernel drivers control must call iommu_device_use_dma_api()
> before driver probes. The driver binding process must be aborted if it
> returns failure.
> 
> The vfio oriented interfaces are,
> 
> 	int iommu_group_set_dma_owner(struct iommu_group *group,
> 				      void *owner);
> 	void iommu_group_release_dma_owner(struct iommu_group *group);
> 	bool iommu_group_dma_owner_claimed(struct iommu_group *group);
> 
> The device userspace assignment must be disallowed if the set dma owner
> interface returns failure.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  include/linux/iommu.h |  31 ++++++++
>  drivers/iommu/iommu.c | 161 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 189 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index de0c57a567c8..568f285468cf 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -682,6 +682,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
>  void iommu_sva_unbind_device(struct iommu_sva *handle);
>  u32 iommu_sva_get_pasid(struct iommu_sva *handle);
>  
> +int iommu_device_use_dma_api(struct device *dev);
> +void iommu_device_unuse_dma_api(struct device *dev);
> +
> +int iommu_group_set_dma_owner(struct iommu_group *group, void *owner);
> +void iommu_group_release_dma_owner(struct iommu_group *group);
> +bool iommu_group_dma_owner_claimed(struct iommu_group *group);
> +
>  #else /* CONFIG_IOMMU_API */
>  
>  struct iommu_ops {};
> @@ -1082,6 +1089,30 @@ static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
>  {
>  	return NULL;
>  }
> +
> +static inline int iommu_device_use_dma_api(struct device *dev)
> +{
> +	return 0;
> +}
> +
> +static inline void iommu_device_unuse_dma_api(struct device *dev)
> +{
> +}
> +
> +static inline int
> +iommu_group_set_dma_owner(struct iommu_group *group, void *owner)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline void iommu_group_release_dma_owner(struct iommu_group *group)
> +{
> +}
> +
> +static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_IOMMU_API */
>  
>  /**
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 8b86406b7162..ff0c8c1ad5af 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -48,6 +48,8 @@ struct iommu_group {
>  	struct iommu_domain *default_domain;
>  	struct iommu_domain *domain;
>  	struct list_head entry;
> +	unsigned int owner_cnt;
> +	void *owner;
>  };
>  
>  struct group_device {
> @@ -289,7 +291,12 @@ int iommu_probe_device(struct device *dev)
>  	mutex_lock(&group->mutex);
>  	iommu_alloc_default_domain(group, dev);
>  
> -	if (group->default_domain) {
> +	/*
> +	 * If device joined an existing group which has been claimed
> +	 * for none kernel DMA purpose, avoid attaching the default
> +	 * domain.
> +	 */
> +	if (group->default_domain && !group->owner) {
>  		ret = __iommu_attach_device(group->default_domain, dev);
>  		if (ret) {
>  			mutex_unlock(&group->mutex);
> @@ -2320,7 +2327,7 @@ static int __iommu_attach_group(struct iommu_domain *domain,
>  {
>  	int ret;
>  
> -	if (group->default_domain && group->domain != group->default_domain)
> +	if (group->domain && group->domain != group->default_domain)
>  		return -EBUSY;
>  
>  	ret = __iommu_group_for_each_dev(group, domain,
> @@ -2357,7 +2364,11 @@ static void __iommu_detach_group(struct iommu_domain *domain,
>  {
>  	int ret;
>  
> -	if (!group->default_domain) {
> +	/*
> +	 * If group has been claimed for none kernel DMA purpose, avoid
> +	 * re-attaching the default domain.
> +	 */

none kernel reads odd.  But maybe drop that and just say 'claimed
already' ala:

	/*
	 * If the group has been claimed already, do not re-attach the default
	 * domain.
	 */
Bjorn Helgaas Jan. 4, 2022, 4:41 p.m. UTC | #2
On Tue, Jan 04, 2022 at 02:08:00AM -0800, Christoph Hellwig wrote:
> On Tue, Jan 04, 2022 at 09:56:31AM +0800, Lu Baolu wrote:
> > Multiple devices may be placed in the same IOMMU group because they
> > cannot be isolated from each other. These devices must either be
> > entirely under kernel control or userspace control, never a mixture.

I guess the reason is that if a group contained a mixture, userspace
could attack the kernel by programming a device to DMA to a device
owned by the kernel?

> > This adds dma ownership management in iommu core and exposes several
> > interfaces for the device drivers and the device userspace assignment
> > framework (i.e. vfio), so that any conflict between user and kernel
> > controlled DMA could be detected at the beginning.

Maybe I'm missing the point because I don't know what "conflict
between user and kernel controlled DMA" is.  Are you talking about
both userspace and the kernel programming the same device to do DMA?

> > The device driver oriented interfaces are,
> > 
> > 	int iommu_device_use_dma_api(struct device *dev);
> > 	void iommu_device_unuse_dma_api(struct device *dev);

Nit, do we care whether it uses the actual DMA API?  Or is it just
that iommu_device_use_dma_api() tells us the driver may program the
device to do DMA?

> > Devices under kernel drivers control must call iommu_device_use_dma_api()
> > before driver probes. The driver binding process must be aborted if it
> > returns failure.

"Devices" don't call functions.  Drivers do, or in this case, it looks
like the bus DMA code (platform, amba, fsl, pci, etc).

These functions are EXPORT_SYMBOL_GPL(), but it looks like all the
callers are built-in, so maybe the export is unnecessary?

You use "iommu"/"IOMMU" and "dma"/"DMA" interchangeably above.  Would
be easier to read if you picked one.

> > The vfio oriented interfaces are,
> > 
> > 	int iommu_group_set_dma_owner(struct iommu_group *group,
> > 				      void *owner);
> > 	void iommu_group_release_dma_owner(struct iommu_group *group);
> > 	bool iommu_group_dma_owner_claimed(struct iommu_group *group);
> > 
> > The device userspace assignment must be disallowed if the set dma owner
> > interface returns failure.

Can you connect this back to the "never a mixture" from the beginning?
If all you cared about was prevent an IOMMU group from containing
devices with a mixture of kernel drivers and userspace drivers, I
assume you could do that without iommu_device_use_dma_api().  So is
this a way to *allow* a mixture under certain restricted conditions?

Another nit below.

> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > ---
> >  include/linux/iommu.h |  31 ++++++++
> >  drivers/iommu/iommu.c | 161 +++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 189 insertions(+), 3 deletions(-)
> > 
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index de0c57a567c8..568f285468cf 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -682,6 +682,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> >  void iommu_sva_unbind_device(struct iommu_sva *handle);
> >  u32 iommu_sva_get_pasid(struct iommu_sva *handle);
> >  
> > +int iommu_device_use_dma_api(struct device *dev);
> > +void iommu_device_unuse_dma_api(struct device *dev);
> > +
> > +int iommu_group_set_dma_owner(struct iommu_group *group, void *owner);
> > +void iommu_group_release_dma_owner(struct iommu_group *group);
> > +bool iommu_group_dma_owner_claimed(struct iommu_group *group);
> > +
> >  #else /* CONFIG_IOMMU_API */
> >  
> >  struct iommu_ops {};
> > @@ -1082,6 +1089,30 @@ static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
> >  {
> >  	return NULL;
> >  }
> > +
> > +static inline int iommu_device_use_dma_api(struct device *dev)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline void iommu_device_unuse_dma_api(struct device *dev)
> > +{
> > +}
> > +
> > +static inline int
> > +iommu_group_set_dma_owner(struct iommu_group *group, void *owner)
> > +{
> > +	return -ENODEV;
> > +}
> > +
> > +static inline void iommu_group_release_dma_owner(struct iommu_group *group)
> > +{
> > +}
> > +
> > +static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
> > +{
> > +	return false;
> > +}
> >  #endif /* CONFIG_IOMMU_API */
> >  
> >  /**
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 8b86406b7162..ff0c8c1ad5af 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -48,6 +48,8 @@ struct iommu_group {
> >  	struct iommu_domain *default_domain;
> >  	struct iommu_domain *domain;
> >  	struct list_head entry;
> > +	unsigned int owner_cnt;
> > +	void *owner;
> >  };
> >  
> >  struct group_device {
> > @@ -289,7 +291,12 @@ int iommu_probe_device(struct device *dev)
> >  	mutex_lock(&group->mutex);
> >  	iommu_alloc_default_domain(group, dev);
> >  
> > -	if (group->default_domain) {
> > +	/*
> > +	 * If device joined an existing group which has been claimed
> > +	 * for none kernel DMA purpose, avoid attaching the default
> > +	 * domain.

Also: another "none kernel DMA purpose" that doesn't read well.  Is
this supposed to be "non-kernel"?  What does "claimed for non-kernel
DMA purpose" mean?  What interface does that?

> > +	 */
> > +	if (group->default_domain && !group->owner) {
> >  		ret = __iommu_attach_device(group->default_domain, dev);
> >  		if (ret) {
> >  			mutex_unlock(&group->mutex);
> > @@ -2320,7 +2327,7 @@ static int __iommu_attach_group(struct iommu_domain *domain,
> >  {
> >  	int ret;
> >  
> > -	if (group->default_domain && group->domain != group->default_domain)
> > +	if (group->domain && group->domain != group->default_domain)
> >  		return -EBUSY;
> >  
> >  	ret = __iommu_group_for_each_dev(group, domain,
> > @@ -2357,7 +2364,11 @@ static void __iommu_detach_group(struct iommu_domain *domain,
> >  {
> >  	int ret;
> >  
> > -	if (!group->default_domain) {
> > +	/*
> > +	 * If group has been claimed for none kernel DMA purpose, avoid
> > +	 * re-attaching the default domain.
> > +	 */
> 
> none kernel reads odd.  But maybe drop that and just say 'claimed
> already' ala:
> 
> 	/*
> 	 * If the group has been claimed already, do not re-attach the default
> 	 * domain.
> 	 */
Jason Gunthorpe Jan. 4, 2022, 7:23 p.m. UTC | #3
On Tue, Jan 04, 2022 at 10:41:00AM -0600, Bjorn Helgaas wrote:
> On Tue, Jan 04, 2022 at 02:08:00AM -0800, Christoph Hellwig wrote:
> > On Tue, Jan 04, 2022 at 09:56:31AM +0800, Lu Baolu wrote:
> > > Multiple devices may be placed in the same IOMMU group because they
> > > cannot be isolated from each other. These devices must either be
> > > entirely under kernel control or userspace control, never a mixture.
> 
> I guess the reason is that if a group contained a mixture, userspace
> could attack the kernel by programming a device to DMA to a device
> owned by the kernel?

There are several reasons, including what you guessed, but for the
design of the series we can just focus on the group's domain.

If the kernel is using a device then the kernel driver uses the DMA
API, and the group's domain must always point to the default domain
so long as a DMA API user exists. Hopefully that is clear.

> > > The device driver oriented interfaces are,
> > > 
> > > 	int iommu_device_use_dma_api(struct device *dev);
> > > 	void iommu_device_unuse_dma_api(struct device *dev);
> 
> Nit, do we care whether it uses the actual DMA API?  Or is it just
> that iommu_device_use_dma_api() tells us the driver may program the
> device to do DMA?

As the main purpose, yes this is all about the DMA API because it
asserts the group domain is the DMA API's domain.

There is a secondary purpose that has to do with the user/kernel
attack you mentioned above. Maintaining the DMA API domain also
prevents VFIO from allowing userspace to operate any device in the
group which blocks P2P attacks to MMIO of other devices.

This is why, even if the driver doesn't use DMA, it should still do a
iommu_device_use_dma_api(), except in the special cases where we don't
care about P2P attacks (eg pci-stub, bridges, etc).

> > > The vfio oriented interfaces are,
> > > 
> > > 	int iommu_group_set_dma_owner(struct iommu_group *group,
> > > 				      void *owner);
> > > 	void iommu_group_release_dma_owner(struct iommu_group *group);
> > > 	bool iommu_group_dma_owner_claimed(struct iommu_group *group);
> > > 
> > > The device userspace assignment must be disallowed if the set dma owner
> > > interface returns failure.
> 
> Can you connect this back to the "never a mixture" from the beginning?
> If all you cared about was prevent an IOMMU group from containing
> devices with a mixture of kernel drivers and userspace drivers, I
> assume you could do that without iommu_device_use_dma_api().  So is
> this a way to *allow* a mixture under certain restricted conditions?

It is not about user/kernel, it is about arbitrating the shared
group->domain against multiple different requests to set it to
something else.

Lu, Given that the word 'user' was deleted from the API entirely it
makes sense to reword these commit messages to focus less on user vs
kernel and more on ownership of the domain pointer.

Jason
Baolu Lu Jan. 5, 2022, 6:57 a.m. UTC | #4
Hi Christoph,

On 1/4/22 6:08 PM, Christoph Hellwig wrote:
> On Tue, Jan 04, 2022 at 09:56:31AM +0800, Lu Baolu wrote:
>> Multiple devices may be placed in the same IOMMU group because they
>> cannot be isolated from each other. These devices must either be
>> entirely under kernel control or userspace control, never a mixture.
>>
>> This adds dma ownership management in iommu core and exposes several
>> interfaces for the device drivers and the device userspace assignment
>> framework (i.e. vfio), so that any conflict between user and kernel
>> controlled DMA could be detected at the beginning.
>>
>> The device driver oriented interfaces are,
>>
>> 	int iommu_device_use_dma_api(struct device *dev);
>> 	void iommu_device_unuse_dma_api(struct device *dev);
>>
>> Devices under kernel drivers control must call iommu_device_use_dma_api()
>> before driver probes. The driver binding process must be aborted if it
>> returns failure.
>>
>> The vfio oriented interfaces are,
>>
>> 	int iommu_group_set_dma_owner(struct iommu_group *group,
>> 				      void *owner);
>> 	void iommu_group_release_dma_owner(struct iommu_group *group);
>> 	bool iommu_group_dma_owner_claimed(struct iommu_group *group);
>>
>> The device userspace assignment must be disallowed if the set dma owner
>> interface returns failure.
>>
>> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
>> ---
>>   include/linux/iommu.h |  31 ++++++++
>>   drivers/iommu/iommu.c | 161 +++++++++++++++++++++++++++++++++++++++++-
>>   2 files changed, 189 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index de0c57a567c8..568f285468cf 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -682,6 +682,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
>>   void iommu_sva_unbind_device(struct iommu_sva *handle);
>>   u32 iommu_sva_get_pasid(struct iommu_sva *handle);
>>   
>> +int iommu_device_use_dma_api(struct device *dev);
>> +void iommu_device_unuse_dma_api(struct device *dev);
>> +
>> +int iommu_group_set_dma_owner(struct iommu_group *group, void *owner);
>> +void iommu_group_release_dma_owner(struct iommu_group *group);
>> +bool iommu_group_dma_owner_claimed(struct iommu_group *group);
>> +
>>   #else /* CONFIG_IOMMU_API */
>>   
>>   struct iommu_ops {};
>> @@ -1082,6 +1089,30 @@ static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
>>   {
>>   	return NULL;
>>   }
>> +
>> +static inline int iommu_device_use_dma_api(struct device *dev)
>> +{
>> +	return 0;
>> +}
>> +
>> +static inline void iommu_device_unuse_dma_api(struct device *dev)
>> +{
>> +}
>> +
>> +static inline int
>> +iommu_group_set_dma_owner(struct iommu_group *group, void *owner)
>> +{
>> +	return -ENODEV;
>> +}
>> +
>> +static inline void iommu_group_release_dma_owner(struct iommu_group *group)
>> +{
>> +}
>> +
>> +static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
>> +{
>> +	return false;
>> +}
>>   #endif /* CONFIG_IOMMU_API */
>>   
>>   /**
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 8b86406b7162..ff0c8c1ad5af 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -48,6 +48,8 @@ struct iommu_group {
>>   	struct iommu_domain *default_domain;
>>   	struct iommu_domain *domain;
>>   	struct list_head entry;
>> +	unsigned int owner_cnt;
>> +	void *owner;
>>   };
>>   
>>   struct group_device {
>> @@ -289,7 +291,12 @@ int iommu_probe_device(struct device *dev)
>>   	mutex_lock(&group->mutex);
>>   	iommu_alloc_default_domain(group, dev);
>>   
>> -	if (group->default_domain) {
>> +	/*
>> +	 * If device joined an existing group which has been claimed
>> +	 * for none kernel DMA purpose, avoid attaching the default
>> +	 * domain.
>> +	 */
>> +	if (group->default_domain && !group->owner) {
>>   		ret = __iommu_attach_device(group->default_domain, dev);
>>   		if (ret) {
>>   			mutex_unlock(&group->mutex);
>> @@ -2320,7 +2327,7 @@ static int __iommu_attach_group(struct iommu_domain *domain,
>>   {
>>   	int ret;
>>   
>> -	if (group->default_domain && group->domain != group->default_domain)
>> +	if (group->domain && group->domain != group->default_domain)
>>   		return -EBUSY;
>>   
>>   	ret = __iommu_group_for_each_dev(group, domain,
>> @@ -2357,7 +2364,11 @@ static void __iommu_detach_group(struct iommu_domain *domain,
>>   {
>>   	int ret;
>>   
>> -	if (!group->default_domain) {
>> +	/*
>> +	 * If group has been claimed for none kernel DMA purpose, avoid
>> +	 * re-attaching the default domain.
>> +	 */
> 
> none kernel reads odd.  But maybe drop that and just say 'claimed
> already' ala:
> 
> 	/*
> 	 * If the group has been claimed already, do not re-attach the default
> 	 * domain.
> 	 */
> 

Sure!

Best regards,
baolu
Baolu Lu Jan. 6, 2022, 3:18 a.m. UTC | #5
On 1/5/22 3:23 AM, Jason Gunthorpe wrote:
>>>> The vfio oriented interfaces are,
>>>>
>>>> 	int iommu_group_set_dma_owner(struct iommu_group *group,
>>>> 				      void *owner);
>>>> 	void iommu_group_release_dma_owner(struct iommu_group *group);
>>>> 	bool iommu_group_dma_owner_claimed(struct iommu_group *group);
>>>>
>>>> The device userspace assignment must be disallowed if the set dma owner
>>>> interface returns failure.
>> Can you connect this back to the "never a mixture" from the beginning?
>> If all you cared about was prevent an IOMMU group from containing
>> devices with a mixture of kernel drivers and userspace drivers, I
>> assume you could do that without iommu_device_use_dma_api().  So is
>> this a way to*allow*  a mixture under certain restricted conditions?
> It is not about user/kernel, it is about arbitrating the shared
> group->domain against multiple different requests to set it to
> something else.
> 
> Lu, Given that the word 'user' was deleted from the API entirely it
> makes sense to reword these commit messages to focus less on user vs
> kernel and more on ownership of the domain pointer.

Sure. Will do it.

Best regards,
baolu
Baolu Lu Jan. 6, 2022, 3:43 a.m. UTC | #6
Hi Bjorn,

On 1/5/22 12:41 AM, Bjorn Helgaas wrote:
>>> This adds dma ownership management in iommu core and exposes several
>>> interfaces for the device drivers and the device userspace assignment
>>> framework (i.e. vfio), so that any conflict between user and kernel
>>> controlled DMA could be detected at the beginning.
> Maybe I'm missing the point because I don't know what "conflict
> between user and kernel controlled DMA" is.  Are you talking about
> both userspace and the kernel programming the same device to do DMA?

It's about userspace and the kernel programming different devices that
belong to the same iommu group to do DMA. For example, PCI devices A
and B sit in the same iommu group. Userspace programs device A for DMA
while a kernel driver programs device B. Userspace may then,
intentionally or unintentionally, program device A to access and change
device B's MMIO with P2P DMA transactions, leaving the kernel driver
for device B in an inconsistent state.

This is not the only problem, just an example.

Best regards,
baolu
Baolu Lu Jan. 6, 2022, 3:47 a.m. UTC | #7
On 1/5/22 12:41 AM, Bjorn Helgaas wrote:
>>> Devices under kernel drivers control must call iommu_device_use_dma_api()
>>> before driver probes. The driver binding process must be aborted if it
>>> returns failure.
> "Devices" don't call functions.  Drivers do, or in this case, it looks
> like the bus DMA code (platform, amba, fsl, pci, etc).
> 
> These functions are EXPORT_SYMBOL_GPL(), but it looks like all the
> callers are built-in, so maybe the export is unnecessary?

Yes. If all callers are built-in, we can remove this export.

> 
> You use "iommu"/"IOMMU" and "dma"/"DMA" interchangeably above.  Would
> be easier to read if you picked one.
> 

I should clean up all of these. I also realized that I forgot to fix
some typos you pointed out in v4:

https://lore.kernel.org/linux-iommu/20211229211626.GA1701512@bhelgaas/

Sorry about that. I will take care of all of these in the next version.

Best regards,
baolu
Baolu Lu Jan. 6, 2022, 3:51 a.m. UTC | #8
On 1/5/22 12:41 AM, Bjorn Helgaas wrote:
>>>   struct group_device {
>>> @@ -289,7 +291,12 @@ int iommu_probe_device(struct device *dev)
>>>   	mutex_lock(&group->mutex);
>>>   	iommu_alloc_default_domain(group, dev);
>>>   
>>> -	if (group->default_domain) {
>>> +	/*
>>> +	 * If device joined an existing group which has been claimed
>>> +	 * for none kernel DMA purpose, avoid attaching the default
>>> +	 * domain.
> Also: another "none kernel DMA purpose" that doesn't read well.  Is
> this supposed to be "non-kernel"?  What does "claimed for non-kernel
> DMA purpose" mean?  What interface does that?
> 

It's hard to read. I will rephrase it like this:

/*
  * If device joined an existing group which has been claimed, don't
  * attach the default domain.
  */

Best regards,
baolu
Baolu Lu Jan. 6, 2022, 3:54 a.m. UTC | #9
On 1/5/22 3:23 AM, Jason Gunthorpe wrote:
>>>> The device driver oriented interfaces are,
>>>>
>>>> 	int iommu_device_use_dma_api(struct device *dev);
>>>> 	void iommu_device_unuse_dma_api(struct device *dev);
>> Nit, do we care whether it uses the actual DMA API?  Or is it just
>> that iommu_device_use_dma_api() tells us the driver may program the
>> device to do DMA?
> As the main purpose, yes this is all about the DMA API because it
> asserts the group domain is the DMA API's domain.
> 
> There is a secondary purpose that has to do with the user/kernel
> attack you mentioned above. Maintaining the DMA API domain also
> prevents VFIO from allowing userspace to operate any device in the
> group which blocks P2P attacks to MMIO of other devices.
> 
> This is why, even if the driver doesn't use DMA, it should still do a
> iommu_device_use_dma_api(), except in the special cases where we don't
> care about P2P attacks (eg pci-stub, bridges, etc).
> 

By the way, use_dma_api seems hard to read. How about

	iommu_device_use_default_dma()?

Best regards,
baolu
Jason Gunthorpe Jan. 6, 2022, 3:46 p.m. UTC | #10
On Thu, Jan 06, 2022 at 11:54:06AM +0800, Lu Baolu wrote:
> On 1/5/22 3:23 AM, Jason Gunthorpe wrote:
> > > > > The device driver oriented interfaces are,
> > > > > 
> > > > > 	int iommu_device_use_dma_api(struct device *dev);
> > > > > 	void iommu_device_unuse_dma_api(struct device *dev);
> > > Nit, do we care whether it uses the actual DMA API?  Or is it just
> > > that iommu_device_use_dma_api() tells us the driver may program the
> > > device to do DMA?
> > As the main purpose, yes this is all about the DMA API because it
> > asserts the group domain is the DMA API's domain.
> > 
> > There is a secondary purpose that has to do with the user/kernel
> > attack you mentioned above. Maintaining the DMA API domain also
> > prevents VFIO from allowing userspace to operate any device in the
> > group which blocks P2P attacks to MMIO of other devices.
> > 
> > This is why, even if the driver doesn't use DMA, it should still do a
> > iommu_device_use_dma_api(), except in the special cases where we don't
> > care about P2P attacks (eg pci-stub, bridges, etc).
> > 
> 
> By the way, use_dma_api seems hard to read. How about
> 
> 	iommu_device_use_default_dma()?

You could just say "use default domain"

IMHO the way the iommu subsystem has its own wonky language is a
little troublesome. In the rest of the kernel we call this the DMA
API, while the iommu subsystem calls the domain that the DMA API uses
the 'default domain' not the 'DMA API' domain.

Still, it is probably better to align to the iommu language - just be
sure to put in the function comment that this API 'allows the driver
to use the DMA API eg dma_map_sg()'
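
For example, the kernel-doc could read something like this (the
function name and wording are only a sketch):

	/**
	 * iommu_device_use_default_domain() - Keep the group attached to its
	 *     default domain so the driver bound to @dev can use the kernel
	 *     DMA API, eg dma_map_sg()
	 * @dev: The device.
	 */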

Jason
Baolu Lu Jan. 7, 2022, 1:50 a.m. UTC | #11
On 1/6/22 11:46 PM, Jason Gunthorpe wrote:
> On Thu, Jan 06, 2022 at 11:54:06AM +0800, Lu Baolu wrote:
>> On 1/5/22 3:23 AM, Jason Gunthorpe wrote:
>>>>>> The device driver oriented interfaces are,
>>>>>>
>>>>>> 	int iommu_device_use_dma_api(struct device *dev);
>>>>>> 	void iommu_device_unuse_dma_api(struct device *dev);
>>>> Nit, do we care whether it uses the actual DMA API?  Or is it just
>>>> that iommu_device_use_dma_api() tells us the driver may program the
>>>> device to do DMA?
>>> As the main purpose, yes this is all about the DMA API because it
>>> asserts the group domain is the DMA API's domain.
>>>
>>> There is a secondary purpose that has to do with the user/kernel
>>> attack you mentioned above. Maintaining the DMA API domain also
>>> prevents VFIO from allowing userspace to operate any device in the
>>> group which blocks P2P attacks to MMIO of other devices.
>>>
>>> This is why, even if the driver doesn't use DMA, it should still do a
>>> iommu_device_use_dma_api(), except in the special cases where we don't
>>> care about P2P attacks (eg pci-stub, bridges, etc).
>>>
>>
>> By the way, use_dma_api seems hard to read. How about
>>
>> 	iommu_device_use_default_dma()?
> 
> You could just say "use default domain"
> 
> IMHO the way the iommu subsystem has its own wonky language is a
> little troublesome. In the rest of the kernel we call this the DMA
> API, while the iommu subsystem calls the domain that the DMA API uses
> the 'default domain' not the 'DMA API' domain.
> 
> Still, it is probably better to align to the iommu language - just be
> sure to put in the function comment that this API 'allows the driver
> to use the DMA API eg dma_map_sg()'

iommu_device_use_default_domain() reads better. I will also add some
comments linking "default domain" with the "DMA API". Thanks!

> 
> Jason
> 

Best regards,
baolu

Patch

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index de0c57a567c8..568f285468cf 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -682,6 +682,13 @@  struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 void iommu_sva_unbind_device(struct iommu_sva *handle);
 u32 iommu_sva_get_pasid(struct iommu_sva *handle);
 
+int iommu_device_use_dma_api(struct device *dev);
+void iommu_device_unuse_dma_api(struct device *dev);
+
+int iommu_group_set_dma_owner(struct iommu_group *group, void *owner);
+void iommu_group_release_dma_owner(struct iommu_group *group);
+bool iommu_group_dma_owner_claimed(struct iommu_group *group);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -1082,6 +1089,30 @@  static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
 	return NULL;
 }
+
+static inline int iommu_device_use_dma_api(struct device *dev)
+{
+	return 0;
+}
+
+static inline void iommu_device_unuse_dma_api(struct device *dev)
+{
+}
+
+static inline int
+iommu_group_set_dma_owner(struct iommu_group *group, void *owner)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_group_release_dma_owner(struct iommu_group *group)
+{
+}
+
+static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
+{
+	return false;
+}
 #endif /* CONFIG_IOMMU_API */
 
 /**
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8b86406b7162..ff0c8c1ad5af 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -48,6 +48,8 @@  struct iommu_group {
 	struct iommu_domain *default_domain;
 	struct iommu_domain *domain;
 	struct list_head entry;
+	unsigned int owner_cnt;
+	void *owner;
 };
 
 struct group_device {
@@ -289,7 +291,12 @@  int iommu_probe_device(struct device *dev)
 	mutex_lock(&group->mutex);
 	iommu_alloc_default_domain(group, dev);
 
-	if (group->default_domain) {
+	/*
+	 * If device joined an existing group which has been claimed
+	 * for none kernel DMA purpose, avoid attaching the default
+	 * domain.
+	 */
+	if (group->default_domain && !group->owner) {
 		ret = __iommu_attach_device(group->default_domain, dev);
 		if (ret) {
 			mutex_unlock(&group->mutex);
@@ -2320,7 +2327,7 @@  static int __iommu_attach_group(struct iommu_domain *domain,
 {
 	int ret;
 
-	if (group->default_domain && group->domain != group->default_domain)
+	if (group->domain && group->domain != group->default_domain)
 		return -EBUSY;
 
 	ret = __iommu_group_for_each_dev(group, domain,
@@ -2357,7 +2364,11 @@  static void __iommu_detach_group(struct iommu_domain *domain,
 {
 	int ret;
 
-	if (!group->default_domain) {
+	/*
+	 * If group has been claimed for none kernel DMA purpose, avoid
+	 * re-attaching the default domain.
+	 */
+	if (!group->default_domain || group->owner) {
 		__iommu_group_for_each_dev(group, domain,
 					   iommu_group_do_detach_device);
 		group->domain = NULL;
@@ -3351,3 +3362,147 @@  static ssize_t iommu_group_store_type(struct iommu_group *group,
 
 	return ret;
 }
+
+/**
+ * iommu_device_use_dma_api() - Device driver wants to do DMA through
+ *                              kernel DMA API.
+ * @dev: The device.
+ *
+ * The device driver about to bind @dev wants to do DMA through the kernel
+ * DMA API. Return 0 if it is allowed, otherwise an error.
+ */
+int iommu_device_use_dma_api(struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(dev);
+	int ret = 0;
+
+	if (!group)
+		return 0;
+
+	mutex_lock(&group->mutex);
+	if (group->owner_cnt) {
+		if (group->domain != group->default_domain ||
+		    group->owner) {
+			ret = -EBUSY;
+			goto unlock_out;
+		}
+	}
+
+	group->owner_cnt++;
+
+unlock_out:
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_device_use_dma_api);
+
+/**
+ * iommu_device_unuse_dma_api() - Device driver doesn't want to do DMA
+ *                                through kernel DMA API anymore.
+ * @dev: The device.
+ *
+ * The device driver doesn't want to do DMA through kernel DMA API anymore.
+ * It must be called after iommu_device_use_dma_api().
+ */
+void iommu_device_unuse_dma_api(struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(dev);
+
+	if (!group)
+		return;
+
+	mutex_lock(&group->mutex);
+	if (!WARN_ON(!group->owner_cnt))
+		group->owner_cnt--;
+
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(iommu_device_unuse_dma_api);
+
+/**
+ * iommu_group_set_dma_owner() - Set DMA ownership of a group
+ * @group: The group.
+ * @owner: Caller specified pointer. Used for exclusive ownership.
+ *
+ * This is to support backward compatibility for vfio which manages
+ * the dma ownership in iommu_group level. New invocations on this
+ * interface should be prohibited.
+ */
+int iommu_group_set_dma_owner(struct iommu_group *group, void *owner)
+{
+	int ret = 0;
+
+	mutex_lock(&group->mutex);
+	if (group->owner_cnt) {
+		if (group->owner != owner) {
+			ret = -EPERM;
+			goto unlock_out;
+		}
+	} else {
+		if (group->domain && group->domain != group->default_domain) {
+			ret = -EBUSY;
+			goto unlock_out;
+		}
+
+		group->owner = owner;
+		if (group->domain)
+			__iommu_detach_group(group->domain, group);
+	}
+
+	group->owner_cnt++;
+unlock_out:
+	mutex_unlock(&group->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_set_dma_owner);
+
+/**
+ * iommu_group_release_dma_owner() - Release DMA ownership of a group
+ * @group: The group.
+ *
+ * Release the DMA ownership claimed by iommu_group_set_dma_owner().
+ */
+void iommu_group_release_dma_owner(struct iommu_group *group)
+{
+	mutex_lock(&group->mutex);
+	if (WARN_ON(!group->owner_cnt || !group->owner))
+		goto unlock_out;
+
+	if (--group->owner_cnt > 0)
+		goto unlock_out;
+
+	/*
+	 * The UNMANAGED domain should be detached before all USER
+	 * owners have been released.
+	 */
+	if (!WARN_ON(group->domain) && group->default_domain)
+		__iommu_attach_group(group->default_domain, group);
+	group->owner = NULL;
+
+unlock_out:
+	mutex_unlock(&group->mutex);
+}
+EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
+
+/**
+ * iommu_group_dma_owner_claimed() - Query group dma ownership status
+ * @group: The group.
+ *
+ * This provides status query on a given group. It is racey and only for
+ * non-binding status reporting.
+ */
+bool iommu_group_dma_owner_claimed(struct iommu_group *group)
+{
+	unsigned int user;
+
+	mutex_lock(&group->mutex);
+	user = group->owner_cnt;
+	mutex_unlock(&group->mutex);
+
+	return user;
+}
+EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);