diff mbox series

[v5,07/14] PCI: Add driver dma ownership management

Message ID 20220104015644.2294354-8-baolu.lu@linux.intel.com
State New
Headers show
Series Fix BUG_ON in vfio_iommu_group_notifier() | expand

Commit Message

Baolu Lu Jan. 4, 2022, 1:56 a.m. UTC
Multiple PCI devices may be placed in the same IOMMU group because
they cannot be isolated from each other. These devices must either be
entirely under kernel control or userspace control, never a mixture. This
checks and sets DMA ownership during driver binding, and releases the
ownership during driver unbinding.

The device driver may set a new flag (no_kernel_api_dma) to skip calling
iommu_device_use_dma_api() during the binding process. For instance,
userspace framework drivers (VFIO etc.) need to manually claim
their own DMA ownership when assigning the device to userspace.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 include/linux/pci.h      |  5 +++++
 drivers/pci/pci-driver.c | 21 +++++++++++++++++++++
 2 files changed, 26 insertions(+)

Comments

gregkh@linuxfoundation.org Feb. 14, 2022, 10:03 a.m. UTC | #1
On Tue, Jan 04, 2022 at 09:56:37AM +0800, Lu Baolu wrote:
> Multiple PCI devices may be placed in the same IOMMU group because
> they cannot be isolated from each other. These devices must either be
> entirely under kernel control or userspace control, never a mixture. This
> checks and sets DMA ownership during driver binding, and release the
> ownership during driver unbinding.
> 
> The device driver may set a new flag (no_kernel_api_dma) to skip calling
> iommu_device_use_dma_api() during the binding process. For instance, the
> userspace framework drivers (vfio etc.) which need to manually claim
> their own dma ownership when assigning the device to userspace.
> 
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  include/linux/pci.h      |  5 +++++
>  drivers/pci/pci-driver.c | 21 +++++++++++++++++++++
>  2 files changed, 26 insertions(+)
> 
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 18a75c8e615c..d29a990e3f02 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -882,6 +882,10 @@ struct module;
>   *              created once it is bound to the driver.
>   * @driver:	Driver model structure.
>   * @dynids:	List of dynamically added device IDs.
> + * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
> + *		Drivers which don't require DMA or want to manually claim the
> + *		owner type (e.g. userspace driver frameworks) could set this
> + *		flag.

Again with the bikeshedding, but this name is a bit odd.  Of course it's
in the kernel, this is all kernel code, so you can drop that.  And
again, "negative" flags are rough.  So maybe just "prevent_dma"?

thanks,

greg k-h
Jason Gunthorpe Feb. 14, 2022, 12:38 p.m. UTC | #2
On Mon, Feb 14, 2022 at 11:03:42AM +0100, Greg Kroah-Hartman wrote:
> On Tue, Jan 04, 2022 at 09:56:37AM +0800, Lu Baolu wrote:
> > Multiple PCI devices may be placed in the same IOMMU group because
> > they cannot be isolated from each other. These devices must either be
> > entirely under kernel control or userspace control, never a mixture. This
> > checks and sets DMA ownership during driver binding, and release the
> > ownership during driver unbinding.
> > 
> > The device driver may set a new flag (no_kernel_api_dma) to skip calling
> > iommu_device_use_dma_api() during the binding process. For instance, the
> > userspace framework drivers (vfio etc.) which need to manually claim
> > their own dma ownership when assigning the device to userspace.
> > 
> > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> >  include/linux/pci.h      |  5 +++++
> >  drivers/pci/pci-driver.c | 21 +++++++++++++++++++++
> >  2 files changed, 26 insertions(+)
> > 
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 18a75c8e615c..d29a990e3f02 100644
> > +++ b/include/linux/pci.h
> > @@ -882,6 +882,10 @@ struct module;
> >   *              created once it is bound to the driver.
> >   * @driver:	Driver model structure.
> >   * @dynids:	List of dynamically added device IDs.
> > + * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
> > + *		Drivers which don't require DMA or want to manually claim the
> > + *		owner type (e.g. userspace driver frameworks) could set this
> > + *		flag.
> 
> Again with the bikeshedding, but this name is a bit odd.  Of course it's
> in the kernel, this is all kernel code, so you can drop that.  And
> again, "negative" flags are rough.  So maybe just "prevent_dma"?

That is misleading too, it is not that DMA is prevented, but that the
kernel's dma_api has not been set up.

Though I agree the name as-is isn't great, I think the comment is good.

Jason
gregkh@linuxfoundation.org Feb. 14, 2022, 12:51 p.m. UTC | #3
On Mon, Feb 14, 2022 at 08:38:42AM -0400, Jason Gunthorpe wrote:
> On Mon, Feb 14, 2022 at 11:03:42AM +0100, Greg Kroah-Hartman wrote:
> > On Tue, Jan 04, 2022 at 09:56:37AM +0800, Lu Baolu wrote:
> > > Multiple PCI devices may be placed in the same IOMMU group because
> > > they cannot be isolated from each other. These devices must either be
> > > entirely under kernel control or userspace control, never a mixture. This
> > > checks and sets DMA ownership during driver binding, and release the
> > > ownership during driver unbinding.
> > > 
> > > The device driver may set a new flag (no_kernel_api_dma) to skip calling
> > > iommu_device_use_dma_api() during the binding process. For instance, the
> > > userspace framework drivers (vfio etc.) which need to manually claim
> > > their own dma ownership when assigning the device to userspace.
> > > 
> > > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > >  include/linux/pci.h      |  5 +++++
> > >  drivers/pci/pci-driver.c | 21 +++++++++++++++++++++
> > >  2 files changed, 26 insertions(+)
> > > 
> > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > index 18a75c8e615c..d29a990e3f02 100644
> > > +++ b/include/linux/pci.h
> > > @@ -882,6 +882,10 @@ struct module;
> > >   *              created once it is bound to the driver.
> > >   * @driver:	Driver model structure.
> > >   * @dynids:	List of dynamically added device IDs.
> > > + * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
> > > + *		Drivers which don't require DMA or want to manually claim the
> > > + *		owner type (e.g. userspace driver frameworks) could set this
> > > + *		flag.
> > 
> > Again with the bikeshedding, but this name is a bit odd.  Of course it's
> > in the kernel, this is all kernel code, so you can drop that.  And
> > again, "negative" flags are rough.  So maybe just "prevent_dma"?
> 
> That is misleading too, it is not that DMA is prevented, but that the
> kernel's dma_api has not been setup.

"has not been" or "will not be"?

What you want to prevent is the iommu core claiming the device
automatically, right?  So how about "prevent_iommu_dma"?

naming is hard,

greg k-h
Jason Gunthorpe Feb. 14, 2022, 1:11 p.m. UTC | #4
On Mon, Feb 14, 2022 at 01:51:06PM +0100, Greg Kroah-Hartman wrote:
> On Mon, Feb 14, 2022 at 08:38:42AM -0400, Jason Gunthorpe wrote:
> > On Mon, Feb 14, 2022 at 11:03:42AM +0100, Greg Kroah-Hartman wrote:
> > > On Tue, Jan 04, 2022 at 09:56:37AM +0800, Lu Baolu wrote:
> > > > Multiple PCI devices may be placed in the same IOMMU group because
> > > > they cannot be isolated from each other. These devices must either be
> > > > entirely under kernel control or userspace control, never a mixture. This
> > > > checks and sets DMA ownership during driver binding, and release the
> > > > ownership during driver unbinding.
> > > > 
> > > > The device driver may set a new flag (no_kernel_api_dma) to skip calling
> > > > iommu_device_use_dma_api() during the binding process. For instance, the
> > > > userspace framework drivers (vfio etc.) which need to manually claim
> > > > their own dma ownership when assigning the device to userspace.
> > > > 
> > > > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > > >  include/linux/pci.h      |  5 +++++
> > > >  drivers/pci/pci-driver.c | 21 +++++++++++++++++++++
> > > >  2 files changed, 26 insertions(+)
> > > > 
> > > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > > index 18a75c8e615c..d29a990e3f02 100644
> > > > +++ b/include/linux/pci.h
> > > > @@ -882,6 +882,10 @@ struct module;
> > > >   *              created once it is bound to the driver.
> > > >   * @driver:	Driver model structure.
> > > >   * @dynids:	List of dynamically added device IDs.
> > > > + * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
> > > > + *		Drivers which don't require DMA or want to manually claim the
> > > > + *		owner type (e.g. userspace driver frameworks) could set this
> > > > + *		flag.
> > > 
> > > Again with the bikeshedding, but this name is a bit odd.  Of course it's
> > > in the kernel, this is all kernel code, so you can drop that.  And
> > > again, "negative" flags are rough.  So maybe just "prevent_dma"?
> > 
> > That is misleading too, it is not that DMA is prevented, but that the
> > kernel's dma_api has not been setup.
> 
> "has not been" or "will not be"?

"has not been" as that action was supposed to happen before probe(),
but the flag skips it.

A driver that sets this flag can still decide to enable the dma API on
its own. eg tegra drivers do this.

> What you want to prevent is the iommu core claiming the device
> automatically, right?  So how about "prevent_iommu_dma"?

"claim" is not a good description. iommu always "claims" the device -
eg sets a domain, sets the dev and bus parameters, etc.

This really is only about setting up the in-kernel dma api, eg
allowing dma_map_sg()/etc to work.

dma api is just one way to operate the iommu, there are others too.

Think of this flag as 
  false = the driver is going to use the dma api (most common)
  true = the driver will decide how to use the iommu by itself

Does that help in thinking of a clearer name?

Jason
gregkh@linuxfoundation.org Feb. 14, 2022, 1:39 p.m. UTC | #5
On Mon, Feb 14, 2022 at 09:11:17AM -0400, Jason Gunthorpe wrote:
> On Mon, Feb 14, 2022 at 01:51:06PM +0100, Greg Kroah-Hartman wrote:
> > On Mon, Feb 14, 2022 at 08:38:42AM -0400, Jason Gunthorpe wrote:
> > > On Mon, Feb 14, 2022 at 11:03:42AM +0100, Greg Kroah-Hartman wrote:
> > > > On Tue, Jan 04, 2022 at 09:56:37AM +0800, Lu Baolu wrote:
> > > > > Multiple PCI devices may be placed in the same IOMMU group because
> > > > > they cannot be isolated from each other. These devices must either be
> > > > > entirely under kernel control or userspace control, never a mixture. This
> > > > > checks and sets DMA ownership during driver binding, and release the
> > > > > ownership during driver unbinding.
> > > > > 
> > > > > The device driver may set a new flag (no_kernel_api_dma) to skip calling
> > > > > iommu_device_use_dma_api() during the binding process. For instance, the
> > > > > userspace framework drivers (vfio etc.) which need to manually claim
> > > > > their own dma ownership when assigning the device to userspace.
> > > > > 
> > > > > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> > > > >  include/linux/pci.h      |  5 +++++
> > > > >  drivers/pci/pci-driver.c | 21 +++++++++++++++++++++
> > > > >  2 files changed, 26 insertions(+)
> > > > > 
> > > > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > > > index 18a75c8e615c..d29a990e3f02 100644
> > > > > +++ b/include/linux/pci.h
> > > > > @@ -882,6 +882,10 @@ struct module;
> > > > >   *              created once it is bound to the driver.
> > > > >   * @driver:	Driver model structure.
> > > > >   * @dynids:	List of dynamically added device IDs.
> > > > > + * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
> > > > > + *		Drivers which don't require DMA or want to manually claim the
> > > > > + *		owner type (e.g. userspace driver frameworks) could set this
> > > > > + *		flag.
> > > > 
> > > > Again with the bikeshedding, but this name is a bit odd.  Of course it's
> > > > in the kernel, this is all kernel code, so you can drop that.  And
> > > > again, "negative" flags are rough.  So maybe just "prevent_dma"?
> > > 
> > > That is misleading too, it is not that DMA is prevented, but that the
> > > kernel's dma_api has not been setup.
> > 
> > "has not been" or "will not be"?
> 
> "has not been" as that action was supposed to happen before probe(),
> but the flag skips it.
> 
> A driver that sets this flag can still decide to enable the dma API on
> its own. eg tegra drivers do this.

So you are just forcing the driver to manage this all on their own, so
how about, "driver_managed_dma", or even shorter "managed_dma"?

> > What you want to prevent is the iommu core claiming the device
> > automatically, right?  So how about "prevent_iommu_dma"?
> 
> "claim" is not a good description. iommu always "claims" the device -
> eg sets a domain, sets the dev and bus parameters, etc.
> 
> This really is only about setting up the in-kernel dma api, eg
> allowing dma_map_sg()/etc to work.
> 
> dma api is just one way to operate the iommu, there are others too.
> 
> Think of this flag as 
>   false = the driver is going to use the dma api (most common)
>   true = the driver will decide how to use the iommu by itself
> 
> Does it help think of a clearer name?

See above, you want a driver author to know instantly what this is and
not have to look anything up.
"I_will_manage_the_dma_myself_as_I_really_know_what_I_am_doing" might be
good, but a bit too long :)

thanks,

greg k-h
Jason Gunthorpe Feb. 14, 2022, 1:43 p.m. UTC | #6
On Mon, Feb 14, 2022 at 02:39:18PM +0100, Greg Kroah-Hartman wrote:

> > A driver that sets this flag can still decide to enable the dma API on
> > its own. eg tegra drivers do this.
> 
> So you are just forcing the driver to manage this all on their own, so
> how about, "driver_managed_dma", or even shorter "managed_dma"?

Yeah, I like "driver_managed_dma" a lot, it captures the entire idea

Thanks,
Jason
Baolu Lu Feb. 15, 2022, 3:06 a.m. UTC | #7
On 2/14/22 9:43 PM, Jason Gunthorpe wrote:
> On Mon, Feb 14, 2022 at 02:39:18PM +0100, Greg Kroah-Hartman wrote:
> 
>>> A driver that sets this flag can still decide to enable the dma API on
>>> its own. eg tegra drivers do this.
>>
>> So you are just forcing the driver to manage this all on their own, so
>> how about, "driver_managed_dma", or even shorter "managed_dma"?
> 
> Yeah, I like "driver_managed_dma" alot, it captures the entire idea

This makes a lot of sense. For most drivers, they don't need to care
about this flag as all DMAs are handled through the kernel DMA API. For
VFIO or similar drivers, they know how to manage the DMA themselves and
set this flag so that the IOMMU layer will allow them to set up and
manage their own I/O address space.

If there is no better naming, I'd like to use this and add some comments
for device driver developers.

Thanks a lot to Greg and Jason.

Best regards,
baolu
Bjorn Helgaas Feb. 23, 2022, 6 p.m. UTC | #8
In subject,

s/dma/DMA/ to match the other patches

On Tue, Jan 04, 2022 at 09:56:37AM +0800, Lu Baolu wrote:
> Multiple PCI devices may be placed in the same IOMMU group because
> they cannot be isolated from each other. These devices must either be
> entirely under kernel control or userspace control, never a mixture. This
> checks and sets DMA ownership during driver binding, and release the
> ownership during driver unbinding.
> 
> The device driver may set a new flag (no_kernel_api_dma) to skip calling
> iommu_device_use_dma_api() during the binding process. For instance, the
> userspace framework drivers (vfio etc.) which need to manually claim
> their own dma ownership when assigning the device to userspace.

s/vfio/VFIO/ when used as an acronym (occurs in several patches)

> + * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
> + *		Drivers which don't require DMA or want to manually claim the
> + *		owner type (e.g. userspace driver frameworks) could set this
> + *		flag.

s/Drivers which/Drivers that/

>  static int pci_dma_configure(struct device *dev)
>  {
> +	struct pci_driver *driver = to_pci_driver(dev->driver);
>  	struct device *bridge;
>  	int ret = 0;
>  
> +	if (!driver->no_kernel_api_dma) {

Ugh.  Double negative, totally agree this needs a better name that
reverses the sense.  Every place you use it, you negate it again.

> +	if (ret && !driver->no_kernel_api_dma)
> +		iommu_device_unuse_dma_api(dev);

> +static void pci_dma_cleanup(struct device *dev)
> +{
> +	struct pci_driver *driver = to_pci_driver(dev->driver);
> +
> +	if (!driver->no_kernel_api_dma)
> +		iommu_device_unuse_dma_api(dev);
Jason Gunthorpe Feb. 23, 2022, 6:07 p.m. UTC | #9
On Wed, Feb 23, 2022 at 12:00:56PM -0600, Bjorn Helgaas wrote:

> >  static int pci_dma_configure(struct device *dev)
> >  {
> > +	struct pci_driver *driver = to_pci_driver(dev->driver);
> >  	struct device *bridge;
> >  	int ret = 0;
> >  
> > +	if (!driver->no_kernel_api_dma) {
> 
> Ugh.  Double negative, totally agree this needs a better name that
> reverses the sense.  Every place you use it, you negate it again.

Greg came up with driver_managed_dma which is in the v6 version:

https://lore.kernel.org/all/20220218005521.172832-5-baolu.lu@linux.intel.com/

Thanks,
Jason
diff mbox series

Patch

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 18a75c8e615c..d29a990e3f02 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -882,6 +882,10 @@  struct module;
  *              created once it is bound to the driver.
  * @driver:	Driver model structure.
  * @dynids:	List of dynamically added device IDs.
+ * @no_kernel_api_dma: Device driver doesn't use kernel DMA API for DMA.
+ *		Drivers which don't require DMA or want to manually claim the
+ *		owner type (e.g. userspace driver frameworks) could set this
+ *		flag.
  */
 struct pci_driver {
 	struct list_head	node;
@@ -900,6 +904,7 @@  struct pci_driver {
 	const struct attribute_group **dev_groups;
 	struct device_driver	driver;
 	struct pci_dynids	dynids;
+	bool no_kernel_api_dma;
 };
 
 static inline struct pci_driver *to_pci_driver(struct device_driver *drv)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 588588cfda48..4e003ea12718 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -20,6 +20,7 @@ 
 #include <linux/of_device.h>
 #include <linux/acpi.h>
 #include <linux/dma-map-ops.h>
+#include <linux/iommu.h>
 #include "pci.h"
 #include "pcie/portdrv.h"
 
@@ -1590,9 +1591,16 @@  static int pci_bus_num_vf(struct device *dev)
  */
 static int pci_dma_configure(struct device *dev)
 {
+	struct pci_driver *driver = to_pci_driver(dev->driver);
 	struct device *bridge;
 	int ret = 0;
 
+	if (!driver->no_kernel_api_dma) {
+		ret = iommu_device_use_dma_api(dev);
+		if (ret)
+			return ret;
+	}
+
 	bridge = pci_get_host_bridge_device(to_pci_dev(dev));
 
 	if (IS_ENABLED(CONFIG_OF) && bridge->parent &&
@@ -1605,9 +1613,21 @@  static int pci_dma_configure(struct device *dev)
 	}
 
 	pci_put_host_bridge_device(bridge);
+
+	if (ret && !driver->no_kernel_api_dma)
+		iommu_device_unuse_dma_api(dev);
+
 	return ret;
 }
 
+static void pci_dma_cleanup(struct device *dev)
+{
+	struct pci_driver *driver = to_pci_driver(dev->driver);
+
+	if (!driver->no_kernel_api_dma)
+		iommu_device_unuse_dma_api(dev);
+}
+
 struct bus_type pci_bus_type = {
 	.name		= "pci",
 	.match		= pci_bus_match,
@@ -1621,6 +1641,7 @@  struct bus_type pci_bus_type = {
 	.pm		= PCI_PM_OPS_PTR,
 	.num_vf		= pci_bus_num_vf,
 	.dma_configure	= pci_dma_configure,
+	.dma_cleanup	= pci_dma_cleanup,
 };
 EXPORT_SYMBOL(pci_bus_type);