diff mbox series

[v5,3/3] PCI: Introduce the disable_acs_redir parameter

Message ID 20180627174650.27433-4-logang@deltatee.com
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series Add parameter for disabling ACS redirection for P2P | expand

Commit Message

Logan Gunthorpe June 27, 2018, 5:46 p.m. UTC
In order to support P2P traffic on a segment of the PCI hierarchy,
we must be able to disable the ACS redirect bits for select
PCI bridges. The bridges must be selected before the devices are
discovered by the kernel and the IOMMU groups created. Therefore,
a kernel command line parameter is created to specify devices
which must have their ACS bits disabled.

The new parameter takes a list of devices separated by a semicolon.
Each device specified will have it's ACS redirect bits disabled.
This is similar to the existing 'resource_alignment' parameter.

The ACS Request P2P Request Redirect, P2P Completion Redirect and P2P
Egress Control bits are disabled which is sufficient to always allow
passing P2P traffic uninterrupted. The bits are set after the kernel
(optionally) enables the ACS bits itself. It is also done regardless of
whether the kernel sets the bits or not seeing some BIOS firmware is known
to set the bits on boot.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Stephen Bates <sbates@raithlin.com>
Acked-by: Christian König <christian.koenig@amd.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  9 ++++
 drivers/pci/pci.c                               | 71 ++++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 2 deletions(-)

Comments

Alex Williamson July 6, 2018, 10:56 p.m. UTC | #1
On Wed, 27 Jun 2018 11:46:50 -0600
Logan Gunthorpe <logang@deltatee.com> wrote:

> In order to support P2P traffic on a segment of the PCI hierarchy,
> we must be able to disable the ACS redirect bits for select
> PCI bridges. The bridges must be selected before the devices are
> discovered by the kernel and the IOMMU groups created. Therefore,
> a kernel command line parameter is created to specify devices
> which must have their ACS bits disabled.
> 
> The new parameter takes a list of devices separated by a semicolon.
> Each device specified will have it's ACS redirect bits disabled.
> This is similar to the existing 'resource_alignment' parameter.
> 
> The ACS Request P2P Request Redirect, P2P Completion Redirect and P2P
> Egress Control bits are disabled which is sufficient to always allow
> passing P2P traffic uninterrupted. The bits are set after the kernel
> (optionally) enables the ACS bits itself. It is also done regardless of
> whether the kernel sets the bits or not seeing some BIOS firmware is known
> to set the bits on boot.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Reviewed-by: Stephen Bates <sbates@raithlin.com>
> Acked-by: Christian König <christian.koenig@amd.com>
> ---
>  Documentation/admin-guide/kernel-parameters.txt |  9 ++++
>  drivers/pci/pci.c                               | 71 ++++++++++++++++++++++++-
>  2 files changed, 78 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index a69947d9e14e..02634c4ab181 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3192,6 +3192,15 @@
>  				Adding the window is slightly risky (it may
>  				conflict with unreported devices), so this
>  				taints the kernel.
> +		disable_acs_redir=<pci_dev>[; ...]
> +				Specify one or more PCI devices (in the format
> +				specified above) separated by semicolons.
> +				Each device specified will have the PCI ACS
> +				redirect capabilities forced off which will
> +				allow P2P traffic between devices through
> +				bridges without forcing it upstream. Note:
> +				this removes isolation between devices and
> +				will make the IOMMU groups less granular.
>  
>  	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
>  			Management.
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 59638075b4df..079f7c911e09 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -2983,6 +2983,61 @@ void pci_request_acs(void)
>  	pci_acs_enable = 1;
>  }
>  
> +static const char *disable_acs_redir_param;
> +
> +/**
> + * pci_disable_acs_redir - disable ACS redirect capabilities
> + * @dev: the PCI device
> + *
> + * For only devices specified in the disable_acs_redir parameter.
> + */
> +static void pci_disable_acs_redir(struct pci_dev *dev)
> +{
> +	int ret = 0;
> +	const char *p;
> +	int pos;
> +	u16 ctrl;
> +
> +	if (!disable_acs_redir_param)
> +		return;
> +
> +	p = disable_acs_redir_param;
> +	while (*p) {
> +		ret = pci_dev_str_match(dev, p, &p);
> +		if (ret < 0) {
> +			pr_info_once("PCI: Can't parse disable_acs_redir parameter: %s\n",
> +				     disable_acs_redir_param);
> +
> +			break;
> +		} else if (ret == 1) {
> +			/* Found a match */
> +			break;
> +		}
> +
> +		if (*p != ';' && *p != ',') {
> +			/* End of param or invalid format */
> +			break;
> +		}
> +		p++;
> +	}
> +
> +	if (ret != 1)
> +		return;
> +
> +	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
> +	if (!pos)
> +		return;
> +
> +	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
> +
> +	/* P2P Request & Completion Redirect */
> +	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
> +
> +	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
> +
> +	pci_info(dev, "disabled ACS redirect\n");
> +}
> +
>  /**
>   * pci_std_enable_acs - enable ACS on devices using standard ACS capabilites
>   * @dev: the PCI device
> @@ -3022,12 +3077,22 @@ static void pci_std_enable_acs(struct pci_dev *dev)
>  void pci_enable_acs(struct pci_dev *dev)
>  {
>  	if (!pci_acs_enable)
> -		return;
> +		goto disable_acs_redir;
>  
>  	if (!pci_dev_specific_enable_acs(dev))
> -		return;
> +		goto disable_acs_redir;
>  
>  	pci_std_enable_acs(dev);
> +
> +disable_acs_redir:
> +	/*
> +	 * Note: pci_disable_acs_redir() must be called even if
> +	 * ACS is not enabled by the kernel because the firmware
> +	 * may have unexpectedly set the flags. So if we are told
> +	 * to disable it, we should always disable it after setting
> +	 * the kernel's default preferences.
> +	 */
> +	pci_disable_acs_redir(dev);
>  }
>  
>  static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
> @@ -5967,6 +6032,8 @@ static int __init pci_setup(char *str)
>  				pcie_bus_config = PCIE_BUS_PEER2PEER;
>  			} else if (!strncmp(str, "pcie_scan_all", 13)) {
>  				pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
> +			} else if (!strncmp(str, "disable_acs_redir=", 18)) {
> +				disable_acs_redir_param = str + 18;
>  			} else {
>  				printk(KERN_ERR "PCI: Unknown option `%s'\n",
>  						str);

If we lived in a perfect world where vendors actually implemented ACS
correctly, this looks fine, I approve.  But take a look at the quirks
we call out to via pci_dev_specific_enable_acs().  Both of these are
for Intel root ports, in one case we're twiddling ACS comparable
features in the device, in the other Intel has decided specs don't
matter and used dwords rather than words for the ACS capability and
control registers.  Is disabling redirect via device specific means,
then attempting to re-enable via standard mechanisms perhaps not the
right approach here?  Maybe if we determine a device is slated to
enable p2p we shouldn't call the device specific enabler.  The
non-standard ACS implementation on Intel root ports is still
troublesome though, I wonder if we need a more generic approach for
those...

Maybe we track if we enabled ACS via a device specific quirk and
minimally print an incompatibility error if it's also specified for
disable_acs_redir?  Thanks,

Alex
Logan Gunthorpe July 9, 2018, 4:59 p.m. UTC | #2
On 7/6/2018 4:56 PM, Alex Williamson wrote:
> Maybe we track if we enabled ACS via a device specific quirk and
> minimally print an incompatibility error if it's also specified for
> disable_acs_redir?  Thanks,

Ok, makes sense. I'll look at it for v6.

Logan
Logan Gunthorpe July 9, 2018, 10:27 p.m. UTC | #3
Hey Alex,

On 06/07/18 04:56 PM, Alex Williamson wrote:
> Maybe we track if we enabled ACS via a device specific quirk and
> minimally print an incompatibility error if it's also specified for
> disable_acs_redir?  Thanks,

Ok, I dug into this a bit and I think tracking if we enabled via a
device specific quirk does not work. If 'pci_acs_enable' is not set, we
wouldn't know if we used a device specific quirk and still try to
disable a quirked device the standard way.

Instead, I've looked at adding a device specific disable_acs_redir
function next to enable_acs. In this way, the device specific quirks can
override the operation. See the diff below.

Implementing the Intel SPT PCH version is pretty trivial, so I've done
that. The Intel PCH variant I've just left as unsupported and printed a
warning.

If this sounds good to you, I'll fold it into my patchset and resubmit a v6.

Thanks,

Logan

--

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 079f7c911e09..54001b307496 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3028,6 +3028,9 @@ static void pci_disable_acs_redir(struct pci_dev *dev)
 	if (!pos)
 		return;

+	if (!pci_dev_specific_disable_acs_redir(dev))
+		return;
+
 	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);

 	/* P2P Request & Completion Redirect */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f439de848658..c976a025ae92 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4526,6 +4526,16 @@ static int pci_quirk_enable_intel_pch_acs(struct
pci_dev *dev)
 	return 0;
 }

+static int pci_quirk_disable_intel_pch_acs_redir(struct pci_dev *dev)
+{
+	if (!pci_quirk_intel_pch_acs_match(dev))
+		return -ENOTTY;
+
+	pci_warn(dev, "Disabling ACS redirect on the Intel PCH root port is
not supported\n");
+
+	return 0;
+}
+
 static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
 {
 	int pos;
@@ -4553,22 +4563,53 @@ static int
pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
 	return 0;
 }

-static const struct pci_dev_enable_acs {
+static int pci_quirk_disable_intel_spt_pch_acs_redir(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap, ctrl;
+
+	if (!pci_quirk_intel_spt_pch_acs_match(dev))
+		return -ENOTTY;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+	if (!pos)
+		return -ENOTTY;
+
+	pci_read_config_dword(dev, pos + PCI_ACS_CAP, &cap);
+	pci_read_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, &ctrl);
+
+	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
+
+	pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl);
+
+	pci_info(dev, "Intel SPT PCH root port workaround: disabled ACS
redirect\n");
+
+	return 0;
+}
+
+static const struct pci_dev_acs_ops {
 	u16 vendor;
 	u16 device;
 	int (*enable_acs)(struct pci_dev *dev);
-} pci_dev_enable_acs[] = {
-	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_enable_intel_pch_acs },
-	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_enable_intel_spt_pch_acs },
+	int (*disable_acs_redir)(struct pci_dev *dev);
+} pci_dev_acs_ops[] = {
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+	  .enable_acs = pci_quirk_enable_intel_pch_acs,
+	  .disable_acs_redir = pci_quirk_disable_intel_pch_acs_redir,
+	},
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+	  .enable_acs = pci_quirk_enable_intel_spt_pch_acs,
+	  .disable_acs_redir = pci_quirk_disable_intel_spt_pch_acs_redir
+	},
 	{ 0 }
 };

 int pci_dev_specific_enable_acs(struct pci_dev *dev)
 {
-	const struct pci_dev_enable_acs *i;
+	const struct pci_dev_acs_ops *i;
 	int ret;

-	for (i = pci_dev_enable_acs; i->enable_acs; i++) {
+	for (i = pci_dev_acs_ops; i->enable_acs; i++) {
 		if ((i->vendor == dev->vendor ||
 		     i->vendor == (u16)PCI_ANY_ID) &&
 		    (i->device == dev->device ||
@@ -4582,6 +4623,25 @@ int pci_dev_specific_enable_acs(struct pci_dev *dev)
 	return -ENOTTY;
 }

+int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
+{
+	const struct pci_dev_acs_ops *i;
+	int ret;
+
+	for (i = pci_dev_acs_ops; i->enable_acs; i++) {
+		if ((i->vendor == dev->vendor ||
+		     i->vendor == (u16)PCI_ANY_ID) &&
+		    (i->device == dev->device ||
+		     i->device == (u16)PCI_ANY_ID)) {
+			ret = i->disable_acs_redir(dev);
+			if (ret >= 0)
+				return ret;
+		}
+	}
+
+	return -ENOTTY;
+}
+
 /*
  * The PCI capabilities list for Intel DH895xCC VFs (device ID 0x0443) with
  * QuickAssist Technology (QAT) is prematurely terminated in hardware.  The
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..7ee208aa1a31 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1878,6 +1878,7 @@ enum pci_fixup_pass {
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
 int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
 int pci_dev_specific_enable_acs(struct pci_dev *dev);
+int pci_dev_specific_disable_acs_redir(struct pci_dev *dev);
 #else
 static inline void pci_fixup_device(enum pci_fixup_pass pass,
 				    struct pci_dev *dev) { }
Alex Williamson July 10, 2018, 7:19 p.m. UTC | #4
On Mon, 9 Jul 2018 16:27:40 -0600
Logan Gunthorpe <logang@deltatee.com> wrote:

> Hey Alex,
> 
> On 06/07/18 04:56 PM, Alex Williamson wrote:
> > Maybe we track if we enabled ACS via a device specific quirk and
> > minimally print an incompatibility error if it's also specified for
> > disable_acs_redir?  Thanks,  
> 
> Ok, I dug into this a bit and I think tracking if we enabled via a
> device specific quirk does not work. If 'pci_acs_enable' is not set, we
> wouldn't know if we used a device specific quirk and still try to
> disable a quirked device the standard way.
> 
> Instead, I've looked at adding a device specific disable_acs_redir
> function next to enable_acs. In this way, the device specific quirks can
> override the operation. See the diff below.
> 
> Implementing the Intel SPT PCH version is pretty trivial, so I've done
> that. The Intel PCH variant I've just left as unsupported and printed a
> warning.
> 
> If this sounds good to you, I'll fold it into my patchset and resubmit a v6.
> 
> Thanks,
> 
> Logan
> 
> --
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 079f7c911e09..54001b307496 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -3028,6 +3028,9 @@ static void pci_disable_acs_redir(struct pci_dev *dev)
>  	if (!pos)
>  		return;
> 
> +	if (!pci_dev_specific_disable_acs_redir(dev))
> +		return;
> +
>  	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
> 
>  	/* P2P Request & Completion Redirect */
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index f439de848658..c976a025ae92 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -4526,6 +4526,16 @@ static int pci_quirk_enable_intel_pch_acs(struct
> pci_dev *dev)
>  	return 0;
>  }
> 
> +static int pci_quirk_disable_intel_pch_acs_redir(struct pci_dev *dev)
> +{
> +	if (!pci_quirk_intel_pch_acs_match(dev))
> +		return -ENOTTY;
> +
> +	pci_warn(dev, "Disabling ACS redirect on the Intel PCH root port is
> not supported\n");
> +
> +	return 0;
> +}

Note that these devices don't have an ACS capability, so they should
drop out just as any other device without an ACS capability would.
Should pci_disable_acs_redir() perhaps issue the pci_warn() for all
such devices, removing this device specific disable function?

> +
>  static int pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
>  {
>  	int pos;
> @@ -4553,22 +4563,53 @@ static int
> pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev)
>  	return 0;
>  }
> 
> -static const struct pci_dev_enable_acs {
> +static int pci_quirk_disable_intel_spt_pch_acs_redir(struct pci_dev *dev)
> +{
> +	int pos;
> +	u32 cap, ctrl;
> +
> +	if (!pci_quirk_intel_spt_pch_acs_match(dev))
> +		return -ENOTTY;
> +
> +	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
> +	if (!pos)
> +		return -ENOTTY;
> +
> +	pci_read_config_dword(dev, pos + PCI_ACS_CAP, &cap);
> +	pci_read_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, &ctrl);
> +
> +	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
> +
> +	pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl);
> +
> +	pci_info(dev, "Intel SPT PCH root port workaround: disabled ACS
> redirect\n");
> +
> +	return 0;
> +}
> +
> +static const struct pci_dev_acs_ops {
>  	u16 vendor;
>  	u16 device;
>  	int (*enable_acs)(struct pci_dev *dev);
> -} pci_dev_enable_acs[] = {
> -	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_enable_intel_pch_acs },
> -	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_enable_intel_spt_pch_acs },
> +	int (*disable_acs_redir)(struct pci_dev *dev);
> +} pci_dev_acs_ops[] = {
> +	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
> +	  .enable_acs = pci_quirk_enable_intel_pch_acs,
> +	  .disable_acs_redir = pci_quirk_disable_intel_pch_acs_redir,
> +	},
> +	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
> +	  .enable_acs = pci_quirk_enable_intel_spt_pch_acs,
> +	  .disable_acs_redir = pci_quirk_disable_intel_spt_pch_acs_redir
> +	},

Kind of cumbersome, and as above, maybe the reverse path is optional.
I wonder if there's a better callback we should use or if we should not
rely on quirks providing both.

>  	{ 0 }
>  };
> 
>  int pci_dev_specific_enable_acs(struct pci_dev *dev)
>  {
> -	const struct pci_dev_enable_acs *i;
> +	const struct pci_dev_acs_ops *i;
>  	int ret;
> 
> -	for (i = pci_dev_enable_acs; i->enable_acs; i++) {
> +	for (i = pci_dev_acs_ops; i->enable_acs; i++) {

Perhaps this would walk via ARRAY_SIZE if we decide one or the other
callback is optional.

>  		if ((i->vendor == dev->vendor ||
>  		     i->vendor == (u16)PCI_ANY_ID) &&
>  		    (i->device == dev->device ||
> @@ -4582,6 +4623,25 @@ int pci_dev_specific_enable_acs(struct pci_dev *dev)
>  	return -ENOTTY;
>  }
> 
> +int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
> +{
> +	const struct pci_dev_acs_ops *i;
> +	int ret;
> +
> +	for (i = pci_dev_acs_ops; i->enable_acs; i++) {

Test i->disable_acs_redir?

> +		if ((i->vendor == dev->vendor ||
> +		     i->vendor == (u16)PCI_ANY_ID) &&
> +		    (i->device == dev->device ||
> +		     i->device == (u16)PCI_ANY_ID)) {
> +			ret = i->disable_acs_redir(dev);
> +			if (ret >= 0)
> +				return ret;
> +		}
> +	}
> +
> +	return -ENOTTY;
> +}
> +
>  /*
>   * The PCI capabilities list for Intel DH895xCC VFs (device ID 0x0443) with
>   * QuickAssist Technology (QAT) is prematurely terminated in hardware.  The
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 340029b2fb38..7ee208aa1a31 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1878,6 +1878,7 @@ enum pci_fixup_pass {
>  void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
>  int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
>  int pci_dev_specific_enable_acs(struct pci_dev *dev);
> +int pci_dev_specific_disable_acs_redir(struct pci_dev *dev);

static inline version for !CONFIG_PCI_QUIRKS?  Thanks,

Alex

>  #else
>  static inline void pci_fixup_device(enum pci_fixup_pass pass,
>  				    struct pci_dev *dev) { }
Logan Gunthorpe July 10, 2018, 7:26 p.m. UTC | #5
On 10/07/18 01:19 PM, Alex Williamson wrote:
> Note that these devices don't have an ACS capability, so they should
> drop out just as any other device without an ACS capability would.
> Should pci_disable_acs_redir() perhaps issue the pci_warn() for all
> such devices, removing this device specific disable function?

Ok, that sounds like a good idea.


> Kind of cumbersome, and as above, maybe the reverse path is optional.
> I wonder if there's a better callback we should use or if we should not
> rely on quirks providing both.

Well, keep in mind enable_acs() and disable_acs_redir() are not inverse
operations. The disable function is only disabling specific ACS bits to
enable redirect -- which are not the same bits being set by the enable
function.

>>  	{ 0 }
>>  };
>>
>>  int pci_dev_specific_enable_acs(struct pci_dev *dev)
>>  {
>> -	const struct pci_dev_enable_acs *i;
>> +	const struct pci_dev_acs_ops *i;
>>  	int ret;
>>
>> -	for (i = pci_dev_enable_acs; i->enable_acs; i++) {
>> +	for (i = pci_dev_acs_ops; i->enable_acs; i++) {
> 
> Perhaps this would walk via ARRAY_SIZE if we decide one or the other
> callback is optional.

> Test i->disable_acs_redir?

Yes, both points make sense if we start saying the operations are optional.


> static inline version for !CONFIG_PCI_QUIRKS?  Thanks,

Oops, yes, I forgot that.

Logan
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a69947d9e14e..02634c4ab181 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3192,6 +3192,15 @@ 
 				Adding the window is slightly risky (it may
 				conflict with unreported devices), so this
 				taints the kernel.
+		disable_acs_redir=<pci_dev>[; ...]
+				Specify one or more PCI devices (in the format
+				specified above) separated by semicolons.
+				Each device specified will have the PCI ACS
+				redirect capabilities forced off which will
+				allow P2P traffic between devices through
+				bridges without forcing it upstream. Note:
+				this removes isolation between devices and
+				will make the IOMMU groups less granular.
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 59638075b4df..079f7c911e09 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2983,6 +2983,61 @@  void pci_request_acs(void)
 	pci_acs_enable = 1;
 }
 
+static const char *disable_acs_redir_param;
+
+/**
+ * pci_disable_acs_redir - disable ACS redirect capabilities
+ * @dev: the PCI device
+ *
+ * For only devices specified in the disable_acs_redir parameter.
+ */
+static void pci_disable_acs_redir(struct pci_dev *dev)
+{
+	int ret = 0;
+	const char *p;
+	int pos;
+	u16 ctrl;
+
+	if (!disable_acs_redir_param)
+		return;
+
+	p = disable_acs_redir_param;
+	while (*p) {
+		ret = pci_dev_str_match(dev, p, &p);
+		if (ret < 0) {
+			pr_info_once("PCI: Can't parse disable_acs_redir parameter: %s\n",
+				     disable_acs_redir_param);
+
+			break;
+		} else if (ret == 1) {
+			/* Found a match */
+			break;
+		}
+
+		if (*p != ';' && *p != ',') {
+			/* End of param or invalid format */
+			break;
+		}
+		p++;
+	}
+
+	if (ret != 1)
+		return;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
+	if (!pos)
+		return;
+
+	pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
+
+	/* P2P Request & Completion Redirect */
+	ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC);
+
+	pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
+
+	pci_info(dev, "disabled ACS redirect\n");
+}
+
 /**
  * pci_std_enable_acs - enable ACS on devices using standard ACS capabilites
  * @dev: the PCI device
@@ -3022,12 +3077,22 @@  static void pci_std_enable_acs(struct pci_dev *dev)
 void pci_enable_acs(struct pci_dev *dev)
 {
 	if (!pci_acs_enable)
-		return;
+		goto disable_acs_redir;
 
 	if (!pci_dev_specific_enable_acs(dev))
-		return;
+		goto disable_acs_redir;
 
 	pci_std_enable_acs(dev);
+
+disable_acs_redir:
+	/*
+	 * Note: pci_disable_acs_redir() must be called even if
+	 * ACS is not enabled by the kernel because the firmware
+	 * may have unexpectedly set the flags. So if we are told
+	 * to disable it, we should always disable it after setting
+	 * the kernel's default preferences.
+	 */
+	pci_disable_acs_redir(dev);
 }
 
 static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags)
@@ -5967,6 +6032,8 @@  static int __init pci_setup(char *str)
 				pcie_bus_config = PCIE_BUS_PEER2PEER;
 			} else if (!strncmp(str, "pcie_scan_all", 13)) {
 				pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
+			} else if (!strncmp(str, "disable_acs_redir=", 18)) {
+				disable_acs_redir_param = str + 18;
 			} else {
 				printk(KERN_ERR "PCI: Unknown option `%s'\n",
 						str);