diff mbox series

[2/3] pci-quirk: Microsemi switch UR workaround

Message ID 20190719093821.14278-2-oohall@gmail.com
State Superseded
Headers show
Series [1/3] pci-quirk: Re-order struct members | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (3a6fdede6ce117facec0108afe716cf5d0472c3f)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot fail Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Oliver O'Halloran July 19, 2019, 9:38 a.m. UTC
Some Microsemi switches have a bug where they send URs if you perform a
config read outside of a PCI Capability register block. Linux allows
users to read the whole of config space and some tools (like lspci) will
do this.

On POWER chips this UR triggers a spurious EEH which causes all manner
of hell. This patch adds a pci-quirk for the relevant switch that blocks
config space read and writes to the switch to prevent the issue.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 core/pci-quirk.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

Comments

Michael Neuling July 23, 2019, 7:30 a.m. UTC | #1
On Fri, 2019-07-19 at 19:38 +1000, Oliver O'Halloran wrote:
> Some Microsemi switches have a bug where they send URs if you perform a
> config read outside of a PCI Capability register block. Linux allows
> users to read the whole of config space and some tools (like lspci) will
> do this.
> 
> On POWER chips this UR triggers a spurious EEH which causes all manner
> of hell. This patch adds a pci-quirk for the relevant switch that blocks
> config space read and writes to the switch to prevent the issue.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  core/pci-quirk.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/core/pci-quirk.c b/core/pci-quirk.c
> index 0862e8dc4072..371ff62b4b72 100644
> --- a/core/pci-quirk.c
> +++ b/core/pci-quirk.c
> @@ -18,10 +18,67 @@
>  
>  #include <skiboot.h>
>  #include <pci.h>
> +#include <pci-cfg.h>
>  #include <pci-quirk.h>
>  #include <platform.h>
>  #include <ast.h>
>  
> +static int64_t cfg_block_filter(void *dev __unused,
> +				struct pci_cfg_reg_filter *pcrf __unused,
> +				uint32_t offset __unused, uint32_t len,
> +				uint32_t *data, bool write)
> +{
> +	if (write)
> +		return OPAL_SUCCESS;
> +
> +	switch (len) {
> +	case 4:
> +		*data = 0xF0F0F0F0;
> +		return OPAL_SUCCESS;
> +	case 2:
> +		*((uint16_t *)data) = 0xF0F0;
> +		return OPAL_SUCCESS;
> +	case 1:
> +		*((uint8_t *)data) = 0xF0;
> +		return OPAL_SUCCESS;
> +	}
> +
> +	return OPAL_PARAMETER; /* should never happen */
> +}
> +
> +/* blocks config accesses to registers in the range: [start, end] */
> +#define BLOCK_CFG_RANGE(pd, start, end) \
> +	pci_add_cfg_reg_filter(pd, start, end - start + 1, \
> +		PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \
> +		cfg_block_filter);
> +
> +static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
> +{
> +	/*
> +	 * The gen4 pcie switch used on some ZZ systems has a bug where it'll
> +	 * throw URs in response to a cfg read to a range that's "reserved"
> +	 * work around it by blackholing.
> +	 */
> +	if (pd->dev_type == PCIE_TYPE_ENDPOINT && pd->class == 0x058000) {
> +		/*
> +		 * we match on the class code too since the switch might
> +		 * support an NTB endpoint.
> +		 */
> +		BLOCK_CFG_RANGE(pd, 0xa0, 0xff);
> +		BLOCK_CFG_RANGE(pd, 0x17c, 0xfff);
> +	} else if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
> +		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
> +		BLOCK_CFG_RANGE(pd, 0x284, 0x7f7);
> +	} else if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
> +		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
> +		BLOCK_CFG_RANGE(pd, 0x288, 0x7f7);
> 

Can we add the info from patch 3 about how we discovered these numbers here? 

Other than that, this looks ok.


> +	} else {
> +		return;
> +	}
> +
> +	PCINOTICE(phb, pd->bdfn, "Applied Microsemi Gen4 UR workaround\n");

> +}
> +
>  static void quirk_astbmc_vga(struct phb *phb __unused,
>  			     struct pci_device *pd)
>  {
> @@ -53,6 +110,7 @@ static void quirk_astbmc_vga(struct phb *phb __unused,
>  static const struct pci_quirk quirk_table[] = {
>  	/* ASPEED 2400 VGA device */
>  	{ 0x1a03, 0x2000, &quirk_astbmc_vga },
> +	{ 0x11f8, 0x4052, &quirk_microsemi_gen4_sw },
>  	{ 0, 0, NULL }
>  };
>
Alistair Popple July 24, 2019, 1:55 a.m. UTC | #2
On Friday, 19 July 2019 7:38:20 PM AEST Oliver O'Halloran wrote:
> Some Microsemi switches have a bug where they send URs if you perform a
> config read outside of a PCI Capability register block. Linux allows
> users to read the whole of config space and some tools (like lspci) will
> do this.
> 
> On POWER chips this UR triggers a spurious EEH which causes all manner
> of hell. This patch adds a pci-quirk for the relevant switch that blocks
> config space read and writes to the switch to prevent the issue.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  core/pci-quirk.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/core/pci-quirk.c b/core/pci-quirk.c
> index 0862e8dc4072..371ff62b4b72 100644
> --- a/core/pci-quirk.c
> +++ b/core/pci-quirk.c
> @@ -18,10 +18,67 @@
> 
>  #include <skiboot.h>
>  #include <pci.h>
> +#include <pci-cfg.h>
>  #include <pci-quirk.h>
>  #include <platform.h>
>  #include <ast.h>
> 
> +static int64_t cfg_block_filter(void *dev __unused,
> +				struct pci_cfg_reg_filter *pcrf __unused,
> +				uint32_t offset __unused, uint32_t len,
> +				uint32_t *data, bool write)
> +{
> +	if (write)
> +		return OPAL_SUCCESS;
> +
> +	switch (len) {
> +	case 4:
> +		*data = 0xF0F0F0F0;

Why 0xF0 and not 0x00? The PCIe spec seems to imply that unimplemented config 
space registers should read 0. Although if everything followed the spec this 
patch wouldn't exist.

> +		return OPAL_SUCCESS;
> +	case 2:
> +		*((uint16_t *)data) = 0xF0F0;
> +		return OPAL_SUCCESS;
> +	case 1:
> +		*((uint8_t *)data) = 0xF0;
> +		return OPAL_SUCCESS;
> +	}

Couldn't the above could be simplified to:

	*data = 0xF0F0F0F0;
	return OPAL_SUCCESS;

> +	return OPAL_PARAMETER; /* should never happen */
> +}
> +
> +/* blocks config accesses to registers in the range: [start, end] */
> +#define BLOCK_CFG_RANGE(pd, start, end) \
> +	pci_add_cfg_reg_filter(pd, start, end - start + 1, \
> +		PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \
> +		cfg_block_filter);
> +
> +static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
> +{
> +	/*
> +	 * The gen4 pcie switch used on some ZZ systems has a bug where it'll
> +	 * throw URs in response to a cfg read to a range that's "reserved"
> +	 * work around it by blackholing.
> +	 */
> +	if (pd->dev_type == PCIE_TYPE_ENDPOINT && pd->class == 0x058000) {
> +		/*
> +		 * we match on the class code too since the switch might
> +		 * support an NTB endpoint.
> +		 */
> +		BLOCK_CFG_RANGE(pd, 0xa0, 0xff);
> +		BLOCK_CFG_RANGE(pd, 0x17c, 0xfff);
> +	} else if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
> +		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
> +		BLOCK_CFG_RANGE(pd, 0x284, 0x7f7);
> +	} else if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
> +		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
> +		BLOCK_CFG_RANGE(pd, 0x288, 0x7f7);

I assume we don't expect a device with the same VDID to ever have a different 
config space layout? Would it be worth matching on the RevID as well? You would 
hope at least that would get bumped if the config space changed.

- Alistair

> +	} else {
> +		return;
> +	}
> +
> +	PCINOTICE(phb, pd->bdfn, "Applied Microsemi Gen4 UR workaround\n");
> +}
> +
>  static void quirk_astbmc_vga(struct phb *phb __unused,
>  			     struct pci_device *pd)
>  {
> @@ -53,6 +110,7 @@ static void quirk_astbmc_vga(struct phb *phb __unused,
>  static const struct pci_quirk quirk_table[] = {
>  	/* ASPEED 2400 VGA device */
>  	{ 0x1a03, 0x2000, &quirk_astbmc_vga },
> +	{ 0x11f8, 0x4052, &quirk_microsemi_gen4_sw },
>  	{ 0, 0, NULL }
>  };
Oliver O'Halloran July 24, 2019, 2:06 a.m. UTC | #3
On Wed, Jul 24, 2019 at 11:55 AM Alistair Popple <alistair@popple.id.au> wrote:
>
> On Friday, 19 July 2019 7:38:20 PM AEST Oliver O'Halloran wrote:
> > Some Microsemi switches have a bug where they send URs if you perform a
> > config read outside of a PCI Capability register block. Linux allows
> > users to read the whole of config space and some tools (like lspci) will
> > do this.
> >
> > On POWER chips this UR triggers a spurious EEH which causes all manner
> > of hell. This patch adds a pci-quirk for the relevant switch that blocks
> > config space read and writes to the switch to prevent the issue.
> >
> > Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> > ---
> >  core/pci-quirk.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 58 insertions(+)
> >
> > diff --git a/core/pci-quirk.c b/core/pci-quirk.c
> > index 0862e8dc4072..371ff62b4b72 100644
> > --- a/core/pci-quirk.c
> > +++ b/core/pci-quirk.c
> > @@ -18,10 +18,67 @@
> >
> >  #include <skiboot.h>
> >  #include <pci.h>
> > +#include <pci-cfg.h>
> >  #include <pci-quirk.h>
> >  #include <platform.h>
> >  #include <ast.h>
> >
> > +static int64_t cfg_block_filter(void *dev __unused,
> > +                             struct pci_cfg_reg_filter *pcrf __unused,
> > +                             uint32_t offset __unused, uint32_t len,
> > +                             uint32_t *data, bool write)
> > +{
> > +     if (write)
> > +             return OPAL_SUCCESS;
> > +
> > +     switch (len) {
> > +     case 4:
> > +             *data = 0xF0F0F0F0;
>
> Why 0xF0 and not 0x00? The PCIe spec seems to imply that unimplemented config
> space registers should read 0. Although if everything followed the spec this
> patch wouldn't exist.

No real reason, I just wanted a value that looked obviously invalid.
Making it zero is probably safer.

>
> > +             return OPAL_SUCCESS;
> > +     case 2:
> > +             *((uint16_t *)data) = 0xF0F0;
> > +             return OPAL_SUCCESS;
> > +     case 1:
> > +             *((uint8_t *)data) = 0xF0;
> > +             return OPAL_SUCCESS;
> > +     }
>
> Couldn't the above could be simplified to:
>
>         *data = 0xF0F0F0F0;
>         return OPAL_SUCCESS;

I don't think so. The config filter API is horrific and that u32
pointer is actually a pointer to a u8, u16, or u32 depending on what
type of config access is being done.

>
> > +     return OPAL_PARAMETER; /* should never happen */
> > +}
> > +
> > +/* blocks config accesses to registers in the range: [start, end] */
> > +#define BLOCK_CFG_RANGE(pd, start, end) \
> > +     pci_add_cfg_reg_filter(pd, start, end - start + 1, \
> > +             PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \
> > +             cfg_block_filter);
> > +
> > +static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
> > +{
> > +     /*
> > +      * The gen4 pcie switch used on some ZZ systems has a bug where it'll
> > +      * throw URs in response to a cfg read to a range that's "reserved"
> > +      * work around it by blackholing.
> > +      */
> > +     if (pd->dev_type == PCIE_TYPE_ENDPOINT && pd->class == 0x058000) {
> > +             /*
> > +              * we match on the class code too since the switch might
> > +              * support an NTB endpoint.
> > +              */
> > +             BLOCK_CFG_RANGE(pd, 0xa0, 0xff);
> > +             BLOCK_CFG_RANGE(pd, 0x17c, 0xfff);
> > +     } else if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
> > +             BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
> > +             BLOCK_CFG_RANGE(pd, 0x284, 0x7f7);
> > +     } else if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
> > +             BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
> > +             BLOCK_CFG_RANGE(pd, 0x288, 0x7f7);
>
> I assume we don't expect a device with the same VDID to ever have a different
> config space layout? Would it be worth matching on the RevID as well? You would
> hope at least that would get bumped if the config space changed.

Yeah that's one of the things I was wondering about. If you can enable
another capability by changing the switch configuration it'll change
the address ranges that need to be blacklisted. The 3rd patch (the
hack: one) in this series detects what offsets cause the UR rather
than assuming them, but it does take a relatively long time to scan
the config space of each function so I'm wasn't sure if we want to go
down that path or not.

Oliver
diff mbox series

Patch

diff --git a/core/pci-quirk.c b/core/pci-quirk.c
index 0862e8dc4072..371ff62b4b72 100644
--- a/core/pci-quirk.c
+++ b/core/pci-quirk.c
@@ -18,10 +18,67 @@ 
 
 #include <skiboot.h>
 #include <pci.h>
+#include <pci-cfg.h>
 #include <pci-quirk.h>
 #include <platform.h>
 #include <ast.h>
 
+static int64_t cfg_block_filter(void *dev __unused,
+				struct pci_cfg_reg_filter *pcrf __unused,
+				uint32_t offset __unused, uint32_t len,
+				uint32_t *data, bool write)
+{
+	if (write)
+		return OPAL_SUCCESS;
+
+	switch (len) {
+	case 4:
+		*data = 0xF0F0F0F0;
+		return OPAL_SUCCESS;
+	case 2:
+		*((uint16_t *)data) = 0xF0F0;
+		return OPAL_SUCCESS;
+	case 1:
+		*((uint8_t *)data) = 0xF0;
+		return OPAL_SUCCESS;
+	}
+
+	return OPAL_PARAMETER; /* should never happen */
+}
+
+/* blocks config accesses to registers in the range: [start, end] */
+#define BLOCK_CFG_RANGE(pd, start, end) \
+	pci_add_cfg_reg_filter(pd, start, end - start + 1, \
+		PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \
+		cfg_block_filter);
+
+static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
+{
+	/*
+	 * The gen4 pcie switch used on some ZZ systems has a bug where it'll
+	 * throw URs in response to a cfg read to a range that's "reserved"
+	 * work around it by blackholing.
+	 */
+	if (pd->dev_type == PCIE_TYPE_ENDPOINT && pd->class == 0x058000) {
+		/*
+		 * we match on the class code too since the switch might
+		 * support an NTB endpoint.
+		 */
+		BLOCK_CFG_RANGE(pd, 0xa0, 0xff);
+		BLOCK_CFG_RANGE(pd, 0x17c, 0xfff);
+	} else if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
+		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
+		BLOCK_CFG_RANGE(pd, 0x284, 0x7f7);
+	} else if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
+		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
+		BLOCK_CFG_RANGE(pd, 0x288, 0x7f7);
+	} else {
+		return;
+	}
+
+	PCINOTICE(phb, pd->bdfn, "Applied Microsemi Gen4 UR workaround\n");
+}
+
 static void quirk_astbmc_vga(struct phb *phb __unused,
 			     struct pci_device *pd)
 {
@@ -53,6 +110,7 @@  static void quirk_astbmc_vga(struct phb *phb __unused,
 static const struct pci_quirk quirk_table[] = {
 	/* ASPEED 2400 VGA device */
 	{ 0x1a03, 0x2000, &quirk_astbmc_vga },
+	{ 0x11f8, 0x4052, &quirk_microsemi_gen4_sw },
 	{ 0, 0, NULL }
 };