diff mbox series

[6/6] phb4: Retrain link if degraded

Message ID 20170912045619.31386-6-mikey@neuling.org
State Accepted
Headers show
Series [1/6] phb4: Remove stable retries | expand

Commit Message

Michael Neuling Sept. 12, 2017, 4:56 a.m. UTC
On P9 Scale Out (Nimbus) DD2.0 and Scale Up (Cumulus) DD1.0 (and
below) the PCIe PHY can lock up, causing training issues. This can cause
a degradation in speed or width in ~5% of training cases (depending on
the card). This is fixed in later chip revisions. This issue can also
cause PCIe links to not train at all, but this case is already
handled.

This patch checks if the PCIe link has trained optimally and if not,
does a full PHB reset (to fix the PHY lockup) and retrain.

One complication is that some devices are known to train degraded unless
device-specific configuration is performed. Because of this, we only
retrain when the device is in a whitelist. All devices in the current
whitelist have been tested on a P9DSU/Boston, ZZ and Witherspoon.

We always gather information on the link and print it in the logs even
if the card is not in the whitelist.

For testing purposes, there's an nvram option to retry for all PCIe cards
and all P9 chips when a degraded link is detected. The new option is
'pci-retry-all=true' which can be set using:
  nvram -p ibm,skiboot --update-config pci-retry-all=true
This option may increase the boot time if used on a badly behaving
card.

Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 hw/phb4.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 133 insertions(+), 1 deletion(-)

Comments

Michael Neuling Sept. 12, 2017, 6:28 a.m. UTC | #1
> +	/* Chips that need this retry are:
> +	 *  - CUMULUS DD1.0
> +	 *  - NIMBUS DD2.0 and below
> +	 */
> +	pvr = mfspr(SPR_PVR);
> +	if (pvr & PVR_POWER9_CUMULUS) {
> +		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
> +			return true;

Oops, that's wrong... Major should be 1.  ie.

diff --git a/hw/phb4.c b/hw/phb4.c
index a3bde18756..c6bb6292ab 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -2408,7 +2408,7 @@ static bool phb4_chip_retry_workaround(void)
 	 */
 	pvr = mfspr(SPR_PVR);
 	if (pvr & PVR_POWER9_CUMULUS) {
-		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
+		if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
 			return true;
 	} else { /* NIMBUS */
 		if (PVR_VERS_MAJ(pvr) == 1)
Stewart Smith Sept. 13, 2017, 7:09 a.m. UTC | #2
Michael Neuling <mikey@neuling.org> writes:
> Oops, that's wrong... Major should be 1.  ie.
>
> diff --git a/hw/phb4.c b/hw/phb4.c
> index a3bde18756..c6bb6292ab 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -2408,7 +2408,7 @@ static bool phb4_chip_retry_workaround(void)
>  	 */
>  	pvr = mfspr(SPR_PVR);
>  	if (pvr & PVR_POWER9_CUMULUS) {
> -		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
> +		if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
>  			return true;
>  	} else { /* NIMBUS */
>  		if (PVR_VERS_MAJ(pvr) == 1)

With this fixup, series merged to master as of
3f936bae970e8ce116151b7106bf7d6437046c19

(now to just shepherd this into all the various places)
diff mbox series

Patch

diff --git a/hw/phb4.c b/hw/phb4.c
index 8c24238f8e..52270482a8 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -141,6 +141,7 @@  static void phb4_init_hw(struct phb4 *p, bool first_init);
 static bool verbose_eeh;
 static bool pci_tracing;
 static bool pci_eeh_mmio;
+static bool pci_retry_all;
 
 enum capi_dma_tvt {
 	CAPI_DMA_TVT0,
@@ -2261,7 +2262,7 @@  static int64_t phb4_retry_state(struct pci_slot *slot)
 			PHBERR(p, "Electrical link detected but won't train\n");
 			break;
 		case PHB4_SLOT_LINK_STABLE:
-			PHBERR(p, "Linked trained but wasn't stable\n");
+			PHBERR(p, "Linked trained but was degraded or unstable\n");
 			break;
 		default:
 			PHBERR(p, "Unknown link issue\n");
@@ -2380,6 +2381,131 @@  static bool phb4_check_reg(struct phb4 *p, uint64_t reg)
 	return true;
 }
 
+static void phb4_get_info(struct phb *phb, uint16_t bdfn, uint8_t *speed,
+			  uint8_t *width)
+{ /* Reads bdfn's PCIe LCAP register; stores its MAXSPD/MAXWDTH fields in *speed/*width */
+	int32_t ecap; /* value returned by pci_find_cap() for PCI_CFG_CAP_ID_EXP */
+	uint32_t cap; /* raw contents of the LCAP register */
+
+	ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP); /* NOTE(review): result is used unchecked */
+	pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_LCAP, &cap); /* NOTE(review): return value ignored */
+	*width = (cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4; /* MAXWDTH field shifted down by 4 bits */
+	*speed = cap & PCICAP_EXP_LCAP_MAXSPD; /* MAXSPD field is used unshifted */
+}
+
+#define PVR_POWER9_CUMULUS		0x00002000 /* PVR bit tested below to pick the Cumulus branch */
+
+static bool phb4_chip_retry_workaround(void)
+{
+	unsigned int pvr;
+
+	if (pci_retry_all) /* testing override: treat every chip as affected */
+		return true;
+
+	/* Returns true when this chip needs the degraded-link retry:
+	 *  - CUMULUS DD1.0 (major 1, minor 0)
+	 *  - NIMBUS DD2.0 and below (any major 1, or major 2 minor 0)
+	 */
+	pvr = mfspr(SPR_PVR);
+	if (pvr & PVR_POWER9_CUMULUS) {
+		if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
+			return true;
+	} else { /* NIMBUS */
+		if (PVR_VERS_MAJ(pvr) == 1)
+			return true;
+		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
+			return true;
+	}
+	return false;
+}
+
+struct pci_card_id { /* vendor/device ID pair naming one PCI card model */
+	uint16_t vendor; /* matched against VENDOR(vdid) */
+	uint16_t device; /* matched against DEVICE(vdid) */
+};
+
+static const struct pci_card_id retry_whitelist[] = { /* cards allowed to trigger a retrain */
+	{ 0x1000, 0x005d }, /* LSI Logic MegaRAID SAS-3 3108 */
+	{ 0x1000, 0x00c9 }, /* LSI MPT SAS-3 */
+	{ 0x104c, 0x8241 }, /* TI xHCI USB */
+	{ 0x1077, 0x2261 }, /* QLogic ISP2722-based 16/32Gb FC */
+	{ 0x10b5, 0x8725 }, /* PLX Switch: p9dsu, witherspoon */
+	{ 0x10b5, 0x8748 }, /* PLX Switch: ZZ */
+	{ 0x11f8, 0xf117 }, /* PMC-Sierra/MicroSemi NV1604 */
+	{ 0x15b3, 0x1013 }, /* Mellanox CX-4 */
+	{ 0x15b3, 0x1019 }, /* Mellanox CX-5 */
+	{ 0x1a03, 0x1150 }, /* ASPEED AST2500 Switch */
+	{ 0x8086, 0x10fb }, /* Intel x520 10G Eth */
+	{ 0x9005, 0x028d }, /* MicroSemi PM8069 */
+};
+
+#define VENDOR(vdid) ((vdid) & 0xffff) /* vendor ID: low 16 bits of vdid */
+#define DEVICE(vdid) (((vdid) >> 16) & 0xffff) /* device ID: high 16 bits of vdid */
+
+static bool phb4_adapter_in_whitelist(uint32_t vdid)
+{ /* Returns true if vdid matches a retry_whitelist entry or pci_retry_all is set */
+	size_t i; /* array index; avoids a signed/unsigned compare with ARRAY_SIZE() */
+
+	if (pci_retry_all) /* testing override: every adapter qualifies */
+		return true;
+
+	for (i = 0; i < ARRAY_SIZE(retry_whitelist); i++)
+		if ((retry_whitelist[i].vendor == VENDOR(vdid)) &&
+		    (retry_whitelist[i].device == DEVICE(vdid)))
+			return true;
+
+	return false;
+}
+
+#define min(x,y) ((x) < (y) ? (x) : (y)) /* NB: each argument is evaluated twice */
+
+static bool phb4_link_optimal(struct pci_slot *slot)
+{
+	struct phb4 *p = phb_to_phb4(slot->phb);
+	uint32_t vdid;
+	uint16_t bdfn;
+	uint8_t trained_speed, phb_speed, dev_speed, target_speed;
+	uint8_t trained_width, phb_width, dev_width, target_width;
+	bool optimal_speed, optimal_width, optimal, retry_enabled;
+
+	/* Result is false only when retry is enabled and the link trained below target */
+	/* Current trained state */
+	phb4_get_link_info(slot, &trained_speed, &trained_width);
+
+	/* Get PHB capability */
+	/* NOTE: phb_speed will account for the software speed limit */
+	phb4_get_info(slot->phb, 0, &phb_speed, &phb_width);
+
+	/* Get device capability */
+	bdfn = 0x0100; /* bus=1 dev=0 function=0 */
+	/* Since this is the first access, we need to wait for CRS */
+	if (!pci_wait_crs(slot->phb, bdfn , &vdid))
+		return true; /* pci_wait_crs() failed: report optimal so no retry is attempted */
+	phb4_get_info(slot->phb, bdfn, &dev_speed, &dev_width);
+
+	/* Work out if we are optimally trained */
+	target_speed = min(phb_speed, dev_speed);
+	optimal_speed = (trained_speed >= target_speed);
+	target_width = min(phb_width, dev_width);
+	optimal_width = (trained_width >= target_width);
+	optimal = optimal_width && optimal_speed;
+	retry_enabled = phb4_chip_retry_workaround() &&
+		phb4_adapter_in_whitelist(vdid);
+
+	PHBDBG(p, "LINK: Card [%04x:%04x] %s Retry:%s\n", VENDOR(vdid),
+	       DEVICE(vdid), optimal ? "Optimal" : "Degraded",
+	       retry_enabled ? "enabled" : "disabled");
+	PHBDBG(p, "LINK: Speed Train:GEN%i PHB:GEN%i DEV:GEN%i%s\n",
+	       trained_speed, phb_speed, dev_speed, optimal_speed ? "" : " *");
+	PHBDBG(p, "LINK: Width Train:x%02i PHB:x%02i DEV:x%02i%s\n",
+	       trained_width, phb_width, dev_width, optimal_width ? "" : " *");
+
+	if (!retry_enabled)
+		return true; /* link state was logged above; never request a retry here */
+
+	return optimal;
+}
+
 /*
  * This is a trace function to watch what's happening duing pcie link
  * training.  If any errors are detected it simply returns so the
@@ -2494,6 +2620,10 @@  static int64_t phb4_poll_link(struct pci_slot *slot)
 		}
 		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
 			PHBDBG(p, "LINK: Link is stable\n");
+			if (!phb4_link_optimal(slot)) {
+				PHBERR(p, "LINK: Link degraded\n");
+				return phb4_retry_state(slot);
+			}
 			pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
 			return OPAL_SUCCESS;
 		}
@@ -4963,6 +5093,8 @@  void probe_phb4(void)
 
 	pci_tracing = nvram_query_eq("pci-tracing", "true");
 	pci_eeh_mmio = !nvram_query_eq("pci-eeh-mmio", "disabled");
+	pci_retry_all = nvram_query_eq("pci-retry-all", "true");
+
 	/* Look for PBCQ XSCOM nodes */
 	dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
 		phb4_probe_pbcq(np);