diff mbox series

[v3,06/10] npu2-hw-procedures: Add support for OpenCAPI PHY link training

Message ID e481e43eb16556018010e91b4c2cda8c01a4ded4.1517391242.git-series.andrew.donnellan@au1.ibm.com
State Superseded
Headers show
Series Initial OpenCAPI 3.0 Support for P9 | expand

Commit Message

Andrew Donnellan Jan. 31, 2018, 9:34 a.m. UTC
Unlike NVLink, which uses the pci-virt framework to fake a PCI
configuration space for NVLink devices, the OpenCAPI device model presents
us with a real configuration space handled by the device over the OpenCAPI
link.

As a result, we have to train the OpenCAPI link in skiboot before we do PCI
probing, so that config space can be accessed, rather than having link
training triggered by the Linux driver.

Add some helper functions to wrap the existing NVLink PHY training sequence
so we can easily run it within skiboot.

Additionally, we add OpenCAPI-specific lane settings, and a function to
"bump" lanes that haven't trained properly (this process isn't documented
in the workbook, but the hardware experts assure us that this improves link
training reliability...)

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
---
 hw/npu2-hw-procedures.c | 77 ++++++++++++++++++++++++++++++++++++++++++-
 include/npu2-regs.h     |  6 +++-
 include/npu2.h          |  3 ++-
 3 files changed, 86 insertions(+)

Comments

Frederic Barrat Jan. 31, 2018, 7:01 p.m. UTC | #1
Le 31/01/2018 à 10:34, Andrew Donnellan a écrit :
> Unlike NVLink, which uses the pci-virt framework to fake a PCI
> configuration space for NVLink devices, the OpenCAPI device model presents
> us with a real configuration space handled by the device over the OpenCAPI
> link.
> 
> As a result, we have to train the OpenCAPI link in skiboot before we do PCI
> probing, so that config space can be accessed, rather than having link
> training being triggered by the Linux driver.
> 
> Add some helper functions to wrap the existing NVLink PHY training sequence
> so we can easily run it within skiboot.
> 
> Additionally, we add OpenCAPI-specific lane settings, and a function to
> "bump" lanes that haven't trained properly (this process isn't documented
> in the workbook, but the hardware experts assure us that this improves link
> training reliability...)
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> ---

Reviewed-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>


>   hw/npu2-hw-procedures.c | 77 ++++++++++++++++++++++++++++++++++++++++++-
>   include/npu2-regs.h     |  6 +++-
>   include/npu2.h          |  3 ++-
>   3 files changed, 86 insertions(+)
> 
> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
> index d66632f..1dc583b 100644
> --- a/hw/npu2-hw-procedures.c
> +++ b/hw/npu2-hw-procedures.c
> @@ -60,6 +60,7 @@ struct npu2_phy_reg NPU2_PHY_TX_FIFO_INIT		= {0x105, 53, 1};
>   struct npu2_phy_reg NPU2_PHY_TX_RXCAL			= {0x103, 57, 1};
>   struct npu2_phy_reg NPU2_PHY_RX_INIT_DONE		= {0x0ca, 48, 1};
>   struct npu2_phy_reg NPU2_PHY_RX_PR_EDGE_TRACK_CNTL	= {0x092, 48, 2};
> +struct npu2_phy_reg NPU2_PHY_RX_PR_BUMP_SL_1UI		= {0x092, 57, 1};
>   struct npu2_phy_reg NPU2_PHY_RX_PR_FW_OFF		= {0x08a, 56, 1};
>   struct npu2_phy_reg NPU2_PHY_RX_PR_FW_INERTIA_AMT	= {0x08a, 57, 3};
>   struct npu2_phy_reg NPU2_PHY_RX_CFG_LTE_MC		= {0x000, 60, 4};
> @@ -68,6 +69,8 @@ struct npu2_phy_reg NPU2_PHY_RX_B_INTEG_COARSE_GAIN	= {0x026, 48, 4};
>   struct npu2_phy_reg NPU2_PHY_RX_E_INTEG_COARSE_GAIN	= {0x030, 48, 4};
> 
>   /* These registers are per-PHY, not per lane */
> +struct npu2_phy_reg NPU2_PHY_RX_SPEED_SELECT		= {0x262, 51, 2};
> +struct npu2_phy_reg NPU2_PHY_RX_AC_COUPLED		= {0x262, 53, 1};
>   struct npu2_phy_reg NPU2_PHY_TX_ZCAL_SWO_EN		= {0x3c9, 48, 1};
>   struct npu2_phy_reg NPU2_PHY_TX_ZCAL_REQ		= {0x3c1, 49, 1};
>   struct npu2_phy_reg NPU2_PHY_TX_ZCAL_DONE		= {0x3c1, 50, 1};
> @@ -378,6 +381,11 @@ static uint32_t phy_reset_complete(struct npu2_dev *ndev)
>   {
>   	int lane;
> 
> +	if (ndev->type == NPU2_DEV_TYPE_OPENCAPI) {
> +		phy_write(ndev, &NPU2_PHY_RX_AC_COUPLED, 1);
> +		phy_write(ndev, &NPU2_PHY_RX_SPEED_SELECT, 1);
> +	}
> +
>   	FOR_EACH_LANE(ndev, lane) {
>   		phy_write_lane(ndev, &NPU2_PHY_RX_LANE_ANA_PDWN, lane, 0);
>   		phy_write_lane(ndev, &NPU2_PHY_RX_LANE_DIG_PDWN, lane, 0);
> @@ -887,3 +895,72 @@ void npu2_dev_procedure_reset(struct npu2_dev *dev)
>   {
>   	npu2_clear_link_flag(dev, NPU2_DEV_DL_RESET);
>   }
> +
> +static uint32_t run_procedure(struct npu2_dev *dev, uint16_t procedure_number)
> +{
> +	struct procedure *proc;
> +	const char *name;
> +	uint32_t result;
> +
> +	assert(procedure_number <= ARRAY_SIZE(npu_procedures));
> +	proc = npu_procedures[procedure_number];
> +	assert(proc);
> +
> +	name = proc->name;
> +	NPU2DEVINF(dev, "Running procedure %s\n", name);
> +	dev->procedure_status = PROCEDURE_INPROGRESS;
> +	dev->procedure_number = procedure_number;
> +	dev->procedure_step = 0;
> +	dev->procedure_data = 0;
> +	dev->procedure_tb = mftb();
> +
> +	result = get_procedure_status(dev);
> +	while (!(result & PROCEDURE_COMPLETE)) {
> +		time_wait_ms(1);
> +		result = get_procedure_status(dev);
> +	}
> +	return result;
> +}
> +
> +void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev)
> +{
> +	uint64_t reg;
> +	uint64_t status_xscom;
> +	int lane, bit = 7;
> +
> +	switch (dev->index) {
> +	case 2:
> +		status_xscom = OB0_ODL0_TRAINING_STATUS;
> +		break;
> +	case 3:
> +		status_xscom = OB0_ODL1_TRAINING_STATUS;
> +		break;
> +	case 4:
> +		status_xscom = OB3_ODL0_TRAINING_STATUS;
> +		break;
> +	case 5:
> +		status_xscom = OB3_ODL1_TRAINING_STATUS;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +	xscom_read(dev->npu->chip_id, status_xscom, &reg);
> +	reg = GETFIELD(OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B, reg);
> +
> +	FOR_EACH_LANE(dev, lane) {
> +		if (reg & (1 << bit--))
> +			continue;
> +		prlog(PR_TRACE, "OCAPI: bumpui bumping lane %d\n", lane);
> +		for (int i = 0; i < 4; i++) {
> +			phy_write_lane(dev, &NPU2_PHY_RX_PR_BUMP_SL_1UI, lane, 1);
> +			phy_write_lane(dev, &NPU2_PHY_RX_PR_BUMP_SL_1UI, lane, 0);
> +		}
> +	}
> +}
> +
> +void npu2_opencapi_phy_setup(struct npu2_dev *dev)
> +{
> +	run_procedure(dev, 4); /* procedure_phy_reset */
> +	run_procedure(dev, 5); /* procedure_phy_tx_zcal */
> +	run_procedure(dev, 6); /* procedure_phy_rx_dccal */
> +}
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 9732ec9..b219ad7 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -553,4 +553,10 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define    PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE	PPC_BIT(52)
>   #define    PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE	PPC_BIT(57)
> 
> +#define OB0_ODL0_TRAINING_STATUS		0x901082E
> +#define OB0_ODL1_TRAINING_STATUS		0x901082F
> +#define OB3_ODL0_TRAINING_STATUS		0xC01082E
> +#define OB3_ODL1_TRAINING_STATUS		0xC01082F
> +#define   OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B PPC_BITMASK(8, 15)
> +
>   #endif /* __NPU2_REGS_H */
> diff --git a/include/npu2.h b/include/npu2.h
> index 7b2b509..2922d21 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -200,9 +200,12 @@ int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
>   			   uint32_t offset, uint32_t len, uint32_t *data,
>   			   bool write);
>   void npu2_dev_procedure_reset(struct npu2_dev *dev);
> +
>   void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag);
>   void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag);
>   extern int nv_zcal_nominal;
>   bool is_p9dd1(void);
> +void npu2_opencapi_phy_setup(struct npu2_dev *dev);
> +void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
> 
>   #endif /* __NPU2_H */
>
Reza Arbab Jan. 31, 2018, 9:30 p.m. UTC | #2
On Wed, Jan 31, 2018 at 08:34:46PM +1100, Andrew Donnellan wrote:
>Unlike NVLink, which uses the pci-virt framework to fake a PCI
>configuration space for NVLink devices, the OpenCAPI device model presents
>us with a real configuration space handled by the device over the OpenCAPI
>link.
>
>As a result, we have to train the OpenCAPI link in skiboot before we do PCI
>probing, so that config space can be accessed, rather than having link
>training being triggered by the Linux driver.
>
>Add some helper functions to wrap the existing NVLink PHY training sequence
>so we can easily run it within skiboot.
>
>Additionally, we add OpenCAPI-specific lane settings, and a function to
>"bump" lanes that haven't trained properly (this process isn't documented
>in the workbook, but the hardware experts assure us that this improves link
>training reliability...)
>
>Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

Acked-by: Reza Arbab <arbab@linux.vnet.ibm.com>
diff mbox series

Patch

diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
index d66632f..1dc583b 100644
--- a/hw/npu2-hw-procedures.c
+++ b/hw/npu2-hw-procedures.c
@@ -60,6 +60,7 @@  struct npu2_phy_reg NPU2_PHY_TX_FIFO_INIT		= {0x105, 53, 1};
 struct npu2_phy_reg NPU2_PHY_TX_RXCAL			= {0x103, 57, 1};
 struct npu2_phy_reg NPU2_PHY_RX_INIT_DONE		= {0x0ca, 48, 1};
 struct npu2_phy_reg NPU2_PHY_RX_PR_EDGE_TRACK_CNTL	= {0x092, 48, 2};
+struct npu2_phy_reg NPU2_PHY_RX_PR_BUMP_SL_1UI		= {0x092, 57, 1};
 struct npu2_phy_reg NPU2_PHY_RX_PR_FW_OFF		= {0x08a, 56, 1};
 struct npu2_phy_reg NPU2_PHY_RX_PR_FW_INERTIA_AMT	= {0x08a, 57, 3};
 struct npu2_phy_reg NPU2_PHY_RX_CFG_LTE_MC		= {0x000, 60, 4};
@@ -68,6 +69,8 @@  struct npu2_phy_reg NPU2_PHY_RX_B_INTEG_COARSE_GAIN	= {0x026, 48, 4};
 struct npu2_phy_reg NPU2_PHY_RX_E_INTEG_COARSE_GAIN	= {0x030, 48, 4};
 
 /* These registers are per-PHY, not per lane */
+struct npu2_phy_reg NPU2_PHY_RX_SPEED_SELECT		= {0x262, 51, 2};
+struct npu2_phy_reg NPU2_PHY_RX_AC_COUPLED		= {0x262, 53, 1};
 struct npu2_phy_reg NPU2_PHY_TX_ZCAL_SWO_EN		= {0x3c9, 48, 1};
 struct npu2_phy_reg NPU2_PHY_TX_ZCAL_REQ		= {0x3c1, 49, 1};
 struct npu2_phy_reg NPU2_PHY_TX_ZCAL_DONE		= {0x3c1, 50, 1};
@@ -378,6 +381,11 @@  static uint32_t phy_reset_complete(struct npu2_dev *ndev)
 {
 	int lane;
 
+	if (ndev->type == NPU2_DEV_TYPE_OPENCAPI) {
+		phy_write(ndev, &NPU2_PHY_RX_AC_COUPLED, 1);
+		phy_write(ndev, &NPU2_PHY_RX_SPEED_SELECT, 1);
+	}
+
 	FOR_EACH_LANE(ndev, lane) {
 		phy_write_lane(ndev, &NPU2_PHY_RX_LANE_ANA_PDWN, lane, 0);
 		phy_write_lane(ndev, &NPU2_PHY_RX_LANE_DIG_PDWN, lane, 0);
@@ -887,3 +895,72 @@  void npu2_dev_procedure_reset(struct npu2_dev *dev)
 {
 	npu2_clear_link_flag(dev, NPU2_DEV_DL_RESET);
 }
+
+/* Run a PHY procedure to completion, polling every 1ms; returns final status */
+static uint32_t run_procedure(struct npu2_dev *dev, uint16_t procedure_number)
+{
+	struct procedure *proc;
+	uint32_t result;
+
+	/* Valid indices stop at ARRAY_SIZE - 1, so the bound must be exclusive */
+	assert(procedure_number < ARRAY_SIZE(npu_procedures));
+	proc = npu_procedures[procedure_number];
+	assert(proc);
+
+	NPU2DEVINF(dev, "Running procedure %s\n", proc->name);
+	dev->procedure_status = PROCEDURE_INPROGRESS;
+	dev->procedure_number = procedure_number;
+	dev->procedure_step = 0;
+	dev->procedure_data = 0;
+	dev->procedure_tb = mftb();
+
+	result = get_procedure_status(dev);
+	while (!(result & PROCEDURE_COMPLETE)) {
+		time_wait_ms(1);
+		result = get_procedure_status(dev);
+	}
+	return result;
+}
+
+/*
+ * Read this link's ODL training status and, for each lane whose RX pattern B
+ * bit is clear, pulse RX_PR_BUMP_SL_1UI (write 1 then 0) four times.
+ */
+void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev)
+{
+	uint64_t status, status_addr;
+	int lane, pattern_bit = 7;
+
+	/* Pick the training status register for this device index (2-5 only) */
+	if (dev->index == 2)
+		status_addr = OB0_ODL0_TRAINING_STATUS;
+	else if (dev->index == 3)
+		status_addr = OB0_ODL1_TRAINING_STATUS;
+	else if (dev->index == 4)
+		status_addr = OB3_ODL0_TRAINING_STATUS;
+	else if (dev->index == 5)
+		status_addr = OB3_ODL1_TRAINING_STATUS;
+	else
+		assert(false);
+	/* NOTE(review): the xscom_read() result is not checked */
+	xscom_read(dev->npu->chip_id, status_addr, &status);
+	status = GETFIELD(OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B, status);
+
+	/* Pattern bits are consumed from bit 7 downwards, one per lane visited */
+	FOR_EACH_LANE(dev, lane) {
+		if (status & (1 << pattern_bit--))
+			continue;
+		prlog(PR_TRACE, "OCAPI: bumpui bumping lane %d\n", lane);
+		for (int pulse = 0; pulse < 4; pulse++) {
+			phy_write_lane(dev, &NPU2_PHY_RX_PR_BUMP_SL_1UI, lane, 1);
+			phy_write_lane(dev, &NPU2_PHY_RX_PR_BUMP_SL_1UI, lane, 0);
+		}
+	}
+}
+
+void npu2_opencapi_phy_setup(struct npu2_dev *dev)
+{
+	/* Run procedures 4..6 in order: phy_reset, phy_tx_zcal, phy_rx_dccal */
+	for (uint16_t nr = 4; nr <= 6; nr++)
+		run_procedure(dev, nr);
+}
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 9732ec9..b219ad7 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -553,4 +553,10 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define    PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE	PPC_BIT(52)
 #define    PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE	PPC_BIT(57)
 
+#define OB0_ODL0_TRAINING_STATUS		0x901082E
+#define OB0_ODL1_TRAINING_STATUS		0x901082F
+#define OB3_ODL0_TRAINING_STATUS		0xC01082E
+#define OB3_ODL1_TRAINING_STATUS		0xC01082F
+#define   OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B PPC_BITMASK(8, 15)
+
 #endif /* __NPU2_REGS_H */
diff --git a/include/npu2.h b/include/npu2.h
index 7b2b509..2922d21 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -200,9 +200,12 @@  int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
 			   uint32_t offset, uint32_t len, uint32_t *data,
 			   bool write);
 void npu2_dev_procedure_reset(struct npu2_dev *dev);
+
 void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag);
 void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag);
 extern int nv_zcal_nominal;
 bool is_p9dd1(void);
+void npu2_opencapi_phy_setup(struct npu2_dev *dev);
+void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
 
 #endif /* __NPU2_H */