phb4: Dump CAPP error registers when it asserts link down

Message ID 20180108115305.9998-1-vaibhav@linux.vnet.ibm.com
State Accepted
Headers show
Series
  • phb4: Dump CAPP error registers when it asserts link down
Related show

Commit Message

Vaibhav Jain Jan. 8, 2018, 11:53 a.m.
This patch introduces a new function phb4_dump_capp_err_regs() that
dumps CAPP error registers in case the PEC nestfir register indicates
that the fence was due to a CAPP error (BIT-24).

Contents of these registers are helpful in diagnosing CAPP
issues. Registers that are dumped in phb4_dump_capp_err_regs() are:

* CAPP FIR Register
* CAPP APC Master Error Report Register
* CAPP Snoop Error Report Register
* CAPP Transport Error Report Register
* CAPP TLBI Error Report Register
* CAPP Error Status and Control Register

Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
---
 hw/phb4.c           | 36 +++++++++++++++++++++++++++++++-----
 include/phb4-capp.h |  8 ++++++--
 include/phb4-regs.h |  1 +
 3 files changed, 38 insertions(+), 7 deletions(-)

Comments

Andrew Donnellan Jan. 11, 2018, 6:59 a.m. | #1
On 08/01/18 22:53, Vaibhav Jain wrote:
> This patch introduces a new function phb4_dump_app_err_regs() that
> dumps CAPP error registers in case the PEC nestfir register indicates
> that the fence was due to a CAPP error (BIT-24).
> 
> Contents of these registers are helpful in diagnosing CAPP
> issues. Registers that are dumped in phb4_dump_app_err_regs() are:
> 
> * CAPP FIR Register
> * CAPP APC Master Error Report Register
> * CAPP Snoop Error Report Register
> * CAPP Transport Error Report Register
> * CAPP TLBI Error Report Register
> * CAPP Error Status and Control Register
> 
> Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>

Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

> ---
>   hw/phb4.c           | 36 +++++++++++++++++++++++++++++++-----
>   include/phb4-capp.h |  8 ++++++--
>   include/phb4-regs.h |  1 +
>   3 files changed, 38 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/phb4.c b/hw/phb4.c
> index 6c59462b..ff912e1f 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -2338,6 +2338,28 @@ static void phb4_train_info(struct phb4 *p, uint64_t reg, unsigned long time)
>   	PHBERR(p, "%s\n", s);
>   }
> 
> +static void phb4_dump_capp_err_regs(struct phb4 *p)
> +{
> +	uint64_t fir, apc_master_err, snoop_err, transport_err;
> +	uint64_t tlbi_err, capp_err_status;
> +	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
> +
> +	xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
> +	xscom_read(p->chip_id, CAPP_APC_MASTER_ERR_RPT + offset,
> +		   &apc_master_err);
> +	xscom_read(p->chip_id, CAPP_SNOOP_ERR_RTP + offset, &snoop_err);
> +	xscom_read(p->chip_id, CAPP_TRANSPORT_ERR_RPT + offset, &transport_err);
> +	xscom_read(p->chip_id, CAPP_TLBI_ERR_RPT + offset, &tlbi_err);
> +	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &capp_err_status);
> +
> +	PHBERR(p, "           CAPP FIR=%016llx\n", fir);
> +	PHBERR(p, "CAPP APC MASTER ERR=%016llx\n", apc_master_err);
> +	PHBERR(p, "     CAPP SNOOP ERR=%016llx\n", snoop_err);
> +	PHBERR(p, " CAPP TRANSPORT ERR=%016llx\n", transport_err);
> +	PHBERR(p, "      CAPP TLBI ERR=%016llx\n", tlbi_err);
> +	PHBERR(p, "    CAPP ERR STATUS=%016llx\n", capp_err_status);
> +}
> +
>   /* Check if AIB is fenced via PBCQ NFIR */
>   static bool phb4_fenced(struct phb4 *p)
>   {
> @@ -2369,16 +2391,20 @@ static bool phb4_fenced(struct phb4 *p)
>   	xscom_read(p->chip_id,
>   		   p->pci_stk_xscom + XPEC_PCI_STK_PBAIB_ERR_REPORT, &err_aib);
> 
> -	PHBERR(p, " PCI FIR=%016llx\n", nfir_p);
> -	PHBERR(p, "NEST FIR=%016llx\n", nfir_n);
> -	PHBERR(p, "ERR RPT0=%016llx\n", err_rpt0);
> -	PHBERR(p, "ERR RPT1=%016llx\n", err_rpt1);
> -	PHBERR(p, " AIB ERR=%016llx\n", err_aib);
> +	PHBERR(p, "            PCI FIR=%016llx\n", nfir_p);
> +	PHBERR(p, "           NEST FIR=%016llx\n", nfir_n);
> +	PHBERR(p, "           ERR RPT0=%016llx\n", err_rpt0);
> +	PHBERR(p, "           ERR RPT1=%016llx\n", err_rpt1);
> +	PHBERR(p, "            AIB ERR=%016llx\n", err_aib);
> 
>   	/* Mark ourselves fenced */
>   	p->flags |= PHB4_AIB_FENCED;
>   	p->state = PHB4_STATE_FENCED;
> 
> +	/* dump capp error registers in case phb was fenced due to capp */
> +	if (nfir_n & XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP)
> +		phb4_dump_capp_err_regs(p);
> +
>   	phb4_eeh_dump_regs(p);
> 
>   	return true;
> diff --git a/include/phb4-capp.h b/include/phb4-capp.h
> index 10cdc406..68200ac5 100644
> --- a/include/phb4-capp.h
> +++ b/include/phb4-capp.h
> @@ -26,6 +26,12 @@
>   #define CAPP_FIR_MASK				0x2010803
>   #define CAPP_FIR_ACTION0			0x2010806
>   #define CAPP_FIR_ACTION1			0x2010807
> +#define CAPP_SNOOP_ERR_RTP			0x201080A
> +#define CAPP_APC_MASTER_ERR_RPT			0x201080B
> +#define CAPP_TRANSPORT_ERR_RPT			0x201080C
> +#define CAPP_TLBI_ERR_RPT			0x201080D
> +#define CAPP_ERR_STATUS_CTRL			0x201080E
> +#define FLUSH_SUE_STATE_MAP			0x201080F
>   #define CAPP_ERR_RPT_CLR			0x2010813
>   #define APC_MASTER_PB_CTRL			0x2010818
>   #define APC_MASTER_CAPI_CTRL			0x2010819
> @@ -36,8 +42,6 @@
>   #define TRANSPORT_CONTROL			0x201081C
>   #define CAPP_TB					0x2010826
>   #define CAPP_TFMR				0x2010827
> -#define CAPP_ERR_STATUS_CTRL			0x201080E
> -#define FLUSH_SUE_STATE_MAP			0x201080F
>   #define FLUSH_CPIG_STATE_MAP			0x2010820
>   #define FLUSH_SUE_UOP1				0x2010843  /* Satellite 2 */
>   #define APC_FSM_READ_MASK			0x2010823
> diff --git a/include/phb4-regs.h b/include/phb4-regs.h
> index 0d8aa48b..2dc64fe5 100644
> --- a/include/phb4-regs.h
> +++ b/include/phb4-regs.h
> @@ -343,6 +343,7 @@
> 
>   /* Nest base per-stack registers */
>   #define XPEC_NEST_STK_PCI_NFIR			0x0
> +#define   XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP	PPC_BIT(24)
>   #define XPEC_NEST_STK_PCI_NFIR_CLR		0x1
>   #define XPEC_NEST_STK_PCI_NFIR_SET		0x2
>   #define XPEC_NEST_STK_PCI_NFIR_MSK		0x3
>
christophe lombard Jan. 11, 2018, 10:19 a.m. | #2
Le 08/01/2018 à 12:53, Vaibhav Jain a écrit :
> This patch introduces a new function phb4_dump_app_err_regs() that
> dumps CAPP error registers in case the PEC nestfir register indicates
> that the fence was due to a CAPP error (BIT-24).
> 
> Contents of these registers are helpful in diagnosing CAPP
> issues. Registers that are dumped in phb4_dump_app_err_regs() are:
> 

This sounds good. Thanks
Reviewed-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Russell Currey Jan. 11, 2018, 11:52 p.m. | #3
On Mon, 2018-01-08 at 17:23 +0530, Vaibhav Jain wrote:
> This patch introduces a new function phb4_dump_app_err_regs() that
> dumps CAPP error registers in case the PEC nestfir register indicates
> that the fence was due to a CAPP error (BIT-24).
> 
> Contents of these registers are helpful in diagnosing CAPP
> issues. Registers that are dumped in phb4_dump_app_err_regs() are:
> 
> * CAPP FIR Register
> * CAPP APC Master Error Report Register
> * CAPP Snoop Error Report Register
> * CAPP Transport Error Report Register
> * CAPP TLBI Error Report Register
> * CAPP Error Status and Control Register
> 
> Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>

Acked-by: Russell Currey <ruscur@russell.cc>
Stewart Smith Jan. 15, 2018, 3:55 a.m. | #4
Vaibhav Jain <vaibhav@linux.vnet.ibm.com> writes:
> This patch introduces a new function phb4_dump_app_err_regs() that
> dumps CAPP error registers in case the PEC nestfir register indicates
> that the fence was due to a CAPP error (BIT-24).
>
> Contents of these registers are helpful in diagnosing CAPP
> issues. Registers that are dumped in phb4_dump_app_err_regs() are:
>
> * CAPP FIR Register
> * CAPP APC Master Error Report Register
> * CAPP Snoop Error Report Register
> * CAPP Transport Error Report Register
> * CAPP TLBI Error Report Register
> * CAPP Error Status and Control Register
>
> Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
> ---
>  hw/phb4.c           | 36 +++++++++++++++++++++++++++++++-----
>  include/phb4-capp.h |  8 ++++++--
>  include/phb4-regs.h |  1 +
>  3 files changed, 38 insertions(+), 7 deletions(-)

Thanks, merged to master as of 7b613693d537859acfa13968ecfa28082ec8f72f

Patch

diff --git a/hw/phb4.c b/hw/phb4.c
index 6c59462b..ff912e1f 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -2338,6 +2338,28 @@  static void phb4_train_info(struct phb4 *p, uint64_t reg, unsigned long time)
 	PHBERR(p, "%s\n", s);
 }
 
+static void phb4_dump_capp_err_regs(struct phb4 *p)
+{
+	uint64_t fir, apc_master_err, snoop_err, transport_err;
+	uint64_t tlbi_err, capp_err_status;
+	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
+
+	xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
+	xscom_read(p->chip_id, CAPP_APC_MASTER_ERR_RPT + offset,
+		   &apc_master_err);
+	xscom_read(p->chip_id, CAPP_SNOOP_ERR_RTP + offset, &snoop_err);
+	xscom_read(p->chip_id, CAPP_TRANSPORT_ERR_RPT + offset, &transport_err);
+	xscom_read(p->chip_id, CAPP_TLBI_ERR_RPT + offset, &tlbi_err);
+	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &capp_err_status);
+
+	PHBERR(p, "           CAPP FIR=%016llx\n", fir);
+	PHBERR(p, "CAPP APC MASTER ERR=%016llx\n", apc_master_err);
+	PHBERR(p, "     CAPP SNOOP ERR=%016llx\n", snoop_err);
+	PHBERR(p, " CAPP TRANSPORT ERR=%016llx\n", transport_err);
+	PHBERR(p, "      CAPP TLBI ERR=%016llx\n", tlbi_err);
+	PHBERR(p, "    CAPP ERR STATUS=%016llx\n", capp_err_status);
+}
+
 /* Check if AIB is fenced via PBCQ NFIR */
 static bool phb4_fenced(struct phb4 *p)
 {
@@ -2369,16 +2391,20 @@  static bool phb4_fenced(struct phb4 *p)
 	xscom_read(p->chip_id,
 		   p->pci_stk_xscom + XPEC_PCI_STK_PBAIB_ERR_REPORT, &err_aib);
 
-	PHBERR(p, " PCI FIR=%016llx\n", nfir_p);
-	PHBERR(p, "NEST FIR=%016llx\n", nfir_n);
-	PHBERR(p, "ERR RPT0=%016llx\n", err_rpt0);
-	PHBERR(p, "ERR RPT1=%016llx\n", err_rpt1);
-	PHBERR(p, " AIB ERR=%016llx\n", err_aib);
+	PHBERR(p, "            PCI FIR=%016llx\n", nfir_p);
+	PHBERR(p, "           NEST FIR=%016llx\n", nfir_n);
+	PHBERR(p, "           ERR RPT0=%016llx\n", err_rpt0);
+	PHBERR(p, "           ERR RPT1=%016llx\n", err_rpt1);
+	PHBERR(p, "            AIB ERR=%016llx\n", err_aib);
 
 	/* Mark ourselves fenced */
 	p->flags |= PHB4_AIB_FENCED;
 	p->state = PHB4_STATE_FENCED;
 
+	/* dump capp error registers in case phb was fenced due to capp */
+	if (nfir_n & XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP)
+		phb4_dump_capp_err_regs(p);
+
 	phb4_eeh_dump_regs(p);
 
 	return true;
diff --git a/include/phb4-capp.h b/include/phb4-capp.h
index 10cdc406..68200ac5 100644
--- a/include/phb4-capp.h
+++ b/include/phb4-capp.h
@@ -26,6 +26,12 @@ 
 #define CAPP_FIR_MASK				0x2010803
 #define CAPP_FIR_ACTION0			0x2010806
 #define CAPP_FIR_ACTION1			0x2010807
+#define CAPP_SNOOP_ERR_RTP			0x201080A
+#define CAPP_APC_MASTER_ERR_RPT			0x201080B
+#define CAPP_TRANSPORT_ERR_RPT			0x201080C
+#define CAPP_TLBI_ERR_RPT			0x201080D
+#define CAPP_ERR_STATUS_CTRL			0x201080E
+#define FLUSH_SUE_STATE_MAP			0x201080F
 #define CAPP_ERR_RPT_CLR			0x2010813
 #define APC_MASTER_PB_CTRL			0x2010818
 #define APC_MASTER_CAPI_CTRL			0x2010819
@@ -36,8 +42,6 @@ 
 #define TRANSPORT_CONTROL			0x201081C
 #define CAPP_TB					0x2010826
 #define CAPP_TFMR				0x2010827
-#define CAPP_ERR_STATUS_CTRL			0x201080E
-#define FLUSH_SUE_STATE_MAP			0x201080F
 #define FLUSH_CPIG_STATE_MAP			0x2010820
 #define FLUSH_SUE_UOP1				0x2010843  /* Satellite 2 */
 #define APC_FSM_READ_MASK			0x2010823
diff --git a/include/phb4-regs.h b/include/phb4-regs.h
index 0d8aa48b..2dc64fe5 100644
--- a/include/phb4-regs.h
+++ b/include/phb4-regs.h
@@ -343,6 +343,7 @@ 
 
 /* Nest base per-stack registers */
 #define XPEC_NEST_STK_PCI_NFIR			0x0
+#define   XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP	PPC_BIT(24)
 #define XPEC_NEST_STK_PCI_NFIR_CLR		0x1
 #define XPEC_NEST_STK_PCI_NFIR_SET		0x2
 #define XPEC_NEST_STK_PCI_NFIR_MSK		0x3