diff mbox series

hw/imc: Detect BML and fix core counters

Message ID 20230810211149.428158-1-grimm@linux.ibm.com
State Superseded
Headers show
Series hw/imc: Detect BML and fix core counters | expand

Commit Message

Ryan Grimm Aug. 10, 2023, 9:11 p.m. UTC
On systems running BML we started noticing this in the skiboot log:

[  409.088819302,3] XSCOM: write error gcid=0x0 pcb_addr=0x20000060 stat=0x4
[  409.088823446,3] ELOG: Error getting buffer to log error
[  409.088824806,3] XSCOM: Write failed, ret =  -26
[  409.088825797,3] IMC: error in xscom_write for pdbar
[    0.468976][   T19] core_imc memory allocation for cpu 0 failed
[    0.468993][    T1] IMC PMU core_imc Register failed

I tracked down that bad pcb_addr to this line in the code:

        pdbar_addr = get_imc_scom_addr_for_quad(phys_core_id,
                                pdbar_scom_index[port_id]);

I found that pdbar_scom_index was not initialized because, like mambo, we don't
have the IMC catalog in memory.  So, in imc_init we error out and never
initialize it in setup_imc_scoms.

This patch adds a chip quirk QUIRK_BML because it seems like a reasonable thing
to do and it's easy to put a BML {}; in the device tree like Mambo, Awan, etc.

It is tested on a Rainier and errors are gone and /sys/devices/core_imc shows
up as expected.

Signed-off-by: Ryan Grimm <grimm@linux.ibm.com>
---
 core/chip.c    |  4 ++++
 hw/imc.c       | 10 +++++-----
 include/chip.h |  1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

Comments

Madhavan Srinivasan Aug. 14, 2023, 3:58 a.m. UTC | #1
On 8/11/23 2:41 AM, Ryan Grimm wrote:
> On systems running BML we started noticing this in the skiboot log:
>
> [  409.088819302,3] XSCOM: write error gcid=0x0 pcb_addr=0x20000060 stat=0x4
> [  409.088823446,3] ELOG: Error getting buffer to log error
> [  409.088824806,3] XSCOM: Write failed, ret =  -26
> [  409.088825797,3] IMC: error in xscom_write for pdbar
> [    0.468976][   T19] core_imc memory allocation for cpu 0 failed
> [    0.468993][    T1] IMC PMU core_imc Register failed
>
> I tracked down that bad pcb_addr to this line in the code:
>
>          pdbar_addr = get_imc_scom_addr_for_quad(phys_core_id,
>                                  pdbar_scom_index[port_id]);
>
> I found that pdbar_scom_index was not initialized because, like mambo, we don't
> have the IMC catalog in memory.  So, in imc_init we error out and never
> initialize it in setup_imc_scoms.
>
> This patch adds a chip quirk QUIRK_BML because it seems like a reasonable thing
> to do and it's easy to put a BML {}; in the device tree like Mambo, Awan, etc.
>
> It is tested on a Rainier and errors are gone and /sys/devices/core_imc shows
> up as expected.
>
> Signed-off-by: Ryan Grimm <grimm@linux.ibm.com>
> ---
>   core/chip.c    |  4 ++++
>   hw/imc.c       | 10 +++++-----
>   include/chip.h |  1 +
>   3 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/core/chip.c b/core/chip.c
> index cee65822..0e96e625 100644
> --- a/core/chip.c
> +++ b/core/chip.c
> @@ -190,6 +190,10 @@ void init_chips(void)
>   		proc_chip_quirks |= QUIRK_QEMU | QUIRK_NO_DIRECT_CTL | QUIRK_NO_RNG;
>   		prlog(PR_NOTICE, "CHIP: Detected QEMU simulator\n");
>   	}
> +	if (dt_find_by_path(dt_root, "/bml")) {
> +		proc_chip_quirks |= QUIRK_BML;
> +		prlog(PR_NOTICE, "CHIP: Detected BML\n");
> +	}
>
>   	/* We walk the chips based on xscom nodes in the tree */
>   	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
> diff --git a/hw/imc.c b/hw/imc.c
> index 97e0809f..dc3a59ae 100644
> --- a/hw/imc.c
> +++ b/hw/imc.c
> @@ -475,7 +475,7 @@ void imc_catalog_preload(void)
>   	int ret = OPAL_SUCCESS;
>   	compress_buf_size = MAX_COMPRESSED_IMC_DTB_SIZE;
>
> -	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS)
> +	if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML))
>   		return;
>
>   	/* Enable only for power 9/10 */
> @@ -613,13 +613,13 @@ void imc_init(void)
>   	struct dt_node *dev;
>   	int err_flag = -1;
>
> -	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
> +	if ((proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS) || QUIRK_BML)) {

Guess idea is to check for MAMBO or BML bits set in proc_chip_quirks.
But then why Close Parentheses after QUIRK_MAMBO_CALLOUTS?

Maddy

>   		dev = dt_find_compatible_node(dt_root, NULL,
>   					"ibm,opal-in-memory-counters");
>   		if (!dev)
>   			return;
>
> -		goto imc_mambo;
> +		goto imc_mambo_bml;
>   	}
>
>   	/* Enable only for power 9/10 */
> @@ -662,7 +662,7 @@ void imc_init(void)
>   		goto err;
>   	}
>
> -imc_mambo:
> +imc_mambo_bml:
>   	if (setup_imc_scoms()) {
>   		prerror("IMC: Failed to setup the scoms\n");
>   		goto err;
> @@ -683,7 +683,7 @@ imc_mambo:
>   	/* Update the base_addr and chip-id for nest nodes */
>   	imc_dt_update_nest_node(dev);
>
> -	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS)
> +	if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML))
>   		return;
>
>   	/*
> diff --git a/include/chip.h b/include/chip.h
> index cfa5ce35..c90b8a7f 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -187,6 +187,7 @@ enum proc_chip_quirks {
>   	QUIRK_NO_RNG		= 0x00000100,
>   	QUIRK_QEMU              = 0x00000200,
>   	QUIRK_AWAN		= 0x00000400,
> +	QUIRK_BML		= 0x00000800,
>   };
>
>   extern enum proc_chip_quirks proc_chip_quirks;
Ryan Grimm Aug. 14, 2023, 2:22 p.m. UTC | #2
On Mon, 2023-08-14 at 09:28 +0530, Madhavan Srinivasan wrote:
> On 8/11/23 2:41 AM, Ryan Grimm wrote:
> > 
> > diff --git a/hw/imc.c b/hw/imc.c
> > index 97e0809f..dc3a59ae 100644
> > --- a/hw/imc.c
> > +++ b/hw/imc.c
> > @@ -475,7 +475,7 @@ void imc_catalog_preload(void)
> >   	int ret = OPAL_SUCCESS;
> >   	compress_buf_size = MAX_COMPRESSED_IMC_DTB_SIZE;
> > 
> > -	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS)
> > +	if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML))
> >   		return;
> > 
> >   	/* Enable only for power 9/10 */
> > @@ -613,13 +613,13 @@ void imc_init(void)
> >   	struct dt_node *dev;
> >   	int err_flag = -1;
> > 
> > -	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
> > +	if ((proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS) || QUIRK_BML)) {
> 
> Guess idea is to check for MAMBO or BML bits set in proc_chip_quirks.
> But then why Close Parentheses after QUIRK_MAMBO_CALLOUTS?
> 

Oh, oops, unintentional extra parenthesis, will fix, thanks!

Ryan

> Maddy
> 
> >   		dev = dt_find_compatible_node(dt_root, NULL,
> >   					"ibm,opal-in-memory-counters");
> >   		if (!dev)
> >   			return;
> > 
> > -		goto imc_mambo;
> > +		goto imc_mambo_bml;
> >   	}
> > 
> >   	/* Enable only for power 9/10 */
> > @@ -662,7 +662,7 @@ void imc_init(void)
> >   		goto err;
> >   	}
> > 
> > -imc_mambo:
> > +imc_mambo_bml:
> >   	if (setup_imc_scoms()) {
> >   		prerror("IMC: Failed to setup the scoms\n");
> >   		goto err;
> > @@ -683,7 +683,7 @@ imc_mambo:
> >   	/* Update the base_addr and chip-id for nest nodes */
> >   	imc_dt_update_nest_node(dev);
> > 
> > -	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS)
> > +	if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML))
> >   		return;
> > 
> >   	/*
> > diff --git a/include/chip.h b/include/chip.h
> > index cfa5ce35..c90b8a7f 100644
> > --- a/include/chip.h
> > +++ b/include/chip.h
> > @@ -187,6 +187,7 @@ enum proc_chip_quirks {
> >   	QUIRK_NO_RNG		= 0x00000100,
> >   	QUIRK_QEMU              = 0x00000200,
> >   	QUIRK_AWAN		= 0x00000400,
> > +	QUIRK_BML		= 0x00000800,
> >   };
> > 
> >   extern enum proc_chip_quirks proc_chip_quirks;
diff mbox series

Patch

diff --git a/core/chip.c b/core/chip.c
index cee65822..0e96e625 100644
--- a/core/chip.c
+++ b/core/chip.c
@@ -190,6 +190,10 @@  void init_chips(void)
 		proc_chip_quirks |= QUIRK_QEMU | QUIRK_NO_DIRECT_CTL | QUIRK_NO_RNG;
 		prlog(PR_NOTICE, "CHIP: Detected QEMU simulator\n");
 	}
+	if (dt_find_by_path(dt_root, "/bml")) {
+		proc_chip_quirks |= QUIRK_BML;
+		prlog(PR_NOTICE, "CHIP: Detected BML\n");
+	}
 
 	/* We walk the chips based on xscom nodes in the tree */
 	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
diff --git a/hw/imc.c b/hw/imc.c
index 97e0809f..dc3a59ae 100644
--- a/hw/imc.c
+++ b/hw/imc.c
@@ -475,7 +475,7 @@  void imc_catalog_preload(void)
 	int ret = OPAL_SUCCESS;
 	compress_buf_size = MAX_COMPRESSED_IMC_DTB_SIZE;
 
-	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS)
+	if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML))
 		return;
 
 	/* Enable only for power 9/10 */
@@ -613,13 +613,13 @@  void imc_init(void)
 	struct dt_node *dev;
 	int err_flag = -1;
 
-	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
+	if ((proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS) || QUIRK_BML)) {
 		dev = dt_find_compatible_node(dt_root, NULL,
 					"ibm,opal-in-memory-counters");
 		if (!dev)
 			return;
 
-		goto imc_mambo;
+		goto imc_mambo_bml;
 	}
 
 	/* Enable only for power 9/10 */
@@ -662,7 +662,7 @@  void imc_init(void)
 		goto err;
 	}
 
-imc_mambo:
+imc_mambo_bml:
 	if (setup_imc_scoms()) {
 		prerror("IMC: Failed to setup the scoms\n");
 		goto err;
@@ -683,7 +683,7 @@  imc_mambo:
 	/* Update the base_addr and chip-id for nest nodes */
 	imc_dt_update_nest_node(dev);
 
-	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS)
+	if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML))
 		return;
 
 	/*
diff --git a/include/chip.h b/include/chip.h
index cfa5ce35..c90b8a7f 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -187,6 +187,7 @@  enum proc_chip_quirks {
 	QUIRK_NO_RNG		= 0x00000100,
 	QUIRK_QEMU              = 0x00000200,
 	QUIRK_AWAN		= 0x00000400,
+	QUIRK_BML		= 0x00000800,
 };
 
 extern enum proc_chip_quirks proc_chip_quirks;