[RFC/PATCH] affinity: Use fabric node ID on most systems

Message ID 1491272483.4166.24.camel@kernel.crashing.org
State New
Headers show

Commit Message

Benjamin Herrenschmidt April 4, 2017, 2:21 a.m.
Except Murano where we stick to the module ID from HDAT

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 core/affinity.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

Comments

Michael Ellerman Nov. 6, 2017, 6:03 a.m. | #1
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:

> Except Murano where we stick to the module ID from HDAT
>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  core/affinity.c | 42 ++++++++++++++++++++++--------------------
>  1 file changed, 22 insertions(+), 20 deletions(-)

Ping?

It seems this patch is being used in the wild, so it would be good to
either get it merged or nack'ed.

Changing the numbering on existing P8 systems seems like it could
definitely cause breakage in the kernel or apps, but on P9 maybe we can
get away with it.

cheers


> diff --git a/core/affinity.c b/core/affinity.c
> index 9f489d3..d3df910 100644
> --- a/core/affinity.c
> +++ b/core/affinity.c
> @@ -54,35 +54,37 @@
>  
>  static uint32_t get_chip_node_id(struct proc_chip *chip)
>  {
> -	/* If the xscom node has an ibm,ccm-node-id property, use it */
> -	if (dt_has_node_property(chip->devnode, "ibm,ccm-node-id", NULL))
> -		return dt_prop_get_u32(chip->devnode, "ibm,ccm-node-id");
> -
>  	/*
>  	 * Else use the 3 top bits of the chip ID which should be
> -	 * the node on both P7 and P8
> +	 * the node on P7, P8 and P9
>  	 */
>  	return chip->id >> 3;
>  }
>  
>  void add_associativity_ref_point(void)
>  {
> -	int ref2 = 0x4;
> +	uint32_t ref2 = 0x1;
>  
>  	/*
>  	 * Note about our use of reference points:
>  	 *
> -	 * Linux currently supports two levels of NUMA. We use the first
> +	 * Linux currently supports two levels of NUMA. It uses the first
>  	 * reference point for the node ID and the second reference point
> -	 * for a second level of affinity. We always use the chip ID (4)
> -	 * for the first reference point.
> +	 * for a second level of affinity.
> +	 *
> +	 * We always pass the chip ID (4) for the first reference point.
> +	 *
> +	 * We pass the "module ID" (3) as the second point. When it doesn't
> +	 * exist in the DT we inherit the chip node ID which on Venice
> +	 * works fine. On FSP machine and P9, HDAT will provide it always.
>  	 *
> -	 * Choosing the second level of affinity is model specific
>  	 * unfortunately. Current POWER8E models should use the DCM
>  	 * as a second level of NUMA.
>  	 *
>  	 * If there is a way to obtain this information from the FSP
>  	 * that would be ideal, but for now hardwire our POWER8E setting.
> +	 *
> +	 * On P9 system we always use the PowerBus node ID
>  	 */
>  	if (PVR_TYPE(mfspr(SPR_PVR)) == PVR_TYPE_P8E)
>  		ref2 = 0x3;
> @@ -95,23 +97,23 @@ void add_chip_dev_associativity(struct dt_node *dev)
>  {
>  	uint32_t chip_id = dt_get_chip_id(dev);
>  	struct proc_chip *chip = get_chip(chip_id);
> -	uint32_t hw_cid, hw_mid;
> +	uint32_t hw_cid, hw_mid, cn_id;
>  
>  	if (!chip)
>  		return;
>  
> -	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
> -	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
> +	cn_id = get_chip_node_id(chip);
> +	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", cn_id);
> +	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", cn_id);
>  
>  	dt_add_property_cells(dev, "ibm,associativity", 4,
> -			      get_chip_node_id(chip),
> -			      hw_cid, hw_mid, chip_id);
> +			      cn_id, hw_cid, hw_mid, chip_id);
>  }
>  
>  void add_core_associativity(struct cpu_thread *cpu)
>  {
>  	struct proc_chip *chip = get_chip(cpu->chip_id);
> -	uint32_t hw_cid, hw_mid, core_id;
> +	uint32_t hw_cid, hw_mid, core_id, cn_id;
>  
>  	if (!chip)
>  		return;
> @@ -125,10 +127,10 @@ void add_core_associativity(struct cpu_thread *cpu)
>  	else
>  		return;
>  
> -	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
> -	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
> +	cn_id = get_chip_node_id(chip);
> +	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", cn_id);
> +	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", cn_id);
>  
>  	dt_add_property_cells(cpu->node, "ibm,associativity", 5,
> -			      get_chip_node_id(chip),
> -			      hw_cid, hw_mid, chip->id, core_id);
> +			      cn_id, hw_cid, hw_mid, chip->id, core_id);
>  }
>
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Stewart Smith Nov. 10, 2017, 6:20 a.m. | #2
Michael Ellerman <mpe@ellerman.id.au> writes:
> Benjamin Herrenschmidt <benh@kernel.crashing.org> writes:
>
>> Except Murano where we stick to the module ID from HDAT
>>
>> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> ---
>>  core/affinity.c | 42 ++++++++++++++++++++++--------------------
>>  1 file changed, 22 insertions(+), 20 deletions(-)
>
> Ping?
>
> It seems this patch is being used in the wild, so it would be good to
> either get it merged or nack'ed.
>
> Changing the numbering on existing P8 systems seems like it could
> definitely cause breakage in the kernel or apps, but on P9 maybe we can
> get away with it.

Yeah, I'm thinking we should be able to get away with it on P9...

Early next week I'll take a stab at running this through a bunch of
what-I-can-think-of tests, probably hacking something into op-test, and
generally trying to ensure that we're not too batshit insane to do it.

Or, I could merge it right now, git push and just FRIDAY YOLO the whole
thing (but I won't :)

Patch

diff --git a/core/affinity.c b/core/affinity.c
index 9f489d3..d3df910 100644
--- a/core/affinity.c
+++ b/core/affinity.c
@@ -54,35 +54,37 @@ 
 
 static uint32_t get_chip_node_id(struct proc_chip *chip)
 {
-	/* If the xscom node has an ibm,ccm-node-id property, use it */
-	if (dt_has_node_property(chip->devnode, "ibm,ccm-node-id", NULL))
-		return dt_prop_get_u32(chip->devnode, "ibm,ccm-node-id");
-
 	/*
 	 * Else use the 3 top bits of the chip ID which should be
-	 * the node on both P7 and P8
+	 * the node on P7, P8 and P9
 	 */
 	return chip->id >> 3;
 }
 
 void add_associativity_ref_point(void)
 {
-	int ref2 = 0x4;
+	uint32_t ref2 = 0x1;
 
 	/*
 	 * Note about our use of reference points:
 	 *
-	 * Linux currently supports two levels of NUMA. We use the first
+	 * Linux currently supports two levels of NUMA. It uses the first
 	 * reference point for the node ID and the second reference point
-	 * for a second level of affinity. We always use the chip ID (4)
-	 * for the first reference point.
+	 * for a second level of affinity.
+	 *
+	 * We always pass the chip ID (4) for the first reference point.
+	 *
+	 * We pass the "module ID" (3) as the second point. When it doesn't
+	 * exist in the DT we inherit the chip node ID which on Venice
+	 * works fine. On FSP machine and P9, HDAT will provide it always.
 	 *
-	 * Choosing the second level of affinity is model specific
 	 * unfortunately. Current POWER8E models should use the DCM
 	 * as a second level of NUMA.
 	 *
 	 * If there is a way to obtain this information from the FSP
 	 * that would be ideal, but for now hardwire our POWER8E setting.
+	 *
+	 * On P9 system we always use the PowerBus node ID
 	 */
 	if (PVR_TYPE(mfspr(SPR_PVR)) == PVR_TYPE_P8E)
 		ref2 = 0x3;
@@ -95,23 +97,23 @@  void add_chip_dev_associativity(struct dt_node *dev)
 {
 	uint32_t chip_id = dt_get_chip_id(dev);
 	struct proc_chip *chip = get_chip(chip_id);
-	uint32_t hw_cid, hw_mid;
+	uint32_t hw_cid, hw_mid, cn_id;
 
 	if (!chip)
 		return;
 
-	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
-	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
+	cn_id = get_chip_node_id(chip);
+	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", cn_id);
+	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", cn_id);
 
 	dt_add_property_cells(dev, "ibm,associativity", 4,
-			      get_chip_node_id(chip),
-			      hw_cid, hw_mid, chip_id);
+			      cn_id, hw_cid, hw_mid, chip_id);
 }
 
 void add_core_associativity(struct cpu_thread *cpu)
 {
 	struct proc_chip *chip = get_chip(cpu->chip_id);
-	uint32_t hw_cid, hw_mid, core_id;
+	uint32_t hw_cid, hw_mid, core_id, cn_id;
 
 	if (!chip)
 		return;
@@ -125,10 +127,10 @@  void add_core_associativity(struct cpu_thread *cpu)
 	else
 		return;
 
-	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
-	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
+	cn_id = get_chip_node_id(chip);
+	hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", cn_id);
+	hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", cn_id);
 
 	dt_add_property_cells(cpu->node, "ibm,associativity", 5,
-			      get_chip_node_id(chip),
-			      hw_cid, hw_mid, chip->id, core_id);
+			      cn_id, hw_cid, hw_mid, chip->id, core_id);
 }