Patchwork [v2] powerpc: VPHN topology change updates all siblings

login
register
mail settings
Submitter Robert Jennings
Date July 24, 2013, 3 p.m.
Message ID <20130724150005.GB13737@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/261432/
State Superseded
Headers show

Comments

Robert Jennings - July 24, 2013, 3 p.m.
When an associativity level change is found for one thread, the
sibling threads need to be updated as well.  This is done today
for PRRN in stage_topology_update() but is missing for VPHN in
update_cpu_associativity_changes_mask().

All threads should be updated to move to the new node.  Without this
patch, a single thread may be flagged for a topology change, leaving it
in a different node from its siblings, which is incorrect.  This causes
problems for the scheduler where overlapping scheduler groups are created
and a loop is formed in those groups.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
---
cpu_sibling_mask is now defined for UP which fixes that build break.
---
 arch/powerpc/include/asm/smp.h |  4 +++
 arch/powerpc/mm/numa.c         | 59 +++++++++++++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 15 deletions(-)
Greg KH - July 24, 2013, 3:28 p.m.
On Wed, Jul 24, 2013 at 10:00:05AM -0500, Robert Jennings wrote:
> When an associativity level change is found for one thread, the
> siblings threads need to be updated as well.  This is done today
> for PRRN in stage_topology_update() but is missing for VPHN in
> update_cpu_associativity_changes_mask().
> 
> All threads should be updated to move to the new node.  Without this
> patch, a single thread may be flagged for a topology change, leaving it
> in a different node from its siblings, which is incorrect.  This causes
> problems for the scheduler where overlapping scheduler groups are created
> and a loop is formed in those groups.
> 
> Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
> ---
> cpu_sibling_mask is now defined for UP which fixes that build break.
> ---
>  arch/powerpc/include/asm/smp.h |  4 +++
>  arch/powerpc/mm/numa.c         | 59 +++++++++++++++++++++++++++++++-----------
>  2 files changed, 48 insertions(+), 15 deletions(-)

<formletter>

This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read Documentation/stable_kernel_rules.txt
for how to do this properly.

</formletter>
Benjamin Herrenschmidt - July 24, 2013, 10:49 p.m.
On Wed, 2013-07-24 at 10:00 -0500, Robert Jennings wrote:
> When an associativity level change is found for one thread, the
> siblings threads need to be updated as well.  This is done today
> for PRRN in stage_topology_update() but is missing for VPHN in
> update_cpu_associativity_changes_mask().
> 
> All threads should be updated to move to the new node.  Without this
> patch, a single thread may be flagged for a topology change, leaving it
> in a different node from its siblings, which is incorrect.  This causes
> problems for the scheduler where overlapping scheduler groups are created
> and a loop is formed in those groups.
> 
> Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
> ---

This is big for a CC stable ... Can you be a bit more verbose on what
the consequences are of not having this patch ? Ie, what happens when "a
loop is formed in [the scheduler] groups" ?

Also you shouldn't CC stable on the actual patch email. You should add a
CC: <stable@vger.kernel.org> tag along with your Signed-off-by:

Also how far back in stable should this go ?

Cheers,
Ben.

> cpu_sibling_mask is now defined for UP which fixes that build break.
> ---
>  arch/powerpc/include/asm/smp.h |  4 +++
>  arch/powerpc/mm/numa.c         | 59 +++++++++++++++++++++++++++++++-----------
>  2 files changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index ffbaabe..48cfc85 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -145,6 +145,10 @@ extern void __cpu_die(unsigned int cpu);
>  #define smp_setup_cpu_maps()
>  static inline void inhibit_secondary_onlining(void) {}
>  static inline void uninhibit_secondary_onlining(void) {}
> +static inline const struct cpumask *cpu_sibling_mask(int cpu)
> +{
> +	return cpumask_of(cpu);
> +}
>  
>  #endif /* CONFIG_SMP */
>  
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 0839721..5850798 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -27,6 +27,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/uaccess.h>
>  #include <linux/slab.h>
> +#include <asm/cputhreads.h>
>  #include <asm/sparsemem.h>
>  #include <asm/prom.h>
>  #include <asm/smp.h>
> @@ -1318,7 +1319,8 @@ static int update_cpu_associativity_changes_mask(void)
>  			}
>  		}
>  		if (changed) {
> -			cpumask_set_cpu(cpu, changes);
> +			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
> +			cpu = cpu_last_thread_sibling(cpu);
>  		}
>  	}
>  
> @@ -1426,7 +1428,7 @@ static int update_cpu_topology(void *data)
>  	if (!data)
>  		return -EINVAL;
>  
> -	cpu = get_cpu();
> +	cpu = smp_processor_id();
>  
>  	for (update = data; update; update = update->next) {
>  		if (cpu != update->cpu)
> @@ -1446,12 +1448,12 @@ static int update_cpu_topology(void *data)
>   */
>  int arch_update_cpu_topology(void)
>  {
> -	unsigned int cpu, changed = 0;
> +	unsigned int cpu, sibling, changed = 0;
>  	struct topology_update_data *updates, *ud;
>  	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
>  	cpumask_t updated_cpus;
>  	struct device *dev;
> -	int weight, i = 0;
> +	int weight, new_nid, i = 0;
>  
>  	weight = cpumask_weight(&cpu_associativity_changes_mask);
>  	if (!weight)
> @@ -1464,19 +1466,46 @@ int arch_update_cpu_topology(void)
>  	cpumask_clear(&updated_cpus);
>  
>  	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
> -		ud = &updates[i++];
> -		ud->cpu = cpu;
> -		vphn_get_associativity(cpu, associativity);
> -		ud->new_nid = associativity_to_nid(associativity);
> -
> -		if (ud->new_nid < 0 || !node_online(ud->new_nid))
> -			ud->new_nid = first_online_node;
> +		/*
> +		 * If siblings aren't flagged for changes, updates list
> +		 * will be too short. Skip on this update and set for next
> +		 * update.
> +		 */
> +		if (!cpumask_subset(cpu_sibling_mask(cpu),
> +					&cpu_associativity_changes_mask)) {
> +			pr_info("Sibling bits not set for associativity "
> +					"change, cpu%d\n", cpu);
> +			cpumask_or(&cpu_associativity_changes_mask,
> +					&cpu_associativity_changes_mask,
> +					cpu_sibling_mask(cpu));
> +			cpu = cpu_last_thread_sibling(cpu);
> +			continue;
> +		}
>  
> -		ud->old_nid = numa_cpu_lookup_table[cpu];
> -		cpumask_set_cpu(cpu, &updated_cpus);
> +		/* Use associativity from first thread for all siblings */
> +		vphn_get_associativity(cpu, associativity);
> +		new_nid = associativity_to_nid(associativity);
> +		if (new_nid < 0 || !node_online(new_nid))
> +			new_nid = first_online_node;
> +
> +		if (new_nid == numa_cpu_lookup_table[cpu]) {
> +			cpumask_andnot(&cpu_associativity_changes_mask,
> +					&cpu_associativity_changes_mask,
> +					cpu_sibling_mask(cpu));
> +			cpu = cpu_last_thread_sibling(cpu);
> +			continue;
> +		}
>  
> -		if (i < weight)
> -			ud->next = &updates[i];
> +		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
> +			ud = &updates[i++];
> +			ud->cpu = sibling;
> +			ud->new_nid = new_nid;
> +			ud->old_nid = numa_cpu_lookup_table[sibling];
> +			cpumask_set_cpu(sibling, &updated_cpus);
> +			if (i < weight)
> +				ud->next = &updates[i];
> +		}
> +		cpu = cpu_last_thread_sibling(cpu);
>  	}
>  
>  	stop_machine(update_cpu_topology, &updates[0], &updated_cpus);

Patch

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ffbaabe..48cfc85 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -145,6 +145,10 @@  extern void __cpu_die(unsigned int cpu);
 #define smp_setup_cpu_maps()
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
+static inline const struct cpumask *cpu_sibling_mask(int cpu)
+{
+	return cpumask_of(cpu);
+}
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0839721..5850798 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -27,6 +27,7 @@ 
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <asm/cputhreads.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/smp.h>
@@ -1318,7 +1319,8 @@  static int update_cpu_associativity_changes_mask(void)
 			}
 		}
 		if (changed) {
-			cpumask_set_cpu(cpu, changes);
+			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
+			cpu = cpu_last_thread_sibling(cpu);
 		}
 	}
 
@@ -1426,7 +1428,7 @@  static int update_cpu_topology(void *data)
 	if (!data)
 		return -EINVAL;
 
-	cpu = get_cpu();
+	cpu = smp_processor_id();
 
 	for (update = data; update; update = update->next) {
 		if (cpu != update->cpu)
@@ -1446,12 +1448,12 @@  static int update_cpu_topology(void *data)
  */
 int arch_update_cpu_topology(void)
 {
-	unsigned int cpu, changed = 0;
+	unsigned int cpu, sibling, changed = 0;
 	struct topology_update_data *updates, *ud;
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	cpumask_t updated_cpus;
 	struct device *dev;
-	int weight, i = 0;
+	int weight, new_nid, i = 0;
 
 	weight = cpumask_weight(&cpu_associativity_changes_mask);
 	if (!weight)
@@ -1464,19 +1466,46 @@  int arch_update_cpu_topology(void)
 	cpumask_clear(&updated_cpus);
 
 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-		ud = &updates[i++];
-		ud->cpu = cpu;
-		vphn_get_associativity(cpu, associativity);
-		ud->new_nid = associativity_to_nid(associativity);
-
-		if (ud->new_nid < 0 || !node_online(ud->new_nid))
-			ud->new_nid = first_online_node;
+		/*
+		 * If siblings aren't flagged for changes, updates list
+		 * will be too short. Skip on this update and set for next
+		 * update.
+		 */
+		if (!cpumask_subset(cpu_sibling_mask(cpu),
+					&cpu_associativity_changes_mask)) {
+			pr_info("Sibling bits not set for associativity "
+					"change, cpu%d\n", cpu);
+			cpumask_or(&cpu_associativity_changes_mask,
+					&cpu_associativity_changes_mask,
+					cpu_sibling_mask(cpu));
+			cpu = cpu_last_thread_sibling(cpu);
+			continue;
+		}
 
-		ud->old_nid = numa_cpu_lookup_table[cpu];
-		cpumask_set_cpu(cpu, &updated_cpus);
+		/* Use associativity from first thread for all siblings */
+		vphn_get_associativity(cpu, associativity);
+		new_nid = associativity_to_nid(associativity);
+		if (new_nid < 0 || !node_online(new_nid))
+			new_nid = first_online_node;
+
+		if (new_nid == numa_cpu_lookup_table[cpu]) {
+			cpumask_andnot(&cpu_associativity_changes_mask,
+					&cpu_associativity_changes_mask,
+					cpu_sibling_mask(cpu));
+			cpu = cpu_last_thread_sibling(cpu);
+			continue;
+		}
 
-		if (i < weight)
-			ud->next = &updates[i];
+		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+			ud = &updates[i++];
+			ud->cpu = sibling;
+			ud->new_nid = new_nid;
+			ud->old_nid = numa_cpu_lookup_table[sibling];
+			cpumask_set_cpu(sibling, &updated_cpus);
+			if (i < weight)
+				ud->next = &updates[i];
+		}
+		cpu = cpu_last_thread_sibling(cpu);
 	}
 
 	stop_machine(update_cpu_topology, &updates[0], &updated_cpus);