
[PATCHv2 3/3] powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings

Message ID 20210728175607.591679-4-parth@linux.ibm.com (mailing list archive)
State Accepted
Series [PATCHv2 0/3] Make cache-object aware of L3 siblings by parsing "ibm,thread-groups" property

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 25 jobs.

Commit Message

Parth Shah July 28, 2021, 5:56 p.m. UTC
On POWER10 systems, value "2" of the "ibm,thread-groups" property indicates that
the CPUs in a thread group share both the L2 and the L3 cache. Hence, use
cache_property = 2 itself to find both the L2 and L3 cache siblings: create a
new thread_group_l3_cache_map to keep the list of L3 siblings, but fill that
mask from the same property "2" array.

Signed-off-by: Parth Shah <parth@linux.ibm.com>
---
 arch/powerpc/include/asm/smp.h  |  3 ++
 arch/powerpc/kernel/cacheinfo.c |  3 ++
 arch/powerpc/kernel/smp.c       | 66 ++++++++++++++++++++++-----------
 3 files changed, 51 insertions(+), 21 deletions(-)
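
For reference, "ibm,thread-groups" is a flat array of cells, one record per
property: the property id, the number of thread groups, the number of threads
per group, and then the thread list itself. The sketch below is illustrative
only (the CPU ids are made up and it is not the kernel's parse_thread_groups()
code); it walks such an array to find the threads that share property "2", and
therefore on POWER10 both the L2 and the L3 cache, with a given CPU.

/*
 * Illustrative sketch: walk a flattened "ibm,thread-groups"-style
 * array and collect the threads that share a given cache property
 * with a target thread.  Layout assumed per record:
 *   [property, nr_groups, threads_per_group, <thread ids...>]
 */
#include <stdio.h>

int main(void)
{
	/* Hypothetical property "2" record: 2 groups of 4 threads each. */
	unsigned int tg[] = { 2, 2, 4,  0, 2, 4, 6,  1, 3, 5, 7 };
	unsigned int property   = tg[0];
	unsigned int nr_groups  = tg[1];
	unsigned int group_size = tg[2];
	unsigned int target = 5;	/* find the cache siblings of CPU 5 */
	unsigned int i, g;

	for (g = 0; g < nr_groups; g++) {
		unsigned int *grp = &tg[3 + g * group_size];
		int match = 0;

		for (i = 0; i < group_size; i++)
			if (grp[i] == target)
				match = 1;

		if (!match)
			continue;

		printf("property %u siblings of CPU %u:", property, target);
		for (i = 0; i < group_size; i++)
			printf(" %u", grp[i]);
		printf("\n");
	}
	return 0;
}

With this patch, the same record is used to fill both thread_group_l2_cache_map
and thread_group_l3_cache_map, since on POWER10 the two sets of siblings are
identical.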

Comments

Gautham R Shenoy July 30, 2021, 10:08 a.m. UTC | #1
On Wed, Jul 28, 2021 at 11:26:07PM +0530, Parth Shah wrote:
> On POWER10 systems, the "ibm,thread-groups" property "2" indicates the cpus
> in thread-group share both L2 and L3 caches. Hence, use cache_property = 2
> itself to find both the L2 and L3 cache siblings.
> Hence, create a new thread_group_l3_cache_map to keep list of L3 siblings,
> but fill the mask using same property "2" array.

This version looks good to me.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>


Patch

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 1259040cc3a4..7ef1cd8168a0 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -35,6 +35,7 @@  extern int *chip_id_lookup_table;
 
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
 
 #ifdef CONFIG_SMP
 
@@ -144,6 +145,7 @@  extern int cpu_to_core_id(int cpu);
 
 extern bool has_big_cores;
 extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
 
 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@@ -198,6 +200,7 @@  extern void __cpu_die(unsigned int cpu);
 #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
 #define thread_group_shares_l2  0
+#define thread_group_shares_l3	0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 20d91693eac1..cf1be75b7833 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -469,6 +469,9 @@  static int get_group_id(unsigned int cpu_id, int level)
 	else if (thread_group_shares_l2 && level == 2)
 		return cpumask_first(per_cpu(thread_group_l2_cache_map,
 					     cpu_id));
+	else if (thread_group_shares_l3 && level == 3)
+		return cpumask_first(per_cpu(thread_group_l3_cache_map,
+					     cpu_id));
 	return -1;
 }
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a7fcac44a8e2..f2abd88e0c25 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -78,6 +78,7 @@  struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
 bool thread_group_shares_l2;
+bool thread_group_shares_l3;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -101,7 +102,7 @@  enum {
 
 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1   1
-#define THREAD_GROUP_SHARE_L2   2
+#define THREAD_GROUP_SHARE_L2_L3 2
 struct thread_groups {
 	unsigned int property;
 	unsigned int nr_groups;
@@ -131,6 +132,12 @@  DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
  */
 DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
 
@@ -889,19 +896,41 @@  static struct thread_groups *__init get_thread_groups(int cpu,
 	return tg;
 }
 
+static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+{
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int i;
+
+	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
+		int i_group_start = get_cpu_thread_group_start(i, tg);
+
+		if (unlikely(i_group_start == -1)) {
+			WARN_ON_ONCE(1);
+			return -ENODATA;
+		}
+
+		if (i_group_start == cpu_group_start)
+			cpumask_set_cpu(i, *mask);
+	}
+
+	return 0;
+}
+
 static int __init init_thread_group_cache_map(int cpu, int cache_property)
 
 {
-	int first_thread = cpu_first_thread_sibling(cpu);
-	int i, cpu_group_start = -1, err = 0;
+	int cpu_group_start = -1, err = 0;
 	struct thread_groups *tg = NULL;
 	cpumask_var_t *mask = NULL;
 
 	if (cache_property != THREAD_GROUP_SHARE_L1 &&
-	    cache_property != THREAD_GROUP_SHARE_L2)
+	    cache_property != THREAD_GROUP_SHARE_L2_L3)
 		return -EINVAL;
 
 	tg = get_thread_groups(cpu, cache_property, &err);
+
 	if (!tg)
 		return err;
 
@@ -912,25 +941,18 @@  static int __init init_thread_group_cache_map(int cpu, int cache_property)
 		return -ENODATA;
 	}
 
-	if (cache_property == THREAD_GROUP_SHARE_L1)
+	if (cache_property == THREAD_GROUP_SHARE_L1) {
 		mask = &per_cpu(thread_group_l1_cache_map, cpu);
-	else if (cache_property == THREAD_GROUP_SHARE_L2)
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	}
+	else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
 		mask = &per_cpu(thread_group_l2_cache_map, cpu);
-
-	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
-
-	for (i = first_thread; i < first_thread + threads_per_core; i++) {
-		int i_group_start = get_cpu_thread_group_start(i, tg);
-
-		if (unlikely(i_group_start == -1)) {
-			WARN_ON_ONCE(1);
-			return -ENODATA;
-		}
-
-		if (i_group_start == cpu_group_start)
-			cpumask_set_cpu(i, *mask);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+		mask = &per_cpu(thread_group_l3_cache_map, cpu);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
 	}
 
+
 	return 0;
 }
 
@@ -1020,14 +1042,16 @@  static int __init init_big_cores(void)
 	has_big_cores = true;
 
 	for_each_possible_cpu(cpu) {
-		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
 
 		if (err)
 			return err;
 	}
 
 	thread_group_shares_l2 = true;
-	pr_debug("L2 cache only shared by the threads in the small core\n");
+	thread_group_shares_l3 = true;
+	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
 	return 0;
 }
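
Since the cacheinfo change exports the new grouping through the standard cache
topology, the result can be checked from userspace. Below is a minimal sketch
(not part of the patch; it only assumes the usual
/sys/devices/system/cpu/cpuN/cache layout) that prints the level and
shared_cpu_list of every cache index of CPU 0.

/*
 * Minimal userspace check: walk cpu0's cache indices in sysfs and
 * print each level together with its shared_cpu_list.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char path[128], level[16], shared[256];
	int idx;

	for (idx = 0; idx < 8; idx++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cache/index%d/level", idx);
		f = fopen(path, "r");
		if (!f)
			break;	/* no more cache indices for this CPU */
		if (!fgets(level, sizeof(level), f))
			level[0] = '\0';
		fclose(f);
		level[strcspn(level, "\n")] = '\0';

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cache/index%d/shared_cpu_list",
			 idx);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fgets(shared, sizeof(shared), f))
			printf("index%d (L%s) shared_cpu_list: %s", idx, level, shared);
		fclose(f);
	}
	return 0;
}

On a POWER10 system with this series applied, the L3 index is expected to
report the same sibling list as the L2 index, i.e. the threads of the small
core.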