Message ID | 20210728175607.591679-4-parth@linux.ibm.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | Subject: [PATCHv2 0/3] Make cache-object aware of L3 siblings by parsing "ibm,thread-groups" property | expand |
Related | show |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/github-powerpc_ppctests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_selftests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_sparse | success | Successfully ran 4 jobs. |
snowpatch_ozlabs/github-powerpc_clang | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_kernel_qemu | success | Successfully ran 25 jobs. |
On Wed, Jul 28, 2021 at 11:26:07PM +0530, Parth Shah wrote: > On POWER10 systems, the "ibm,thread-groups" property "2" indicates the cpus > in thread-group share both L2 and L3 caches. Hence, use cache_property = 2 > itself to find both the L2 and L3 cache siblings. > Hence, create a new thread_group_l3_cache_map to keep list of L3 siblings, > but fill the mask using same property "2" array. This version looks good to me. Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> > > Signed-off-by: Parth Shah <parth@linux.ibm.com> > --- > arch/powerpc/include/asm/smp.h | 3 ++ > arch/powerpc/kernel/cacheinfo.c | 3 ++ > arch/powerpc/kernel/smp.c | 66 ++++++++++++++++++++++----------- > 3 files changed, 51 insertions(+), 21 deletions(-) > > diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h > index 1259040cc3a4..7ef1cd8168a0 100644 > --- a/arch/powerpc/include/asm/smp.h > +++ b/arch/powerpc/include/asm/smp.h > @@ -35,6 +35,7 @@ extern int *chip_id_lookup_table; > > DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); > DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); > +DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); > > #ifdef CONFIG_SMP > > @@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu); > > extern bool has_big_cores; > extern bool thread_group_shares_l2; > +extern bool thread_group_shares_l3; > > #define cpu_smt_mask cpu_smt_mask > #ifdef CONFIG_SCHED_SMT > @@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu); > #define hard_smp_processor_id() get_hard_smp_processor_id(0) > #define smp_setup_cpu_maps() > #define thread_group_shares_l2 0 > +#define thread_group_shares_l3 0 > static inline void inhibit_secondary_onlining(void) {} > static inline void uninhibit_secondary_onlining(void) {} > static inline const struct cpumask *cpu_sibling_mask(int cpu) > diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c > index 20d91693eac1..cf1be75b7833 100644 > --- 
a/arch/powerpc/kernel/cacheinfo.c > +++ b/arch/powerpc/kernel/cacheinfo.c > @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) > else if (thread_group_shares_l2 && level == 2) > return cpumask_first(per_cpu(thread_group_l2_cache_map, > cpu_id)); > + else if (thread_group_shares_l3 && level == 3) > + return cpumask_first(per_cpu(thread_group_l3_cache_map, > + cpu_id)); > return -1; > } > > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c > index a7fcac44a8e2..f2abd88e0c25 100644 > --- a/arch/powerpc/kernel/smp.c > +++ b/arch/powerpc/kernel/smp.c > @@ -78,6 +78,7 @@ struct task_struct *secondary_current; > bool has_big_cores; > bool coregroup_enabled; > bool thread_group_shares_l2; > +bool thread_group_shares_l3; > > DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); > DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); > @@ -101,7 +102,7 @@ enum { > > #define MAX_THREAD_LIST_SIZE 8 > #define THREAD_GROUP_SHARE_L1 1 > -#define THREAD_GROUP_SHARE_L2 2 > +#define THREAD_GROUP_SHARE_L2_L3 2 > struct thread_groups { > unsigned int property; > unsigned int nr_groups; > @@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); > */ > DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); > > +/* > + * On P10, thread_group_l3_cache_map for each CPU is equal to the > + * thread_group_l2_cache_map > + */ > +DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); > + > /* SMP operations for this machine */ > struct smp_ops_t *smp_ops; > > @@ -889,19 +896,41 @@ static struct thread_groups *__init get_thread_groups(int cpu, > return tg; > } > > +static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) > +{ > + int first_thread = cpu_first_thread_sibling(cpu); > + int i; > + > + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); > + > + for (i = first_thread; i < first_thread + threads_per_core; i++) { > + int i_group_start = 
get_cpu_thread_group_start(i, tg); > + > + if (unlikely(i_group_start == -1)) { > + WARN_ON_ONCE(1); > + return -ENODATA; > + } > + > + if (i_group_start == cpu_group_start) > + cpumask_set_cpu(i, *mask); > + } > + > + return 0; > +} > + > static int __init init_thread_group_cache_map(int cpu, int cache_property) > > { > - int first_thread = cpu_first_thread_sibling(cpu); > - int i, cpu_group_start = -1, err = 0; > + int cpu_group_start = -1, err = 0; > struct thread_groups *tg = NULL; > cpumask_var_t *mask = NULL; > > if (cache_property != THREAD_GROUP_SHARE_L1 && > - cache_property != THREAD_GROUP_SHARE_L2) > + cache_property != THREAD_GROUP_SHARE_L2_L3) > return -EINVAL; > > tg = get_thread_groups(cpu, cache_property, &err); > + > if (!tg) > return err; > > @@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) > return -ENODATA; > } > > - if (cache_property == THREAD_GROUP_SHARE_L1) > + if (cache_property == THREAD_GROUP_SHARE_L1) { > mask = &per_cpu(thread_group_l1_cache_map, cpu); > - else if (cache_property == THREAD_GROUP_SHARE_L2) > + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); > + } > + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) { > mask = &per_cpu(thread_group_l2_cache_map, cpu); > - > - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); > - > - for (i = first_thread; i < first_thread + threads_per_core; i++) { > - int i_group_start = get_cpu_thread_group_start(i, tg); > - > - if (unlikely(i_group_start == -1)) { > - WARN_ON_ONCE(1); > - return -ENODATA; > - } > - > - if (i_group_start == cpu_group_start) > - cpumask_set_cpu(i, *mask); > + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); > + mask = &per_cpu(thread_group_l3_cache_map, cpu); > + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); > } > > + > return 0; > } > > @@ -1020,14 +1042,16 @@ static int __init init_big_cores(void) > has_big_cores = true; > > for_each_possible_cpu(cpu) { > - int err 
= init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); > + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3); > > if (err) > return err; > } > > thread_group_shares_l2 = true; > - pr_debug("L2 cache only shared by the threads in the small core\n"); > + thread_group_shares_l3 = true; > + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); > + > return 0; > } > > -- > 2.26.3 >
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 1259040cc3a4..7ef1cd8168a0 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -35,6 +35,7 @@ extern int *chip_id_lookup_table; DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); #ifdef CONFIG_SMP @@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu); extern bool has_big_cores; extern bool thread_group_shares_l2; +extern bool thread_group_shares_l3; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu); #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() #define thread_group_shares_l2 0 +#define thread_group_shares_l3 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 20d91693eac1..cf1be75b7833 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) else if (thread_group_shares_l2 && level == 2) return cpumask_first(per_cpu(thread_group_l2_cache_map, cpu_id)); + else if (thread_group_shares_l3 && level == 3) + return cpumask_first(per_cpu(thread_group_l3_cache_map, + cpu_id)); return -1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a7fcac44a8e2..f2abd88e0c25 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -78,6 +78,7 @@ struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; bool thread_group_shares_l2; +bool thread_group_shares_l3; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -101,7 
+102,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 -#define THREAD_GROUP_SHARE_L2 2 +#define THREAD_GROUP_SHARE_L2_L3 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); */ DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +/* + * On P10, thread_group_l3_cache_map for each CPU is equal to the + * thread_group_l2_cache_map + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); + /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -889,19 +896,41 @@ static struct thread_groups *__init get_thread_groups(int cpu, return tg; } +static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +{ + int first_thread = cpu_first_thread_sibling(cpu); + int i; + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); + + for (i = first_thread; i < first_thread + threads_per_core; i++) { + int i_group_start = get_cpu_thread_group_start(i, tg); + + if (unlikely(i_group_start == -1)) { + WARN_ON_ONCE(1); + return -ENODATA; + } + + if (i_group_start == cpu_group_start) + cpumask_set_cpu(i, *mask); + } + + return 0; +} + static int __init init_thread_group_cache_map(int cpu, int cache_property) { - int first_thread = cpu_first_thread_sibling(cpu); - int i, cpu_group_start = -1, err = 0; + int cpu_group_start = -1, err = 0; struct thread_groups *tg = NULL; cpumask_var_t *mask = NULL; if (cache_property != THREAD_GROUP_SHARE_L1 && - cache_property != THREAD_GROUP_SHARE_L2) + cache_property != THREAD_GROUP_SHARE_L2_L3) return -EINVAL; tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) return err; @@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return -ENODATA; } - if (cache_property == THREAD_GROUP_SHARE_L1) + if (cache_property == THREAD_GROUP_SHARE_L1) { mask = &per_cpu(thread_group_l1_cache_map, cpu); - 
else if (cache_property == THREAD_GROUP_SHARE_L2) + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); + } + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) { mask = &per_cpu(thread_group_l2_cache_map, cpu); - - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); - - for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, tg); - - if (unlikely(i_group_start == -1)) { - WARN_ON_ONCE(1); - return -ENODATA; - } - - if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, *mask); + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); + mask = &per_cpu(thread_group_l3_cache_map, cpu); + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); } + return 0; } @@ -1020,14 +1042,16 @@ static int __init init_big_cores(void) has_big_cores = true; for_each_possible_cpu(cpu) { - int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3); if (err) return err; } thread_group_shares_l2 = true; - pr_debug("L2 cache only shared by the threads in the small core\n"); + thread_group_shares_l3 = true; + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); + return 0; }
On POWER10 systems, the "ibm,thread-groups" property "2" indicates that the CPUs in a thread-group share both the L2 and L3 caches. Hence, use cache_property = 2 itself to find both the L2 and L3 cache siblings. To do so, create a new thread_group_l3_cache_map to keep the list of L3 siblings, but fill the mask using the same property "2" array. Signed-off-by: Parth Shah <parth@linux.ibm.com> --- arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/kernel/cacheinfo.c | 3 ++ arch/powerpc/kernel/smp.c | 66 ++++++++++++++++++++++----------- 3 files changed, 51 insertions(+), 21 deletions(-)