Message ID | 20210615070804.390341-4-parth@linux.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Make cache-object aware of L3 siblings by parsing "ibm,thread-groups" property | expand |
Related | show |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (419dfbc3e05d80c5f6d6856534cd0a21c22c22de) |
snowpatch_ozlabs/build-ppc64le | success | Build succeeded |
snowpatch_ozlabs/build-ppc64be | success | Build succeeded |
snowpatch_ozlabs/build-ppc64e | success | Build succeeded |
snowpatch_ozlabs/build-pmac32 | success | Build succeeded |
snowpatch_ozlabs/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 81 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Hi Parth, Sorry for the late review. On Tue, Jun 15, 2021 at 12:38:04PM +0530, Parth Shah wrote: > On POWER10 systems, the "ibm,thread-groups" property "2" indicates the cpus > in thread-group share both L2 and L3 caches. Hence, use cache_property = 2 > itself to find both the L2 and L3 cache siblings. > Hence, rename existing macros to detect if the cache property is for L2 or > L3 and use the L2 cache map itself to find the presence of L3 siblings. > > Signed-off-by: Parth Shah <parth@linux.ibm.com> > --- > arch/powerpc/include/asm/smp.h | 2 ++ > arch/powerpc/kernel/cacheinfo.c | 3 +++ > arch/powerpc/kernel/smp.c | 20 +++++++++++++++----- > 3 files changed, 20 insertions(+), 5 deletions(-) > > diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h > index 1259040cc3a4..55082d343bd2 100644 > --- a/arch/powerpc/include/asm/smp.h > +++ b/arch/powerpc/include/asm/smp.h > @@ -144,6 +144,7 @@ extern int cpu_to_core_id(int cpu); > > extern bool has_big_cores; > extern bool thread_group_shares_l2; > +extern bool thread_group_shares_l3; > > #define cpu_smt_mask cpu_smt_mask > #ifdef CONFIG_SCHED_SMT > @@ -198,6 +199,7 @@ extern void __cpu_die(unsigned int cpu); > #define hard_smp_processor_id() get_hard_smp_processor_id(0) > #define smp_setup_cpu_maps() > #define thread_group_shares_l2 0 > +#define thread_group_shares_l3 0 > static inline void inhibit_secondary_onlining(void) {} > static inline void uninhibit_secondary_onlining(void) {} > static inline const struct cpumask *cpu_sibling_mask(int cpu) > diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c > index 20d91693eac1..378ae20d05a9 100644 > --- a/arch/powerpc/kernel/cacheinfo.c > +++ b/arch/powerpc/kernel/cacheinfo.c > @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) > else if (thread_group_shares_l2 && level == 2) > return cpumask_first(per_cpu(thread_group_l2_cache_map, > cpu_id)); > + else if (thread_group_shares_l3 && level == 3) > + 
return cpumask_first(per_cpu(thread_group_l2_cache_map, > + cpu_id)); We should either rename thread_group_l2_cache_map as thread_group_l2_l3_cache_map or we should create a separate thread_group_l3_cache_map. I prefer the latter approach since it makes the code consistent. Otherwise, the patch looks good to me. -- Thanks and Regards gautham. > return -1; > } > > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c > index a34877257f2d..d0c70fcd0068 100644 > --- a/arch/powerpc/kernel/smp.c > +++ b/arch/powerpc/kernel/smp.c > @@ -78,6 +78,7 @@ struct task_struct *secondary_current; > bool has_big_cores; > bool coregroup_enabled; > bool thread_group_shares_l2; > +bool thread_group_shares_l3; > > DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); > DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); > @@ -101,7 +102,7 @@ enum { > > #define MAX_THREAD_LIST_SIZE 8 > #define THREAD_GROUP_SHARE_L1 1 > -#define THREAD_GROUP_SHARE_L2 2 > +#define THREAD_GROUP_SHARE_L2_L3 2 > struct thread_groups { > unsigned int property; > unsigned int nr_groups; > @@ -887,9 +888,16 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) > cpumask_var_t *mask = NULL; > > if (cache_property != THREAD_GROUP_SHARE_L1 && > - cache_property != THREAD_GROUP_SHARE_L2) > + cache_property != THREAD_GROUP_SHARE_L2_L3) > return -EINVAL; > > + /* > + * On P10 fused-core system, the L3 cache is shared between threads of a > + * small core only, but the "ibm,thread-groups" property is indicated as > + * "2" only which is interpreted as the thread-groups sharing both L2 > + * and L3 caches. Hence cache_property of THREAD_GROUP_SHARE_L2_L3 is > + * used for both L2 and L3 cache sibling detection. 
> + */ > tg = get_thread_groups(cpu, cache_property, &err); > if (!tg) > return err; > @@ -903,7 +911,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) > > if (cache_property == THREAD_GROUP_SHARE_L1) > mask = &per_cpu(thread_group_l1_cache_map, cpu); > - else if (cache_property == THREAD_GROUP_SHARE_L2) > + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) > mask = &per_cpu(thread_group_l2_cache_map, cpu); > > zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); > @@ -1009,14 +1017,16 @@ static int __init init_big_cores(void) > has_big_cores = true; > > for_each_possible_cpu(cpu) { > - int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); > + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3); > > if (err) > return err; > } > > thread_group_shares_l2 = true; > - pr_debug("L2 cache only shared by the threads in the small core\n"); > + thread_group_shares_l3 = true; > + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); > + > return 0; > } > > -- > 2.26.3 >
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 1259040cc3a4..55082d343bd2 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -144,6 +144,7 @@ extern int cpu_to_core_id(int cpu); extern bool has_big_cores; extern bool thread_group_shares_l2; +extern bool thread_group_shares_l3; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -198,6 +199,7 @@ extern void __cpu_die(unsigned int cpu); #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() #define thread_group_shares_l2 0 +#define thread_group_shares_l3 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 20d91693eac1..378ae20d05a9 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) else if (thread_group_shares_l2 && level == 2) return cpumask_first(per_cpu(thread_group_l2_cache_map, cpu_id)); + else if (thread_group_shares_l3 && level == 3) + return cpumask_first(per_cpu(thread_group_l2_cache_map, + cpu_id)); return -1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a34877257f2d..d0c70fcd0068 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -78,6 +78,7 @@ struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; bool thread_group_shares_l2; +bool thread_group_shares_l3; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -101,7 +102,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 -#define THREAD_GROUP_SHARE_L2 2 +#define THREAD_GROUP_SHARE_L2_L3 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -887,9 +888,16 @@ static 
int __init init_thread_group_cache_map(int cpu, int cache_property) cpumask_var_t *mask = NULL; if (cache_property != THREAD_GROUP_SHARE_L1 && - cache_property != THREAD_GROUP_SHARE_L2) + cache_property != THREAD_GROUP_SHARE_L2_L3) return -EINVAL; + /* + * On P10 fused-core system, the L3 cache is shared between threads of a + * small core only, but the "ibm,thread-groups" property is indicated as + * "2" only which is interpreted as the thread-groups sharing both L2 + * and L3 caches. Hence cache_property of THREAD_GROUP_SHARE_L2_L3 is + * used for both L2 and L3 cache sibling detection. + */ tg = get_thread_groups(cpu, cache_property, &err); if (!tg) return err; @@ -903,7 +911,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) if (cache_property == THREAD_GROUP_SHARE_L1) mask = &per_cpu(thread_group_l1_cache_map, cpu); - else if (cache_property == THREAD_GROUP_SHARE_L2) + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) mask = &per_cpu(thread_group_l2_cache_map, cpu); zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); @@ -1009,14 +1017,16 @@ static int __init init_big_cores(void) has_big_cores = true; for_each_possible_cpu(cpu) { - int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3); if (err) return err; } thread_group_shares_l2 = true; - pr_debug("L2 cache only shared by the threads in the small core\n"); + thread_group_shares_l3 = true; + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); + return 0; }
On POWER10 systems, the "ibm,thread-groups" property value "2" indicates that the cpus in a thread-group share both L2 and L3 caches. Hence, use cache_property = 2 itself to find both the L2 and L3 cache siblings. Accordingly, rename existing macros to detect if the cache property is for L2 or L3 and use the L2 cache map itself to find the presence of L3 siblings. Signed-off-by: Parth Shah <parth@linux.ibm.com> --- arch/powerpc/include/asm/smp.h | 2 ++ arch/powerpc/kernel/cacheinfo.c | 3 +++ arch/powerpc/kernel/smp.c | 20 +++++++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-)