Message ID | 20211206091749.87585-5-kjain@linux.ibm.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | perf: Add new macros for mem_hops field | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/github-powerpc_selftests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_perf | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_ppctests | success | Successfully ran 8 jobs. |
snowpatch_ozlabs/github-powerpc_sparse | success | Successfully ran 4 jobs. |
snowpatch_ozlabs/github-powerpc_clang | success | Successfully ran 7 jobs. |
snowpatch_ozlabs/github-powerpc_kernel_qemu | success | Successfully ran 24 jobs. |
Em Mon, Dec 06, 2021 at 02:47:49PM +0530, Kajol Jain escreveu: > The code represents memory/cache level data based on PERF_MEM_LVL_* > namespace, which is in the process of deprecation in favour of > newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. > Add data source encodings to represent cache/memory data based on > newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Thanks, applied. - Arnaldo > Add data source encodings to represent data coming from local > memory/Remote memory/distant memory and remote/distant cache hits. > > In order to represent data coming from OpenCAPI cache/memory, we use > LVLNUM "PMEM" field which is used to represent persistent memory accesses. > > Result in power10 system with patch changes: > > localhost:# ./perf mem report --sort="mem,sym,dso" --stdio > # Overhead Samples Memory access Symbol Shared Object > # ........ ............ ........................ .......................... ................ > # > 29.46% 2331 L1 or L1 hit [.] __random libc-2.28.so > 23.11% 2121 L1 or L1 hit [.] producer_populate_cache producer_consumer > 18.56% 1758 L1 or L1 hit [.] __random_r libc-2.28.so > 15.64% 1559 L2 or L2 hit [.] __random libc-2.28.so > ..... > 0.09% 5 Remote socket, same board Any cache hit [.] __random libc-2.28.so > 0.07% 4 Remote socket, same board Any cache hit [.] __random libc-2.28.so > ..... 
> > Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com> > Signed-off-by: Kajol Jain <kjain@linux.ibm.com> > --- > arch/powerpc/perf/isa207-common.c | 54 ++++++++++++++++++++++++------- > 1 file changed, 42 insertions(+), 12 deletions(-) > > diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c > index 6c6bc8b7d887..4037ea652522 100644 > --- a/arch/powerpc/perf/isa207-common.c > +++ b/arch/powerpc/perf/isa207-common.c > @@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) > ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); > break; > case 4: > - if (sub_idx <= 1) > - ret = PH(LVL, LOC_RAM); > - else if (sub_idx > 1 && sub_idx <= 2) > - ret = PH(LVL, REM_RAM1); > - else > - ret = PH(LVL, REM_RAM2); > - ret |= P(SNOOP, HIT); > + if (cpu_has_feature(CPU_FTR_ARCH_31)) { > + ret = P(SNOOP, HIT); > + > + if (sub_idx == 1) > + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); > + else if (sub_idx == 2 || sub_idx == 3) > + ret |= P(LVL, HIT) | LEVEL(PMEM); > + else if (sub_idx == 4) > + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); > + else if (sub_idx == 5 || sub_idx == 7) > + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; > + else if (sub_idx == 6) > + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); > + } else { > + if (sub_idx <= 1) > + ret = PH(LVL, LOC_RAM); > + else if (sub_idx > 1 && sub_idx <= 2) > + ret = PH(LVL, REM_RAM1); > + else > + ret = PH(LVL, REM_RAM2); > + ret |= P(SNOOP, HIT); > + } > break; > case 5: > if (cpu_has_feature(CPU_FTR_ARCH_31)) { > @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) > } > break; > case 6: > - ret = PH(LVL, REM_CCE2); > - if ((sub_idx == 0) || (sub_idx == 2)) > - ret |= P(SNOOP, HIT); > - else if ((sub_idx == 1) || (sub_idx == 3)) > - ret |= P(SNOOP, HITM); > + if (cpu_has_feature(CPU_FTR_ARCH_31)) { > + if (sub_idx == 0) > + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | > + P(SNOOP, HIT) | P(HOPS, 2); > + else if (sub_idx == 1) > + 
ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | > + P(SNOOP, HITM) | P(HOPS, 2); > + else if (sub_idx == 2) > + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | > + P(SNOOP, HIT) | P(HOPS, 3); > + else if (sub_idx == 3) > + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | > + P(SNOOP, HITM) | P(HOPS, 3); > + } else { > + ret = PH(LVL, REM_CCE2); > + if (sub_idx == 0 || sub_idx == 2) > + ret |= P(SNOOP, HIT); > + else if (sub_idx == 1 || sub_idx == 3) > + ret |= P(SNOOP, HITM); > + } > break; > case 7: > ret = PM(LVL, L1); > -- > 2.27.0
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6c6bc8b7d887..4037ea652522 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: - if (sub_idx <= 1) - ret = PH(LVL, LOC_RAM); - else if (sub_idx > 1 && sub_idx <= 2) - ret = PH(LVL, REM_RAM1); - else - ret = PH(LVL, REM_RAM2); - ret |= P(SNOOP, HIT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + ret = P(SNOOP, HIT); + + if (sub_idx == 1) + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); + else if (sub_idx == 2 || sub_idx == 3) + ret |= P(LVL, HIT) | LEVEL(PMEM); + else if (sub_idx == 4) + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); + else if (sub_idx == 5 || sub_idx == 7) + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; + else if (sub_idx == 6) + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); + } else { + if (sub_idx <= 1) + ret = PH(LVL, LOC_RAM); + else if (sub_idx > 1 && sub_idx <= 2) + ret = PH(LVL, REM_RAM1); + else + ret = PH(LVL, REM_RAM2); + ret |= P(SNOOP, HIT); + } break; case 5: if (cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) } break; case 6: - ret = PH(LVL, REM_CCE2); - if ((sub_idx == 0) || (sub_idx == 2)) - ret |= P(SNOOP, HIT); - else if ((sub_idx == 1) || (sub_idx == 3)) - ret |= P(SNOOP, HITM); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (sub_idx == 0) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 2); + else if (sub_idx == 1) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 2); + else if (sub_idx == 2) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 3); + else if (sub_idx == 3) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 3); + } else { + ret = PH(LVL, REM_CCE2); + if (sub_idx == 
0 || sub_idx == 2) + ret |= P(SNOOP, HIT); + else if (sub_idx == 1 || sub_idx == 3) + ret |= P(SNOOP, HITM); + } break; case 7: ret = PM(LVL, L1);