diff mbox series

[4/4] powerpc/perf: Add data source encodings for power10 platform

Message ID 20211206091749.87585-5-kjain@linux.ibm.com (mailing list archive)
State Accepted
Headers show
Series perf: Add new macros for mem_hops field | expand

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_perf success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 7 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 24 jobs.

Commit Message

Kajol Jain Dec. 6, 2021, 9:17 a.m. UTC
The code represents memory/cache level data based on the PERF_MEM_LVL_*
namespace, which is in the process of deprecation in favour of the
newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.
Add data source encodings to represent cache/memory data based on
newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.

Add data source encodings to represent data coming from local
memory/Remote memory/distant memory and remote/distant cache hits.

In order to represent data coming from OpenCAPI cache/memory, we use the
LVLNUM "PMEM" field, which is used to represent persistent memory accesses.

Result in power10 system with patch changes:

localhost:# ./perf mem report --sort="mem,sym,dso" --stdio
 # Overhead       Samples  Memory access             Symbol                      Shared Object
 # ........  ............  ........................  ..........................  ................
 #
    29.46%          2331  L1 or L1 hit              [.] __random                                     libc-2.28.so
    23.11%          2121  L1 or L1 hit              [.] producer_populate_cache                      producer_consumer
    18.56%          1758  L1 or L1 hit              [.] __random_r                                   libc-2.28.so
    15.64%          1559  L2 or L2 hit              [.] __random                                     libc-2.28.so
    .....
    0.09%              5  Remote socket, same board Any cache hit             [.] __random         libc-2.28.so
    0.07%              4  Remote socket, same board Any cache hit             [.] __random         libc-2.28.so
    .....

Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/perf/isa207-common.c | 54 ++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 12 deletions(-)

Comments

Arnaldo Carvalho de Melo Dec. 22, 2021, 12:41 p.m. UTC | #1
Em Mon, Dec 06, 2021 at 02:47:49PM +0530, Kajol Jain escreveu:
> The code represent memory/cache level data based on PERF_MEM_LVL_*
> namespace, which is in the process of deprication in the favour of
> newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.
> Add data source encodings to represent cache/memory data based on
> newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.

Thanks, applied.

- Arnaldo

 
> Add data source encodings to represent data coming from local
> memory/Remote memory/distant memory and remote/distant cache hits.
> 
> Inorder to represent data coming from OpenCAPI cache/memory, we use
> LVLNUM "PMEM" field which is used to present persistent memory accesses.
> 
> Result in power10 system with patch changes:
> 
> localhost:# ./perf mem report --sort="mem,sym,dso" --stdio
>  # Overhead       Samples  Memory access             Symbol                      Shared Object
>  # ........  ............  ........................  ..........................  ................
>  #
>     29.46%          2331  L1 or L1 hit              [.] __random                                     libc-2.28.so
>     23.11%          2121  L1 or L1 hit              [.] producer_populate_cache                      producer_consumer
>     18.56%          1758  L1 or L1 hit              [.] __random_r                                   libc-2.28.so
>     15.64%          1559  L2 or L2 hit              [.] __random                                     libc-2.28.so
>     .....
>     0.09%              5  Remote socket, same board Any cache hit             [.] __random         libc-2.28.so
>     0.07%              4  Remote socket, same board Any cache hit             [.] __random         libc-2.28.so
>     .....
> 
> Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
> ---
>  arch/powerpc/perf/isa207-common.c | 54 ++++++++++++++++++++++++-------
>  1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
> index 6c6bc8b7d887..4037ea652522 100644
> --- a/arch/powerpc/perf/isa207-common.c
> +++ b/arch/powerpc/perf/isa207-common.c
> @@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
>  		ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
>  		break;
>  	case 4:
> -		if (sub_idx <= 1)
> -			ret = PH(LVL, LOC_RAM);
> -		else if (sub_idx > 1 && sub_idx <= 2)
> -			ret = PH(LVL, REM_RAM1);
> -		else
> -			ret = PH(LVL, REM_RAM2);
> -		ret |= P(SNOOP, HIT);
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			ret = P(SNOOP, HIT);
> +
> +			if (sub_idx == 1)
> +				ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
> +			else if (sub_idx == 2 || sub_idx == 3)
> +				ret |= P(LVL, HIT) | LEVEL(PMEM);
> +			else if (sub_idx == 4)
> +				ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
> +			else if (sub_idx == 5 || sub_idx == 7)
> +				ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
> +			else if (sub_idx == 6)
> +				ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
> +		} else {
> +			if (sub_idx <= 1)
> +				ret = PH(LVL, LOC_RAM);
> +			else if (sub_idx > 1 && sub_idx <= 2)
> +				ret = PH(LVL, REM_RAM1);
> +			else
> +				ret = PH(LVL, REM_RAM2);
> +			ret |= P(SNOOP, HIT);
> +		}
>  		break;
>  	case 5:
>  		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
>  		}
>  		break;
>  	case 6:
> -		ret = PH(LVL, REM_CCE2);
> -		if ((sub_idx == 0) || (sub_idx == 2))
> -			ret |= P(SNOOP, HIT);
> -		else if ((sub_idx == 1) || (sub_idx == 3))
> -			ret |= P(SNOOP, HITM);
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			if (sub_idx == 0)
> +				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
> +					P(SNOOP, HIT) | P(HOPS, 2);
> +			else if (sub_idx == 1)
> +				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
> +					P(SNOOP, HITM) | P(HOPS, 2);
> +			else if (sub_idx == 2)
> +				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
> +					P(SNOOP, HIT) | P(HOPS, 3);
> +			else if (sub_idx == 3)
> +				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
> +					P(SNOOP, HITM) | P(HOPS, 3);
> +		} else {
> +			ret = PH(LVL, REM_CCE2);
> +			if (sub_idx == 0 || sub_idx == 2)
> +				ret |= P(SNOOP, HIT);
> +			else if (sub_idx == 1 || sub_idx == 3)
> +				ret |= P(SNOOP, HITM);
> +		}
>  		break;
>  	case 7:
>  		ret = PM(LVL, L1);
> -- 
> 2.27.0
diff mbox series

Patch

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 6c6bc8b7d887..4037ea652522 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -229,13 +229,28 @@  static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
 		ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
 		break;
 	case 4:
-		if (sub_idx <= 1)
-			ret = PH(LVL, LOC_RAM);
-		else if (sub_idx > 1 && sub_idx <= 2)
-			ret = PH(LVL, REM_RAM1);
-		else
-			ret = PH(LVL, REM_RAM2);
-		ret |= P(SNOOP, HIT);
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			ret = P(SNOOP, HIT);
+
+			if (sub_idx == 1)
+				ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
+			else if (sub_idx == 2 || sub_idx == 3)
+				ret |= P(LVL, HIT) | LEVEL(PMEM);
+			else if (sub_idx == 4)
+				ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
+			else if (sub_idx == 5 || sub_idx == 7)
+				ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
+			else if (sub_idx == 6)
+				ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
+		} else {
+			if (sub_idx <= 1)
+				ret = PH(LVL, LOC_RAM);
+			else if (sub_idx > 1 && sub_idx <= 2)
+				ret = PH(LVL, REM_RAM1);
+			else
+				ret = PH(LVL, REM_RAM2);
+			ret |= P(SNOOP, HIT);
+		}
 		break;
 	case 5:
 		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -261,11 +276,26 @@  static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
 		}
 		break;
 	case 6:
-		ret = PH(LVL, REM_CCE2);
-		if ((sub_idx == 0) || (sub_idx == 2))
-			ret |= P(SNOOP, HIT);
-		else if ((sub_idx == 1) || (sub_idx == 3))
-			ret |= P(SNOOP, HITM);
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (sub_idx == 0)
+				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HIT) | P(HOPS, 2);
+			else if (sub_idx == 1)
+				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HITM) | P(HOPS, 2);
+			else if (sub_idx == 2)
+				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HIT) | P(HOPS, 3);
+			else if (sub_idx == 3)
+				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HITM) | P(HOPS, 3);
+		} else {
+			ret = PH(LVL, REM_CCE2);
+			if (sub_idx == 0 || sub_idx == 2)
+				ret |= P(SNOOP, HIT);
+			else if (sub_idx == 1 || sub_idx == 3)
+				ret |= P(SNOOP, HITM);
+		}
 		break;
 	case 7:
 		ret = PM(LVL, L1);