diff mbox series

perf mem/c2c: Fix perf_mem_events to support powerpc

Message ID 20190114041402.14033-1-ravi.bangoria@linux.ibm.com (mailing list archive)
State Not Applicable
Headers show
Series perf mem/c2c: Fix perf_mem_events to support powerpc | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied
snowpatch_ozlabs/build-ppc64le success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64be success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-ppc64e success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/build-pmac32 success build succeeded & removed 0 sparse warning(s)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 121 lines checked

Commit Message

Ravi Bangoria Jan. 14, 2019, 4:14 a.m. UTC
Powerpc hw does not have an inbuilt latency filter (--ldlat) for the
mem-load event, and perf_mem_events by default includes ldlat=30, which
is causing failures on powerpc. Refactor the code to support perf mem/c2c
on powerpc.

This patch depends on kernel side changes done by Madhavan:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 tools/perf/Documentation/perf-c2c.txt | 16 ++++++++++++----
 tools/perf/Documentation/perf-mem.txt |  2 +-
 tools/perf/arch/x86/util/Build        |  1 +
 tools/perf/arch/x86/util/mem-events.c | 25 +++++++++++++++++++++++++
 tools/perf/util/mem-events.c          | 26 ++++----------------------
 tools/perf/util/mem-events.h          |  2 ++
 6 files changed, 45 insertions(+), 27 deletions(-)
 create mode 100644 tools/perf/arch/x86/util/mem-events.c

Comments

Ravi Bangoria Jan. 28, 2019, 10:08 a.m. UTC | #1
On 1/14/19 9:44 AM, Ravi Bangoria wrote:
> Powerpc hw does not have inbuilt latency filter (--ldlat) for mem-load
> event and, perf_mem_events by default includes ldlat=30 which is
> causing failure on powerpc. Refactor code to support perf mem/c2c on
> powerpc.
> 
> This patch depends on kernel side changes done my Madhavan:
> https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>


Arnaldo / Michael, Any thoughts?

Thanks.
Jiri Olsa Jan. 28, 2019, 11:07 a.m. UTC | #2
On Mon, Jan 14, 2019 at 09:44:02AM +0530, Ravi Bangoria wrote:

SNIP

> diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
> new file mode 100644
> index 0000000..5b4dcfe
> --- /dev/null
> +++ b/tools/perf/arch/x86/util/mem-events.c
> @@ -0,0 +1,25 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include "mem-events.h"
> +
> +struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
> +	PERF_MEM_EVENT("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"),
> +	PERF_MEM_EVENT("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
> +};
> +
> +static char mem_loads_name[100];
> +static bool mem_loads_name__init;
> +
> +char *perf_mem_events__name(int i)
> +{
> +	if (i == PERF_MEM_EVENTS__LOAD) {
> +		if (!mem_loads_name__init) {
> +			mem_loads_name__init = true;
> +			scnprintf(mem_loads_name, sizeof(mem_loads_name),
> +				  perf_mem_events[i].name,
> +				  perf_mem_events__loads_ldlat);
> +		}
> +		return mem_loads_name;
> +	}
> +
> +	return (char *)perf_mem_events[i].name;
> +}
> diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
> index 93f74d8..1ffefd3 100644
> --- a/tools/perf/util/mem-events.c
> +++ b/tools/perf/util/mem-events.c
> @@ -15,31 +15,13 @@
>  
>  unsigned int perf_mem_events__loads_ldlat = 30;
>  
> -#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
> -
> -struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
> -	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"mem-loads"),
> -	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
> +struct perf_mem_event __weak perf_mem_events[PERF_MEM_EVENTS__MAX] = {
> +	PERF_MEM_EVENT("ldlat-loads", "cpu/mem-loads/P", "mem-loads"),
> +	PERF_MEM_EVENT("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
>  };

I don't think the perf_mem_events array needs to be overloaded as well;
the perf_mem_events__name function should be enough, no?

thanks,
jirka
Michael Ellerman Jan. 29, 2019, 9:45 a.m. UTC | #3
Ravi Bangoria <ravi.bangoria@linux.ibm.com> writes:

> On 1/14/19 9:44 AM, Ravi Bangoria wrote:
>> Powerpc hw does not have inbuilt latency filter (--ldlat) for mem-load
>> event and, perf_mem_events by default includes ldlat=30 which is
>> causing failure on powerpc. Refactor code to support perf mem/c2c on
>> powerpc.
>> 
>> This patch depends on kernel side changes done my Madhavan:
>> https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html
>> 
>> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>
>
> Arnaldo / Michael, Any thoughts?

I haven't merged the kernel patch, I think because Maddy told me not to
because it would break the userspace tooling :)

What is the actual dependency between them? ie. should we merge the
kernel fix first or second or what?

cheers
Arnaldo Carvalho de Melo Jan. 29, 2019, 9:53 a.m. UTC | #4
Em Tue, Jan 29, 2019 at 08:45:44PM +1100, Michael Ellerman escreveu:
> Ravi Bangoria <ravi.bangoria@linux.ibm.com> writes:
> 
> > On 1/14/19 9:44 AM, Ravi Bangoria wrote:
> >> Powerpc hw does not have inbuilt latency filter (--ldlat) for mem-load
> >> event and, perf_mem_events by default includes ldlat=30 which is
> >> causing failure on powerpc. Refactor code to support perf mem/c2c on
> >> powerpc.
> >> 
> >> This patch depends on kernel side changes done my Madhavan:
> >> https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html
> >> 
> >> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> >
> >
> > Arnaldo / Michael, Any thoughts?
> 
> I haven't merged the kernel patch, I think because Maddy told me not to
> because it would break the userspace tooling :)
> 
> What is the actual dependency between them? ie. should we merge the
> kernel fix first or second or what?

I think it's just the tooling side; I haven't processed it because I'm
waiting for Ravi to address Jiri's comment, after that I'm happy to put
it in my perf/urgent branch that I'm brewing to push to Ingo today or
tomorrow.

- Arnaldo
Ravi Bangoria Jan. 29, 2019, 10:40 a.m. UTC | #5
On 1/29/19 3:23 PM, Arnaldo Carvalho de Melo wrote:
> I think its just a tooling side, I haven't processed it because I'm
> waiting for Ravi to address Jiri's comment, after that I'm happy to put
> it in my perf/urgent branch that I'm brewing to push to Ingo today or
> tomorrow.

Ah.. Will try to send v2 today.

Thanks.
diff mbox series

Patch

diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 095aebd..4e12551 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -19,8 +19,11 @@  C2C stands for Cache To Cache.
 The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
 you to track down the cacheline contentions.
 
-The tool is based on x86's load latency and precise store facility events
-provided by Intel CPUs. These events provide:
+On x86, the tool is based on load latency and precise store facility events
+provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
+with the thresholding feature.
+
+These events provide:
   - memory address of the access
   - type of the access (load and store details)
   - latency (in cycles) of the load access
@@ -46,7 +49,7 @@  RECORD OPTIONS
 
 -l::
 --ldlat::
-	Configure mem-loads latency.
+	Configure mem-loads latency. (x86 only)
 
 -k::
 --all-kernel::
@@ -119,11 +122,16 @@  Following perf record options are configured by default:
   -W,-d,--phys-data,--sample-cpu
 
 Unless specified otherwise with '-e' option, following events are monitored by
-default:
+default on x86:
 
   cpu/mem-loads,ldlat=30/P
   cpu/mem-stores/P
 
+and following on PowerPC:
+
+  cpu/mem-loads/P
+  cpu/mem-stores/P
+
 User can pass any 'perf record' option behind '--' mark, like (to enable
 callchains and system wide monitoring):
 
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index f8d2167..199ea0f 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -82,7 +82,7 @@  RECORD OPTIONS
 	Be more verbose (show counter open errors, etc)
 
 --ldlat <n>::
-	Specify desired latency for loads event.
+	Specify desired latency for loads event. (x86 only)
 
 In addition, for report all perf report options are valid, and for record
 all perf record options.
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 844b8f3..9827240 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -6,6 +6,7 @@  libperf-y += perf_regs.o
 libperf-y += group.o
 libperf-y += machine.o
 libperf-y += event.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
new file mode 100644
index 0000000..5b4dcfe
--- /dev/null
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -0,0 +1,25 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include "mem-events.h"
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	PERF_MEM_EVENT("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"),
+	PERF_MEM_EVENT("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
+};
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+char *perf_mem_events__name(int i)
+{
+	if (i == PERF_MEM_EVENTS__LOAD) {
+		if (!mem_loads_name__init) {
+			mem_loads_name__init = true;
+			scnprintf(mem_loads_name, sizeof(mem_loads_name),
+				  perf_mem_events[i].name,
+				  perf_mem_events__loads_ldlat);
+		}
+		return mem_loads_name;
+	}
+
+	return (char *)perf_mem_events[i].name;
+}
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 93f74d8..1ffefd3 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -15,31 +15,13 @@ 
 
 unsigned int perf_mem_events__loads_ldlat = 30;
 
-#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
-
-struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
-	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"mem-loads"),
-	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
+struct perf_mem_event __weak perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	PERF_MEM_EVENT("ldlat-loads", "cpu/mem-loads/P", "mem-loads"),
+	PERF_MEM_EVENT("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
 };
-#undef E
-
-#undef E
-
-static char mem_loads_name[100];
-static bool mem_loads_name__init;
 
-char *perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i)
 {
-	if (i == PERF_MEM_EVENTS__LOAD) {
-		if (!mem_loads_name__init) {
-			mem_loads_name__init = true;
-			scnprintf(mem_loads_name, sizeof(mem_loads_name),
-				  perf_mem_events[i].name,
-				  perf_mem_events__loads_ldlat);
-		}
-		return mem_loads_name;
-	}
-
 	return (char *)perf_mem_events[i].name;
 }
 
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index a889ec2..4cb6935 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -16,6 +16,8 @@  struct perf_mem_event {
 	const char	*sysfs_name;
 };
 
+#define PERF_MEM_EVENT(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
 enum {
 	PERF_MEM_EVENTS__LOAD,
 	PERF_MEM_EVENTS__STORE,