diff mbox

[2/2] perf annotate: add powerpc support

Message ID 21173b25482dd19f85416f02bc8050324fe5eb76.1465563491.git.naveen.n.rao@linux.vnet.ibm.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Naveen N. Rao June 10, 2016, 1:02 p.m. UTC
Convert ins__find() to a __weak function for generic functionality,
while adding a powerpc-specific variant. We look at the instruction name
(mnemonic) of branch instructions and classify each one as a branch, a
function call (branch with LR update) or a function return (branch to
LR).

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Reported-by: Anton Blanchard <anton@ozlabs.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/util/Build      |  1 +
 tools/perf/arch/powerpc/util/annotate.c | 58 +++++++++++++++++++++++++++++++++
 tools/perf/util/annotate.c              | 17 +++++-----
 tools/perf/util/annotate.h              |  9 +++++
 4 files changed, 76 insertions(+), 9 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/annotate.c

Comments

Arnaldo Carvalho de Melo June 10, 2016, 1:36 p.m. UTC | #1
Em Fri, Jun 10, 2016 at 06:32:51PM +0530, Naveen N. Rao escreveu:
> Convert ins__find() to a __weak function for generic functionality,
> while adding a powerpc-specific variant. We look at the function name
> for branch instructions and classify the instructions to one among a
> branch, a function call (branch with LR update) or a function return
> (branch to LR).

How would this allow one to get a perf.data collected on a powerpc
system, transfer it to a x86-64 (or aarch64, to mention another
workstation wannabe chip) system and then try annotating it?

There was a previous discussion about this, and it involved having all
your ppc tables available as well as the other arches' tables, and then
choosing which one to use based on:

	normalize_arch(thread->mg->machine->env->arch)

just like was done to support cross unwinding; see the recent patch kit
by He Kuang, CCed.

- Arnaldo
 
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Anton Blanchard <anton@ozlabs.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
> Reported-by: Anton Blanchard <anton@ozlabs.org>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
> ---
>  tools/perf/arch/powerpc/util/Build      |  1 +
>  tools/perf/arch/powerpc/util/annotate.c | 58 +++++++++++++++++++++++++++++++++
>  tools/perf/util/annotate.c              | 17 +++++-----
>  tools/perf/util/annotate.h              |  9 +++++
>  4 files changed, 76 insertions(+), 9 deletions(-)
>  create mode 100644 tools/perf/arch/powerpc/util/annotate.c
> 
> diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
> index 90ad64b..d9e91d6 100644
> --- a/tools/perf/arch/powerpc/util/Build
> +++ b/tools/perf/arch/powerpc/util/Build
> @@ -2,6 +2,7 @@ libperf-y += header.o
>  libperf-y += sym-handling.o
>  libperf-y += kvm-stat.o
>  libperf-y += perf_regs.o
> +libperf-y += annotate.o
>  
>  libperf-$(CONFIG_DWARF) += dwarf-regs.o
>  libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
> diff --git a/tools/perf/arch/powerpc/util/annotate.c b/tools/perf/arch/powerpc/util/annotate.c
> new file mode 100644
> index 0000000..f069bd7
> --- /dev/null
> +++ b/tools/perf/arch/powerpc/util/annotate.c
> @@ -0,0 +1,58 @@
> +#include "perf.h"
> +#include "annotate.h"
> +
> +/*
> + * ins__find - powerpc override: classify a mnemonic by its spelling.
> + * @name: instruction mnemonic to look up (must not be NULL).
> + *
> + * Returns a newly allocated struct ins for branch mnemonics (jump_ops,
> + * call_ops when LR is updated, ret_ops for a branch to LR); NULL for any
> + * other mnemonic or when allocation fails.
> + */
> +struct ins *ins__find(const char *name)
> +{
> +	struct ins_ops *ops = &jump_ops;
> +	struct ins *ins;
> +	int i;
> +
> +	/* classify first, so that non-branch lookups never allocate (and leak) */
> +	if (name[0] != 'b')
> +		return NULL;
> +
> +	/* these start with 'b', but aren't branch instructions */
> +	if (!strncmp(name, "bcd", 3) || !strncmp(name, "brinc", 5) ||
> +	    !strncmp(name, "bper", 4))
> +		return NULL;
> +
> +	/* name is non-empty here, so i >= 0; skip an optional trailing hint */
> +	i = strlen(name) - 1;
> +	if (name[i] == '+' || name[i] == '-')
> +		i--;
> +
> +	if (name[i] == 'l' || (name[i] == 'a' && name[i - 1] == 'l')) {
> +		/*
> +		 * Ending in 'l' or 'la' means LR is updated, i.e. a call ...
> +		 * except 'bnl' (branch if not less than) and its absolute form.
> +		 */
> +		if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
> +		    strcmp(name, "bnl-") && strcmp(name, "bnla") &&
> +		    strcmp(name, "bnla+") && strcmp(name, "bnla-"))
> +			ops = &call_ops;
> +	} else if (name[i] == 'r' && name[i - 1] == 'l') {
> +		/* ending in 'lr' means a branch to LR, i.e. a return */
> +		ops = &ret_ops;
> +	}
> +
> +	ins = zalloc(sizeof(*ins));
> +	if (!ins)
> +		return NULL;
> +
> +	ins->name = strdup(name);
> +	if (!ins->name) {
> +		free(ins);	/* don't leak the half-built object */
> +		return NULL;
> +	}
> +	ins->ops = ops;
> +
> +	return ins;
> +}
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index e871b4e..0fa4fc5 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -25,7 +25,6 @@ const char 	*disassembler_style;
>  const char	*objdump_path;
>  static regex_t	 file_lineno;
>  
> -static struct ins *ins__find(const char *name);
>  static int disasm_line__parse(char *line, char **namep, char **rawp);
>  
>  static void ins__delete(struct ins_operands *ops)
> @@ -107,7 +106,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
>  	return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr);
>  }
>  
> -static struct ins_ops call_ops = {
> +struct ins_ops call_ops = {
>  	.parse	   = call__parse,
>  	.scnprintf = call__scnprintf,
>  };
> @@ -137,7 +136,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
>  	return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
>  }
>  
> -static struct ins_ops jump_ops = {
> +struct ins_ops jump_ops = {
>  	.parse	   = jump__parse,
>  	.scnprintf = jump__scnprintf,
>  };
> @@ -230,7 +229,7 @@ static void lock__delete(struct ins_operands *ops)
>  	zfree(&ops->target.name);
>  }
>  
> -static struct ins_ops lock_ops = {
> +struct ins_ops lock_ops = {
>  	.free	   = lock__delete,
>  	.parse	   = lock__parse,
>  	.scnprintf = lock__scnprintf,
> @@ -298,7 +297,7 @@ static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
>  			 ops->target.name ?: ops->target.raw);
>  }
>  
> -static struct ins_ops mov_ops = {
> +struct ins_ops mov_ops = {
>  	.parse	   = mov__parse,
>  	.scnprintf = mov__scnprintf,
>  };
> @@ -339,7 +338,7 @@ static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
>  			 ops->target.name ?: ops->target.raw);
>  }
>  
> -static struct ins_ops dec_ops = {
> +struct ins_ops dec_ops = {
>  	.parse	   = dec__parse,
>  	.scnprintf = dec__scnprintf,
>  };
> @@ -350,11 +349,11 @@ static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
>  	return scnprintf(bf, size, "%-6.6s", "nop");
>  }
>  
> -static struct ins_ops nop_ops = {
> +struct ins_ops nop_ops = {
>  	.scnprintf = nop__scnprintf,
>  };
>  
> -static struct ins_ops ret_ops = {
> +struct ins_ops ret_ops = {
>  	.scnprintf = ins__raw_scnprintf,
>  };
>  
> @@ -478,7 +477,7 @@ static void ins__sort(void)
>  	qsort(instructions, nmemb, sizeof(struct ins), ins__cmp);
>  }
>  
> -static struct ins *ins__find(const char *name)
> +__weak struct ins *ins__find(const char *name)
>  {
>  	const int nmemb = ARRAY_SIZE(instructions);
>  	static bool sorted;
> diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
> index 720a4c0..6d89c1d 100644
> --- a/tools/perf/util/annotate.h
> +++ b/tools/perf/util/annotate.h
> @@ -50,6 +50,15 @@ bool ins__is_jump(const struct ins *ins);
>  bool ins__is_call(const struct ins *ins);
>  bool ins__is_ret(const struct ins *ins);
>  int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
> +struct ins *ins__find(const char *name);
> +
> +extern struct ins_ops call_ops;
> +extern struct ins_ops jump_ops;
> +extern struct ins_ops ret_ops;
> +extern struct ins_ops mov_ops;
> +extern struct ins_ops lock_ops;
> +extern struct ins_ops dec_ops;
> +extern struct ins_ops nop_ops;
>  
>  struct annotation;
>  
> -- 
> 2.8.2
Naveen N. Rao June 10, 2016, 2:38 p.m. UTC | #2
On 2016/06/10 10:36AM, Arnaldo Carvalho de Melo wrote:
> Em Fri, Jun 10, 2016 at 06:32:51PM +0530, Naveen N. Rao escreveu:
> > Convert ins__find() to a __weak function for generic functionality,
> > while adding a powerpc-specific variant. We look at the function name
> > for branch instructions and classify the instructions to one among a
> > branch, a function call (branch with LR update) or a function return
> > (branch to LR).
> 
> How would this allow one to get a perf.data collected on a powerpc
> system, transfer it to a x86-64 (or aarch64, to mention another
> workstation wannabe chip) system and then try annotating it?
> 
> There was a previous discussion about this, and it involved having all
> yout ppc tables available as well as other arches tables, and then
> choosing which one to use based on:
> 
> 	normalize_arch(thread->mg->machine->env->arch)
> 
> just like was done for support cross unwinding, see recent patch kit by
> He Kuang, CCed.

Nice. This would be good to have. I will look at adding powerpc support 
for cross-architecture unwind.

However, for cross-architecture annotation, I think there will be a lot 
more dependencies since perf currently uses objdump to obtain the 
disassembly. In addition, the actual binaries will also be needed.


- Naveen
Naveen N. Rao June 10, 2016, 4:01 p.m. UTC | #3
On 2016/06/10 08:08PM, Naveen N. Rao wrote:
> On 2016/06/10 10:36AM, Arnaldo Carvalho de Melo wrote:
> > Em Fri, Jun 10, 2016 at 06:32:51PM +0530, Naveen N. Rao escreveu:
> > > Convert ins__find() to a __weak function for generic functionality,
> > > while adding a powerpc-specific variant. We look at the function name
> > > for branch instructions and classify the instructions to one among a
> > > branch, a function call (branch with LR update) or a function return
> > > (branch to LR).
> > 
> > How would this allow one to get a perf.data collected on a powerpc
> > system, transfer it to a x86-64 (or aarch64, to mention another
> > workstation wannabe chip) system and then try annotating it?
> > 
> > There was a previous discussion about this, and it involved having all
> > yout ppc tables available as well as other arches tables, and then
> > choosing which one to use based on:
> > 
> > 	normalize_arch(thread->mg->machine->env->arch)
> > 
> > just like was done for support cross unwinding, see recent patch kit by
> > He Kuang, CCed.
> 
> Nice. This would be good to have. I will look at adding powerpc support 
> for cross-architecture unwind.
> 
> However, for cross-architecture annotation, I think there will be a lot 
> more dependencies since perf currently uses objdump to obtain the 
> disassembly. In addition, the actual binaries will also be needed.

perf report already has a --objdump flag... will look into this later 
next week.

- Naveen
Michael Ellerman June 14, 2016, 3:40 a.m. UTC | #4
On Fri, 2016-06-10 at 20:08 +0530, Naveen N. Rao wrote:
> On 2016/06/10 10:36AM, Arnaldo Carvalho de Melo wrote:
> > Em Fri, Jun 10, 2016 at 06:32:51PM +0530, Naveen N. Rao escreveu:
> > > Convert ins__find() to a __weak function for generic functionality,
> > > while adding a powerpc-specific variant. We look at the function name
> > > for branch instructions and classify the instructions to one among a
> > > branch, a function call (branch with LR update) or a function return
> > > (branch to LR).
> > 
> > How would this allow one to get a perf.data collected on a powerpc
> > system, transfer it to a x86-64 (or aarch64, to mention another
> > workstation wannabe chip) system and then try annotating it?
> > 
> > There was a previous discussion about this, and it involved having all
> > yout ppc tables available as well as other arches tables, and then
> > choosing which one to use based on:
> > 
> > 	normalize_arch(thread->mg->machine->env->arch)
> > 
> > just like was done for support cross unwinding, see recent patch kit by
> > He Kuang, CCed.
> 
> Nice. This would be good to have. I will look at adding powerpc support 
> for cross-architecture unwind.
> 
> However, for cross-architecture annotation, I think there will be a lot 
> more dependencies since perf currently uses objdump to obtain the 
> disassembly. In addition, the actual binaries will also be needed.

It's possible to build a multi-arch objdump, I don't know if it's packaged on
all distros, or if perf wants to depend on it.

cheers
Arnaldo Carvalho de Melo June 14, 2016, 12:46 p.m. UTC | #5
Em Tue, Jun 14, 2016 at 01:40:47PM +1000, Michael Ellerman escreveu:
> On Fri, 2016-06-10 at 20:08 +0530, Naveen N. Rao wrote:
> > On 2016/06/10 10:36AM, Arnaldo Carvalho de Melo wrote:
> > > Em Fri, Jun 10, 2016 at 06:32:51PM +0530, Naveen N. Rao escreveu:
> > > > Convert ins__find() to a __weak function for generic functionality,
> > > > while adding a powerpc-specific variant. We look at the function name
> > > > for branch instructions and classify the instructions to one among a
> > > > branch, a function call (branch with LR update) or a function return
> > > > (branch to LR).

> > > How would this allow one to get a perf.data collected on a powerpc
> > > system, transfer it to a x86-64 (or aarch64, to mention another
> > > workstation wannabe chip) system and then try annotating it?

> > > There was a previous discussion about this, and it involved having all
> > > yout ppc tables available as well as other arches tables, and then
> > > choosing which one to use based on:

> > > 	normalize_arch(thread->mg->machine->env->arch)

> > > just like was done for support cross unwinding, see recent patch kit by
> > > He Kuang, CCed.

> > Nice. This would be good to have. I will look at adding powerpc support 
> > for cross-architecture unwind.

> > However, for cross-architecture annotation, I think there will be a lot 
> > more dependencies since perf currently uses objdump to obtain the 
> > disassembly. In addition, the actual binaries will also be needed.
 
> It's possible to build a multi-arch objdump, I don't know if it's packaged on
> all distros, or if perf wants to depend on it.

Somebody noticed that we can specify a different objdump binary, from a
cross toolchain package.

We need to at least check whether the objdump being used supports the
architecture on which the perf.data file was generated, refusing to
process files for which there is no support and giving a clear message
to users who try to process such files.

I.e. checking if normalize_arch(thread->mg->machine->env->arch) is one of:

[acme@jouet linux]$ objdump -m 2>&1|grep 'objdump: supported' 
objdump: supported targets: elf64-x86-64 elf32-i386 elf32-x86-64 a.out-i386-linux pei-i386 pei-x86-64 elf64-l1om elf64-k1om elf64-little elf64-big elf32-little elf32-big plugin srec symbolsrec verilog tekhex binary ihex
objdump: supported architectures: i386 i386:x86-64 i386:x64-32 i8086 i386:intel i386:x86-64:intel i386:x64-32:intel i386:nacl i386:x86-64:nacl i386:x64-32:nacl l1om l1om:intel k1om k1om:intel plugin
[acme@jouet linux]$

- Arnaldo
diff mbox

Patch

diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 90ad64b..d9e91d6 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -2,6 +2,7 @@  libperf-y += header.o
 libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
+libperf-y += annotate.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/annotate.c b/tools/perf/arch/powerpc/util/annotate.c
new file mode 100644
index 0000000..f069bd7
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/annotate.c
@@ -0,0 +1,58 @@ 
+#include "perf.h"
+#include "annotate.h"
+
+/*
+ * ins__find - powerpc override: classify a mnemonic by its spelling.
+ * @name: instruction mnemonic to look up (must not be NULL).
+ *
+ * Returns a newly allocated struct ins for branch mnemonics (jump_ops,
+ * call_ops when LR is updated, ret_ops for a branch to LR); NULL for any
+ * other mnemonic or when allocation fails.
+ */
+struct ins *ins__find(const char *name)
+{
+	struct ins_ops *ops = &jump_ops;
+	struct ins *ins;
+	int i;
+
+	/* classify first, so that non-branch lookups never allocate (and leak) */
+	if (name[0] != 'b')
+		return NULL;
+
+	/* these start with 'b', but aren't branch instructions */
+	if (!strncmp(name, "bcd", 3) || !strncmp(name, "brinc", 5) ||
+	    !strncmp(name, "bper", 4))
+		return NULL;
+
+	/* name is non-empty here, so i >= 0; skip an optional trailing hint */
+	i = strlen(name) - 1;
+	if (name[i] == '+' || name[i] == '-')
+		i--;
+
+	if (name[i] == 'l' || (name[i] == 'a' && name[i - 1] == 'l')) {
+		/*
+		 * Ending in 'l' or 'la' means LR is updated, i.e. a call ...
+		 * except 'bnl' (branch if not less than) and its absolute form.
+		 */
+		if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+		    strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+		    strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+			ops = &call_ops;
+	} else if (name[i] == 'r' && name[i - 1] == 'l') {
+		/* ending in 'lr' means a branch to LR, i.e. a return */
+		ops = &ret_ops;
+	}
+
+	ins = zalloc(sizeof(*ins));
+	if (!ins)
+		return NULL;
+
+	ins->name = strdup(name);
+	if (!ins->name) {
+		free(ins);	/* don't leak the half-built object */
+		return NULL;
+	}
+	ins->ops = ops;
+
+	return ins;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index e871b4e..0fa4fc5 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -25,7 +25,6 @@  const char 	*disassembler_style;
 const char	*objdump_path;
 static regex_t	 file_lineno;
 
-static struct ins *ins__find(const char *name);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -107,7 +106,7 @@  static int call__scnprintf(struct ins *ins, char *bf, size_t size,
 	return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr);
 }
 
-static struct ins_ops call_ops = {
+struct ins_ops call_ops = {
 	.parse	   = call__parse,
 	.scnprintf = call__scnprintf,
 };
@@ -137,7 +136,7 @@  static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 	return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
 }
 
-static struct ins_ops jump_ops = {
+struct ins_ops jump_ops = {
 	.parse	   = jump__parse,
 	.scnprintf = jump__scnprintf,
 };
@@ -230,7 +229,7 @@  static void lock__delete(struct ins_operands *ops)
 	zfree(&ops->target.name);
 }
 
-static struct ins_ops lock_ops = {
+struct ins_ops lock_ops = {
 	.free	   = lock__delete,
 	.parse	   = lock__parse,
 	.scnprintf = lock__scnprintf,
@@ -298,7 +297,7 @@  static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
 			 ops->target.name ?: ops->target.raw);
 }
 
-static struct ins_ops mov_ops = {
+struct ins_ops mov_ops = {
 	.parse	   = mov__parse,
 	.scnprintf = mov__scnprintf,
 };
@@ -339,7 +338,7 @@  static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
 			 ops->target.name ?: ops->target.raw);
 }
 
-static struct ins_ops dec_ops = {
+struct ins_ops dec_ops = {
 	.parse	   = dec__parse,
 	.scnprintf = dec__scnprintf,
 };
@@ -350,11 +349,11 @@  static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
 	return scnprintf(bf, size, "%-6.6s", "nop");
 }
 
-static struct ins_ops nop_ops = {
+struct ins_ops nop_ops = {
 	.scnprintf = nop__scnprintf,
 };
 
-static struct ins_ops ret_ops = {
+struct ins_ops ret_ops = {
 	.scnprintf = ins__raw_scnprintf,
 };
 
@@ -478,7 +477,7 @@  static void ins__sort(void)
 	qsort(instructions, nmemb, sizeof(struct ins), ins__cmp);
 }
 
-static struct ins *ins__find(const char *name)
+__weak struct ins *ins__find(const char *name)
 {
 	const int nmemb = ARRAY_SIZE(instructions);
 	static bool sorted;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 720a4c0..6d89c1d 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -50,6 +50,15 @@  bool ins__is_jump(const struct ins *ins);
 bool ins__is_call(const struct ins *ins);
 bool ins__is_ret(const struct ins *ins);
 int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+struct ins *ins__find(const char *name);
+
+extern struct ins_ops call_ops;
+extern struct ins_ops jump_ops;
+extern struct ins_ops ret_ops;
+extern struct ins_ops mov_ops;
+extern struct ins_ops lock_ops;
+extern struct ins_ops dec_ops;
+extern struct ins_ops nop_ops;
 
 struct annotation;