diff mbox series

[V2,4/9] tools/perf: Add support to capture and parse raw instruction in objdump

Message ID 20240506121906.76639-5-atrajeev@linux.vnet.ibm.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Add data type profiling support for powerpc | expand

Commit Message

Athira Rajeev May 6, 2024, 12:19 p.m. UTC
Add support to capture and parse raw instruction in objdump.
Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
with "objdump" while disassemble. Example from powerpc with this option
for an instruction address is:

Snippet from:
objdump  --start-address=<address> --stop-address=<address>  -d --no-show-raw-insn -C <vmlinux>

c0000000010224b4:	lwz     r10,0(r9)

This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset. Also to find whether there is a memory
reference in the operands, "memory_ref_char" field of objdump is used.
For x86, "(" is used as memory_ref_char to tackle instructions of the
form "mov  (%rax), %rcx".

In case of powerpc, not all instructions using "(" are the only memory
instructions. Example, above instruction can also be of extended form (X
form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
and extract the source/target registers, patch adds support to use raw
instruction. With raw instruction, macros are added to extract opcode
and register fields.

"struct ins_operands" and "struct ins" is updated to carry opcode and
raw instruction binary code (raw_insn). Function "disasm_line__parse"
is updated to fill the raw instruction hex value and opcode in newly
added fields. There is no changes in existing code paths, which parses
the disassembled code. The architecture using the instruction name and
present approach is not altered. Since this approach targets powerpc,
the macro implementation is added for powerpc as of now.

Example:
representation using --show-raw-insn in objdump gives result:

38 01 81 e8     ld      r4,312(r1)

Here "38 01 81 e8" is the raw instruction representation. In powerpc,
this translates to instruction form: "ld RT,DS(RA)" and binary code
as:

Comments

Namhyung Kim May 7, 2024, 4:57 a.m. UTC | #1
On Mon, May 6, 2024 at 5:21 AM Athira Rajeev
<atrajeev@linux.vnet.ibm.com> wrote:
>
> Add support to capture and parse raw instruction in objdump.
> Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
> with "objdump" while disassemble. Example from powerpc with this option
> for an instruction address is:
>
> Snippet from:
> objdump  --start-address=<address> --stop-address=<address>  -d --no-show-raw-insn -C <vmlinux>
>
> c0000000010224b4:       lwz     r10,0(r9)
>
> This line "lwz r10,0(r9)" is parsed to extract instruction name,
> registers names and offset. Also to find whether there is a memory
> reference in the operands, "memory_ref_char" field of objdump is used.
> For x86, "(" is used as memory_ref_char to tackle instructions of the
> form "mov  (%rax), %rcx".
>
> In case of powerpc, not all instructions using "(" are the only memory
> instructions. Example, above instruction can also be of extended form (X
> form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
> and extract the source/target registers, patch adds support to use raw
> instruction. With raw instruction, macros are added to extract opcode
> and register fields.
>
> "struct ins_operands" and "struct ins" is updated to carry opcode and
> raw instruction binary code (raw_insn). Function "disasm_line__parse"
> is updated to fill the raw instruction hex value and opcode in newly
> added fields. There is no changes in existing code paths, which parses
> the disassembled code. The architecture using the instruction name and
> present approach is not altered. Since this approach targets powerpc,
> the macro implementation is added for powerpc as of now.
>
> Example:
> representation using --show-raw-insn in objdump gives result:
>
> 38 01 81 e8     ld      r4,312(r1)
>
> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
> this translates to instruction form: "ld RT,DS(RA)" and binary code
> as:
> _____________________________________
> | 58 |  RT  |  RA |      DS       | |
> -------------------------------------
> 0    6     11    16              30 31
>
> Function "disasm_line__parse" is updated to capture:
>
> line:    38 01 81 e8     ld      r4,312(r1)
> opcode and raw instruction "38 01 81 e8"
> Raw instruction is used later to extract the reg/offset fields.
>
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> ---
>  tools/include/linux/string.h              |  2 +
>  tools/lib/string.c                        | 13 +++++++
>  tools/perf/arch/powerpc/util/dwarf-regs.c | 19 ++++++++++
>  tools/perf/util/disasm.c                  | 46 +++++++++++++++++++----
>  tools/perf/util/disasm.h                  |  6 +++
>  tools/perf/util/include/dwarf-regs.h      |  9 +++++
>  6 files changed, 88 insertions(+), 7 deletions(-)
>
> diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
> index db5c99318c79..0acb1fc14e19 100644
> --- a/tools/include/linux/string.h
> +++ b/tools/include/linux/string.h
> @@ -46,5 +46,7 @@ extern char * __must_check skip_spaces(const char *);
>
>  extern char *strim(char *);
>
> +extern void remove_spaces(char *s);
> +
>  extern void *memchr_inv(const void *start, int c, size_t bytes);
>  #endif /* _TOOLS_LINUX_STRING_H_ */
> diff --git a/tools/lib/string.c b/tools/lib/string.c
> index 8b6892f959ab..21d273e69951 100644
> --- a/tools/lib/string.c
> +++ b/tools/lib/string.c
> @@ -153,6 +153,19 @@ char *strim(char *s)
>         return skip_spaces(s);
>  }
>
> +/*
> + * remove_spaces - Removes whitespaces from @s
> + */
> +void remove_spaces(char *s)
> +{
> +       char *d = s;
> +       do {
> +               while (*d == ' ') {
> +                       ++d;
> +               }
> +       } while ((*s++ = *d++));
> +}
> +
>  /**
>   * strreplace - Replace all occurrences of character in string.
>   * @s: The string to operate on.
> diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
> index 0c4f4caf53ac..e60a71fd846e 100644
> --- a/tools/perf/arch/powerpc/util/dwarf-regs.c
> +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
> @@ -98,3 +98,22 @@ int regs_query_register_offset(const char *name)
>                         return roff->ptregs_offset;
>         return -EINVAL;
>  }
> +
> +#define        PPC_OP(op)      (((op) >> 26) & 0x3F)
> +#define PPC_RA(a)      (((a) >> 16) & 0x1f)
> +#define PPC_RT(t)      (((t) >> 21) & 0x1f)
> +
> +int get_opcode_insn(unsigned int raw_insn)
> +{
> +       return PPC_OP(raw_insn);
> +}
> +
> +int get_source_reg(unsigned int raw_insn)
> +{
> +       return PPC_RA(raw_insn);
> +}
> +
> +int get_target_reg(unsigned int raw_insn)
> +{
> +       return PPC_RT(raw_insn);
> +}
> diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
> index 2de66a092cab..85692f73e78f 100644
> --- a/tools/perf/util/disasm.c
> +++ b/tools/perf/util/disasm.c
> @@ -43,7 +43,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
>                            struct ins_operands *ops, int max_ins_name);
>
>  static void ins__sort(struct arch *arch);
> -static int disasm_line__parse(char *line, const char **namep, char **rawp);
> +static int disasm_line__parse(char *line, const char **namep, char **rawp, int *opcode, int *rawp_insn);
>
>  static __attribute__((constructor)) void symbol__init_regexpr(void)
>  {
> @@ -512,7 +512,7 @@ static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_s
>         if (ops->locked.ops == NULL)
>                 return 0;
>
> -       if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
> +       if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw, &ops->locked.ins.opcode, &ops->locked.ops->raw_insn) < 0)

This line is too long.


>                 goto out_free_ops;
>
>         ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
> @@ -815,11 +815,38 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, str
>                 dl->ins.ops = NULL;
>  }
>
> -static int disasm_line__parse(char *line, const char **namep, char **rawp)
> +int __weak get_opcode_insn(unsigned int raw_insn __maybe_unused)
>  {
> -       char tmp, *name = skip_spaces(line);
> +       return -1;
> +}
> +
> +int __weak get_source_reg(unsigned int raw_insn __maybe_unused)
> +{
> +       return -1;
> +}
> +
> +int __weak get_target_reg(unsigned int raw_insn __maybe_unused)
> +{
> +       return -1;
> +}

I prefer having conditional code with #ifdef rather than weak
functions especially if it isn't needed for every arch.

> +
> +/*
> + * Parses the objdump result captured with --show-raw-insn.
> + * Example, objdump line from powerpc:
> + * line:    38 01 81 e8     ld      r4,312(r1)
> + * namep : ld
> + * rawp  : "ld r4,312(r1)"
> + * opcode: fetched from arch specific get_opcode_insn
> + * rawp_insn: e8810138
> + *
> + * rawp_insn is used later to extract the reg/offset fields
> + */
> +static int disasm_line__parse(char *line, const char **namep, char **rawp, int *opcode, int *rawp_insn)
> +{
> +       char tmp, *tmp_opcode, *name_opcode = skip_spaces(line);
> +       char *name = skip_spaces(name_opcode + 11);
>
> -       if (name[0] == '\0')
> +       if (name_opcode[0] == '\0')
>                 return -1;
>
>         *rawp = name + 1;
> @@ -829,13 +856,18 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp)
>
>         tmp = (*rawp)[0];
>         (*rawp)[0] = '\0';
> +       tmp_opcode = strdup(name_opcode);
> +       tmp_opcode[11] = '\0';

Looks like powerpc specific.  Can we move it under arch check?


> +       remove_spaces(tmp_opcode);
>         *namep = strdup(name);
> +       *opcode = get_opcode_insn(be32_to_cpu(strtol(tmp_opcode, NULL, 16)));

This as well.  Maybe we could have per-arch disasm_line__parse().

>
>         if (*namep == NULL)
>                 goto out;
>
>         (*rawp)[0] = tmp;
>         *rawp = strim(*rawp);
> +       *rawp_insn = be32_to_cpu(strtol(tmp_opcode, NULL, 16));
>
>         return 0;
>
> @@ -896,7 +928,7 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
>                 goto out_delete;
>
>         if (args->offset != -1) {
> -               if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
> +               if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw, &dl->ins.opcode, &dl->ops.raw_insn) < 0)
>                         goto out_free_line;
>
>                 disasm_line__init_ins(dl, args->arch, &args->ms);
> @@ -1726,7 +1758,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
>                  map__rip_2objdump(map, sym->start),
>                  map__rip_2objdump(map, sym->end),
>                  opts->show_linenr ? "-l" : "",
> -                opts->show_asm_raw ? "" : "--no-show-raw-insn",
> +                opts->show_asm_raw ? "" : "--show-raw-insn",

This is not what we want.  According to the man page of objdump
the --show-raw-insn should be enabled by default.  So I think the
default value prints nothing.  But if it's not the case on powerpc
(maybe on x86 too?) we could add it in the positive case too and
make powerpc init code reset the option.

Thanks,
Namhyung


>                  opts->annotate_src ? "-S" : "",
>                  opts->prefix ? "--prefix " : "",
>                  opts->prefix ? '"' : ' ',
> diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
> index 718177fa4775..5c1520010ca7 100644
> --- a/tools/perf/util/disasm.h
> +++ b/tools/perf/util/disasm.h
> @@ -43,14 +43,18 @@ struct arch {
>
>  struct ins {
>         const char     *name;
> +       int     opcode;
>         struct ins_ops *ops;
>  };
>
>  struct ins_operands {
>         char    *raw;
> +       int     raw_insn;
>         struct {
>                 char    *raw;
>                 char    *name;
> +               int     opcode;
> +               int     raw_insn;
>                 struct symbol *sym;
>                 u64     addr;
>                 s64     offset;
> @@ -62,6 +66,8 @@ struct ins_operands {
>                 struct {
>                         char    *raw;
>                         char    *name;
> +                       int     opcode;
> +                       int     raw_insn;
>                         u64     addr;
>                         bool    multi_regs;
>                 } source;
> diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
> index 01fb25a1150a..2a4e1e16e52c 100644
> --- a/tools/perf/util/include/dwarf-regs.h
> +++ b/tools/perf/util/include/dwarf-regs.h
> @@ -31,6 +31,15 @@ static inline int get_dwarf_regnum(const char *name __maybe_unused,
>  }
>  #endif
>
> +/*
> + * get_opcode_insn - Return opcode from raw instruction
> + * get_source_reg - Return source reg from raw instruction
> + * get_target_reg - Return target reg from raw instruction
> + */
> +int get_opcode_insn(unsigned int raw_insn);
> +int get_source_reg(unsigned int raw_insn);
> +int get_target_reg(unsigned int raw_insn);
> +
>  #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
>  /*
>   * Arch should support fetching the offset of a register in pt_regs
> --
> 2.43.0
>
Christophe Leroy May 7, 2024, 9:35 a.m. UTC | #2
Le 06/05/2024 à 14:19, Athira Rajeev a écrit :
> Add support to capture and parse raw instruction in objdump.

What's the purpose of using 'objdump' for reading raw instructions ? 
Can't they be read directly without invoking 'objdump' ? It looks odd to 
me to use objdump to provide readable text and then parse it back.

> Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
> with "objdump" while disassemble. Example from powerpc with this option
> for an instruction address is:

Yes and that makes sense because the purpose of objdump is to provide 
human readable annotations, not to perform automated analysis. Am I 
missing something ?

> 
> Snippet from:
> objdump  --start-address=<address> --stop-address=<address>  -d --no-show-raw-insn -C <vmlinux>
> 
> c0000000010224b4:	lwz     r10,0(r9)
> 
> This line "lwz r10,0(r9)" is parsed to extract instruction name,
> registers names and offset. Also to find whether there is a memory
> reference in the operands, "memory_ref_char" field of objdump is used.
> For x86, "(" is used as memory_ref_char to tackle instructions of the
> form "mov  (%rax), %rcx".
> 
> In case of powerpc, not all instructions using "(" are the only memory
> instructions. Example, above instruction can also be of extended form (X
> form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
> and extract the source/target registers, patch adds support to use raw
> instruction. With raw instruction, macros are added to extract opcode
> and register fields.
> 
> "struct ins_operands" and "struct ins" is updated to carry opcode and
> raw instruction binary code (raw_insn). Function "disasm_line__parse"
> is updated to fill the raw instruction hex value and opcode in newly
> added fields. There is no changes in existing code paths, which parses
> the disassembled code. The architecture using the instruction name and
> present approach is not altered. Since this approach targets powerpc,
> the macro implementation is added for powerpc as of now.
> 
> Example:
> representation using --show-raw-insn in objdump gives result:
> 
> 38 01 81 e8     ld      r4,312(r1)
> 
> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
> this translates to instruction form: "ld RT,DS(RA)" and binary code
> as:
> _____________________________________
> | 58 |  RT  |  RA |      DS       | |
> -------------------------------------
> 0    6     11    16              30 31
> 
> Function "disasm_line__parse" is updated to capture:
> 
> line:    38 01 81 e8     ld      r4,312(r1)
> opcode and raw instruction "38 01 81 e8"
> Raw instruction is used later to extract the reg/offset fields.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> ---
Athira Rajeev May 9, 2024, 5:26 p.m. UTC | #3
> On 7 May 2024, at 3:05 PM, Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> 
> 
> 
> Le 06/05/2024 à 14:19, Athira Rajeev a écrit :
>> Add support to capture and parse raw instruction in objdump.
> 
> What's the purpose of using 'objdump' for reading raw instructions ? 
> Can't they be read directly without invoking 'objdump' ? It looks odd to 
> me to use objdump to provide readable text and then parse it back.

Hi Christophe,

Thanks for your review comments.

Current implementation for data type profiling on X86 uses "objdump" tool to get the disassembled code.
And then the objdump result lines are parsed to get the instruction name and register fields. The initial patchset I posted to enable the data type profiling feature in powerpc was using the same way by getting disassembled code from objdump and parsing the disassembled lines. But in V2, we are introducing change for powerpc to use "raw instruction" and fetch opcode, reg fields from the raw instruction.

I tried to explain below that current objdump uses option "--no-show-raw-insn" which doesn't capture raw instruction.  So to capture raw instruction, V2 patchset has changes to use default option "--show-raw-insn" and get the raw instruction [ for powerpc ] along with human readable annotation [ which is used by other archs ]. Since perf tool already has objdump implementation in place, I went in the direction to enhance it to use "--show-raw-insn" for powerpc purpose.

But as you mentioned, we can directly read raw instruction without using "objdump" tool.
perf has support to read object code. The dso open/read utilities and helper functions are already present in "util/dso.c" And "dso__data_read_offset" function reads data from dso file offset. We can use these functions and I can make changes to directly read binary instruction without using objdump.

Namhyung, Arnaldo, Christophe
Looking for your valuable feedback on this approach. Please suggest if this approach looks fine


Thanks
Athira
> 
>> Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
>> with "objdump" while disassemble. Example from powerpc with this option
>> for an instruction address is:
> 
> Yes and that makes sense because the purpose of objdump is to provide 
> human readable annotations, not to perform automated analysis. Am I 
> missing something ?
> 
>> 
>> Snippet from:
>> objdump  --start-address=<address> --stop-address=<address>  -d --no-show-raw-insn -C <vmlinux>
>> 
>> c0000000010224b4: lwz     r10,0(r9)
>> 
>> This line "lwz r10,0(r9)" is parsed to extract instruction name,
>> registers names and offset. Also to find whether there is a memory
>> reference in the operands, "memory_ref_char" field of objdump is used.
>> For x86, "(" is used as memory_ref_char to tackle instructions of the
>> form "mov  (%rax), %rcx".
>> 
>> In case of powerpc, not all instructions using "(" are the only memory
>> instructions. Example, above instruction can also be of extended form (X
>> form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
>> and extract the source/target registers, patch adds support to use raw
>> instruction. With raw instruction, macros are added to extract opcode
>> and register fields.
>> 
>> "struct ins_operands" and "struct ins" is updated to carry opcode and
>> raw instruction binary code (raw_insn). Function "disasm_line__parse"
>> is updated to fill the raw instruction hex value and opcode in newly
>> added fields. There is no changes in existing code paths, which parses
>> the disassembled code. The architecture using the instruction name and
>> present approach is not altered. Since this approach targets powerpc,
>> the macro implementation is added for powerpc as of now.
>> 
>> Example:
>> representation using --show-raw-insn in objdump gives result:
>> 
>> 38 01 81 e8     ld      r4,312(r1)
>> 
>> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
>> this translates to instruction form: "ld RT,DS(RA)" and binary code
>> as:
>> _____________________________________
>> | 58 |  RT  |  RA |      DS       | |
>> -------------------------------------
>> 0    6     11    16              30 31
>> 
>> Function "disasm_line__parse" is updated to capture:
>> 
>> line:    38 01 81 e8     ld      r4,312(r1)
>> opcode and raw instruction "38 01 81 e8"
>> Raw instruction is used later to extract the reg/offset fields.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> ---
Namhyung Kim May 9, 2024, 8:55 p.m. UTC | #4
On Thu, May 9, 2024 at 10:27 AM Athira Rajeev
<atrajeev@linux.vnet.ibm.com> wrote:
>
>
>
> > On 7 May 2024, at 3:05 PM, Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> >
> >
> >
> > Le 06/05/2024 à 14:19, Athira Rajeev a écrit :
> >> Add support to capture and parse raw instruction in objdump.
> >
> > What's the purpose of using 'objdump' for reading raw instructions ?
> > Can't they be read directly without invoking 'objdump' ? It looks odd to
> > me to use objdump to provide readable text and then parse it back.
>
> Hi Christophe,
>
> Thanks for your review comments.
>
> Current implementation for data type profiling on X86 uses "objdump" tool to get the disassembled code.
> And then the objdump result lines are parsed to get the instruction name and register fields. The initial patchset I posted to enable the data type profiling feature in powerpc was using the same way by getting disassembled code from objdump and parsing the disassembled lines. But in V2, we are introducing change for powerpc to use "raw instruction" and fetch opcode, reg fields from the raw instruction.
>
> I tried to explain below that current objdump uses option "--no-show-raw-insn" which doesn't capture raw instruction.  So to capture raw instruction, V2 patchset has changes to use default option "--show-raw-insn" and get the raw instruction [ for powerpc ] along with human readable annotation [ which is used by other archs ]. Since perf tool already has objdump implementation in place, I went in the direction to enhance it to use "--show-raw-insn" for powerpc purpose.
>
> But as you mentioned, we can directly read raw instruction without using "objdump" tool.
> perf has support to read object code. The dso open/read utilities and helper functions are already present in "util/dso.c" And "dso__data_read_offset" function reads data from dso file offset. We can use these functions and I can make changes to directly read binary instruction without using objdump.
>
> Namhyung, Arnaldo, Christophe
> Looking for your valuable feedback on this approach. Please suggest if this approach looks fine

Looks like you want to implement instruction decoding
like in arch/x86/lib/{insn,inat}.c.  I think it's ok to do that
but you need to decide which way is more convenient.

Also it works on the struct disasm_line so you need to
fill in the necessary info when not using objdump.  As
long as it produces the same output I don't care much
if you use objdump or not.  Actually it uses libcapstone
to disassemble x86 instructions if possible.  Maybe you
can use that on powerpc too.

Thanks,
Namhyung

>
>
> Thanks
> Athira
> >
> >> Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
> >> with "objdump" while disassemble. Example from powerpc with this option
> >> for an instruction address is:
> >
> > Yes and that makes sense because the purpose of objdump is to provide
> > human readable annotations, not to perform automated analysis. Am I
> > missing something ?
> >
> >>
> >> Snippet from:
> >> objdump  --start-address=<address> --stop-address=<address>  -d --no-show-raw-insn -C <vmlinux>
> >>
> >> c0000000010224b4: lwz     r10,0(r9)
> >>
> >> This line "lwz r10,0(r9)" is parsed to extract instruction name,
> >> registers names and offset. Also to find whether there is a memory
> >> reference in the operands, "memory_ref_char" field of objdump is used.
> >> For x86, "(" is used as memory_ref_char to tackle instructions of the
> >> form "mov  (%rax), %rcx".
> >>
> >> In case of powerpc, not all instructions using "(" are the only memory
> >> instructions. Example, above instruction can also be of extended form (X
> >> form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
> >> and extract the source/target registers, patch adds support to use raw
> >> instruction. With raw instruction, macros are added to extract opcode
> >> and register fields.
> >>
> >> "struct ins_operands" and "struct ins" is updated to carry opcode and
> >> raw instruction binary code (raw_insn). Function "disasm_line__parse"
> >> is updated to fill the raw instruction hex value and opcode in newly
> >> added fields. There is no changes in existing code paths, which parses
> >> the disassembled code. The architecture using the instruction name and
> >> present approach is not altered. Since this approach targets powerpc,
> >> the macro implementation is added for powerpc as of now.
> >>
> >> Example:
> >> representation using --show-raw-insn in objdump gives result:
> >>
> >> 38 01 81 e8     ld      r4,312(r1)
> >>
> >> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
> >> this translates to instruction form: "ld RT,DS(RA)" and binary code
> >> as:
> >> _____________________________________
> >> | 58 |  RT  |  RA |      DS       | |
> >> -------------------------------------
> >> 0    6     11    16              30 31
> >>
> >> Function "disasm_line__parse" is updated to capture:
> >>
> >> line:    38 01 81 e8     ld      r4,312(r1)
> >> opcode and raw instruction "38 01 81 e8"
> >> Raw instruction is used later to extract the reg/offset fields.
> >>
> >> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> >> ---
>
Arnaldo Carvalho de Melo May 10, 2024, 2:26 p.m. UTC | #5
On Thu, May 09, 2024 at 10:56:23PM +0530, Athira Rajeev wrote:
> 
> 
> > On 7 May 2024, at 3:05 PM, Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> > 
> > 
> > 
> > Le 06/05/2024 à 14:19, Athira Rajeev a écrit :
> >> Add support to capture and parse raw instruction in objdump.
> > 
> > What's the purpose of using 'objdump' for reading raw instructions ? 
> > Can't they be read directly without invoking 'objdump' ? It looks odd to 
> > me to use objdump to provide readable text and then parse it back.
> 
> Hi Christophe,
> 
> Thanks for your review comments.
> 
> Current implementation for data type profiling on X86 uses "objdump" tool to get the disassembled code.

commit 6d17edc113de1e21fc66afa76be475a4f7c91826
Author: Namhyung Kim <namhyung@kernel.org>
Date:   Fri Mar 29 14:58:11 2024 -0700

    perf annotate: Use libcapstone to disassemble
    
    Now it can use the capstone library to disassemble the instructions.
    Let's use that (if available) for perf annotate to speed up.  Currently
    it only supports x86 architecture.  With this change I can see ~3x speed
    up in data type profiling.
    
    But note that capstone cannot give the source file and line number info.
    For now, users should use the external objdump for that by specifying
    the --objdump option explicitly.
    
    Signed-off-by: Namhyung Kim <namhyung@kernel.org>
    Tested-by: Ian Rogers <irogers@google.com>
    Cc: Adrian Hunter <adrian.hunter@intel.com>
    Cc: Changbin Du <changbin.du@huawei.com>
    Cc: Ingo Molnar <mingo@kernel.org>
    Cc: Jiri Olsa <jolsa@kernel.org>
    Cc: Kan Liang <kan.liang@linux.intel.com>
    Cc: Peter Zijlstra <peterz@infradead.org>
    Link: https://lore.kernel.org/r/20240329215812.537846-5-namhyung@kernel.org
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

From a quick look at http://www.capstone-engine.org/compile.html it
seems PowerPC is supported.

But since we did it first with objdump output parsing, its good to have
it as an alternative and sometimes a fallback:

commit f35847de2a65137e011e559f38a3de5902a5463f
Author: Namhyung Kim <namhyung@kernel.org>
Date:   Wed Apr 24 17:51:56 2024 -0700

    perf annotate: Fallback disassemble to objdump when capstone fails
    
    I found some cases that capstone failed to disassemble.  Probably my
    capstone is an old version but anyway there's a chance it can fail.  And
    then it silently stopped in the middle.  In my case, it didn't
    understand "RDPKRU" instruction.
    
    Let's check if the capstone disassemble reached the end of the function
    and fallback to objdump if not

---------------

- Arnaldo

> And then the objdump result lines are parsed to get the instruction
> name and register fields. The initial patchset I posted to enable the
> data type profiling feature in powerpc was using the same way by
> getting disassembled code from objdump and parsing the disassembled
> lines. But in V2, we are introducing change for powerpc to use "raw
> instruction" and fetch opcode, reg fields from the raw instruction.
 
> I tried to explain below that current objdump uses option
> "--no-show-raw-insn" which doesn't capture raw instruction.  So to
> capture raw instruction, V2 patchset has changes to use default option
> "--show-raw-insn" and get the raw instruction [ for powerpc ] along
> with human readable annotation [ which is used by other archs ]. Since
> perf tool already has objdump implementation in place, I went in the
> direction to enhance it to use "--show-raw-insn" for powerpc purpose.
 
> But as you mentioned, we can directly read raw instruction without
> using "objdump" tool.  perf has support to read object code. The dso
> open/read utilities and helper functions are already present in
> "util/dso.c" And "dso__data_read_offset" function reads data from dso
> file offset. We can use these functions and I can make changes to
> directly read binary instruction without using objdump.
 
> Namhyung, Arnaldo, Christophe
> Looking for your valuable feedback on this approach. Please suggest if this approach looks fine
> 
> 
> Thanks
> Athira
> > 
> >> Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
> >> with "objdump" while disassemble. Example from powerpc with this option
> >> for an instruction address is:
> > 
> > Yes and that makes sense because the purpose of objdump is to provide 
> > human readable annotations, not to perform automated analysis. Am I 
> > missing something ?
> > 
> >> 
> >> Snippet from:
> >> objdump  --start-address=<address> --stop-address=<address>  -d --no-show-raw-insn -C <vmlinux>
> >> 
> >> c0000000010224b4: lwz     r10,0(r9)
> >> 
> >> This line "lwz r10,0(r9)" is parsed to extract instruction name,
> >> registers names and offset. Also to find whether there is a memory
> >> reference in the operands, "memory_ref_char" field of objdump is used.
> >> For x86, "(" is used as memory_ref_char to tackle instructions of the
> >> form "mov  (%rax), %rcx".
> >> 
> >> In case of powerpc, not all instructions using "(" are the only memory
> >> instructions. Example, above instruction can also be of extended form (X
> >> form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
> >> and extract the source/target registers, patch adds support to use raw
> >> instruction. With raw instruction, macros are added to extract opcode
> >> and register fields.
> >> 
> >> "struct ins_operands" and "struct ins" is updated to carry opcode and
> >> raw instruction binary code (raw_insn). Function "disasm_line__parse"
> >> is updated to fill the raw instruction hex value and opcode in newly
> >> added fields. There is no changes in existing code paths, which parses
> >> the disassembled code. The architecture using the instruction name and
> >> present approach is not altered. Since this approach targets powerpc,
> >> the macro implementation is added for powerpc as of now.
> >> 
> >> Example:
> >> representation using --show-raw-insn in objdump gives result:
> >> 
> >> 38 01 81 e8     ld      r4,312(r1)
> >> 
> >> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
> >> this translates to instruction form: "ld RT,DS(RA)" and binary code
> >> as:
> >> _____________________________________
> >> | 58 |  RT  |  RA |      DS       | |
> >> -------------------------------------
> >> 0    6     11    16              30 31
> >> 
> >> Function "disasm_line__parse" is updated to capture:
> >> 
> >> line:    38 01 81 e8     ld      r4,312(r1)
> >> opcode and raw instruction "38 01 81 e8"
> >> Raw instruction is used later to extract the reg/offset fields.
> >> 
> >> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
> >> ---
diff mbox series

Patch

diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index db5c99318c79..0acb1fc14e19 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -46,5 +46,7 @@  extern char * __must_check skip_spaces(const char *);
 
 extern char *strim(char *);
 
+extern void remove_spaces(char *s);
+
 extern void *memchr_inv(const void *start, int c, size_t bytes);
 #endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/tools/lib/string.c b/tools/lib/string.c
index 8b6892f959ab..21d273e69951 100644
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -153,6 +153,19 @@  char *strim(char *s)
 	return skip_spaces(s);
 }
 
+/*
+ * remove_spaces - Removes whitespaces from @s
+ */
+void remove_spaces(char *s)
+{
+	char *d = s;
+	do {
+		while (*d == ' ') {
+			++d;
+		}
+	} while ((*s++ = *d++));
+}
+
 /**
  * strreplace - Replace all occurrences of character in string.
  * @s: The string to operate on.
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 0c4f4caf53ac..e60a71fd846e 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -98,3 +98,22 @@  int regs_query_register_offset(const char *name)
 			return roff->ptregs_offset;
 	return -EINVAL;
 }
+
+#define	PPC_OP(op)	(((op) >> 26) & 0x3F)
+#define PPC_RA(a)	(((a) >> 16) & 0x1f)
+#define PPC_RT(t)	(((t) >> 21) & 0x1f)
+
+int get_opcode_insn(unsigned int raw_insn)
+{
+	return PPC_OP(raw_insn);
+}
+
+int get_source_reg(unsigned int raw_insn)
+{
+	return PPC_RA(raw_insn);
+}
+
+int get_target_reg(unsigned int raw_insn)
+{
+	return PPC_RT(raw_insn);
+}
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 2de66a092cab..85692f73e78f 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -43,7 +43,7 @@  static int call__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops, int max_ins_name);
 
 static void ins__sort(struct arch *arch);
-static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int disasm_line__parse(char *line, const char **namep, char **rawp, int *opcode, int *rawp_insn);
 
 static __attribute__((constructor)) void symbol__init_regexpr(void)
 {
@@ -512,7 +512,7 @@  static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_s
 	if (ops->locked.ops == NULL)
 		return 0;
 
-	if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
+	if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw, &ops->locked.ins.opcode, &ops->locked.ops->raw_insn) < 0)
 		goto out_free_ops;
 
 	ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
@@ -815,11 +815,38 @@  static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, str
 		dl->ins.ops = NULL;
 }
 
-static int disasm_line__parse(char *line, const char **namep, char **rawp)
+int __weak get_opcode_insn(unsigned int raw_insn __maybe_unused)
 {
-	char tmp, *name = skip_spaces(line);
+	return -1;
+}
+
+int __weak get_source_reg(unsigned int raw_insn __maybe_unused)
+{
+	return -1;
+}
+
+int __weak get_target_reg(unsigned int raw_insn __maybe_unused)
+{
+	return -1;
+}
+
+/*
+ * Parses the objdump result captured with --show-raw-insn.
+ * Example, objdump line from powerpc:
+ * line:    38 01 81 e8     ld      r4,312(r1)
+ * namep : ld
+ * rawp  : "ld r4,312(r1)"
+ * opcode: fetched from arch specific get_opcode_insn
+ * rawp_insn: e8810138
+ *
+ * rawp_insn is used later to extract the reg/offset fields
+ */
+static int disasm_line__parse(char *line, const char **namep, char **rawp, int *opcode, int *rawp_insn)
+{
+	char tmp, *tmp_opcode, *name_opcode = skip_spaces(line);
+	char *name = skip_spaces(name_opcode + 11);
 
-	if (name[0] == '\0')
+	if (name_opcode[0] == '\0')
 		return -1;
 
 	*rawp = name + 1;
@@ -829,13 +856,18 @@  static int disasm_line__parse(char *line, const char **namep, char **rawp)
 
 	tmp = (*rawp)[0];
 	(*rawp)[0] = '\0';
+	tmp_opcode = strdup(name_opcode);
+	tmp_opcode[11] = '\0';
+	remove_spaces(tmp_opcode);
 	*namep = strdup(name);
+	*opcode = get_opcode_insn(be32_to_cpu(strtol(tmp_opcode, NULL, 16)));
 
 	if (*namep == NULL)
 		goto out;
 
 	(*rawp)[0] = tmp;
 	*rawp = strim(*rawp);
+	*rawp_insn = be32_to_cpu(strtol(tmp_opcode, NULL, 16));
 
 	return 0;
 
@@ -896,7 +928,7 @@  struct disasm_line *disasm_line__new(struct annotate_args *args)
 		goto out_delete;
 
 	if (args->offset != -1) {
-		if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+		if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw, &dl->ins.opcode, &dl->ops.raw_insn) < 0)
 			goto out_free_line;
 
 		disasm_line__init_ins(dl, args->arch, &args->ms);
@@ -1726,7 +1758,7 @@  int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
 		 opts->show_linenr ? "-l" : "",
-		 opts->show_asm_raw ? "" : "--no-show-raw-insn",
+		 opts->show_asm_raw ? "" : "--show-raw-insn",
 		 opts->annotate_src ? "-S" : "",
 		 opts->prefix ? "--prefix " : "",
 		 opts->prefix ? '"' : ' ',
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
index 718177fa4775..5c1520010ca7 100644
--- a/tools/perf/util/disasm.h
+++ b/tools/perf/util/disasm.h
@@ -43,14 +43,18 @@  struct arch {
 
 struct ins {
 	const char     *name;
+	int	opcode;
 	struct ins_ops *ops;
 };
 
 struct ins_operands {
 	char	*raw;
+	int	raw_insn;
 	struct {
 		char	*raw;
 		char	*name;
+		int	opcode;
+		int	raw_insn;
 		struct symbol *sym;
 		u64	addr;
 		s64	offset;
@@ -62,6 +66,8 @@  struct ins_operands {
 		struct {
 			char	*raw;
 			char	*name;
+			int	opcode;
+			int	raw_insn;
 			u64	addr;
 			bool	multi_regs;
 		} source;
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index 01fb25a1150a..2a4e1e16e52c 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -31,6 +31,15 @@  static inline int get_dwarf_regnum(const char *name __maybe_unused,
 }
 #endif
 
+/*
+ * get_opcode_insn - Return opcode from raw instruction
+ * get_source_reg - Return source reg from raw instruction
+ * get_target_reg - Return target reg from raw instruction
+ */
+int get_opcode_insn(unsigned int raw_insn);
+int get_source_reg(unsigned int raw_insn);
+int get_target_reg(unsigned int raw_insn);
+
 #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
 /*
  * Arch should support fetching the offset of a register in pt_regs