diff mbox series

Adding ability to change disassembler syntax in TCG plugins

Message ID 7d17f0cbb5ed4c90bbadd3992429006f@yadro.com
State New
Headers show
Series Adding ability to change disassembler syntax in TCG plugins | expand

Commit Message

Mikhail Tyutin Feb. 10, 2023, 3:24 p.m. UTC
This patch adds new function qemu_plugin_insn_disas_with_syntax() that allows TCG plugins to get disassembler string with non-default syntax if it wants to.

Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
---
 contrib/plugins/execlog.c    |  2 +-
 disas.c                      |  4 +++-
 disas/capstone.c             |  9 ++++++++-
 include/disas/dis-asm.h      |  2 ++
 include/disas/disas.h        |  4 +++-
 include/qemu/qemu-plugin.h   | 17 +++++++++++++++++
 plugins/api.c                | 10 ++++++++--
 plugins/qemu-plugins.symbols |  1 +
 8 files changed, 43 insertions(+), 6 deletions(-)

Comments

Mikhail Tyutin Feb. 16, 2023, 4:17 a.m. UTC | #1
ping

patchew link:
https://patchew.org/QEMU/7d17f0cbb5ed4c90bbadd3992429006f@yadro.com/

10.02.2023 18:24, Mikhail Tyutin wrote:
> This patch adds new function qemu_plugin_insn_disas_with_syntax() that allows TCG plugins to get disassembler string with non-default syntax if it wants to.
> 
> Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
> ---
>   contrib/plugins/execlog.c    |  2 +-
>   disas.c                      |  4 +++-
>   disas/capstone.c             |  9 ++++++++-
>   include/disas/dis-asm.h      |  2 ++
>   include/disas/disas.h        |  4 +++-
>   include/qemu/qemu-plugin.h   | 17 +++++++++++++++++
>   plugins/api.c                | 10 ++++++++--
>   plugins/qemu-plugins.symbols |  1 +
>   8 files changed, 43 insertions(+), 6 deletions(-)
> 
> diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
> index e255bd21fd..6006490b1d 100644
> --- a/contrib/plugins/execlog.c
> +++ b/contrib/plugins/execlog.c
> @@ -121,7 +121,7 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>            * a limitation for CISC architectures.
>            */
>           insn = qemu_plugin_tb_get_insn(tb, i);
> -        insn_disas = qemu_plugin_insn_disas(insn);
> +        insn_disas = qemu_plugin_insn_disas_with_syntax(insn, QEMU_PLUGIN_DISAS_SYNTAX_INTEL);
>           insn_vaddr = qemu_plugin_insn_vaddr(insn);
>   
>           /*
> diff --git a/disas.c b/disas.c
> index b087c12c47..19b0f9d15f 100644
> --- a/disas.c
> +++ b/disas.c
> @@ -267,7 +267,8 @@ static void plugin_print_address(bfd_vma addr, struct disassemble_info *info)
>    * there is left over it usually indicates the front end has read more
>    * bytes than it needed.
>    */
> -char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
> +char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size,
> +                   enum qemu_plugin_disas_syntax syntax)
>   {
>       CPUDebug s;
>       GString *ds = g_string_new(NULL);
> @@ -278,6 +279,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
>       s.info.buffer_vma = addr;
>       s.info.buffer_length = size;
>       s.info.print_address_func = plugin_print_address;
> +    s.info.dis_syntax = syntax;
>   
>       if (s.info.cap_arch >= 0 && cap_disas_plugin(&s.info, addr, size)) {
>           ; /* done */
> diff --git a/disas/capstone.c b/disas/capstone.c
> index fe3efb0d3c..7476ee4044 100644
> --- a/disas/capstone.c
> +++ b/disas/capstone.c
> @@ -5,6 +5,7 @@
>   
>   #include "qemu/osdep.h"
>   #include "qemu/bswap.h"
> +#include "qemu/plugin.h"
>   #include "disas/dis-asm.h"
>   #include "disas/capstone.h"
>   
> @@ -87,7 +88,13 @@ static cs_err cap_disas_start(disassemble_info *info, csh *handle)
>            * is compiled without AT&T syntax); the user will just have
>            * to deal with the Intel syntax.
>            */
> -        cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
> +
> +        size_t cs_opt_syntax = CS_OPT_SYNTAX_ATT;
> +        if (info->dis_syntax == QEMU_PLUGIN_DISAS_SYNTAX_INTEL) {
> +            cs_opt_syntax = CS_OPT_SYNTAX_INTEL;
> +        }
> +
> +        cs_option(*handle, CS_OPT_SYNTAX, cs_opt_syntax);
>           break;
>       }
>   
> diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
> index 64247ecb11..0153165ca2 100644
> --- a/include/disas/dis-asm.h
> +++ b/include/disas/dis-asm.h
> @@ -409,6 +409,8 @@ typedef struct disassemble_info {
>     int cap_insn_unit;
>     int cap_insn_split;
>   
> +  int dis_syntax;
> +
>   } disassemble_info;
>   
>   /* Standard disassemblers.  Disassemble one instruction at the given
> diff --git a/include/disas/disas.h b/include/disas/disas.h
> index d363e95ede..f8e4f97ab1 100644
> --- a/include/disas/disas.h
> +++ b/include/disas/disas.h
> @@ -2,6 +2,7 @@
>   #define QEMU_DISAS_H
>   
>   #include "exec/hwaddr.h"
> +#include "qemu/plugin.h"
>   
>   #ifdef NEED_CPU_H
>   #include "cpu.h"
> @@ -14,7 +15,8 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
>   void monitor_disas(Monitor *mon, CPUState *cpu,
>                      target_ulong pc, int nb_insn, int is_physical);
>   
> -char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
> +char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size,
> +                   enum qemu_plugin_disas_syntax syntax);
>   
>   /* Look up symbol for debugging purpose.  Returns "" if unknown. */
>   const char *lookup_symbol(target_ulong orig_addr);
> diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
> index d0e9d03adf..a4bd543579 100644
> --- a/include/qemu/qemu-plugin.h
> +++ b/include/qemu/qemu-plugin.h
> @@ -233,6 +233,12 @@ enum qemu_plugin_mem_rw {
>       QEMU_PLUGIN_MEM_RW,
>   };
>   
> +enum qemu_plugin_disas_syntax {
> +    QEMU_PLUGIN_DISAS_SYNTAX_DEFAULT,
> +    QEMU_PLUGIN_DISAS_SYNTAX_ATT,
> +    QEMU_PLUGIN_DISAS_SYNTAX_INTEL,
> +};
> +
>   /**
>    * typedef qemu_plugin_vcpu_tb_trans_cb_t - translation callback
>    * @id: unique plugin id
> @@ -526,6 +532,17 @@ qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
>   
>   char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn);
>   
> +#define QEMU_PLUGIN_DISAS_SYNTAX_ENABLED
> +/**
> + * qemu_plugin_insn_disas_with_syntax() - return disassembly string for instruction
> + * @insn: instruction reference
> + * @syntax: syntax style
> + *
> + * Returns an allocated string containing the disassembly
> + */
> +char *qemu_plugin_insn_disas_with_syntax(const struct qemu_plugin_insn *insn,
> +                                         enum qemu_plugin_disas_syntax syntax);
> +
>   /**
>    * qemu_plugin_insn_symbol() - best effort symbol lookup
>    * @insn: instruction reference
> diff --git a/plugins/api.c b/plugins/api.c
> index 2078b16edb..579dcaa0e3 100644
> --- a/plugins/api.c
> +++ b/plugins/api.c
> @@ -232,10 +232,16 @@ void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn)
>       return insn->haddr;
>   }
>   
> -char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn)
> +char *qemu_plugin_insn_disas_with_syntax(const struct qemu_plugin_insn *insn,
> +                                         enum qemu_plugin_disas_syntax syntax)
>   {
>       CPUState *cpu = current_cpu;
> -    return plugin_disas(cpu, insn->vaddr, insn->data->len);
> +    return plugin_disas(cpu, insn->vaddr, insn->data->len, syntax);
> +}
> +
> +char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn)
> +{
> +    return qemu_plugin_insn_disas_with_syntax(insn, QEMU_PLUGIN_DISAS_SYNTAX_DEFAULT);
>   }
>   
>   const char *qemu_plugin_insn_symbol(const struct qemu_plugin_insn *insn)
> diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
> index 71f6c90549..895526ff74 100644
> --- a/plugins/qemu-plugins.symbols
> +++ b/plugins/qemu-plugins.symbols
> @@ -8,6 +8,7 @@
>     qemu_plugin_hwaddr_phys_addr;
>     qemu_plugin_insn_data;
>     qemu_plugin_insn_disas;
> +  qemu_plugin_insn_disas_with_syntax;
>     qemu_plugin_insn_haddr;
>     qemu_plugin_insn_size;
>     qemu_plugin_insn_symbol;
Richard Henderson Feb. 16, 2023, 4:24 a.m. UTC | #2
On 2/15/23 18:17, Mikhail Tyutin wrote:
> ping
> 
> patchew link:
> https://patchew.org/QEMU/7d17f0cbb5ed4c90bbadd3992429006f@yadro.com/
> 
> 10.02.2023 18:24, Mikhail Tyutin wrote:
>> This patch adds new function qemu_plugin_insn_disas_with_syntax() that allows TCG 
>> plugins to get disassembler string with non-default syntax if it wants to.
>>
>> Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>

Why?

It's certainly not very generic, exposing a disassembly quirk for exactly one guest 
architecture.  I mean, you could just as easily link your plugin directly to libcapstone 
via qemu_plugin_insn_data().


r~
Mikhail Tyutin Feb. 16, 2023, 5:04 a.m. UTC | #3
> On 2/15/23 18:17, Mikhail Tyutin wrote:
> > ping
> >
> > patchew link:
> > https://patchew.org/QEMU/7d17f0cbb5ed4c90bbadd3992429006f@yadro.com/
> >
> > 10.02.2023 18:24, Mikhail Tyutin wrote:
> >> This patch adds new function qemu_plugin_insn_disas_with_syntax() that allows TCG
> >> plugins to get disassembler string with non-default syntax if it wants to.
> >>
> >> Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
> 
> Why?
> 
> It's certainly not very generic, exposing a disassembly quirk for exactly one guest
> architecture.  I mean, you could just as easily link your plugin directly to libcapstone
> via qemu_plugin_insn_data().
> 
> 
> r~
 
I agree it can be done outside of Qemu using another disassembler library. However,
there are few reasons to do it in Qemu from architecture standpoint:

1. To have a single place of instruction decoding logic. TCG has to decode guest instructions
anyway. If plugins add another decoder, it causes double work and prone to errors (however
current implementation does double decode work anyway). For example, TCG might support
new instruction which is not available in external decoder yet.

2. Under the hood Qemu uses different implementations of decoder (in addition to capstone)
which is not exposed in public interface. If there is a need to configure its output, proposed
API allows that as well.

3. If multiple plugins want to use another disassembler syntax, they have to share
implementation as utility function.
Richard Henderson Feb. 16, 2023, 5:47 a.m. UTC | #4
On 2/15/23 19:04, Mikhail Tyutin wrote:
>> On 2/15/23 18:17, Mikhail Tyutin wrote:
>>> ping
>>>
>>> patchew link:
>>> https://patchew.org/QEMU/7d17f0cbb5ed4c90bbadd3992429006f@yadro.com/
>>>
>>> 10.02.2023 18:24, Mikhail Tyutin wrote:
>>>> This patch adds new function qemu_plugin_insn_disas_with_syntax() that allows TCG
>>>> plugins to get disassembler string with non-default syntax if it wants to.
>>>>
>>>> Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
>>
>> Why?
>>
>> It's certainly not very generic, exposing a disassembly quirk for exactly one guest
>> architecture.  I mean, you could just as easily link your plugin directly to libcapstone
>> via qemu_plugin_insn_data().
>>
>>
>> r~
>   
> I agree it can be done outside of Qemu using another disassembler library. However,
> there are few reasons to do it in Qemu from architecture standpoint:
> 
> 1. To have a single place of instruction decoding logic. TCG has to decode guest instructions
> anyway. If plugins add another decoder, it causes double work and prone to errors (however
> current implementation does double decode work anyway). For example, TCG might support
> new instruction which is not available in external decoder yet.
> 
> 2. Under the hood Qemu uses different implementations of decoder (in addition to capstone)
> which is not exposed in public interface. If there is a need to configure its output, proposed
> API allows that as well.
> 
> 3. If multiple plugins want to use another disassembler syntax, they have to share
> implementation as utility function.

What's all this got to do with preferring intel over at&t syntax?
I still think it's a generally useless switch.


r~
Mikhail Tyutin Feb. 16, 2023, 5:57 a.m. UTC | #5
> On 2/15/23 19:04, Mikhail Tyutin wrote:
> >> On 2/15/23 18:17, Mikhail Tyutin wrote:
> >>> ping
> >>>
> >>> patchew link:
> >>> https://patchew.org/QEMU/7d17f0cbb5ed4c90bbadd3992429006f@yadro.com/
> >>>
> >>> 10.02.2023 18:24, Mikhail Tyutin wrote:
> >>>> This patch adds new function qemu_plugin_insn_disas_with_syntax() that allows TCG
> >>>> plugins to get disassembler string with non-default syntax if it wants to.
> >>>>
> >>>> Signed-off-by: Mikhail Tyutin <m.tyutin@yadro.com>
> >>
> >> Why?
> >>
> >> It's certainly not very generic, exposing a disassembly quirk for exactly one guest
> >> architecture.  I mean, you could just as easily link your plugin directly to libcapstone
> >> via qemu_plugin_insn_data().
> >>
> >>
> >> r~
> >
> > I agree it can be done outside of Qemu using another disassembler library. However,
> > there are few reasons to do it in Qemu from architecture standpoint:
> >
> > 1. To have a single place of instruction decoding logic. TCG has to decode guest instructions
> > anyway. If plugins add another decoder, it causes double work and prone to errors (however
> > current implementation does double decode work anyway). For example, TCG might support
> > new instruction which is not available in external decoder yet.
> >
> > 2. Under the hood Qemu uses different implementations of decoder (in addition to capstone)
> > which is not exposed in public interface. If there is a need to configure its output, proposed
> > API allows that as well.
> >
> > 3. If multiple plugins want to use another disassembler syntax, they have to share
> > implementation as utility function.
> 
> What's all this got to do with preferring intel over at&t syntax?
> I still think it's a generally useless switch.
> 
> 
> r~

Linux-world prefers AT&T style, Windows-world prefers Intel style for x86_64 ISA. That causes
a lot of pain for developers and tools that have to compare and parse assembler texts. If you have
to work on different hosts, you would better use one style for both.
diff mbox series

Patch

diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index e255bd21fd..6006490b1d 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -121,7 +121,7 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
          * a limitation for CISC architectures.
          */
         insn = qemu_plugin_tb_get_insn(tb, i);
-        insn_disas = qemu_plugin_insn_disas(insn);
+        insn_disas = qemu_plugin_insn_disas_with_syntax(insn, QEMU_PLUGIN_DISAS_SYNTAX_INTEL);
         insn_vaddr = qemu_plugin_insn_vaddr(insn);
 
         /*
diff --git a/disas.c b/disas.c
index b087c12c47..19b0f9d15f 100644
--- a/disas.c
+++ b/disas.c
@@ -267,7 +267,8 @@  static void plugin_print_address(bfd_vma addr, struct disassemble_info *info)
  * there is left over it usually indicates the front end has read more
  * bytes than it needed.
  */
-char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
+char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size,
+                   enum qemu_plugin_disas_syntax syntax)
 {
     CPUDebug s;
     GString *ds = g_string_new(NULL);
@@ -278,6 +279,7 @@  char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
     s.info.buffer_vma = addr;
     s.info.buffer_length = size;
     s.info.print_address_func = plugin_print_address;
+    s.info.dis_syntax = syntax;
 
     if (s.info.cap_arch >= 0 && cap_disas_plugin(&s.info, addr, size)) {
         ; /* done */
diff --git a/disas/capstone.c b/disas/capstone.c
index fe3efb0d3c..7476ee4044 100644
--- a/disas/capstone.c
+++ b/disas/capstone.c
@@ -5,6 +5,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
+#include "qemu/plugin.h"
 #include "disas/dis-asm.h"
 #include "disas/capstone.h"
 
@@ -87,7 +88,13 @@  static cs_err cap_disas_start(disassemble_info *info, csh *handle)
          * is compiled without AT&T syntax); the user will just have
          * to deal with the Intel syntax.
          */
-        cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+
+        size_t cs_opt_syntax = CS_OPT_SYNTAX_ATT;
+        if (info->dis_syntax == QEMU_PLUGIN_DISAS_SYNTAX_INTEL) {
+            cs_opt_syntax = CS_OPT_SYNTAX_INTEL;
+        }
+
+        cs_option(*handle, CS_OPT_SYNTAX, cs_opt_syntax);
         break;
     }
 
diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
index 64247ecb11..0153165ca2 100644
--- a/include/disas/dis-asm.h
+++ b/include/disas/dis-asm.h
@@ -409,6 +409,8 @@  typedef struct disassemble_info {
   int cap_insn_unit;
   int cap_insn_split;
 
+  int dis_syntax;
+
 } disassemble_info;
 
 /* Standard disassemblers.  Disassemble one instruction at the given
diff --git a/include/disas/disas.h b/include/disas/disas.h
index d363e95ede..f8e4f97ab1 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -2,6 +2,7 @@ 
 #define QEMU_DISAS_H
 
 #include "exec/hwaddr.h"
+#include "qemu/plugin.h"
 
 #ifdef NEED_CPU_H
 #include "cpu.h"
@@ -14,7 +15,8 @@  void target_disas(FILE *out, CPUState *cpu, target_ulong code,
 void monitor_disas(Monitor *mon, CPUState *cpu,
                    target_ulong pc, int nb_insn, int is_physical);
 
-char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
+char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size,
+                   enum qemu_plugin_disas_syntax syntax);
 
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
 const char *lookup_symbol(target_ulong orig_addr);
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index d0e9d03adf..a4bd543579 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -233,6 +233,12 @@  enum qemu_plugin_mem_rw {
     QEMU_PLUGIN_MEM_RW,
 };
 
+enum qemu_plugin_disas_syntax {
+    QEMU_PLUGIN_DISAS_SYNTAX_DEFAULT,
+    QEMU_PLUGIN_DISAS_SYNTAX_ATT,
+    QEMU_PLUGIN_DISAS_SYNTAX_INTEL,
+};
+
 /**
  * typedef qemu_plugin_vcpu_tb_trans_cb_t - translation callback
  * @id: unique plugin id
@@ -526,6 +532,17 @@  qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
 
 char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn);
 
+#define QEMU_PLUGIN_DISAS_SYNTAX_ENABLED
+/**
+ * qemu_plugin_insn_disas_with_syntax() - return disassembly string for instruction
+ * @insn: instruction reference
+ * @syntax: syntax style
+ *
+ * Returns an allocated string containing the disassembly
+ */
+char *qemu_plugin_insn_disas_with_syntax(const struct qemu_plugin_insn *insn,
+                                         enum qemu_plugin_disas_syntax syntax);
+
 /**
  * qemu_plugin_insn_symbol() - best effort symbol lookup
  * @insn: instruction reference
diff --git a/plugins/api.c b/plugins/api.c
index 2078b16edb..579dcaa0e3 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -232,10 +232,16 @@  void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn)
     return insn->haddr;
 }
 
-char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn)
+char *qemu_plugin_insn_disas_with_syntax(const struct qemu_plugin_insn *insn,
+                                         enum qemu_plugin_disas_syntax syntax)
 {
     CPUState *cpu = current_cpu;
-    return plugin_disas(cpu, insn->vaddr, insn->data->len);
+    return plugin_disas(cpu, insn->vaddr, insn->data->len, syntax);
+}
+
+char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn)
+{
+    return qemu_plugin_insn_disas_with_syntax(insn, QEMU_PLUGIN_DISAS_SYNTAX_DEFAULT);
 }
 
 const char *qemu_plugin_insn_symbol(const struct qemu_plugin_insn *insn)
diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
index 71f6c90549..895526ff74 100644
--- a/plugins/qemu-plugins.symbols
+++ b/plugins/qemu-plugins.symbols
@@ -8,6 +8,7 @@ 
   qemu_plugin_hwaddr_phys_addr;
   qemu_plugin_insn_data;
   qemu_plugin_insn_disas;
+  qemu_plugin_insn_disas_with_syntax;
   qemu_plugin_insn_haddr;
   qemu_plugin_insn_size;
   qemu_plugin_insn_symbol;