diff mbox series

[v2,1/2] accel/tcg: adding integration with linux perf

Message ID 20190830121903.17585-2-vandersonmr2@gmail.com
State New
Headers show
Series Integrating qemu to Linux Perf | expand

Commit Message

vandersonmr Aug. 30, 2019, 12:19 p.m. UTC
This commit adds support to Linux Perf in order
to be able to analyze qemu jitted code and
also to able to see the TBs PC in it.

When using "-perf" qemu creates a jitdump file in
the current working directory. The file format
specification can be found in:
https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jitdump-specification.tx

Example of use:
 perf record -k 1 qemu-x86_64 -perf ./a.out
 perf inject -j -i perf.data -o perf.data.jitted
 perf report -i perf.data.jitted

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/Makefile.objs      |   1 +
 accel/tcg/perf/Makefile.objs |   1 +
 accel/tcg/perf/jitdump.c     | 192 +++++++++++++++++++++++++++++++++++
 accel/tcg/perf/jitdump.h     |  36 +++++++
 accel/tcg/translate-all.c    |  14 +++
 include/qemu-common.h        |   3 +
 linux-user/main.c            |   7 ++
 os-posix.c                   |   3 +
 qemu-options.hx              |  11 ++
 9 files changed, 268 insertions(+)
 create mode 100644 accel/tcg/perf/Makefile.objs
 create mode 100644 accel/tcg/perf/jitdump.c
 create mode 100644 accel/tcg/perf/jitdump.h

Comments

Stefan Hajnoczi Sept. 2, 2019, 10:07 a.m. UTC | #1
On Fri, Aug 30, 2019 at 09:19:02AM -0300, vandersonmr wrote:
> This commit adds support to Linux Perf in order
> to be able to analyze qemu jitted code and
> also to able to see the TBs PC in it.
> 
> When using "-perf" qemu creates a jitdump file in
> the current working directory. The file format
> specification can be found in:
> https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jitdump-specification.tx

Oops, the link is broken: .txt

> +struct jr_code_close {
> +    struct jr_prefix p;
> +};

Unused?

> +
> +struct jr_code_move {

Unused?

> +static uint32_t get_e_machine(void)
> +{
> +    uint32_t e_machine = EM_NONE;
> +    Elf64_Ehdr elf_header;
> +    FILE *exe = fopen("/proc/self/exe", "r");
> +
> +    if (exe == NULL) {
> +        return e_machine;
> +    }
> +
> +    if (fread(&elf_header, sizeof(Elf64_Ehdr), 1, exe) != 1) {

What if this is a 32-bit binary because QEMU was built for a 32-bit
host?

> +        goto end;
> +    }
> +
> +    e_machine = elf_header.e_machine;
> +
> +end:
> +    fclose(exe);
> +    return e_machine;
> +}
> +
> +void start_jitdump_file(void)
> +{
> +    gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());

You can now use g_autofree:

  g_autofree gchar *dumpfile_name = g_strdup_printf(...);

and then the explicit g_free() isn't necessary anymore (and the memory
leak in the mmap error case is also solved).

> +void append_load_in_jitdump_file(TranslationBlock *tb)
> +{
> +    gchar *func_name = g_strdup_printf("TB virt:0x"TARGET_FMT_lx, tb->pc);
> +
> +    struct jr_code_load load_event;
> +    load_event.p.id = JIT_CODE_LOAD;
> +    load_event.p.total_size =
> +        sizeof(struct jr_code_load) + strlen(func_name) + 1 + tb->tc.size;
> +    load_event.p.timestamp = get_timestamp();
> +    load_event.pid = getpid();
> +    load_event.tid = syscall(SYS_gettid);
> +    load_event.vma = tb->pc;
> +    load_event.code_addr = (uint64_t) tb->tc.ptr;
> +    load_event.code_size = tb->tc.size;
> +    load_event.code_index = tb->pc;
> +
> +    fwrite(&load_event, sizeof(struct jr_code_load), 1, dumpfile);
> +    fwrite(func_name, strlen(func_name) + 1, 1, dumpfile);
> +    fwrite(tb->tc.ptr, tb->tc.size, 1, dumpfile);
> +
> +    g_free(func_name);
> +    fflush(dumpfile);
> +}

I didn't see a reply to my question on the previous patch series:

  "append_load_in_jitdump_file() calls fwrite() multiple times.  What
  guarantees they will not be interleaved when multiple threads call
  this function?"

Does TCG ever throw away TBs and is it necessary to record this in the
file so perf knows about it?
Alex Bennée Oct. 2, 2019, 6:55 p.m. UTC | #2
Stefan Hajnoczi <stefanha@gmail.com> writes:

> On Fri, Aug 30, 2019 at 09:19:02AM -0300, vandersonmr wrote:
>> This commit adds support to Linux Perf in order
>> to be able to analyze qemu jitted code and
>> also to able to see the TBs PC in it.
>>
>> When using "-perf" qemu creates a jitdump file in
>> the current working directory. The file format
>> specification can be found in:
>> https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jitdump-specification.tx
>
> Oops, the link is broken: .txt
>
>> +struct jr_code_close {
>> +    struct jr_prefix p;
>> +};
>
> Unused?
>
>> +
>> +struct jr_code_move {
>
> Unused?
>
>> +static uint32_t get_e_machine(void)
>> +{
>> +    uint32_t e_machine = EM_NONE;
>> +    Elf64_Ehdr elf_header;
>> +    FILE *exe = fopen("/proc/self/exe", "r");
>> +
>> +    if (exe == NULL) {
>> +        return e_machine;
>> +    }
>> +
>> +    if (fread(&elf_header, sizeof(Elf64_Ehdr), 1, exe) != 1) {
>
> What if this is a 32-bit binary because QEMU was built for a 32-bit
> host?
>
>> +        goto end;
>> +    }
>> +
>> +    e_machine = elf_header.e_machine;
>> +
>> +end:
>> +    fclose(exe);
>> +    return e_machine;
>> +}
>> +
>> +void start_jitdump_file(void)
>> +{
>> +    gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
>
> You can now use g_autofree:
>
>   g_autofree gchar *dumpfile_name = g_strdup_printf(...);
>
> and then the explicit g_free() isn't necessary anymore (and the memory
> leak in the mmap error case is also solved).
>
>> +void append_load_in_jitdump_file(TranslationBlock *tb)
>> +{
>> +    gchar *func_name = g_strdup_printf("TB virt:0x"TARGET_FMT_lx, tb->pc);
>> +
>> +    struct jr_code_load load_event;
>> +    load_event.p.id = JIT_CODE_LOAD;
>> +    load_event.p.total_size =
>> +        sizeof(struct jr_code_load) + strlen(func_name) + 1 + tb->tc.size;
>> +    load_event.p.timestamp = get_timestamp();
>> +    load_event.pid = getpid();
>> +    load_event.tid = syscall(SYS_gettid);
>> +    load_event.vma = tb->pc;
>> +    load_event.code_addr = (uint64_t) tb->tc.ptr;
>> +    load_event.code_size = tb->tc.size;
>> +    load_event.code_index = tb->pc;
>> +
>> +    fwrite(&load_event, sizeof(struct jr_code_load), 1, dumpfile);
>> +    fwrite(func_name, strlen(func_name) + 1, 1, dumpfile);
>> +    fwrite(tb->tc.ptr, tb->tc.size, 1, dumpfile);
>> +
>> +    g_free(func_name);
>> +    fflush(dumpfile);
>> +}
>
> I didn't see a reply to my question on the previous patch series:
>
>   "append_load_in_jitdump_file() calls fwrite() multiple times.  What
>   guarantees they will not be interleaved when multiple threads call
>   this function?"

Ahh that was exactly the problem I was debugging. Fixed with a lock now.

> Does TCG ever throw away TBs and is it necessary to record this in the
> file so perf knows about it?

Yes - I suspect that's what the currently unused headers are for.

--
Alex Bennée
diff mbox series

Patch

diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index d381a02f34..f393a7438f 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -3,6 +3,7 @@  obj-$(CONFIG_SOFTMMU) += cputlb.o
 obj-y += tcg-runtime.o tcg-runtime-gvec.o
 obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
 obj-y += translator.o
+obj-y += perf/
 
 obj-$(CONFIG_USER_ONLY) += user-exec.o
 obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
diff --git a/accel/tcg/perf/Makefile.objs b/accel/tcg/perf/Makefile.objs
new file mode 100644
index 0000000000..ca9abb4f48
--- /dev/null
+++ b/accel/tcg/perf/Makefile.objs
@@ -0,0 +1 @@ 
+obj-$(CONFIG_LINUX) += jitdump.o
diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
new file mode 100644
index 0000000000..6fb464039d
--- /dev/null
+++ b/accel/tcg/perf/jitdump.c
@@ -0,0 +1,192 @@ 
+/*
+ * This code implements an interface to create and fill jitdump files. These files
+ * store information used by Linux Perf to enhance the presentation of jitted
+ * code and to allow the disassembly of jitted code.
+ *
+ * The jitdump file specification can be found in the Linux Kernel Source tree:
+ *    linux/tools/perf/Documentation/jitdump-specification.txt
+ *
+ * https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jitdump-specification.txt
+ */
+
+#include "qemu/osdep.h"
+
+#include <sys/syscall.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <elf.h>
+
+#include "disas/disas.h"
+#include "jitdump.h"
+#include "qemu-common.h"
+
+struct jitheader {
+    uint32_t magic;     /* characters "jItD" */
+    uint32_t version;   /* header version */
+    uint32_t total_size;/* total size of header */
+    uint32_t elf_mach;  /* elf mach target */
+    uint32_t pad1;      /* reserved */
+    uint32_t pid;       /* JIT process id */
+    uint64_t timestamp; /* timestamp */
+    uint64_t flags;     /* flags */
+};
+
+enum jit_record_type {
+    JIT_CODE_LOAD       = 0,
+    JIT_CODE_MOVE       = 1,
+    JIT_CODE_DEBUG_INFO = 2,
+    JIT_CODE_CLOSE      = 3,
+
+    JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+    uint32_t id;
+    uint32_t total_size;
+    uint64_t timestamp;
+};
+
+struct jr_code_load {
+    struct jr_prefix p;
+
+    uint32_t pid;
+    uint32_t tid;
+    uint64_t vma;
+    uint64_t code_addr;
+    uint64_t code_size;
+    uint64_t code_index;
+};
+
+struct jr_code_close {
+    struct jr_prefix p;
+};
+
+struct jr_code_move {
+    struct jr_prefix p;
+
+    uint32_t pid;
+    uint32_t tid;
+    uint64_t vma;
+    uint64_t old_code_addr;
+    uint64_t new_code_addr;
+    uint64_t code_size;
+    uint64_t code_index;
+};
+
+FILE *dumpfile;
+void *perf_marker;
+
+static uint64_t get_timestamp(void)
+{
+    struct timespec ts;
+    if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+        fprintf(stderr, "No support for CLOCK_MONOTONIC! -perf cannot be used!\n");
+        exit(1);
+    }
+    return (uint64_t) ts.tv_sec * 1000000000 + ts.tv_nsec;
+}
+
+static uint32_t get_e_machine(void)
+{
+    uint32_t e_machine = EM_NONE;
+    Elf64_Ehdr elf_header;
+    FILE *exe = fopen("/proc/self/exe", "r");
+
+    if (exe == NULL) {
+        return e_machine;
+    }
+
+    if (fread(&elf_header, sizeof(Elf64_Ehdr), 1, exe) != 1) {
+        goto end;
+    }
+
+    e_machine = elf_header.e_machine;
+
+end:
+    fclose(exe);
+    return e_machine;
+}
+
+void start_jitdump_file(void)
+{
+    gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
+    dumpfile = fopen(dumpfile_name, "w+");
+
+    /* 'Perf record' saves mmaped files during the execution of a program and
+     * 'perf inject' iterate over them to reconstruct all used/executed binary.
+     * So, we create a mmap with the path of our jitdump that is processed
+     * and used by 'perf inject' to reconstruct jitted binaries.
+     */
+    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),
+                          PROT_READ | PROT_EXEC,
+                          MAP_PRIVATE,
+                          fileno(dumpfile), 0);
+
+    if (perf_marker == MAP_FAILED) {
+        printf("Failed to create mmap marker file for perf %d\n", fileno(dumpfile));
+        fclose(dumpfile);
+        return;
+    }
+
+    g_free(dumpfile_name);
+
+    struct jitheader header;
+    header.magic = 0x4A695444;
+    header.version = 1;
+    header.elf_mach = get_e_machine();
+    header.total_size = sizeof(struct jitheader);
+    header.pid = getpid();
+    header.timestamp = get_timestamp();
+    header.flags = 0;
+
+    fwrite(&header, header.total_size, 1, dumpfile);
+
+    fflush(dumpfile);
+}
+
+void append_load_in_jitdump_file(TranslationBlock *tb)
+{
+    gchar *func_name = g_strdup_printf("TB virt:0x"TARGET_FMT_lx, tb->pc);
+
+    struct jr_code_load load_event;
+    load_event.p.id = JIT_CODE_LOAD;
+    load_event.p.total_size =
+        sizeof(struct jr_code_load) + strlen(func_name) + 1 + tb->tc.size;
+    load_event.p.timestamp = get_timestamp();
+    load_event.pid = getpid();
+    load_event.tid = syscall(SYS_gettid);
+    load_event.vma = tb->pc;
+    load_event.code_addr = (uint64_t) tb->tc.ptr;
+    load_event.code_size = tb->tc.size;
+    load_event.code_index = tb->pc;
+
+    fwrite(&load_event, sizeof(struct jr_code_load), 1, dumpfile);
+    fwrite(func_name, strlen(func_name) + 1, 1, dumpfile);
+    fwrite(tb->tc.ptr, tb->tc.size, 1, dumpfile);
+
+    g_free(func_name);
+    fflush(dumpfile);
+}
+
+void close_jitdump_file(void)
+{
+    fclose(dumpfile);
+    if (perf_marker != MAP_FAILED) {
+        munmap(perf_marker, sysconf(_SC_PAGESIZE));
+    }
+}
+
+bool is_jitdump_enabled;
+
+void enable_jitdump(void)
+{
+    is_jitdump_enabled = true;
+}
+
+bool jitdump_enabled(void)
+{
+    return is_jitdump_enabled;
+}
diff --git a/accel/tcg/perf/jitdump.h b/accel/tcg/perf/jitdump.h
new file mode 100644
index 0000000000..5d6df3ec91
--- /dev/null
+++ b/accel/tcg/perf/jitdump.h
@@ -0,0 +1,36 @@ 
+/*
+ * QEMU Linux Perf Support
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario (vandersonmr2@gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include "exec/exec-all.h"
+
+void start_jitdump_file(void);
+
+void append_load_in_jitdump_file(TranslationBlock *tb);
+void append_move_in_jitdump_file(TranslationBlock *tb);
+
+void close_jitdump_file(void);
+
+#endif
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..9b30d96af5 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -58,6 +58,10 @@ 
 #include "sysemu/cpus.h"
 #include "sysemu/tcg.h"
 
+#ifdef __linux__
+#include "perf/jitdump.h"
+#endif
+
 /* #define DEBUG_TB_INVALIDATE */
 /* #define DEBUG_TB_FLUSH */
 /* make various TB consistency checks */
@@ -1148,6 +1152,11 @@  void tcg_exec_init(unsigned long tb_size)
     cpu_gen_init();
     page_init();
     tb_htable_init();
+#ifdef __linux__
+    if (jitdump_enabled()) {
+        start_jitdump_file();
+    }
+#endif
     code_gen_alloc(tb_size);
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
@@ -1877,6 +1886,11 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
         return existing_tb;
     }
     tcg_tb_insert(tb);
+#ifdef __linux__
+    if (jitdump_enabled()) {
+        append_load_in_jitdump_file(tb);
+    }
+#endif
     return tb;
 }
 
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 0235cd3b91..7ab796e4a3 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -130,4 +130,7 @@  void page_size_init(void);
  * returned. */
 bool dump_in_progress(void);
 
+void enable_jitdump(void);
+bool jitdump_enabled(void);
+
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 47917bbb20..f6618dd060 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -379,6 +379,11 @@  static void handle_arg_strace(const char *arg)
     do_strace = 1;
 }
 
+static void handle_arg_perf(const char *arg)
+{
+    enable_jitdump();
+}
+
 static void handle_arg_version(const char *arg)
 {
     printf("qemu-" TARGET_NAME " version " QEMU_FULL_VERSION
@@ -444,6 +449,8 @@  static const struct qemu_argument arg_table[] = {
      "",           "Seed for pseudo-random number generator"},
     {"trace",      "QEMU_TRACE",       true,  handle_arg_trace,
      "",           "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
+    {"perf",      "QEMU_PERF",         false, handle_arg_perf,
+     "",           "dump jitdump files to help linux perf JIT code visualization"},
     {"version",    "QEMU_VERSION",     false, handle_arg_version,
      "",           "display version information and exit"},
     {NULL, NULL, false, NULL, NULL, NULL}
diff --git a/os-posix.c b/os-posix.c
index 86cffd2c7d..2ddb1e6393 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -191,6 +191,9 @@  int os_parse_cmd_args(int index, const char *optarg)
     case QEMU_OPTION_enablefips:
         fips_set_state(true);
         break;
+    case QEMU_OPTION_perf:
+        enable_jitdump();
+        break;
 #endif
     default:
         return -1;
diff --git a/qemu-options.hx b/qemu-options.hx
index ea0638e92d..24ebfb910c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4156,6 +4156,17 @@  STEXI
 Enable FIPS 140-2 compliance mode.
 ETEXI
 
+#ifdef __linux__
+DEF("perf", 0, QEMU_OPTION_perf,
+    "-perf  dump jitdump files to help linux perf JIT code visualization\n",
+    QEMU_ARCH_ALL)
+#endif
+STEXI
+@item -perf
+@findex -perf
+Dumps jitdump files to help linux perf JIT code visualization
+ETEXI
+
 HXCOMM Deprecated by -machine accel=tcg property
 DEF("no-kvm", 0, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)