diff mbox series

[v1,1/2] accel/tcg: adding integration with linux perf

Message ID 20190815023725.2659-2-vandersonmr2@gmail.com
State New
Headers show
Series Integrating qemu to Linux Perf | expand

Commit Message

vandersonmr Aug. 15, 2019, 2:37 a.m. UTC
This commit adds support to Linux Perf in order
to be able to analyze qemu jitted code and
also to able to see the TBs PC in it.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/Makefile.objs      |   1 +
 accel/tcg/perf/Makefile.objs |   1 +
 accel/tcg/perf/jitdump.c     | 180 +++++++++++++++++++++++++++++++++++
 accel/tcg/perf/jitdump.h     |  19 ++++
 accel/tcg/translate-all.c    |  12 +++
 include/qemu-common.h        |   3 +
 linux-user/main.c            |   7 ++
 qemu-options.hx              |  12 +++
 8 files changed, 235 insertions(+)
 create mode 100644 accel/tcg/perf/Makefile.objs
 create mode 100644 accel/tcg/perf/jitdump.c
 create mode 100644 accel/tcg/perf/jitdump.h

Comments

Stefan Hajnoczi Aug. 15, 2019, 2:40 p.m. UTC | #1
On Wed, Aug 14, 2019 at 11:37:24PM -0300, vandersonmr wrote:
> This commit adds support to Linux Perf in order
> to be able to analyze qemu jitted code and
> also to able to see the TBs PC in it.

Is there any reference to the file format?  Please include it in a code
comment, if such a thing exists.

> diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
> new file mode 100644
> index 0000000000..6f4c0911c2
> --- /dev/null
> +++ b/accel/tcg/perf/jitdump.c
> @@ -0,0 +1,180 @@

License header?

> +#ifdef __linux__

If the entire source file is #ifdef __linux__ then Makefile.objs should
probably contain obj-$(CONFIG_LINUX) += jitdump.o instead.  Letting the
build system decide what to compile is cleaner than ifdeffing large
amounts of code.

> +
> +#include <stdint.h>
> +
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <time.h>
> +#include <sys/syscall.h>
> +#include <elf.h>
> +
> +#include "jitdump.h"
> +#include "qemu-common.h"

Please follow QEMU's header ordering conventions.  See ./HACKING "1.2.
Include directives".

> +void start_jitdump_file(void)
> +{
> +    GString *dumpfile_name = g_string_new(NULL);;
> +    g_string_printf(dumpfile_name, "./jit-%d.dump", getpid());

Simpler:

  gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
  ...
  g_free(dumpfile_name);

> +    dumpfile = fopen(dumpfile_name->str, "w+");

getpid() and the global dumpfile variable make me wonder what happens
with multi-threaded TCG?

> +
> +    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),

Please mention the point of this mmap in a comment.  My best guess is
that perf stores the /proc/$PID/maps and this is how it finds the
jitdump file?

> +                          PROT_READ | PROT_EXEC,
> +                          MAP_PRIVATE,
> +                          fileno(dumpfile), 0);
> +
> +    if (perf_marker == MAP_FAILED) {
> +        printf("Failed to create mmap marker file for perf %d\n", fileno(dumpfile));
> +        fclose(dumpfile);
> +        return;
> +    }
> +
> +    g_string_free(dumpfile_name, TRUE);
> +
> +    struct jitheader *header = g_new0(struct jitheader, 1);

Why g_new this struct?  It's small and can be declared on the stack.

Please use g_free() with g_malloc/new/etc().  It's not safe to mismatch
glib and libc memory allocation functions.

> +    header->magic = 0x4A695444;
> +    header->version = 1;
> +    header->elf_mach = get_e_machine();
> +    header->total_size = sizeof(struct jitheader);
> +    header->pid = getpid();
> +    header->timestamp = get_timestamp();
> +
> +    fwrite(header, header->total_size, 1, dumpfile);
> +
> +    free(header);
> +    fflush(dumpfile);
> +}
> +
> +void append_load_in_jitdump_file(TranslationBlock *tb)
> +{
> +    GString *func_name = g_string_new(NULL);
> +    g_string_printf(func_name, "TB virt:0x"TARGET_FMT_lx"%c", tb->pc, '\0');

The explicit NUL character looks strange to me.  I think the idea is to
avoid func_name->len + 1?  Adding NUL characters to C strings can be a
source of bugs, I would stick to convention and do len + 1 instead of
putting NUL characters into the GString.  This is a question of style
though.

> +
> +    struct jr_code_load *load_event = g_new0(struct jr_code_load, 1);

No need to allocate load_event on the heap.

> diff --git a/qemu-options.hx b/qemu-options.hx
> index 9621e934c0..1c26eeeb9c 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4147,6 +4147,18 @@ STEXI
>  Enable FIPS 140-2 compliance mode.
>  ETEXI
>  
> +#ifdef __linux__
> +DEF("perf", 0, QEMU_OPTION_perf,
> +    "-perf    dump jitdump files to help linux perf JIT code visualization\n",
> +    QEMU_ARCH_ALL)
> +#endif
> +STEXI
> +@item -perf
> +@findex -perf
> +Dumps jitdump files to help linux perf JIT code visualization

Suggestions on expanding the documentation:

Where are the jitdump files dumped?  The current working directory?

Anything to say about the naming scheme for these files?

Can you include an example of how to load them into perf(1)?
Alex Bennée Aug. 15, 2019, 4:17 p.m. UTC | #2
Stefan Hajnoczi <stefanha@gmail.com> writes:

> On Wed, Aug 14, 2019 at 11:37:24PM -0300, vandersonmr wrote:
>> This commit adds support to Linux Perf in order
>> to be able to analyze qemu jitted code and
>> also to able to see the TBs PC in it.
>
> Is there any reference to the file format?  Please include it in a code
> comment, if such a thing exists.
>
>> diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
>> new file mode 100644
>> index 0000000000..6f4c0911c2
>> --- /dev/null
>> +++ b/accel/tcg/perf/jitdump.c
>> @@ -0,0 +1,180 @@
>
> License header?
>
>> +#ifdef __linux__
>
> If the entire source file is #ifdef __linux__ then Makefile.objs should
> probably contain obj-$(CONFIG_LINUX) += jitdump.o instead.  Letting the
> build system decide what to compile is cleaner than ifdeffing large
> amounts of code.
>
>> +
>> +#include <stdint.h>
>> +
>> +#include <stdio.h>
>> +#include <unistd.h>
>> +#include <time.h>
>> +#include <sys/syscall.h>
>> +#include <elf.h>
>> +
>> +#include "jitdump.h"
>> +#include "qemu-common.h"
>
> Please follow QEMU's header ordering conventions.  See ./HACKING "1.2.
> Include directives".
>
>> +void start_jitdump_file(void)
>> +{
>> +    GString *dumpfile_name = g_string_new(NULL);;
>> +    g_string_printf(dumpfile_name, "./jit-%d.dump", getpid());
>
> Simpler:
>
>   gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
>   ...
>   g_free(dumpfile_name);
>
>> +    dumpfile = fopen(dumpfile_name->str, "w+");
>
> getpid() and the global dumpfile variable make me wonder what happens
> with multi-threaded TCG?
>
>> +
>> +    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),
>
> Please mention the point of this mmap in a comment.  My best guess is
> that perf stores the /proc/$PID/maps and this is how it finds the
> jitdump file?
>
>> +                          PROT_READ | PROT_EXEC,
>> +                          MAP_PRIVATE,
>> +                          fileno(dumpfile), 0);
>> +
>> +    if (perf_marker == MAP_FAILED) {
>> +        printf("Failed to create mmap marker file for perf %d\n", fileno(dumpfile));
>> +        fclose(dumpfile);
>> +        return;
>> +    }
>> +
>> +    g_string_free(dumpfile_name, TRUE);
>> +
>> +    struct jitheader *header = g_new0(struct jitheader, 1);
>
> Why g_new this struct?  It's small and can be declared on the stack.
>
> Please use g_free() with g_malloc/new/etc().  It's not safe to mismatch
> glib and libc memory allocation functions.
>
>> +    header->magic = 0x4A695444;
>> +    header->version = 1;
>> +    header->elf_mach = get_e_machine();
>> +    header->total_size = sizeof(struct jitheader);
>> +    header->pid = getpid();
>> +    header->timestamp = get_timestamp();
>> +
>> +    fwrite(header, header->total_size, 1, dumpfile);
>> +
>> +    free(header);
>> +    fflush(dumpfile);
>> +}
>> +
>> +void append_load_in_jitdump_file(TranslationBlock *tb)
>> +{
>> +    GString *func_name = g_string_new(NULL);
>> +    g_string_printf(func_name, "TB virt:0x"TARGET_FMT_lx"%c", tb->pc, '\0');
>
> The explicit NUL character looks strange to me.  I think the idea is to
> avoid func_name->len + 1?  Adding NUL characters to C strings can be a
> source of bugs, I would stick to convention and do len + 1 instead of
> putting NUL characters into the GString.  This is a question of style
> though.

The glib functions always add null characters so you shouldn't need to
manually.

>
>> +
>> +    struct jr_code_load *load_event = g_new0(struct jr_code_load, 1);
>
> No need to allocate load_event on the heap.
>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 9621e934c0..1c26eeeb9c 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -4147,6 +4147,18 @@ STEXI
>>  Enable FIPS 140-2 compliance mode.
>>  ETEXI
>>
>> +#ifdef __linux__
>> +DEF("perf", 0, QEMU_OPTION_perf,
>> +    "-perf    dump jitdump files to help linux perf JIT code visualization\n",
>> +    QEMU_ARCH_ALL)
>> +#endif
>> +STEXI
>> +@item -perf
>> +@findex -perf
>> +Dumps jitdump files to help linux perf JIT code visualization
>
> Suggestions on expanding the documentation:
>
> Where are the jitdump files dumped?  The current working directory?
>
> Anything to say about the naming scheme for these files?
>
> Can you include an example of how to load them into perf(1)?


--
Alex Bennée
vandersonmr Aug. 21, 2019, 7:01 p.m. UTC | #3
On Thu, Aug 15, 2019 at 11:40 AM Stefan Hajnoczi <stefanha@gmail.com> wrote:

> On Wed, Aug 14, 2019 at 11:37:24PM -0300, vandersonmr wrote:
> > This commit adds support to Linux Perf in order
> > to be able to analyze qemu jitted code and
> > also to able to see the TBs PC in it.
>
> Is there any reference to the file format?  Please include it in a code
> comment, if such a thing exists.
>
> > diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
> > new file mode 100644
> > index 0000000000..6f4c0911c2
> > --- /dev/null
> > +++ b/accel/tcg/perf/jitdump.c
> > @@ -0,0 +1,180 @@
>
> License header?
>
> > +#ifdef __linux__
>
> If the entire source file is #ifdef __linux__ then Makefile.objs should
> probably contain obj-$(CONFIG_LINUX) += jitdump.o instead.  Letting the
> build system decide what to compile is cleaner than ifdeffing large
> amounts of code.
>
> > +
> > +#include <stdint.h>
> > +
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +#include <time.h>
> > +#include <sys/syscall.h>
> > +#include <elf.h>
> > +
> > +#include "jitdump.h"
> > +#include "qemu-common.h"
>
> Please follow QEMU's header ordering conventions.  See ./HACKING "1.2.
> Include directives".
>
> > +void start_jitdump_file(void)
> > +{
> > +    GString *dumpfile_name = g_string_new(NULL);;
> > +    g_string_printf(dumpfile_name, "./jit-%d.dump", getpid());
>
> Simpler:
>
>   gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
>   ...
>   g_free(dumpfile_name);
>
> > +    dumpfile = fopen(dumpfile_name->str, "w+");
>
> getpid() and the global dumpfile variable make me wonder what happens
> with multi-threaded TCG?
>

I did some tests and it appears to be working fine with multi-threaded TCG.
tcg_exec_init should execute only once even in multi-threaded TCG, right?
If so, we are going to create only one jitdump file. Also, both in Windows
and Linux/POSIX fwrites is thread-safe, thus we would be safely updating
the jitdump file. Does it make sense?


>
> > +
> > +    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),
>
> Please mention the point of this mmap in a comment.  My best guess is
> that perf stores the /proc/$PID/maps and this is how it finds the
> jitdump file?
>
> > +                          PROT_READ | PROT_EXEC,
> > +                          MAP_PRIVATE,
> > +                          fileno(dumpfile), 0);
> > +
> > +    if (perf_marker == MAP_FAILED) {
> > +        printf("Failed to create mmap marker file for perf %d\n",
> fileno(dumpfile));
> > +        fclose(dumpfile);
> > +        return;
> > +    }
> > +
> > +    g_string_free(dumpfile_name, TRUE);
> > +
> > +    struct jitheader *header = g_new0(struct jitheader, 1);
>
> Why g_new this struct?  It's small and can be declared on the stack.
>
> Please use g_free() with g_malloc/new/etc().  It's not safe to mismatch
> glib and libc memory allocation functions.
>
> > +    header->magic = 0x4A695444;
> > +    header->version = 1;
> > +    header->elf_mach = get_e_machine();
> > +    header->total_size = sizeof(struct jitheader);
> > +    header->pid = getpid();
> > +    header->timestamp = get_timestamp();
> > +
> > +    fwrite(header, header->total_size, 1, dumpfile);
> > +
> > +    free(header);
> > +    fflush(dumpfile);
> > +}
> > +
> > +void append_load_in_jitdump_file(TranslationBlock *tb)
> > +{
> > +    GString *func_name = g_string_new(NULL);
> > +    g_string_printf(func_name, "TB virt:0x"TARGET_FMT_lx"%c", tb->pc,
> '\0');
>
> The explicit NUL character looks strange to me.  I think the idea is to
> avoid func_name->len + 1?  Adding NUL characters to C strings can be a
> source of bugs, I would stick to convention and do len + 1 instead of
> putting NUL characters into the GString.  This is a question of style
> though.
>
> > +
> > +    struct jr_code_load *load_event = g_new0(struct jr_code_load, 1);
>
> No need to allocate load_event on the heap.
>
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 9621e934c0..1c26eeeb9c 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -4147,6 +4147,18 @@ STEXI
> >  Enable FIPS 140-2 compliance mode.
> >  ETEXI
> >
> > +#ifdef __linux__
> > +DEF("perf", 0, QEMU_OPTION_perf,
> > +    "-perf    dump jitdump files to help linux perf JIT code
> visualization\n",
> > +    QEMU_ARCH_ALL)
> > +#endif
> > +STEXI
> > +@item -perf
> > +@findex -perf
> > +Dumps jitdump files to help linux perf JIT code visualization
>
> Suggestions on expanding the documentation:
>
> Where are the jitdump files dumped?  The current working directory?
>
> Anything to say about the naming scheme for these files?
>
> Can you include an example of how to load them into perf(1)?
>
Stefan Hajnoczi Aug. 22, 2019, 2:41 p.m. UTC | #4
On Thu, Aug 15, 2019 at 05:17:49PM +0100, Alex Bennée wrote:
> 
> Stefan Hajnoczi <stefanha@gmail.com> writes:
> 
> > On Wed, Aug 14, 2019 at 11:37:24PM -0300, vandersonmr wrote:
> >> This commit adds support to Linux Perf in order
> >> to be able to analyze qemu jitted code and
> >> also to able to see the TBs PC in it.
> >
> > Is there any reference to the file format?  Please include it in a code
> > comment, if such a thing exists.
> >
> >> diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
> >> new file mode 100644
> >> index 0000000000..6f4c0911c2
> >> --- /dev/null
> >> +++ b/accel/tcg/perf/jitdump.c
> >> @@ -0,0 +1,180 @@
> >
> > License header?
> >
> >> +#ifdef __linux__
> >
> > If the entire source file is #ifdef __linux__ then Makefile.objs should
> > probably contain obj-$(CONFIG_LINUX) += jitdump.o instead.  Letting the
> > build system decide what to compile is cleaner than ifdeffing large
> > amounts of code.
> >
> >> +
> >> +#include <stdint.h>
> >> +
> >> +#include <stdio.h>
> >> +#include <unistd.h>
> >> +#include <time.h>
> >> +#include <sys/syscall.h>
> >> +#include <elf.h>
> >> +
> >> +#include "jitdump.h"
> >> +#include "qemu-common.h"
> >
> > Please follow QEMU's header ordering conventions.  See ./HACKING "1.2.
> > Include directives".
> >
> >> +void start_jitdump_file(void)
> >> +{
> >> +    GString *dumpfile_name = g_string_new(NULL);;
> >> +    g_string_printf(dumpfile_name, "./jit-%d.dump", getpid());
> >
> > Simpler:
> >
> >   gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
> >   ...
> >   g_free(dumpfile_name);
> >
> >> +    dumpfile = fopen(dumpfile_name->str, "w+");
> >
> > getpid() and the global dumpfile variable make me wonder what happens
> > with multi-threaded TCG?
> >
> >> +
> >> +    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),
> >
> > Please mention the point of this mmap in a comment.  My best guess is
> > that perf stores the /proc/$PID/maps and this is how it finds the
> > jitdump file?
> >
> >> +                          PROT_READ | PROT_EXEC,
> >> +                          MAP_PRIVATE,
> >> +                          fileno(dumpfile), 0);
> >> +
> >> +    if (perf_marker == MAP_FAILED) {
> >> +        printf("Failed to create mmap marker file for perf %d\n", fileno(dumpfile));
> >> +        fclose(dumpfile);
> >> +        return;
> >> +    }
> >> +
> >> +    g_string_free(dumpfile_name, TRUE);
> >> +
> >> +    struct jitheader *header = g_new0(struct jitheader, 1);
> >
> > Why g_new this struct?  It's small and can be declared on the stack.
> >
> > Please use g_free() with g_malloc/new/etc().  It's not safe to mismatch
> > glib and libc memory allocation functions.
> >
> >> +    header->magic = 0x4A695444;
> >> +    header->version = 1;
> >> +    header->elf_mach = get_e_machine();
> >> +    header->total_size = sizeof(struct jitheader);
> >> +    header->pid = getpid();
> >> +    header->timestamp = get_timestamp();
> >> +
> >> +    fwrite(header, header->total_size, 1, dumpfile);
> >> +
> >> +    free(header);
> >> +    fflush(dumpfile);
> >> +}
> >> +
> >> +void append_load_in_jitdump_file(TranslationBlock *tb)
> >> +{
> >> +    GString *func_name = g_string_new(NULL);
> >> +    g_string_printf(func_name, "TB virt:0x"TARGET_FMT_lx"%c", tb->pc, '\0');
> >
> > The explicit NUL character looks strange to me.  I think the idea is to
> > avoid func_name->len + 1?  Adding NUL characters to C strings can be a
> > source of bugs, I would stick to convention and do len + 1 instead of
> > putting NUL characters into the GString.  This is a question of style
> > though.
> 
> The glib functions always add null characters so you shouldn't need to
> manually.

Yep, just remember to do func_name->len + 1 since glib doesn't count the
NUL character that gets added automatically.
Stefan Hajnoczi Aug. 22, 2019, 2:44 p.m. UTC | #5
On Wed, Aug 21, 2019 at 04:01:48PM -0300, Vanderson Martins do Rosario wrote:
> On Thu, Aug 15, 2019 at 11:40 AM Stefan Hajnoczi <stefanha@gmail.com> wrote:
> > On Wed, Aug 14, 2019 at 11:37:24PM -0300, vandersonmr wrote:
> > > +void start_jitdump_file(void)
> > > +{
> > > +    GString *dumpfile_name = g_string_new(NULL);;
> > > +    g_string_printf(dumpfile_name, "./jit-%d.dump", getpid());
> >
> > Simpler:
> >
> >   gchar *dumpfile_name = g_strdup_printf("./jit-%d.dump", getpid());
> >   ...
> >   g_free(dumpfile_name);
> >
> > > +    dumpfile = fopen(dumpfile_name->str, "w+");
> >
> > getpid() and the global dumpfile variable make me wonder what happens
> > with multi-threaded TCG?
> >
> 
> I did some tests and it appears to be working fine with multi-threaded TCG.
> tcg_exec_init should execute only once even in multi-threaded TCG, right?
> If so, we are going to create only one jitdump file. Also, both in Windows
> and Linux/POSIX fwrites is thread-safe, thus we would be safely updating
> the jitdump file. Does it make sense?

append_load_in_jitdump_file() calls fwrite() multiple times.  What
guarantees they will not be interleaved when multiple threads call this
function?

Stefan
diff mbox series

Patch

diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index d381a02f34..f393a7438f 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -3,6 +3,7 @@  obj-$(CONFIG_SOFTMMU) += cputlb.o
 obj-y += tcg-runtime.o tcg-runtime-gvec.o
 obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
 obj-y += translator.o
+obj-y += perf/
 
 obj-$(CONFIG_USER_ONLY) += user-exec.o
 obj-$(call lnot,$(CONFIG_SOFTMMU)) += user-exec-stub.o
diff --git a/accel/tcg/perf/Makefile.objs b/accel/tcg/perf/Makefile.objs
new file mode 100644
index 0000000000..f82fba35e5
--- /dev/null
+++ b/accel/tcg/perf/Makefile.objs
@@ -0,0 +1 @@ 
+obj-y += jitdump.o
diff --git a/accel/tcg/perf/jitdump.c b/accel/tcg/perf/jitdump.c
new file mode 100644
index 0000000000..6f4c0911c2
--- /dev/null
+++ b/accel/tcg/perf/jitdump.c
@@ -0,0 +1,180 @@ 
+#ifdef __linux__
+
+#include <stdint.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/syscall.h>
+#include <elf.h>
+
+#include "jitdump.h"
+#include "qemu-common.h"
+
+struct jitheader {
+    uint32_t magic;     /* characters "jItD" */
+    uint32_t version;   /* header version */
+    uint32_t total_size;/* total size of header */
+    uint32_t elf_mach;  /* elf mach target */
+    uint32_t pad1;      /* reserved */
+    uint32_t pid;       /* JIT process id */
+    uint64_t timestamp; /* timestamp */
+    uint64_t flags;     /* flags */
+};
+
+enum jit_record_type {
+    JIT_CODE_LOAD       = 0,
+    JIT_CODE_MOVE       = 1,
+    JIT_CODE_DEBUG_INFO = 2,
+    JIT_CODE_CLOSE      = 3,
+
+    JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+    uint32_t id;
+    uint32_t total_size;
+    uint64_t timestamp;
+};
+
+struct jr_code_load {
+    struct jr_prefix p;
+
+    uint32_t pid;
+    uint32_t tid;
+    uint64_t vma;
+    uint64_t code_addr;
+    uint64_t code_size;
+    uint64_t code_index;
+};
+
+struct jr_code_close {
+    struct jr_prefix p;
+};
+
+struct jr_code_move {
+    struct jr_prefix p;
+
+    uint32_t pid;
+    uint32_t tid;
+    uint64_t vma;
+    uint64_t old_code_addr;
+    uint64_t new_code_addr;
+    uint64_t code_size;
+    uint64_t code_index;
+};
+
+FILE *dumpfile;
+void *perf_marker;
+
+static uint64_t get_timestamp(void)
+{
+    struct timespec ts;
+    if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+        fprintf(stderr, "No support for CLOCK_MONOTONIC! -perf cannot be used!\n");
+        exit(1);
+    }
+    return (uint64_t) ts.tv_sec * 1000000000 + ts.tv_nsec;
+}
+
+static uint32_t get_e_machine(void)
+{
+    uint32_t e_machine = EM_NONE;
+    Elf64_Ehdr elf_header;
+    FILE *exe = fopen("/proc/self/exe", "r");
+
+    if (exe == NULL) {
+        return e_machine;
+    }
+
+    if (fread(&elf_header, sizeof(Elf64_Ehdr), 1, exe) != 1) {
+        goto end;
+    }
+
+    e_machine = elf_header.e_machine;
+
+end:
+    fclose(exe);
+    return e_machine;
+}
+
+void start_jitdump_file(void)
+{
+    GString *dumpfile_name = g_string_new(NULL);;
+    g_string_printf(dumpfile_name, "./jit-%d.dump", getpid());
+    dumpfile = fopen(dumpfile_name->str, "w+");
+
+    perf_marker = mmap(NULL, sysconf(_SC_PAGESIZE),
+                          PROT_READ | PROT_EXEC,
+                          MAP_PRIVATE,
+                          fileno(dumpfile), 0);
+
+    if (perf_marker == MAP_FAILED) {
+        printf("Failed to create mmap marker file for perf %d\n", fileno(dumpfile));
+        fclose(dumpfile);
+        return;
+    }
+
+    g_string_free(dumpfile_name, TRUE);
+
+    struct jitheader *header = g_new0(struct jitheader, 1);
+    header->magic = 0x4A695444;
+    header->version = 1;
+    header->elf_mach = get_e_machine();
+    header->total_size = sizeof(struct jitheader);
+    header->pid = getpid();
+    header->timestamp = get_timestamp();
+
+    fwrite(header, header->total_size, 1, dumpfile);
+
+    free(header);
+    fflush(dumpfile);
+}
+
+void append_load_in_jitdump_file(TranslationBlock *tb)
+{
+    GString *func_name = g_string_new(NULL);
+    g_string_printf(func_name, "TB virt:0x"TARGET_FMT_lx"%c", tb->pc, '\0');
+
+    struct jr_code_load *load_event = g_new0(struct jr_code_load, 1);
+    load_event->p.id = JIT_CODE_LOAD;
+    load_event->p.total_size = sizeof(struct jr_code_load) + func_name->len + tb->tc.size;
+    load_event->p.timestamp = get_timestamp();
+    load_event->pid = getpid();
+    load_event->tid = syscall(SYS_gettid);
+    load_event->vma = tb->pc;
+    load_event->code_addr = (uint64_t) tb->tc.ptr;
+    load_event->code_size = tb->tc.size;
+    load_event->code_index = tb->pc;
+
+    fwrite(load_event, sizeof(struct jr_code_load), 1, dumpfile);
+    fwrite(func_name->str, func_name->len, 1, dumpfile);
+    fwrite(tb->tc.ptr, tb->tc.size, 1, dumpfile);
+
+    g_string_free(func_name, TRUE);
+    free(load_event);
+    fflush(dumpfile);
+}
+
+void close_jitdump_file(void)
+{
+    fclose(dumpfile);
+    if (perf_marker != MAP_FAILED) {
+        munmap(perf_marker, sysconf(_SC_PAGESIZE));
+    }
+}
+
+bool is_jitdump_enabled;
+
+void jitdump_enable(void)
+{
+    is_jitdump_enabled = true;
+}
+
+bool jitdump_enabled(void)
+{
+    return is_jitdump_enabled;
+}
+
+#endif
diff --git a/accel/tcg/perf/jitdump.h b/accel/tcg/perf/jitdump.h
new file mode 100644
index 0000000000..12c0991b04
--- /dev/null
+++ b/accel/tcg/perf/jitdump.h
@@ -0,0 +1,19 @@ 
+#ifdef __linux__
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+#include "exec/exec-all.h"
+
+void start_jitdump_file(void);
+
+void append_load_in_jitdump_file(TranslationBlock *tb);
+void append_move_in_jitdump_file(TranslationBlock *tb);
+
+void close_jitdump_file(void);
+
+#endif
+
+#endif
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5d1e08b169..0a4211d3d2 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -58,6 +58,8 @@ 
 #include "sysemu/cpus.h"
 #include "sysemu/tcg.h"
 
+#include "perf/jitdump.h"
+
 /* #define DEBUG_TB_INVALIDATE */
 /* #define DEBUG_TB_FLUSH */
 /* make various TB consistency checks */
@@ -1148,6 +1150,11 @@  void tcg_exec_init(unsigned long tb_size)
     cpu_gen_init();
     page_init();
     tb_htable_init();
+#ifdef __linux__
+    if (jitdump_enabled()) {
+        start_jitdump_file();
+    }
+#endif
     code_gen_alloc(tb_size);
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
@@ -1877,6 +1884,11 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
         return existing_tb;
     }
     tcg_tb_insert(tb);
+#ifdef __linux__
+    if (jitdump_enabled()) {
+        append_load_in_jitdump_file(tb);
+    }
+#endif
     return tb;
 }
 
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 0235cd3b91..bd564a9e5c 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -130,4 +130,7 @@  void page_size_init(void);
  * returned. */
 bool dump_in_progress(void);
 
+void jitdump_enable(void);
+bool jitdump_enabled(void);
+
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 8ffc525195..6521cf0c68 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -378,6 +378,11 @@  static void handle_arg_strace(const char *arg)
     do_strace = 1;
 }
 
+static void handle_arg_perf(const char *arg)
+{
+    jitdump_enable();
+}
+
 static void handle_arg_version(const char *arg)
 {
     printf("qemu-" TARGET_NAME " version " QEMU_FULL_VERSION
@@ -443,6 +448,8 @@  static const struct qemu_argument arg_table[] = {
      "",           "Seed for pseudo-random number generator"},
     {"trace",      "QEMU_TRACE",       true,  handle_arg_trace,
      "",           "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
+    {"perf",      "QEMU_PERF",         false, handle_arg_perf,
+     "",           "dump jitdump files to help linux perf JIT code visualization"},
     {"version",    "QEMU_VERSION",     false, handle_arg_version,
      "",           "display version information and exit"},
     {NULL, NULL, false, NULL, NULL, NULL}
diff --git a/qemu-options.hx b/qemu-options.hx
index 9621e934c0..1c26eeeb9c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4147,6 +4147,18 @@  STEXI
 Enable FIPS 140-2 compliance mode.
 ETEXI
 
+#ifdef __linux__
+DEF("perf", 0, QEMU_OPTION_perf,
+    "-perf    dump jitdump files to help linux perf JIT code visualization\n",
+    QEMU_ARCH_ALL)
+#endif
+STEXI
+@item -perf
+@findex -perf
+Dumps jitdump files to help linux perf JIT code visualization
+ETEXI
+
+
 HXCOMM Deprecated by -machine accel=tcg property
 DEF("no-kvm", 0, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386)