Patchwork tcg: Use the GDB JIT debugging interface.

login
register
mail settings
Submitter Richard Henderson
Date Sept. 29, 2011, 11:46 p.m.
Message ID <1317339968-16212-1-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/117013/
State New
Headers show

Comments

Richard Henderson - Sept. 29, 2011, 11:46 p.m.
This allows us to generate unwind info for the dynamically generated
code in the code_gen_buffer.  Only i386 is converted at this point.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 elf.h                 |    1 +
 exec.c                |    9 +++
 tcg/i386/tcg-target.c |  125 +++++++++++++++++++++++++++++++---
 tcg/tcg.c             |  185 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg.h             |    2 +
 5 files changed, 313 insertions(+), 9 deletions(-)


It's somewhat unfortunate, but the GDB interface requires that we
build a contiguous ELF image.  This means we can't place the ELF
header that we generate any place except the beginning of the
code_gen_buffer.  While tedious, this isn't terribly difficult.

With this patch, I now get:

Breakpoint 1, __ldb_mmu (addr=1001716, mmu_idx=0)
    at /home/rth/work/qemu/qemu/softmmu_template.h:86
86	{
(gdb) where
#0  __ldb_mmu (addr=1001716, mmu_idx=0)
    at /home/rth/work/qemu/qemu/softmmu_template.h:86
#1  0x0000000040000afc in ?? ()
#2  0x000000000053e85c in cpu_x86_exec (env=0x1208d80)
    at /home/rth/work/qemu/qemu/cpu-exec.c:565
#3  0x0000000000542932 in tcg_cpu_exec (env=0x1208d80)
    at /home/rth/work/qemu/qemu/cpus.c:913
#4  cpu_exec_all () at /home/rth/work/qemu/qemu/cpus.c:949
#5  0x0000000000542ad5 in qemu_tcg_cpu_thread_fn (arg=<optimized out>)
    at /home/rth/work/qemu/qemu/cpus.c:688
#6  0x00000033f1a07b31 in start_thread () from /lib64/libpthread.so.0
#7  0x00000033f16dfd2d in clone () from /lib64/libc.so.6

which is nicely correct.

Comments from the gdb folk are welcome.


r~
Jan Kiszka - Sept. 30, 2011, 7:12 a.m.
On 2011-09-30 01:46, Richard Henderson wrote:
> This allows us to generate unwind info for the dynamically generated
> code in the code_gen_buffer.  Only i386 is converted at this point.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  elf.h                 |    1 +
>  exec.c                |    9 +++
>  tcg/i386/tcg-target.c |  125 +++++++++++++++++++++++++++++++---
>  tcg/tcg.c             |  185 +++++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg.h             |    2 +
>  5 files changed, 313 insertions(+), 9 deletions(-)
> 
> 
> It's somewhat unfortunate, but the GDB interface requires that we
> build a contiguous ELF image.  This means we can't place the ELF
> header that we generate any place except the beginning of the
> code_gen_buffer.  While tedious, this isn't terribly difficult.
> 
> With this patch, I now get:
> 
> Breakpoint 1, __ldb_mmu (addr=1001716, mmu_idx=0)
>     at /home/rth/work/qemu/qemu/softmmu_template.h:86
> 86	{
> (gdb) where
> #0  __ldb_mmu (addr=1001716, mmu_idx=0)
>     at /home/rth/work/qemu/qemu/softmmu_template.h:86
> #1  0x0000000040000afc in ?? ()
> #2  0x000000000053e85c in cpu_x86_exec (env=0x1208d80)
>     at /home/rth/work/qemu/qemu/cpu-exec.c:565
> #3  0x0000000000542932 in tcg_cpu_exec (env=0x1208d80)
>     at /home/rth/work/qemu/qemu/cpus.c:913
> #4  cpu_exec_all () at /home/rth/work/qemu/qemu/cpus.c:949
> #5  0x0000000000542ad5 in qemu_tcg_cpu_thread_fn (arg=<optimized out>)
>     at /home/rth/work/qemu/qemu/cpus.c:688
> #6  0x00000033f1a07b31 in start_thread () from /lib64/libpthread.so.0
> #7  0x00000033f16dfd2d in clone () from /lib64/libc.so.6
> 
> which is nicely correct.

Cool.

Would it be possible to tag translated code as well? At TB or maybe even
input instruction level? Only in debugging mode of course.

> 
> Comments from the gdb folk are welcome.
> 
> 
> r~
> 
> 
> 
> diff --git a/elf.h b/elf.h
> index 2e05d34..1e56a8c 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -216,6 +216,7 @@ typedef int64_t  Elf64_Sxword;
>  
>  #define ELF_ST_BIND(x)		((x) >> 4)
>  #define ELF_ST_TYPE(x)		(((unsigned int) x) & 0xf)
> +#define ELF_ST_INFO(bind,type)	(((bind) << 4) | (type))
>  #define ELF32_ST_BIND(x)	ELF_ST_BIND(x)
>  #define ELF32_ST_TYPE(x)	ELF_ST_TYPE(x)
>  #define ELF64_ST_BIND(x)	ELF_ST_BIND(x)
> diff --git a/exec.c b/exec.c
> index 1e6f732..f6c07d5 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -464,6 +464,8 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
>  
>  static void code_gen_alloc(unsigned long tb_size)
>  {
> +    size_t grab;
> +
>  #ifdef USE_STATIC_CODE_GEN_BUFFER
>      code_gen_buffer = static_code_gen_buffer;
>      code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
> @@ -558,6 +560,13 @@ static void code_gen_alloc(unsigned long tb_size)
>      map_exec(code_gen_buffer, code_gen_buffer_size);
>  #endif
>  #endif /* !USE_STATIC_CODE_GEN_BUFFER */
> +
> +    /* Give GDB unwind info for the code we generate.  This grabs a small
> +       amount of space from the front of the buffer.  Account for it.  */
> +    grab = tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
> +    code_gen_buffer += grab;
> +    code_gen_buffer_size -= grab;
> +
>      map_exec(code_gen_prologue, sizeof(code_gen_prologue));
>      code_gen_buffer_max_size = code_gen_buffer_size -
>          (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 281f87d..462f455 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1913,22 +1913,29 @@ static int tcg_target_callee_save_regs[] = {
>  #endif
>  };
>  
> +/* Compute frame size via macros, to share between tcg_target_qemu_prologue
> +   and tcg_register_jit.  */
> +
> +#define PUSH_SIZE \
> +    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
> +     * (TCG_TARGET_REG_BITS / 8))
> +
> +#define FRAME_SIZE \
> +    ((PUSH_SIZE \
> +      + TCG_STATIC_CALL_ARGS_SIZE \
> +      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
> +      + TCG_TARGET_STACK_ALIGN - 1) \
> +     & ~(TCG_TARGET_STACK_ALIGN - 1))
> +
>  /* Generate global QEMU prologue and epilogue code */
>  static void tcg_target_qemu_prologue(TCGContext *s)
>  {
> -    int i, frame_size, push_size, stack_addend;
> +    int i, stack_addend;
>  
>      /* TB prologue */
>  
>      /* Reserve some stack space, also for TCG temps.  */
> -    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
> -    push_size *= TCG_TARGET_REG_BITS / 8;
> -
> -    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
> -        CPU_TEMP_BUF_NLONGS * sizeof(long);
> -    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
> -        ~(TCG_TARGET_STACK_ALIGN - 1);
> -    stack_addend = frame_size - push_size;
> +    stack_addend = FRAME_SIZE - PUSH_SIZE;
>      tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
>                    CPU_TEMP_BUF_NLONGS * sizeof(long));
>  
> @@ -1988,3 +1995,103 @@ static void tcg_target_init(TCGContext *s)
>  
>      tcg_add_target_add_op_defs(x86_op_defs);
>  }
> +
> +extern char debug_frame[];
> +extern char debug_frame_end[];
> +extern void *debug_frame_code_start;
> +extern size_t debug_frame_code_len;
> +extern char debug_frame_frame_size[];
> +
> +#if TCG_TARGET_REG_BITS == 64
> +#define ELF_HOST_MACHINE EM_X86_64
> +asm(".data\n"
> +"	.align 8\n"
> +"debug_frame:\n"
> +"	.long	99f-0f\n"	/* Length of CIE */
> +"0:	.long	-1\n"		/* CIE identifier */
> +"	.byte	1\n"		/* CIE version */
> +"	.byte	0\n"		/* CIE augmentation (none) */
> +"	.byte	1\n"		/* CIE code alignment factor */
> +"	.byte	0x78\n"		/* CIE data alignment factor (sleb128 -8) */
> +"	.byte	16\n"		/* CIE return address column */
> +"	.align 8\n"
> +"99:\n"				/* End of CIE */
> +"	.long	99f-0f\n"	/* Length of FDE */
> +"0:	.long	0\n"		/* FDE CIE offset (start of section) */
> +"debug_frame_code_start:\n"
> +"	.quad	0\n"		/* FDE start (to be filled in) */
> +"debug_frame_code_len:\n"
> +"	.quad	0\n"		/* FDE length (to be filled in) */
> +"	.byte	12\n"		/* DW_CFA_def_cfa */
> +"	.byte	7\n"		/*   %rsp */
> +"debug_frame_frame_size:\n"
> +"	.byte	0, 0\n"		/*   frame_size (to be filled in) */
> +"	.byte	0x90, 1\n"	/* DW_CFA_offset, %rip, -8 */
> +/* The following ordering must match tcg_target_callee_save_regs.  */
> +"	.byte	0x86, 2\n"	/* DW_CFA_offset, %rbp, -16 */
> +"	.byte	0x83, 3\n"	/* DW_CFA_offset, %rbx, -24 */
> +"	.byte	0x8c, 4\n"	/* DW_CFA_offset, %r12, -32 */
> +"	.byte	0x8d, 5\n"	/* DW_CFA_offset, %r13, -40 */
> +"	.byte	0x8e, 6\n"	/* DW_CFA_offset, %r14, -48 */
> +"	.byte	0x8f, 7\n"	/* DW_CFA_offset, %r15, -56 */
> +"	.align	8\n"
> +"99:\n"				/* End of FDE */
> +"debug_frame_end:\n"
> +".previous"
> +);
> +#else
> +#define ELF_HOST_MACHINE EM_386
> +asm(".data\n"
> +"	.align 4\n"
> +"debug_frame:\n"
> +"	.long	99f-0f\n"	/* Length of CIE */
> +"0:	.long	-1\n"		/* CIE identifier */
> +"	.byte	1\n"		/* CIE version */
> +"	.byte	0\n"		/* CIE augmentation (none) */
> +"	.byte	1\n"		/* CIE code alignment factor */
> +"	.byte	0x7c\n"		/* CIE data alignment factor (sleb128 -4) */
> +"	.byte	8\n"		/* CIE return address column */
> +"	.align 4\n"
> +"99:\n"				/* End of CIE */
> +"	.long	99f-0f\n"	/* Length of FDE */
> +"0:	.long	0\n"		/* FDE CIE offset (start of section) */
> +"debug_frame_code_start:\n"
> +"	.long	0\n"		/* FDE start (to be filled in) */
> +"debug_frame_code_len:\n"
> +"	.long	0\n"		/* FDE length (to be filled in) */
> +"	.byte	12\n"		/* DW_CFA_def_cfa */
> +"	.byte	4\n"		/*   %rsp */
> +"debug_frame_frame_size:\n"
> +"	.byte	0, 0\n"		/*   frame_size (to be filled in) */
> +"	.byte	0x88, 1\n"	/* DW_CFA_offset, %eip, -4 */
> +/* The following ordering must match tcg_target_callee_save_regs.  */
> +"	.byte	0x85, 2\n"	/* DW_CFA_offset, %ebp, -8 */
> +"	.byte	0x83, 3\n"	/* DW_CFA_offset, %ebx, -12 */
> +"	.byte	0x86, 4\n"	/* DW_CFA_offset, %esi, -16 */
> +"	.byte	0x87, 5\n"	/* DW_CFA_offset, %edi, -20 */
> +"	.align	4\n"
> +"99:\n"				/* End of FDE */
> +"debug_frame_end:\n"
> +".previous"
> +);
> +#endif
> +
> +size_t tcg_register_jit(void *buf, size_t buf_size)
> +{
> +    const unsigned int frame_size = FRAME_SIZE;
> +    unsigned int f_lo, f_hi;
> +
> +    /* ??? These could be filled in generically via reading the debug data.  */
> +    debug_frame_code_start = buf;
> +    debug_frame_code_len = buf_size;
> +
> +    /* ??? We're expecting a 2 byte uleb128 encoded value.  */
> +    f_lo = (frame_size & 0x7f) | 0x80;
> +    f_hi = frame_size >> 7;
> +    assert(frame_size >> 14 == 0);
> +    debug_frame_frame_size[0] = f_lo;
> +    debug_frame_frame_size[1] = f_hi;
> +
> +    return tcg_register_jit_int(buf, buf_size, debug_frame,
> +                                debug_frame_end - debug_frame);
> +}
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 411f971..63d81f9 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -57,6 +57,18 @@
>  #include "cpu.h"
>  
>  #include "tcg-op.h"
> +
> +#if TCG_TARGET_REG_BITS == 64
> +# define ELF_CLASS  ELFCLASS64
> +#else
> +# define ELF_CLASS  ELFCLASS32
> +#endif
> +#ifdef HOST_WORDS_BIGENDIAN
> +# define ELF_DATA   ELFDATA2MSB
> +#else
> +# define ELF_DATA   ELFDATA2LSB
> +#endif
> +
>  #include "elf.h"
>  
>  #if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
> @@ -68,6 +80,9 @@ static void tcg_target_qemu_prologue(TCGContext *s);
>  static void patch_reloc(uint8_t *code_ptr, int type, 
>                          tcg_target_long value, tcg_target_long addend);
>  
> +static size_t tcg_register_jit_int(void *buf, size_t size,
> +                                   void *debug_frame, size_t debug_frame_size);
> +
>  TCGOpDef tcg_op_defs[] = {
>  #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
>  #include "tcg-opc.h"
> @@ -2233,3 +2248,173 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
>      cpu_fprintf(f, "[TCG profiler not compiled]\n");
>  }
>  #endif
> +
> +#ifdef ELF_HOST_MACHINE
> +/* The backend should define ELF_HOST_MACHINE to indicate both what value to
> +   put into the ELF image and to indicate support for the feature.  */
> +
> +/* Begin GDB interface.  The following must match the docs.  */
> +typedef enum
> +{
> +    JIT_NOACTION = 0,
> +    JIT_REGISTER_FN,
> +    JIT_UNREGISTER_FN
> +} jit_actions_t;
> +
> +struct jit_code_entry {
> +    struct jit_code_entry *next_entry;
> +    struct jit_code_entry *prev_entry;
> +    const char *symfile_addr;
> +    uint64_t symfile_size;
> +};
> +
> +struct jit_descriptor {
> +    uint32_t version;
> +    uint32_t action_flag;
> +    struct jit_code_entry *relevant_entry;
> +    struct jit_code_entry *first_entry;
> +};
> +
> +void __jit_debug_register_code(void);
> +void __attribute__((noinline)) __jit_debug_register_code(void)

Leading '_' are reserved for the system and taboo for the application (we
have some other violations, yes).

> +{
> +    asm("");
> +}
> +
> +/* Must statically initialize the version, because GDB may check
> +   the version before we can set it.  */
> +struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
> +
> +/* End GDB interface.  */
> +
> +static int find_string(const char *strtab, const char *str)
> +{
> +    const char *p = strtab + 1;
> +
> +    while (1) {
> +        if (strcmp(p, str) == 0) {
> +            return p - strtab;
> +        }
> +        p += strlen(p) + 1;
> +    }
> +}
> +
> +static size_t tcg_register_jit_int(void *buf, size_t buf_size,
> +                                   void *debug_frame, size_t debug_frame_size)
> +{
> +    static const char strings[64] =
> +        "\0"
> +        ".text\0"
> +        ".debug_frame\0"
> +        ".symtab\0"
> +        ".strtab\0"
> +        "code_gen_buffer";
> +
> +    struct ElfImage {
> +        ElfW(Ehdr) ehdr;
> +        ElfW(Phdr) phdr;
> +        ElfW(Shdr) shdr[5];
> +        ElfW(Sym)  sym[1];
> +        char       str[64];
> +    };
> +
> +    /* We only need a single jit entry; statically allocate it.  */
> +    static struct jit_code_entry one_entry;
> +
> +    struct ElfImage *img = buf;
> +    size_t grab = sizeof(*img) + debug_frame_size;
> +
> +    img->ehdr.e_ident[EI_MAG0] = ELFMAG0;
> +    img->ehdr.e_ident[EI_MAG1] = ELFMAG1;
> +    img->ehdr.e_ident[EI_MAG2] = ELFMAG2;
> +    img->ehdr.e_ident[EI_MAG3] = ELFMAG3;
> +    img->ehdr.e_ident[EI_CLASS] = ELF_CLASS;
> +    img->ehdr.e_ident[EI_DATA] = ELF_DATA;
> +    img->ehdr.e_ident[EI_VERSION] = EV_CURRENT;
> +    img->ehdr.e_type = ET_EXEC;
> +    img->ehdr.e_machine = ELF_HOST_MACHINE;
> +    img->ehdr.e_version = EV_CURRENT;
> +    img->ehdr.e_phoff = offsetof(struct ElfImage, phdr);
> +    img->ehdr.e_shoff = offsetof(struct ElfImage, shdr);
> +    img->ehdr.e_ehsize = sizeof(ElfW(Shdr));
> +    img->ehdr.e_phentsize = sizeof(ElfW(Phdr));
> +    img->ehdr.e_phnum = 1;
> +    img->ehdr.e_shentsize = sizeof(img->shdr[0]);
> +    img->ehdr.e_shnum = ARRAY_SIZE(img->shdr);
> +    img->ehdr.e_shstrndx = ARRAY_SIZE(img->shdr) - 1;
> +
> +    img->phdr.p_type = PT_LOAD;
> +    img->phdr.p_offset = (char *)buf - (char *)img;
> +    img->phdr.p_vaddr = (ElfW(Addr))buf;
> +    img->phdr.p_paddr = img->phdr.p_vaddr;
> +    img->phdr.p_filesz = buf_size;
> +    img->phdr.p_memsz = buf_size;
> +    img->phdr.p_flags = PF_X;
> +
> +    memcpy(img->str, strings, sizeof(img->str));
> +
> +    img->shdr[0].sh_type = SHT_NULL;
> +
> +    img->shdr[1].sh_name = find_string(img->str, ".text");
> +    img->shdr[1].sh_type = SHT_PROGBITS;
> +    img->shdr[1].sh_flags = SHF_EXECINSTR | SHF_ALLOC;
> +    img->shdr[1].sh_addr = (ElfW(Addr))buf + grab;
> +    img->shdr[1].sh_offset = grab;
> +    img->shdr[1].sh_size = buf_size - grab;
> +
> +    img->shdr[2].sh_name = find_string(img->str, ".debug_frame");
> +    img->shdr[2].sh_type = SHT_PROGBITS;
> +    img->shdr[2].sh_offset = sizeof(*img);
> +    img->shdr[2].sh_size = debug_frame_size;
> +    memcpy(img + 1, debug_frame, debug_frame_size);
> +
> +    img->shdr[3].sh_name = find_string(img->str, ".symtab");
> +    img->shdr[3].sh_type = SHT_SYMTAB;
> +    img->shdr[3].sh_offset = offsetof(struct ElfImage, sym);
> +    img->shdr[3].sh_size = sizeof(img->sym);
> +    img->shdr[3].sh_info = ARRAY_SIZE(img->sym);
> +    img->shdr[3].sh_link = img->ehdr.e_shstrndx;
> +    img->shdr[3].sh_entsize = sizeof(ElfW(Sym));
> +
> +    img->shdr[4].sh_name = find_string(img->str, ".strtab");
> +    img->shdr[4].sh_type = SHT_STRTAB;
> +    img->shdr[4].sh_offset = offsetof(struct ElfImage, str);
> +    img->shdr[4].sh_size = sizeof(img->str);
> +
> +    img->sym[0].st_name = find_string(img->str, "code_gen_buffer");
> +    img->sym[0].st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC);
> +    img->sym[0].st_shndx = 1;
> +    img->sym[0].st_value = (ElfW(Addr))buf + grab;
> +    img->sym[0].st_size = buf_size - grab;
> +
> +#if 0

#ifdef DEBUG_ELF_GENERATION or so?

> +    /* Enable this block to be able to debug the ELF image file creation.
> +       One can use readelf, objdump, or other inspection utilities.  */
> +    {
> +        FILE *f = fopen("/tmp/qemu.jit", "w+b");
> +        if (f) {
> +            if (fwrite(buf, buf_size, 1, f) != buf_size) {
> +                /* Avoid stupid unused return value warning for fwrite.  */
> +            }
> +            fclose(f);
> +        }
> +    }
> +#endif
> +
> +    one_entry.symfile_addr = buf;
> +    one_entry.symfile_size = buf_size;
> +
> +    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
> +    __jit_debug_descriptor.relevant_entry = &one_entry;
> +    __jit_debug_descriptor.first_entry = &one_entry;
> +    __jit_debug_register_code();
> +
> +    return grab;
> +}
> +#else
> +/* No support for the feature.  Provide the entry point expected by exec.c.  */
> +size_t tcg_register_jit(void *buf, size_t buf_size)
> +{
> +    return 0;
> +}
> +#endif /* ELF_HOST_MACHINE */
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index dc5e9c9..3d2e5d0 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -574,3 +574,5 @@ extern uint8_t code_gen_prologue[];
>  #define tcg_qemu_tb_exec(env, tb_ptr)                                    \
>      ((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
>  #endif
> +
> +size_t tcg_register_jit(void *, size_t);

Jan
Richard Henderson - Sept. 30, 2011, 2:36 p.m.
On 09/30/2011 12:12 AM, Jan Kiszka wrote:
>> Breakpoint 1, __ldb_mmu (addr=1001716, mmu_idx=0)
>>     at /home/rth/work/qemu/qemu/softmmu_template.h:86
>> 86	{
>> (gdb) where
>> #0  __ldb_mmu (addr=1001716, mmu_idx=0)
>>     at /home/rth/work/qemu/qemu/softmmu_template.h:86
>> #1  0x0000000040000afc in ?? ()
...
> Would it be possible to tag translated code as well? At TB or maybe even
> input instruction level? Only in debugging mode of course.

I don't know.  A couple of points on that:

(1) I was rather expecting frame 1 above to be marked "code_gen_buffer"
    rather than "??".  I *did* install that symbol in the generated 
    ELF image as a function covering the entire buffer.  So I'm not
    quite sure why GDB didn't use it.

(2) If we wanted to mark stuff individually, then we would have to
    manipulate the symbol table presented to GDB.  One could do this
    either via separate ELF images for each TB (overhead ~600 bytes
    per TB, more if we mark input insns), or via un-registering and
    re-registering the "main" ELF image after having manipulated the
    symbol table.  Of course, in the latter case the layout of the ELF
    image would get tricky, as the symbol table expands and overruns
    existing TBs.

So, "probably not" is my answer.

>> +/* Begin GDB interface.  The following must match the docs.  */
...
>> +void __jit_debug_register_code(void);
>> +void __attribute__((noinline)) __jit_debug_register_code(void)
> 
> Leading '_' are reserved for the system and taboo for the application (we
> have some other violations, yes).

We have no choice here -- we must use exactly what GDB documents.

>> +#if 0
> 
> #ifdef DEBUG_ELF_GENERATION or so?

Sure.


r~
Daniel Jacobowitz - Oct. 2, 2011, 3:04 a.m.
On Fri, Sep 30, 2011 at 10:36 AM, Richard Henderson <rth@twiddle.net> wrote:
> On 09/30/2011 12:12 AM, Jan Kiszka wrote:
>>> Breakpoint 1, __ldb_mmu (addr=1001716, mmu_idx=0)
>>>     at /home/rth/work/qemu/qemu/softmmu_template.h:86
>>> 86   {
>>> (gdb) where
>>> #0  __ldb_mmu (addr=1001716, mmu_idx=0)
>>>     at /home/rth/work/qemu/qemu/softmmu_template.h:86
>>> #1  0x0000000040000afc in ?? ()
> ...
>> Would it be possible to tag translated code as well? At TB or maybe even
>> input instruction level? Only in debugging mode of course.
>
> I don't know.  A couple of points on that:
>
> (1) I was rather expecting frame 1 above to be marked "code_gen_buffer"
>    rather than "??".  I *did* install that symbol in the generated
>    ELF image as a function covering the entire buffer.  So I'm not
>    quite sure why GDB didn't use it.

Totally wild guess: associated with the wrong section?  I don't
remember what we do w.r.t. ABS in shared libraries, but for non-ABS we
do validate what section we think things are in.  And I'm not sure if
we load section info in the JIT loader, either.

Patch

diff --git a/elf.h b/elf.h
index 2e05d34..1e56a8c 100644
--- a/elf.h
+++ b/elf.h
@@ -216,6 +216,7 @@  typedef int64_t  Elf64_Sxword;
 
 #define ELF_ST_BIND(x)		((x) >> 4)
 #define ELF_ST_TYPE(x)		(((unsigned int) x) & 0xf)
+#define ELF_ST_INFO(bind,type)	(((bind) << 4) | (type))
 #define ELF32_ST_BIND(x)	ELF_ST_BIND(x)
 #define ELF32_ST_TYPE(x)	ELF_ST_TYPE(x)
 #define ELF64_ST_BIND(x)	ELF_ST_BIND(x)
diff --git a/exec.c b/exec.c
index 1e6f732..f6c07d5 100644
--- a/exec.c
+++ b/exec.c
@@ -464,6 +464,8 @@  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 
 static void code_gen_alloc(unsigned long tb_size)
 {
+    size_t grab;
+
 #ifdef USE_STATIC_CODE_GEN_BUFFER
     code_gen_buffer = static_code_gen_buffer;
     code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
@@ -558,6 +560,13 @@  static void code_gen_alloc(unsigned long tb_size)
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #endif
 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
+
+    /* Give GDB unwind info for the code we generate.  This grabs a small
+       amount of space from the front of the buffer.  Account for it.  */
+    grab = tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
+    code_gen_buffer += grab;
+    code_gen_buffer_size -= grab;
+
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 281f87d..462f455 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1913,22 +1913,29 @@  static int tcg_target_callee_save_regs[] = {
 #endif
 };
 
+/* Compute frame size via macros, to share between tcg_target_qemu_prologue
+   and tcg_register_jit.  */
+
+#define PUSH_SIZE \
+    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
+     * (TCG_TARGET_REG_BITS / 8))
+
+#define FRAME_SIZE \
+    ((PUSH_SIZE \
+      + TCG_STATIC_CALL_ARGS_SIZE \
+      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+      + TCG_TARGET_STACK_ALIGN - 1) \
+     & ~(TCG_TARGET_STACK_ALIGN - 1))
+
 /* Generate global QEMU prologue and epilogue code */
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    int i, frame_size, push_size, stack_addend;
+    int i, stack_addend;
 
     /* TB prologue */
 
     /* Reserve some stack space, also for TCG temps.  */
-    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
-    push_size *= TCG_TARGET_REG_BITS / 8;
-
-    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
-        CPU_TEMP_BUF_NLONGS * sizeof(long);
-    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
-        ~(TCG_TARGET_STACK_ALIGN - 1);
-    stack_addend = frame_size - push_size;
+    stack_addend = FRAME_SIZE - PUSH_SIZE;
     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
@@ -1988,3 +1995,103 @@  static void tcg_target_init(TCGContext *s)
 
     tcg_add_target_add_op_defs(x86_op_defs);
 }
+
+extern char debug_frame[];
+extern char debug_frame_end[];
+extern void *debug_frame_code_start;
+extern size_t debug_frame_code_len;
+extern char debug_frame_frame_size[];
+
+#if TCG_TARGET_REG_BITS == 64
+#define ELF_HOST_MACHINE EM_X86_64
+asm(".data\n"
+"	.align 8\n"
+"debug_frame:\n"
+"	.long	99f-0f\n"	/* Length of CIE */
+"0:	.long	-1\n"		/* CIE identifier */
+"	.byte	1\n"		/* CIE version */
+"	.byte	0\n"		/* CIE augmentation (none) */
+"	.byte	1\n"		/* CIE code alignment factor */
+"	.byte	0x78\n"		/* CIE data alignment factor (sleb128 -8) */
+"	.byte	16\n"		/* CIE return address column */
+"	.align 8\n"
+"99:\n"				/* End of CIE */
+"	.long	99f-0f\n"	/* Length of FDE */
+"0:	.long	0\n"		/* FDE CIE offset (start of section) */
+"debug_frame_code_start:\n"
+"	.quad	0\n"		/* FDE start (to be filled in) */
+"debug_frame_code_len:\n"
+"	.quad	0\n"		/* FDE length (to be filled in) */
+"	.byte	12\n"		/* DW_CFA_def_cfa */
+"	.byte	7\n"		/*   %rsp */
+"debug_frame_frame_size:\n"
+"	.byte	0, 0\n"		/*   frame_size (to be filled in) */
+"	.byte	0x90, 1\n"	/* DW_CFA_offset, %rip, -8 */
+/* The following ordering must match tcg_target_callee_save_regs.  */
+"	.byte	0x86, 2\n"	/* DW_CFA_offset, %rbp, -16 */
+"	.byte	0x83, 3\n"	/* DW_CFA_offset, %rbx, -24 */
+"	.byte	0x8c, 4\n"	/* DW_CFA_offset, %r12, -32 */
+"	.byte	0x8d, 5\n"	/* DW_CFA_offset, %r13, -40 */
+"	.byte	0x8e, 6\n"	/* DW_CFA_offset, %r14, -48 */
+"	.byte	0x8f, 7\n"	/* DW_CFA_offset, %r15, -56 */
+"	.align	8\n"
+"99:\n"				/* End of FDE */
+"debug_frame_end:\n"
+".previous"
+);
+#else
+#define ELF_HOST_MACHINE EM_386
+asm(".data\n"
+"	.align 4\n"
+"debug_frame:\n"
+"	.long	99f-0f\n"	/* Length of CIE */
+"0:	.long	-1\n"		/* CIE identifier */
+"	.byte	1\n"		/* CIE version */
+"	.byte	0\n"		/* CIE augmentation (none) */
+"	.byte	1\n"		/* CIE code alignment factor */
+"	.byte	0x7c\n"		/* CIE data alignment factor (sleb128 -4) */
+"	.byte	8\n"		/* CIE return address column */
+"	.align 4\n"
+"99:\n"				/* End of CIE */
+"	.long	99f-0f\n"	/* Length of FDE */
+"0:	.long	0\n"		/* FDE CIE offset (start of section) */
+"debug_frame_code_start:\n"
+"	.long	0\n"		/* FDE start (to be filled in) */
+"debug_frame_code_len:\n"
+"	.long	0\n"		/* FDE length (to be filled in) */
+"	.byte	12\n"		/* DW_CFA_def_cfa */
+"	.byte	4\n"		/*   %rsp */
+"debug_frame_frame_size:\n"
+"	.byte	0, 0\n"		/*   frame_size (to be filled in) */
+"	.byte	0x88, 1\n"	/* DW_CFA_offset, %eip, -4 */
+/* The following ordering must match tcg_target_callee_save_regs.  */
+"	.byte	0x85, 2\n"	/* DW_CFA_offset, %ebp, -8 */
+"	.byte	0x83, 3\n"	/* DW_CFA_offset, %ebx, -12 */
+"	.byte	0x86, 4\n"	/* DW_CFA_offset, %esi, -16 */
+"	.byte	0x87, 5\n"	/* DW_CFA_offset, %edi, -20 */
+"	.align	4\n"
+"99:\n"				/* End of FDE */
+"debug_frame_end:\n"
+".previous"
+);
+#endif
+
+size_t tcg_register_jit(void *buf, size_t buf_size)
+{
+    const unsigned int frame_size = FRAME_SIZE;
+    unsigned int f_lo, f_hi;
+
+    /* ??? These could be filled in generically via reading the debug data.  */
+    debug_frame_code_start = buf;
+    debug_frame_code_len = buf_size;
+
+    /* ??? We're expecting a 2 byte uleb128 encoded value.  */
+    f_lo = (frame_size & 0x7f) | 0x80;
+    f_hi = frame_size >> 7;
+    assert(frame_size >> 14 == 0);
+    debug_frame_frame_size[0] = f_lo;
+    debug_frame_frame_size[1] = f_hi;
+
+    return tcg_register_jit_int(buf, buf_size, debug_frame,
+                                debug_frame_end - debug_frame);
+}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 411f971..63d81f9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -57,6 +57,18 @@ 
 #include "cpu.h"
 
 #include "tcg-op.h"
+
+#if TCG_TARGET_REG_BITS == 64
+# define ELF_CLASS  ELFCLASS64
+#else
+# define ELF_CLASS  ELFCLASS32
+#endif
+#ifdef HOST_WORDS_BIGENDIAN
+# define ELF_DATA   ELFDATA2MSB
+#else
+# define ELF_DATA   ELFDATA2LSB
+#endif
+
 #include "elf.h"
 
 #if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
@@ -68,6 +80,9 @@  static void tcg_target_qemu_prologue(TCGContext *s);
 static void patch_reloc(uint8_t *code_ptr, int type, 
                         tcg_target_long value, tcg_target_long addend);
 
+static size_t tcg_register_jit_int(void *buf, size_t size,
+                                   void *debug_frame, size_t debug_frame_size);
+
 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
 #include "tcg-opc.h"
@@ -2233,3 +2248,173 @@  void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
     cpu_fprintf(f, "[TCG profiler not compiled]\n");
 }
 #endif
+
+#ifdef ELF_HOST_MACHINE
+/* The backend should define ELF_HOST_MACHINE to indicate both what value to
+   put into the ELF image and to indicate support for the feature.  */
+
+/* Begin GDB interface.  The following must match the docs.  */
+typedef enum
+{
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+} jit_actions_t;
+
+struct jit_code_entry {
+    struct jit_code_entry *next_entry;
+    struct jit_code_entry *prev_entry;
+    const char *symfile_addr;
+    uint64_t symfile_size;
+};
+
+struct jit_descriptor {
+    uint32_t version;
+    uint32_t action_flag;
+    struct jit_code_entry *relevant_entry;
+    struct jit_code_entry *first_entry;
+};
+
+void __jit_debug_register_code(void);
+void __attribute__((noinline)) __jit_debug_register_code(void)
+{
+    asm("");
+}
+
+/* Must statically initialize the version, because GDB may check
+   the version before we can set it.  */
+struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+/* End GDB interface.  */
+
+static int find_string(const char *strtab, const char *str)
+{
+    const char *p = strtab + 1;
+
+    while (1) {
+        if (strcmp(p, str) == 0) {
+            return p - strtab;
+        }
+        p += strlen(p) + 1;
+    }
+}
+
+static size_t tcg_register_jit_int(void *buf, size_t buf_size,
+                                   void *debug_frame, size_t debug_frame_size)
+{
+    static const char strings[64] =
+        "\0"
+        ".text\0"
+        ".debug_frame\0"
+        ".symtab\0"
+        ".strtab\0"
+        "code_gen_buffer";
+
+    struct ElfImage {
+        ElfW(Ehdr) ehdr;
+        ElfW(Phdr) phdr;
+        ElfW(Shdr) shdr[5];
+        ElfW(Sym)  sym[1];
+        char       str[64];
+    };
+
+    /* We only need a single jit entry; statically allocate it.  */
+    static struct jit_code_entry one_entry;
+
+    struct ElfImage *img = buf;
+    size_t grab = sizeof(*img) + debug_frame_size;
+
+    img->ehdr.e_ident[EI_MAG0] = ELFMAG0;
+    img->ehdr.e_ident[EI_MAG1] = ELFMAG1;
+    img->ehdr.e_ident[EI_MAG2] = ELFMAG2;
+    img->ehdr.e_ident[EI_MAG3] = ELFMAG3;
+    img->ehdr.e_ident[EI_CLASS] = ELF_CLASS;
+    img->ehdr.e_ident[EI_DATA] = ELF_DATA;
+    img->ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+    img->ehdr.e_type = ET_EXEC;
+    img->ehdr.e_machine = ELF_HOST_MACHINE;
+    img->ehdr.e_version = EV_CURRENT;
+    img->ehdr.e_phoff = offsetof(struct ElfImage, phdr);
+    img->ehdr.e_shoff = offsetof(struct ElfImage, shdr);
+    img->ehdr.e_ehsize = sizeof(ElfW(Shdr));
+    img->ehdr.e_phentsize = sizeof(ElfW(Phdr));
+    img->ehdr.e_phnum = 1;
+    img->ehdr.e_shentsize = sizeof(img->shdr[0]);
+    img->ehdr.e_shnum = ARRAY_SIZE(img->shdr);
+    img->ehdr.e_shstrndx = ARRAY_SIZE(img->shdr) - 1;
+
+    img->phdr.p_type = PT_LOAD;
+    img->phdr.p_offset = (char *)buf - (char *)img;
+    img->phdr.p_vaddr = (ElfW(Addr))buf;
+    img->phdr.p_paddr = img->phdr.p_vaddr;
+    img->phdr.p_filesz = buf_size;
+    img->phdr.p_memsz = buf_size;
+    img->phdr.p_flags = PF_X;
+
+    memcpy(img->str, strings, sizeof(img->str));
+
+    img->shdr[0].sh_type = SHT_NULL;
+
+    img->shdr[1].sh_name = find_string(img->str, ".text");
+    img->shdr[1].sh_type = SHT_PROGBITS;
+    img->shdr[1].sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+    img->shdr[1].sh_addr = (ElfW(Addr))buf + grab;
+    img->shdr[1].sh_offset = grab;
+    img->shdr[1].sh_size = buf_size - grab;
+
+    img->shdr[2].sh_name = find_string(img->str, ".debug_frame");
+    img->shdr[2].sh_type = SHT_PROGBITS;
+    img->shdr[2].sh_offset = sizeof(*img);
+    img->shdr[2].sh_size = debug_frame_size;
+    memcpy(img + 1, debug_frame, debug_frame_size);
+
+    img->shdr[3].sh_name = find_string(img->str, ".symtab");
+    img->shdr[3].sh_type = SHT_SYMTAB;
+    img->shdr[3].sh_offset = offsetof(struct ElfImage, sym);
+    img->shdr[3].sh_size = sizeof(img->sym);
+    img->shdr[3].sh_info = ARRAY_SIZE(img->sym);
+    img->shdr[3].sh_link = img->ehdr.e_shstrndx;
+    img->shdr[3].sh_entsize = sizeof(ElfW(Sym));
+
+    img->shdr[4].sh_name = find_string(img->str, ".strtab");
+    img->shdr[4].sh_type = SHT_STRTAB;
+    img->shdr[4].sh_offset = offsetof(struct ElfImage, str);
+    img->shdr[4].sh_size = sizeof(img->str);
+
+    img->sym[0].st_name = find_string(img->str, "code_gen_buffer");
+    img->sym[0].st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC);
+    img->sym[0].st_shndx = 1;
+    img->sym[0].st_value = (ElfW(Addr))buf + grab;
+    img->sym[0].st_size = buf_size - grab;
+
+#if 0
+    /* Enable this block to be able to debug the ELF image file creation.
+       One can use readelf, objdump, or other inspection utilities.  */
+    {
+        FILE *f = fopen("/tmp/qemu.jit", "w+b");
+        if (f) {
+            if (fwrite(buf, buf_size, 1, f) != buf_size) {
+                /* Avoid stupid unused return value warning for fwrite.  */
+            }
+            fclose(f);
+        }
+    }
+#endif
+
+    one_entry.symfile_addr = buf;
+    one_entry.symfile_size = buf_size;
+
+    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+    __jit_debug_descriptor.relevant_entry = &one_entry;
+    __jit_debug_descriptor.first_entry = &one_entry;
+    __jit_debug_register_code();
+
+    return grab;
+}
+#else
+/* No support for the feature.  Provide the entry point expected by exec.c.  */
+size_t tcg_register_jit(void *buf, size_t buf_size)
+{
+    return 0;
+}
+#endif /* ELF_HOST_MACHINE */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index dc5e9c9..3d2e5d0 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -574,3 +574,5 @@  extern uint8_t code_gen_prologue[];
 #define tcg_qemu_tb_exec(env, tb_ptr)                                    \
     ((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
 #endif
+
+size_t tcg_register_jit(void *, size_t);