Patchwork [RFC,09/16,v6] introduce a new monitor command 'dump' to dump guest's memory

login
register
mail settings
Submitter Wen Congyang
Date Feb. 9, 2012, 3:28 a.m.
Message ID <4F333D4B.6090300@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/140299/
State New
Headers show

Comments

Wen Congyang - Feb. 9, 2012, 3:28 a.m.
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 Makefile.target  |    8 +-
 dump.c           |  590 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dump.h           |    3 +
 hmp-commands.hx  |   16 ++
 hmp.c            |    9 +
 hmp.h            |    1 +
 monitor.c        |    3 +
 qapi-schema.json |   13 ++
 qmp-commands.hx  |   26 +++
 9 files changed, 665 insertions(+), 4 deletions(-)
 create mode 100644 dump.c
Jan Kiszka - Feb. 14, 2012, 5:59 p.m.
On 2012-02-09 04:28, Wen Congyang wrote:
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>  Makefile.target  |    8 +-
>  dump.c           |  590 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  dump.h           |    3 +
>  hmp-commands.hx  |   16 ++
>  hmp.c            |    9 +
>  hmp.h            |    1 +
>  monitor.c        |    3 +
>  qapi-schema.json |   13 ++
>  qmp-commands.hx  |   26 +++
>  9 files changed, 665 insertions(+), 4 deletions(-)
>  create mode 100644 dump.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index d6e5684..f39ce2f 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -112,7 +112,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
> -      user-exec.o $(oslib-obj-y)
> +      user-exec.o $(oslib-obj-y) dump.o
> 
>  obj-$(TARGET_HAS_BFLT) += flatload.o
> 
> @@ -150,7 +150,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
>  LIBS+=-lmx
> 
>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
> -        gdbstub.o user-exec.o
> +        gdbstub.o user-exec.o dump.o
> 
>  obj-i386-y += ioport-user.o
> 
> @@ -172,7 +172,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
> 
>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
> -        gdbstub.o uaccess.o user-exec.o
> +        gdbstub.o uaccess.o user-exec.o dump.o
> 
>  obj-i386-y += ioport-user.o
> 
> @@ -188,7 +188,7 @@ endif #CONFIG_BSD_USER
>  # System emulator target
>  ifdef CONFIG_SOFTMMU
> 
> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>  # need to fix this properly
>  obj-$(CONFIG_NO_PCI) += pci-stub.o
> diff --git a/dump.c b/dump.c
> new file mode 100644
> index 0000000..a0e8b86
> --- /dev/null
> +++ b/dump.c
> @@ -0,0 +1,590 @@
> +/*
> + * QEMU dump
> + *
> + * Copyright Fujitsu, Corp. 2011
> + *
> + * Authors:
> + *     Wen Congyang <wency@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include <unistd.h>
> +#include <elf.h>
> +#include <sys/procfs.h>
> +#include <glib.h>
> +#include "cpu.h"
> +#include "cpu-all.h"
> +#include "targphys.h"
> +#include "monitor.h"
> +#include "kvm.h"
> +#include "dump.h"
> +#include "sysemu.h"
> +#include "bswap.h"
> +#include "memory_mapping.h"
> +#include "error.h"
> +#include "qmp-commands.h"
> +
> +#define CPU_CONVERT_TO_TARGET16(val) \
> +({ \
> +    uint16_t _val = (val); \
> +    if (endian == ELFDATA2LSB) { \
> +        _val = cpu_to_le16(_val); \
> +    } else {\
> +        _val = cpu_to_be16(_val); \
> +    } \
> +    _val; \
> +})
> +
> +#define CPU_CONVERT_TO_TARGET32(val) \
> +({ \
> +    uint32_t _val = (val); \
> +    if (endian == ELFDATA2LSB) { \
> +        _val = cpu_to_le32(_val); \
> +    } else {\
> +        _val = cpu_to_be32(_val); \
> +    } \
> +    _val; \
> +})
> +
> +#define CPU_CONVERT_TO_TARGET64(val) \
> +({ \
> +    uint64_t _val = (val); \
> +    if (endian == ELFDATA2LSB) { \
> +        _val = cpu_to_le64(_val); \
> +    } else {\
> +        _val = cpu_to_be64(_val); \
> +    } \
> +    _val; \
> +})

static inline functions, please.

> +
> +enum {
> +    DUMP_STATE_ERROR,
> +    DUMP_STATE_SETUP,
> +    DUMP_STATE_CANCELLED,
> +    DUMP_STATE_ACTIVE,
> +    DUMP_STATE_COMPLETED,
> +};
> +
> +typedef struct DumpState {
> +    ArchDumpInfo dump_info;
> +    MemoryMappingList list;
> +    int phdr_num;
> +    int state;
> +    char *error;
> +    int fd;
> +    target_phys_addr_t memory_offset;
> +} DumpState;
> +
> +static DumpState *dump_get_current(void)
> +{
> +    static DumpState current_dump = {
> +        .state = DUMP_STATE_SETUP,
> +    };
> +
> +    return &current_dump;
> +}
> +
> +static int dump_cleanup(DumpState *s)
> +{
> +    int ret = 0;
> +
> +    free_memory_mapping_list(&s->list);
> +    if (s->fd != -1) {
> +        close(s->fd);
> +        s->fd = -1;
> +    }
> +
> +    return ret;
> +}
> +
> +static void dump_error(DumpState *s, const char *reason)
> +{
> +    s->state = DUMP_STATE_ERROR;
> +    s->error = g_strdup(reason);
> +    dump_cleanup(s);
> +}
> +
> +static inline int cpuid(CPUState *env)
> +{
> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
> +    return env->host_tid;

Curious: Does this command already work with user mode guest?

> +#else
> +    return env->cpu_index + 1;
> +#endif
> +}

There is gdb_id in gdbstub. It should be made generally available and
reused here.

> +
> +static int write_elf64_header(DumpState *s)
> +{
> +    Elf64_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
> +
> +    lseek(s->fd, 0, SEEK_SET);
> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_header(DumpState *s)
> +{
> +    Elf32_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header.e_ident[EI_DATA] = endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
> +
> +    lseek(s->fd, 0, SEEK_SET);
> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
> +    }
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
> +
> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
> +    }
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
> +
> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = cpuid(env);
> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf64_Ehdr);
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = cpuid(env);
> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf32_Ehdr);
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_data(DumpState *s, void *buf, int length,
> +                      target_phys_addr_t *offset)
> +{
> +    int ret;
> +
> +    lseek(s->fd, *offset, SEEK_SET);
> +    ret = write(s->fd, buf, length);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to save memory.\n");
> +        return -1;
> +    }
> +
> +    *offset += length;
> +    return 0;
> +}
> +
> +/* write the memroy to vmcore. 1 page per I/O. */
> +static int write_memory(DumpState *s, RAMBlock *block,
> +                        target_phys_addr_t *offset)
> +{
> +    int i, ret;
> +
> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         block->length % TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* get the memory's offset in the vmcore */
> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> +                                     target_phys_addr_t memory_offset)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = memory_offset;
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (phys_addr >= block->offset &&
> +            phys_addr < block->offset + block->length) {
> +            return phys_addr - block->offset + offset;
> +        }
> +        offset += block->length;
> +    }
> +
> +    return -1;
> +}
> +
> +static DumpState *dump_init(int fd, Error **errp)
> +{
> +    CPUState *env;
> +    DumpState *s = dump_get_current();
> +    int ret;
> +
> +    vm_stop(RUN_STATE_PAUSED);

I would save the current vm state first and restore it when finished.

> +    s->state = DUMP_STATE_SETUP;
> +    if (s->error) {
> +        g_free(s->error);
> +        s->error = NULL;
> +    }
> +    s->fd = fd;
> +
> +    /*
> +     * get dump info: endian, class and architecture.
> +     * If the target architecture is not supported, cpu_get_dump_info() will
> +     * return -1.
> +     *
> +     * if we use kvm, we should synchronize the register before we get dump
> +     * info.
> +     */
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        cpu_synchronize_state(env);
> +    }
> +    ret = cpu_get_dump_info(&s->dump_info);
> +    if (ret < 0) {
> +        error_set(errp, QERR_UNSUPPORTED);
> +        return NULL;
> +    }
> +
> +    /* get memory mapping */
> +    s->list.num = 0;
> +    QTAILQ_INIT(&s->list.head);
> +    get_memory_mapping(&s->list);
> +
> +    /* crash needs extra memory mapping to determine phys_base. */
> +    ret = cpu_add_extra_memory_mapping(&s->list);
> +    if (ret < 0) {
> +        error_set(errp, QERR_UNDEFINED_ERROR);
> +        return NULL;
> +    }
> +
> +    /*
> +     * calculate phdr_num
> +     *
> +     * the type of phdr->num is uint16_t, so we should avoid overflow
> +     */
> +    s->phdr_num = 1; /* PT_NOTE */
> +    if (s->list.num > (1 << 16) - 2) {
> +        s->phdr_num = (1 << 16) - 1;
> +    } else {
> +        s->phdr_num += s->list.num;
> +    }
> +
> +    return s;
> +}
> +
> +/* write elf header, PT_NOTE and elf note to vmcore. */
> +static int dump_begin(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    int ret;
> +
> +    s->state = DUMP_STATE_ACTIVE;
> +
> +    /*
> +     * the vmcore's format is:
> +     *   --------------
> +     *   |  elf header |
> +     *   --------------
> +     *   |  PT_NOTE    |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  ......     |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  elf note   |
> +     *   --------------
> +     *   |  memory     |
> +     *   --------------
> +     *
> +     * we only know where the memory is saved after we write elf note into
> +     * vmcore.
> +     */
> +
> +    /* write elf header to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        ret = write_elf64_header(s);
> +    } else {
> +        ret = write_elf32_header(s);
> +    }
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    /* write elf notes to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
> +        ret = write_elf64_notes(s, 0, &offset);
> +    } else {
> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
> +        ret = write_elf32_notes(s, 0, &offset);
> +    }
> +
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    s->memory_offset = offset;
> +    return 0;
> +}
> +
> +/* write PT_LOAD to vmcore */
> +static int dump_completed(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    MemoryMapping *memory_mapping;
> +    int phdr_index = 1, ret;
> +
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
> +        if (s->dump_info.d_class == ELFCLASS64) {
> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
> +        } else {
> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
> +        }
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    s->state = DUMP_STATE_COMPLETED;
> +    dump_cleanup(s);
> +    return 0;
> +}
> +
> +/* write all memory to vmcore */
> +static int dump_iterate(DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int ret;
> +
> +    /* write all memory to vmcore */
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        ret = write_memory(s, block, &offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return dump_completed(s);
> +}
> +
> +static int create_vmcore(DumpState *s)
> +{
> +    int ret;
> +
> +    ret = dump_begin(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = dump_iterate(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +void qmp_dump(const char *file, Error **errp)
> +{
> +    const char *p;
> +    int fd = -1;
> +    DumpState *s;
> +
> +#if !defined(WIN32)
> +    if (strstart(file, "fd:", &p)) {
> +        fd = qemu_get_fd(p);
> +        if (fd == -1) {
> +            error_set(errp, QERR_FD_NOT_FOUND, p);
> +            return;
> +        }
> +    }
> +#endif
> +
> +    if  (strstart(file, "file:", &p)) {
> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
> +        if (fd < 0) {
> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
> +            return;
> +        }
> +    }
> +
> +    if (fd == -1) {
> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
> +        return;
> +    }
> +
> +    s = dump_init(fd, errp);
> +    if (!s) {
> +        return;
> +    }
> +
> +    if (create_vmcore(s) < 0) {
> +        error_set(errp, QERR_IO_ERROR);
> +    }
> +
> +    return;
> +}
> diff --git a/dump.h b/dump.h
> index a36468b..b413d18 100644
> --- a/dump.h
> +++ b/dump.h
> @@ -1,6 +1,9 @@
>  #ifndef DUMP_H
>  #define DUMP_H
> 
> +#include "qdict.h"
> +#include "error.h"
> +

This looks stray. Nothing is added to this header that requires those
includes.

>  typedef struct ArchDumpInfo {
>      int d_machine;  /* Architecture */
>      int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 573b823..6cfb678 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -867,6 +867,22 @@ new parameters (if specified) once the vm migration finished successfully.
>  ETEXI
> 
>      {
> +        .name       = "dump",
> +        .args_type  = "file:s",
> +        .params     = "file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd = hmp_dump,
> +    },
> +
> +
> +STEXI
> +@item dump @var{file}
> +@findex dump
> +Dump to @var{file}.

That's way too brief! :) It should state the format, mention potential
architecture limitations, and explain that the output can be processed
with crash or gdb.

> +ETEXI
> +
> +    {
>          .name       = "snapshot_blkdev",
>          .args_type  = "device:B,snapshot-file:s?,format:s?",
>          .params     = "device [new-image-file] [format]",
> diff --git a/hmp.c b/hmp.c
> index 8ff8c94..1a69857 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -851,3 +851,12 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
> 
>      hmp_handle_error(mon, &error);
>  }
> +
> +void hmp_dump(Monitor *mon, const QDict *qdict)
> +{
> +    Error *errp = NULL;
> +    const char *file = qdict_get_str(qdict, "file");
> +
> +    qmp_dump(file, &errp);
> +    hmp_handle_error(mon, &errp);
> +}
> diff --git a/hmp.h b/hmp.h
> index 18eecbd..66984c5 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -58,5 +58,6 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
>  void hmp_block_stream(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
> +void hmp_dump(Monitor *mon, const QDict *qdict);
> 
>  #endif
> diff --git a/monitor.c b/monitor.c
> index 7e72739..18e1ac7 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -73,6 +73,9 @@
>  #endif
>  #include "hw/lm32_pic.h"
> 
> +/* for dump */
> +#include "dump.h"
> +
>  //#define DEBUG
>  //#define DEBUG_COMPLETION
> 
> diff --git a/qapi-schema.json b/qapi-schema.json
> index d02ee86..1013ae6 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -1582,3 +1582,16 @@
>  { 'command': 'qom-list-types',
>    'data': { '*implements': 'str', '*abstract': 'bool' },
>    'returns': [ 'ObjectTypeInfo' ] }
> +
> +##
> +# @dump
> +#
> +# Dump guest's memory to vmcore.
> +#
> +# @file: the filename or file descriptor of the vmcore.
> +#
> +# Returns: nothing on success
> +#
> +# Since: 1.1
> +##
> +{ 'command': 'dump', 'data': { 'file': 'str' } }
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index b5e2ab8..52d3d3b 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -566,6 +566,32 @@ Example:
>  EQMP
> 
>      {
> +        .name       = "dump",
> +        .args_type  = "file:s",
> +        .params     = "file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = qmp_marshal_input_dump,
> +    },
> +
> +SQMP
> +dump
> +
> +
> +Dump to file.
> +
> +Arguments:
> +
> +- "file": Destination file (json-string)

The code looks like it supports both file names and file descriptors,
no? Same for HMP.

> +
> +Example:
> +
> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>          .name       = "netdev_add",
>          .args_type  = "netdev:O",
>          .params     = "[user|tap|socket],id=str[,prop=value][,...]",
> --
> 1.7.1
> 

Jan
Wen Congyang - Feb. 15, 2012, 3:44 a.m.
At 02/15/2012 01:59 AM, Jan Kiszka Wrote:
> On 2012-02-09 04:28, Wen Congyang wrote:
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>  Makefile.target  |    8 +-
>>  dump.c           |  590 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  dump.h           |    3 +
>>  hmp-commands.hx  |   16 ++
>>  hmp.c            |    9 +
>>  hmp.h            |    1 +
>>  monitor.c        |    3 +
>>  qapi-schema.json |   13 ++
>>  qmp-commands.hx  |   26 +++
>>  9 files changed, 665 insertions(+), 4 deletions(-)
>>  create mode 100644 dump.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index d6e5684..f39ce2f 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -112,7 +112,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
>>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
>>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
>>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
>> -      user-exec.o $(oslib-obj-y)
>> +      user-exec.o $(oslib-obj-y) dump.o
>>
>>  obj-$(TARGET_HAS_BFLT) += flatload.o
>>
>> @@ -150,7 +150,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
>>  LIBS+=-lmx
>>
>>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
>> -        gdbstub.o user-exec.o
>> +        gdbstub.o user-exec.o dump.o
>>
>>  obj-i386-y += ioport-user.o
>>
>> @@ -172,7 +172,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
>>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
>>
>>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
>> -        gdbstub.o uaccess.o user-exec.o
>> +        gdbstub.o uaccess.o user-exec.o dump.o
>>
>>  obj-i386-y += ioport-user.o
>>
>> @@ -188,7 +188,7 @@ endif #CONFIG_BSD_USER
>>  # System emulator target
>>  ifdef CONFIG_SOFTMMU
>>
>> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
>> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
>>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>>  # need to fix this properly
>>  obj-$(CONFIG_NO_PCI) += pci-stub.o
>> diff --git a/dump.c b/dump.c
>> new file mode 100644
>> index 0000000..a0e8b86
>> --- /dev/null
>> +++ b/dump.c
>> @@ -0,0 +1,590 @@
>> +/*
>> + * QEMU dump
>> + *
>> + * Copyright Fujitsu, Corp. 2011
>> + *
>> + * Authors:
>> + *     Wen Congyang <wency@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include <unistd.h>
>> +#include <elf.h>
>> +#include <sys/procfs.h>
>> +#include <glib.h>
>> +#include "cpu.h"
>> +#include "cpu-all.h"
>> +#include "targphys.h"
>> +#include "monitor.h"
>> +#include "kvm.h"
>> +#include "dump.h"
>> +#include "sysemu.h"
>> +#include "bswap.h"
>> +#include "memory_mapping.h"
>> +#include "error.h"
>> +#include "qmp-commands.h"
>> +
>> +#define CPU_CONVERT_TO_TARGET16(val) \
>> +({ \
>> +    uint16_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le16(_val); \
>> +    } else {\
>> +        _val = cpu_to_be16(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +#define CPU_CONVERT_TO_TARGET32(val) \
>> +({ \
>> +    uint32_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le32(_val); \
>> +    } else {\
>> +        _val = cpu_to_be32(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +#define CPU_CONVERT_TO_TARGET64(val) \
>> +({ \
>> +    uint64_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le64(_val); \
>> +    } else {\
>> +        _val = cpu_to_be64(_val); \
>> +    } \
>> +    _val; \
>> +})
> 
> static inline functions, please.

OK

> 
>> +
>> +enum {
>> +    DUMP_STATE_ERROR,
>> +    DUMP_STATE_SETUP,
>> +    DUMP_STATE_CANCELLED,
>> +    DUMP_STATE_ACTIVE,
>> +    DUMP_STATE_COMPLETED,
>> +};
>> +
>> +typedef struct DumpState {
>> +    ArchDumpInfo dump_info;
>> +    MemoryMappingList list;
>> +    int phdr_num;
>> +    int state;
>> +    char *error;
>> +    int fd;
>> +    target_phys_addr_t memory_offset;
>> +} DumpState;
>> +
>> +static DumpState *dump_get_current(void)
>> +{
>> +    static DumpState current_dump = {
>> +        .state = DUMP_STATE_SETUP,
>> +    };
>> +
>> +    return &current_dump;
>> +}
>> +
>> +static int dump_cleanup(DumpState *s)
>> +{
>> +    int ret = 0;
>> +
>> +    free_memory_mapping_list(&s->list);
>> +    if (s->fd != -1) {
>> +        close(s->fd);
>> +        s->fd = -1;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void dump_error(DumpState *s, const char *reason)
>> +{
>> +    s->state = DUMP_STATE_ERROR;
>> +    s->error = g_strdup(reason);
>> +    dump_cleanup(s);
>> +}
>> +
>> +static inline int cpuid(CPUState *env)
>> +{
>> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
>> +    return env->host_tid;
> 
> Curious: Does this command already work with user mode guest?

I think the answer is no. I will change it.

> 
>> +#else
>> +    return env->cpu_index + 1;
>> +#endif
>> +}
> 
> There is gdb_id in gdbstub. It should be made generally available and
> reused here.

OK

> 
>> +
>> +static int write_elf64_header(DumpState *s)
>> +{
>> +    Elf64_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
>> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>> +
>> +    lseek(s->fd, 0, SEEK_SET);
>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_header(DumpState *s)
>> +{
>> +    Elf32_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
>> +    elf_header.e_ident[EI_DATA] = endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>> +
>> +    lseek(s->fd, 0, SEEK_SET);
>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>> +    }
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>> +    }
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = cpuid(env);
>> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr);
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = cpuid(env);
>> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr);
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_data(DumpState *s, void *buf, int length,
>> +                      target_phys_addr_t *offset)
>> +{
>> +    int ret;
>> +
>> +    lseek(s->fd, *offset, SEEK_SET);
>> +    ret = write(s->fd, buf, length);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to save memory.\n");
>> +        return -1;
>> +    }
>> +
>> +    *offset += length;
>> +    return 0;
>> +}
>> +
>> +/* write the memory to vmcore. 1 page per I/O. */
>> +static int write_memory(DumpState *s, RAMBlock *block,
>> +                        target_phys_addr_t *offset)
>> +{
>> +    int i, ret;
>> +
>> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>> +                         TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>> +                         block->length % TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* get the memory's offset in the vmcore */
>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>> +                                     target_phys_addr_t memory_offset)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = memory_offset;
>> +
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        if (phys_addr >= block->offset &&
>> +            phys_addr < block->offset + block->length) {
>> +            return phys_addr - block->offset + offset;
>> +        }
>> +        offset += block->length;
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +static DumpState *dump_init(int fd, Error **errp)
>> +{
>> +    CPUState *env;
>> +    DumpState *s = dump_get_current();
>> +    int ret;
>> +
>> +    vm_stop(RUN_STATE_PAUSED);
> 
> I would save the current vm state first and restore it when finished.

OK, I will do it.

> 
>> +    s->state = DUMP_STATE_SETUP;
>> +    if (s->error) {
>> +        g_free(s->error);
>> +        s->error = NULL;
>> +    }
>> +    s->fd = fd;
>> +
>> +    /*
>> +     * get dump info: endian, class and architecture.
>> +     * If the target architecture is not supported, cpu_get_dump_info() will
>> +     * return -1.
>> +     *
>> +     * if we use kvm, we should synchronize the register before we get dump
>> +     * info.
>> +     */
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        cpu_synchronize_state(env);
>> +    }
>> +    ret = cpu_get_dump_info(&s->dump_info);
>> +    if (ret < 0) {
>> +        error_set(errp, QERR_UNSUPPORTED);
>> +        return NULL;
>> +    }
>> +
>> +    /* get memory mapping */
>> +    s->list.num = 0;
>> +    QTAILQ_INIT(&s->list.head);
>> +    get_memory_mapping(&s->list);
>> +
>> +    /* crash needs extra memory mapping to determine phys_base. */
>> +    ret = cpu_add_extra_memory_mapping(&s->list);
>> +    if (ret < 0) {
>> +        error_set(errp, QERR_UNDEFINED_ERROR);
>> +        return NULL;
>> +    }
>> +
>> +    /*
>> +     * calculate phdr_num
>> +     *
>> +     * the type of phdr->num is uint16_t, so we should avoid overflow
>> +     */
>> +    s->phdr_num = 1; /* PT_NOTE */
>> +    if (s->list.num > (1 << 16) - 2) {
>> +        s->phdr_num = (1 << 16) - 1;
>> +    } else {
>> +        s->phdr_num += s->list.num;
>> +    }
>> +
>> +    return s;
>> +}
>> +
>> +/* write elf header, PT_NOTE and elf note to vmcore. */
>> +static int dump_begin(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    int ret;
>> +
>> +    s->state = DUMP_STATE_ACTIVE;
>> +
>> +    /*
>> +     * the vmcore's format is:
>> +     *   --------------
>> +     *   |  elf header |
>> +     *   --------------
>> +     *   |  PT_NOTE    |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  ......     |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  elf note   |
>> +     *   --------------
>> +     *   |  memory     |
>> +     *   --------------
>> +     *
>> +     * we only know where the memory is saved after we write elf note into
>> +     * vmcore.
>> +     */
>> +
>> +    /* write elf header to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        ret = write_elf64_header(s);
>> +    } else {
>> +        ret = write_elf32_header(s);
>> +    }
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    /* write elf notes to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
>> +        ret = write_elf64_notes(s, 0, &offset);
>> +    } else {
>> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
>> +        ret = write_elf32_notes(s, 0, &offset);
>> +    }
>> +
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    s->memory_offset = offset;
>> +    return 0;
>> +}
>> +
>> +/* write PT_LOAD to vmcore */
>> +static int dump_completed(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    MemoryMapping *memory_mapping;
>> +    int phdr_index = 1, ret;
>> +
>> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
>> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
>> +        if (s->dump_info.d_class == ELFCLASS64) {
>> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
>> +        } else {
>> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
>> +        }
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    s->state = DUMP_STATE_COMPLETED;
>> +    dump_cleanup(s);
>> +    return 0;
>> +}
>> +
>> +/* write all memory to vmcore */
>> +static int dump_iterate(DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = s->memory_offset;
>> +    int ret;
>> +
>> +    /* write all memory to vmcore */
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        ret = write_memory(s, block, &offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    return dump_completed(s);
>> +}
>> +
>> +static int create_vmcore(DumpState *s)
>> +{
>> +    int ret;
>> +
>> +    ret = dump_begin(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    ret = dump_iterate(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void qmp_dump(const char *file, Error **errp)
>> +{
>> +    const char *p;
>> +    int fd = -1;
>> +    DumpState *s;
>> +
>> +#if !defined(WIN32)
>> +    if (strstart(file, "fd:", &p)) {
>> +        fd = qemu_get_fd(p);
>> +        if (fd == -1) {
>> +            error_set(errp, QERR_FD_NOT_FOUND, p);
>> +            return;
>> +        }
>> +    }
>> +#endif
>> +
>> +    if  (strstart(file, "file:", &p)) {
>> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
>> +        if (fd < 0) {
>> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
>> +            return;
>> +        }
>> +    }
>> +
>> +    if (fd == -1) {
>> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
>> +        return;
>> +    }
>> +
>> +    s = dump_init(fd, errp);
>> +    if (!s) {
>> +        return;
>> +    }
>> +
>> +    if (create_vmcore(s) < 0) {
>> +        error_set(errp, QERR_IO_ERROR);
>> +    }
>> +
>> +    return;
>> +}
>> diff --git a/dump.h b/dump.h
>> index a36468b..b413d18 100644
>> --- a/dump.h
>> +++ b/dump.h
>> @@ -1,6 +1,9 @@
>>  #ifndef DUMP_H
>>  #define DUMP_H
>>
>> +#include "qdict.h"
>> +#include "error.h"
>> +
> 
> This looks stray. Nothing added to this header requires those
> includes.

Yes, I forgot to remove it when updating the patch. I will remove them.

> 
>>  typedef struct ArchDumpInfo {
>>      int d_machine;  /* Architecture */
>>      int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index 573b823..6cfb678 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -867,6 +867,22 @@ new parameters (if specified) once the vm migration finished successfully.
>>  ETEXI
>>
>>      {
>> +        .name       = "dump",
>> +        .args_type  = "file:s",
>> +        .params     = "file",
>> +        .help       = "dump to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd = hmp_dump,
>> +    },
>> +
>> +
>> +STEXI
>> +@item dump @var{file}
>> +@findex dump
>> +Dump to @var{file}.
> 
> That's way too brief! :) It should state the format, mention potential
> architecture limitations, and explain that the output can be processed
> with crash or gdb.

OK.

> 
>> +ETEXI
>> +
>> +    {
>>          .name       = "snapshot_blkdev",
>>          .args_type  = "device:B,snapshot-file:s?,format:s?",
>>          .params     = "device [new-image-file] [format]",
>> diff --git a/hmp.c b/hmp.c
>> index 8ff8c94..1a69857 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -851,3 +851,12 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
>>
>>      hmp_handle_error(mon, &error);
>>  }
>> +
>> +void hmp_dump(Monitor *mon, const QDict *qdict)
>> +{
>> +    Error *errp = NULL;
>> +    const char *file = qdict_get_str(qdict, "file");
>> +
>> +    qmp_dump(file, &errp);
>> +    hmp_handle_error(mon, &errp);
>> +}
>> diff --git a/hmp.h b/hmp.h
>> index 18eecbd..66984c5 100644
>> --- a/hmp.h
>> +++ b/hmp.h
>> @@ -58,5 +58,6 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
>>  void hmp_block_stream(Monitor *mon, const QDict *qdict);
>>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
>> +void hmp_dump(Monitor *mon, const QDict *qdict);
>>
>>  #endif
>> diff --git a/monitor.c b/monitor.c
>> index 7e72739..18e1ac7 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -73,6 +73,9 @@
>>  #endif
>>  #include "hw/lm32_pic.h"
>>
>> +/* for dump */
>> +#include "dump.h"
>> +
>>  //#define DEBUG
>>  //#define DEBUG_COMPLETION
>>
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index d02ee86..1013ae6 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -1582,3 +1582,16 @@
>>  { 'command': 'qom-list-types',
>>    'data': { '*implements': 'str', '*abstract': 'bool' },
>>    'returns': [ 'ObjectTypeInfo' ] }
>> +
>> +##
>> +# @dump
>> +#
>> +# Dump guest's memory to vmcore.
>> +#
>> +# @file: the filename or file descriptor of the vmcore.
>> +#
>> +# Returns: nothing on success
>> +#
>> +# Since: 1.1
>> +##
>> +{ 'command': 'dump', 'data': { 'file': 'str' } }
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index b5e2ab8..52d3d3b 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -566,6 +566,32 @@ Example:
>>  EQMP
>>
>>      {
>> +        .name       = "dump",
>> +        .args_type  = "file:s",
>> +        .params     = "file",
>> +        .help       = "dump to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd_new = qmp_marshal_input_dump,
>> +    },
>> +
>> +SQMP
>> +dump
>> +
>> +
>> +Dump to file.
>> +
>> +Arguments:
>> +
>> +- "file": Destination file (json-string)
> 
> The code looks like it supports both file names and file descriptors,
> no? Same for HMP.

Yes. I will update the description.

Thanks
Wen Congyang

> 
>> +
>> +Example:
>> +
>> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
>> +<- { "return": {} }
>> +
>> +EQMP
>> +
>> +    {
>>          .name       = "netdev_add",
>>          .args_type  = "netdev:O",
>>          .params     = "[user|tap|socket],id=str[,prop=value][,...]",
>> --
>> 1.7.1
>>
> 
> Jan
>
Wen Congyang - Feb. 17, 2012, 8:52 a.m.
At 02/15/2012 01:59 AM, Jan Kiszka Wrote:
> On 2012-02-09 04:28, Wen Congyang wrote:
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>  Makefile.target  |    8 +-
>>  dump.c           |  590 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  dump.h           |    3 +
>>  hmp-commands.hx  |   16 ++
>>  hmp.c            |    9 +
>>  hmp.h            |    1 +
>>  monitor.c        |    3 +
>>  qapi-schema.json |   13 ++
>>  qmp-commands.hx  |   26 +++
>>  9 files changed, 665 insertions(+), 4 deletions(-)
>>  create mode 100644 dump.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index d6e5684..f39ce2f 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -112,7 +112,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
>>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
>>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
>>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
>> -      user-exec.o $(oslib-obj-y)
>> +      user-exec.o $(oslib-obj-y) dump.o
>>
>>  obj-$(TARGET_HAS_BFLT) += flatload.o
>>
>> @@ -150,7 +150,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
>>  LIBS+=-lmx
>>
>>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
>> -        gdbstub.o user-exec.o
>> +        gdbstub.o user-exec.o dump.o
>>
>>  obj-i386-y += ioport-user.o
>>
>> @@ -172,7 +172,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
>>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
>>
>>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
>> -        gdbstub.o uaccess.o user-exec.o
>> +        gdbstub.o uaccess.o user-exec.o dump.o
>>
>>  obj-i386-y += ioport-user.o
>>
>> @@ -188,7 +188,7 @@ endif #CONFIG_BSD_USER
>>  # System emulator target
>>  ifdef CONFIG_SOFTMMU
>>
>> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
>> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
>>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>>  # need to fix this properly
>>  obj-$(CONFIG_NO_PCI) += pci-stub.o
>> diff --git a/dump.c b/dump.c
>> new file mode 100644
>> index 0000000..a0e8b86
>> --- /dev/null
>> +++ b/dump.c
>> @@ -0,0 +1,590 @@
>> +/*
>> + * QEMU dump
>> + *
>> + * Copyright Fujitsu, Corp. 2011
>> + *
>> + * Authors:
>> + *     Wen Congyang <wency@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include <unistd.h>
>> +#include <elf.h>
>> +#include <sys/procfs.h>
>> +#include <glib.h>
>> +#include "cpu.h"
>> +#include "cpu-all.h"
>> +#include "targphys.h"
>> +#include "monitor.h"
>> +#include "kvm.h"
>> +#include "dump.h"
>> +#include "sysemu.h"
>> +#include "bswap.h"
>> +#include "memory_mapping.h"
>> +#include "error.h"
>> +#include "qmp-commands.h"
>> +
>> +#define CPU_CONVERT_TO_TARGET16(val) \
>> +({ \
>> +    uint16_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le16(_val); \
>> +    } else {\
>> +        _val = cpu_to_be16(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +#define CPU_CONVERT_TO_TARGET32(val) \
>> +({ \
>> +    uint32_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le32(_val); \
>> +    } else {\
>> +        _val = cpu_to_be32(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +#define CPU_CONVERT_TO_TARGET64(val) \
>> +({ \
>> +    uint64_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le64(_val); \
>> +    } else {\
>> +        _val = cpu_to_be64(_val); \
>> +    } \
>> +    _val; \
>> +})
> 
> static inline functions, please.
> 
>> +
>> +enum {
>> +    DUMP_STATE_ERROR,
>> +    DUMP_STATE_SETUP,
>> +    DUMP_STATE_CANCELLED,
>> +    DUMP_STATE_ACTIVE,
>> +    DUMP_STATE_COMPLETED,
>> +};
>> +
>> +typedef struct DumpState {
>> +    ArchDumpInfo dump_info;
>> +    MemoryMappingList list;
>> +    int phdr_num;
>> +    int state;
>> +    char *error;
>> +    int fd;
>> +    target_phys_addr_t memory_offset;
>> +} DumpState;
>> +
>> +static DumpState *dump_get_current(void)
>> +{
>> +    static DumpState current_dump = {
>> +        .state = DUMP_STATE_SETUP,
>> +    };
>> +
>> +    return &current_dump;
>> +}
>> +
>> +static int dump_cleanup(DumpState *s)
>> +{
>> +    int ret = 0;
>> +
>> +    free_memory_mapping_list(&s->list);
>> +    if (s->fd != -1) {
>> +        close(s->fd);
>> +        s->fd = -1;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void dump_error(DumpState *s, const char *reason)
>> +{
>> +    s->state = DUMP_STATE_ERROR;
>> +    s->error = g_strdup(reason);
>> +    dump_cleanup(s);
>> +}
>> +
>> +static inline int cpuid(CPUState *env)
>> +{
>> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
>> +    return env->host_tid;
> 
> Curious: Does this command already work with user mode guest?
> 
>> +#else
>> +    return env->cpu_index + 1;
>> +#endif
>> +}
> 
> There is gdb_id in gdbstub. It should be made generally available and
> reused here.
> 
>> +
>> +static int write_elf64_header(DumpState *s)
>> +{
>> +    Elf64_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
>> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>> +
>> +    lseek(s->fd, 0, SEEK_SET);
>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_header(DumpState *s)
>> +{
>> +    Elf32_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
>> +    elf_header.e_ident[EI_DATA] = endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>> +
>> +    lseek(s->fd, 0, SEEK_SET);
>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>> +    }
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>> +    }
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = cpuid(env);
>> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr);
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = cpuid(env);
>> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr);
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_data(DumpState *s, void *buf, int length,
>> +                      target_phys_addr_t *offset)
>> +{
>> +    int ret;
>> +
>> +    lseek(s->fd, *offset, SEEK_SET);
>> +    ret = write(s->fd, buf, length);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to save memory.\n");
>> +        return -1;
>> +    }
>> +
>> +    *offset += length;
>> +    return 0;
>> +}
>> +
>> +/* write the memory to vmcore. 1 page per I/O. */
>> +static int write_memory(DumpState *s, RAMBlock *block,
>> +                        target_phys_addr_t *offset)
>> +{
>> +    int i, ret;
>> +
>> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>> +                         TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>> +                         block->length % TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* get the memory's offset in the vmcore */
>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>> +                                     target_phys_addr_t memory_offset)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = memory_offset;
>> +
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        if (phys_addr >= block->offset &&
>> +            phys_addr < block->offset + block->length) {
>> +            return phys_addr - block->offset + offset;
>> +        }
>> +        offset += block->length;
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +static DumpState *dump_init(int fd, Error **errp)
>> +{
>> +    CPUState *env;
>> +    DumpState *s = dump_get_current();
>> +    int ret;
>> +
>> +    vm_stop(RUN_STATE_PAUSED);
> 
> I would save the current vm state first and restore it when finished.

There is no API to get the current vm state. If you want this feature, I will
add an API to get it.

Thanks
Wen Congyang

> 
>> +    s->state = DUMP_STATE_SETUP;
>> +    if (s->error) {
>> +        g_free(s->error);
>> +        s->error = NULL;
>> +    }
>> +    s->fd = fd;
>> +
>> +    /*
>> +     * get dump info: endian, class and architecture.
>> +     * If the target architecture is not supported, cpu_get_dump_info() will
>> +     * return -1.
>> +     *
>> +     * if we use kvm, we should synchronize the register before we get dump
>> +     * info.
>> +     */
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        cpu_synchronize_state(env);
>> +    }
>> +    ret = cpu_get_dump_info(&s->dump_info);
>> +    if (ret < 0) {
>> +        error_set(errp, QERR_UNSUPPORTED);
>> +        return NULL;
>> +    }
>> +
>> +    /* get memory mapping */
>> +    s->list.num = 0;
>> +    QTAILQ_INIT(&s->list.head);
>> +    get_memory_mapping(&s->list);
>> +
>> +    /* crash needs extra memory mapping to determine phys_base. */
>> +    ret = cpu_add_extra_memory_mapping(&s->list);
>> +    if (ret < 0) {
>> +        error_set(errp, QERR_UNDEFINED_ERROR);
>> +        return NULL;
>> +    }
>> +
>> +    /*
>> +     * calculate phdr_num
>> +     *
>> +     * the type of phdr->num is uint16_t, so we should avoid overflow
>> +     */
>> +    s->phdr_num = 1; /* PT_NOTE */
>> +    if (s->list.num > (1 << 16) - 2) {
>> +        s->phdr_num = (1 << 16) - 1;
>> +    } else {
>> +        s->phdr_num += s->list.num;
>> +    }
>> +
>> +    return s;
>> +}
>> +
>> +/* write elf header, PT_NOTE and elf note to vmcore. */
>> +static int dump_begin(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    int ret;
>> +
>> +    s->state = DUMP_STATE_ACTIVE;
>> +
>> +    /*
>> +     * the vmcore's format is:
>> +     *   --------------
>> +     *   |  elf header |
>> +     *   --------------
>> +     *   |  PT_NOTE    |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  ......     |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  elf note   |
>> +     *   --------------
>> +     *   |  memory     |
>> +     *   --------------
>> +     *
>> +     * we only know where the memory is saved after we write elf note into
>> +     * vmcore.
>> +     */
>> +
>> +    /* write elf header to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        ret = write_elf64_header(s);
>> +    } else {
>> +        ret = write_elf32_header(s);
>> +    }
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    /* write elf notes to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
>> +        ret = write_elf64_notes(s, 0, &offset);
>> +    } else {
>> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
>> +        ret = write_elf32_notes(s, 0, &offset);
>> +    }
>> +
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    s->memory_offset = offset;
>> +    return 0;
>> +}
>> +
>> +/* write PT_LOAD to vmcore */
>> +static int dump_completed(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    MemoryMapping *memory_mapping;
>> +    int phdr_index = 1, ret;
>> +
>> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
>> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
>> +        if (s->dump_info.d_class == ELFCLASS64) {
>> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
>> +        } else {
>> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
>> +        }
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    s->state = DUMP_STATE_COMPLETED;
>> +    dump_cleanup(s);
>> +    return 0;
>> +}
>> +
>> +/* write all memory to vmcore */
>> +static int dump_iterate(DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = s->memory_offset;
>> +    int ret;
>> +
>> +    /* write all memory to vmcore */
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        ret = write_memory(s, block, &offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    return dump_completed(s);
>> +}
>> +
>> +static int create_vmcore(DumpState *s)
>> +{
>> +    int ret;
>> +
>> +    ret = dump_begin(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    ret = dump_iterate(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void qmp_dump(const char *file, Error **errp)
>> +{
>> +    const char *p;
>> +    int fd = -1;
>> +    DumpState *s;
>> +
>> +#if !defined(WIN32)
>> +    if (strstart(file, "fd:", &p)) {
>> +        fd = qemu_get_fd(p);
>> +        if (fd == -1) {
>> +            error_set(errp, QERR_FD_NOT_FOUND, p);
>> +            return;
>> +        }
>> +    }
>> +#endif
>> +
>> +    if  (strstart(file, "file:", &p)) {
>> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
>> +        if (fd < 0) {
>> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
>> +            return;
>> +        }
>> +    }
>> +
>> +    if (fd == -1) {
>> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
>> +        return;
>> +    }
>> +
>> +    s = dump_init(fd, errp);
>> +    if (!s) {
>> +        return;
>> +    }
>> +
>> +    if (create_vmcore(s) < 0) {
>> +        error_set(errp, QERR_IO_ERROR);
>> +    }
>> +
>> +    return;
>> +}
>> diff --git a/dump.h b/dump.h
>> index a36468b..b413d18 100644
>> --- a/dump.h
>> +++ b/dump.h
>> @@ -1,6 +1,9 @@
>>  #ifndef DUMP_H
>>  #define DUMP_H
>>
>> +#include "qdict.h"
>> +#include "error.h"
>> +
> 
> This looks stray. Nothing is added to this header which require those
> includes.
> 
>>  typedef struct ArchDumpInfo {
>>      int d_machine;  /* Architecture */
>>      int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index 573b823..6cfb678 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -867,6 +867,22 @@ new parameters (if specified) once the vm migration finished successfully.
>>  ETEXI
>>
>>      {
>> +        .name       = "dump",
>> +        .args_type  = "file:s",
>> +        .params     = "file",
>> +        .help       = "dump to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd = hmp_dump,
>> +    },
>> +
>> +
>> +STEXI
>> +@item dump @var{file}
>> +@findex dump
>> +Dump to @var{file}.
> 
> That's way too brief! :) It should state the format, mention potential
> architecture limitations, and explain that the output can be processed
> with crash or gdb.
> 
>> +ETEXI
>> +
>> +    {
>>          .name       = "snapshot_blkdev",
>>          .args_type  = "device:B,snapshot-file:s?,format:s?",
>>          .params     = "device [new-image-file] [format]",
>> diff --git a/hmp.c b/hmp.c
>> index 8ff8c94..1a69857 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -851,3 +851,12 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
>>
>>      hmp_handle_error(mon, &error);
>>  }
>> +
>> +void hmp_dump(Monitor *mon, const QDict *qdict)
>> +{
>> +    Error *errp = NULL;
>> +    const char *file = qdict_get_str(qdict, "file");
>> +
>> +    qmp_dump(file, &errp);
>> +    hmp_handle_error(mon, &errp);
>> +}
>> diff --git a/hmp.h b/hmp.h
>> index 18eecbd..66984c5 100644
>> --- a/hmp.h
>> +++ b/hmp.h
>> @@ -58,5 +58,6 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
>>  void hmp_block_stream(Monitor *mon, const QDict *qdict);
>>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
>> +void hmp_dump(Monitor *mon, const QDict *qdict);
>>
>>  #endif
>> diff --git a/monitor.c b/monitor.c
>> index 7e72739..18e1ac7 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -73,6 +73,9 @@
>>  #endif
>>  #include "hw/lm32_pic.h"
>>
>> +/* for dump */
>> +#include "dump.h"
>> +
>>  //#define DEBUG
>>  //#define DEBUG_COMPLETION
>>
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index d02ee86..1013ae6 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -1582,3 +1582,16 @@
>>  { 'command': 'qom-list-types',
>>    'data': { '*implements': 'str', '*abstract': 'bool' },
>>    'returns': [ 'ObjectTypeInfo' ] }
>> +
>> +##
>> +# @dump
>> +#
>> +# Dump guest's memory to vmcore.
>> +#
>> +# @file: the filename or file descriptor of the vmcore.
>> +#
>> +# Returns: nothing on success
>> +#
>> +# Since: 1.1
>> +##
>> +{ 'command': 'dump', 'data': { 'file': 'str' } }
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index b5e2ab8..52d3d3b 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -566,6 +566,32 @@ Example:
>>  EQMP
>>
>>      {
>> +        .name       = "dump",
>> +        .args_type  = "file:s",
>> +        .params     = "file",
>> +        .help       = "dump to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd_new = qmp_marshal_input_dump,
>> +    },
>> +
>> +SQMP
>> +dump
>> +
>> +
>> +Dump to file.
>> +
>> +Arguments:
>> +
>> +- "file": Destination file (json-string)
> 
> The code looks like it supports both file names and file descriptors,
> no? Same for HMP.
> 
>> +
>> +Example:
>> +
>> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
>> +<- { "return": {} }
>> +
>> +EQMP
>> +
>> +    {
>>          .name       = "netdev_add",
>>          .args_type  = "netdev:O",
>>          .params     = "[user|tap|socket],id=str[,prop=value][,...]",
>> --
>> 1.7.1
>>
> 
> Jan
>
Jan Kiszka - Feb. 17, 2012, 9:26 a.m.
On 2012-02-17 09:52, Wen Congyang wrote:
>>> +static DumpState *dump_init(int fd, Error **errp)
>>> +{
>>> +    CPUState *env;
>>> +    DumpState *s = dump_get_current();
>>> +    int ret;
>>> +
>>> +    vm_stop(RUN_STATE_PAUSED);
>>
>> I would save the current vm state first and restore it when finished.
> 
> There is no API to get current vm state. If you want this feature, I will
> add API to get it.

You are looking for runstate_is_running().

Jan
Jan Kiszka - Feb. 17, 2012, 9:35 a.m.
On 2012-02-17 10:35, Wen Congyang wrote:
> At 02/17/2012 05:26 PM, Jan Kiszka Wrote:
>> On 2012-02-17 09:52, Wen Congyang wrote:
>>>>> +static DumpState *dump_init(int fd, Error **errp)
>>>>> +{
>>>>> +    CPUState *env;
>>>>> +    DumpState *s = dump_get_current();
>>>>> +    int ret;
>>>>> +
>>>>> +    vm_stop(RUN_STATE_PAUSED);
>>>>
>>>> I would save the current vm state first and restore it when finished.
>>>
>>> There is no API to get current vm state. If you want this feature, I will
>>> add API to get it.
>>
>> You are looking for runstate_is_running().
> 
> Yes. vm_stop() stops the vcpu only when runstate_is_running(). So I think
> you need to resume all vcpu after dumping is finished.

Yes, but _only_ if runstate_is_running() was true before calling
vm_stop. That is my point.

Jan
Wen Congyang - Feb. 17, 2012, 9:35 a.m.
At 02/17/2012 05:26 PM, Jan Kiszka Wrote:
> On 2012-02-17 09:52, Wen Congyang wrote:
>>>> +static DumpState *dump_init(int fd, Error **errp)
>>>> +{
>>>> +    CPUState *env;
>>>> +    DumpState *s = dump_get_current();
>>>> +    int ret;
>>>> +
>>>> +    vm_stop(RUN_STATE_PAUSED);
>>>
>>> I would save the current vm state first and restore it when finished.
>>
>> There is no API to get current vm state. If you want this feature, I will
>> add API to get it.
> 
> You are looking for runstate_is_running().

Yes. vm_stop() stops the vcpu only when runstate_is_running(). So I think
you need to resume all vcpu after dumping is finished.

Thanks
Wen Congyang

> 
> Jan
>
Eric Blake - Feb. 17, 2012, 4:32 p.m.
On 02/17/2012 01:52 AM, Wen Congyang wrote:
> At 02/15/2012 01:59 AM, Jan Kiszka Wrote:
>> On 2012-02-09 04:28, Wen Congyang wrote:
>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>

<snip several kilobytes>

>>> +static DumpState *dump_init(int fd, Error **errp)
>>> +{
>>> +    CPUState *env;
>>> +    DumpState *s = dump_get_current();
>>> +    int ret;
>>> +
>>> +    vm_stop(RUN_STATE_PAUSED);
>>
>> I would save the current vm state first and restore it when finished.
> 
> There is no API to get current vm state. If you want this feature, I will
> add API to get it.
> 
> Thanks
> Wen Congyang

<snip several kilobytes>

Maybe it's just me, and you can ignore me if I'm speaking out of turn
for expressing my views on list netiquette, but...

I get frustrated by lengthy messages that are heavily re-quoted versions
of earlier versions, with only a very little new content embedded in the
middle where I have to hunt for it.  There's nothing wrong with using
the Delete key to trim replies down to relevant portions, which reduces
the bandwidth of the list engine as well as reduces the time spent in
reviewing email exchanges.

/me returns back to lurk mode, but with one additional observation:

There are other APIs where qemu has ended up pausing the domain and not
restoring things back to running when done, and where libvirt has had to
track existing state prior to starting actions in order to manually fix
things after the fact (see libvirt's qemudDomainCoreDump as a wrapper
around migration to file, for an example).  If we do things right in
this new DumpState API, we may want to decide to fix other monitor
commands to use the same mechanism (it won't offload any of the burden
from libvirt, which must still correctly interact with older qemu, but
would make life nicer for clients that can assume the saner semantics).
Jan Kiszka - Feb. 17, 2012, 4:51 p.m.
On 2012-02-17 17:32, Eric Blake wrote:
> There are other APIs where qemu has ended up pausing the domain and not
> restoring things back to running when done, and where libvirt has had to
> track existing state prior to starting actions in order to manually fix
> things after the fact (see libvirt's qemudDomainCoreDump as a wrapper
> around migration to file, for an example).  If we do things right in
> this new DumpState API, we may want to decide to fix other monitor
> commands to use the same mechanism (it won't offload any of the burden
> from libvirt, which must still correctly interact with older qemu, but
> would make life nicer for clients that can assume the saner semantics).

I think there is no need for a new API. Everything you need is there:
check current state, prevent transitions or invoked handlers on
unexpected transitions. If other commands do not make use of this, they
should probably be fixed.

What command or series of commands do you have in mind?

Jan
Eric Blake - Feb. 17, 2012, 5:05 p.m.
On 02/17/2012 09:51 AM, Jan Kiszka wrote:
> On 2012-02-17 17:32, Eric Blake wrote:
>> There are other APIs where qemu has ended up pausing the domain and not
>> restoring things back to running when done, and where libvirt has had to
>> track existing state prior to starting actions in order to manually fix
>> things after the fact (see libvirt's qemudDomainCoreDump as a wrapper
>> around migration to file, for an example).  If we do things right in
>> this new DumpState API, we may want to decide to fix other monitor
>> commands to use the same mechanism (it won't offload any of the burden
>> from libvirt, which must still correctly interact with older qemu, but
>> would make life nicer for clients that can assume the saner semantics).
> 
> I think there is no need for a new API. Everything you need is there:
> check current state, prevent transitions or invoked handlers on
> unexpected transitions. If other commands do not make use of this, they
> should probably be fixed.
> 
> What command or series of commands do you have in mind?

Right now, libvirt pauses qemu itself at least before issuing 'migrate'
to file, before issuing 'savevm', and before issuing
'blockdev-snapshot-sync' [1].  In particular, this comment in the
libvirt code surrounding the 'savevm' call is interesting:

    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        /* savevm monitor command pauses the domain emitting an event which
         * confuses libvirt since it's not notified when qemu resumes the
         * domain. Thus we stop and start CPUs ourselves.
         */

I'm not sure if the situation has improved since that comment was first
written, but it looks like a case where if libvirt were to let qemu do
the pause and resume as part of the single monitor command, instead of
libvirt breaking things into multiple monitor commands to track state
itself, then enough weird stuff happened at least with older versions of
qemu to make libvirt unhappy.

[1] Note - the fact that libvirt must pause around
'blockdev-snapshot-sync' is due to an orthogonal issue of snapshotting
more than one disk as an atomic operation; my understanding is that Jeff
Cody is working on a patch series to add a new monitor command
'blockdev-group-snapshot-sync' that would let libvirt delegate the pause
and resume to qemu instead, but that's a topic for a different thread.

Patch

diff --git a/Makefile.target b/Makefile.target
index d6e5684..f39ce2f 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -112,7 +112,7 @@  $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
 QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
 obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
       elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
-      user-exec.o $(oslib-obj-y)
+      user-exec.o $(oslib-obj-y) dump.o
 
 obj-$(TARGET_HAS_BFLT) += flatload.o
 
@@ -150,7 +150,7 @@  LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
 LIBS+=-lmx
 
 obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
-        gdbstub.o user-exec.o
+        gdbstub.o user-exec.o dump.o
 
 obj-i386-y += ioport-user.o
 
@@ -172,7 +172,7 @@  $(call set-vpath, $(SRC_PATH)/bsd-user)
 QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
 
 obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
-        gdbstub.o uaccess.o user-exec.o
+        gdbstub.o uaccess.o user-exec.o dump.o
 
 obj-i386-y += ioport-user.o
 
@@ -188,7 +188,7 @@  endif #CONFIG_BSD_USER
 # System emulator target
 ifdef CONFIG_SOFTMMU
 
-obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
+obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 obj-$(CONFIG_NO_PCI) += pci-stub.o
diff --git a/dump.c b/dump.c
new file mode 100644
index 0000000..a0e8b86
--- /dev/null
+++ b/dump.c
@@ -0,0 +1,590 @@ 
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011
+ *
+ * Authors:
+ *     Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include <unistd.h>
+#include <elf.h>
+#include <sys/procfs.h>
+#include <glib.h>
+#include "cpu.h"
+#include "cpu-all.h"
+#include "targphys.h"
+#include "monitor.h"
+#include "kvm.h"
+#include "dump.h"
+#include "sysemu.h"
+#include "bswap.h"
+#include "memory_mapping.h"
+#include "error.h"
+#include "qmp-commands.h"
+
+#define CPU_CONVERT_TO_TARGET16(val) \
+({ \
+    uint16_t _val = (val); \
+    if (endian == ELFDATA2LSB) { \
+        _val = cpu_to_le16(_val); \
+    } else {\
+        _val = cpu_to_be16(_val); \
+    } \
+    _val; \
+})
+
+#define CPU_CONVERT_TO_TARGET32(val) \
+({ \
+    uint32_t _val = (val); \
+    if (endian == ELFDATA2LSB) { \
+        _val = cpu_to_le32(_val); \
+    } else {\
+        _val = cpu_to_be32(_val); \
+    } \
+    _val; \
+})
+
+#define CPU_CONVERT_TO_TARGET64(val) \
+({ \
+    uint64_t _val = (val); \
+    if (endian == ELFDATA2LSB) { \
+        _val = cpu_to_le64(_val); \
+    } else {\
+        _val = cpu_to_be64(_val); \
+    } \
+    _val; \
+})
+
+enum {
+    DUMP_STATE_ERROR,
+    DUMP_STATE_SETUP,
+    DUMP_STATE_CANCELLED,
+    DUMP_STATE_ACTIVE,
+    DUMP_STATE_COMPLETED,
+};
+
+typedef struct DumpState {
+    ArchDumpInfo dump_info;
+    MemoryMappingList list;
+    int phdr_num;
+    int state;
+    char *error;
+    int fd;
+    target_phys_addr_t memory_offset;
+} DumpState;
+
+static DumpState *dump_get_current(void)
+{
+    static DumpState current_dump = {
+        .state = DUMP_STATE_SETUP,
+    };
+
+    return &current_dump;
+}
+
+static int dump_cleanup(DumpState *s)
+{
+    int ret = 0;
+
+    free_memory_mapping_list(&s->list);
+    if (s->fd != -1) {
+        close(s->fd);
+        s->fd = -1;
+    }
+
+    return ret;
+}
+
+static void dump_error(DumpState *s, const char *reason)
+{
+    s->state = DUMP_STATE_ERROR;
+    s->error = g_strdup(reason);
+    dump_cleanup(s);
+}
+
+static inline int cpuid(CPUState *env)
+{
+#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
+    return env->host_tid;
+#else
+    return env->cpu_index + 1;
+#endif
+}
+
+static int write_elf64_header(DumpState *s)
+{
+    Elf64_Ehdr elf_header;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
+    memcpy(&elf_header, ELFMAG, 4);
+    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
+    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
+    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
+    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
+    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
+    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
+    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
+    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
+    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
+
+    lseek(s->fd, 0, SEEK_SET);
+    ret = write(s->fd, &elf_header, sizeof(elf_header));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write elf header.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_header(DumpState *s)
+{
+    Elf32_Ehdr elf_header;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
+    memcpy(&elf_header, ELFMAG, 4);
+    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
+    elf_header.e_ident[EI_DATA] = endian;
+    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
+    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
+    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
+    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
+    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
+    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
+    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
+
+    lseek(s->fd, 0, SEEK_SET);
+    ret = write(s->fd, &elf_header, sizeof(elf_header));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write elf header.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
+                            int phdr_index, target_phys_addr_t offset)
+{
+    Elf64_Phdr phdr;
+    off_t phdr_offset;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&phdr, 0, sizeof(Elf64_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
+    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
+    if (offset == -1) {
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
+    }
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
+    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
+
+    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
+                            int phdr_index, target_phys_addr_t offset)
+{
+    Elf32_Phdr phdr;
+    off_t phdr_offset;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&phdr, 0, sizeof(Elf32_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
+    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
+    if (offset == -1) {
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
+    }
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
+    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
+
+    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf64_notes(DumpState *s, int phdr_index,
+                             target_phys_addr_t *offset)
+{
+    CPUState *env;
+    int ret;
+    target_phys_addr_t begin = *offset;
+    Elf64_Phdr phdr;
+    off_t phdr_offset;
+    int id;
+    int endian = s->dump_info.d_endian;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        id = cpuid(env);
+        ret = cpu_write_elf64_note(s->fd, env, id, offset);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write elf notes.\n");
+            return -1;
+        }
+    }
+
+    memset(&phdr, 0, sizeof(Elf64_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
+    phdr.p_paddr = 0;
+    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
+    phdr.p_vaddr = 0;
+
+    phdr_offset = sizeof(Elf64_Ehdr);
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_notes(DumpState *s, int phdr_index,
+                             target_phys_addr_t *offset)
+{
+    CPUState *env;
+    int ret;
+    target_phys_addr_t begin = *offset;
+    Elf32_Phdr phdr;
+    off_t phdr_offset;
+    int id;
+    int endian = s->dump_info.d_endian;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        id = cpuid(env);
+        ret = cpu_write_elf32_note(s->fd, env, id, offset);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write elf notes.\n");
+            return -1;
+        }
+    }
+
+    memset(&phdr, 0, sizeof(Elf32_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
+    phdr.p_paddr = 0;
+    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
+    phdr.p_vaddr = 0;
+
+    phdr_offset = sizeof(Elf32_Ehdr);
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_data(DumpState *s, void *buf, int length,
+                      target_phys_addr_t *offset)
+{
+    int ret;
+
+    lseek(s->fd, *offset, SEEK_SET);
+    ret = write(s->fd, buf, length);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to save memory.\n");
+        return -1;
+    }
+
+    *offset += length;
+    return 0;
+}
+
+/* write the memory to vmcore. 1 page per I/O. */
+static int write_memory(DumpState *s, RAMBlock *block,
+                        target_phys_addr_t *offset)
+{
+    int i, ret;
+
+    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
+        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
+                         TARGET_PAGE_SIZE, offset);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    if ((block->length % TARGET_PAGE_SIZE) != 0) {
+        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
+                         block->length % TARGET_PAGE_SIZE, offset);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* get the memory's offset in the vmcore */
+static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
+                                     target_phys_addr_t memory_offset)
+{
+    RAMBlock *block;
+    target_phys_addr_t offset = memory_offset;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (phys_addr >= block->offset &&
+            phys_addr < block->offset + block->length) {
+            return phys_addr - block->offset + offset;
+        }
+        offset += block->length;
+    }
+
+    return -1;
+}
+
+static DumpState *dump_init(int fd, Error **errp)
+{
+    CPUState *env;
+    DumpState *s = dump_get_current();
+    int ret;
+
+    vm_stop(RUN_STATE_PAUSED);
+    s->state = DUMP_STATE_SETUP;
+    if (s->error) {
+        g_free(s->error);
+        s->error = NULL;
+    }
+    s->fd = fd;
+
+    /*
+     * get dump info: endian, class and architecture.
+     * If the target architecture is not supported, cpu_get_dump_info() will
+     * return -1.
+     *
+     * if we use kvm, we should synchronize the register before we get dump
+     * info.
+     */
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu_synchronize_state(env);
+    }
+    ret = cpu_get_dump_info(&s->dump_info);
+    if (ret < 0) {
+        error_set(errp, QERR_UNSUPPORTED);
+        return NULL;
+    }
+
+    /* get memory mapping */
+    s->list.num = 0;
+    QTAILQ_INIT(&s->list.head);
+    get_memory_mapping(&s->list);
+
+    /* crash needs extra memory mapping to determine phys_base. */
+    ret = cpu_add_extra_memory_mapping(&s->list);
+    if (ret < 0) {
+        error_set(errp, QERR_UNDEFINED_ERROR);
+        return NULL;
+    }
+
+    /*
+     * calculate phdr_num
+     *
+     * the type of e_phnum is uint16_t, so we should avoid overflow
+     */
+    s->phdr_num = 1; /* PT_NOTE */
+    if (s->list.num > (1 << 16) - 2) {
+        s->phdr_num = (1 << 16) - 1;
+    } else {
+        s->phdr_num += s->list.num;
+    }
+
+    return s;
+}
+
+/* write elf header, PT_NOTE and elf note to vmcore. */
+static int dump_begin(DumpState *s)
+{
+    target_phys_addr_t offset;
+    int ret;
+
+    s->state = DUMP_STATE_ACTIVE;
+
+    /*
+     * the vmcore's format is:
+     *   --------------
+     *   |  elf header |
+     *   --------------
+     *   |  PT_NOTE    |
+     *   --------------
+     *   |  PT_LOAD    |
+     *   --------------
+     *   |  ......     |
+     *   --------------
+     *   |  PT_LOAD    |
+     *   --------------
+     *   |  elf note   |
+     *   --------------
+     *   |  memory     |
+     *   --------------
+     *
+     * we only know where the memory is saved after we write elf note into
+     * vmcore.
+     */
+
+    /* write elf header to vmcore */
+    if (s->dump_info.d_class == ELFCLASS64) {
+        ret = write_elf64_header(s);
+    } else {
+        ret = write_elf32_header(s);
+    }
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* write elf notes to vmcore */
+    if (s->dump_info.d_class == ELFCLASS64) {
+        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
+        ret = write_elf64_notes(s, 0, &offset);
+    } else {
+        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
+        ret = write_elf32_notes(s, 0, &offset);
+    }
+
+    if (ret < 0) {
+        return -1;
+    }
+
+    s->memory_offset = offset;
+    return 0;
+}
+
+/* write PT_LOAD to vmcore */
+static int dump_completed(DumpState *s)
+{
+    target_phys_addr_t offset;
+    MemoryMapping *memory_mapping;
+    int phdr_index = 1, ret;
+
+    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
+        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
+        if (s->dump_info.d_class == ELFCLASS64) {
+            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
+        } else {
+            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
+        }
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    s->state = DUMP_STATE_COMPLETED;
+    dump_cleanup(s);
+    return 0;
+}
+
+/* write all memory to vmcore */
+static int dump_iterate(DumpState *s)
+{
+    RAMBlock *block;
+    target_phys_addr_t offset = s->memory_offset;
+    int ret;
+
+    /* write all memory to vmcore */
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        ret = write_memory(s, block, &offset);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    return dump_completed(s);
+}
+
+static int create_vmcore(DumpState *s)
+{
+    int ret;
+
+    ret = dump_begin(s);
+    if (ret < 0) {
+        return -1;
+    }
+
+    ret = dump_iterate(s);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+void qmp_dump(const char *file, Error **errp)
+{
+    const char *p;
+    int fd = -1;
+    DumpState *s;
+
+#if !defined(WIN32)
+    if (strstart(file, "fd:", &p)) {
+        fd = qemu_get_fd(p);
+        if (fd == -1) {
+            error_set(errp, QERR_FD_NOT_FOUND, p);
+            return;
+        }
+    }
+#endif
+
+    if  (strstart(file, "file:", &p)) {
+        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
+        if (fd < 0) {
+            error_set(errp, QERR_OPEN_FILE_FAILED, p);
+            return;
+        }
+    }
+
+    if (fd == -1) {
+        error_set(errp, QERR_INVALID_PARAMETER, "file");
+        return;
+    }
+
+    s = dump_init(fd, errp);
+    if (!s) {
+        return;
+    }
+
+    if (create_vmcore(s) < 0) {
+        error_set(errp, QERR_IO_ERROR);
+    }
+
+    return;
+}
diff --git a/dump.h b/dump.h
index a36468b..b413d18 100644
--- a/dump.h
+++ b/dump.h
@@ -1,6 +1,9 @@ 
 #ifndef DUMP_H
 #define DUMP_H
 
+#include "qdict.h"
+#include "error.h"
+
 typedef struct ArchDumpInfo {
     int d_machine;  /* Architecture */
     int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 573b823..6cfb678 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -867,6 +867,22 @@  new parameters (if specified) once the vm migration finished successfully.
 ETEXI
 
     {
+        .name       = "dump",
+        .args_type  = "file:s",
+        .params     = "file",
+        .help       = "dump to file",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd = hmp_dump,
+    },
+
+
+STEXI
+@item dump @var{file}
+@findex dump
+Dump guest's memory to @var{file}.
+ETEXI
+
+    {
         .name       = "snapshot_blkdev",
         .args_type  = "device:B,snapshot-file:s?,format:s?",
         .params     = "device [new-image-file] [format]",
diff --git a/hmp.c b/hmp.c
index 8ff8c94..1a69857 100644
--- a/hmp.c
+++ b/hmp.c
@@ -851,3 +851,12 @@  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
 
     hmp_handle_error(mon, &error);
 }
+
+/* HMP 'dump': pass the "file" argument to qmp_dump(), then hand off errors */
+void hmp_dump(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
+
+    qmp_dump(qdict_get_str(qdict, "file"), &err);
+    hmp_handle_error(mon, &err);
+}
diff --git a/hmp.h b/hmp.h
index 18eecbd..66984c5 100644
--- a/hmp.h
+++ b/hmp.h
@@ -58,5 +58,6 @@  void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
+void hmp_dump(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/monitor.c b/monitor.c
index 7e72739..18e1ac7 100644
--- a/monitor.c
+++ b/monitor.c
@@ -73,6 +73,9 @@ 
 #endif
 #include "hw/lm32_pic.h"
 
+/* for dump */
+#include "dump.h"
+
 //#define DEBUG
 //#define DEBUG_COMPLETION
 
diff --git a/qapi-schema.json b/qapi-schema.json
index d02ee86..1013ae6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1582,3 +1582,16 @@ 
 { 'command': 'qom-list-types',
   'data': { '*implements': 'str', '*abstract': 'bool' },
   'returns': [ 'ObjectTypeInfo' ] }
+
+##
+# @dump
+#
+# Dump guest's memory to vmcore.
+#
+# @file: where to write the vmcore: "file:<path>" or "fd:<name>".
+#
+# Returns: nothing on success
+#
+# Since: 1.1
+##
+{ 'command': 'dump', 'data': { 'file': 'str' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index b5e2ab8..52d3d3b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -566,6 +566,32 @@  Example:
 EQMP
 
     {
+        .name       = "dump",
+        .args_type  = "file:s",
+        .params     = "file",
+        .help       = "dump to file",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = qmp_marshal_input_dump,
+    },
+
+SQMP
+dump
+
+
+Dump to file.
+
+Arguments:
+
+- "file": Destination, "file:<path>" or "fd:<name>" (json-string)
+
+Example:
+
+-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "netdev_add",
         .args_type  = "netdev:O",
         .params     = "[user|tap|socket],id=str[,prop=value][,...]",