diff mbox

[RFC,08/14,v4] introduce a new monitor command 'dump' to dump guest's memory

Message ID 4F03ED75.4070605@cn.fujitsu.com
State New
Headers show

Commit Message

Wen Congyang Jan. 4, 2012, 6:11 a.m. UTC
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 Makefile.target |    8 +-
 dump.c          |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dump.h          |    4 +
 hmp-commands.hx |   16 ++
 monitor.c       |    3 +
 qmp-commands.hx |   26 +++
 6 files changed, 641 insertions(+), 4 deletions(-)
 create mode 100644 dump.c

Comments

Luiz Capitulino Jan. 10, 2012, 1:30 p.m. UTC | #1
On Wed, 04 Jan 2012 14:11:01 +0800
Wen Congyang <wency@cn.fujitsu.com> wrote:

> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>  Makefile.target |    8 +-
>  dump.c          |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  dump.h          |    4 +
>  hmp-commands.hx |   16 ++
>  monitor.c       |    3 +
>  qmp-commands.hx |   26 +++
>  6 files changed, 641 insertions(+), 4 deletions(-)
>  create mode 100644 dump.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index 29562ad..f7cc2b9 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -110,7 +110,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
> -      user-exec.o $(oslib-obj-y)
> +      user-exec.o $(oslib-obj-y) dump.o
>  
>  obj-$(TARGET_HAS_BFLT) += flatload.o
>  
> @@ -148,7 +148,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
>  LIBS+=-lmx
>  
>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
> -        gdbstub.o user-exec.o
> +        gdbstub.o user-exec.o dump.o
>  
>  obj-i386-y += ioport-user.o
>  
> @@ -170,7 +170,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
>  
>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
> -        gdbstub.o uaccess.o user-exec.o
> +        gdbstub.o uaccess.o user-exec.o dump.o
>  
>  obj-i386-y += ioport-user.o
>  
> @@ -186,7 +186,7 @@ endif #CONFIG_BSD_USER
>  # System emulator target
>  ifdef CONFIG_SOFTMMU
>  
> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>  # need to fix this properly
>  obj-$(CONFIG_NO_PCI) += pci-stub.o
> diff --git a/dump.c b/dump.c
> new file mode 100644
> index 0000000..ab29a4c
> --- /dev/null
> +++ b/dump.c
> @@ -0,0 +1,588 @@
> +/*
> + * QEMU dump
> + *
> + * Copyright Fujitsu, Corp. 2011
> + *
> + * Authors:
> + *     Wen Congyang <wency@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include <unistd.h>
> +#include <elf.h>
> +#include <sys/procfs.h>
> +#include "cpu.h"
> +#include "cpu-all.h"
> +#include "targphys.h"
> +#include "monitor.h"
> +#include "kvm.h"
> +#include "dump.h"
> +#include "sysemu.h"
> +#include "bswap.h"
> +#include "memory_mapping.h"
> +
> +#define CPU_CONVERT_TO_TARGET16(val) \
> +({ \
> +    uint16_t _val = (val); \
> +    if (endian == ELFDATA2LSB) { \
> +        _val = cpu_to_le16(_val); \
> +    } else {\
> +        _val = cpu_to_be16(_val); \
> +    } \
> +    _val; \
> +})
> +
> +#define CPU_CONVERT_TO_TARGET32(val) \
> +({ \
> +    uint32_t _val = (val); \
> +    if (endian == ELFDATA2LSB) { \
> +        _val = cpu_to_le32(_val); \
> +    } else {\
> +        _val = cpu_to_be32(_val); \
> +    } \
> +    _val; \
> +})
> +
> +#define CPU_CONVERT_TO_TARGET64(val) \
> +({ \
> +    uint64_t _val = (val); \
> +    if (endian == ELFDATA2LSB) { \
> +        _val = cpu_to_le64(_val); \
> +    } else {\
> +        _val = cpu_to_be64(_val); \
> +    } \
> +    _val; \
> +})
> +
> +enum {
> +    DUMP_STATE_ERROR,
> +    DUMP_STATE_SETUP,
> +    DUMP_STATE_CANCELLED,
> +    DUMP_STATE_ACTIVE,
> +    DUMP_STATE_COMPLETED,
> +};
> +
> +typedef struct DumpState {
> +    ArchDumpInfo dump_info;
> +    MemoryMappingList list;
> +    int phdr_num;
> +    int state;
> +    char *error;
> +    Monitor *mon;
> +    int fd;
> +    target_phys_addr_t memory_offset;
> +} DumpState;
> +
> +static DumpState *dump_get_current(void)
> +{
> +    static DumpState current_dump = {
> +        .state = DUMP_STATE_SETUP,
> +    };
> +
> +    return &current_dump;
> +}
> +
> +static int dump_cleanup(DumpState *s)
> +{
> +    int ret = 0;
> +
> +    free_memory_mapping_list(&s->list);
> +    if (s->fd != -1) {
> +        close(s->fd);
> +        s->fd = -1;
> +    }
> +
> +    return ret;
> +}
> +
> +static void dump_error(DumpState *s, const char *reason)
> +{
> +    s->state = DUMP_STATE_ERROR;
> +    s->error = g_strdup(reason);
> +    dump_cleanup(s);
> +}
> +
> +static inline int cpuid(CPUState *env)
> +{
> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
> +    return env->host_tid;
> +#else
> +    return env->cpu_index + 1;
> +#endif
> +}
> +
> +static int write_elf64_header(DumpState *s)
> +{
> +    Elf64_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
> +
> +    lseek(s->fd, 0, SEEK_SET);
> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_header(DumpState *s)
> +{
> +    Elf32_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header.e_ident[EI_DATA] = endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
> +
> +    lseek(s->fd, 0, SEEK_SET);
> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
> +    }
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
> +
> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
> +    }
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
> +
> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = cpuid(env);
> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf64_Ehdr);
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = cpuid(env);
> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf32_Ehdr);
> +    lseek(s->fd, phdr_offset, SEEK_SET);
> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_data(DumpState *s, void *buf, int length,
> +                      target_phys_addr_t *offset)
> +{
> +    int ret;
> +
> +    lseek(s->fd, *offset, SEEK_SET);
> +    ret = write(s->fd, buf, length);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to save memory.\n");
> +        return -1;
> +    }
> +
> +    *offset += length;
> +    return 0;
> +}
> +
> +/* write the memory to vmcore. 1 page per I/O. */
> +static int write_memory(DumpState *s, RAMBlock *block,
> +                        target_phys_addr_t *offset)
> +{
> +    int i, ret;
> +
> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         block->length % TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* get the memory's offset in the vmcore */
> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> +                                     target_phys_addr_t memory_offset)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = memory_offset;
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (phys_addr >= block->offset &&
> +            phys_addr < block->offset + block->length) {
> +            return phys_addr - block->offset + offset;
> +        }
> +        offset += block->length;
> +    }
> +
> +    return -1;
> +}
> +
> +static DumpState *dump_init(Monitor *mon, int fd)
> +{
> +    CPUState *env;
> +    DumpState *s = dump_get_current();
> +    int ret;
> +
> +    vm_stop(RUN_STATE_PAUSED);
> +    s->state = DUMP_STATE_SETUP;
> +    s->error = NULL;
> +    s->mon = mon;
> +    s->fd = fd;
> +
> +    /*
> +     * get dump info: endian, class and architecture.
> +     * If the target architecture is not supported, cpu_get_dump_info() will
> +     * return -1.
> +     *
> +     * if we use kvm, we should synchronize the register before we get dump
> +     * info.
> +     */
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        cpu_synchronize_state(env);
> +    }
> +    ret = cpu_get_dump_info(&s->dump_info);
> +    if (ret < 0) {
> +        monitor_printf(mon, "dump: unsupported target.\n");
> +        return NULL;
> +    }
> +
> +    /* get memory mapping */
> +    s->list.num = 0;
> +    QTAILQ_INIT(&s->list.head);
> +    get_memory_mapping(&s->list);
> +
> +    /* crash needs extra memory mapping to determine phys_base. */
> +    ret = cpu_add_extra_memory_mapping(&s->list);
> +    if (ret < 0) {
> +        monitor_printf(mon, "dump: failed to add extra memory mapping.\n");
> +        return NULL;
> +    }
> +
> +    /*
> +     * calculate phdr_num
> +     *
> +     * the type of e_phnum is uint16_t, so we should avoid overflow
> +     */
> +    s->phdr_num = 1; /* PT_NOTE */
> +    if (s->list.num > (1 << 16) - 2) {
> +        s->phdr_num = (1 << 16) - 1;
> +    } else {
> +        s->phdr_num += s->list.num;
> +    }
> +
> +    return s;
> +}
> +
> +/* write elf header, PT_NOTE and elf note to vmcore. */
> +static int dump_begin(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    int ret;
> +
> +    s->state = DUMP_STATE_ACTIVE;
> +
> +    /*
> +     * the vmcore's format is:
> +     *   --------------
> +     *   |  elf header |
> +     *   --------------
> +     *   |  PT_NOTE    |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  ......     |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  elf note   |
> +     *   --------------
> +     *   |  memory     |
> +     *   --------------
> +     *
> +     * we only know where the memory is saved after we write elf note into
> +     * vmcore.
> +     */
> +
> +    /* write elf header to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        ret = write_elf64_header(s);
> +    } else {
> +        ret = write_elf32_header(s);
> +    }
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    /* write elf notes to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
> +        ret = write_elf64_notes(s, 0, &offset);
> +    } else {
> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
> +        ret = write_elf32_notes(s, 0, &offset);
> +    }
> +
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    s->memory_offset = offset;
> +    return 0;
> +}
> +
> +/* write PT_LOAD to vmcore */
> +static int dump_completed(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    MemoryMapping *memory_mapping;
> +    int phdr_index = 1, ret;
> +
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
> +        if (s->dump_info.d_class == ELFCLASS64) {
> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
> +        } else {
> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
> +        }
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    s->state = DUMP_STATE_COMPLETED;
> +    dump_cleanup(s);
> +    return 0;
> +}
> +
> +/* write all memory to vmcore */
> +static int dump_iterate(DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int ret;
> +
> +    /* write all memory to vmcore */
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        ret = write_memory(s, block, &offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return dump_completed(s);
> +}
> +
> +static int create_vmcore(DumpState *s)
> +{
> +    int ret;
> +
> +    ret = dump_begin(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = dump_iterate(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data)

This is not using the QAPI. Please take a look at
docs/writing-qmp-commands.txt to see how to do that. You can also look at the
various examples in hmp.c/qmp.c.

I haven't reviewed your approach for the asynchronous support yet. We're
currently discussing what to do about commands that introduce their own async
support; I will review it as soon as we have a decision.

Btw, I'd like to have an ack from Jan for the general approach of this
command.

> +{
> +    const char *file = qdict_get_str(qdict, "file");
> +    const char *p;
> +    int fd = -1;
> +    DumpState *s;
> +
> +#if !defined(WIN32)
> +    if (strstart(file, "fd:", &p)) {
> +        fd = monitor_get_fd(mon, p);
> +        if (fd == -1) {
> +            monitor_printf(mon, "dump: invalid file descriptor"
> +                           " identifier\n");
> +            return -1;
> +        }
> +    }
> +#endif
> +
> +    if  (strstart(file, "file:", &p)) {
> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
> +        if (fd < 0) {
> +            monitor_printf(mon, "dump: failed to open %s\n", p);
> +            return -1;
> +        }
> +    }
> +
> +    if (fd == -1) {
> +        monitor_printf(mon, "unknown dump protocol: %s\n", file);
> +        return -1;
> +    }
> +
> +    s = dump_init(mon, fd);
> +    if (!s) {
> +        return -1;
> +    }
> +
> +    if (create_vmcore(s) < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> diff --git a/dump.h b/dump.h
> index a36468b..def6c0e 100644
> --- a/dump.h
> +++ b/dump.h
> @@ -1,10 +1,14 @@
>  #ifndef DUMP_H
>  #define DUMP_H
>  
> +#include "qdict.h"
> +
>  typedef struct ArchDumpInfo {
>      int d_machine;  /* Architecture */
>      int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
>      int d_class;    /* ELFCLASS32 or ELFCLASS64 */
>  } ArchDumpInfo;
>  
> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data);
> +
>  #endif
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 14838b7..98c1c35 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -828,6 +828,22 @@ new parameters (if specified) once the vm migration finished successfully.
>  ETEXI
>  
>      {
> +        .name       = "dump",
> +        .args_type  = "file:s",
> +        .params     = "file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_dump,
> +    },
> +
> +
> +STEXI
> +@item dump @var{file}
> +@findex dump
> +Dump to @var{file}.
> +ETEXI
> +
> +    {
>          .name       = "snapshot_blkdev",
>          .args_type  = "device:B,snapshot-file:s?,format:s?",
>          .params     = "device [new-image-file] [format]",
> diff --git a/monitor.c b/monitor.c
> index 7334401..edd6aa7 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -73,6 +73,9 @@
>  #endif
>  #include "hw/lm32_pic.h"
>  
> +/* for dump */
> +#include "dump.h"
> +
>  //#define DEBUG
>  //#define DEBUG_COMPLETION
>  
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 7e3f4b9..023cade 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -572,6 +572,32 @@ Example:
>  EQMP
>  
>      {
> +        .name       = "dump",
> +        .args_type  = "file:s",
> +        .params     = "file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_dump,
> +    },
> +
> +SQMP
> +dump
> +
> +
> +Dump to file.
> +
> +Arguments:
> +
> +- "file": Destination file (json-string)
> +
> +Example:
> +
> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>          .name       = "netdev_add",
>          .args_type  = "netdev:O",
>          .params     = "[user|tap|socket],id=str[,prop=value][,...]",
Wen Congyang Jan. 11, 2012, 12:59 a.m. UTC | #2
At 01/10/2012 09:30 PM, Luiz Capitulino Wrote:
> On Wed, 04 Jan 2012 14:11:01 +0800
> Wen Congyang <wency@cn.fujitsu.com> wrote:
> 
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>  Makefile.target |    8 +-
>>  dump.c          |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  dump.h          |    4 +
>>  hmp-commands.hx |   16 ++
>>  monitor.c       |    3 +
>>  qmp-commands.hx |   26 +++
>>  6 files changed, 641 insertions(+), 4 deletions(-)
>>  create mode 100644 dump.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index 29562ad..f7cc2b9 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -110,7 +110,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
>>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
>>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
>>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
>> -      user-exec.o $(oslib-obj-y)
>> +      user-exec.o $(oslib-obj-y) dump.o
>>  
>>  obj-$(TARGET_HAS_BFLT) += flatload.o
>>  
>> @@ -148,7 +148,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
>>  LIBS+=-lmx
>>  
>>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
>> -        gdbstub.o user-exec.o
>> +        gdbstub.o user-exec.o dump.o
>>  
>>  obj-i386-y += ioport-user.o
>>  
>> @@ -170,7 +170,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
>>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
>>  
>>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
>> -        gdbstub.o uaccess.o user-exec.o
>> +        gdbstub.o uaccess.o user-exec.o dump.o
>>  
>>  obj-i386-y += ioport-user.o
>>  
>> @@ -186,7 +186,7 @@ endif #CONFIG_BSD_USER
>>  # System emulator target
>>  ifdef CONFIG_SOFTMMU
>>  
>> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
>> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
>>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>>  # need to fix this properly
>>  obj-$(CONFIG_NO_PCI) += pci-stub.o
>> diff --git a/dump.c b/dump.c
>> new file mode 100644
>> index 0000000..ab29a4c
>> --- /dev/null
>> +++ b/dump.c
>> @@ -0,0 +1,588 @@
>> +/*
>> + * QEMU dump
>> + *
>> + * Copyright Fujitsu, Corp. 2011
>> + *
>> + * Authors:
>> + *     Wen Congyang <wency@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include <unistd.h>
>> +#include <elf.h>
>> +#include <sys/procfs.h>
>> +#include "cpu.h"
>> +#include "cpu-all.h"
>> +#include "targphys.h"
>> +#include "monitor.h"
>> +#include "kvm.h"
>> +#include "dump.h"
>> +#include "sysemu.h"
>> +#include "bswap.h"
>> +#include "memory_mapping.h"
>> +
>> +#define CPU_CONVERT_TO_TARGET16(val) \
>> +({ \
>> +    uint16_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le16(_val); \
>> +    } else {\
>> +        _val = cpu_to_be16(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +#define CPU_CONVERT_TO_TARGET32(val) \
>> +({ \
>> +    uint32_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le32(_val); \
>> +    } else {\
>> +        _val = cpu_to_be32(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +#define CPU_CONVERT_TO_TARGET64(val) \
>> +({ \
>> +    uint64_t _val = (val); \
>> +    if (endian == ELFDATA2LSB) { \
>> +        _val = cpu_to_le64(_val); \
>> +    } else {\
>> +        _val = cpu_to_be64(_val); \
>> +    } \
>> +    _val; \
>> +})
>> +
>> +enum {
>> +    DUMP_STATE_ERROR,
>> +    DUMP_STATE_SETUP,
>> +    DUMP_STATE_CANCELLED,
>> +    DUMP_STATE_ACTIVE,
>> +    DUMP_STATE_COMPLETED,
>> +};
>> +
>> +typedef struct DumpState {
>> +    ArchDumpInfo dump_info;
>> +    MemoryMappingList list;
>> +    int phdr_num;
>> +    int state;
>> +    char *error;
>> +    Monitor *mon;
>> +    int fd;
>> +    target_phys_addr_t memory_offset;
>> +} DumpState;
>> +
>> +static DumpState *dump_get_current(void)
>> +{
>> +    static DumpState current_dump = {
>> +        .state = DUMP_STATE_SETUP,
>> +    };
>> +
>> +    return &current_dump;
>> +}
>> +
>> +static int dump_cleanup(DumpState *s)
>> +{
>> +    int ret = 0;
>> +
>> +    free_memory_mapping_list(&s->list);
>> +    if (s->fd != -1) {
>> +        close(s->fd);
>> +        s->fd = -1;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void dump_error(DumpState *s, const char *reason)
>> +{
>> +    s->state = DUMP_STATE_ERROR;
>> +    s->error = g_strdup(reason);
>> +    dump_cleanup(s);
>> +}
>> +
>> +static inline int cpuid(CPUState *env)
>> +{
>> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
>> +    return env->host_tid;
>> +#else
>> +    return env->cpu_index + 1;
>> +#endif
>> +}
>> +
>> +static int write_elf64_header(DumpState *s)
>> +{
>> +    Elf64_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
>> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>> +
>> +    lseek(s->fd, 0, SEEK_SET);
>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_header(DumpState *s)
>> +{
>> +    Elf32_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
>> +    elf_header.e_ident[EI_DATA] = endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>> +
>> +    lseek(s->fd, 0, SEEK_SET);
>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>> +    }
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>> +    }
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = cpuid(env);
>> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr);
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = cpuid(env);
>> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr);
>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_data(DumpState *s, void *buf, int length,
>> +                      target_phys_addr_t *offset)
>> +{
>> +    int ret;
>> +
>> +    lseek(s->fd, *offset, SEEK_SET);
>> +    ret = write(s->fd, buf, length);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to save memory.\n");
>> +        return -1;
>> +    }
>> +
>> +    *offset += length;
>> +    return 0;
>> +}
>> +
>> +/* write the memory to vmcore. 1 page per I/O. */
>> +static int write_memory(DumpState *s, RAMBlock *block,
>> +                        target_phys_addr_t *offset)
>> +{
>> +    int i, ret;
>> +
>> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>> +                         TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>> +                         block->length % TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* get the memory's offset in the vmcore */
>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>> +                                     target_phys_addr_t memory_offset)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = memory_offset;
>> +
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        if (phys_addr >= block->offset &&
>> +            phys_addr < block->offset + block->length) {
>> +            return phys_addr - block->offset + offset;
>> +        }
>> +        offset += block->length;
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +static DumpState *dump_init(Monitor *mon, int fd)
>> +{
>> +    CPUState *env;
>> +    DumpState *s = dump_get_current();
>> +    int ret;
>> +
>> +    vm_stop(RUN_STATE_PAUSED);
>> +    s->state = DUMP_STATE_SETUP;
>> +    s->error = NULL;
>> +    s->mon = mon;
>> +    s->fd = fd;
>> +
>> +    /*
>> +     * get dump info: endian, class and architecture.
>> +     * If the target architecture is not supported, cpu_get_dump_info() will
>> +     * return -1.
>> +     *
>> +     * if we use kvm, we should synchronize the register before we get dump
>> +     * info.
>> +     */
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        cpu_synchronize_state(env);
>> +    }
>> +    ret = cpu_get_dump_info(&s->dump_info);
>> +    if (ret < 0) {
>> +        monitor_printf(mon, "dump: unsupported target.\n");
>> +        return NULL;
>> +    }
>> +
>> +    /* get memory mapping */
>> +    s->list.num = 0;
>> +    QTAILQ_INIT(&s->list.head);
>> +    get_memory_mapping(&s->list);
>> +
>> +    /* crash needs extra memory mapping to determine phys_base. */
>> +    ret = cpu_add_extra_memory_mapping(&s->list);
>> +    if (ret < 0) {
>> +        monitor_printf(mon, "dump: failed to add extra memory mapping.\n");
>> +        return NULL;
>> +    }
>> +
>> +    /*
>> +     * calculate phdr_num
>> +     *
>> +     * the type of phdr->num is uint16_t, so we should avoid overflow
>> +     */
>> +    s->phdr_num = 1; /* PT_NOTE */
>> +    if (s->list.num > (1 << 16) - 2) {
>> +        s->phdr_num = (1 << 16) - 1;
>> +    } else {
>> +        s->phdr_num += s->list.num;
>> +    }
>> +
>> +    return s;
>> +}
>> +
>> +/* write elf header, PT_NOTE and elf note to vmcore. */
>> +static int dump_begin(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    int ret;
>> +
>> +    s->state = DUMP_STATE_ACTIVE;
>> +
>> +    /*
>> +     * the vmcore's format is:
>> +     *   --------------
>> +     *   |  elf header |
>> +     *   --------------
>> +     *   |  PT_NOTE    |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  ......     |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  elf note   |
>> +     *   --------------
>> +     *   |  memory     |
>> +     *   --------------
>> +     *
>> +     * we only know where the memory is saved after we write elf note into
>> +     * vmcore.
>> +     */
>> +
>> +    /* write elf header to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        ret = write_elf64_header(s);
>> +    } else {
>> +        ret = write_elf32_header(s);
>> +    }
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    /* write elf notes to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
>> +        ret = write_elf64_notes(s, 0, &offset);
>> +    } else {
>> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
>> +        ret = write_elf32_notes(s, 0, &offset);
>> +    }
>> +
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    s->memory_offset = offset;
>> +    return 0;
>> +}
>> +
>> +/* write PT_LOAD to vmcore */
>> +static int dump_completed(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    MemoryMapping *memory_mapping;
>> +    int phdr_index = 1, ret;
>> +
>> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
>> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
>> +        if (s->dump_info.d_class == ELFCLASS64) {
>> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
>> +        } else {
>> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
>> +        }
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    s->state = DUMP_STATE_COMPLETED;
>> +    dump_cleanup(s);
>> +    return 0;
>> +}
>> +
>> +/* write all memory to vmcore */
>> +static int dump_iterate(DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = s->memory_offset;
>> +    int ret;
>> +
>> +    /* write all memory to vmcore */
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        ret = write_memory(s, block, &offset);
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    return dump_completed(s);
>> +}
>> +
>> +static int create_vmcore(DumpState *s)
>> +{
>> +    int ret;
>> +
>> +    ret = dump_begin(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    ret = dump_iterate(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data)
> 
> This is not using the QAPI. Please, take a look at the document
> docs/writing-qmp-commands.txt on how to do that. You can also look at the
> various examples in hmp.c/qmp.c.

Yes, I have read it. But I need the monitor to get the fd, and I could not find
any such examples. The migrate command also needs an fd, and it has not been
converted to use the QAPI. So, I do not know how to do that.

Thanks
Wen Congyang

> 
> I haven't reviewed your approach for the asynchronous support yet. We're
> discussing right now what to do wrt commands introducing their own async
> support, will review it as soon as we have a decision.
> 
> Btw, I'd like to have an ack from Jan for the general approach of this
> command.
> 
>> +{
>> +    const char *file = qdict_get_str(qdict, "file");
>> +    const char *p;
>> +    int fd = -1;
>> +    DumpState *s;
>> +
>> +#if !defined(WIN32)
>> +    if (strstart(file, "fd:", &p)) {
>> +        fd = monitor_get_fd(mon, p);
>> +        if (fd == -1) {
>> +            monitor_printf(mon, "dump: invalid file descriptor"
>> +                           " identifier\n");
>> +            return -1;
>> +        }
>> +    }
>> +#endif
>> +
>> +    if  (strstart(file, "file:", &p)) {
>> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
>> +        if (fd < 0) {
>> +            monitor_printf(mon, "dump: failed to open %s\n", p);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if (fd == -1) {
>> +        monitor_printf(mon, "unknown dump protocol: %s\n", file);
>> +        return -1;
>> +    }
>> +
>> +    s = dump_init(mon, fd);
>> +    if (!s) {
>> +        return -1;
>> +    }
>> +
>> +    if (create_vmcore(s) < 0) {
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> diff --git a/dump.h b/dump.h
>> index a36468b..def6c0e 100644
>> --- a/dump.h
>> +++ b/dump.h
>> @@ -1,10 +1,14 @@
>>  #ifndef DUMP_H
>>  #define DUMP_H
>>  
>> +#include "qdict.h"
>> +
>>  typedef struct ArchDumpInfo {
>>      int d_machine;  /* Architecture */
>>      int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
>>      int d_class;    /* ELFCLASS32 or ELFCLASS64 */
>>  } ArchDumpInfo;
>>  
>> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data);
>> +
>>  #endif
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index 14838b7..98c1c35 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -828,6 +828,22 @@ new parameters (if specified) once the vm migration finished successfully.
>>  ETEXI
>>  
>>      {
>> +        .name       = "dump",
>> +        .args_type  = "file:s",
>> +        .params     = "file",
>> +        .help       = "dump to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd_new = do_dump,
>> +    },
>> +
>> +
>> +STEXI
>> +@item dump @var{file}
>> +@findex dump
>> +Dump to @var{file}.
>> +ETEXI
>> +
>> +    {
>>          .name       = "snapshot_blkdev",
>>          .args_type  = "device:B,snapshot-file:s?,format:s?",
>>          .params     = "device [new-image-file] [format]",
>> diff --git a/monitor.c b/monitor.c
>> index 7334401..edd6aa7 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -73,6 +73,9 @@
>>  #endif
>>  #include "hw/lm32_pic.h"
>>  
>> +/* for dump */
>> +#include "dump.h"
>> +
>>  //#define DEBUG
>>  //#define DEBUG_COMPLETION
>>  
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index 7e3f4b9..023cade 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -572,6 +572,32 @@ Example:
>>  EQMP
>>  
>>      {
>> +        .name       = "dump",
>> +        .args_type  = "file:s",
>> +        .params     = "file",
>> +        .help       = "dump to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd_new = do_dump,
>> +    },
>> +
>> +SQMP
>> +dump
>> +
>> +
>> +Dump to file.
>> +
>> +Arguments:
>> +
>> +- "file": Destination file (json-string)
>> +
>> +Example:
>> +
>> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
>> +<- { "return": {} }
>> +
>> +EQMP
>> +
>> +    {
>>          .name       = "netdev_add",
>>          .args_type  = "netdev:O",
>>          .params     = "[user|tap|socket],id=str[,prop=value][,...]",
> 
>
Luiz Capitulino Jan. 12, 2012, 1:49 p.m. UTC | #3
On Wed, 11 Jan 2012 08:59:24 +0800
Wen Congyang <wency@cn.fujitsu.com> wrote:

> At 01/10/2012 09:30 PM, Luiz Capitulino Wrote:
> > On Wed, 04 Jan 2012 14:11:01 +0800
> > Wen Congyang <wency@cn.fujitsu.com> wrote:
> > 
> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> >> ---
> >>  Makefile.target |    8 +-
> >>  dump.c          |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  dump.h          |    4 +
> >>  hmp-commands.hx |   16 ++
> >>  monitor.c       |    3 +
> >>  qmp-commands.hx |   26 +++
> >>  6 files changed, 641 insertions(+), 4 deletions(-)
> >>  create mode 100644 dump.c
> >>
> >> diff --git a/Makefile.target b/Makefile.target
> >> index 29562ad..f7cc2b9 100644
> >> --- a/Makefile.target
> >> +++ b/Makefile.target
> >> @@ -110,7 +110,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
> >>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
> >>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
> >>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
> >> -      user-exec.o $(oslib-obj-y)
> >> +      user-exec.o $(oslib-obj-y) dump.o
> >>  
> >>  obj-$(TARGET_HAS_BFLT) += flatload.o
> >>  
> >> @@ -148,7 +148,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
> >>  LIBS+=-lmx
> >>  
> >>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
> >> -        gdbstub.o user-exec.o
> >> +        gdbstub.o user-exec.o dump.o
> >>  
> >>  obj-i386-y += ioport-user.o
> >>  
> >> @@ -170,7 +170,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
> >>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
> >>  
> >>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
> >> -        gdbstub.o uaccess.o user-exec.o
> >> +        gdbstub.o uaccess.o user-exec.o dump.o
> >>  
> >>  obj-i386-y += ioport-user.o
> >>  
> >> @@ -186,7 +186,7 @@ endif #CONFIG_BSD_USER
> >>  # System emulator target
> >>  ifdef CONFIG_SOFTMMU
> >>  
> >> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
> >> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
> >>  # virtio has to be here due to weird dependency between PCI and virtio-net.
> >>  # need to fix this properly
> >>  obj-$(CONFIG_NO_PCI) += pci-stub.o
> >> diff --git a/dump.c b/dump.c
> >> new file mode 100644
> >> index 0000000..ab29a4c
> >> --- /dev/null
> >> +++ b/dump.c
> >> @@ -0,0 +1,588 @@
> >> +/*
> >> + * QEMU dump
> >> + *
> >> + * Copyright Fujitsu, Corp. 2011
> >> + *
> >> + * Authors:
> >> + *     Wen Congyang <wency@cn.fujitsu.com>
> >> + *
> >> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> >> + * the COPYING file in the top-level directory.
> >> + *
> >> + */
> >> +
> >> +#include "qemu-common.h"
> >> +#include <unistd.h>
> >> +#include <elf.h>
> >> +#include <sys/procfs.h>
> >> +#include "cpu.h"
> >> +#include "cpu-all.h"
> >> +#include "targphys.h"
> >> +#include "monitor.h"
> >> +#include "kvm.h"
> >> +#include "dump.h"
> >> +#include "sysemu.h"
> >> +#include "bswap.h"
> >> +#include "memory_mapping.h"
> >> +
> >> +#define CPU_CONVERT_TO_TARGET16(val) \
> >> +({ \
> >> +    uint16_t _val = (val); \
> >> +    if (endian == ELFDATA2LSB) { \
> >> +        _val = cpu_to_le16(_val); \
> >> +    } else {\
> >> +        _val = cpu_to_be16(_val); \
> >> +    } \
> >> +    _val; \
> >> +})
> >> +
> >> +#define CPU_CONVERT_TO_TARGET32(val) \
> >> +({ \
> >> +    uint32_t _val = (val); \
> >> +    if (endian == ELFDATA2LSB) { \
> >> +        _val = cpu_to_le32(_val); \
> >> +    } else {\
> >> +        _val = cpu_to_be32(_val); \
> >> +    } \
> >> +    _val; \
> >> +})
> >> +
> >> +#define CPU_CONVERT_TO_TARGET64(val) \
> >> +({ \
> >> +    uint64_t _val = (val); \
> >> +    if (endian == ELFDATA2LSB) { \
> >> +        _val = cpu_to_le64(_val); \
> >> +    } else {\
> >> +        _val = cpu_to_be64(_val); \
> >> +    } \
> >> +    _val; \
> >> +})
> >> +
> >> +enum {
> >> +    DUMP_STATE_ERROR,
> >> +    DUMP_STATE_SETUP,
> >> +    DUMP_STATE_CANCELLED,
> >> +    DUMP_STATE_ACTIVE,
> >> +    DUMP_STATE_COMPLETED,
> >> +};
> >> +
> >> +typedef struct DumpState {
> >> +    ArchDumpInfo dump_info;
> >> +    MemoryMappingList list;
> >> +    int phdr_num;
> >> +    int state;
> >> +    char *error;
> >> +    Monitor *mon;
> >> +    int fd;
> >> +    target_phys_addr_t memory_offset;
> >> +} DumpState;
> >> +
> >> +static DumpState *dump_get_current(void)
> >> +{
> >> +    static DumpState current_dump = {
> >> +        .state = DUMP_STATE_SETUP,
> >> +    };
> >> +
> >> +    return &current_dump;
> >> +}
> >> +
> >> +static int dump_cleanup(DumpState *s)
> >> +{
> >> +    int ret = 0;
> >> +
> >> +    free_memory_mapping_list(&s->list);
> >> +    if (s->fd != -1) {
> >> +        close(s->fd);
> >> +        s->fd = -1;
> >> +    }
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +static void dump_error(DumpState *s, const char *reason)
> >> +{
> >> +    s->state = DUMP_STATE_ERROR;
> >> +    s->error = g_strdup(reason);
> >> +    dump_cleanup(s);
> >> +}
> >> +
> >> +static inline int cpuid(CPUState *env)
> >> +{
> >> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
> >> +    return env->host_tid;
> >> +#else
> >> +    return env->cpu_index + 1;
> >> +#endif
> >> +}
> >> +
> >> +static int write_elf64_header(DumpState *s)
> >> +{
> >> +    Elf64_Ehdr elf_header;
> >> +    int ret;
> >> +    int endian = s->dump_info.d_endian;
> >> +
> >> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> >> +    memcpy(&elf_header, ELFMAG, 4);
> >> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> >> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> >> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> >> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
> >> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
> >> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
> >> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
> >> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
> >> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
> >> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
> >> +
> >> +    lseek(s->fd, 0, SEEK_SET);
> >> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to write elf header.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int write_elf32_header(DumpState *s)
> >> +{
> >> +    Elf32_Ehdr elf_header;
> >> +    int ret;
> >> +    int endian = s->dump_info.d_endian;
> >> +
> >> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> >> +    memcpy(&elf_header, ELFMAG, 4);
> >> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> >> +    elf_header.e_ident[EI_DATA] = endian;
> >> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> >> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
> >> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
> >> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
> >> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
> >> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
> >> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
> >> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
> >> +
> >> +    lseek(s->fd, 0, SEEK_SET);
> >> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to write elf header.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> >> +                            int phdr_index, target_phys_addr_t offset)
> >> +{
> >> +    Elf64_Phdr phdr;
> >> +    off_t phdr_offset;
> >> +    int ret;
> >> +    int endian = s->dump_info.d_endian;
> >> +
> >> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> >> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
> >> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
> >> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
> >> +    if (offset == -1) {
> >> +        phdr.p_filesz = 0;
> >> +    } else {
> >> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
> >> +    }
> >> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
> >> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
> >> +
> >> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
> >> +    lseek(s->fd, phdr_offset, SEEK_SET);
> >> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to write program header table.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
> >> +                            int phdr_index, target_phys_addr_t offset)
> >> +{
> >> +    Elf32_Phdr phdr;
> >> +    off_t phdr_offset;
> >> +    int ret;
> >> +    int endian = s->dump_info.d_endian;
> >> +
> >> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> >> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
> >> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
> >> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
> >> +    if (offset == -1) {
> >> +        phdr.p_filesz = 0;
> >> +    } else {
> >> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
> >> +    }
> >> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
> >> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
> >> +
> >> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
> >> +    lseek(s->fd, phdr_offset, SEEK_SET);
> >> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to write program header table.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int write_elf64_notes(DumpState *s, int phdr_index,
> >> +                             target_phys_addr_t *offset)
> >> +{
> >> +    CPUState *env;
> >> +    int ret;
> >> +    target_phys_addr_t begin = *offset;
> >> +    Elf64_Phdr phdr;
> >> +    off_t phdr_offset;
> >> +    int id;
> >> +    int endian = s->dump_info.d_endian;
> >> +
> >> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> >> +        id = cpuid(env);
> >> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
> >> +        if (ret < 0) {
> >> +            dump_error(s, "dump: failed to write elf notes.\n");
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> >> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
> >> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
> >> +    phdr.p_paddr = 0;
> >> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
> >> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
> >> +    phdr.p_vaddr = 0;
> >> +
> >> +    phdr_offset = sizeof(Elf64_Ehdr);
> >> +    lseek(s->fd, phdr_offset, SEEK_SET);
> >> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to write program header table.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int write_elf32_notes(DumpState *s, int phdr_index,
> >> +                             target_phys_addr_t *offset)
> >> +{
> >> +    CPUState *env;
> >> +    int ret;
> >> +    target_phys_addr_t begin = *offset;
> >> +    Elf32_Phdr phdr;
> >> +    off_t phdr_offset;
> >> +    int id;
> >> +    int endian = s->dump_info.d_endian;
> >> +
> >> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> >> +        id = cpuid(env);
> >> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
> >> +        if (ret < 0) {
> >> +            dump_error(s, "dump: failed to write elf notes.\n");
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> >> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
> >> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
> >> +    phdr.p_paddr = 0;
> >> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
> >> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
> >> +    phdr.p_vaddr = 0;
> >> +
> >> +    phdr_offset = sizeof(Elf32_Ehdr);
> >> +    lseek(s->fd, phdr_offset, SEEK_SET);
> >> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to write program header table.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int write_data(DumpState *s, void *buf, int length,
> >> +                      target_phys_addr_t *offset)
> >> +{
> >> +    int ret;
> >> +
> >> +    lseek(s->fd, *offset, SEEK_SET);
> >> +    ret = write(s->fd, buf, length);
> >> +    if (ret < 0) {
> >> +        dump_error(s, "dump: failed to save memory.\n");
> >> +        return -1;
> >> +    }
> >> +
> >> +    *offset += length;
> >> +    return 0;
> >> +}
> >> +
> >> +/* write the memory to vmcore. 1 page per I/O. */
> >> +static int write_memory(DumpState *s, RAMBlock *block,
> >> +                        target_phys_addr_t *offset)
> >> +{
> >> +    int i, ret;
> >> +
> >> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
> >> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> >> +                         TARGET_PAGE_SIZE, offset);
> >> +        if (ret < 0) {
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
> >> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> >> +                         block->length % TARGET_PAGE_SIZE, offset);
> >> +        if (ret < 0) {
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +/* get the memory's offset in the vmcore */
> >> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> >> +                                     target_phys_addr_t memory_offset)
> >> +{
> >> +    RAMBlock *block;
> >> +    target_phys_addr_t offset = memory_offset;
> >> +
> >> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> >> +        if (phys_addr >= block->offset &&
> >> +            phys_addr < block->offset + block->length) {
> >> +            return phys_addr - block->offset + offset;
> >> +        }
> >> +        offset += block->length;
> >> +    }
> >> +
> >> +    return -1;
> >> +}
> >> +
> >> +static DumpState *dump_init(Monitor *mon, int fd)
> >> +{
> >> +    CPUState *env;
> >> +    DumpState *s = dump_get_current();
> >> +    int ret;
> >> +
> >> +    vm_stop(RUN_STATE_PAUSED);
> >> +    s->state = DUMP_STATE_SETUP;
> >> +    s->error = NULL;
> >> +    s->mon = mon;
> >> +    s->fd = fd;
> >> +
> >> +    /*
> >> +     * get dump info: endian, class and architecture.
> >> +     * If the target architecture is not supported, cpu_get_dump_info() will
> >> +     * return -1.
> >> +     *
> >> +     * if we use kvm, we should synchronize the register before we get dump
> >> +     * info.
> >> +     */
> >> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> >> +        cpu_synchronize_state(env);
> >> +    }
> >> +    ret = cpu_get_dump_info(&s->dump_info);
> >> +    if (ret < 0) {
> >> +        monitor_printf(mon, "dump: unsupported target.\n");
> >> +        return NULL;
> >> +    }
> >> +
> >> +    /* get memory mapping */
> >> +    s->list.num = 0;
> >> +    QTAILQ_INIT(&s->list.head);
> >> +    get_memory_mapping(&s->list);
> >> +
> >> +    /* crash needs extra memory mapping to determine phys_base. */
> >> +    ret = cpu_add_extra_memory_mapping(&s->list);
> >> +    if (ret < 0) {
> >> +        monitor_printf(mon, "dump: failed to add extra memory mapping.\n");
> >> +        return NULL;
> >> +    }
> >> +
> >> +    /*
> >> +     * calculate phdr_num
> >> +     *
> >> +     * the type of phdr->num is uint16_t, so we should avoid overflow
> >> +     */
> >> +    s->phdr_num = 1; /* PT_NOTE */
> >> +    if (s->list.num > (1 << 16) - 2) {
> >> +        s->phdr_num = (1 << 16) - 1;
> >> +    } else {
> >> +        s->phdr_num += s->list.num;
> >> +    }
> >> +
> >> +    return s;
> >> +}
> >> +
> >> +/* write elf header, PT_NOTE and elf note to vmcore. */
> >> +static int dump_begin(DumpState *s)
> >> +{
> >> +    target_phys_addr_t offset;
> >> +    int ret;
> >> +
> >> +    s->state = DUMP_STATE_ACTIVE;
> >> +
> >> +    /*
> >> +     * the vmcore's format is:
> >> +     *   --------------
> >> +     *   |  elf header |
> >> +     *   --------------
> >> +     *   |  PT_NOTE    |
> >> +     *   --------------
> >> +     *   |  PT_LOAD    |
> >> +     *   --------------
> >> +     *   |  ......     |
> >> +     *   --------------
> >> +     *   |  PT_LOAD    |
> >> +     *   --------------
> >> +     *   |  elf note   |
> >> +     *   --------------
> >> +     *   |  memory     |
> >> +     *   --------------
> >> +     *
> >> +     * we only know where the memory is saved after we write elf note into
> >> +     * vmcore.
> >> +     */
> >> +
> >> +    /* write elf header to vmcore */
> >> +    if (s->dump_info.d_class == ELFCLASS64) {
> >> +        ret = write_elf64_header(s);
> >> +    } else {
> >> +        ret = write_elf32_header(s);
> >> +    }
> >> +    if (ret < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    /* write elf notes to vmcore */
> >> +    if (s->dump_info.d_class == ELFCLASS64) {
> >> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
> >> +        ret = write_elf64_notes(s, 0, &offset);
> >> +    } else {
> >> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
> >> +        ret = write_elf32_notes(s, 0, &offset);
> >> +    }
> >> +
> >> +    if (ret < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    s->memory_offset = offset;
> >> +    return 0;
> >> +}
> >> +
> >> +/* write PT_LOAD to vmcore */
> >> +static int dump_completed(DumpState *s)
> >> +{
> >> +    target_phys_addr_t offset;
> >> +    MemoryMapping *memory_mapping;
> >> +    int phdr_index = 1, ret;
> >> +
> >> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> >> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
> >> +        if (s->dump_info.d_class == ELFCLASS64) {
> >> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
> >> +        } else {
> >> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
> >> +        }
> >> +        if (ret < 0) {
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    s->state = DUMP_STATE_COMPLETED;
> >> +    dump_cleanup(s);
> >> +    return 0;
> >> +}
> >> +
> >> +/* write all memory to vmcore */
> >> +static int dump_iterate(DumpState *s)
> >> +{
> >> +    RAMBlock *block;
> >> +    target_phys_addr_t offset = s->memory_offset;
> >> +    int ret;
> >> +
> >> +    /* write all memory to vmcore */
> >> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> >> +        ret = write_memory(s, block, &offset);
> >> +        if (ret < 0) {
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    return dump_completed(s);
> >> +}
> >> +
> >> +static int create_vmcore(DumpState *s)
> >> +{
> >> +    int ret;
> >> +
> >> +    ret = dump_begin(s);
> >> +    if (ret < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    ret = dump_iterate(s);
> >> +    if (ret < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data)
> > 
> > This is not using the QAPI. Please, take a look at the document
> > docs/writing-qmp-commands.txt on how to do that. You can also look at the
> > various examples in hmp.c/qmp.c.
> 
> Yes, I have read it. But I need monitor to get fd, and I do not find such
> examples. The command migrate also needs fd, and it is not converted to use
> the QAPI. So, I do not know how to do that.

I have a first try on converting the migrate command to the QAPI in
this branch:

 http://repo.or.cz/w/qemu/qmp-unstable.git/shortlog/refs/heads/qmp-wip/qapi-commands-conv/set-complex/v1

I guess all you need is the qemu_get_fd() function.

> 
> Thanks
> Wen Congyang
> 
> > 
> > I haven't reviewed your approach for the asynchronous support yet. We're
> > discussing right now what to do wrt commands introducing their own async
> > support, will review it as soon as we have a decision.
> > 
> > Btw, I'd like to have an ack from Jan for the general approach of this
> > command.
> > 
> >> +{
> >> +    const char *file = qdict_get_str(qdict, "file");
> >> +    const char *p;
> >> +    int fd = -1;
> >> +    DumpState *s;
> >> +
> >> +#if !defined(WIN32)
> >> +    if (strstart(file, "fd:", &p)) {
> >> +        fd = monitor_get_fd(mon, p);
> >> +        if (fd == -1) {
> >> +            monitor_printf(mon, "dump: invalid file descriptor"
> >> +                           " identifier\n");
> >> +            return -1;
> >> +        }
> >> +    }
> >> +#endif
> >> +
> >> +    if  (strstart(file, "file:", &p)) {
> >> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
> >> +        if (fd < 0) {
> >> +            monitor_printf(mon, "dump: failed to open %s\n", p);
> >> +            return -1;
> >> +        }
> >> +    }
> >> +
> >> +    if (fd == -1) {
> >> +        monitor_printf(mon, "unknown dump protocol: %s\n", file);
> >> +        return -1;
> >> +    }
> >> +
> >> +    s = dump_init(mon, fd);
> >> +    if (!s) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    if (create_vmcore(s) < 0) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> diff --git a/dump.h b/dump.h
> >> index a36468b..def6c0e 100644
> >> --- a/dump.h
> >> +++ b/dump.h
> >> @@ -1,10 +1,14 @@
> >>  #ifndef DUMP_H
> >>  #define DUMP_H
> >>  
> >> +#include "qdict.h"
> >> +
> >>  typedef struct ArchDumpInfo {
> >>      int d_machine;  /* Architecture */
> >>      int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
> >>      int d_class;    /* ELFCLASS32 or ELFCLASS64 */
> >>  } ArchDumpInfo;
> >>  
> >> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data);
> >> +
> >>  #endif
> >> diff --git a/hmp-commands.hx b/hmp-commands.hx
> >> index 14838b7..98c1c35 100644
> >> --- a/hmp-commands.hx
> >> +++ b/hmp-commands.hx
> >> @@ -828,6 +828,22 @@ new parameters (if specified) once the vm migration finished successfully.
> >>  ETEXI
> >>  
> >>      {
> >> +        .name       = "dump",
> >> +        .args_type  = "file:s",
> >> +        .params     = "file",
> >> +        .help       = "dump to file",
> >> +        .user_print = monitor_user_noop,
> >> +        .mhandler.cmd_new = do_dump,
> >> +    },
> >> +
> >> +
> >> +STEXI
> >> +@item dump @var{file}
> >> +@findex dump
> >> +Dump to @var{file}.
> >> +ETEXI
> >> +
> >> +    {
> >>          .name       = "snapshot_blkdev",
> >>          .args_type  = "device:B,snapshot-file:s?,format:s?",
> >>          .params     = "device [new-image-file] [format]",
> >> diff --git a/monitor.c b/monitor.c
> >> index 7334401..edd6aa7 100644
> >> --- a/monitor.c
> >> +++ b/monitor.c
> >> @@ -73,6 +73,9 @@
> >>  #endif
> >>  #include "hw/lm32_pic.h"
> >>  
> >> +/* for dump */
> >> +#include "dump.h"
> >> +
> >>  //#define DEBUG
> >>  //#define DEBUG_COMPLETION
> >>  
> >> diff --git a/qmp-commands.hx b/qmp-commands.hx
> >> index 7e3f4b9..023cade 100644
> >> --- a/qmp-commands.hx
> >> +++ b/qmp-commands.hx
> >> @@ -572,6 +572,32 @@ Example:
> >>  EQMP
> >>  
> >>      {
> >> +        .name       = "dump",
> >> +        .args_type  = "file:s",
> >> +        .params     = "file",
> >> +        .help       = "dump to file",
> >> +        .user_print = monitor_user_noop,
> >> +        .mhandler.cmd_new = do_dump,
> >> +    },
> >> +
> >> +SQMP
> >> +dump
> >> +
> >> +
> >> +Dump to file.
> >> +
> >> +Arguments:
> >> +
> >> +- "file": Destination file (json-string)
> >> +
> >> +Example:
> >> +
> >> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
> >> +<- { "return": {} }
> >> +
> >> +EQMP
> >> +
> >> +    {
> >>          .name       = "netdev_add",
> >>          .args_type  = "netdev:O",
> >>          .params     = "[user|tap|socket],id=str[,prop=value][,...]",
> > 
> > 
>
diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index 29562ad..f7cc2b9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -110,7 +110,7 @@  $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
 QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
 obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
       elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
-      user-exec.o $(oslib-obj-y)
+      user-exec.o $(oslib-obj-y) dump.o
 
 obj-$(TARGET_HAS_BFLT) += flatload.o
 
@@ -148,7 +148,7 @@  LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
 LIBS+=-lmx
 
 obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
-        gdbstub.o user-exec.o
+        gdbstub.o user-exec.o dump.o
 
 obj-i386-y += ioport-user.o
 
@@ -170,7 +170,7 @@  $(call set-vpath, $(SRC_PATH)/bsd-user)
 QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
 
 obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
-        gdbstub.o uaccess.o user-exec.o
+        gdbstub.o uaccess.o user-exec.o dump.o
 
 obj-i386-y += ioport-user.o
 
@@ -186,7 +186,7 @@  endif #CONFIG_BSD_USER
 # System emulator target
 ifdef CONFIG_SOFTMMU
 
-obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
+obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 obj-$(CONFIG_NO_PCI) += pci-stub.o
diff --git a/dump.c b/dump.c
new file mode 100644
index 0000000..ab29a4c
--- /dev/null
+++ b/dump.c
@@ -0,0 +1,588 @@ 
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011
+ *
+ * Authors:
+ *     Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include <unistd.h>
+#include <elf.h>
+#include <sys/procfs.h>
+#include "cpu.h"
+#include "cpu-all.h"
+#include "targphys.h"
+#include "monitor.h"
+#include "kvm.h"
+#include "dump.h"
+#include "sysemu.h"
+#include "bswap.h"
+#include "memory_mapping.h"
+
+#define CPU_CONVERT_TO_TARGET16(val) \
+({ \
+    uint16_t _val = (val); \
+    if (endian == ELFDATA2LSB) { \
+        _val = cpu_to_le16(_val); \
+    } else {\
+        _val = cpu_to_be16(_val); \
+    } \
+    _val; \
+})
+
+#define CPU_CONVERT_TO_TARGET32(val) \
+({ \
+    uint32_t _val = (val); \
+    if (endian == ELFDATA2LSB) { \
+        _val = cpu_to_le32(_val); \
+    } else {\
+        _val = cpu_to_be32(_val); \
+    } \
+    _val; \
+})
+
+#define CPU_CONVERT_TO_TARGET64(val) \
+({ \
+    uint64_t _val = (val); \
+    if (endian == ELFDATA2LSB) { \
+        _val = cpu_to_le64(_val); \
+    } else {\
+        _val = cpu_to_be64(_val); \
+    } \
+    _val; \
+})
+
+enum {
+    DUMP_STATE_ERROR,
+    DUMP_STATE_SETUP,
+    DUMP_STATE_CANCELLED,
+    DUMP_STATE_ACTIVE,
+    DUMP_STATE_COMPLETED,
+};
+
+typedef struct DumpState {
+    ArchDumpInfo dump_info;
+    MemoryMappingList list;
+    int phdr_num;
+    int state;
+    char *error;
+    Monitor *mon;
+    int fd;
+    target_phys_addr_t memory_offset;
+} DumpState;
+
+static DumpState *dump_get_current(void)
+{
+    static DumpState current_dump = {
+        .state = DUMP_STATE_SETUP,
+    };
+
+    return &current_dump;
+}
+
+static int dump_cleanup(DumpState *s)
+{
+    int ret = 0;
+
+    free_memory_mapping_list(&s->list);
+    if (s->fd != -1) {
+        close(s->fd);
+        s->fd = -1;
+    }
+
+    return ret;
+}
+
+static void dump_error(DumpState *s, const char *reason)
+{
+    s->state = DUMP_STATE_ERROR;
+    s->error = g_strdup(reason);
+    dump_cleanup(s);
+}
+
+static inline int cpuid(CPUState *env)
+{
+#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
+    return env->host_tid;
+#else
+    return env->cpu_index + 1;
+#endif
+}
+
+static int write_elf64_header(DumpState *s)
+{
+    Elf64_Ehdr elf_header;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
+    memcpy(&elf_header, ELFMAG, 4);
+    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
+    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
+    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
+    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
+    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
+    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
+    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
+    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
+    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
+
+    lseek(s->fd, 0, SEEK_SET);
+    ret = write(s->fd, &elf_header, sizeof(elf_header));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write elf header.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_header(DumpState *s)
+{
+    Elf32_Ehdr elf_header;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
+    memcpy(&elf_header, ELFMAG, 4);
+    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
+    elf_header.e_ident[EI_DATA] = endian;
+    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
+    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
+    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
+    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
+    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
+    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
+    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
+
+    lseek(s->fd, 0, SEEK_SET);
+    ret = write(s->fd, &elf_header, sizeof(elf_header));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write elf header.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
+                            int phdr_index, target_phys_addr_t offset)
+{
+    Elf64_Phdr phdr;
+    off_t phdr_offset;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&phdr, 0, sizeof(Elf64_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
+    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
+    if (offset == -1) {
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
+    }
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
+    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
+
+    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
+                            int phdr_index, target_phys_addr_t offset)
+{
+    Elf32_Phdr phdr;
+    off_t phdr_offset;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&phdr, 0, sizeof(Elf32_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
+    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
+    if (offset == -1) {
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
+    }
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
+    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
+
+    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf64_notes(DumpState *s, int phdr_index,
+                             target_phys_addr_t *offset)
+{
+    CPUState *env;
+    int ret;
+    target_phys_addr_t begin = *offset;
+    Elf64_Phdr phdr;
+    off_t phdr_offset;
+    int id;
+    int endian = s->dump_info.d_endian;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        id = cpuid(env);
+        ret = cpu_write_elf64_note(s->fd, env, id, offset);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write elf notes.\n");
+            return -1;
+        }
+    }
+
+    memset(&phdr, 0, sizeof(Elf64_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
+    phdr.p_paddr = 0;
+    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
+    phdr.p_vaddr = 0;
+
+    phdr_offset = sizeof(Elf64_Ehdr);
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_notes(DumpState *s, int phdr_index,
+                             target_phys_addr_t *offset)
+{
+    CPUState *env;
+    int ret;
+    target_phys_addr_t begin = *offset;
+    Elf32_Phdr phdr;
+    off_t phdr_offset;
+    int id;
+    int endian = s->dump_info.d_endian;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        id = cpuid(env);
+        ret = cpu_write_elf32_note(s->fd, env, id, offset);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write elf notes.\n");
+            return -1;
+        }
+    }
+
+    memset(&phdr, 0, sizeof(Elf32_Phdr));
+    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
+    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
+    phdr.p_paddr = 0;
+    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
+    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
+    phdr.p_vaddr = 0;
+
+    phdr_offset = sizeof(Elf32_Ehdr);
+    lseek(s->fd, phdr_offset, SEEK_SET);
+    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_data(DumpState *s, void *buf, int length,
+                      target_phys_addr_t *offset)
+{
+    int ret;
+
+    lseek(s->fd, *offset, SEEK_SET);
+    ret = write(s->fd, buf, length);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to save memory.\n");
+        return -1;
+    }
+
+    *offset += length;
+    return 0;
+}
+
+/* write the memory to vmcore. 1 page per I/O. */
+static int write_memory(DumpState *s, RAMBlock *block,
+                        target_phys_addr_t *offset)
+{
+    int i, ret;
+
+    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
+        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
+                         TARGET_PAGE_SIZE, offset);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    if ((block->length % TARGET_PAGE_SIZE) != 0) {
+        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
+                         block->length % TARGET_PAGE_SIZE, offset);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* get the memory's offset in the vmcore */
+static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
+                                     target_phys_addr_t memory_offset)
+{
+    RAMBlock *block;
+    target_phys_addr_t offset = memory_offset;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (phys_addr >= block->offset &&
+            phys_addr < block->offset + block->length) {
+            return phys_addr - block->offset + offset;
+        }
+        offset += block->length;
+    }
+
+    return -1;
+}
+
+static DumpState *dump_init(Monitor *mon, int fd)
+{
+    CPUState *env;
+    DumpState *s = dump_get_current();
+    int ret;
+
+    vm_stop(RUN_STATE_PAUSED);
+    s->state = DUMP_STATE_SETUP;
+    s->error = NULL;
+    s->mon = mon;
+    s->fd = fd;
+
+    /*
+     * get dump info: endian, class and architecture.
+     * If the target architecture is not supported, cpu_get_dump_info() will
+     * return -1.
+     *
+     * if we use kvm, we should synchronize the registers before we get dump
+     * info.
+     */
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu_synchronize_state(env);
+    }
+    ret = cpu_get_dump_info(&s->dump_info);
+    if (ret < 0) {
+        monitor_printf(mon, "dump: unsupported target.\n");
+        return NULL;
+    }
+
+    /* get memory mapping */
+    s->list.num = 0;
+    QTAILQ_INIT(&s->list.head);
+    get_memory_mapping(&s->list);
+
+    /* crash needs extra memory mapping to determine phys_base. */
+    ret = cpu_add_extra_memory_mapping(&s->list);
+    if (ret < 0) {
+        monitor_printf(mon, "dump: failed to add extra memory mapping.\n");
+        return NULL;
+    }
+
+    /*
+     * calculate phdr_num
+     *
+     * e_phnum in the elf header is uint16_t, so we should avoid overflow
+     */
+    s->phdr_num = 1; /* PT_NOTE */
+    if (s->list.num > (1 << 16) - 2) {
+        s->phdr_num = (1 << 16) - 1;
+    } else {
+        s->phdr_num += s->list.num;
+    }
+
+    return s;
+}
+
+/* write elf header, PT_NOTE and elf note to vmcore. */
+static int dump_begin(DumpState *s)
+{
+    target_phys_addr_t offset;
+    int ret;
+
+    s->state = DUMP_STATE_ACTIVE;
+
+    /*
+     * the vmcore's format is:
+     *   --------------
+     *   |  elf header |
+     *   --------------
+     *   |  PT_NOTE    |
+     *   --------------
+     *   |  PT_LOAD    |
+     *   --------------
+     *   |  ......     |
+     *   --------------
+     *   |  PT_LOAD    |
+     *   --------------
+     *   |  elf note   |
+     *   --------------
+     *   |  memory     |
+     *   --------------
+     *
+     * we only know where the memory is saved after we write elf note into
+     * vmcore.
+     */
+
+    /* write elf header to vmcore */
+    if (s->dump_info.d_class == ELFCLASS64) {
+        ret = write_elf64_header(s);
+    } else {
+        ret = write_elf32_header(s);
+    }
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* write elf notes to vmcore */
+    if (s->dump_info.d_class == ELFCLASS64) {
+        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
+        ret = write_elf64_notes(s, 0, &offset);
+    } else {
+        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
+        ret = write_elf32_notes(s, 0, &offset);
+    }
+
+    if (ret < 0) {
+        return -1;
+    }
+
+    s->memory_offset = offset;
+    return 0;
+}
+
+/* write PT_LOAD to vmcore */
+static int dump_completed(DumpState *s)
+{
+    target_phys_addr_t offset;
+    MemoryMapping *memory_mapping;
+    int phdr_index = 1, ret;
+
+    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
+        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
+        if (s->dump_info.d_class == ELFCLASS64) {
+            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
+        } else {
+            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
+        }
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    s->state = DUMP_STATE_COMPLETED;
+    dump_cleanup(s);
+    return 0;
+}
+
+/* write all memory to vmcore */
+static int dump_iterate(DumpState *s)
+{
+    RAMBlock *block;
+    target_phys_addr_t offset = s->memory_offset;
+    int ret;
+
+    /* write all memory to vmcore */
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        ret = write_memory(s, block, &offset);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    return dump_completed(s);
+}
+
+static int create_vmcore(DumpState *s)
+{
+    int ret;
+
+    ret = dump_begin(s);
+    if (ret < 0) {
+        return -1;
+    }
+
+    ret = dump_iterate(s);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data)
+{
+    const char *file = qdict_get_str(qdict, "file");
+    const char *p;
+    int fd = -1;
+    DumpState *s;
+
+#if !defined(WIN32)
+    if (strstart(file, "fd:", &p)) {
+        fd = monitor_get_fd(mon, p);
+        if (fd == -1) {
+            monitor_printf(mon, "dump: invalid file descriptor"
+                           " identifier\n");
+            return -1;
+        }
+    }
+#endif
+
+    if  (strstart(file, "file:", &p)) {
+        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
+        if (fd < 0) {
+            monitor_printf(mon, "dump: failed to open %s\n", p);
+            return -1;
+        }
+    }
+
+    if (fd == -1) {
+        monitor_printf(mon, "unknown dump protocol: %s\n", file);
+        return -1;
+    }
+
+    s = dump_init(mon, fd);
+    if (!s) {
+        return -1;
+    }
+
+    if (create_vmcore(s) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/dump.h b/dump.h
index a36468b..def6c0e 100644
--- a/dump.h
+++ b/dump.h
@@ -1,10 +1,14 @@ 
 #ifndef DUMP_H
 #define DUMP_H
 
+#include "qdict.h"
+
 typedef struct ArchDumpInfo {
     int d_machine;  /* Architecture */
     int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
     int d_class;    /* ELFCLASS32 or ELFCLASS64 */
 } ArchDumpInfo;
 
+int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data);
+
 #endif
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 14838b7..98c1c35 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -828,6 +828,22 @@  new parameters (if specified) once the vm migration finished successfully.
 ETEXI
 
     {
+        .name       = "dump",
+        .args_type  = "file:s",
+        .params     = "file",
+        .help       = "dump to file",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_dump,
+    },
+
+
+STEXI
+@item dump @var{file}
+@findex dump
+Dump guest memory to @var{file}, given as @code{fd:name} or @code{file:path}.
+ETEXI
+
+    {
         .name       = "snapshot_blkdev",
         .args_type  = "device:B,snapshot-file:s?,format:s?",
         .params     = "device [new-image-file] [format]",
diff --git a/monitor.c b/monitor.c
index 7334401..edd6aa7 100644
--- a/monitor.c
+++ b/monitor.c
@@ -73,6 +73,9 @@ 
 #endif
 #include "hw/lm32_pic.h"
 
+/* for dump */
+#include "dump.h"
+
 //#define DEBUG
 //#define DEBUG_COMPLETION
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 7e3f4b9..023cade 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -572,6 +572,32 @@  Example:
 EQMP
 
     {
+        .name       = "dump",
+        .args_type  = "file:s",
+        .params     = "file",
+        .help       = "dump to file",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_dump,
+    },
+
+SQMP
+dump
+
+
+Dump guest memory to a file or a file descriptor.
+
+Arguments:
+
+- "file": Destination, either "fd:<name>" or "file:<path>" (json-string)
+
+Example:
+
+-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "netdev_add",
         .args_type  = "netdev:O",
         .params     = "[user|tap|socket],id=str[,prop=value][,...]",