Patchwork [RFC,08/14,v4] introduce a new monitor command 'dump' to dump guest's memory

login
register
mail settings
Submitter Wen Congyang
Date Jan. 13, 2012, 8:35 a.m.
Message ID <4F0FECBE.8080905@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/135736/
State New
Headers show

Comments

Wen Congyang - Jan. 13, 2012, 8:35 a.m.
At 01/12/2012 09:49 PM, Luiz Capitulino Wrote:
> On Wed, 11 Jan 2012 08:59:24 +0800
> Wen Congyang <wency@cn.fujitsu.com> wrote:
> 
>> At 01/10/2012 09:30 PM, Luiz Capitulino Wrote:
>>> On Wed, 04 Jan 2012 14:11:01 +0800
>>> Wen Congyang <wency@cn.fujitsu.com> wrote:
>>>
>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>> ---
>>>>  Makefile.target |    8 +-
>>>>  dump.c          |  588 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  dump.h          |    4 +
>>>>  hmp-commands.hx |   16 ++
>>>>  monitor.c       |    3 +
>>>>  qmp-commands.hx |   26 +++
>>>>  6 files changed, 641 insertions(+), 4 deletions(-)
>>>>  create mode 100644 dump.c
>>>>
>>>> diff --git a/Makefile.target b/Makefile.target
>>>> index 29562ad..f7cc2b9 100644
>>>> --- a/Makefile.target
>>>> +++ b/Makefile.target
>>>> @@ -110,7 +110,7 @@ $(call set-vpath, $(SRC_PATH)/linux-user:$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR
>>>>  QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
>>>>  obj-y = main.o syscall.o strace.o mmap.o signal.o thunk.o \
>>>>        elfload.o linuxload.o uaccess.o gdbstub.o cpu-uname.o \
>>>> -      user-exec.o $(oslib-obj-y)
>>>> +      user-exec.o $(oslib-obj-y) dump.o
>>>>  
>>>>  obj-$(TARGET_HAS_BFLT) += flatload.o
>>>>  
>>>> @@ -148,7 +148,7 @@ LDFLAGS+=-Wl,-segaddr,__STD_PROG_ZONE,0x1000 -image_base 0x0e000000
>>>>  LIBS+=-lmx
>>>>  
>>>>  obj-y = main.o commpage.o machload.o mmap.o signal.o syscall.o thunk.o \
>>>> -        gdbstub.o user-exec.o
>>>> +        gdbstub.o user-exec.o dump.o
>>>>  
>>>>  obj-i386-y += ioport-user.o
>>>>  
>>>> @@ -170,7 +170,7 @@ $(call set-vpath, $(SRC_PATH)/bsd-user)
>>>>  QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
>>>>  
>>>>  obj-y = main.o bsdload.o elfload.o mmap.o signal.o strace.o syscall.o \
>>>> -        gdbstub.o uaccess.o user-exec.o
>>>> +        gdbstub.o uaccess.o user-exec.o dump.o
>>>>  
>>>>  obj-i386-y += ioport-user.o
>>>>  
>>>> @@ -186,7 +186,7 @@ endif #CONFIG_BSD_USER
>>>>  # System emulator target
>>>>  ifdef CONFIG_SOFTMMU
>>>>  
>>>> -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o
>>>> +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o ioport.o dump.o
>>>>  # virtio has to be here due to weird dependency between PCI and virtio-net.
>>>>  # need to fix this properly
>>>>  obj-$(CONFIG_NO_PCI) += pci-stub.o
>>>> diff --git a/dump.c b/dump.c
>>>> new file mode 100644
>>>> index 0000000..ab29a4c
>>>> --- /dev/null
>>>> +++ b/dump.c
>>>> @@ -0,0 +1,588 @@
>>>> +/*
>>>> + * QEMU dump
>>>> + *
>>>> + * Copyright Fujitsu, Corp. 2011
>>>> + *
>>>> + * Authors:
>>>> + *     Wen Congyang <wency@cn.fujitsu.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>>>> + * the COPYING file in the top-level directory.
>>>> + *
>>>> + */
>>>> +
>>>> +#include "qemu-common.h"
>>>> +#include <unistd.h>
>>>> +#include <elf.h>
>>>> +#include <sys/procfs.h>
>>>> +#include "cpu.h"
>>>> +#include "cpu-all.h"
>>>> +#include "targphys.h"
>>>> +#include "monitor.h"
>>>> +#include "kvm.h"
>>>> +#include "dump.h"
>>>> +#include "sysemu.h"
>>>> +#include "bswap.h"
>>>> +#include "memory_mapping.h"
>>>> +
>>>> +#define CPU_CONVERT_TO_TARGET16(val) \
>>>> +({ \
>>>> +    uint16_t _val = (val); \
>>>> +    if (endian == ELFDATA2LSB) { \
>>>> +        _val = cpu_to_le16(_val); \
>>>> +    } else {\
>>>> +        _val = cpu_to_be16(_val); \
>>>> +    } \
>>>> +    _val; \
>>>> +})
>>>> +
>>>> +#define CPU_CONVERT_TO_TARGET32(val) \
>>>> +({ \
>>>> +    uint32_t _val = (val); \
>>>> +    if (endian == ELFDATA2LSB) { \
>>>> +        _val = cpu_to_le32(_val); \
>>>> +    } else {\
>>>> +        _val = cpu_to_be32(_val); \
>>>> +    } \
>>>> +    _val; \
>>>> +})
>>>> +
>>>> +#define CPU_CONVERT_TO_TARGET64(val) \
>>>> +({ \
>>>> +    uint64_t _val = (val); \
>>>> +    if (endian == ELFDATA2LSB) { \
>>>> +        _val = cpu_to_le64(_val); \
>>>> +    } else {\
>>>> +        _val = cpu_to_be64(_val); \
>>>> +    } \
>>>> +    _val; \
>>>> +})
>>>> +
>>>> +enum {
>>>> +    DUMP_STATE_ERROR,
>>>> +    DUMP_STATE_SETUP,
>>>> +    DUMP_STATE_CANCELLED,
>>>> +    DUMP_STATE_ACTIVE,
>>>> +    DUMP_STATE_COMPLETED,
>>>> +};
>>>> +
>>>> +typedef struct DumpState {
>>>> +    ArchDumpInfo dump_info;
>>>> +    MemoryMappingList list;
>>>> +    int phdr_num;
>>>> +    int state;
>>>> +    char *error;
>>>> +    Monitor *mon;
>>>> +    int fd;
>>>> +    target_phys_addr_t memory_offset;
>>>> +} DumpState;
>>>> +
>>>> +static DumpState *dump_get_current(void)
>>>> +{
>>>> +    static DumpState current_dump = {
>>>> +        .state = DUMP_STATE_SETUP,
>>>> +    };
>>>> +
>>>> +    return &current_dump;
>>>> +}
>>>> +
>>>> +static int dump_cleanup(DumpState *s)
>>>> +{
>>>> +    int ret = 0;
>>>> +
>>>> +    free_memory_mapping_list(&s->list);
>>>> +    if (s->fd != -1) {
>>>> +        close(s->fd);
>>>> +        s->fd = -1;
>>>> +    }
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void dump_error(DumpState *s, const char *reason)
>>>> +{
>>>> +    s->state = DUMP_STATE_ERROR;
>>>> +    s->error = g_strdup(reason);
>>>> +    dump_cleanup(s);
>>>> +}
>>>> +
>>>> +static inline int cpuid(CPUState *env)
>>>> +{
>>>> +#if defined(CONFIG_USER_ONLY) && defined(CONFIG_USE_NPTL)
>>>> +    return env->host_tid;
>>>> +#else
>>>> +    return env->cpu_index + 1;
>>>> +#endif
>>>> +}
>>>> +
>>>> +static int write_elf64_header(DumpState *s)
>>>> +{
>>>> +    Elf64_Ehdr elf_header;
>>>> +    int ret;
>>>> +    int endian = s->dump_info.d_endian;
>>>> +
>>>> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
>>>> +    memcpy(&elf_header, ELFMAG, 4);
>>>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
>>>> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
>>>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>>>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>>>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>>>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>>>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>>>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET64(sizeof(Elf64_Ehdr));
>>>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf64_Phdr));
>>>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>>>> +
>>>> +    lseek(s->fd, 0, SEEK_SET);
>>>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to write elf header.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int write_elf32_header(DumpState *s)
>>>> +{
>>>> +    Elf32_Ehdr elf_header;
>>>> +    int ret;
>>>> +    int endian = s->dump_info.d_endian;
>>>> +
>>>> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
>>>> +    memcpy(&elf_header, ELFMAG, 4);
>>>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
>>>> +    elf_header.e_ident[EI_DATA] = endian;
>>>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>>>> +    elf_header.e_type = CPU_CONVERT_TO_TARGET16(ET_CORE);
>>>> +    elf_header.e_machine = CPU_CONVERT_TO_TARGET16(s->dump_info.d_machine);
>>>> +    elf_header.e_version = CPU_CONVERT_TO_TARGET32(EV_CURRENT);
>>>> +    elf_header.e_ehsize = CPU_CONVERT_TO_TARGET16(sizeof(elf_header));
>>>> +    elf_header.e_phoff = CPU_CONVERT_TO_TARGET32(sizeof(Elf32_Ehdr));
>>>> +    elf_header.e_phentsize = CPU_CONVERT_TO_TARGET16(sizeof(Elf32_Phdr));
>>>> +    elf_header.e_phnum = CPU_CONVERT_TO_TARGET16(s->phdr_num);
>>>> +
>>>> +    lseek(s->fd, 0, SEEK_SET);
>>>> +    ret = write(s->fd, &elf_header, sizeof(elf_header));
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to write elf header.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
>>>> +                            int phdr_index, target_phys_addr_t offset)
>>>> +{
>>>> +    Elf64_Phdr phdr;
>>>> +    off_t phdr_offset;
>>>> +    int ret;
>>>> +    int endian = s->dump_info.d_endian;
>>>> +
>>>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>>>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>>>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(offset);
>>>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET64(memory_mapping->phys_addr);
>>>> +    if (offset == -1) {
>>>> +        phdr.p_filesz = 0;
>>>> +    } else {
>>>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>>>> +    }
>>>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(memory_mapping->length);
>>>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET64(memory_mapping->virt_addr);
>>>> +
>>>> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
>>>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>>>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to write program header table.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
>>>> +                            int phdr_index, target_phys_addr_t offset)
>>>> +{
>>>> +    Elf32_Phdr phdr;
>>>> +    off_t phdr_offset;
>>>> +    int ret;
>>>> +    int endian = s->dump_info.d_endian;
>>>> +
>>>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>>>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_LOAD);
>>>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(offset);
>>>> +    phdr.p_paddr = CPU_CONVERT_TO_TARGET32(memory_mapping->phys_addr);
>>>> +    if (offset == -1) {
>>>> +        phdr.p_filesz = 0;
>>>> +    } else {
>>>> +        phdr.p_filesz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>>>> +    }
>>>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(memory_mapping->length);
>>>> +    phdr.p_vaddr = CPU_CONVERT_TO_TARGET32(memory_mapping->virt_addr);
>>>> +
>>>> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
>>>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>>>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to write program header table.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int write_elf64_notes(DumpState *s, int phdr_index,
>>>> +                             target_phys_addr_t *offset)
>>>> +{
>>>> +    CPUState *env;
>>>> +    int ret;
>>>> +    target_phys_addr_t begin = *offset;
>>>> +    Elf64_Phdr phdr;
>>>> +    off_t phdr_offset;
>>>> +    int id;
>>>> +    int endian = s->dump_info.d_endian;
>>>> +
>>>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>>> +        id = cpuid(env);
>>>> +        ret = cpu_write_elf64_note(s->fd, env, id, offset);
>>>> +        if (ret < 0) {
>>>> +            dump_error(s, "dump: failed to write elf notes.\n");
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>>>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>>>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET64(begin);
>>>> +    phdr.p_paddr = 0;
>>>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>>>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET64(*offset - begin);
>>>> +    phdr.p_vaddr = 0;
>>>> +
>>>> +    phdr_offset = sizeof(Elf64_Ehdr);
>>>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>>>> +    ret = write(s->fd, &phdr, sizeof(Elf64_Phdr));
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to write program header table.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int write_elf32_notes(DumpState *s, int phdr_index,
>>>> +                             target_phys_addr_t *offset)
>>>> +{
>>>> +    CPUState *env;
>>>> +    int ret;
>>>> +    target_phys_addr_t begin = *offset;
>>>> +    Elf32_Phdr phdr;
>>>> +    off_t phdr_offset;
>>>> +    int id;
>>>> +    int endian = s->dump_info.d_endian;
>>>> +
>>>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>>> +        id = cpuid(env);
>>>> +        ret = cpu_write_elf32_note(s->fd, env, id, offset);
>>>> +        if (ret < 0) {
>>>> +            dump_error(s, "dump: failed to write elf notes.\n");
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>>>> +    phdr.p_type = CPU_CONVERT_TO_TARGET32(PT_NOTE);
>>>> +    phdr.p_offset = CPU_CONVERT_TO_TARGET32(begin);
>>>> +    phdr.p_paddr = 0;
>>>> +    phdr.p_filesz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>>>> +    phdr.p_memsz = CPU_CONVERT_TO_TARGET32(*offset - begin);
>>>> +    phdr.p_vaddr = 0;
>>>> +
>>>> +    phdr_offset = sizeof(Elf32_Ehdr);
>>>> +    lseek(s->fd, phdr_offset, SEEK_SET);
>>>> +    ret = write(s->fd, &phdr, sizeof(Elf32_Phdr));
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to write program header table.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int write_data(DumpState *s, void *buf, int length,
>>>> +                      target_phys_addr_t *offset)
>>>> +{
>>>> +    int ret;
>>>> +
>>>> +    lseek(s->fd, *offset, SEEK_SET);
>>>> +    ret = write(s->fd, buf, length);
>>>> +    if (ret < 0) {
>>>> +        dump_error(s, "dump: failed to save memory.\n");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    *offset += length;
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/* write the memory to vmcore. 1 page per I/O. */
>>>> +static int write_memory(DumpState *s, RAMBlock *block,
>>>> +                        target_phys_addr_t *offset)
>>>> +{
>>>> +    int i, ret;
>>>> +
>>>> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
>>>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>>>> +                         TARGET_PAGE_SIZE, offset);
>>>> +        if (ret < 0) {
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
>>>> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
>>>> +                         block->length % TARGET_PAGE_SIZE, offset);
>>>> +        if (ret < 0) {
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/* get the memory's offset in the vmcore */
>>>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>>>> +                                     target_phys_addr_t memory_offset)
>>>> +{
>>>> +    RAMBlock *block;
>>>> +    target_phys_addr_t offset = memory_offset;
>>>> +
>>>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>>>> +        if (phys_addr >= block->offset &&
>>>> +            phys_addr < block->offset + block->length) {
>>>> +            return phys_addr - block->offset + offset;
>>>> +        }
>>>> +        offset += block->length;
>>>> +    }
>>>> +
>>>> +    return -1;
>>>> +}
>>>> +
>>>> +static DumpState *dump_init(Monitor *mon, int fd)
>>>> +{
>>>> +    CPUState *env;
>>>> +    DumpState *s = dump_get_current();
>>>> +    int ret;
>>>> +
>>>> +    vm_stop(RUN_STATE_PAUSED);
>>>> +    s->state = DUMP_STATE_SETUP;
>>>> +    s->error = NULL;
>>>> +    s->mon = mon;
>>>> +    s->fd = fd;
>>>> +
>>>> +    /*
>>>> +     * get dump info: endian, class and architecture.
>>>> +     * If the target architecture is not supported, cpu_get_dump_info() will
>>>> +     * return -1.
>>>> +     *
>>>> +     * if we use kvm, we should synchronize the register before we get dump
>>>> +     * info.
>>>> +     */
>>>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>>> +        cpu_synchronize_state(env);
>>>> +    }
>>>> +    ret = cpu_get_dump_info(&s->dump_info);
>>>> +    if (ret < 0) {
>>>> +        monitor_printf(mon, "dump: unsupported target.\n");
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    /* get memory mapping */
>>>> +    s->list.num = 0;
>>>> +    QTAILQ_INIT(&s->list.head);
>>>> +    get_memory_mapping(&s->list);
>>>> +
>>>> +    /* crash needs extra memory mapping to determine phys_base. */
>>>> +    ret = cpu_add_extra_memory_mapping(&s->list);
>>>> +    if (ret < 0) {
>>>> +        monitor_printf(mon, "dump: failed to add extra memory mapping.\n");
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * calculate phdr_num
>>>> +     *
>>>> +     * e_phnum in the elf header is uint16_t, so we should avoid overflow
>>>> +     */
>>>> +    s->phdr_num = 1; /* PT_NOTE */
>>>> +    if (s->list.num > (1 << 16) - 2) {
>>>> +        s->phdr_num = (1 << 16) - 1;
>>>> +    } else {
>>>> +        s->phdr_num += s->list.num;
>>>> +    }
>>>> +
>>>> +    return s;
>>>> +}
>>>> +
>>>> +/* write elf header, PT_NOTE and elf note to vmcore. */
>>>> +static int dump_begin(DumpState *s)
>>>> +{
>>>> +    target_phys_addr_t offset;
>>>> +    int ret;
>>>> +
>>>> +    s->state = DUMP_STATE_ACTIVE;
>>>> +
>>>> +    /*
>>>> +     * the vmcore's format is:
>>>> +     *   --------------
>>>> +     *   |  elf header |
>>>> +     *   --------------
>>>> +     *   |  PT_NOTE    |
>>>> +     *   --------------
>>>> +     *   |  PT_LOAD    |
>>>> +     *   --------------
>>>> +     *   |  ......     |
>>>> +     *   --------------
>>>> +     *   |  PT_LOAD    |
>>>> +     *   --------------
>>>> +     *   |  elf note   |
>>>> +     *   --------------
>>>> +     *   |  memory     |
>>>> +     *   --------------
>>>> +     *
>>>> +     * we only know where the memory is saved after we write elf note into
>>>> +     * vmcore.
>>>> +     */
>>>> +
>>>> +    /* write elf header to vmcore */
>>>> +    if (s->dump_info.d_class == ELFCLASS64) {
>>>> +        ret = write_elf64_header(s);
>>>> +    } else {
>>>> +        ret = write_elf32_header(s);
>>>> +    }
>>>> +    if (ret < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    /* write elf notes to vmcore */
>>>> +    if (s->dump_info.d_class == ELFCLASS64) {
>>>> +        offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
>>>> +        ret = write_elf64_notes(s, 0, &offset);
>>>> +    } else {
>>>> +        offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
>>>> +        ret = write_elf32_notes(s, 0, &offset);
>>>> +    }
>>>> +
>>>> +    if (ret < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    s->memory_offset = offset;
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/* write PT_LOAD to vmcore */
>>>> +static int dump_completed(DumpState *s)
>>>> +{
>>>> +    target_phys_addr_t offset;
>>>> +    MemoryMapping *memory_mapping;
>>>> +    int phdr_index = 1, ret;
>>>> +
>>>> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
>>>> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
>>>> +        if (s->dump_info.d_class == ELFCLASS64) {
>>>> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
>>>> +        } else {
>>>> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
>>>> +        }
>>>> +        if (ret < 0) {
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    s->state = DUMP_STATE_COMPLETED;
>>>> +    dump_cleanup(s);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/* write all memory to vmcore */
>>>> +static int dump_iterate(DumpState *s)
>>>> +{
>>>> +    RAMBlock *block;
>>>> +    target_phys_addr_t offset = s->memory_offset;
>>>> +    int ret;
>>>> +
>>>> +    /* write all memory to vmcore */
>>>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>>>> +        ret = write_memory(s, block, &offset);
>>>> +        if (ret < 0) {
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return dump_completed(s);
>>>> +}
>>>> +
>>>> +static int create_vmcore(DumpState *s)
>>>> +{
>>>> +    int ret;
>>>> +
>>>> +    ret = dump_begin(s);
>>>> +    if (ret < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    ret = dump_iterate(s);
>>>> +    if (ret < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int do_dump(Monitor *mon, const QDict *qdict, QObject **ret_data)
>>>
>>> This is not using the QAPI. Please, take a look at the document
>>> docs/writing-qmp-commands.txt on how to do that. You can also look at the
>>> various examples in hmp.c/qmp.c.
>>
>> Yes, I have read it. But I need monitor to get fd, and I do not find such
>> examples. The command migrate also needs fd, and it is not converted to use
>> the QAPI. So, I do not know how to do that.
> 
> I have a first try on converting the migrate command to the QAPI in
> this branch:
> 
>  http://repo.or.cz/w/qemu/qmp-unstable.git/shortlog/refs/heads/qmp-wip/qapi-commands-conv/set-complex/v1
> 
> I guess all you need is the qemu_get_fd() function.

Yes, that is what I need.

I reread my patchset and the migration code, and I think we also need two more APIs to
suspend/resume the monitor:

From 6e4fc82c50ad0c816d7af2f63e32d018455e867a Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Fri, 13 Jan 2012 14:51:48 +0800
Subject: [PATCH] monitor: introduce qemu_suspend_monitor()/qemu_resume_monitor()

Introduce two APIs to suspend/resume cur_mon. They can be used by asynchronous
monitor commands.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 monitor.c |   10 ++++++++++
 monitor.h |    2 ++
 2 files changed, 12 insertions(+), 0 deletions(-)

Patch

diff --git a/monitor.c b/monitor.c
index 56f3778..4a264d3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4594,6 +4594,11 @@  static void monitor_command_cb(Monitor *mon, const char *cmdline, void *opaque)
     monitor_resume(mon);
 }
 
+int qemu_suspend_monitor(void)
+{
+    return monitor_suspend(cur_mon);
+}
+
 int monitor_suspend(Monitor *mon)
 {
     if (!mon->rs)
@@ -4602,6 +4607,11 @@  int monitor_suspend(Monitor *mon)
     return 0;
 }
 
+void qemu_resume_monitor(void)
+{
+    monitor_resume(cur_mon);
+}
+
 void monitor_resume(Monitor *mon)
 {
     if (!mon->rs)
diff --git a/monitor.h b/monitor.h
index 274cd39..aa15ad7 100644
--- a/monitor.h
+++ b/monitor.h
@@ -43,7 +43,9 @@  int monitor_cur_is_qmp(void);
 void monitor_protocol_event(MonitorEvent event, QObject *data);
 void monitor_init(CharDriverState *chr, int flags);
 
+int qemu_suspend_monitor(void);
 int monitor_suspend(Monitor *mon);
+void qemu_resume_monitor(void);
 void monitor_resume(Monitor *mon);
 
 int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,