
[11/11,v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

Message ID 4F680037.9090101@cn.fujitsu.com
State New

Commit Message

Wen Congyang March 20, 2012, 3:57 a.m. UTC
The command's usage:
   dump [-p] protocol [begin] [length]
The supported protocol can be file or fd:
1. file: the protocol starts with "file:", and the following string is
   the file's path.
2. fd: the protocol starts with "fd:", and the following string is the
   fd's name.

Note:
  1. If you want to use gdb to process the core, please specify the -p option.
     The reason the -p option is not the default is that a guest machine in a
     catastrophic state can have corrupted memory, which we cannot trust.
  2. This command doesn't support an fd that is associated with a pipe,
     socket, or FIFO (lseek will fail on such an fd).
  3. If you don't want to dump all of the guest's memory, please specify the
     start physical address and the length.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 Makefile.target  |    2 +-
 dump.c           |  841 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 elf.h            |    5 +
 hmp-commands.hx  |   28 ++
 hmp.c            |   22 ++
 hmp.h            |    1 +
 memory_mapping.c |   27 ++
 memory_mapping.h |    3 +
 qapi-schema.json |   18 ++
 qmp-commands.hx  |   38 +++
 10 files changed, 984 insertions(+), 1 deletions(-)
 create mode 100644 dump.c

Comments

Hatayama, Daisuke March 23, 2012, 8:06 a.m. UTC | #1
From: Wen Congyang <wency@cn.fujitsu.com>
Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
Date: Tue, 20 Mar 2012 11:57:43 +0800

<cut>

> +typedef struct DumpState {
> +    ArchDumpInfo dump_info;
> +    MemoryMappingList list;
> +    uint16_t phdr_num;
> +    uint32_t sh_info;
> +    bool have_section;
> +    bool resume;
> +    target_phys_addr_t memory_offset;
> +    write_core_dump_function f;

f() is too generic a name. The type information is meaningful enough, but
there is no explicit occurrence of a call to f() in the code, so it is hard to
follow. Could you consider renaming it?
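For example, something along these lines (the name is only a suggestion):

    write_core_dump_function write_vmcore;   /* was: f */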

> +    void (*cleanup)(void *opaque);
> +    void *opaque;
> +
> +    RAMBlock *block;
> +    ram_addr_t start;
> +    bool has_filter;
> +    int64_t begin;
> +    int64_t length;
> +} DumpState;
> +

<cut>

> +
> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target64(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
> +    if (offset == -1) {

Question: In what situations does offset become -1?

<cut>

> +static int write_elf_section(DumpState *s, target_phys_addr_t *offset, int type)
> +{
> +    Elf32_Shdr shdr32;
> +    Elf64_Shdr shdr64;
> +    int endian = s->dump_info.d_endian;
> +    int shdr_size;
> +    void *shdr;
> +    int ret;
> +
> +    if (type == 0) {

In other places you check s->dump_info.d_class == ELFCLASS64, and the
variable ``type'' here has the same meaning. It's better to make this
consistent with the others.

Also, the ``s->dump_info.d_class == ELFCLASS64'' check occurs many times
in the source code. Why not add a method (a function pointer) to the
DumpState type to avoid repeating it? For example,

typedef struct DumpState {
  ...
  int (*write_elf_header)(struct DumpState *s);
  ...
} DumpState;

and then register the appropriate function in this member from
cpu_get_dump_info().
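A rough sketch of what the registration could look like (illustration only;
dump_state_set_writers() is a hypothetical helper, not part of this patch):

    static void dump_state_set_writers(DumpState *s)
    {
        /* choose the ELF writer once instead of checking d_class everywhere */
        if (s->dump_info.d_class == ELFCLASS64) {
            s->write_elf_header = write_elf64_header;
        } else {
            s->write_elf_header = write_elf32_header;
        }
    }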

> +     *
> +     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
> +     */
> +    s->phdr_num = 1; /* PT_NOTE */
> +    if (s->list.num < UINT16_MAX - 2) {

Shouldn't this be s->list.num < UINT16_MAX - 1?

> +        s->phdr_num += s->list.num;
> +        s->have_section = false;
> +    } else {
> +        s->have_section = true;
> +        s->phdr_num = PN_XNUM;
> +        s->sh_info = 1; /* PT_NOTE */

It's confusing that the member names phdr_num and sh_info are taken from
different views. I first thought phdr_num held the actual number of program
headers, but in reality it is used as the e_phnum member of the ELF header.
It's better to call it e_phnum, just like sh_info, to avoid confusion.

Also, this logic conforms to the ELF specification, so it's better to move
it into write_elfNN_header() and write_elf_section().

Thanks.
HATAYAMA, Daisuke
Hatayama, Daisuke March 23, 2012, 9 a.m. UTC | #2
From: Wen Congyang <wency@cn.fujitsu.com>
Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
Date: Tue, 20 Mar 2012 11:57:43 +0800

> +static int write_elf64_header(DumpState *s)
> +{
> +    Elf64_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);

Use SELFMAG instead of 4.
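i.e. (SELFMAG is defined next to ELFMAG in elf.h):

    memcpy(&elf_header, ELFMAG, SELFMAG);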

> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
> +    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_header(DumpState *s)
> +{
> +    Elf32_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);

Also.

> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header.e_ident[EI_DATA] = endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
> +    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}

Thanks.
HATAYAMA, Daisuke
Hatayama, Daisuke March 23, 2012, 9:40 a.m. UTC | #3
From: Wen Congyang <wency@cn.fujitsu.com>
Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
Date: Tue, 20 Mar 2012 11:57:43 +0800

> +/* get the memory's offset in the vmcore */
> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> +                                     DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int64_t size_in_block, start;
> +
> +    if (s->has_filter) {
> +        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
> +            return -1;
> +        }
> +    }
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (s->has_filter) {
> +            if (block->offset >= s->begin + s->length ||
> +                block->offset + block->length <= s->begin) {
> +                /* This block is out of the range */
> +                continue;
> +            }
> +
> +            if (s->begin <= block->offset) {
> +                start = block->offset;
> +            } else {
> +                start = s->begin;
> +            }
> +
> +            size_in_block = block->length - (start - block->offset);
> +            if (s->begin + s->length < block->offset + block->length) {
> +                size_in_block -= block->offset + block->length -
> +                                 (s->begin + s->length);
> +            }
> +        } else {
> +            start = block->offset;
> +            size_in_block = block->length;
> +        }
> +
> +        if (phys_addr >= start && phys_addr < start + size_in_block) {
> +            return phys_addr - start + offset;
> +        }
> +
> +        offset += size_in_block;
> +    }
> +
> +    return -1;
> +}

OK. -1 is assigned to offset when the corresponding memory is filtered out
and so is not contained in the dump file. But please give -1 a name; I want
the name to tell me that the data has been filtered out.
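For example (just a sketch; the name is only a suggestion):

    /* the address is filtered out and not contained in the vmcore */
    #define DUMP_OFFSET_FILTERED_OUT ((target_phys_addr_t)-1)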

Also, I couldn't guess that get_offset() does the filtering. This is
important from the perspective of the dump, because there is data not
saved in the dump file. Could you clarify this in some way, e.g. by moving
the filtering to the caller, or by splitting it into another function?

Thanks.
HATAYAMA, Daisuke
Luiz Capitulino March 23, 2012, 5:07 p.m. UTC | #4
On Fri, 23 Mar 2012 17:06:22 +0900
HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com> wrote:

> From: Wen Congyang <wency@cn.fujitsu.com>
> Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
> Date: Tue, 20 Mar 2012 11:57:43 +0800
> 
> <cut>
> 
> > +typedef struct DumpState {
> > +    ArchDumpInfo dump_info;
> > +    MemoryMappingList list;
> > +    uint16_t phdr_num;
> > +    uint32_t sh_info;
> > +    bool have_section;
> > +    bool resume;
> > +    target_phys_addr_t memory_offset;
> > +    write_core_dump_function f;
> 
> f() is too generic a name. The type information is meaningful enough, but
> there is no explicit occurrence of a call to f() in the code, so it is hard to
> follow. Could you consider renaming it?

Agreed. I actually don't see why this indirection is needed.
Luiz Capitulino March 23, 2012, 5:19 p.m. UTC | #5
On Tue, 20 Mar 2012 11:57:43 +0800
Wen Congyang <wency@cn.fujitsu.com> wrote:

> The command's usage:
>    dump [-p] protocol [begin] [length]
> The supported protocol can be file or fd:
> 1. file: the protocol starts with "file:", and the following string is
>    the file's path.
> 2. fd: the protocol starts with "fd:", and the following string is the
>    fd's name.
> 
> Note:
>   1. If you want to use gdb to process the core, please specify the -p option.
>      The reason the -p option is not the default is that a guest machine in a
>      catastrophic state can have corrupted memory, which we cannot trust.
>   2. This command doesn't support an fd that is associated with a pipe,
>      socket, or FIFO (lseek will fail on such an fd).
>   3. If you don't want to dump all of the guest's memory, please specify the
>      start physical address and the length.

All the explanation above (except the HMP command line) belongs in the
qapi-schema.json file.

> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>  Makefile.target  |    2 +-
>  dump.c           |  841 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  elf.h            |    5 +
>  hmp-commands.hx  |   28 ++
>  hmp.c            |   22 ++
>  hmp.h            |    1 +
>  memory_mapping.c |   27 ++
>  memory_mapping.h |    3 +
>  qapi-schema.json |   18 ++
>  qmp-commands.hx  |   38 +++
>  10 files changed, 984 insertions(+), 1 deletions(-)
>  create mode 100644 dump.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index c3aa5d7..0e179c9 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -219,7 +219,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>  obj-$(CONFIG_VGA) += vga.o
>  obj-y += memory.o savevm.o
>  obj-y += memory_mapping.o
> -obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
> +obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
>  LIBS+=-lz
>  
>  obj-i386-$(CONFIG_KVM) += hyperv.o
> diff --git a/dump.c b/dump.c
> new file mode 100644
> index 0000000..917bccc
> --- /dev/null
> +++ b/dump.c
> @@ -0,0 +1,841 @@
> +/*
> + * QEMU dump
> + *
> + * Copyright Fujitsu, Corp. 2011, 2012
> + *
> + * Authors:
> + *     Wen Congyang <wency@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include <unistd.h>
> +#include "elf.h"
> +#include <sys/procfs.h>
> +#include <glib.h>
> +#include "cpu.h"
> +#include "cpu-all.h"
> +#include "targphys.h"
> +#include "monitor.h"
> +#include "kvm.h"
> +#include "dump.h"
> +#include "sysemu.h"
> +#include "bswap.h"
> +#include "memory_mapping.h"
> +#include "error.h"
> +#include "qmp-commands.h"
> +#include "gdbstub.h"
> +
> +static inline uint16_t cpu_convert_to_target16(uint16_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le16(val);
> +    } else {
> +        val = cpu_to_be16(val);
> +    }
> +
> +    return val;
> +}
> +
> +static inline uint32_t cpu_convert_to_target32(uint32_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le32(val);
> +    } else {
> +        val = cpu_to_be32(val);
> +    }
> +
> +    return val;
> +}
> +
> +static inline uint64_t cpu_convert_to_target64(uint64_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le64(val);
> +    } else {
> +        val = cpu_to_be64(val);
> +    }
> +
> +    return val;
> +}

Nitpick: personally, I wouldn't make these functions inline.

> +
> +typedef struct DumpState {
> +    ArchDumpInfo dump_info;
> +    MemoryMappingList list;
> +    uint16_t phdr_num;
> +    uint32_t sh_info;
> +    bool have_section;
> +    bool resume;
> +    target_phys_addr_t memory_offset;
> +    write_core_dump_function f;
> +    void (*cleanup)(void *opaque);
> +    void *opaque;
> +
> +    RAMBlock *block;
> +    ram_addr_t start;
> +    bool has_filter;
> +    int64_t begin;
> +    int64_t length;
> +} DumpState;
> +
> +static DumpState *dump_get_current(void)
> +{
> +    static DumpState current_dump;
> +
> +    return &current_dump;
> +}

You can drop this function if you allocate DumpState on the stack in
qmp_dump_guest_memory().
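Something like this, as a rough sketch (dump_init() and friends would then
take the state as a parameter instead of calling dump_get_current()):

    void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
                               int64_t begin, bool has_length, int64_t length,
                               Error **errp)
    {
        DumpState state = {};   /* lives only for the duration of the command */
        DumpState *s = &state;

        /* ... existing body, passing 's' down explicitly ... */
    }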

> +
> +static int dump_cleanup(DumpState *s)
> +{
> +    int ret = 0;
> +
> +    memory_mapping_list_free(&s->list);
> +    s->cleanup(s->opaque);
> +    if (s->resume) {
> +        vm_start();
> +    }
> +
> +    return ret;
> +}
> +
> +static void dump_error(DumpState *s, const char *reason)
> +{
> +    dump_cleanup(s);
> +}
> +
> +static int write_elf64_header(DumpState *s)
> +{
> +    Elf64_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
> +    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_header(DumpState *s)
> +{
> +    Elf32_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header.e_ident[EI_DATA] = endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
> +    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target64(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
> +    }
> +    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
> +    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
> +
> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target32(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
> +    }
> +    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
> +    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
> +
> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUArchState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = gdb_id(env);
> +        ret = cpu_write_elf64_note(s->f, env, id, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        ret = cpu_write_elf64_qemunote(s->f, env, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write CPU status.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
> +    phdr.p_offset = cpu_convert_to_target64(begin, endian);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = cpu_convert_to_target64(*offset - begin, endian);
> +    phdr.p_memsz = cpu_convert_to_target64(*offset - begin, endian);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf64_Ehdr);
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUArchState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = gdb_id(env);
> +        ret = cpu_write_elf32_note(s->f, env, id, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        ret = cpu_write_elf32_qemunote(s->f, env, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write CPU status.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
> +    phdr.p_offset = cpu_convert_to_target32(begin, endian);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = cpu_convert_to_target32(*offset - begin, endian);
> +    phdr.p_memsz = cpu_convert_to_target32(*offset - begin, endian);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf32_Ehdr);
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf_section(DumpState *s, target_phys_addr_t *offset, int type)
> +{
> +    Elf32_Shdr shdr32;
> +    Elf64_Shdr shdr64;
> +    int endian = s->dump_info.d_endian;
> +    int shdr_size;
> +    void *shdr;
> +    int ret;
> +
> +    if (type == 0) {
> +        shdr_size = sizeof(Elf32_Shdr);
> +        memset(&shdr32, 0, shdr_size);
> +        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
> +        shdr = &shdr32;
> +    } else {
> +        shdr_size = sizeof(Elf64_Shdr);
> +        memset(&shdr64, 0, shdr_size);
> +        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
> +        shdr = &shdr64;
> +    }
> +
> +    ret = s->f(*offset, &shdr, shdr_size, s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write section header table.\n");
> +        return -1;
> +    }
> +
> +    *offset += shdr_size;
> +    return 0;
> +}
> +
> +static int write_data(DumpState *s, void *buf, int length,
> +                      target_phys_addr_t *offset)
> +{
> +    int ret;
> +
> +    ret = s->f(*offset, buf, length, s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to save memory.\n");
> +        return -1;
> +    }
> +
> +    *offset += length;
> +    return 0;
> +}
> +
> +/* write the memroy to vmcore. 1 page per I/O. */
> +static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
> +                        target_phys_addr_t *offset, int64_t *size)
> +{
> +    int i, ret;
> +    int64_t writen_size = 0;
> +
> +    *size = block->length - start;
> +    for (i = 0; i < *size / TARGET_PAGE_SIZE; i++) {
> +        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
> +                         TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            *size = writen_size;
> +            return ret;
> +        }
> +
> +        writen_size += TARGET_PAGE_SIZE;
> +    }
> +
> +    if ((*size % TARGET_PAGE_SIZE) != 0) {
> +        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
> +                         *size % TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            *size = writen_size;
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* get the memory's offset in the vmcore */
> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> +                                     DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int64_t size_in_block, start;
> +
> +    if (s->has_filter) {
> +        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
> +            return -1;
> +        }
> +    }
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (s->has_filter) {
> +            if (block->offset >= s->begin + s->length ||
> +                block->offset + block->length <= s->begin) {
> +                /* This block is out of the range */
> +                continue;
> +            }
> +
> +            if (s->begin <= block->offset) {
> +                start = block->offset;
> +            } else {
> +                start = s->begin;
> +            }
> +
> +            size_in_block = block->length - (start - block->offset);
> +            if (s->begin + s->length < block->offset + block->length) {
> +                size_in_block -= block->offset + block->length -
> +                                 (s->begin + s->length);
> +            }
> +        } else {
> +            start = block->offset;
> +            size_in_block = block->length;
> +        }
> +
> +        if (phys_addr >= start && phys_addr < start + size_in_block) {
> +            return phys_addr - start + offset;
> +        }
> +
> +        offset += size_in_block;
> +    }
> +
> +    return -1;
> +}
> +
> +/* write elf header, PT_NOTE and elf note to vmcore. */
> +static int dump_begin(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    int ret;
> +
> +    /*
> +     * the vmcore's format is:
> +     *   --------------
> +     *   |  elf header |
> +     *   --------------
> +     *   |  PT_NOTE    |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  ......     |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  sec_hdr    |
> +     *   --------------
> +     *   |  elf note   |
> +     *   --------------
> +     *   |  memory     |
> +     *   --------------
> +     *
> +     * we only know where the memory is saved after we write elf note into
> +     * vmcore.
> +     */
> +
> +    /* write elf header to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        ret = write_elf64_header(s);
> +    } else {
> +        ret = write_elf32_header(s);
> +    }
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    /* write elf section and notes to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        if (s->have_section) {
> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->sh_info;
> +            if (write_elf_section(s, &offset, 1) < 0) {
> +                return -1;
> +            }
> +        } else {
> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
> +        }
> +        ret = write_elf64_notes(s, 0, &offset);
> +    } else {
> +        if (s->have_section) {
> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->sh_info;
> +            if (write_elf_section(s, &offset, 0) < 0) {
> +                return -1;
> +            }
> +        } else {
> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
> +        }
> +        ret = write_elf32_notes(s, 0, &offset);
> +    }
> +
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    s->memory_offset = offset;
> +    return 0;
> +}
> +
> +/* write PT_LOAD to vmcore */
> +static int dump_completed(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    MemoryMapping *memory_mapping;
> +    int phdr_index = 1, ret;
> +
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        offset = get_offset(memory_mapping->phys_addr, s);
> +        if (s->dump_info.d_class == ELFCLASS64) {
> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
> +        } else {
> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
> +        }
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    dump_cleanup(s);
> +    return 0;
> +}
> +
> +static int get_next_block(DumpState *s, RAMBlock *block)
> +{
> +    while (1) {
> +        block = QLIST_NEXT(block, next);
> +        if (!block) {
> +            /* no more block */
> +            return 1;
> +        }
> +
> +        s->start = 0;
> +        s->block = block;
> +        if (s->has_filter) {
> +            if (block->offset >= s->begin + s->length ||
> +                block->offset + block->length <= s->begin) {
> +                /* This block is out of the range */
> +                continue;
> +            }
> +
> +            if (s->begin > block->offset) {
> +                s->start = s->begin - block->offset;
> +            }
> +        }
> +
> +        return 0;
> +    }
> +}
> +
> +/* write all memory to vmcore */
> +static int dump_iterate(DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int64_t size;
> +    int ret;
> +
> +    while(1) {
> +        block = s->block;
> +        ret = write_memory(s, block, s->start, &offset, &size);
> +        if (ret == -1) {
> +            return ret;
> +        }
> +
> +        ret = get_next_block(s, block);
> +        if (ret == 1) {
> +            dump_completed(s);
> +            return 0;
> +        }
> +    }
> +}
> +
> +static int create_vmcore(DumpState *s)
> +{
> +    int ret;
> +
> +    ret = dump_begin(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = dump_iterate(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static ram_addr_t get_start_block(DumpState *s)
> +{
> +    RAMBlock *block;
> +
> +    if (!s->has_filter) {
> +        s->block = QLIST_FIRST(&ram_list.blocks);
> +        return 0;
> +    }
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (block->offset >= s->begin + s->length ||
> +            block->offset + block->length <= s->begin) {
> +            /* This block is out of the range */
> +            continue;
> +        }
> +
> +        s->block = block;
> +        if (s->begin > block->offset ) {
> +            s->start = s->begin - block->offset;
> +        } else {
> +            s->start = 0;
> +        }
> +        return s->start;
> +    }
> +
> +    return -1;
> +}
> +
> +static DumpState *dump_init(bool paging, bool has_filter, int64_t begin,
> +                            int64_t length, Error **errp)
> +{
> +    CPUArchState *env;
> +    DumpState *s = dump_get_current();

As I said above, you could drop dump_get_current() and pass 's' as a parameter.

> +    int ret;
> +
> +    if (runstate_is_running()) {
> +        vm_stop(RUN_STATE_SAVE_VM);
> +        s->resume = true;
> +    } else {
> +        s->resume = false;
> +    }
> +
> +    s->has_filter = has_filter;
> +    s->begin = begin;
> +    s->length = length;
> +    s->start = get_start_block(s);
> +    if (s->start == -1) {
> +        error_set(errp, QERR_INVALID_PARAMETER, "begin");
> +        goto cleanup;
> +    }
> +
> +    /*
> +     * get dump info: endian, class and architecture.
> +     * If the target architecture is not supported, cpu_get_dump_info() will
> +     * return -1.
> +     *
> +     * if we use kvm, we should synchronize the register before we get dump
> +     * info.
> +     */
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        cpu_synchronize_state(env);
> +    }
> +
> +    ret = cpu_get_dump_info(&s->dump_info);
> +    if (ret < 0) {
> +        error_set(errp, QERR_UNSUPPORTED);
> +        goto cleanup;
> +    }
> +
> +    /* get memory mapping */
> +    memory_mapping_list_init(&s->list);
> +    if (paging) {
> +        qemu_get_guest_memory_mapping(&s->list);
> +    } else {
> +        qemu_get_guest_simple_memory_mapping(&s->list);
> +    }
> +
> +    if (s->has_filter) {
> +        memory_mapping_filter(&s->list, s->begin, s->length);
> +    }
> +
> +    /*
> +     * calculate phdr_num
> +     *
> +     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
> +     */
> +    s->phdr_num = 1; /* PT_NOTE */
> +    if (s->list.num < UINT16_MAX - 2) {
> +        s->phdr_num += s->list.num;
> +        s->have_section = false;
> +    } else {
> +        s->have_section = true;
> +        s->phdr_num = PN_XNUM;
> +        s->sh_info = 1; /* PT_NOTE */
> +
> +        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
> +        if (s->list.num <= UINT32_MAX -1) {
> +            s->sh_info += s->list.num;
> +        } else {
> +            s->sh_info = UINT32_MAX;
> +        }
> +    }
> +
> +    return s;
> +
> +cleanup:
> +    if (s->resume) {
> +        vm_start();
> +    }
> +
> +    return NULL;
> +}
> +
> +static int fd_write_vmcore(target_phys_addr_t offset, void *buf, size_t size,
> +                           void *opaque)
> +{
> +    int fd = (int)(intptr_t)opaque;
> +    off_t ret;
> +    size_t writen_size;
> +
> +    while(1) {
> +        ret = lseek(fd, offset, SEEK_SET);
> +        if (ret < 0) {
> +            if (errno != EINTR && errno != EAGAIN) {

According to the man page, lseek() doesn't return EINTR or EAGAIN.

Also, it would be nice to check for ESPIPE and return a proper QMP error by
using error_set(). That error class doesn't exist yet; maybe it could be called
PipeOrSocketFD (look at qerror.[ch] for examples).
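A rough sketch of the check (QERR_PIPE_OR_SOCKET_FD is hypothetical and would
need to be added to qerror.[ch]; the callback would also need an Error ** to
report through):

    ret = lseek(fd, offset, SEEK_SET);
    if (ret < 0) {
        if (errno == ESPIPE) {
            error_set(errp, QERR_PIPE_OR_SOCKET_FD);
        }
        return -1;
    }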

> +                return -1;
> +            }
> +            continue;
> +        }
> +        break;
> +    }
> +
> +    /* The fd may be passed from user, and it can be non-blocked */
> +    while(size) {
> +        writen_size = qemu_write_full(fd, buf, size);
> +        if (writen_size != size && errno != EAGAIN) {
> +            return -1;
> +        }
> +
> +        buf += writen_size;
> +        size -= writen_size;
> +    }
> +
> +    return 0;
> +}
> +
> +static void fd_cleanup(void *opaque)
> +{
> +    int fd = (int)(intptr_t)opaque;
> +
> +    if (fd != -1) {
> +        close(fd);
> +    }
> +}
> +
> +static DumpState *dump_init_fd(int fd, bool paging, bool has_filter,
> +                               int64_t begin, int64_t length, Error **errp)
> +{
> +    DumpState *s = dump_init(paging, has_filter, begin, length, errp);
> +
> +    if (s == NULL) {
> +        return NULL;
> +    }
> +
> +    s->f = fd_write_vmcore;
> +    s->cleanup = fd_cleanup;
> +    s->opaque = (void *)(intptr_t)fd;
> +
> +    return s;
> +}
> +
> +void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
> +                           int64_t begin, bool has_length, int64_t length,
> +                           Error **errp)
> +{
> +    const char *p;
> +    int fd = -1;
> +    DumpState *s;
> +
> +    if (has_begin && !has_length) {
> +        error_set(errp, QERR_MISSING_PARAMETER, "length");
> +        return;
> +    }
> +    if (!has_begin && has_length) {
> +        error_set(errp, QERR_MISSING_PARAMETER, "begin");
> +        return;
> +    }
> +
> +#if !defined(WIN32)
> +    if (strstart(file, "fd:", &p)) {
> +        fd = monitor_get_fd(cur_mon, p);
> +        if (fd == -1) {
> +            error_set(errp, QERR_FD_NOT_FOUND, p);
> +            return;
> +        }
> +    }
> +#endif
> +
> +    if  (strstart(file, "file:", &p)) {
> +        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
> +        if (fd < 0) {
> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
> +            return;
> +        }
> +    }
> +
> +    if (fd == -1) {
> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
> +        return;
> +    }

s/file/parameter

> +
> +    s = dump_init_fd(fd, paging, has_begin, begin, length, errp);
> +    if (!s) {
> +        return;
> +    }
> +
> +    if (create_vmcore(s) < 0) {
> +        error_set(errp, QERR_IO_ERROR);
> +    }
> +}
> diff --git a/elf.h b/elf.h
> index 2e05d34..6a10657 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -1000,6 +1000,11 @@ typedef struct elf64_sym {
>  
>  #define EI_NIDENT	16
>  
> +/* Special value for e_phnum.  This indicates that the real number of
> +   program headers is too large to fit into e_phnum.  Instead the real
> +   value is in the field sh_info of section 0.  */
> +#define PN_XNUM         0xffff
> +
>  typedef struct elf32_hdr{
>    unsigned char	e_ident[EI_NIDENT];
>    Elf32_Half	e_type;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index bd35a3e..d270fd1 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -879,6 +879,34 @@ server will ask the spice/vnc client to automatically reconnect using the
>  new parameters (if specified) once the vm migration finished successfully.
>  ETEXI
>  
> +#if defined(CONFIG_HAVE_CORE_DUMP)
> +    {
> +        .name       = "dump-guest-memory",
> +        .args_type  = "paging:-p,protocol:s,begin:i?,length:i?",
> +        .params     = "[-p] protocol [begin] [length]",
> +        .help       = "dump guest memory to file"
> +                      "\n\t\t\t begin(optional): the starting physical address"
> +                      "\n\t\t\t length(optional): the memory size, in bytes",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd = hmp_dump_guest_memory,
> +    },
> +
> +
> +STEXI
> +@item dump-guest-memory [-p] @var{protocol} @var{begin} @var{length}
> +@findex dump-guest-memory
> +Dump guest memory to @var{protocol}. The file can be processed with crash or
> +gdb.
> +  protocol: destination file(started with "file:") or destination file
> +            descriptor (started with "fd:")
> +    paging: do paging to get guest's memory mapping
> +     begin: the starting physical address. It's optional, and should be
> +            specified with length together.
> +    length: the memory size, in bytes. It's optional, and should be specified
> +            with begin together.
> +ETEXI
> +#endif
> +
>      {
>          .name       = "snapshot_blkdev",
>          .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
> diff --git a/hmp.c b/hmp.c
> index 9cf2d13..75ec9a9 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -934,3 +934,25 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
>          qemu_mod_timer(status->timer, qemu_get_clock_ms(rt_clock));
>      }
>  }
> +
> +void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
> +{
> +    Error *errp = NULL;
> +    int paging = qdict_get_try_bool(qdict, "paging", 0);
> +    const char *file = qdict_get_str(qdict, "protocol");
> +    bool has_begin = qdict_haskey(qdict, "begin");
> +    bool has_length = qdict_haskey(qdict, "length");
> +    int64_t begin = 0;
> +    int64_t length = 0;
> +
> +    if (has_begin) {
> +        begin = qdict_get_int(qdict, "begin");
> +    }
> +    if (has_length) {
> +        length = qdict_get_int(qdict, "length");
> +    }
> +
> +    qmp_dump_guest_memory(paging, file, has_begin, begin, has_length, length,
> +                          &errp);
> +    hmp_handle_error(mon, &errp);
> +}
> diff --git a/hmp.h b/hmp.h
> index 8807853..36d69c6 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -60,5 +60,6 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
>  void hmp_migrate(Monitor *mon, const QDict *qdict);
> +void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
>  
>  #endif
> diff --git a/memory_mapping.c b/memory_mapping.c
> index 9a2ffe6..ff52c9d 100644
> --- a/memory_mapping.c
> +++ b/memory_mapping.c
> @@ -207,3 +207,30 @@ void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
>          create_new_memory_mapping(list, block->offset, 0, block->length);
>      }
>  }
> +
> +void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
> +                           int64_t length)
> +{
> +    MemoryMapping *cur, *next;
> +
> +    QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
> +        if (cur->phys_addr >= begin + length ||
> +            cur->phys_addr + cur->length <= begin) {
> +            QTAILQ_REMOVE(&list->head, cur, next);
> +            list->num--;
> +            continue;
> +        }
> +
> +        if (cur->phys_addr < begin) {
> +            cur->length -= begin - cur->phys_addr;
> +            if (cur->virt_addr) {
> +                cur->virt_addr += begin - cur->phys_addr;
> +            }
> +            cur->phys_addr = begin;
> +        }
> +
> +        if (cur->phys_addr + cur->length > begin + length) {
> +            cur->length -= cur->phys_addr + cur->length - begin - length;
> +        }
> +    }
> +}
> diff --git a/memory_mapping.h b/memory_mapping.h
> index a583e44..b795678 100644
> --- a/memory_mapping.h
> +++ b/memory_mapping.h
> @@ -62,4 +62,7 @@ static inline int qemu_get_guest_memory_mapping(MemoryMappingList *list)
>  /* get guest's memory mapping without do paging(virtual address is 0). */
>  void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
>  
> +void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
> +                           int64_t length);
> +
>  #endif
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 0d11d6e..b0d885f 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -1701,3 +1701,21 @@
>  # Since: 1.1
>  ##
>  { 'command': 'xen-save-devices-state', 'data': {'filename': 'str'} }
> +
> +##
> +# @dump-guest-memory
> +#
> +# Dump guest's memory to vmcore.
> +#
> +# @paging: if true, do paging to get guest's memory mapping
> +# @protocol: the filename or file descriptor of the vmcore.
> +# @begin: #optional if specified, the starting physical address.
> +# @length: #optional if specified, the memory size, in bytes.
> +#
> +# Returns: nothing on success
> +#
> +# Since: 1.1
> +##
> +{ 'command': 'dump-guest-memory',
> +  'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
> +            '*length': 'int' } }
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index c626ba8..ca4484b 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -606,6 +606,44 @@ Example:
>  
>  EQMP
>  
> +#if defined(CONFIG_HAVE_CORE_DUMP)
> +    {
> +        .name       = "dump-guest-memory",
> +        .args_type  = "paging:-p,protocol:s,begin:i?,end:i?",

Please use 'b' instead of '-p' for the paging argument.
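i.e. something along these lines (sketch; begin/length named as in the HMP entry):

        .args_type  = "paging:b,protocol:s,begin:i?,length:i?",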

> +        .params     = "[-p] protocol [begin] [length]",
> +        .help       = "dump guest memory to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = qmp_marshal_input_dump_guest_memory,
> +    },
> +
> +SQMP
> +dump
> +
> +
> +Dump guest memory to file. The file can be processed with crash or gdb.
> +
> +Arguments:
> +
> +- "paging": do paging to get guest's memory mapping (json-bool)
> +- "protocol": destination file(started with "file:") or destination file
> +              descriptor (started with "fd:") (json-string)
> +- "begin": the starting physical address. It's optional, and should be specified
> +           with length together (json-int)
> +- "length": the memory size, in bytes. It's optional, and should be specified
> +            with begin together (json-int)
> +
> +Example:
> +
> +-> { "execute": "dump-guest-memory", "arguments": { "protocol": "fd:dump" } }
> +<- { "return": {} }
> +
> +Notes:
> +
> +(1) All boolean arguments default to false
> +
> +EQMP
> +#endif
> +
>      {
>          .name       = "netdev_add",
>          .args_type  = "netdev:O",
Wen Congyang March 26, 2012, 1:39 a.m. UTC | #6
At 03/23/2012 05:40 PM, HATAYAMA Daisuke Wrote:
> From: Wen Congyang <wency@cn.fujitsu.com>
> Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
> Date: Tue, 20 Mar 2012 11:57:43 +0800
> 
>> +/* get the memory's offset in the vmcore */
>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>> +                                     DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = s->memory_offset;
>> +    int64_t size_in_block, start;
>> +
>> +    if (s->has_filter) {
>> +        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        if (s->has_filter) {
>> +            if (block->offset >= s->begin + s->length ||
>> +                block->offset + block->length <= s->begin) {
>> +                /* This block is out of the range */
>> +                continue;
>> +            }
>> +
>> +            if (s->begin <= block->offset) {
>> +                start = block->offset;
>> +            } else {
>> +                start = s->begin;
>> +            }
>> +
>> +            size_in_block = block->length - (start - block->offset);
>> +            if (s->begin + s->length < block->offset + block->length) {
>> +                size_in_block -= block->offset + block->length -
>> +                                 (s->begin + s->length);
>> +            }
>> +        } else {
>> +            start = block->offset;
>> +            size_in_block = block->length;
>> +        }
>> +
>> +        if (phys_addr >= start && phys_addr < start + size_in_block) {
>> +            return phys_addr - start + offset;
>> +        }
>> +
>> +        offset += size_in_block;
>> +    }
>> +
>> +    return -1;
>> +}
> 
> OK. -1 is assigned to offset when the corresponding memory is filtered
> and so not contained in dumpfile. But please give -1 a name. I want
> the name to tell me the data is filterred.
> 
> Also, I couldn't imagine get_offset() does filtering processing. This
> is important for the purspective of dump because there's data not
> saved in dumpfile. Could you claify this in some way? By moving
> filtering processing to the outside, or splitting it into anotehr
> funciton.

The offset should not be -1 here: the out-of-range PT_LOAD entries are already
removed in memory_mapping_filter(). In the previous patchset, Luiz said he
wanted the filtering feature squashed into this patch.

Thanks
Wen Congyang

> 
> Thanks.
> HATAYAMA, Daisuke
> 
>
Wen Congyang March 26, 2012, 1:43 a.m. UTC | #7
At 03/23/2012 05:00 PM, HATAYAMA Daisuke Wrote:
> From: Wen Congyang <wency@cn.fujitsu.com>
> Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
> Date: Tue, 20 Mar 2012 11:57:43 +0800
> 
>> +static int write_elf64_header(DumpState *s)
>> +{
>> +    Elf64_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
> 
> Use SELFMAG instead of 4.

Yes, SELFMAG is more readable than 4.

> 
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
>> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
>> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
>> +                                                   endian);
>> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
>> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
>> +    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
>> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
>> +                                                     endian);
>> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
>> +    if (s->have_section) {
>> +        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
>> +
>> +        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
>> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
>> +                                                         endian);
>> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
>> +    }
>> +
>> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_header(DumpState *s)
>> +{
>> +    Elf32_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
> 
> Also.

I got it.

Thanks
Wen Congyang

> 
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
>> +    elf_header.e_ident[EI_DATA] = endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
>> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
>> +                                                   endian);
>> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
>> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
>> +    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
>> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
>> +                                                     endian);
>> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
>> +    if (s->have_section) {
>> +        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
>> +
>> +        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
>> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
>> +                                                         endian);
>> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
>> +    }
>> +
>> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
> 
> Thanks.
> HATAYAMA, Daisuke
> 
>
Wen Congyang March 26, 2012, 1:53 a.m. UTC | #8
At 03/24/2012 01:19 AM, Luiz Capitulino Wrote:
> On Tue, 20 Mar 2012 11:57:43 +0800
> Wen Congyang <wency@cn.fujitsu.com> wrote:
> 
>> The command's usage:
>>    dump [-p] protocol [begin] [length]
>> The supported protocol can be file or fd:
>> 1. file: the protocol starts with "file:", and the following string is
>>    the file's path.
>> 2. fd: the protocol starts with "fd:", and the following string is the
>>    fd's name.
>>
>> Note:
>>   1. If you want to use gdb to process the core, please specify the -p option.
>>      The reason the -p option is not the default is that a guest machine in a
>>      catastrophic state can have corrupted memory, which we cannot trust.
>>   2. This command doesn't support an fd that is associated with a pipe,
>>      socket, or FIFO (lseek will fail on such an fd).
>>   3. If you don't want to dump all of the guest's memory, please specify the
>>      start physical address and the length.
> 
> All the explanation above (except the HMP command line) belongs in the
> qapi-schema.json file.

OK. I will do it.

> 
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> ---
>>  Makefile.target  |    2 +-
>>  dump.c           |  841 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  elf.h            |    5 +
>>  hmp-commands.hx  |   28 ++
>>  hmp.c            |   22 ++
>>  hmp.h            |    1 +
>>  memory_mapping.c |   27 ++
>>  memory_mapping.h |    3 +
>>  qapi-schema.json |   18 ++
>>  qmp-commands.hx  |   38 +++
>>  10 files changed, 984 insertions(+), 1 deletions(-)
>>  create mode 100644 dump.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index c3aa5d7..0e179c9 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -219,7 +219,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>>  obj-$(CONFIG_VGA) += vga.o
>>  obj-y += memory.o savevm.o
>>  obj-y += memory_mapping.o
>> -obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
>> +obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
>>  LIBS+=-lz
>>  
>>  obj-i386-$(CONFIG_KVM) += hyperv.o
>> diff --git a/dump.c b/dump.c
>> new file mode 100644
>> index 0000000..917bccc
>> --- /dev/null
>> +++ b/dump.c
>> @@ -0,0 +1,841 @@
>> +/*
>> + * QEMU dump
>> + *
>> + * Copyright Fujitsu, Corp. 2011, 2012
>> + *
>> + * Authors:
>> + *     Wen Congyang <wency@cn.fujitsu.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include <unistd.h>
>> +#include "elf.h"
>> +#include <sys/procfs.h>
>> +#include <glib.h>
>> +#include "cpu.h"
>> +#include "cpu-all.h"
>> +#include "targphys.h"
>> +#include "monitor.h"
>> +#include "kvm.h"
>> +#include "dump.h"
>> +#include "sysemu.h"
>> +#include "bswap.h"
>> +#include "memory_mapping.h"
>> +#include "error.h"
>> +#include "qmp-commands.h"
>> +#include "gdbstub.h"
>> +
>> +static inline uint16_t cpu_convert_to_target16(uint16_t val, int endian)
>> +{
>> +    if (endian == ELFDATA2LSB) {
>> +        val = cpu_to_le16(val);
>> +    } else {
>> +        val = cpu_to_be16(val);
>> +    }
>> +
>> +    return val;
>> +}
>> +
>> +static inline uint32_t cpu_convert_to_target32(uint32_t val, int endian)
>> +{
>> +    if (endian == ELFDATA2LSB) {
>> +        val = cpu_to_le32(val);
>> +    } else {
>> +        val = cpu_to_be32(val);
>> +    }
>> +
>> +    return val;
>> +}
>> +
>> +static inline uint64_t cpu_convert_to_target64(uint64_t val, int endian)
>> +{
>> +    if (endian == ELFDATA2LSB) {
>> +        val = cpu_to_le64(val);
>> +    } else {
>> +        val = cpu_to_be64(val);
>> +    }
>> +
>> +    return val;
>> +}
> 
> Nitpick: I personally, wouldn't make these functions inline.

Yes, it is better to let the compiler decide whether they should be
inlined.

> 
>> +
>> +typedef struct DumpState {
>> +    ArchDumpInfo dump_info;
>> +    MemoryMappingList list;
>> +    uint16_t phdr_num;
>> +    uint32_t sh_info;
>> +    bool have_section;
>> +    bool resume;
>> +    target_phys_addr_t memory_offset;
>> +    write_core_dump_function f;
>> +    void (*cleanup)(void *opaque);
>> +    void *opaque;
>> +
>> +    RAMBlock *block;
>> +    ram_addr_t start;
>> +    bool has_filter;
>> +    int64_t begin;
>> +    int64_t length;
>> +} DumpState;
>> +
>> +static DumpState *dump_get_current(void)
>> +{
>> +    static DumpState current_dump;
>> +
>> +    return &current_dump;
>> +}
> 
> You can drop this function if you allocate DumpState in the stack in
> qmp_dump_guest_memory().

OK
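
For illustration, a rough sketch of that change; the int-returning
dump_init_fd(s, ...) signature below is an assumption, not code from this
patch:

    void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
                               int64_t begin, bool has_length, int64_t length,
                               Error **errp)
    {
        DumpState state, *s = &state;
        int fd = -1;

        memset(s, 0, sizeof(*s));

        /* parameter checks and "fd:"/"file:" handling stay as in the patch */

        if (dump_init_fd(s, fd, paging, has_begin, begin, length, errp) < 0) {
            return;
        }

        if (create_vmcore(s) < 0) {
            error_set(errp, QERR_IO_ERROR);
        }
    }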

> 
>> +
>> +static int dump_cleanup(DumpState *s)
>> +{
>> +    int ret = 0;
>> +
>> +    memory_mapping_list_free(&s->list);
>> +    s->cleanup(s->opaque);
>> +    if (s->resume) {
>> +        vm_start();
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void dump_error(DumpState *s, const char *reason)
>> +{
>> +    dump_cleanup(s);
>> +}
>> +
>> +static int write_elf64_header(DumpState *s)
>> +{
>> +    Elf64_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
>> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
>> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
>> +                                                   endian);
>> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
>> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
>> +    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
>> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
>> +                                                     endian);
>> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
>> +    if (s->have_section) {
>> +        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
>> +
>> +        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
>> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
>> +                                                         endian);
>> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
>> +    }
>> +
>> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_header(DumpState *s)
>> +{
>> +    Elf32_Ehdr elf_header;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
>> +    memcpy(&elf_header, ELFMAG, 4);
>> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
>> +    elf_header.e_ident[EI_DATA] = endian;
>> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
>> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
>> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
>> +                                                   endian);
>> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
>> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
>> +    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
>> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
>> +                                                     endian);
>> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
>> +    if (s->have_section) {
>> +        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
>> +
>> +        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
>> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
>> +                                                         endian);
>> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
>> +    }
>> +
>> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write elf header.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
>> +    phdr.p_offset = cpu_convert_to_target64(offset, endian);
>> +    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
>> +    }
>> +    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
>> +    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
>> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
>> +                            int phdr_index, target_phys_addr_t offset)
>> +{
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int ret;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
>> +    phdr.p_offset = cpu_convert_to_target32(offset, endian);
>> +    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
>> +    if (offset == -1) {
>> +        phdr.p_filesz = 0;
>> +    } else {
>> +        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
>> +    }
>> +    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
>> +    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
>> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf64_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUArchState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf64_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = gdb_id(env);
>> +        ret = cpu_write_elf64_note(s->f, env, id, offset, s->opaque);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        ret = cpu_write_elf64_qemunote(s->f, env, offset, s->opaque);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write CPU status.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
>> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
>> +    phdr.p_offset = cpu_convert_to_target64(begin, endian);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = cpu_convert_to_target64(*offset - begin, endian);
>> +    phdr.p_memsz = cpu_convert_to_target64(*offset - begin, endian);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf64_Ehdr);
>> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf32_notes(DumpState *s, int phdr_index,
>> +                             target_phys_addr_t *offset)
>> +{
>> +    CPUArchState *env;
>> +    int ret;
>> +    target_phys_addr_t begin = *offset;
>> +    Elf32_Phdr phdr;
>> +    off_t phdr_offset;
>> +    int id;
>> +    int endian = s->dump_info.d_endian;
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        id = gdb_id(env);
>> +        ret = cpu_write_elf32_note(s->f, env, id, offset, s->opaque);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write elf notes.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        ret = cpu_write_elf32_qemunote(s->f, env, offset, s->opaque);
>> +        if (ret < 0) {
>> +            dump_error(s, "dump: failed to write CPU status.\n");
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
>> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
>> +    phdr.p_offset = cpu_convert_to_target32(begin, endian);
>> +    phdr.p_paddr = 0;
>> +    phdr.p_filesz = cpu_convert_to_target32(*offset - begin, endian);
>> +    phdr.p_memsz = cpu_convert_to_target32(*offset - begin, endian);
>> +    phdr.p_vaddr = 0;
>> +
>> +    phdr_offset = sizeof(Elf32_Ehdr);
>> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write program header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int write_elf_section(DumpState *s, target_phys_addr_t *offset, int type)
>> +{
>> +    Elf32_Shdr shdr32;
>> +    Elf64_Shdr shdr64;
>> +    int endian = s->dump_info.d_endian;
>> +    int shdr_size;
>> +    void *shdr;
>> +    int ret;
>> +
>> +    if (type == 0) {
>> +        shdr_size = sizeof(Elf32_Shdr);
>> +        memset(&shdr32, 0, shdr_size);
>> +        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
>> +        shdr = &shdr32;
>> +    } else {
>> +        shdr_size = sizeof(Elf64_Shdr);
>> +        memset(&shdr64, 0, shdr_size);
>> +        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
>> +        shdr = &shdr64;
>> +    }
>> +
>> +    ret = s->f(*offset, &shdr, shdr_size, s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to write section header table.\n");
>> +        return -1;
>> +    }
>> +
>> +    *offset += shdr_size;
>> +    return 0;
>> +}
>> +
>> +static int write_data(DumpState *s, void *buf, int length,
>> +                      target_phys_addr_t *offset)
>> +{
>> +    int ret;
>> +
>> +    ret = s->f(*offset, buf, length, s->opaque);
>> +    if (ret < 0) {
>> +        dump_error(s, "dump: failed to save memory.\n");
>> +        return -1;
>> +    }
>> +
>> +    *offset += length;
>> +    return 0;
>> +}
>> +
>> +/* write the memroy to vmcore. 1 page per I/O. */
>> +static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
>> +                        target_phys_addr_t *offset, int64_t *size)
>> +{
>> +    int i, ret;
>> +    int64_t writen_size = 0;
>> +
>> +    *size = block->length - start;
>> +    for (i = 0; i < *size / TARGET_PAGE_SIZE; i++) {
>> +        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
>> +                         TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            *size = writen_size;
>> +            return ret;
>> +        }
>> +
>> +        writen_size += TARGET_PAGE_SIZE;
>> +    }
>> +
>> +    if ((*size % TARGET_PAGE_SIZE) != 0) {
>> +        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
>> +                         *size % TARGET_PAGE_SIZE, offset);
>> +        if (ret < 0) {
>> +            *size = writen_size;
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* get the memory's offset in the vmcore */
>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>> +                                     DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = s->memory_offset;
>> +    int64_t size_in_block, start;
>> +
>> +    if (s->has_filter) {
>> +        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        if (s->has_filter) {
>> +            if (block->offset >= s->begin + s->length ||
>> +                block->offset + block->length <= s->begin) {
>> +                /* This block is out of the range */
>> +                continue;
>> +            }
>> +
>> +            if (s->begin <= block->offset) {
>> +                start = block->offset;
>> +            } else {
>> +                start = s->begin;
>> +            }
>> +
>> +            size_in_block = block->length - (start - block->offset);
>> +            if (s->begin + s->length < block->offset + block->length) {
>> +                size_in_block -= block->offset + block->length -
>> +                                 (s->begin + s->length);
>> +            }
>> +        } else {
>> +            start = block->offset;
>> +            size_in_block = block->length;
>> +        }
>> +
>> +        if (phys_addr >= start && phys_addr < start + size_in_block) {
>> +            return phys_addr - start + offset;
>> +        }
>> +
>> +        offset += size_in_block;
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +/* write elf header, PT_NOTE and elf note to vmcore. */
>> +static int dump_begin(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    int ret;
>> +
>> +    /*
>> +     * the vmcore's format is:
>> +     *   --------------
>> +     *   |  elf header |
>> +     *   --------------
>> +     *   |  PT_NOTE    |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  ......     |
>> +     *   --------------
>> +     *   |  PT_LOAD    |
>> +     *   --------------
>> +     *   |  sec_hdr    |
>> +     *   --------------
>> +     *   |  elf note   |
>> +     *   --------------
>> +     *   |  memory     |
>> +     *   --------------
>> +     *
>> +     * we only know where the memory is saved after we write elf note into
>> +     * vmcore.
>> +     */
>> +
>> +    /* write elf header to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        ret = write_elf64_header(s);
>> +    } else {
>> +        ret = write_elf32_header(s);
>> +    }
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    /* write elf section and notes to vmcore */
>> +    if (s->dump_info.d_class == ELFCLASS64) {
>> +        if (s->have_section) {
>> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->sh_info;
>> +            if (write_elf_section(s, &offset, 1) < 0) {
>> +                return -1;
>> +            }
>> +        } else {
>> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
>> +        }
>> +        ret = write_elf64_notes(s, 0, &offset);
>> +    } else {
>> +        if (s->have_section) {
>> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->sh_info;
>> +            if (write_elf_section(s, &offset, 0) < 0) {
>> +                return -1;
>> +            }
>> +        } else {
>> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
>> +        }
>> +        ret = write_elf32_notes(s, 0, &offset);
>> +    }
>> +
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    s->memory_offset = offset;
>> +    return 0;
>> +}
>> +
>> +/* write PT_LOAD to vmcore */
>> +static int dump_completed(DumpState *s)
>> +{
>> +    target_phys_addr_t offset;
>> +    MemoryMapping *memory_mapping;
>> +    int phdr_index = 1, ret;
>> +
>> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
>> +        offset = get_offset(memory_mapping->phys_addr, s);
>> +        if (s->dump_info.d_class == ELFCLASS64) {
>> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
>> +        } else {
>> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
>> +        }
>> +        if (ret < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    dump_cleanup(s);
>> +    return 0;
>> +}
>> +
>> +static int get_next_block(DumpState *s, RAMBlock *block)
>> +{
>> +    while (1) {
>> +        block = QLIST_NEXT(block, next);
>> +        if (!block) {
>> +            /* no more block */
>> +            return 1;
>> +        }
>> +
>> +        s->start = 0;
>> +        s->block = block;
>> +        if (s->has_filter) {
>> +            if (block->offset >= s->begin + s->length ||
>> +                block->offset + block->length <= s->begin) {
>> +                /* This block is out of the range */
>> +                continue;
>> +            }
>> +
>> +            if (s->begin > block->offset) {
>> +                s->start = s->begin - block->offset;
>> +            }
>> +        }
>> +
>> +        return 0;
>> +    }
>> +}
>> +
>> +/* write all memory to vmcore */
>> +static int dump_iterate(DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +    target_phys_addr_t offset = s->memory_offset;
>> +    int64_t size;
>> +    int ret;
>> +
>> +    while(1) {
>> +        block = s->block;
>> +        ret = write_memory(s, block, s->start, &offset, &size);
>> +        if (ret == -1) {
>> +            return ret;
>> +        }
>> +
>> +        ret = get_next_block(s, block);
>> +        if (ret == 1) {
>> +            dump_completed(s);
>> +            return 0;
>> +        }
>> +    }
>> +}
>> +
>> +static int create_vmcore(DumpState *s)
>> +{
>> +    int ret;
>> +
>> +    ret = dump_begin(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    ret = dump_iterate(s);
>> +    if (ret < 0) {
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static ram_addr_t get_start_block(DumpState *s)
>> +{
>> +    RAMBlock *block;
>> +
>> +    if (!s->has_filter) {
>> +        s->block = QLIST_FIRST(&ram_list.blocks);
>> +        return 0;
>> +    }
>> +
>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>> +        if (block->offset >= s->begin + s->length ||
>> +            block->offset + block->length <= s->begin) {
>> +            /* This block is out of the range */
>> +            continue;
>> +        }
>> +
>> +        s->block = block;
>> +        if (s->begin > block->offset ) {
>> +            s->start = s->begin - block->offset;
>> +        } else {
>> +            s->start = 0;
>> +        }
>> +        return s->start;
>> +    }
>> +
>> +    return -1;
>> +}
>> +
>> +static DumpState *dump_init(bool paging, bool has_filter, int64_t begin,
>> +                            int64_t length, Error **errp)
>> +{
>> +    CPUArchState *env;
>> +    DumpState *s = dump_get_current();
> 
> As I said above, you could drop dump_get_current() and pass 's' as a parameter.

OK
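
Sketched as prototypes (the reworked signatures are illustrative only):

    /* the caller owns 's'; these only fill it in and report errors */
    static int dump_init(DumpState *s, bool paging, bool has_filter,
                         int64_t begin, int64_t length, Error **errp);
    static int dump_init_fd(DumpState *s, int fd, bool paging, bool has_filter,
                            int64_t begin, int64_t length, Error **errp);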

> 
>> +    int ret;
>> +
>> +    if (runstate_is_running()) {
>> +        vm_stop(RUN_STATE_SAVE_VM);
>> +        s->resume = true;
>> +    } else {
>> +        s->resume = false;
>> +    }
>> +
>> +    s->has_filter = has_filter;
>> +    s->begin = begin;
>> +    s->length = length;
>> +    s->start = get_start_block(s);
>> +    if (s->start == -1) {
>> +        error_set(errp, QERR_INVALID_PARAMETER, "begin");
>> +        goto cleanup;
>> +    }
>> +
>> +    /*
>> +     * get dump info: endian, class and architecture.
>> +     * If the target architecture is not supported, cpu_get_dump_info() will
>> +     * return -1.
>> +     *
>> +     * if we use kvm, we should synchronize the register before we get dump
>> +     * info.
>> +     */
>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>> +        cpu_synchronize_state(env);
>> +    }
>> +
>> +    ret = cpu_get_dump_info(&s->dump_info);
>> +    if (ret < 0) {
>> +        error_set(errp, QERR_UNSUPPORTED);
>> +        goto cleanup;
>> +    }
>> +
>> +    /* get memory mapping */
>> +    memory_mapping_list_init(&s->list);
>> +    if (paging) {
>> +        qemu_get_guest_memory_mapping(&s->list);
>> +    } else {
>> +        qemu_get_guest_simple_memory_mapping(&s->list);
>> +    }
>> +
>> +    if (s->has_filter) {
>> +        memory_mapping_filter(&s->list, s->begin, s->length);
>> +    }
>> +
>> +    /*
>> +     * calculate phdr_num
>> +     *
>> +     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
>> +     */
>> +    s->phdr_num = 1; /* PT_NOTE */
>> +    if (s->list.num < UINT16_MAX - 2) {
>> +        s->phdr_num += s->list.num;
>> +        s->have_section = false;
>> +    } else {
>> +        s->have_section = true;
>> +        s->phdr_num = PN_XNUM;
>> +        s->sh_info = 1; /* PT_NOTE */
>> +
>> +        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
>> +        if (s->list.num <= UINT32_MAX -1) {
>> +            s->sh_info += s->list.num;
>> +        } else {
>> +            s->sh_info = UINT32_MAX;
>> +        }
>> +    }
>> +
>> +    return s;
>> +
>> +cleanup:
>> +    if (s->resume) {
>> +        vm_start();
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +static int fd_write_vmcore(target_phys_addr_t offset, void *buf, size_t size,
>> +                           void *opaque)
>> +{
>> +    int fd = (int)(intptr_t)opaque;
>> +    off_t ret;
>> +    size_t writen_size;
>> +
>> +    while(1) {
>> +        ret = lseek(fd, offset, SEEK_SET);
>> +        if (ret < 0) {
>> +            if (errno != EINTR && errno != EAGAIN) {
> 
> According to the man page, lseek() doesn't return EINTR nor EAGAIN.

Yes, the man page doesn't mention them. But in my testing I have seen EAGAIN,
and if lseek() is interrupted by a signal I guess it may return EINTR.
So I check for EINTR and EAGAIN here.

> 
> Also, it would be nice to check for ESPIPE and return the proper QMP by
> using error_set(). The error doesn't exist yet, maybe it could be called
> PipeOrSocketFD (look at qerror.[ch] for more examples).

OK. I will look at qerror.[ch].
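
A minimal sketch of such a check; QERR_PIPE_OR_SOCKET_FD is the reviewer's
suggested name and does not exist in qerror.[ch] yet:

    /* in qmp_dump_guest_memory(), once the fd has been obtained */
    if (lseek(fd, 0, SEEK_CUR) == (off_t)-1 && errno == ESPIPE) {
        /* pipes, sockets and FIFOs cannot seek, so refuse them up front */
        error_set(errp, QERR_PIPE_OR_SOCKET_FD, file);
        return;
    }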

> 
>> +                return -1;
>> +            }
>> +            continue;
>> +        }
>> +        break;
>> +    }
>> +
>> +    /* The fd may be passed from user, and it can be non-blocked */
>> +    while(size) {
>> +        writen_size = qemu_write_full(fd, buf, size);
>> +        if (writen_size != size && errno != EAGAIN) {
>> +            return -1;
>> +        }
>> +
>> +        buf += writen_size;
>> +        size -= writen_size;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void fd_cleanup(void *opaque)
>> +{
>> +    int fd = (int)(intptr_t)opaque;
>> +
>> +    if (fd != -1) {
>> +        close(fd);
>> +    }
>> +}
>> +
>> +static DumpState *dump_init_fd(int fd, bool paging, bool has_filter,
>> +                               int64_t begin, int64_t length, Error **errp)
>> +{
>> +    DumpState *s = dump_init(paging, has_filter, begin, length, errp);
>> +
>> +    if (s == NULL) {
>> +        return NULL;
>> +    }
>> +
>> +    s->f = fd_write_vmcore;
>> +    s->cleanup = fd_cleanup;
>> +    s->opaque = (void *)(intptr_t)fd;
>> +
>> +    return s;
>> +}
>> +
>> +void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
>> +                           int64_t begin, bool has_length, int64_t length,
>> +                           Error **errp)
>> +{
>> +    const char *p;
>> +    int fd = -1;
>> +    DumpState *s;
>> +
>> +    if (has_begin && !has_length) {
>> +        error_set(errp, QERR_MISSING_PARAMETER, "length");
>> +        return;
>> +    }
>> +    if (!has_begin && has_length) {
>> +        error_set(errp, QERR_MISSING_PARAMETER, "begin");
>> +        return;
>> +    }
>> +
>> +#if !defined(WIN32)
>> +    if (strstart(file, "fd:", &p)) {
>> +        fd = monitor_get_fd(cur_mon, p);
>> +        if (fd == -1) {
>> +            error_set(errp, QERR_FD_NOT_FOUND, p);
>> +            return;
>> +        }
>> +    }
>> +#endif
>> +
>> +    if  (strstart(file, "file:", &p)) {
>> +        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
>> +        if (fd < 0) {
>> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
>> +            return;
>> +        }
>> +    }
>> +
>> +    if (fd == -1) {
>> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
>> +        return;
>> +    }
> 
> s/file/parameter

Hmm, I forgot to update it. 'protocol' is better than 'parameter'.

> 
>> +
>> +    s = dump_init_fd(fd, paging, has_begin, begin, length, errp);
>> +    if (!s) {
>> +        return;
>> +    }
>> +
>> +    if (create_vmcore(s) < 0) {
>> +        error_set(errp, QERR_IO_ERROR);
>> +    }
>> +}
>> diff --git a/elf.h b/elf.h
>> index 2e05d34..6a10657 100644
>> --- a/elf.h
>> +++ b/elf.h
>> @@ -1000,6 +1000,11 @@ typedef struct elf64_sym {
>>  
>>  #define EI_NIDENT	16
>>  
>> +/* Special value for e_phnum.  This indicates that the real number of
>> +   program headers is too large to fit into e_phnum.  Instead the real
>> +   value is in the field sh_info of section 0.  */
>> +#define PN_XNUM         0xffff
>> +
>>  typedef struct elf32_hdr{
>>    unsigned char	e_ident[EI_NIDENT];
>>    Elf32_Half	e_type;
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index bd35a3e..d270fd1 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -879,6 +879,34 @@ server will ask the spice/vnc client to automatically reconnect using the
>>  new parameters (if specified) once the vm migration finished successfully.
>>  ETEXI
>>  
>> +#if defined(CONFIG_HAVE_CORE_DUMP)
>> +    {
>> +        .name       = "dump-guest-memory",
>> +        .args_type  = "paging:-p,protocol:s,begin:i?,length:i?",
>> +        .params     = "[-p] protocol [begin] [length]",
>> +        .help       = "dump guest memory to file"
>> +                      "\n\t\t\t begin(optional): the starting physical address"
>> +                      "\n\t\t\t length(optional): the memory size, in bytes",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd = hmp_dump_guest_memory,
>> +    },
>> +
>> +
>> +STEXI
>> +@item dump-guest-memory [-p] @var{protocol} @var{begin} @var{length}
>> +@findex dump-guest-memory
>> +Dump guest memory to @var{protocol}. The file can be processed with crash or
>> +gdb.
>> +  protocol: destination file(started with "file:") or destination file
>> +            descriptor (started with "fd:")
>> +    paging: do paging to get guest's memory mapping
>> +     begin: the starting physical address. It's optional, and should be
>> +            specified with length together.
>> +    length: the memory size, in bytes. It's optional, and should be specified
>> +            with begin together.
>> +ETEXI
>> +#endif
>> +
>>      {
>>          .name       = "snapshot_blkdev",
>>          .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
>> diff --git a/hmp.c b/hmp.c
>> index 9cf2d13..75ec9a9 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -934,3 +934,25 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
>>          qemu_mod_timer(status->timer, qemu_get_clock_ms(rt_clock));
>>      }
>>  }
>> +
>> +void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
>> +{
>> +    Error *errp = NULL;
>> +    int paging = qdict_get_try_bool(qdict, "paging", 0);
>> +    const char *file = qdict_get_str(qdict, "protocol");
>> +    bool has_begin = qdict_haskey(qdict, "begin");
>> +    bool has_length = qdict_haskey(qdict, "length");
>> +    int64_t begin = 0;
>> +    int64_t length = 0;
>> +
>> +    if (has_begin) {
>> +        begin = qdict_get_int(qdict, "begin");
>> +    }
>> +    if (has_length) {
>> +        length = qdict_get_int(qdict, "length");
>> +    }
>> +
>> +    qmp_dump_guest_memory(paging, file, has_begin, begin, has_length, length,
>> +                          &errp);
>> +    hmp_handle_error(mon, &errp);
>> +}
>> diff --git a/hmp.h b/hmp.h
>> index 8807853..36d69c6 100644
>> --- a/hmp.h
>> +++ b/hmp.h
>> @@ -60,5 +60,6 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict);
>>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
>>  void hmp_migrate(Monitor *mon, const QDict *qdict);
>> +void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
>>  
>>  #endif
>> diff --git a/memory_mapping.c b/memory_mapping.c
>> index 9a2ffe6..ff52c9d 100644
>> --- a/memory_mapping.c
>> +++ b/memory_mapping.c
>> @@ -207,3 +207,30 @@ void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
>>          create_new_memory_mapping(list, block->offset, 0, block->length);
>>      }
>>  }
>> +
>> +void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
>> +                           int64_t length)
>> +{
>> +    MemoryMapping *cur, *next;
>> +
>> +    QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
>> +        if (cur->phys_addr >= begin + length ||
>> +            cur->phys_addr + cur->length <= begin) {
>> +            QTAILQ_REMOVE(&list->head, cur, next);
>> +            list->num--;
>> +            continue;
>> +        }
>> +
>> +        if (cur->phys_addr < begin) {
>> +            cur->length -= begin - cur->phys_addr;
>> +            if (cur->virt_addr) {
>> +                cur->virt_addr += begin - cur->phys_addr;
>> +            }
>> +            cur->phys_addr = begin;
>> +        }
>> +
>> +        if (cur->phys_addr + cur->length > begin + length) {
>> +            cur->length -= cur->phys_addr + cur->length - begin - length;
>> +        }
>> +    }
>> +}
>> diff --git a/memory_mapping.h b/memory_mapping.h
>> index a583e44..b795678 100644
>> --- a/memory_mapping.h
>> +++ b/memory_mapping.h
>> @@ -62,4 +62,7 @@ static inline int qemu_get_guest_memory_mapping(MemoryMappingList *list)
>>  /* get guest's memory mapping without do paging(virtual address is 0). */
>>  void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
>>  
>> +void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
>> +                           int64_t length);
>> +
>>  #endif
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index 0d11d6e..b0d885f 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -1701,3 +1701,21 @@
>>  # Since: 1.1
>>  ##
>>  { 'command': 'xen-save-devices-state', 'data': {'filename': 'str'} }
>> +
>> +##
>> +# @dump-guest-memory
>> +#
>> +# Dump guest's memory to vmcore.
>> +#
>> +# @paging: if true, do paging to get guest's memory mapping
>> +# @protocol: the filename or file descriptor of the vmcore.
>> +# @begin: #optional if specified, the starting physical address.
>> +# @length: #optional if specified, the memory size, in bytes.
>> +#
>> +# Returns: nothing on success
>> +#
>> +# Since: 1.1
>> +##
>> +{ 'command': 'dump-guest-memory',
>> +  'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
>> +            '*length': 'int' } }
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index c626ba8..ca4484b 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -606,6 +606,44 @@ Example:
>>  
>>  EQMP
>>  
>> +#if defined(CONFIG_HAVE_CORE_DUMP)
>> +    {
>> +        .name       = "dump-guest-memory",
>> +        .args_type  = "paging:-p,protocol:s,begin:i?,end:i?",
> 
> Please, use b instead of -p.

OK
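
That is, roughly (note the posted hunk has "end:i?" where the schema and
.params use "length"):

    .args_type  = "paging:b,protocol:s,begin:i?,length:i?",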

Thanks
Wen Congyang

> 
>> +        .params     = "[-p] protocol [begin] [length]",
>> +        .help       = "dump guest memory to file",
>> +        .user_print = monitor_user_noop,
>> +        .mhandler.cmd_new = qmp_marshal_input_dump_guest_memory,
>> +    },
>> +
>> +SQMP
>> +dump
>> +
>> +
>> +Dump guest memory to file. The file can be processed with crash or gdb.
>> +
>> +Arguments:
>> +
>> +- "paging": do paging to get guest's memory mapping (json-bool)
>> +- "protocol": destination file(started with "file:") or destination file
>> +              descriptor (started with "fd:") (json-string)
>> +- "begin": the starting physical address. It's optional, and should be specified
>> +           with length together (json-int)
>> +- "length": the memory size, in bytes. It's optional, and should be specified
>> +            with begin together (json-int)
>> +
>> +Example:
>> +
>> +-> { "execute": "dump-guest-memory", "arguments": { "protocol": "fd:dump" } }
>> +<- { "return": {} }
>> +
>> +Notes:
>> +
>> +(1) All boolean arguments default to false
>> +
>> +EQMP
>> +#endif
>> +
>>      {
>>          .name       = "netdev_add",
>>          .args_type  = "netdev:O",
> 
>
Wen Congyang March 26, 2012, 2 a.m. UTC | #9
At 03/24/2012 01:07 AM, Luiz Capitulino Wrote:
> On Fri, 23 Mar 2012 17:06:22 +0900
> HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com> wrote:
> 
>> From: Wen Congyang <wency@cn.fujitsu.com>
>> Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
>> Date: Tue, 20 Mar 2012 11:57:43 +0800
>>
>> <cut>
>>
>>> +typedef struct DumpState {
>>> +    ArchDumpInfo dump_info;
>>> +    MemoryMappingList list;
>>> +    uint16_t phdr_num;
>>> +    uint32_t sh_info;
>>> +    bool have_section;
>>> +    bool resume;
>>> +    target_phys_addr_t memory_offset;
>>> +    write_core_dump_function f;
>>
>> f() is so general. Type information is meaningless enough, but there's
>> no explicit occurence of the function call of f(). Could you consider
>> renaming?
> 
> Agreed. I actually don't see why this indirection is needed.
> 

Currently we only support writing to an fd, so this indirection isn't needed.
If we later want to write the memory somewhere else, the indirection may be
needed again. I will remove it since it isn't needed now, but the code in
target-i386/arch_dump.c will still use write_core_dump_function.
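
As a sketch, assuming DumpState grows an 'fd' field in place of
f/cleanup/opaque:

    /* illustrative helper: write at 'offset' through the fd kept in DumpState */
    static int dump_write(DumpState *s, target_phys_addr_t offset,
                          void *buf, size_t size)
    {
        return fd_write_vmcore(offset, buf, size, (void *)(intptr_t)s->fd);
    }

The note writers can still be handed fd_write_vmcore plus the opaque fd
directly, e.g. cpu_write_elf64_note(fd_write_vmcore, env, id, offset,
(void *)(intptr_t)s->fd), so write_core_dump_function stays confined to the
arch interface.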

Thanks
Wen Congyang
Hatayama, Daisuke March 26, 2012, 3:22 a.m. UTC | #10
From: Wen Congyang <wency@cn.fujitsu.com>
Subject: Re: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
Date: Mon, 26 Mar 2012 09:39:24 +0800

> At 03/23/2012 05:40 PM, HATAYAMA Daisuke Wrote:
>> From: Wen Congyang <wency@cn.fujitsu.com>
>> Subject: [PATCH 11/11 v10] introduce a new monitor command 'dump-guest-memory' to dump guest's memory
>> Date: Tue, 20 Mar 2012 11:57:43 +0800
>> 
>>> +/* get the memory's offset in the vmcore */
>>> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
>>> +                                     DumpState *s)
>>> +{
>>> +    RAMBlock *block;
>>> +    target_phys_addr_t offset = s->memory_offset;
>>> +    int64_t size_in_block, start;
>>> +
>>> +    if (s->has_filter) {
>>> +        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
>>> +            return -1;
>>> +        }
>>> +    }
>>> +
>>> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
>>> +        if (s->has_filter) {
>>> +            if (block->offset >= s->begin + s->length ||
>>> +                block->offset + block->length <= s->begin) {
>>> +                /* This block is out of the range */
>>> +                continue;
>>> +            }
>>> +
>>> +            if (s->begin <= block->offset) {
>>> +                start = block->offset;
>>> +            } else {
>>> +                start = s->begin;
>>> +            }
>>> +
>>> +            size_in_block = block->length - (start - block->offset);
>>> +            if (s->begin + s->length < block->offset + block->length) {
>>> +                size_in_block -= block->offset + block->length -
>>> +                                 (s->begin + s->length);
>>> +            }
>>> +        } else {
>>> +            start = block->offset;
>>> +            size_in_block = block->length;
>>> +        }
>>> +
>>> +        if (phys_addr >= start && phys_addr < start + size_in_block) {
>>> +            return phys_addr - start + offset;
>>> +        }
>>> +
>>> +        offset += size_in_block;
>>> +    }
>>> +
>>> +    return -1;
>>> +}
>> 
>> OK. -1 is assigned to offset when the corresponding memory is filtered
>> and so not contained in the dumpfile. But please give -1 a name. I want
>> the name to tell me the data is filtered.
>> 
>> Also, I couldn't imagine that get_offset() does the filtering. This
>> is important from the perspective of the dump because there is data not
>> saved in the dumpfile. Could you clarify this in some way? By moving
>> the filtering to the outside, or splitting it into another
>> function.
> 
> offset should not be -1. I remove the PT_LOAD entries in memory_mapping_filter().
> In the previous patchset, Luiz said he wants to squash the filtering
> feature into this patch.
> 

I hadn't noticed that the patch was moved... _I_ think it should be
presented as a single patch.

Looking at memory_mapping_filter(), the PT_LOAD entries are set up so that
filtered memory simply doesn't appear. I think it is better to keep such a
mapping with p_filesz == 0 to indicate to users that the mapping was
filtered.
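
A sketch combining both points (the macro name is illustrative, not code
from the patch):

    /* offset value meaning "this mapping was filtered out of the vmcore" */
    #define FILTERED_MEMORY_OFFSET ((target_phys_addr_t)-1)

and then in write_elf64_load() (and analogously in the 32-bit variant):

    if (offset == FILTERED_MEMORY_OFFSET) {
        /* keep the PT_LOAD entry, but record that no bytes follow in the file */
        phdr.p_filesz = 0;
    } else {
        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
    }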

Also, get_offset() loops over ram_list.blocks each time it is called.
Because dump_completed() already iterates over the memory_mapping list, the
offset could be calculated incrementally instead.
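
For example, the block offsets could be resolved once up front; the
'dump_offset' field is hypothetical, and the begin/length filter case is
left out for brevity (it would reuse the size_in_block math from
get_offset()):

    /* sketch: record each block's position in the vmcore once, e.g. right
       after s->memory_offset is known, so get_offset() becomes a lookup
       instead of re-walking ram_list for every mapping */
    static void calc_block_offsets(DumpState *s)
    {
        RAMBlock *block;
        target_phys_addr_t offset = s->memory_offset;

        QLIST_FOREACH(block, &ram_list.blocks, next) {
            block->dump_offset = offset;    /* hypothetical field */
            offset += block->length;
        }
    }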

Thanks.
HATAYAMA, Daisuke
diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index c3aa5d7..0e179c9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -219,7 +219,7 @@  obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
 obj-y += memory_mapping.o
-obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
+obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/dump.c b/dump.c
new file mode 100644
index 0000000..917bccc
--- /dev/null
+++ b/dump.c
@@ -0,0 +1,841 @@ 
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ *     Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include <unistd.h>
+#include "elf.h"
+#include <sys/procfs.h>
+#include <glib.h>
+#include "cpu.h"
+#include "cpu-all.h"
+#include "targphys.h"
+#include "monitor.h"
+#include "kvm.h"
+#include "dump.h"
+#include "sysemu.h"
+#include "bswap.h"
+#include "memory_mapping.h"
+#include "error.h"
+#include "qmp-commands.h"
+#include "gdbstub.h"
+
+static inline uint16_t cpu_convert_to_target16(uint16_t val, int endian)
+{
+    if (endian == ELFDATA2LSB) {
+        val = cpu_to_le16(val);
+    } else {
+        val = cpu_to_be16(val);
+    }
+
+    return val;
+}
+
+static inline uint32_t cpu_convert_to_target32(uint32_t val, int endian)
+{
+    if (endian == ELFDATA2LSB) {
+        val = cpu_to_le32(val);
+    } else {
+        val = cpu_to_be32(val);
+    }
+
+    return val;
+}
+
+static inline uint64_t cpu_convert_to_target64(uint64_t val, int endian)
+{
+    if (endian == ELFDATA2LSB) {
+        val = cpu_to_le64(val);
+    } else {
+        val = cpu_to_be64(val);
+    }
+
+    return val;
+}
+
+typedef struct DumpState {
+    ArchDumpInfo dump_info;
+    MemoryMappingList list;
+    uint16_t phdr_num;
+    uint32_t sh_info;
+    bool have_section;
+    bool resume;
+    target_phys_addr_t memory_offset;
+    write_core_dump_function f;
+    void (*cleanup)(void *opaque);
+    void *opaque;
+
+    RAMBlock *block;
+    ram_addr_t start;
+    bool has_filter;
+    int64_t begin;
+    int64_t length;
+} DumpState;
+
+static DumpState *dump_get_current(void)
+{
+    static DumpState current_dump;
+
+    return &current_dump;
+}
+
+static int dump_cleanup(DumpState *s)
+{
+    int ret = 0;
+
+    memory_mapping_list_free(&s->list);
+    s->cleanup(s->opaque);
+    if (s->resume) {
+        vm_start();
+    }
+
+    return ret;
+}
+
+static void dump_error(DumpState *s, const char *reason)
+{
+    dump_cleanup(s);
+}
+
+static int write_elf64_header(DumpState *s)
+{
+    Elf64_Ehdr elf_header;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
+    memcpy(&elf_header, ELFMAG, 4);
+    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
+    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
+    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
+    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
+                                                   endian);
+    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
+    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
+    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
+    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
+                                                     endian);
+    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
+    if (s->have_section) {
+        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
+
+        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
+        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
+                                                         endian);
+        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
+    }
+
+    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write elf header.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_header(DumpState *s)
+{
+    Elf32_Ehdr elf_header;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
+    memcpy(&elf_header, ELFMAG, 4);
+    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
+    elf_header.e_ident[EI_DATA] = endian;
+    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
+    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
+                                                   endian);
+    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
+    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
+    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
+    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
+                                                     endian);
+    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
+    if (s->have_section) {
+        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
+
+        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
+        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
+                                                         endian);
+        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
+    }
+
+    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write elf header.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
+                            int phdr_index, target_phys_addr_t offset)
+{
+    Elf64_Phdr phdr;
+    off_t phdr_offset;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&phdr, 0, sizeof(Elf64_Phdr));
+    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
+    phdr.p_offset = cpu_convert_to_target64(offset, endian);
+    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
+    if (offset == -1) {
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
+    }
+    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
+    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
+
+    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
+    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
+                            int phdr_index, target_phys_addr_t offset)
+{
+    Elf32_Phdr phdr;
+    off_t phdr_offset;
+    int ret;
+    int endian = s->dump_info.d_endian;
+
+    memset(&phdr, 0, sizeof(Elf32_Phdr));
+    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
+    phdr.p_offset = cpu_convert_to_target32(offset, endian);
+    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
+    if (offset == -1) {
+        phdr.p_filesz = 0;
+    } else {
+        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
+    }
+    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
+    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
+
+    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
+    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf64_notes(DumpState *s, int phdr_index,
+                             target_phys_addr_t *offset)
+{
+    CPUArchState *env;
+    int ret;
+    target_phys_addr_t begin = *offset;
+    Elf64_Phdr phdr;
+    off_t phdr_offset;
+    int id;
+    int endian = s->dump_info.d_endian;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        id = gdb_id(env);
+        ret = cpu_write_elf64_note(s->f, env, id, offset, s->opaque);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write elf notes.\n");
+            return -1;
+        }
+    }
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        ret = cpu_write_elf64_qemunote(s->f, env, offset, s->opaque);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write CPU status.\n");
+            return -1;
+        }
+    }
+
+    memset(&phdr, 0, sizeof(Elf64_Phdr));
+    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
+    phdr.p_offset = cpu_convert_to_target64(begin, endian);
+    phdr.p_paddr = 0;
+    phdr.p_filesz = cpu_convert_to_target64(*offset - begin, endian);
+    phdr.p_memsz = cpu_convert_to_target64(*offset - begin, endian);
+    phdr.p_vaddr = 0;
+
+    phdr_offset = sizeof(Elf64_Ehdr);
+    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf32_notes(DumpState *s, int phdr_index,
+                             target_phys_addr_t *offset)
+{
+    CPUArchState *env;
+    int ret;
+    target_phys_addr_t begin = *offset;
+    Elf32_Phdr phdr;
+    off_t phdr_offset;
+    int id;
+    int endian = s->dump_info.d_endian;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        id = gdb_id(env);
+        ret = cpu_write_elf32_note(s->f, env, id, offset, s->opaque);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write elf notes.\n");
+            return -1;
+        }
+    }
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        ret = cpu_write_elf32_qemunote(s->f, env, offset, s->opaque);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to write CPU status.\n");
+            return -1;
+        }
+    }
+
+    memset(&phdr, 0, sizeof(Elf32_Phdr));
+    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
+    phdr.p_offset = cpu_convert_to_target32(begin, endian);
+    phdr.p_paddr = 0;
+    phdr.p_filesz = cpu_convert_to_target32(*offset - begin, endian);
+    phdr.p_memsz = cpu_convert_to_target32(*offset - begin, endian);
+    phdr.p_vaddr = 0;
+
+    phdr_offset = sizeof(Elf32_Ehdr);
+    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write program header table.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static int write_elf_section(DumpState *s, target_phys_addr_t *offset, int type)
+{
+    Elf32_Shdr shdr32;
+    Elf64_Shdr shdr64;
+    int endian = s->dump_info.d_endian;
+    int shdr_size;
+    void *shdr;
+    int ret;
+
+    if (type == 0) {
+        shdr_size = sizeof(Elf32_Shdr);
+        memset(&shdr32, 0, shdr_size);
+        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
+        shdr = &shdr32;
+    } else {
+        shdr_size = sizeof(Elf64_Shdr);
+        memset(&shdr64, 0, shdr_size);
+        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
+        shdr = &shdr64;
+    }
+
+    ret = s->f(*offset, &shdr, shdr_size, s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to write section header table.\n");
+        return -1;
+    }
+
+    *offset += shdr_size;
+    return 0;
+}
+
+static int write_data(DumpState *s, void *buf, int length,
+                      target_phys_addr_t *offset)
+{
+    int ret;
+
+    ret = s->f(*offset, buf, length, s->opaque);
+    if (ret < 0) {
+        dump_error(s, "dump: failed to save memory.\n");
+        return -1;
+    }
+
+    *offset += length;
+    return 0;
+}
+
+/* write the memroy to vmcore. 1 page per I/O. */
+static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
+                        target_phys_addr_t *offset, int64_t *size)
+{
+    int i, ret;
+    int64_t writen_size = 0;
+
+    *size = block->length - start;
+    for (i = 0; i < *size / TARGET_PAGE_SIZE; i++) {
+        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
+                         TARGET_PAGE_SIZE, offset);
+        if (ret < 0) {
+            *size = writen_size;
+            return ret;
+        }
+
+        writen_size += TARGET_PAGE_SIZE;
+    }
+
+    if ((*size % TARGET_PAGE_SIZE) != 0) {
+        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
+                         *size % TARGET_PAGE_SIZE, offset);
+        if (ret < 0) {
+            *size = writen_size;
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* get the memory's offset in the vmcore */
+static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
+                                     DumpState *s)
+{
+    RAMBlock *block;
+    target_phys_addr_t offset = s->memory_offset;
+    int64_t size_in_block, start;
+
+    if (s->has_filter) {
+        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
+            return -1;
+        }
+    }
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (s->has_filter) {
+            if (block->offset >= s->begin + s->length ||
+                block->offset + block->length <= s->begin) {
+                /* This block is out of the range */
+                continue;
+            }
+
+            if (s->begin <= block->offset) {
+                start = block->offset;
+            } else {
+                start = s->begin;
+            }
+
+            size_in_block = block->length - (start - block->offset);
+            if (s->begin + s->length < block->offset + block->length) {
+                size_in_block -= block->offset + block->length -
+                                 (s->begin + s->length);
+            }
+        } else {
+            start = block->offset;
+            size_in_block = block->length;
+        }
+
+        if (phys_addr >= start && phys_addr < start + size_in_block) {
+            return phys_addr - start + offset;
+        }
+
+        offset += size_in_block;
+    }
+
+    return -1;
+}
+
+/* write elf header, PT_NOTE and elf note to vmcore. */
+static int dump_begin(DumpState *s)
+{
+    target_phys_addr_t offset;
+    int ret;
+
+    /*
+     * the vmcore's format is:
+     *   --------------
+     *   |  elf header |
+     *   --------------
+     *   |  PT_NOTE    |
+     *   --------------
+     *   |  PT_LOAD    |
+     *   --------------
+     *   |  ......     |
+     *   --------------
+     *   |  PT_LOAD    |
+     *   --------------
+     *   |  sec_hdr    |
+     *   --------------
+     *   |  elf note   |
+     *   --------------
+     *   |  memory     |
+     *   --------------
+     *
+     * we only know where the memory is saved after we write elf note into
+     * vmcore.
+     */
+
+    /* write elf header to vmcore */
+    if (s->dump_info.d_class == ELFCLASS64) {
+        ret = write_elf64_header(s);
+    } else {
+        ret = write_elf32_header(s);
+    }
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* write elf section and notes to vmcore */
+    if (s->dump_info.d_class == ELFCLASS64) {
+        if (s->have_section) {
+            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->sh_info;
+            if (write_elf_section(s, &offset, 1) < 0) {
+                return -1;
+            }
+        } else {
+            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
+        }
+        ret = write_elf64_notes(s, 0, &offset);
+    } else {
+        if (s->have_section) {
+            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->sh_info;
+            if (write_elf_section(s, &offset, 0) < 0) {
+                return -1;
+            }
+        } else {
+            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
+        }
+        ret = write_elf32_notes(s, 0, &offset);
+    }
+
+    if (ret < 0) {
+        return -1;
+    }
+
+    s->memory_offset = offset;
+    return 0;
+}
+
+/* write PT_LOAD to vmcore */
+static int dump_completed(DumpState *s)
+{
+    target_phys_addr_t offset;
+    MemoryMapping *memory_mapping;
+    int phdr_index = 1, ret;
+
+    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
+        offset = get_offset(memory_mapping->phys_addr, s);
+        if (s->dump_info.d_class == ELFCLASS64) {
+            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
+        } else {
+            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
+        }
+        if (ret < 0) {
+            return -1;
+        }
+    }
+
+    dump_cleanup(s);
+    return 0;
+}
+
+static int get_next_block(DumpState *s, RAMBlock *block)
+{
+    while (1) {
+        block = QLIST_NEXT(block, next);
+        if (!block) {
+            /* no more block */
+            return 1;
+        }
+
+        s->start = 0;
+        s->block = block;
+        if (s->has_filter) {
+            if (block->offset >= s->begin + s->length ||
+                block->offset + block->length <= s->begin) {
+                /* This block is out of the range */
+                continue;
+            }
+
+            if (s->begin > block->offset) {
+                s->start = s->begin - block->offset;
+            }
+        }
+
+        return 0;
+    }
+}
+
+/* write all memory to vmcore */
+static int dump_iterate(DumpState *s)
+{
+    RAMBlock *block;
+    target_phys_addr_t offset = s->memory_offset;
+    int64_t size;
+    int ret;
+
+    while(1) {
+        block = s->block;
+        ret = write_memory(s, block, s->start, &offset, &size);
+        if (ret == -1) {
+            return ret;
+        }
+
+        ret = get_next_block(s, block);
+        if (ret == 1) {
+            dump_completed(s);
+            return 0;
+        }
+    }
+}
+
+static int create_vmcore(DumpState *s)
+{
+    int ret;
+
+    ret = dump_begin(s);
+    if (ret < 0) {
+        return -1;
+    }
+
+    ret = dump_iterate(s);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static ram_addr_t get_start_block(DumpState *s)
+{
+    RAMBlock *block;
+
+    if (!s->has_filter) {
+        s->block = QLIST_FIRST(&ram_list.blocks);
+        return 0;
+    }
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (block->offset >= s->begin + s->length ||
+            block->offset + block->length <= s->begin) {
+            /* This block is out of the range */
+            continue;
+        }
+
+        s->block = block;
+        if (s->begin > block->offset ) {
+            s->start = s->begin - block->offset;
+        } else {
+            s->start = 0;
+        }
+        return s->start;
+    }
+
+    return -1;
+}
+
+static DumpState *dump_init(bool paging, bool has_filter, int64_t begin,
+                            int64_t length, Error **errp)
+{
+    CPUArchState *env;
+    DumpState *s = dump_get_current();
+    int ret;
+
+    if (runstate_is_running()) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        s->resume = true;
+    } else {
+        s->resume = false;
+    }
+
+    s->has_filter = has_filter;
+    s->begin = begin;
+    s->length = length;
+    s->start = get_start_block(s);
+    if (s->start == -1) {
+        error_set(errp, QERR_INVALID_PARAMETER, "begin");
+        goto cleanup;
+    }
+
+    /*
+     * Get the dump info: endianness, class and architecture.
+     * If the target architecture is not supported, cpu_get_dump_info() will
+     * return -1.
+     *
+     * If we use KVM, we should synchronize the registers before we get the
+     * dump info.
+     */
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu_synchronize_state(env);
+    }
+
+    ret = cpu_get_dump_info(&s->dump_info);
+    if (ret < 0) {
+        error_set(errp, QERR_UNSUPPORTED);
+        goto cleanup;
+    }
+
+    /* get memory mapping */
+    memory_mapping_list_init(&s->list);
+    if (paging) {
+        qemu_get_guest_memory_mapping(&s->list);
+    } else {
+        qemu_get_guest_simple_memory_mapping(&s->list);
+    }
+
+    if (s->has_filter) {
+        memory_mapping_filter(&s->list, s->begin, s->length);
+    }
+
+    /*
+     * calculate phdr_num
+     *
+     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
+     */
+    s->phdr_num = 1; /* PT_NOTE */
+    if (s->list.num < UINT16_MAX - 2) {
+        s->phdr_num += s->list.num;
+        s->have_section = false;
+    } else {
+        s->have_section = true;
+        s->phdr_num = PN_XNUM;
+        s->sh_info = 1; /* PT_NOTE */
+
+        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
+        if (s->list.num <= UINT32_MAX - 1) {
+            s->sh_info += s->list.num;
+        } else {
+            s->sh_info = UINT32_MAX;
+        }
+    }
+
+    return s;
+
+cleanup:
+    if (s->resume) {
+        vm_start();
+    }
+
+    return NULL;
+}
+
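+/*
+ * Seek to 'offset' in the dump file and write 'size' bytes from 'buf',
+ * retrying on EINTR/EAGAIN because the fd may come from the user and can be
+ * non-blocking.
+ */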
+static int fd_write_vmcore(target_phys_addr_t offset, void *buf, size_t size,
+                           void *opaque)
+{
+    int fd = (int)(intptr_t)opaque;
+    off_t ret;
+    size_t written_size;
+
+    while (1) {
+        ret = lseek(fd, offset, SEEK_SET);
+        if (ret < 0) {
+            if (errno != EINTR && errno != EAGAIN) {
+                return -1;
+            }
+            continue;
+        }
+        break;
+    }
+
+    /* The fd may be passed from the user, and it can be non-blocking */
+    while (size) {
+        written_size = qemu_write_full(fd, buf, size);
+        if (written_size != size && errno != EAGAIN) {
+            return -1;
+        }
+
+        buf += written_size;
+        size -= written_size;
+    }
+
+    return 0;
+}
+
+static void fd_cleanup(void *opaque)
+{
+    int fd = (int)(intptr_t)opaque;
+
+    if (fd != -1) {
+        close(fd);
+    }
+}
+
+static DumpState *dump_init_fd(int fd, bool paging, bool has_filter,
+                               int64_t begin, int64_t length, Error **errp)
+{
+    DumpState *s = dump_init(paging, has_filter, begin, length, errp);
+
+    if (s == NULL) {
+        return NULL;
+    }
+
+    s->f = fd_write_vmcore;
+    s->cleanup = fd_cleanup;
+    s->opaque = (void *)(intptr_t)fd;
+
+    return s;
+}
+
+void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
+                           int64_t begin, bool has_length, int64_t length,
+                           Error **errp)
+{
+    const char *p;
+    int fd = -1;
+    DumpState *s;
+
+    if (has_begin && !has_length) {
+        error_set(errp, QERR_MISSING_PARAMETER, "length");
+        return;
+    }
+    if (!has_begin && has_length) {
+        error_set(errp, QERR_MISSING_PARAMETER, "begin");
+        return;
+    }
+
+#if !defined(WIN32)
+    if (strstart(file, "fd:", &p)) {
+        fd = monitor_get_fd(cur_mon, p);
+        if (fd == -1) {
+            error_set(errp, QERR_FD_NOT_FOUND, p);
+            return;
+        }
+    }
+#endif
+
+    if (strstart(file, "file:", &p)) {
+        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
+        if (fd < 0) {
+            error_set(errp, QERR_OPEN_FILE_FAILED, p);
+            return;
+        }
+    }
+
+    if (fd == -1) {
+        error_set(errp, QERR_INVALID_PARAMETER, "file");
+        return;
+    }
+
+    s = dump_init_fd(fd, paging, has_begin, begin, length, errp);
+    if (!s) {
+        return;
+    }
+
+    if (create_vmcore(s) < 0) {
+        error_set(errp, QERR_IO_ERROR);
+    }
+}
diff --git a/elf.h b/elf.h
index 2e05d34..6a10657 100644
--- a/elf.h
+++ b/elf.h
@@ -1000,6 +1000,11 @@  typedef struct elf64_sym {
 
 #define EI_NIDENT	16
 
+/* Special value for e_phnum.  This indicates that the real number of
+   program headers is too large to fit into e_phnum.  Instead the real
+   value is in the field sh_info of section 0.  */
+#define PN_XNUM         0xffff
+
 typedef struct elf32_hdr{
   unsigned char	e_ident[EI_NIDENT];
   Elf32_Half	e_type;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index bd35a3e..d270fd1 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -879,6 +879,34 @@  server will ask the spice/vnc client to automatically reconnect using the
 new parameters (if specified) once the vm migration finished successfully.
 ETEXI
 
+#if defined(CONFIG_HAVE_CORE_DUMP)
+    {
+        .name       = "dump-guest-memory",
+        .args_type  = "paging:-p,protocol:s,begin:i?,length:i?",
+        .params     = "[-p] protocol [begin] [length]",
+        .help       = "dump guest memory to file"
+                      "\n\t\t\t begin(optional): the starting physical address"
+                      "\n\t\t\t length(optional): the memory size, in bytes",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd = hmp_dump_guest_memory,
+    },
+
+STEXI
+@item dump-guest-memory [-p] @var{protocol} @var{begin} @var{length}
+@findex dump-guest-memory
+Dump guest memory to @var{protocol}. The file can be processed with crash or
+gdb.
+  protocol: destination file (starting with "file:") or destination file
+            descriptor (starting with "fd:")
+    paging: do paging to get guest's memory mapping
+     begin: the starting physical address. It's optional, and should be
+            specified together with length.
+    length: the memory size, in bytes. It's optional, and should be specified
+            together with begin.
+ETEXI
+#endif
+
     {
         .name       = "snapshot_blkdev",
         .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
diff --git a/hmp.c b/hmp.c
index 9cf2d13..75ec9a9 100644
--- a/hmp.c
+++ b/hmp.c
@@ -934,3 +934,25 @@  void hmp_migrate(Monitor *mon, const QDict *qdict)
         qemu_mod_timer(status->timer, qemu_get_clock_ms(rt_clock));
     }
 }
+
+void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    int paging = qdict_get_try_bool(qdict, "paging", 0);
+    const char *file = qdict_get_str(qdict, "protocol");
+    bool has_begin = qdict_haskey(qdict, "begin");
+    bool has_length = qdict_haskey(qdict, "length");
+    int64_t begin = 0;
+    int64_t length = 0;
+
+    if (has_begin) {
+        begin = qdict_get_int(qdict, "begin");
+    }
+    if (has_length) {
+        length = qdict_get_int(qdict, "length");
+    }
+
+    qmp_dump_guest_memory(paging, file, has_begin, begin, has_length, length,
+                          &errp);
+    hmp_handle_error(mon, &errp);
+}
diff --git a/hmp.h b/hmp.h
index 8807853..36d69c6 100644
--- a/hmp.h
+++ b/hmp.h
@@ -60,5 +60,6 @@  void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
+void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/memory_mapping.c b/memory_mapping.c
index 9a2ffe6..ff52c9d 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -207,3 +207,30 @@  void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
         create_new_memory_mapping(list, block->offset, 0, block->length);
     }
 }
+
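+/*
+ * Clip the mapping list to [begin, begin + length): drop mappings that are
+ * completely outside the range and trim those that cross its boundaries.
+ */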
+void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
+                           int64_t length)
+{
+    MemoryMapping *cur, *next;
+
+    QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
+        if (cur->phys_addr >= begin + length ||
+            cur->phys_addr + cur->length <= begin) {
+            QTAILQ_REMOVE(&list->head, cur, next);
+            list->num--;
+            continue;
+        }
+
+        if (cur->phys_addr < begin) {
+            cur->length -= begin - cur->phys_addr;
+            if (cur->virt_addr) {
+                cur->virt_addr += begin - cur->phys_addr;
+            }
+            cur->phys_addr = begin;
+        }
+
+        if (cur->phys_addr + cur->length > begin + length) {
+            cur->length -= cur->phys_addr + cur->length - begin - length;
+        }
+    }
+}
diff --git a/memory_mapping.h b/memory_mapping.h
index a583e44..b795678 100644
--- a/memory_mapping.h
+++ b/memory_mapping.h
@@ -62,4 +62,7 @@  static inline int qemu_get_guest_memory_mapping(MemoryMappingList *list)
 /* get guest's memory mapping without do paging(virtual address is 0). */
 void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
 
+void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
+                           int64_t length);
+
 #endif
diff --git a/qapi-schema.json b/qapi-schema.json
index 0d11d6e..b0d885f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1701,3 +1701,21 @@ 
 # Since: 1.1
 ##
 { 'command': 'xen-save-devices-state', 'data': {'filename': 'str'} }
+
+##
+# @dump-guest-memory:
+#
+# Dump guest's memory to vmcore.
+#
+# @paging: if true, do paging to get guest's memory mapping
+# @protocol: the filename or file descriptor of the vmcore.
+# @begin: #optional if specified, the starting physical address.
+# @length: #optional if specified, the memory size, in bytes.
+#
+# Returns: nothing on success
+#
+# Since: 1.1
+##
+{ 'command': 'dump-guest-memory',
+  'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
+            '*length': 'int' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index c626ba8..ca4484b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -606,6 +606,44 @@  Example:
 
 EQMP
 
+#if defined(CONFIG_HAVE_CORE_DUMP)
+    {
+        .name       = "dump-guest-memory",
+        .args_type  = "paging:-p,protocol:s,begin:i?,end:i?",
+        .params     = "[-p] protocol [begin] [length]",
+        .help       = "dump guest memory to file",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = qmp_marshal_input_dump_guest_memory,
+    },
+
+SQMP
+dump-guest-memory
+-----------------
+
+Dump guest memory to file. The file can be processed with crash or gdb.
+
+Arguments:
+
+- "paging": do paging to get guest's memory mapping (json-bool)
+- "protocol": destination file(started with "file:") or destination file
+              descriptor (started with "fd:") (json-string)
+- "begin": the starting physical address. It's optional, and should be specified
+           with length together (json-int)
+- "length": the memory size, in bytes. It's optional, and should be specified
+            with begin together (json-int)
+
+Example:
+
+-> { "execute": "dump-guest-memory", "arguments": { "protocol": "fd:dump" } }
+<- { "return": {} }
+
+Notes:
+
+(1) All boolean arguments default to false
+
+EQMP
+#endif
+
     {
         .name       = "netdev_add",
         .args_type  = "netdev:O",