Patchwork [v6,06/11] dump: add API to write dump header

login
register
mail settings
Submitter Qiao Nuohan
Date Jan. 5, 2014, 7:27 a.m.
Message ID <1388906864-1083-7-git-send-email-qiaonuohan@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/306921/
State New
Headers show

Comments

Qiao Nuohan - Jan. 5, 2014, 7:27 a.m.
the functions are used to write header of kdump-compressed format to vmcore.
Header of kdump-compressed format includes:
1. common header: DiskDumpHeader32 / DiskDumpHeader64
2. sub header: KdumpSubHeader32 / KdumpSubHeader64
3. extra information: only elf notes here

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 dump.c                |  199 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/sysemu/dump.h |   96 ++++++++++++++++++++++++
 2 files changed, 295 insertions(+), 0 deletions(-)
Laszlo Ersek - Jan. 7, 2014, 11:38 a.m.
comments below

On 01/05/14 08:27, Qiao Nuohan wrote:
> the functions are used to write header of kdump-compressed format to vmcore.
> Header of kdump-compressed format includes:
> 1. common header: DiskDumpHeader32 / DiskDumpHeader64
> 2. sub header: KdumpSubHeader32 / KdumpSubHeader64
> 3. extra information: only elf notes here
> 
> Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
> ---
>  dump.c                |  199 +++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sysemu/dump.h |   96 ++++++++++++++++++++++++
>  2 files changed, 295 insertions(+), 0 deletions(-)
> 
> diff --git a/dump.c b/dump.c
> index 3b9cf00..e3623b9 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -77,8 +77,16 @@ typedef struct DumpState {
>      int64_t length;
>      Error **errp;
>  
> +    bool flag_flatten;
> +    uint32_t nr_cpus;
> +    size_t page_size;
> +    uint64_t max_mapnr;
> +    size_t len_dump_bitmap;
>      void *note_buf;
>      size_t note_buf_offset;
> +    off_t offset_dump_bitmap;
> +    off_t offset_page;
> +    uint32_t flag_compress;
>  } DumpState;
>  
>  static int dump_cleanup(DumpState *s)
> @@ -773,6 +781,197 @@ static int buf_write_note(void *buf, size_t size, void *opaque)
>      return 0;
>  }
>  
> +/* write common header, sub header and elf note to vmcore */
> +static int create_header32(DumpState *s)
> +{
> +    int ret = 0;
> +    DiskDumpHeader32 *dh = NULL;
> +    KdumpSubHeader32 *kh = NULL;
> +    size_t size;
> +
> +    /* write common header, the version of kdump-compressed format is 5th */

I think this is a typo (it should say 6th, shouldn't it?), but it's not
critical.

> +    size = sizeof(DiskDumpHeader32);
> +    dh = g_malloc0(size);
> +
> +    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
> +    dh->header_version = 6;
> +    dh->block_size = s->page_size;
> +    dh->sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
> +    dh->sub_hdr_size = DIV_ROUND_UP(dh->sub_hdr_size, dh->block_size);
> +    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
> +    dh->max_mapnr = MIN(s->max_mapnr, UINT_MAX);

You could have simply converted / truncated "s->max_mapnr" to uint32_t
as part of the assignment, but the MIN(..., UINT_MAX) doesn't hurt either.

> +    dh->nr_cpus = s->nr_cpus;
> +    dh->bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, dh->block_size) * 2;
> +    memcpy(&(dh->utsname.machine), "i686", 4);
> +
> +    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
> +        dh->status |= DUMP_DH_COMPRESSED_ZLIB;
> +    }
> +#ifdef CONFIG_LZO
> +    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
> +        dh->status |= DUMP_DH_COMPRESSED_LZO;
> +    }
> +#endif
> +#ifdef CONFIG_SNAPPY
> +    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
> +        dh->status |= DUMP_DH_COMPRESSED_SNAPPY;
> +    }
> +#endif
> +
> +    if (write_buffer(s->fd, s->flag_flatten, 0, dh, size) < 0) {
> +        ret = -1;
> +        goto out;
> +    }

The following fields in "dh" are left zero-filled:
- timestamp
- total_ram_blocks
- device_blocks
- written_blocks
- current_cpu

I guess we'll either overwrite them later or it's OK to leave them all
zeroed.

Also... is it OK to write these fields to the file in host native byte
order? What happens if an i686 / x86_64 target is emulated on a BE host?

> +
> +    /* write sub header */
> +    size = sizeof(KdumpSubHeader32);
> +    kh = g_malloc0(size);
> +
> +    /* 64bit max_mapnr_64 */
> +    kh->max_mapnr_64 = s->max_mapnr;
> +    kh->phys_base = PHYS_BASE;
> +    kh->dump_level = DUMP_LEVEL;
> +
> +    kh->offset_note = DISKDUMP_HEADER_BLOCKS * dh->block_size + size;
> +    kh->note_size = s->note_size;
> +
> +    if (write_buffer(s->fd, s->flag_flatten, dh->block_size, kh, size) < 0) {
> +        ret = -1;
> +        goto out;
> +    }

- Same question about endianness as above.

- Again, many fields left zeroed in "kh", but I guess that's OK.

- I would prefer if you repeated the multiplication by
DISKDUMP_HEADER_BLOCKS verbatim in the "offset" write_buffer() argument.

- When this write_buffer() is directed to a regular file in non-flat
mode, then the file might become sparse (you jump over a range of
offsets with lseek() in write_buffer()). If the output has been opened
by qemu itself (ie. "file:....", in qmp_dump_guest_memory()), then due
to the O_TRUNC we can't seek over preexistent data (and keep garbage in
the file). When libvirt pre-opens the file (to send over the fd later),
in doCoreDump(), it also passes O_TRUNC. OK.

> +
> +    /* write note */
> +    s->note_buf = g_malloc(s->note_size);
> +    s->note_buf_offset = 0;
> +
> +    /* use s->note_buf to store notes temporarily */
> +    if (write_elf32_notes(buf_write_note, s) < 0) {
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    if (write_buffer(s->fd, s->flag_flatten, kh->offset_note, s->note_buf,
> +                     s->note_size) < 0) {
> +        ret = -1;
> +        goto out;
> +    }

Right before the write_buffer() call, we know that

  s->note_buf_offset <= s->note_size

because buf_write_note() ensures it.

We know even that

  s->note_buf_offset == s->note_size

there, because write_elf32_notes() produces exactly as many bytes as
we've calculated in advance, in cpu_get_note_size().

However, this is not very easy to see, hence I'd prefer adding *one* of
the following three:
- an assert() before write_buffer() that states the above equality, or
- passing "s->note_buf_offset" to write_buffer() as "size" argument, or
- allocating "s->note_buf" with g_malloc0().

(The 64-bit version below goes with the g_malloc0() choice, which BTW
makes the two versions a bit inconsistent currently.)

> +
> +    /* get offset of dump_bitmap */
> +    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) *
> +                             dh->block_size;

So, DISKDUMP_HEADER_BLOCKS covers "dh", and "dh->sub_hdr_size" covers
KdumpSubHeader32 plus the note. Seems OK.

> +
> +    /* get offset of page */
> +    s->offset_page = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size +
> +                      dh->bitmap_blocks) * dh->block_size;

Seems OK too. I guess we'll use these fields later.

Both of these multiplications are done in "unsigned int" (uint32_t)
before converting the product to "off_t".

Are you sure even the second one will fit? "dh->bitmap_blocks" is the
interesting addend. 1 byte in one bitmap covers 8*4K = 32K bytes of
guest RAM. We have two bitmaps. So, if we had close to 4G bytes in the
two bitmaps together (close to 2G bytes in each), we could cover close
to 64 TB guest RAM without overflowing the multiplication. Seems sufficient.


> +
> +out:
> +    g_free(dh);
> +    g_free(kh);
> +    g_free(s->note_buf);
> +
> +    return ret;
> +}
> +
> +/* write common header, sub header and elf note to vmcore */
> +static int create_header64(DumpState *s)
> +{
> +    int ret = 0;
> +    DiskDumpHeader64 *dh = NULL;
> +    KdumpSubHeader64 *kh = NULL;
> +    size_t size;
> +
> +    /* write common header, the version of kdump-compressed format is 5th */
> +    size = sizeof(DiskDumpHeader64);
> +    dh = g_malloc0(size);
> +
> +    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
> +    dh->header_version = 6;
> +    dh->block_size = s->page_size;
> +    dh->sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
> +    dh->sub_hdr_size = DIV_ROUND_UP(dh->sub_hdr_size, dh->block_size);
> +    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
> +    dh->max_mapnr = MIN(s->max_mapnr, UINT_MAX);
> +    dh->nr_cpus = s->nr_cpus;
> +    dh->bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, dh->block_size) * 2;
> +    memcpy(&(dh->utsname.machine), "x86_64", 6);
> +
> +    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
> +        dh->status |= DUMP_DH_COMPRESSED_ZLIB;
> +    }
> +#ifdef CONFIG_LZO
> +    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
> +        dh->status |= DUMP_DH_COMPRESSED_LZO;
> +    }
> +#endif
> +#ifdef CONFIG_SNAPPY
> +    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
> +        dh->status |= DUMP_DH_COMPRESSED_SNAPPY;
> +    }
> +#endif
> +
> +    if (write_buffer(s->fd, s->flag_flatten, 0, dh, size) < 0) {
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    /* write sub header */
> +    size = sizeof(KdumpSubHeader64);
> +    kh = g_malloc0(size);
> +
> +    /* 64bit max_mapnr_64 */
> +    kh->max_mapnr_64 = s->max_mapnr;
> +    kh->phys_base = PHYS_BASE;
> +    kh->dump_level = DUMP_LEVEL;
> +
> +    kh->offset_note = DISKDUMP_HEADER_BLOCKS * dh->block_size + size;
> +    kh->note_size = s->note_size;
> +
> +    if (write_buffer(s->fd, s->flag_flatten, dh->block_size, kh, size) < 0) {
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    /* write note */
> +    s->note_buf = g_malloc0(s->note_size);
> +    s->note_buf_offset = 0;
> +
> +    /* use s->note_buf to store notes temporarily */
> +    if (write_elf64_notes(buf_write_note, s) < 0) {
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    if (write_buffer(s->fd, s->flag_flatten, kh->offset_note, s->note_buf,
> +                     s->note_size) < 0) {
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    /* get offset of dump_bitmap */
> +    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) *
> +                             dh->block_size;
> +
> +    /* get offset of page */
> +    s->offset_page = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size +
> +                      dh->bitmap_blocks) * dh->block_size;
> +
> +out:
> +    g_free(dh);
> +    g_free(kh);
> +    g_free(s->note_buf);
> +
> +    return ret;
> +}

I diffed this against the 32-bit version of the function, and the only
"surprising" difference is

-+    s->note_buf = g_malloc(s->note_size);
++    s->note_buf = g_malloc0(s->note_size);

which I already mentioned above.


I couldn't find anything in this patch that I'd call a direct bug. I
think you can address what you want from the above later too.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Andreas Färber - Jan. 7, 2014, 11:49 a.m.
Am 07.01.2014 12:38, schrieb Laszlo Ersek:
> Also... is it OK to write these fields to the file in host native byte
> order? What happens if an i686 / x86_64 target is emulated on a BE host?

For the target-s390x implementation Alex required to take care of
endianness for the s390x-on-x86 case, so it would probably make sense to
handle it for kdump format as well. Thanks for bringing it up!

BTW my original concerns related to CPU seem to have been addressed, so
I'm hoping this can go via the QMP queue once ready.

Regards,
Andreas
Qiao Nuohan - Jan. 13, 2014, 10:03 a.m.
Sorry for responding late.

On 01/07/2014 07:38 PM, Laszlo Ersek wrote:
> The following fields in "dh" are left zero-filled:
> - timestamp
> - total_ram_blocks
> - device_blocks
> - written_blocks
> - current_cpu
>
> I guess we'll either overwrite them later or it's OK to leave them all
> zeroed.

Yes, they are all left zeroed here. Tools like crash will get the exact data
from the dumped memory.

>
> Also... is it OK to write these fields to the file in host native byte
> order? What happens if an i686 / x86_64 target is emulated on a BE host?

I will add the conversion work in v7.

>
>> >  +
>> >  +    /* write sub header */
>> >  +    size = sizeof(KdumpSubHeader32);
>> >  +    kh = g_malloc0(size);
>> >  +
>> >  +    /* 64bit max_mapnr_64 */
>> >  +    kh->max_mapnr_64 = s->max_mapnr;
>> >  +    kh->phys_base = PHYS_BASE;
>> >  +    kh->dump_level = DUMP_LEVEL;
>> >  +
>> >  +    kh->offset_note = DISKDUMP_HEADER_BLOCKS * dh->block_size + size;
>> >  +    kh->note_size = s->note_size;
>> >  +
>> >  +    if (write_buffer(s->fd, s->flag_flatten, dh->block_size, kh, size)<  0) {
>> >  +        ret = -1;
>> >  +        goto out;
>> >  +    }
> - Same question about endianness as above.
>
> - Again, many fields left zeroed in "kh", but I guess that's OK.
>
> - I would prefer if you repeated the multiplication by
> DISKDUMP_HEADER_BLOCKS verbatim in the "offset" write_buffer() argument.

write_buffer(s->fd, s->flag_flatten, DISKDUMP_HEADER_BLOCKS * dh->block_size,
kh, size) ?

Yes, I should change it.

>
> - When this write_buffer() is directed to a regular file in non-flat
> mode, then the file might become sparse (you jump over a range of
> offsets with lseek() in write_buffer()). If the output has been opened
> by qemu itself (ie."file:....", in qmp_dump_guest_memory()), then due
> to the O_TRUNC we can't seek over preexistent data (and keep garbage in
> the file). When libvirt pre-opens the file (to send over the fd later),
> in doCoreDump(), it also passes O_TRUNC. OK.
>

Do you mean that, because of O_TRUNC, the seek will go past the end of the file,
which may cause some problem?
Laszlo Ersek - Jan. 13, 2014, 10:39 a.m.
On 01/13/14 11:03, Qiao Nuohan wrote:
> Sorry for responsing late.
> 
> On 01/07/2014 07:38 PM, Laszlo Ersek wrote:

>>> >  +    kh->offset_note = DISKDUMP_HEADER_BLOCKS * dh->block_size +
>>> size;
>>> >  +    kh->note_size = s->note_size;
>>> >  +
>>> >  +    if (write_buffer(s->fd, s->flag_flatten, dh->block_size, kh,
>>> size)<  0) {
>>> >  +        ret = -1;
>>> >  +        goto out;
>>> >  +    }

>> - I would prefer if you repeated the multiplication by
>> DISKDUMP_HEADER_BLOCKS verbatim in the "offset" write_buffer() argument.
> 
> write_buffer(s->fd, s->flag_flatten, DISKDUMP_HEADER_BLOCKS *
> dh->block_size,
> kh, size) ?
> 
> Yes, I should change it.

Yes that's what I meant.

> 
>>
>> - When this write_buffer() is directed to a regular file in non-flat
>> mode, then the file might become sparse (you jump over a range of
>> offsets with lseek() in write_buffer()). If the output has been opened
>> by qemu itself (ie."file:....", in qmp_dump_guest_memory()), then due
>> to the O_TRUNC we can't seek over preexistent data (and keep garbage in
>> the file). When libvirt pre-opens the file (to send over the fd later),
>> in doCoreDump(), it also passes O_TRUNC. OK.
>>
> 
> Do you mean because of O_TRUNC,seek will exceed the end of the file
> that may cause some problem?

I meant that lseek() would seek over an unwritten portion of the file.
If that portion had any kind of data written into it earlier, then that
data would now likely turn into garbage (lose meaning, become truncated
etc.) It wouldn't be corrupted or anything like that, it would just
become a leftover with potential to cause misinterpretation.

But, since we have O_TRUNC at open() time, we're seeking past the end of
the file, and this sought-over portion will read back as zeroes (and the
file might become "sparse", dependent on the filesystem and the size of
the range sought-over).

Seeking past the end of the file is explicitly allowed by POSIX:

    The lseek() function shall allow the file offset to be set beyond
    the end of the existing data in the file. If data is later written
    at this point, subsequent reads of data in the gap shall return
    bytes with the value 0 until data is actually written into the gap.

http://pubs.opengroup.org/onlinepubs/9699919799/functions/lseek.html

So this is fine.

Thanks
Laszlo
Qiao Nuohan - Jan. 14, 2014, 2:07 a.m.
On 01/13/2014 06:39 PM, Laszlo Ersek wrote:
>>> >>
>>> >>  - When this write_buffer() is directed to a regular file in non-flat
>>> >>  mode, then the file might become sparse (you jump over a range of
>>> >>  offsets with lseek() in write_buffer()). If the output has been opened
>>> >>  by qemu itself (ie."file:....", in qmp_dump_guest_memory()), then due
>>> >>  to the O_TRUNC we can't seek over preexistent data (and keep garbage in
>>> >>  the file). When libvirt pre-opens the file (to send over the fd later),
>>> >>  in doCoreDump(), it also passes O_TRUNC. OK.
>>> >>
>> >
>> >  Do you mean because of O_TRUNC,seek will exceed the end of the file
>> >  that may cause some problem?
> I meant that lseek() would seek over an unwritten portion of the file.
> If that portion had any kind of data written into it earlier, then that
> data would now likely turn into garbage (lose meaning, become truncated
> etc.) It wouldn't be corrupted or anything like that, it would just
> become a leftover with potential to cause misinterpretation.
>
> But, since we have O_TRUNC at open() time, we're seeking past the end of
> the file, and this sought-over portion will read back as zeroes (and the
> file might become "sparse", dependent on the filesystem and the size of
> the range sought-over).
>
> Seeking past the end of the file is explicitly allowed by POSIX:
>
>      The lseek() function shall allow the file offset to be set beyond
>      the end of the existing data in the file. If data is later written
>      at this point, subsequent reads of data in the gap shall return
>      bytes with the value 0 until data is actually written into the gap.
>
> http://pubs.opengroup.org/onlinepubs/9699919799/functions/lseek.html
>
> So this is fine.

Thanks for your explanation. I think it would be better to abandon the non-flat
mode to avoid potential risk.
Laszlo Ersek - Jan. 14, 2014, 2:29 a.m.
On 01/14/14 03:07, Qiao Nuohan wrote:
> On 01/13/2014 06:39 PM, Laszlo Ersek wrote:
>>>> >>
>>>> >>  - When this write_buffer() is directed to a regular file in
>>>> non-flat
>>>> >>  mode, then the file might become sparse (you jump over a range of
>>>> >>  offsets with lseek() in write_buffer()). If the output has been
>>>> opened
>>>> >>  by qemu itself (ie."file:....", in qmp_dump_guest_memory()),
>>>> then due
>>>> >>  to the O_TRUNC we can't seek over preexistent data (and keep
>>>> garbage in
>>>> >>  the file). When libvirt pre-opens the file (to send over the fd
>>>> later),
>>>> >>  in doCoreDump(), it also passes O_TRUNC. OK.
>>>> >>
>>> >
>>> >  Do you mean because of O_TRUNC,seek will exceed the end of the file
>>> >  that may cause some problem?
>> I meant that lseek() would seek over an unwritten portion of the file.
>> If that portion had any kind of data written into it earlier, then that
>> data would now likely turn into garbage (lose meaning, become truncated
>> etc.) It wouldn't be corrupted or anything like that, it would just
>> become a leftover with potential to cause misinterpretation.
>>
>> But, since we have O_TRUNC at open() time, we're seeking past the end of
>> the file, and this sought-over portion will read back as zeroes (and the
>> file might become "sparse", dependent on the filesystem and the size of
>> the range sought-over).
>>
>> Seeking past the end of the file is explicitly allowed by POSIX:
>>
>>      The lseek() function shall allow the file offset to be set beyond
>>      the end of the existing data in the file. If data is later written
>>      at this point, subsequent reads of data in the gap shall return
>>      bytes with the value 0 until data is actually written into the gap.
>>
>> http://pubs.opengroup.org/onlinepubs/9699919799/functions/lseek.html
>>
>> So this is fine.
> 
> Thanks for your explanation. I think it would be better to abandon the
> non-flat
> mode to avoid potential risk.

I can't really provide any input to that decision -- I have no clue
which tools support which format. The non-flat (ie. random-access,
regular file) format appears more space- and computation-efficient, and
I thought that would be the "natural" choice. The flat (non-seekable)
format was a surprise to me -- I wouldn't have thought that any debugger
could directly consume that format.

So it's really your call. Again, the lseek()s seemed fine to me on POSIX
platforms.

Thanks,
Laszlo
Qiao Nuohan - Jan. 14, 2014, 2:42 a.m.
On 01/14/2014 10:29 AM, Laszlo Ersek wrote:
> I can't really provide any input to that decision -- I have no clue
> which tools support which format. The non-flat (ie. random-access,
> regular file) format appears more space- and computation-efficient, and
> I thought that would be the "natural" choice. The flat (non-seekable)
> format was a surprise to me -- I wouldn't have thought that any debugger
> could directly consume that format.

The flat-mode comes from makedumpfile as kdump-compressed format, and the crash
utility already supports it.

Patch

diff --git a/dump.c b/dump.c
index 3b9cf00..e3623b9 100644
--- a/dump.c
+++ b/dump.c
@@ -77,8 +77,16 @@  typedef struct DumpState {
     int64_t length;
     Error **errp;
 
+    bool flag_flatten;
+    uint32_t nr_cpus;
+    size_t page_size;
+    uint64_t max_mapnr;
+    size_t len_dump_bitmap;
     void *note_buf;
     size_t note_buf_offset;
+    off_t offset_dump_bitmap;
+    off_t offset_page;
+    uint32_t flag_compress;
 } DumpState;
 
 static int dump_cleanup(DumpState *s)
@@ -773,6 +781,197 @@  static int buf_write_note(void *buf, size_t size, void *opaque)
     return 0;
 }
 
+/* write common header, sub header and elf note to vmcore */
+static int create_header32(DumpState *s)
+{
+    int ret = 0;
+    DiskDumpHeader32 *dh = NULL;
+    KdumpSubHeader32 *kh = NULL;
+    size_t size;
+
+    /* write common header, the version of kdump-compressed format is 6th */
+    size = sizeof(DiskDumpHeader32);
+    dh = g_malloc0(size);
+
+    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
+    dh->header_version = 6;
+    dh->block_size = s->page_size;
+    dh->sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
+    dh->sub_hdr_size = DIV_ROUND_UP(dh->sub_hdr_size, dh->block_size);
+    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
+    dh->max_mapnr = MIN(s->max_mapnr, UINT_MAX);
+    dh->nr_cpus = s->nr_cpus;
+    dh->bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, dh->block_size) * 2;
+    memcpy(&(dh->utsname.machine), "i686", 4);
+
+    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
+        dh->status |= DUMP_DH_COMPRESSED_ZLIB;
+    }
+#ifdef CONFIG_LZO
+    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
+        dh->status |= DUMP_DH_COMPRESSED_LZO;
+    }
+#endif
+#ifdef CONFIG_SNAPPY
+    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
+        dh->status |= DUMP_DH_COMPRESSED_SNAPPY;
+    }
+#endif
+
+    if (write_buffer(s->fd, s->flag_flatten, 0, dh, size) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    /* write sub header */
+    size = sizeof(KdumpSubHeader32);
+    kh = g_malloc0(size);
+
+    /* 64bit max_mapnr_64 */
+    kh->max_mapnr_64 = s->max_mapnr;
+    kh->phys_base = PHYS_BASE;
+    kh->dump_level = DUMP_LEVEL;
+
+    kh->offset_note = DISKDUMP_HEADER_BLOCKS * dh->block_size + size;
+    kh->note_size = s->note_size;
+
+    if (write_buffer(s->fd, s->flag_flatten, dh->block_size, kh, size) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    /* write note */
+    s->note_buf = g_malloc(s->note_size);
+    s->note_buf_offset = 0;
+
+    /* use s->note_buf to store notes temporarily */
+    if (write_elf32_notes(buf_write_note, s) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    if (write_buffer(s->fd, s->flag_flatten, kh->offset_note, s->note_buf,
+                     s->note_size) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    /* get offset of dump_bitmap */
+    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) *
+                             dh->block_size;
+
+    /* get offset of page */
+    s->offset_page = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size +
+                      dh->bitmap_blocks) * dh->block_size;
+
+out:
+    g_free(dh);
+    g_free(kh);
+    g_free(s->note_buf);
+
+    return ret;
+}
+
+/* write common header, sub header and elf note to vmcore */
+static int create_header64(DumpState *s)
+{
+    int ret = 0;
+    DiskDumpHeader64 *dh = NULL;
+    KdumpSubHeader64 *kh = NULL;
+    size_t size;
+
+    /* write common header, the version of kdump-compressed format is 6th */
+    size = sizeof(DiskDumpHeader64);
+    dh = g_malloc0(size);
+
+    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
+    dh->header_version = 6;
+    dh->block_size = s->page_size;
+    dh->sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
+    dh->sub_hdr_size = DIV_ROUND_UP(dh->sub_hdr_size, dh->block_size);
+    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
+    dh->max_mapnr = MIN(s->max_mapnr, UINT_MAX);
+    dh->nr_cpus = s->nr_cpus;
+    dh->bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, dh->block_size) * 2;
+    memcpy(&(dh->utsname.machine), "x86_64", 6);
+
+    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
+        dh->status |= DUMP_DH_COMPRESSED_ZLIB;
+    }
+#ifdef CONFIG_LZO
+    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
+        dh->status |= DUMP_DH_COMPRESSED_LZO;
+    }
+#endif
+#ifdef CONFIG_SNAPPY
+    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
+        dh->status |= DUMP_DH_COMPRESSED_SNAPPY;
+    }
+#endif
+
+    if (write_buffer(s->fd, s->flag_flatten, 0, dh, size) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    /* write sub header */
+    size = sizeof(KdumpSubHeader64);
+    kh = g_malloc0(size);
+
+    /* 64bit max_mapnr_64 */
+    kh->max_mapnr_64 = s->max_mapnr;
+    kh->phys_base = PHYS_BASE;
+    kh->dump_level = DUMP_LEVEL;
+
+    kh->offset_note = DISKDUMP_HEADER_BLOCKS * dh->block_size + size;
+    kh->note_size = s->note_size;
+
+    if (write_buffer(s->fd, s->flag_flatten, dh->block_size, kh, size) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    /* write note */
+    s->note_buf = g_malloc0(s->note_size);
+    s->note_buf_offset = 0;
+
+    /* use s->note_buf to store notes temporarily */
+    if (write_elf64_notes(buf_write_note, s) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    if (write_buffer(s->fd, s->flag_flatten, kh->offset_note, s->note_buf,
+                     s->note_size) < 0) {
+        ret = -1;
+        goto out;
+    }
+
+    /* get offset of dump_bitmap */
+    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) *
+                             dh->block_size;
+
+    /* get offset of page */
+    s->offset_page = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size +
+                      dh->bitmap_blocks) * dh->block_size;
+
+out:
+    g_free(dh);
+    g_free(kh);
+    g_free(s->note_buf);
+
+    return ret;
+}
+
+static int write_dump_header(DumpState *s)
+{
+    if (s->dump_info.d_machine == EM_386) {
+        return create_header32(s);
+    } else {
+        return create_header64(s);
+    }
+}
+
 static ram_addr_t get_start_block(DumpState *s)
 {
     GuestPhysBlock *block;
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index b32b390..9e47b4c 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -20,6 +20,19 @@ 
 #define VERSION_FLAT_HEADER         (1)    /* version of flattened format */
 #define END_FLAG_FLAT_HEADER        (-1)
 
+/*
+ * flag for compressed format
+ */
+#define DUMP_DH_COMPRESSED_ZLIB     (0x1)
+#define DUMP_DH_COMPRESSED_LZO      (0x2)
+#define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
+
+#define KDUMP_SIGNATURE             "KDUMP   "
+#define SIG_LEN                     (sizeof(KDUMP_SIGNATURE) - 1)
+#define PHYS_BASE                   (0)
+#define DUMP_LEVEL                  (1)
+#define DISKDUMP_HEADER_BLOCKS      (1)
+
 typedef struct ArchDumpInfo {
     int d_machine;  /* Architecture */
     int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
@@ -37,6 +50,89 @@  typedef struct QEMU_PACKED MakedumpfileDataHeader {
     int64_t buf_size;
 } MakedumpfileDataHeader;
 
+typedef struct QEMU_PACKED NewUtsname {
+    char sysname[65];
+    char nodename[65];
+    char release[65];
+    char version[65];
+    char machine[65];
+    char domainname[65];
+} NewUtsname;
+
+typedef struct QEMU_PACKED DiskDumpHeader32 {
+    char signature[SIG_LEN];        /* = "KDUMP   " */
+    uint32_t header_version;        /* Dump header version */
+    NewUtsname utsname;             /* copy of system_utsname */
+    char timestamp[10];             /* Time stamp */
+    uint32_t status;                /* Above flags */
+    uint32_t block_size;            /* Size of a block in byte */
+    uint32_t sub_hdr_size;          /* Size of arch dependent header in block */
+    uint32_t bitmap_blocks;         /* Size of Memory bitmap in block */
+    uint32_t max_mapnr;             /* = max_mapnr ,
+                                       obsoleted in header_version 6 */
+    uint32_t total_ram_blocks;      /* Number of blocks should be written */
+    uint32_t device_blocks;         /* Number of total blocks in dump device */
+    uint32_t written_blocks;        /* Number of written blocks */
+    uint32_t current_cpu;           /* CPU# which handles dump */
+    uint32_t nr_cpus;               /* Number of CPUs */
+} DiskDumpHeader32;
+
+typedef struct QEMU_PACKED DiskDumpHeader64 {
+    char signature[SIG_LEN];        /* = "KDUMP   " */
+    uint32_t header_version;        /* Dump header version */
+    NewUtsname utsname;             /* copy of system_utsname */
+    char timestamp[22];             /* Time stamp */
+    uint32_t status;                /* Above flags */
+    uint32_t block_size;            /* Size of a block in byte */
+    uint32_t sub_hdr_size;          /* Size of arch dependent header in block */
+    uint32_t bitmap_blocks;         /* Size of Memory bitmap in block */
+    uint32_t max_mapnr;             /* = max_mapnr,
+                                       obsoleted in header_version 6 */
+    uint32_t total_ram_blocks;      /* Number of blocks should be written */
+    uint32_t device_blocks;         /* Number of total blocks in dump device */
+    uint32_t written_blocks;        /* Number of written blocks */
+    uint32_t current_cpu;           /* CPU# which handles dump */
+    uint32_t nr_cpus;               /* Number of CPUs */
+} DiskDumpHeader64;
+
+typedef struct QEMU_PACKED KdumpSubHeader32 {
+    uint32_t phys_base;
+    uint32_t dump_level;            /* header_version 1 and later */
+    uint32_t split;                 /* header_version 2 and later */
+    uint32_t start_pfn;             /* header_version 2 and later,
+                                       obsoleted in header_version 6 */
+    uint32_t end_pfn;               /* header_version 2 and later,
+                                       obsoleted in header_version 6 */
+    uint64_t offset_vmcoreinfo;     /* header_version 3 and later */
+    uint32_t size_vmcoreinfo;       /* header_version 3 and later */
+    uint64_t offset_note;           /* header_version 4 and later */
+    uint32_t note_size;             /* header_version 4 and later */
+    uint64_t offset_eraseinfo;      /* header_version 5 and later */
+    uint32_t size_eraseinfo;        /* header_version 5 and later */
+    uint64_t start_pfn_64;          /* header_version 6 and later */
+    uint64_t end_pfn_64;            /* header_version 6 and later */
+    uint64_t max_mapnr_64;          /* header_version 6 and later */
+} KdumpSubHeader32;
+
+typedef struct QEMU_PACKED KdumpSubHeader64 {
+    uint64_t phys_base;
+    uint32_t dump_level;            /* header_version 1 and later */
+    uint32_t split;                 /* header_version 2 and later */
+    uint64_t start_pfn;             /* header_version 2 and later,
+                                       obsoleted in header_version 6 */
+    uint64_t end_pfn;               /* header_version 2 and later,
+                                       obsoleted in header_version 6 */
+    uint64_t offset_vmcoreinfo;     /* header_version 3 and later */
+    uint64_t size_vmcoreinfo;       /* header_version 3 and later */
+    uint64_t offset_note;           /* header_version 4 and later */
+    uint64_t note_size;             /* header_version 4 and later */
+    uint64_t offset_eraseinfo;      /* header_version 5 and later */
+    uint64_t size_eraseinfo;        /* header_version 5 and later */
+    uint64_t start_pfn_64;          /* header_version 6 and later */
+    uint64_t end_pfn_64;            /* header_version 6 and later */
+    uint64_t max_mapnr_64;          /* header_version 6 and later */
+} KdumpSubHeader64;
+
 struct GuestPhysBlockList; /* memory_mapping.h */
 int cpu_get_dump_info(ArchDumpInfo *info,
                       const struct GuestPhysBlockList *guest_phys_blocks);