Patchwork [v6,07/11] dump: Add API to write dump_bitmap

login
register
mail settings
Submitter Qiao Nuohan
Date Jan. 5, 2014, 7:27 a.m.
Message ID <1388906864-1083-8-git-send-email-qiaonuohan@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/306930/
State New
Headers show

Comments

Qiao Nuohan - Jan. 5, 2014, 7:27 a.m.
functions are used to write 1st and 2nd dump_bitmap of kdump-compressed format,
which is used to indicate whether the corresponded page is existed in vmcore.
1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
---
 dump.c                |  116 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/sysemu/dump.h |    7 +++
 2 files changed, 123 insertions(+), 0 deletions(-)
Laszlo Ersek - Jan. 7, 2014, 2:49 p.m.
comments below

On 01/05/14 08:27, Qiao Nuohan wrote:
> functions are used to write 1st and 2nd dump_bitmap of kdump-compressed format,
> which is used to indicate whether the corresponded page is existed in vmcore.
> 1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.
> 
> Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
> ---
>  dump.c                |  116 +++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sysemu/dump.h |    7 +++
>  2 files changed, 123 insertions(+), 0 deletions(-)
> 
> diff --git a/dump.c b/dump.c
> index e3623b9..1fae152 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -80,12 +80,14 @@ typedef struct DumpState {
>      bool flag_flatten;
>      uint32_t nr_cpus;
>      size_t page_size;
> +    uint32_t page_shift;
>      uint64_t max_mapnr;
>      size_t len_dump_bitmap;
>      void *note_buf;
>      size_t note_buf_offset;
>      off_t offset_dump_bitmap;
>      off_t offset_page;
> +    size_t num_dumpable;
>      uint32_t flag_compress;
>  } DumpState;
>  
> @@ -972,6 +974,120 @@ static int write_dump_header(DumpState *s)
>      }
>  }
>  
> +/* set dump_bitmap sequencely. bitmap is not allowed to be rewritten. */
> +static int set_dump_bitmap(int64_t last_pfn, int64_t pfn, uint32_t value,
> +                           void *buf, DumpState *s)
> +{

I'd recommend
- "uint8_t *buf", and
- "bool value", and
- maybe an assert() that neither of pfn and last_pfn is negative.

> +    off_t old_offset, new_offset;
> +    off_t offset_bitmap1, offset_bitmap2;
> +    uint32_t byte, bit;
> +
> +    /* should not set the previous place */
> +    if (last_pfn > pfn) {
> +        return -1;
> +    }
> +
> +    /*
> +     * if the bit needed to be set is not cached in buf, flush the data in buf
> +     * to vmcore firstly.
> +     * making new_offset be bigger than old_offset can also sync remained data
> +     * into vmcore.
> +     */
> +    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
> +    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
> +
> +    while (old_offset < new_offset) {
> +        /* calculate the offset and write dump_bitmap */
> +        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
> +        if (write_buffer(s->fd, s->flag_flatten, offset_bitmap1, buf,
> +                         BUFSIZE_BITMAP) < 0) {
> +            return -1;
> +        }
> +
> +        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
> +        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
> +                         old_offset;
> +        if (write_buffer(s->fd, s->flag_flatten, offset_bitmap2, buf,
> +                         BUFSIZE_BITMAP) < 0) {
> +            return -1;
> +        }
> +
> +        memset(buf, 0, BUFSIZE_BITMAP);
> +        old_offset += BUFSIZE_BITMAP;
> +    }

Seems sane to me.

> +
> +    /* get the exact place of the bit in the buf, and set it */
> +    byte = (pfn % PFN_BUFBITMAP) >> 3;

(dividing by CHAR_BIT would be more consistent with the connection
between PFN_BUFBITMAP and BUFSIZE_BITMAP)

> +    bit = (pfn % PFN_BUFBITMAP) & 7;
> +    if (value) {
> +        ((char *)buf)[byte] |= 1<<bit;
> +    } else {
> +        ((char *)buf)[byte] &= ~(1<<bit);
> +    }

Shudder. I would much prefer (with "uint8_t *buf" included):

    if (value) {
        buf[byte] |=   1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

When you interpret the expressions in question against the C standard,
my proposal is simpler to understand, and relies on fewer
platform-specifics. The current code looks simple, but it's actually
quite complex.

In any case, I think it will work in practice.


> +
> +    return 0;
> +}
> +
> +static int write_dump_bitmap(DumpState *s)
> +{
> +    int ret = 0;
> +    int64_t pfn_start, pfn_end, pfn;
> +    int64_t last_pfn;
> +    void *dump_bitmap_buf;
> +    size_t num_dumpable;
> +    MemoryMapping *memory_mapping;
> +
> +    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
> +    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
> +
> +    num_dumpable = 0;
> +    last_pfn = -1;

This would trip up the assert() that I proposed above. It also exploits
that (last_pfn == -1) will result in (old_offset = 0) in
set_dump_bitmap(), due to the division. I think it's fairly ugly, but
should work in practice.

> +
> +    /*
> +     * exam memory page by page, and set the bit in dump_bitmap corresponded
> +     * to the existing page
> +     */
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        pfn_start = paddr_to_pfn(memory_mapping->phys_addr, s->page_shift);
> +        pfn_end = paddr_to_pfn(memory_mapping->phys_addr +
> +                               memory_mapping->length, s->page_shift);

OK, the intent seems to be to make "pfn_end" exclusive (see the loop
below). That however depends on "memory_mapping->length" being an
integral multiple of the target page size.

I propose an assert() here for that reason. Looking at patch v6 10/11,
for a compressed dump both paging and filtering are rejected. This
implies that in dump_init(),
- qemu_get_guest_simple_memory_mapping() is called -- ie. the memory
mapping list will directly reflect the guest-phys blocks,
- memory_mapping_filter() will *not* be called.

These should ensure that pfn_end is exclusive here (and that "phys_addr"
falls to the beginning of a page) , but an assert() with a comment would
help.

> +
> +        for (pfn = pfn_start; pfn < pfn_end; pfn++) {
> +            ret = set_dump_bitmap(last_pfn, pfn, 1, dump_bitmap_buf, s);

(1 --> "true" after replacing that param type with bool)

> +            if (ret < 0) {
> +                dump_error(s, "dump: failed to set dump_bitmap.\n");

Alas, dump_error() ignores the reason, but maybe we can fix that at a
different time.

> +                ret = -1;
> +                goto out;
> +            }
> +
> +            last_pfn = pfn;
> +            num_dumpable++;
> +        }
> +    }
> +
> +    /*
> +     * last_pfn > -1 means bitmap is set, then remained data in buf should be
> +     * synchronized into vmcore
> +     */

As far as I can see, (num_dumpable > 0) could work just as well, and it
would eliminate the super-ugly (last_pfn == -1) case.

> +    if (last_pfn > -1) {
> +        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, 0,
> +                              dump_bitmap_buf, s);

Results in

  new_offset == old_offset + BUFSIZE_BITMAP

in set_dump_bitmap(), that is, one iteration of the loop. It seems
correct to call this if we have set at least one bit, because
set_dump_bitmap() *always* leaves the most recently set bit un-synced.


> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to sync dump_bitmap.\n");
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /* number of dumpable pages that will be dumped later */
> +    s->num_dumpable = num_dumpable;
> +
> +out:
> +    g_free(dump_bitmap_buf);
> +
> +    return ret;
> +}
> +
>  static ram_addr_t get_start_block(DumpState *s)
>  {
>      GuestPhysBlock *block;
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index 9e47b4c..b5eaf8d 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -27,11 +27,18 @@
>  #define DUMP_DH_COMPRESSED_LZO      (0x2)
>  #define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
>  
> +#define PAGE_SIZE                   (4096)
>  #define KDUMP_SIGNATURE             "KDUMP   "
>  #define SIG_LEN                     (sizeof(KDUMP_SIGNATURE) - 1)
>  #define PHYS_BASE                   (0)
>  #define DUMP_LEVEL                  (1)
>  #define DISKDUMP_HEADER_BLOCKS      (1)
> +#define BUFSIZE_BITMAP              (PAGE_SIZE)
> +#define PFN_BUFBITMAP               (CHAR_BIT * BUFSIZE_BITMAP)
> +#define ARCH_PFN_OFFSET             (0)
> +
> +#define paddr_to_pfn(X, page_shift) \
> +    (((unsigned long long)(X) >> (page_shift)) - ARCH_PFN_OFFSET)
>  
>  typedef struct ArchDumpInfo {
>      int d_machine;  /* Architecture */
> 

I think these magic constants are somewhat tied to x86, and therefore
should be in an arch-specific file rather than a common file, but
whoever wants to extend this to another architecture can do that.

I think I haven't found anything that I'd call a bug.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Laszlo Ersek - Jan. 7, 2014, 9:41 p.m.
On 01/07/14 15:49, Laszlo Ersek wrote:
> 
> On 01/05/14 08:27, Qiao Nuohan wrote:

>> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
>> index 9e47b4c..b5eaf8d 100644
>> --- a/include/sysemu/dump.h
>> +++ b/include/sysemu/dump.h
>> @@ -27,11 +27,18 @@
>>  #define DUMP_DH_COMPRESSED_LZO      (0x2)
>>  #define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
>>  
>> +#define PAGE_SIZE                   (4096)
>>  #define KDUMP_SIGNATURE             "KDUMP   "
>>  #define SIG_LEN                     (sizeof(KDUMP_SIGNATURE) - 1)
>>  #define PHYS_BASE                   (0)
>>  #define DUMP_LEVEL                  (1)
>>  #define DISKDUMP_HEADER_BLOCKS      (1)
>> +#define BUFSIZE_BITMAP              (PAGE_SIZE)
>> +#define PFN_BUFBITMAP               (CHAR_BIT * BUFSIZE_BITMAP)
>> +#define ARCH_PFN_OFFSET             (0)
>> +
>> +#define paddr_to_pfn(X, page_shift) \
>> +    (((unsigned long long)(X) >> (page_shift)) - ARCH_PFN_OFFSET)
>>  
>>  typedef struct ArchDumpInfo {
>>      int d_machine;  /* Architecture */
>>
> 
> I think these magic constants are somewhat tied to x86, and therefore
> should be in an arch-specific file rather than a common file, but
> whoever wants to extend this to another architecture can do that.

Stressing the argument a bit more for PAGE_SIZE specifically:
- we already have TARGET_PAGE_SIZE, maybe that would be a better choice,
- PAGE_SIZE is defined *as* TARGET_PAGE_SIZE in kvm-all.c. There's no
actual conflict, but the mental conflict is bad enough.

Anyway my R-b stands.

Laszlo

Patch

diff --git a/dump.c b/dump.c
index e3623b9..1fae152 100644
--- a/dump.c
+++ b/dump.c
@@ -80,12 +80,14 @@  typedef struct DumpState {
     bool flag_flatten;
     uint32_t nr_cpus;
     size_t page_size;
+    uint32_t page_shift;
     uint64_t max_mapnr;
     size_t len_dump_bitmap;
     void *note_buf;
     size_t note_buf_offset;
     off_t offset_dump_bitmap;
     off_t offset_page;
+    size_t num_dumpable;
     uint32_t flag_compress;
 } DumpState;
 
@@ -972,6 +974,120 @@  static int write_dump_header(DumpState *s)
     }
 }
 
+/* set dump_bitmap sequencely. bitmap is not allowed to be rewritten. */
+static int set_dump_bitmap(int64_t last_pfn, int64_t pfn, uint32_t value,
+                           void *buf, DumpState *s)
+{
+    off_t old_offset, new_offset;
+    off_t offset_bitmap1, offset_bitmap2;
+    uint32_t byte, bit;
+
+    /* should not set the previous place */
+    if (last_pfn > pfn) {
+        return -1;
+    }
+
+    /*
+     * if the bit needed to be set is not cached in buf, flush the data in buf
+     * to vmcore firstly.
+     * making new_offset be bigger than old_offset can also sync remained data
+     * into vmcore.
+     */
+    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
+    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
+
+    while (old_offset < new_offset) {
+        /* calculate the offset and write dump_bitmap */
+        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
+        if (write_buffer(s->fd, s->flag_flatten, offset_bitmap1, buf,
+                         BUFSIZE_BITMAP) < 0) {
+            return -1;
+        }
+
+        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
+        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
+                         old_offset;
+        if (write_buffer(s->fd, s->flag_flatten, offset_bitmap2, buf,
+                         BUFSIZE_BITMAP) < 0) {
+            return -1;
+        }
+
+        memset(buf, 0, BUFSIZE_BITMAP);
+        old_offset += BUFSIZE_BITMAP;
+    }
+
+    /* get the exact place of the bit in the buf, and set it */
+    byte = (pfn % PFN_BUFBITMAP) >> 3;
+    bit = (pfn % PFN_BUFBITMAP) & 7;
+    if (value) {
+        ((char *)buf)[byte] |= 1<<bit;
+    } else {
+        ((char *)buf)[byte] &= ~(1<<bit);
+    }
+
+    return 0;
+}
+
+static int write_dump_bitmap(DumpState *s)
+{
+    int ret = 0;
+    int64_t pfn_start, pfn_end, pfn;
+    int64_t last_pfn;
+    void *dump_bitmap_buf;
+    size_t num_dumpable;
+    MemoryMapping *memory_mapping;
+
+    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
+    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
+
+    num_dumpable = 0;
+    last_pfn = -1;
+
+    /*
+     * exam memory page by page, and set the bit in dump_bitmap corresponded
+     * to the existing page
+     */
+    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
+        pfn_start = paddr_to_pfn(memory_mapping->phys_addr, s->page_shift);
+        pfn_end = paddr_to_pfn(memory_mapping->phys_addr +
+                               memory_mapping->length, s->page_shift);
+
+        for (pfn = pfn_start; pfn < pfn_end; pfn++) {
+            ret = set_dump_bitmap(last_pfn, pfn, 1, dump_bitmap_buf, s);
+            if (ret < 0) {
+                dump_error(s, "dump: failed to set dump_bitmap.\n");
+                ret = -1;
+                goto out;
+            }
+
+            last_pfn = pfn;
+            num_dumpable++;
+        }
+    }
+
+    /*
+     * last_pfn > -1 means bitmap is set, then remained data in buf should be
+     * synchronized into vmcore
+     */
+    if (last_pfn > -1) {
+        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, 0,
+                              dump_bitmap_buf, s);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to sync dump_bitmap.\n");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /* number of dumpable pages that will be dumped later */
+    s->num_dumpable = num_dumpable;
+
+out:
+    g_free(dump_bitmap_buf);
+
+    return ret;
+}
+
 static ram_addr_t get_start_block(DumpState *s)
 {
     GuestPhysBlock *block;
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 9e47b4c..b5eaf8d 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -27,11 +27,18 @@ 
 #define DUMP_DH_COMPRESSED_LZO      (0x2)
 #define DUMP_DH_COMPRESSED_SNAPPY   (0x4)
 
+#define PAGE_SIZE                   (4096)
 #define KDUMP_SIGNATURE             "KDUMP   "
 #define SIG_LEN                     (sizeof(KDUMP_SIGNATURE) - 1)
 #define PHYS_BASE                   (0)
 #define DUMP_LEVEL                  (1)
 #define DISKDUMP_HEADER_BLOCKS      (1)
+#define BUFSIZE_BITMAP              (PAGE_SIZE)
+#define PFN_BUFBITMAP               (CHAR_BIT * BUFSIZE_BITMAP)
+#define ARCH_PFN_OFFSET             (0)
+
+#define paddr_to_pfn(X, page_shift) \
+    (((unsigned long long)(X) >> (page_shift)) - ARCH_PFN_OFFSET)
 
 typedef struct ArchDumpInfo {
     int d_machine;  /* Architecture */