diff mbox

[09/13,v7] dump: add API to write dump_bitmap

Message ID 1389944779-31899-10-git-send-email-qiaonuohan@cn.fujitsu.com
State New
Headers show

Commit Message

Qiao Nuohan Jan. 17, 2014, 7:46 a.m. UTC
functions are used to write 1st and 2nd dump_bitmap of kdump-compressed format,
which is used to indicate whether the corresponded page is existed in vmcore.
1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.

Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
---
 dump.c                |  172 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/sysemu/dump.h |    2 +
 2 files changed, 174 insertions(+), 0 deletions(-)

Comments

Laszlo Ersek Jan. 23, 2014, 1:56 p.m. UTC | #1
comments below

On 01/17/14 08:46, qiaonuohan wrote:
> functions are used to write 1st and 2nd dump_bitmap of kdump-compressed format,
> which is used to indicate whether the corresponded page is existed in vmcore.
> 1st and 2nd dump_bitmap are same, because dump level is specified to 1 here.
> 
> Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
> Reviewed-by: Laszlo Ersek <lersek@redhat.com>

Please don't keep my R-b when you implement intrusive changes in a new
version.

> ---
>  dump.c                |  172 +++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sysemu/dump.h |    2 +
>  2 files changed, 174 insertions(+), 0 deletions(-)
> 
> diff --git a/dump.c b/dump.c
> index a7fdc3f..26a1756 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -1001,6 +1001,178 @@ static int write_dump_header(DumpState *s)
>      }
>  }
>  
> +/*
> + * set dump_bitmap sequencely. the bit before last_pfn is not allowed to be
> + * rewritten, so if need to set the first bit, set last_pfn and pfn to 0.
> + * set_dump_bitmap will always leave the recently set bit un-sync. And setting
> + * (last bit + sizeof(buf) * 8) to 0 will do flushing the content in buf into
> + * vmcore, ie. synchronizing un-sync bit into vmcore.
> + */
> +static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
> +                           uint8_t *buf, DumpState *s)
> +{
> +    off_t old_offset, new_offset;
> +    off_t offset_bitmap1, offset_bitmap2;
> +    uint32_t byte, bit;
> +
> +    /* should not set the previous place */
> +    assert(last_pfn <= pfn);
> +
> +    /*
> +     * if the bit needed to be set is not cached in buf, flush the data in buf
> +     * to vmcore firstly.
> +     * making new_offset be bigger than old_offset can also sync remained data
> +     * into vmcore.
> +     */
> +    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
> +    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
> +
> +    while (old_offset < new_offset) {
> +        /* calculate the offset and write dump_bitmap */
> +        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
> +        if (write_buffer(s->fd, offset_bitmap1, buf,
> +                         BUFSIZE_BITMAP) < 0) {
> +            return -1;
> +        }
> +
> +        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
> +        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
> +                         old_offset;
> +        if (write_buffer(s->fd, offset_bitmap2, buf,
> +                         BUFSIZE_BITMAP) < 0) {
> +            return -1;
> +        }
> +
> +        memset(buf, 0, BUFSIZE_BITMAP);
> +        old_offset += BUFSIZE_BITMAP;
> +    }
> +
> +    /* get the exact place of the bit in the buf, and set it */
> +    byte = (pfn % PFN_BUFBITMAP) / CHAR_BIT;
> +    bit = (pfn % PFN_BUFBITMAP) % CHAR_BIT;
> +    if (value) {
> +        buf[byte] |= 1u << bit;
> +    } else {
> +        buf[byte] &= ~(1u << bit);
> +    }
> +
> +    return 0;
> +}

Seems OK, thanks for addressing my earlier comments.

> +
> +/*
> + * exam every page and return the page from number and the address of the page.
> + * pfnptr and bufptr can be NULL. note: the blocks here is supposed to reflect
> + * guest-phys blocks, so block->target_start and block->target_end should be
> + * interal multiples of the target page size.
> + */
> +static int get_next_page(uint64_t *pfnptr, uint8_t **bufptr, DumpState *s)
> +{
> +    static GuestPhysBlock *block;
> +    static uint64_t pfn;

Using static variables is incorrect. Suppose that dump attempt #1 fails
because the target disk becomes full mid-dump. Then dump attempt #2 will
be incorrect because the "block" and "pfn" variables here are not
reinitialized.

I think you should
- move the "block" variable to the caller, ie. write_dump_bitmap(), and
take only its address here,
- eliminate the "pfn" variable, and make "pfnptr" mandatory (ie. require
that it be non-NULL).

This way the loop state will be maintained by the caller.

Also, "bool" would be a nicer return type.

> +    hwaddr addr;
> +    uint8_t *buf;
> +
> +    /* block == NULL means the start of the iteration */
> +    if (!block) {
> +        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);

I guess we can rely on the guest having at least one page. OK. (I think
this is even enforced somewhere near the startup code; memory size is
clamped to some minimum.)

> +        assert(block->target_start % s->page_size == 0);
> +        assert(block->target_end % s->page_size == 0);
> +        pfn = paddr_to_pfn(block->target_start, s->page_shift);
> +        if (pfnptr) {
> +            *pfnptr = pfn;
> +        }
> +        if (bufptr) {
> +            *bufptr = block->host_addr;
> +        }
> +        return 1;
> +    }

The logic seems otherwise sane.

(1) I can see you rebased this loop from the MemoryMapping list to the
guest_phys_blocks list. Considering the v6 discussion ("for a compressed
dump both paging and filtering are rejected"), this is correct.

(2) However, in this case I wonder whether get_max_mapnr(), now moved to
v7 07/13, should also be rebased to guest_phys_blocks (that function
still uses the MemoryMapping list).

Admittedly, get_max_mapnr() is called also in the non-compressed dump
case, when paging/filtering are possibly enabled. However, the limit
determined in get_max_mapnr(), ie. "s->max_mapnr", is not even used then.

You might want to make the call to get_max_mapnr() conditinal on the
compressed dump case, and then rebase get_max_mapnr() to the
guest_phys_blocks list. After all, "s->max_mapnr" is even documented now
in the struct def as "the biggest guest's phys-mem's number".

(3) I should have noticed this earlier, sorry:

With regard to the loop in get_max_mapnr(), you already rely on that
list being increasing. You determine the maximum by finding the last
element. This is correct.

However we're talking a QTAILQ here, which is double-ended. You don't
need to iterate, just call QTAILQ_LAST() to grab the last element.

> +
> +    pfn++;
> +    addr = pfn_to_paddr(pfn, s->page_shift);
> +
> +    if ((addr >= block->target_start) &&

I can't see how this sub-condition could ever be false. But it shouldn't
hurt.

> +        (addr + s->page_size <= block->target_end)) {
> +        buf = block->host_addr + (addr - block->target_start);

OK

> +    } else {
> +        /* the next page is in the next block */
> +        block = QTAILQ_NEXT(block, next);
> +        /*
> +         * iteration comes to end and block is set to NULL, next time when
> +         * get_next_page is called, the iteration will start from the first
> +         * block
> +         */
> +        if (!block) {
> +            return 0;
> +        }

Yes, but this misses the possibility of aborting the dump mid-way.

> +        assert(block->target_start % s->page_size == 0);
> +        assert(block->target_end % s->page_size == 0);
> +        pfn = paddr_to_pfn(block->target_start, s->page_shift);
> +        buf = block->host_addr;
> +    }
> +
> +    if (pfnptr) {
> +        *pfnptr = pfn;
> +    }
> +    if (bufptr) {
> +        *bufptr = buf;
> +    }
> +
> +    return 1;
> +}

In general the logic seems fine.

> +
> +static int write_dump_bitmap(DumpState *s)
> +{
> +    int ret = 0;
> +    uint64_t last_pfn, pfn;
> +    void *dump_bitmap_buf;
> +    size_t num_dumpable;
> +
> +    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
> +    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
> +
> +    num_dumpable = 0;
> +    last_pfn = 0;
> +
> +    /*
> +     * exam memory page by page, and set the bit in dump_bitmap corresponded
> +     * to the existing page.
> +     */
> +    while (get_next_page(&pfn, NULL, s)) {
> +        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to set dump_bitmap.\n");
> +            ret = -1;
> +            goto out;
> +        }
> +
> +        last_pfn = pfn;
> +        num_dumpable++;
> +    }
> +
> +    /*
> +     * set_dump_bitmap will always leave the recently set bit un-sync. Here we
> +     * set last_pfn + PFN_BUFBITMAP to 0 and those set but un-sync bit will be
> +     * synchronized into vmcore.
> +     */
> +    if (num_dumpable > 0) {
> +        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, false,
> +                              dump_bitmap_buf, s);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to sync dump_bitmap.\n");
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /* number of dumpable pages that will be dumped later */
> +    s->num_dumpable = num_dumpable;
> +
> +out:
> +    g_free(dump_bitmap_buf);
> +
> +    return ret;
> +}

Seems OK, thanks.

> +
>  static ram_addr_t get_start_block(DumpState *s)
>  {
>      GuestPhysBlock *block;
> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
> index dfee238..6d4d0bc 100644
> --- a/include/sysemu/dump.h
> +++ b/include/sysemu/dump.h
> @@ -39,6 +39,8 @@
>  #define PHYS_BASE                   (0)
>  #define DUMP_LEVEL                  (1)
>  #define DISKDUMP_HEADER_BLOCKS      (1)
> +#define BUFSIZE_BITMAP              (TARGET_PAGE_SIZE)
> +#define PFN_BUFBITMAP               (CHAR_BIT * BUFSIZE_BITMAP)
>  
>  typedef struct ArchDumpInfo {
>      int d_machine;  /* Architecture */
> 

OK.

To summarize:
- the static variables in get_next_page() are a blocker, they should be
fixed;
- addressing the rest would improve code quality in my opinion, but I
don't insist.

Thank you,
Laszlo
diff mbox

Patch

diff --git a/dump.c b/dump.c
index a7fdc3f..26a1756 100644
--- a/dump.c
+++ b/dump.c
@@ -1001,6 +1001,178 @@  static int write_dump_header(DumpState *s)
     }
 }
 
+/*
+ * set dump_bitmap sequencely. the bit before last_pfn is not allowed to be
+ * rewritten, so if need to set the first bit, set last_pfn and pfn to 0.
+ * set_dump_bitmap will always leave the recently set bit un-sync. And setting
+ * (last bit + sizeof(buf) * 8) to 0 will do flushing the content in buf into
+ * vmcore, ie. synchronizing un-sync bit into vmcore.
+ */
+static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
+                           uint8_t *buf, DumpState *s)
+{
+    off_t old_offset, new_offset;
+    off_t offset_bitmap1, offset_bitmap2;
+    uint32_t byte, bit;
+
+    /* should not set the previous place */
+    assert(last_pfn <= pfn);
+
+    /*
+     * if the bit needed to be set is not cached in buf, flush the data in buf
+     * to vmcore firstly.
+     * making new_offset be bigger than old_offset can also sync remained data
+     * into vmcore.
+     */
+    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
+    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
+
+    while (old_offset < new_offset) {
+        /* calculate the offset and write dump_bitmap */
+        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
+        if (write_buffer(s->fd, offset_bitmap1, buf,
+                         BUFSIZE_BITMAP) < 0) {
+            return -1;
+        }
+
+        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
+        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
+                         old_offset;
+        if (write_buffer(s->fd, offset_bitmap2, buf,
+                         BUFSIZE_BITMAP) < 0) {
+            return -1;
+        }
+
+        memset(buf, 0, BUFSIZE_BITMAP);
+        old_offset += BUFSIZE_BITMAP;
+    }
+
+    /* get the exact place of the bit in the buf, and set it */
+    byte = (pfn % PFN_BUFBITMAP) / CHAR_BIT;
+    bit = (pfn % PFN_BUFBITMAP) % CHAR_BIT;
+    if (value) {
+        buf[byte] |= 1u << bit;
+    } else {
+        buf[byte] &= ~(1u << bit);
+    }
+
+    return 0;
+}
+
+/*
+ * exam every page and return the page from number and the address of the page.
+ * pfnptr and bufptr can be NULL. note: the blocks here is supposed to reflect
+ * guest-phys blocks, so block->target_start and block->target_end should be
+ * interal multiples of the target page size.
+ */
+static int get_next_page(uint64_t *pfnptr, uint8_t **bufptr, DumpState *s)
+{
+    static GuestPhysBlock *block;
+    static uint64_t pfn;
+    hwaddr addr;
+    uint8_t *buf;
+
+    /* block == NULL means the start of the iteration */
+    if (!block) {
+        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
+        assert(block->target_start % s->page_size == 0);
+        assert(block->target_end % s->page_size == 0);
+        pfn = paddr_to_pfn(block->target_start, s->page_shift);
+        if (pfnptr) {
+            *pfnptr = pfn;
+        }
+        if (bufptr) {
+            *bufptr = block->host_addr;
+        }
+        return 1;
+    }
+
+    pfn++;
+    addr = pfn_to_paddr(pfn, s->page_shift);
+
+    if ((addr >= block->target_start) &&
+        (addr + s->page_size <= block->target_end)) {
+        buf = block->host_addr + (addr - block->target_start);
+    } else {
+        /* the next page is in the next block */
+        block = QTAILQ_NEXT(block, next);
+        /*
+         * iteration comes to end and block is set to NULL, next time when
+         * get_next_page is called, the iteration will start from the first
+         * block
+         */
+        if (!block) {
+            return 0;
+        }
+        assert(block->target_start % s->page_size == 0);
+        assert(block->target_end % s->page_size == 0);
+        pfn = paddr_to_pfn(block->target_start, s->page_shift);
+        buf = block->host_addr;
+    }
+
+    if (pfnptr) {
+        *pfnptr = pfn;
+    }
+    if (bufptr) {
+        *bufptr = buf;
+    }
+
+    return 1;
+}
+
+static int write_dump_bitmap(DumpState *s)
+{
+    int ret = 0;
+    uint64_t last_pfn, pfn;
+    void *dump_bitmap_buf;
+    size_t num_dumpable;
+
+    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
+    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
+
+    num_dumpable = 0;
+    last_pfn = 0;
+
+    /*
+     * exam memory page by page, and set the bit in dump_bitmap corresponded
+     * to the existing page.
+     */
+    while (get_next_page(&pfn, NULL, s)) {
+        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to set dump_bitmap.\n");
+            ret = -1;
+            goto out;
+        }
+
+        last_pfn = pfn;
+        num_dumpable++;
+    }
+
+    /*
+     * set_dump_bitmap will always leave the recently set bit un-sync. Here we
+     * set last_pfn + PFN_BUFBITMAP to 0 and those set but un-sync bit will be
+     * synchronized into vmcore.
+     */
+    if (num_dumpable > 0) {
+        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, false,
+                              dump_bitmap_buf, s);
+        if (ret < 0) {
+            dump_error(s, "dump: failed to sync dump_bitmap.\n");
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /* number of dumpable pages that will be dumped later */
+    s->num_dumpable = num_dumpable;
+
+out:
+    g_free(dump_bitmap_buf);
+
+    return ret;
+}
+
 static ram_addr_t get_start_block(DumpState *s)
 {
     GuestPhysBlock *block;
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index dfee238..6d4d0bc 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -39,6 +39,8 @@ 
 #define PHYS_BASE                   (0)
 #define DUMP_LEVEL                  (1)
 #define DISKDUMP_HEADER_BLOCKS      (1)
+#define BUFSIZE_BITMAP              (TARGET_PAGE_SIZE)
+#define PFN_BUFBITMAP               (CHAR_BIT * BUFSIZE_BITMAP)
 
 typedef struct ArchDumpInfo {
     int d_machine;  /* Architecture */