Patchwork [RFC,08/20] Introduce RAMSaveIO and use cpu_physical_memory_get_dirty_range() to check multiple dirty pages.

login
register
mail settings
Submitter Yoshiaki Tamura
Date April 21, 2010, 5:57 a.m.
Message ID <1271829445-5328-9-git-send-email-tamura.yoshiaki@lab.ntt.co.jp>
Download mbox | patch
Permalink /patch/50632/
State New
Headers show

Comments

Yoshiaki Tamura - April 21, 2010, 5:57 a.m.
Introduce RAMSaveIO to use writev for saving ram blocks, and modify
ram_save_block() and ram_save_remaining() to use
cpu_physical_memory_get_dirty_range() to check multiple dirty and
non-dirty pages at once.

Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
Signed-off-by: OHMURA Kei <ohmura.kei@lab.ntt.co.jp>
---
 vl.c |  221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 197 insertions(+), 24 deletions(-)
Anthony Liguori - April 22, 2010, 7:31 p.m.
On 04/21/2010 12:57 AM, Yoshiaki Tamura wrote:
> Introduce RAMSaveIO to use writev for saving ram blocks, and modifies
> ram_save_block() and ram_save_remaining() to use
> cpu_physical_memory_get_dirty_range() to check multiple dirty and
> non-dirty pages at once.
>
> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
> Signed-off-by: OHMURA Kei<ohmura.kei@lab.ntt.co.jp>
>    

Perf data?

Regards,

Anthony Liguori

> ---
>   vl.c |  221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
>   1 files changed, 197 insertions(+), 24 deletions(-)
>
> diff --git a/vl.c b/vl.c
> index 729c955..9c3dc4c 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2774,12 +2774,167 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
>       return 1;
>   }
>
> -static int ram_save_block(QEMUFile *f)
> +typedef struct RAMSaveIO RAMSaveIO;
> +
> +struct RAMSaveIO {
> +    QEMUFile *f;
> +    QEMUIOVector *qiov;
> +
> +    uint8_t *ram_store;
> +    size_t nalloc, nused;
> +    uint8_t io_mode;
> +
> +    void (*put_buffer)(RAMSaveIO *s, uint8_t *buf, size_t len);
> +    void (*put_byte)(RAMSaveIO *s, int v);
> +    void (*put_be64)(RAMSaveIO *s, uint64_t v);
> +
> +};
> +
> +static inline void ram_saveio_flush(RAMSaveIO *s, int prepare)
> +{
> +    qemu_put_vector(s->f, s->qiov);
> +    if (prepare)
> +        qemu_put_vector_prepare(s->f);
> +
> +    /* reset stored data */
> +    qemu_iovec_reset(s->qiov);
> +    s->nused = 0;
> +}
> +
> +static inline void ram_saveio_put_buffer(RAMSaveIO *s, uint8_t *buf, size_t len)
> +{
> +    s->put_buffer(s, buf, len);
> +}
> +
> +static inline void ram_saveio_put_byte(RAMSaveIO *s, int v)
> +{
> +    s->put_byte(s, v);
> +}
> +
> +static inline void ram_saveio_put_be64(RAMSaveIO *s, uint64_t v)
> +{
> +    s->put_be64(s, v);
> +}
> +
> +static inline void ram_saveio_set_error(RAMSaveIO *s)
> +{
> +    qemu_file_set_error(s->f);
> +}
> +
> +static void ram_saveio_put_buffer_vector(RAMSaveIO *s, uint8_t *buf, size_t len)
> +{
> +    qemu_iovec_add(s->qiov, buf, len);
> +}
> +
> +static void ram_saveio_put_buffer_direct(RAMSaveIO *s, uint8_t *buf, size_t len)
> +{
> +    qemu_put_buffer(s->f, buf, len);
> +}
> +
> +static void ram_saveio_put_byte_vector(RAMSaveIO *s, int v)
> +{
> +    uint8_t *to_save;
> +
> +    if (s->nalloc - s->nused<  sizeof(int))
> +        ram_saveio_flush(s, 1);
> +
> +    to_save =&s->ram_store[s->nused];
> +    to_save[0] = v&  0xff;
> +    s->nused++;
> +
> +    qemu_iovec_add(s->qiov, to_save, 1);
> +}
> +
> +static void ram_saveio_put_byte_direct(RAMSaveIO *s, int v)
> +{
> +    qemu_put_byte(s->f, v);
> +}
> +
> +static void ram_saveio_put_be64_vector(RAMSaveIO *s, uint64_t v)
> +{
> +    uint8_t *to_save;
> +
> +    if (s->nalloc - s->nused<  sizeof(uint64_t))
> +        ram_saveio_flush(s, 1);
> +
> +    to_save =&s->ram_store[s->nused];
> +    to_save[0] = (v>>  56)&  0xff;
> +    to_save[1] = (v>>  48)&  0xff;
> +    to_save[2] = (v>>  40)&  0xff;
> +    to_save[3] = (v>>  32)&  0xff;
> +    to_save[4] = (v>>  24)&  0xff;
> +    to_save[5] = (v>>  16)&  0xff;
> +    to_save[6] = (v>>   8)&  0xff;
> +    to_save[7] = (v>>   0)&  0xff;
> +    s->nused += sizeof(uint64_t);
> +
> +    qemu_iovec_add(s->qiov, to_save, sizeof(uint64_t));
> +}
> +
> +static void ram_saveio_put_be64_direct(RAMSaveIO *s, uint64_t v)
> +{
> +
> +    qemu_put_be64(s->f, v);
> +}
> +
> +static RAMSaveIO *ram_saveio_new(QEMUFile *f, size_t max_store)
> +{
> +    RAMSaveIO *s;
> +
> +    s = qemu_mallocz(sizeof(*s));
> +
> +    if (qemu_file_get_rate_limit(f) == 0) {/* non buffer mode */
> +        /* When QEMUFile don't have get_rate limit,
> +         * qemu_file_get_rate_limit will return 0.
> +         * However, we believe that all kinds of QEMUFile
> +         * except non-block mode has rate limit function.
> +         */
> +        s->io_mode = 1;
> +        s->ram_store = qemu_mallocz(max_store);
> +        s->nalloc = max_store;
> +        s->nused = 0;
> +
> +        s->qiov = qemu_mallocz(sizeof(*s->qiov));
> +        qemu_iovec_init(s->qiov, max_store);
> +
> +        s->put_buffer = ram_saveio_put_buffer_vector;
> +        s->put_byte = ram_saveio_put_byte_vector;
> +        s->put_be64 = ram_saveio_put_be64_vector;
> +
> +        qemu_put_vector_prepare(f);
> +    } else {
> +        s->io_mode = 0;
> +        s->put_buffer = ram_saveio_put_buffer_direct;
> +        s->put_byte = ram_saveio_put_byte_direct;
> +        s->put_be64 = ram_saveio_put_be64_direct;
> +    }
> +
> +    s->f = f;
> +
> +    return s;
> +}
> +
> +static void ram_saveio_destroy(RAMSaveIO *s)
> +{
> +    if (s->qiov != NULL) { /* means using put_vector */
> +        ram_saveio_flush(s, 0);
> +        qemu_iovec_destroy(s->qiov);
> +        qemu_free(s->qiov);
> +        qemu_free(s->ram_store);
> +    }
> +    qemu_free(s);
> +}
> +
> +/*
> + * RAMSaveIO will manage I/O.
> + */
> +static int ram_save_block(RAMSaveIO *s)
>   {
>       static ram_addr_t current_addr = 0;
>       ram_addr_t saved_addr = current_addr;
>       ram_addr_t addr = 0;
> -    int found = 0;
> +    ram_addr_t dirty_rams[HOST_LONG_BITS];
> +    int i, found = 0;
>
>       while (addr<  last_ram_offset) {
>           if (kvm_enabled()&&  current_addr == 0) {
> @@ -2787,32 +2942,38 @@ static int ram_save_block(QEMUFile *f)
>               r = kvm_update_dirty_pages_log();
>               if (r) {
>                   fprintf(stderr, "%s: update dirty pages log failed %d\n", __FUNCTION__, r);
> -                qemu_file_set_error(f);
> +                ram_saveio_set_error(s);
>                   return 0;
>               }
>           }
> -        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
> +        if ((found = cpu_physical_memory_get_dirty_range(
> +                 current_addr, last_ram_offset, dirty_rams, HOST_LONG_BITS,
> +                 MIGRATION_DIRTY_FLAG))) {
>               uint8_t *p;
>
> -            cpu_physical_memory_reset_dirty(current_addr,
> -                                            current_addr + TARGET_PAGE_SIZE,
> -                                            MIGRATION_DIRTY_FLAG);
> +            for (i = 0; i<  found; i++) {
> +                ram_addr_t page_addr = dirty_rams[i];
> +                cpu_physical_memory_reset_dirty(page_addr,
> +                                                page_addr + TARGET_PAGE_SIZE,
> +                                                MIGRATION_DIRTY_FLAG);
>
> -            p = qemu_get_ram_ptr(current_addr);
> +                p = qemu_get_ram_ptr(page_addr);
>
> -            if (is_dup_page(p, *p)) {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
> -                qemu_put_byte(f, *p);
> -            } else {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
> -                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> +                if (is_dup_page(p, *p)) {
> +                    ram_saveio_put_be64(s,
> +                                        (page_addr) | RAM_SAVE_FLAG_COMPRESS);
> +                    ram_saveio_put_byte(s, *p);
> +                } else {
> +                    ram_saveio_put_be64(s, (page_addr) | RAM_SAVE_FLAG_PAGE);
> +                    ram_saveio_put_buffer(s, p, TARGET_PAGE_SIZE);
> +                }
>               }
>
> -            found = 1;
>               break;
> +        } else {
> +            addr += dirty_rams[0];
> +            current_addr = (saved_addr + addr) % last_ram_offset;
>           }
> -        addr += TARGET_PAGE_SIZE;
> -        current_addr = (saved_addr + addr) % last_ram_offset;
>       }
>
>       return found;
> @@ -2822,12 +2983,19 @@ static uint64_t bytes_transferred;
>
>   static ram_addr_t ram_save_remaining(void)
>   {
> -    ram_addr_t addr;
> +    ram_addr_t addr = 0;
>       ram_addr_t count = 0;
> +    ram_addr_t dirty_rams[HOST_LONG_BITS];
> +    int found = 0;
>
> -    for (addr = 0; addr<  last_ram_offset; addr += TARGET_PAGE_SIZE) {
> -        if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
> -            count++;
> +    while (addr<  last_ram_offset) {
> +        if ((found = cpu_physical_memory_get_dirty_range(addr, last_ram_offset,
> +            dirty_rams, HOST_LONG_BITS, MIGRATION_DIRTY_FLAG))) {
> +            count += found;
> +            addr = dirty_rams[found - 1] + TARGET_PAGE_SIZE;
> +        } else {
> +            addr += dirty_rams[0];
> +        }
>       }
>
>       return count;
> @@ -2854,6 +3022,7 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       uint64_t bytes_transferred_last;
>       double bwidth = 0;
>       uint64_t expected_time = 0;
> +    RAMSaveIO *s;
>
>       if (stage<  0) {
>           cpu_physical_memory_set_dirty_tracking(0);
> @@ -2883,10 +3052,12 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       bytes_transferred_last = bytes_transferred;
>       bwidth = qemu_get_clock_ns(rt_clock);
>
> -    while (!qemu_file_rate_limit(f)) {
> +    s = ram_saveio_new(f, IOV_MAX);
> +
> +     while (!qemu_file_rate_limit(f)) {
>           int ret;
>
> -        ret = ram_save_block(f);
> +        ret = ram_save_block(s);
>           bytes_transferred += ret * TARGET_PAGE_SIZE;
>           if (ret == 0) /* no more blocks */
>               break;
> @@ -2903,12 +3074,14 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       /* try transferring iterative blocks of memory */
>       if (stage == 3) {
>           /* flush all remaining blocks regardless of rate limiting */
> -        while (ram_save_block(f) != 0) {
> +        while (ram_save_block(s) != 0) {
>               bytes_transferred += TARGET_PAGE_SIZE;
>           }
>           cpu_physical_memory_set_dirty_tracking(0);
>       }
>
> +    ram_saveio_destroy(s);
> +
>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
>       expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>

Patch

diff --git a/vl.c b/vl.c
index 729c955..9c3dc4c 100644
--- a/vl.c
+++ b/vl.c
@@ -2774,12 +2774,167 @@  static int is_dup_page(uint8_t *page, uint8_t ch)
     return 1;
 }
 
-static int ram_save_block(QEMUFile *f)
+typedef struct RAMSaveIO RAMSaveIO;
+
+struct RAMSaveIO {
+    QEMUFile *f;
+    QEMUIOVector *qiov;
+
+    uint8_t *ram_store;
+    size_t nalloc, nused;
+    uint8_t io_mode;
+
+    void (*put_buffer)(RAMSaveIO *s, uint8_t *buf, size_t len);
+    void (*put_byte)(RAMSaveIO *s, int v);
+    void (*put_be64)(RAMSaveIO *s, uint64_t v);
+
+};
+
+static inline void ram_saveio_flush(RAMSaveIO *s, int prepare)
+{
+    qemu_put_vector(s->f, s->qiov);
+    if (prepare)
+        qemu_put_vector_prepare(s->f);
+
+    /* reset stored data */
+    qemu_iovec_reset(s->qiov);
+    s->nused = 0;
+}
+
+static inline void ram_saveio_put_buffer(RAMSaveIO *s, uint8_t *buf, size_t len)
+{
+    s->put_buffer(s, buf, len);
+}
+
+static inline void ram_saveio_put_byte(RAMSaveIO *s, int v)
+{
+    s->put_byte(s, v);
+}
+
+static inline void ram_saveio_put_be64(RAMSaveIO *s, uint64_t v)
+{
+    s->put_be64(s, v);
+}
+
+static inline void ram_saveio_set_error(RAMSaveIO *s)
+{
+    qemu_file_set_error(s->f);
+}
+
+static void ram_saveio_put_buffer_vector(RAMSaveIO *s, uint8_t *buf, size_t len)
+{
+    qemu_iovec_add(s->qiov, buf, len);
+}
+
+static void ram_saveio_put_buffer_direct(RAMSaveIO *s, uint8_t *buf, size_t len)
+{
+    qemu_put_buffer(s->f, buf, len);
+}
+
+static void ram_saveio_put_byte_vector(RAMSaveIO *s, int v)
+{
+    uint8_t *to_save;
+
+    if (s->nalloc - s->nused < sizeof(int))
+        ram_saveio_flush(s, 1);
+
+    to_save = &s->ram_store[s->nused];
+    to_save[0] = v & 0xff;
+    s->nused++;
+
+    qemu_iovec_add(s->qiov, to_save, 1);
+}
+
+static void ram_saveio_put_byte_direct(RAMSaveIO *s, int v)
+{
+    qemu_put_byte(s->f, v);
+}
+
+static void ram_saveio_put_be64_vector(RAMSaveIO *s, uint64_t v)
+{
+    uint8_t *to_save;
+
+    if (s->nalloc - s->nused < sizeof(uint64_t))
+        ram_saveio_flush(s, 1);
+
+    to_save = &s->ram_store[s->nused];
+    to_save[0] = (v >> 56) & 0xff;
+    to_save[1] = (v >> 48) & 0xff;
+    to_save[2] = (v >> 40) & 0xff;
+    to_save[3] = (v >> 32) & 0xff;
+    to_save[4] = (v >> 24) & 0xff;
+    to_save[5] = (v >> 16) & 0xff;
+    to_save[6] = (v >>  8) & 0xff;
+    to_save[7] = (v >>  0) & 0xff;
+    s->nused += sizeof(uint64_t);
+
+    qemu_iovec_add(s->qiov, to_save, sizeof(uint64_t));
+}
+
+static void ram_saveio_put_be64_direct(RAMSaveIO *s, uint64_t v)
+{
+
+    qemu_put_be64(s->f, v);
+}
+
+static RAMSaveIO *ram_saveio_new(QEMUFile *f, size_t max_store)
+{
+    RAMSaveIO *s;
+
+    s = qemu_mallocz(sizeof(*s));
+
+    if (qemu_file_get_rate_limit(f) == 0) {/* non buffer mode */
+        /* When a QEMUFile doesn't have a get_rate_limit callback,
+         * qemu_file_get_rate_limit() will return 0.
+         * However, we believe that all kinds of QEMUFile
+         * except non-block mode have a rate limit function.
+         */
+        s->io_mode = 1;
+        s->ram_store = qemu_mallocz(max_store);
+        s->nalloc = max_store;
+        s->nused = 0;
+
+        s->qiov = qemu_mallocz(sizeof(*s->qiov));
+        qemu_iovec_init(s->qiov, max_store);
+
+        s->put_buffer = ram_saveio_put_buffer_vector;
+        s->put_byte = ram_saveio_put_byte_vector;
+        s->put_be64 = ram_saveio_put_be64_vector;
+
+        qemu_put_vector_prepare(f);
+    } else {
+        s->io_mode = 0;
+        s->put_buffer = ram_saveio_put_buffer_direct;
+        s->put_byte = ram_saveio_put_byte_direct;
+        s->put_be64 = ram_saveio_put_be64_direct;
+    }
+
+    s->f = f;
+    
+    return s;
+}
+
+static void ram_saveio_destroy(RAMSaveIO *s)
+{
+    if (s->qiov != NULL) { /* means using put_vector */
+        ram_saveio_flush(s, 0);
+        qemu_iovec_destroy(s->qiov);
+        qemu_free(s->qiov);
+        qemu_free(s->ram_store);
+    }
+    qemu_free(s);
+}
+
+/*
+ * RAMSaveIO will manage I/O.
+ */
+static int ram_save_block(RAMSaveIO *s)
 {
     static ram_addr_t current_addr = 0;
     ram_addr_t saved_addr = current_addr;
     ram_addr_t addr = 0;
-    int found = 0;
+    ram_addr_t dirty_rams[HOST_LONG_BITS];
+    int i, found = 0;
 
     while (addr < last_ram_offset) {
         if (kvm_enabled() && current_addr == 0) {
@@ -2787,32 +2942,38 @@  static int ram_save_block(QEMUFile *f)
             r = kvm_update_dirty_pages_log();
             if (r) {
                 fprintf(stderr, "%s: update dirty pages log failed %d\n", __FUNCTION__, r);
-                qemu_file_set_error(f);
+                ram_saveio_set_error(s);
                 return 0;
             }
         }
-        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+        if ((found = cpu_physical_memory_get_dirty_range(
+                 current_addr, last_ram_offset, dirty_rams, HOST_LONG_BITS,
+                 MIGRATION_DIRTY_FLAG))) {
             uint8_t *p;
 
-            cpu_physical_memory_reset_dirty(current_addr,
-                                            current_addr + TARGET_PAGE_SIZE,
-                                            MIGRATION_DIRTY_FLAG);
+            for (i = 0; i < found; i++) {
+                ram_addr_t page_addr = dirty_rams[i];
+                cpu_physical_memory_reset_dirty(page_addr,
+                                                page_addr + TARGET_PAGE_SIZE,
+                                                MIGRATION_DIRTY_FLAG);
 
-            p = qemu_get_ram_ptr(current_addr);
+                p = qemu_get_ram_ptr(page_addr);
 
-            if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-            } else {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                if (is_dup_page(p, *p)) {
+                    ram_saveio_put_be64(s, 
+                                        (page_addr) | RAM_SAVE_FLAG_COMPRESS);
+                    ram_saveio_put_byte(s, *p);
+                } else {
+                    ram_saveio_put_be64(s, (page_addr) | RAM_SAVE_FLAG_PAGE);
+                    ram_saveio_put_buffer(s, p, TARGET_PAGE_SIZE);
+                }
             }
 
-            found = 1;
             break;
+        } else {
+            addr += dirty_rams[0];
+            current_addr = (saved_addr + addr) % last_ram_offset;
         }
-        addr += TARGET_PAGE_SIZE;
-        current_addr = (saved_addr + addr) % last_ram_offset;
     }
 
     return found;
@@ -2822,12 +2983,19 @@  static uint64_t bytes_transferred;
 
 static ram_addr_t ram_save_remaining(void)
 {
-    ram_addr_t addr;
+    ram_addr_t addr = 0;
     ram_addr_t count = 0;
+    ram_addr_t dirty_rams[HOST_LONG_BITS];
+    int found = 0;
 
-    for (addr = 0; addr < last_ram_offset; addr += TARGET_PAGE_SIZE) {
-        if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-            count++;
+    while (addr < last_ram_offset) {
+        if ((found = cpu_physical_memory_get_dirty_range(addr, last_ram_offset,
+            dirty_rams, HOST_LONG_BITS, MIGRATION_DIRTY_FLAG))) {
+            count += found;
+            addr = dirty_rams[found - 1] + TARGET_PAGE_SIZE;
+        } else {
+            addr += dirty_rams[0];
+        }
     }
 
     return count;
@@ -2854,6 +3022,7 @@  static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     uint64_t bytes_transferred_last;
     double bwidth = 0;
     uint64_t expected_time = 0;
+    RAMSaveIO *s;
 
     if (stage < 0) {
         cpu_physical_memory_set_dirty_tracking(0);
@@ -2883,10 +3052,12 @@  static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
 
-    while (!qemu_file_rate_limit(f)) {
+    s = ram_saveio_new(f, IOV_MAX);
+
+     while (!qemu_file_rate_limit(f)) {
         int ret;
 
-        ret = ram_save_block(f);
+        ret = ram_save_block(s);
         bytes_transferred += ret * TARGET_PAGE_SIZE;
         if (ret == 0) /* no more blocks */
             break;
@@ -2903,12 +3074,14 @@  static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     /* try transferring iterative blocks of memory */
     if (stage == 3) {
         /* flush all remaining blocks regardless of rate limiting */
-        while (ram_save_block(f) != 0) {
+        while (ram_save_block(s) != 0) {
             bytes_transferred += TARGET_PAGE_SIZE;
         }
         cpu_physical_memory_set_dirty_tracking(0);
     }
 
+    ram_saveio_destroy(s);
+
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;