diff mbox series

[v3,09/12] kvm: Persistent per kvmslot dirty bitmap

Message ID 20190530092919.26059-10-peterx@redhat.com
State New
Headers show
Series kvm/migration: support KVM_CLEAR_DIRTY_LOG | expand

Commit Message

Peter Xu May 30, 2019, 9:29 a.m. UTC
When synchronizing dirty bitmap from kernel KVM we do it in a
per-kvmslot fashion and we allocate the userspace bitmap for each of
the ioctl.  This patch instead makes the bitmap cache persistent
so we don't need to g_malloc0() every time.

More importantly, the cached per-kvmslot dirty bitmap will be further
used when we want to add support for the KVM_CLEAR_DIRTY_LOG and this
cached bitmap will be used to guarantee we won't clear any unknown
dirty bits otherwise that can be a severe data loss issue for
migration code.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 accel/kvm/kvm-all.c      | 39 +++++++++++++++++++++------------------
 include/sysemu/kvm_int.h |  2 ++
 2 files changed, 23 insertions(+), 18 deletions(-)

Comments

Dr. David Alan Gilbert May 30, 2019, 1:53 p.m. UTC | #1
* Peter Xu (peterx@redhat.com) wrote:
> When synchronizing dirty bitmap from kernel KVM we do it in a
> per-kvmslot fashion and we allocate the userspace bitmap for each of
> the ioctl.  This patch instead make the bitmap cache be persistent
> then we don't need to g_malloc0() every time.
> 
> More importantly, the cached per-kvmslot dirty bitmap will be further
> used when we want to add support for the KVM_CLEAR_DIRTY_LOG and this
> cached bitmap will be used to guarantee we won't clear any unknown
> dirty bits otherwise that can be a severe data loss issue for
> migration code.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>

Is there no way to make this get allocated the first time it's needed?
I'm thinking here of the VM most of the time not being migrated so we're
allocating this structure for no benefit.

Dave

> ---
>  accel/kvm/kvm-all.c      | 39 +++++++++++++++++++++------------------
>  include/sysemu/kvm_int.h |  2 ++
>  2 files changed, 23 insertions(+), 18 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index b686531586..334c610918 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -497,31 +497,14 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
>              return 0;
>          }
>  
> -        /* XXX bad kernel interface alert
> -         * For dirty bitmap, kernel allocates array of size aligned to
> -         * bits-per-long.  But for case when the kernel is 64bits and
> -         * the userspace is 32bits, userspace can't align to the same
> -         * bits-per-long, since sizeof(long) is different between kernel
> -         * and user space.  This way, userspace will provide buffer which
> -         * may be 4 bytes less than the kernel will use, resulting in
> -         * userspace memory corruption (which is not detectable by valgrind
> -         * too, in most cases).
> -         * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
> -         * a hope that sizeof(long) won't become >8 any time soon.
> -         */
> -        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
> -                     /*HOST_LONG_BITS*/ 64) / 8;
> -        d.dirty_bitmap = g_malloc0(size);
> -
> +        d.dirty_bitmap = mem->dirty_bmap;
>          d.slot = mem->slot | (kml->as_id << 16);
>          if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
>              DPRINTF("ioctl failed %d\n", errno);
> -            g_free(d.dirty_bitmap);
>              return -1;
>          }
>  
>          kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
> -        g_free(d.dirty_bitmap);
>      }
>  
>      return 0;
> @@ -765,6 +748,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>      MemoryRegion *mr = section->mr;
>      bool writeable = !mr->readonly && !mr->rom_device;
>      hwaddr start_addr, size;
> +    unsigned long bmap_size;
>      void *ram;
>  
>      if (!memory_region_is_ram(mr)) {
> @@ -796,6 +780,8 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>          }
>  
>          /* unregister the slot */
> +        g_free(mem->dirty_bmap);
> +        mem->dirty_bmap = NULL;
>          mem->memory_size = 0;
>          mem->flags = 0;
>          err = kvm_set_user_memory_region(kml, mem, false);
> @@ -807,12 +793,29 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
>          return;
>      }
>  
> +    /*
> +     * XXX bad kernel interface alert For dirty bitmap, kernel
> +     * allocates array of size aligned to bits-per-long.  But for case
> +     * when the kernel is 64bits and the userspace is 32bits,
> +     * userspace can't align to the same bits-per-long, since
> +     * sizeof(long) is different between kernel and user space.  This
> +     * way, userspace will provide buffer which may be 4 bytes less
> +     * than the kernel will use, resulting in userspace memory
> +     * corruption (which is not detectable by valgrind too, in most
> +     * cases).  So for now, let's align to 64 instead of
> +     * HOST_LONG_BITS here, in a hope that sizeof(long) won't become
> +     * >8 any time soon.
> +     */
> +    bmap_size = ALIGN((size >> TARGET_PAGE_BITS),
> +                      /*HOST_LONG_BITS*/ 64) / 8;
> +
>      /* register the new slot */
>      mem = kvm_alloc_slot(kml);
>      mem->memory_size = size;
>      mem->start_addr = start_addr;
>      mem->ram = ram;
>      mem->flags = kvm_mem_flags(mr);
> +    mem->dirty_bmap = g_malloc0(bmap_size);
>  
>      err = kvm_set_user_memory_region(kml, mem, true);
>      if (err) {
> diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> index f838412491..687a2ee423 100644
> --- a/include/sysemu/kvm_int.h
> +++ b/include/sysemu/kvm_int.h
> @@ -21,6 +21,8 @@ typedef struct KVMSlot
>      int slot;
>      int flags;
>      int old_flags;
> +    /* Dirty bitmap cache for the slot */
> +    unsigned long *dirty_bmap;
>  } KVMSlot;
>  
>  typedef struct KVMMemoryListener {
> -- 
> 2.17.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Peter Xu May 31, 2019, 2:43 a.m. UTC | #2
On Thu, May 30, 2019 at 02:53:30PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > When synchronizing dirty bitmap from kernel KVM we do it in a
> > per-kvmslot fashion and we allocate the userspace bitmap for each of
> > the ioctl.  This patch instead make the bitmap cache be persistent
> > then we don't need to g_malloc0() every time.
> > 
> > More importantly, the cached per-kvmslot dirty bitmap will be further
> > used when we want to add support for the KVM_CLEAR_DIRTY_LOG and this
> > cached bitmap will be used to guarantee we won't clear any unknown
> > dirty bits otherwise that can be a severe data loss issue for
> > migration code.
> > 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> 
> Is there no way to make this get allocated the first time it's needed?
> I'm thinking here of the VM most of the time not being migrated so we're
> allocating this structure for no benefit.

Valid argument...  Sure, we can do the allocation on first use.  How
about squashing in the change below?

(I'll squash them properly into the appropriate patches; the
 assertion belongs to the other patch)

=======================

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 23895a95a2..80bc4be03a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -523,6 +523,11 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,                                                                       
             goto out;
         }

+        if (!mem->dirty_bmap) {
+            /* Allocate on the first log_sync, once and for all */
+            mem->dirty_bmap = g_malloc0(bmap_size);
+        }
+
         d.dirty_bitmap = mem->dirty_bmap;
         d.slot = mem->slot | (kml->as_id << 16);
         if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
@@ -638,6 +643,8 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml,
      */

     assert(bmap_start % BITS_PER_LONG == 0);
+    /* We should never do log_clear before log_sync */
+    assert(mem->dirty_bmap);
     if (start_delta) {
         /* Slow path - we need to manipulate a temp bitmap */
         bmap_clear = bitmap_new(bmap_npages);
@@ -995,7 +1002,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
     mem->start_addr = start_addr;
     mem->ram = ram;
     mem->flags = kvm_mem_flags(mr);
-    mem->dirty_bmap = g_malloc0(bmap_size);

     err = kvm_set_user_memory_region(kml, mem, true);
     if (err) {

Regards,
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index b686531586..334c610918 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -497,31 +497,14 @@  static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
             return 0;
         }
 
-        /* XXX bad kernel interface alert
-         * For dirty bitmap, kernel allocates array of size aligned to
-         * bits-per-long.  But for case when the kernel is 64bits and
-         * the userspace is 32bits, userspace can't align to the same
-         * bits-per-long, since sizeof(long) is different between kernel
-         * and user space.  This way, userspace will provide buffer which
-         * may be 4 bytes less than the kernel will use, resulting in
-         * userspace memory corruption (which is not detectable by valgrind
-         * too, in most cases).
-         * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
-         * a hope that sizeof(long) won't become >8 any time soon.
-         */
-        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
-                     /*HOST_LONG_BITS*/ 64) / 8;
-        d.dirty_bitmap = g_malloc0(size);
-
+        d.dirty_bitmap = mem->dirty_bmap;
         d.slot = mem->slot | (kml->as_id << 16);
         if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
             DPRINTF("ioctl failed %d\n", errno);
-            g_free(d.dirty_bitmap);
             return -1;
         }
 
         kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
-        g_free(d.dirty_bitmap);
     }
 
     return 0;
@@ -765,6 +748,7 @@  static void kvm_set_phys_mem(KVMMemoryListener *kml,
     MemoryRegion *mr = section->mr;
     bool writeable = !mr->readonly && !mr->rom_device;
     hwaddr start_addr, size;
+    unsigned long bmap_size;
     void *ram;
 
     if (!memory_region_is_ram(mr)) {
@@ -796,6 +780,8 @@  static void kvm_set_phys_mem(KVMMemoryListener *kml,
         }
 
         /* unregister the slot */
+        g_free(mem->dirty_bmap);
+        mem->dirty_bmap = NULL;
         mem->memory_size = 0;
         mem->flags = 0;
         err = kvm_set_user_memory_region(kml, mem, false);
@@ -807,12 +793,29 @@  static void kvm_set_phys_mem(KVMMemoryListener *kml,
         return;
     }
 
+    /*
+     * XXX bad kernel interface alert For dirty bitmap, kernel
+     * allocates array of size aligned to bits-per-long.  But for case
+     * when the kernel is 64bits and the userspace is 32bits,
+     * userspace can't align to the same bits-per-long, since
+     * sizeof(long) is different between kernel and user space.  This
+     * way, userspace will provide buffer which may be 4 bytes less
+     * than the kernel will use, resulting in userspace memory
+     * corruption (which is not detectable by valgrind too, in most
+     * cases).  So for now, let's align to 64 instead of
+     * HOST_LONG_BITS here, in a hope that sizeof(long) won't become
+     * >8 any time soon.
+     */
+    bmap_size = ALIGN((size >> TARGET_PAGE_BITS),
+                      /*HOST_LONG_BITS*/ 64) / 8;
+
     /* register the new slot */
     mem = kvm_alloc_slot(kml);
     mem->memory_size = size;
     mem->start_addr = start_addr;
     mem->ram = ram;
     mem->flags = kvm_mem_flags(mr);
+    mem->dirty_bmap = g_malloc0(bmap_size);
 
     err = kvm_set_user_memory_region(kml, mem, true);
     if (err) {
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index f838412491..687a2ee423 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -21,6 +21,8 @@  typedef struct KVMSlot
     int slot;
     int flags;
     int old_flags;
+    /* Dirty bitmap cache for the slot */
+    unsigned long *dirty_bmap;
 } KVMSlot;
 
 typedef struct KVMMemoryListener {