diff mbox series

[RFC,21/21] migration: Collapse huge pages again after postcopy finished

Message ID 20230117220914.2062125-22-peterx@redhat.com
State New
Headers show
Series migration: Support hugetlb doublemaps | expand

Commit Message

Peter Xu Jan. 17, 2023, 10:09 p.m. UTC
When hugetlb-doublemap is enabled, pages are migrated at small page
granularity during postcopy.  When the migration finishes, the page tables
need to be rebuilt explicitly for these ranges so that huge pages are mapped again.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/ram.c        | 31 +++++++++++++++++++++++++++++++
 migration/trace-events |  1 +
 2 files changed, 32 insertions(+)

Comments

Juan Quintela Feb. 1, 2023, 7:49 p.m. UTC | #1
Peter Xu <peterx@redhat.com> wrote:
> When hugetlb-doublemap is enabled, pages are migrated at small page
> granularity during postcopy.  When the migration finishes, the page tables
> need to be rebuilt explicitly for these ranges so that huge pages are mapped again.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/ram.c        | 31 +++++++++++++++++++++++++++++++
>  migration/trace-events |  1 +
>  2 files changed, 32 insertions(+)
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 4da56d925c..178739f8c3 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -3986,6 +3986,31 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
>      return 0;
>  }
>  
> +#define  MADV_COLLAPSE_CHUNK_SIZE  (1UL << 30) /* 1G */
> +
> +static void ramblock_rebuild_huge_mappings(RAMBlock *rb)
> +{
> +    unsigned long addr, size;

This makes my head explode.

We have:

unsigned long
__u64
uint64_t

Used and mixed all around.

> +    assert(qemu_ram_is_hugetlb(rb));
> +
> +    addr = (unsigned long)qemu_ram_get_host_addr(rb);

Shouldn't this cast be to uintptr_t?
At least on win64 (where unsigned long is only 32 bits) it should fail, no?

> +    size = rb->mmap_length;

This is ram_addr_t.  It is uint64_t except with xen.
So it should fail on any 32-bit host.

Later, Juan.
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index 4da56d925c..178739f8c3 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3986,6 +3986,31 @@  static int ram_load_setup(QEMUFile *f, void *opaque)
     return 0;
 }
 
+#define  MADV_COLLAPSE_CHUNK_SIZE  (1UL << 30) /* 1G */
+
+static void ramblock_rebuild_huge_mappings(RAMBlock *rb)
+{
+    unsigned long addr, size;
+
+    assert(qemu_ram_is_hugetlb(rb));
+
+    addr = (unsigned long)qemu_ram_get_host_addr(rb);
+    size = rb->mmap_length;
+
+    while (size) {
+        unsigned long chunk = MIN(size, MADV_COLLAPSE_CHUNK_SIZE);
+
+        if (qemu_madvise((void *)addr, chunk, QEMU_MADV_COLLAPSE)) {
+            error_report("%s: madvise(MADV_COLLAPSE) failed "
+                         "for ramblock '%s'", __func__, rb->idstr);
+        } else {
+            trace_ramblock_rebuild_huge_mappings(rb->idstr, addr, chunk);
+        }
+        addr += chunk;
+        size -= chunk;
+    }
+}
+
 static int ram_load_cleanup(void *opaque)
 {
     RAMBlock *rb;
@@ -4001,6 +4026,12 @@  static int ram_load_cleanup(void *opaque)
         g_free(rb->receivedmap);
         rb->receivedmap = NULL;
         if (rb->host_mirror) {
+            /*
+             * If host_mirror is set, it means this is a hugetlb ramblock,
+             * and we've enabled double mappings for it.  Rebuild the huge
+             * page tables here.
+             */
+            ramblock_rebuild_huge_mappings(rb);
             munmap(rb->host_mirror, rb->mmap_length);
             rb->host_mirror = NULL;
         }
diff --git a/migration/trace-events b/migration/trace-events
index 7baf235d22..6b52bb691c 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -119,6 +119,7 @@  postcopy_preempt_hit(char *str, uint64_t offset) "ramblock %s offset 0x%"PRIx64
 postcopy_preempt_send_host_page(char *str, uint64_t offset) "ramblock %s offset 0x%"PRIx64
 postcopy_preempt_switch_channel(int channel) "%d"
 postcopy_preempt_reset_channel(void) ""
+ramblock_rebuild_huge_mappings(char *str, unsigned long start, unsigned long size) "ramblock %s start 0x%lx size 0x%lx"
 
 # multifd.c
 multifd_new_send_channel_async(uint8_t id) "channel %u"