diff mbox series

[RFC,16/21] migration: Enable doublemap with MADV_SPLIT

Message ID 20230117220914.2062125-17-peterx@redhat.com
State New
Headers show
Series migration: Support hugetlb doublemaps | expand

Commit Message

Peter Xu Jan. 17, 2023, 10:09 p.m. UTC
MADV_SPLIT enables doublemap on hugetlb.  Do that if doublemap=true
specified for the migration.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/postcopy-ram.c | 16 ++++++++++++++++
 migration/ram.c          | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+)

Comments

Juan Quintela Feb. 1, 2023, 6:59 p.m. UTC | #1
Peter Xu <peterx@redhat.com> wrote:
> MADV_SPLIT enables doublemap on hugetlb.  Do that if doublemap=true
> specified for the migration.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/postcopy-ram.c | 16 ++++++++++++++++
>  migration/ram.c          | 18 ++++++++++++++++++
>  2 files changed, 34 insertions(+)

Reviewed-by: Juan Quintela <quintela@redhat.com>


>
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 86ff73c2c0..dbc7e54e4a 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -694,6 +694,22 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
>       */
>      reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
>      if (minor_fault) {
> +        /*
> +         * MADV_SPLIT implicitly enables doublemap mode for hugetlb.  If
> +         * that fails (e.g. on old kernels) we need to fail the migration.
> +         *
> +         * It's a bit late to fail here as we could have migrated lots of
> +         * pages in precopy, but early failure will require us to allocate
> +         * hugetlb pages secretly in QEMU which is not friendly to admins
> +         * and it may affect the global hugetlb pool.  Considering it is
> +         * normally always limited, keep the failure late but tolerable.
> +         */
> +        if (qemu_madvise(qemu_ram_get_host_addr(rb), rb->postcopy_length,
> +                         QEMU_MADV_SPLIT)) {
> +            error_report("%s: madvise(MADV_SPLIT) failed (ret=%d) but "
> +                         "required for doublemap.", __func__, -errno);

Here you write errno

> +            return -1;
> +        }
>          reg_struct.mode |= UFFDIO_REGISTER_MODE_MINOR;
>      }
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index 37d7b3553a..4d786f4b97 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -3891,6 +3891,19 @@ static int migrate_hugetlb_doublemap_init(void)
>  
>      RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
>          if (qemu_ram_is_hugetlb(rb)) {
> +            /*
> +             * MADV_SPLIT implicitly enables doublemap mode for hugetlb on
> +             * the guest mapped ranges.  If that fails (e.g. on old
> +             * kernels) we need to fail the migration.  Note, the
> +             * host_mirror mapping below can be kept as hugely mapped.
> +             */
> +            if (qemu_madvise(qemu_ram_get_host_addr(rb), rb->mmap_length,
> +                             QEMU_MADV_SPLIT)) {
> +                error_report("%s: madvise(MADV_SPLIT) required for doublemap",
> +                             __func__);

Here you don't.

So I think you could change it.

I was thinking about creating a function for this, but as comments are
different I think it is overkill.

> +                return -1;
> +            }
> +
>              /*
>               * Firstly, we remap the same ramblock into another range of
>               * virtual address, so that we can write to the pages without
> @@ -3898,6 +3911,11 @@ static int migrate_hugetlb_doublemap_init(void)
>               */
>              addr = ramblock_file_map(rb);
>              if (addr == MAP_FAILED) {
> +                /*
> +                 * No need to undo MADV_SPLIT because this is dest node and
> +                 * we're going to bail out anyway.  Leave that for mm exit
> +                 * to clean things up.
> +                 */
>                  ret = -errno;
>                  error_report("%s: Duplicate mapping for hugetlb ramblock '%s'"
>                               "failed: %s", __func__, qemu_ram_get_idstr(rb),
diff mbox series

Patch

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 86ff73c2c0..dbc7e54e4a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -694,6 +694,22 @@  static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
      */
     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
     if (minor_fault) {
+        /*
+         * MADV_SPLIT implicitly enables doublemap mode for hugetlb.  If
+         * that fails (e.g. on old kernels) we need to fail the migration.
+         *
+         * It's a bit late to fail here as we could have migrated lots of
+         * pages in precopy, but early failure will require us to allocate
+         * hugetlb pages secretly in QEMU which is not friendly to admins
+         * and it may affect the global hugetlb pool.  Considering it is
+         * normally always limited, keep the failure late but tolerable.
+         */
+        if (qemu_madvise(qemu_ram_get_host_addr(rb), rb->postcopy_length,
+                         QEMU_MADV_SPLIT)) {
+            error_report("%s: madvise(MADV_SPLIT) failed (ret=%d) but "
+                         "required for doublemap.", __func__, -errno);
+            return -1;
+        }
         reg_struct.mode |= UFFDIO_REGISTER_MODE_MINOR;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index 37d7b3553a..4d786f4b97 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3891,6 +3891,19 @@  static int migrate_hugetlb_doublemap_init(void)
 
     RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
         if (qemu_ram_is_hugetlb(rb)) {
+            /*
+             * MADV_SPLIT implicitly enables doublemap mode for hugetlb on
+             * the guest mapped ranges.  If that fails (e.g. on old
+             * kernels) we need to fail the migration.  Note, the
+             * host_mirror mapping below can be kept as hugely mapped.
+             */
+            if (qemu_madvise(qemu_ram_get_host_addr(rb), rb->mmap_length,
+                             QEMU_MADV_SPLIT)) {
+                error_report("%s: madvise(MADV_SPLIT) required for doublemap",
+                             __func__);
+                return -1;
+            }
+
             /*
              * Firstly, we remap the same ramblock into another range of
              * virtual address, so that we can write to the pages without
@@ -3898,6 +3911,11 @@  static int migrate_hugetlb_doublemap_init(void)
              */
             addr = ramblock_file_map(rb);
             if (addr == MAP_FAILED) {
+                /*
+                 * No need to undo MADV_SPLIT because this is dest node and
+                 * we're going to bail out anyway.  Leave that for mm exit
+                 * to clean things up.
+                 */
                 ret = -errno;
                 error_report("%s: Duplicate mapping for hugetlb ramblock '%s'"
                              "failed: %s", __func__, qemu_ram_get_idstr(rb),