diff mbox

[v2,09/16] postcopy: Use temporary for placing zero huge pages

Message ID 20170206173306.20603-10-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert Feb. 6, 2017, 5:32 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
on it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/migration.h |  1 +
 migration/postcopy-ram.c      | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

Comments

Laurent Vivier Feb. 24, 2017, 3:31 p.m. UTC | #1
On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
> to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
> on it.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Reviewed-by: Juan Quintela <quintela@redhat.com>
> ---
>  include/migration/migration.h |  1 +
>  migration/postcopy-ram.c      | 23 +++++++++++++++++++++--
>  2 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index c9c1d5f..bd399fc 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -108,6 +108,7 @@ struct MigrationIncomingState {
>      QEMUFile *to_src_file;
>      QemuMutex rp_mutex;    /* We send replies from multiple threads */
>      void     *postcopy_tmp_page;
> +    void     *postcopy_tmp_zero_page;
>  
>      QEMUBH *bh;
>  
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index a8b7fed..4c736d2 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -324,6 +324,10 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
>          munmap(mis->postcopy_tmp_page, mis->largest_page_size);
>          mis->postcopy_tmp_page = NULL;
>      }
> +    if (mis->postcopy_tmp_zero_page) {
> +        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
> +        mis->postcopy_tmp_zero_page = NULL;
> +    }
>      trace_postcopy_ram_incoming_cleanup_exit();
>      return 0;
>  }
> @@ -593,8 +597,23 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
>              return -e;
>          }
>      } else {
> -        /* TODO: The kernel can't use UFFDIO_ZEROPAGE for hugepages */
> -        assert(0);
> +        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
> +        if (!mis->postcopy_tmp_zero_page) {
> +            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
> +                                               PROT_READ | PROT_WRITE,
> +                                               MAP_PRIVATE | MAP_ANONYMOUS,
> +                                               -1, 0);
> +            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
> +                int e = errno;
> +                mis->postcopy_tmp_zero_page = NULL;
> +                error_report("%s: %s mapping large zero page",
> +                             __func__, strerror(e));
> +                return -e;
> +            }
> +            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
> +        }
> +        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
> +                                   pagesize);
>      }

It's sad to have to allocate 1 huge page just to zero them.

Are you sure the kernel doesn't support UFFDIO_ZEROPAGE for huge page.
It seems __mcopy_atomic() manages HUGETLB vma (it is called by
mfill_zeropage(), called by userfaultfd_zeropage())?

Anyway, the code looks good:
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Dr. David Alan Gilbert Feb. 24, 2017, 3:46 p.m. UTC | #2
* Laurent Vivier (lvivier@redhat.com) wrote:
> On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
> > to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
> > on it.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Reviewed-by: Juan Quintela <quintela@redhat.com>
> > ---
> >  include/migration/migration.h |  1 +
> >  migration/postcopy-ram.c      | 23 +++++++++++++++++++++--
> >  2 files changed, 22 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > index c9c1d5f..bd399fc 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -108,6 +108,7 @@ struct MigrationIncomingState {
> >      QEMUFile *to_src_file;
> >      QemuMutex rp_mutex;    /* We send replies from multiple threads */
> >      void     *postcopy_tmp_page;
> > +    void     *postcopy_tmp_zero_page;
> >  
> >      QEMUBH *bh;
> >  
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > index a8b7fed..4c736d2 100644
> > --- a/migration/postcopy-ram.c
> > +++ b/migration/postcopy-ram.c
> > @@ -324,6 +324,10 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
> >          munmap(mis->postcopy_tmp_page, mis->largest_page_size);
> >          mis->postcopy_tmp_page = NULL;
> >      }
> > +    if (mis->postcopy_tmp_zero_page) {
> > +        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
> > +        mis->postcopy_tmp_zero_page = NULL;
> > +    }
> >      trace_postcopy_ram_incoming_cleanup_exit();
> >      return 0;
> >  }
> > @@ -593,8 +597,23 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
> >              return -e;
> >          }
> >      } else {
> > -        /* TODO: The kernel can't use UFFDIO_ZEROPAGE for hugepages */
> > -        assert(0);
> > +        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
> > +        if (!mis->postcopy_tmp_zero_page) {
> > +            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
> > +                                               PROT_READ | PROT_WRITE,
> > +                                               MAP_PRIVATE | MAP_ANONYMOUS,
> > +                                               -1, 0);
> > +            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
> > +                int e = errno;
> > +                mis->postcopy_tmp_zero_page = NULL;
> > +                error_report("%s: %s mapping large zero page",
> > +                             __func__, strerror(e));
> > +                return -e;
> > +            }
> > +            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
> > +        }
> > +        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
> > +                                   pagesize);
> >      }
> 
> It's sad to have to allocate 1 huge page just to zero them.
> 
> Are you sure the kernel doesn't support UFFDIO_ZEROPAGE for huge page.
> It seems __mcopy_atomic() manages HUGETLB vma (it is called by
> mfill_zeropage(), called by userfaultfd_zeropage())?

That's as I understand it from Andrea; and I think it does fail if you try it.

> Anyway, the code looks good:
> Reviewed-by: Laurent Vivier <lvivier@redhat.com>

Thanks.

Dave

> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Laurent Vivier Feb. 24, 2017, 5:24 p.m. UTC | #3
On 24/02/2017 16:46, Dr. David Alan Gilbert wrote:
> * Laurent Vivier (lvivier@redhat.com) wrote:
>> On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
>>> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>>>
>>> The kernel can't do UFFDIO_ZEROPAGE for huge pages, so we have
>>> to allocate a temporary (always zero) page and use UFFDIO_COPYPAGE
>>> on it.
>>>
>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>>> ---
...
>> Are you sure the kernel doesn't support UFFDIO_ZEROPAGE for huge page.
>> It seems __mcopy_atomic() manages HUGETLB vma (it is called by
>> mfill_zeropage(), called by userfaultfd_zeropage())?
> 
> That's as I understand it from Andrea; and I think it does fail if you try it.

Found the answer in kernel log:

    commit 7a0c4cf85b856430af62a907dd65dfc51438d24f
    Author: Andrea Arcangeli <aarcange@redhat.com>
    Date:   Wed Feb 22 15:44:10 2017 -0800

        userfaultfd: selftest: test UFFDIO_ZEROPAGE on all memory types

        This will verify -EINVAL is returned with hugetlbfs/shmem and
        it'll do a functional test of UFFDIO_ZEROPAGE on anonymous
        memory.

Thanks,
Laurent
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index c9c1d5f..bd399fc 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -108,6 +108,7 @@  struct MigrationIncomingState {
     QEMUFile *to_src_file;
     QemuMutex rp_mutex;    /* We send replies from multiple threads */
     void     *postcopy_tmp_page;
+    void     *postcopy_tmp_zero_page;
 
     QEMUBH *bh;
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a8b7fed..4c736d2 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -324,6 +324,10 @@  int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         munmap(mis->postcopy_tmp_page, mis->largest_page_size);
         mis->postcopy_tmp_page = NULL;
     }
+    if (mis->postcopy_tmp_zero_page) {
+        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
+        mis->postcopy_tmp_zero_page = NULL;
+    }
     trace_postcopy_ram_incoming_cleanup_exit();
     return 0;
 }
@@ -593,8 +597,23 @@  int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
             return -e;
         }
     } else {
-        /* TODO: The kernel can't use UFFDIO_ZEROPAGE for hugepages */
-        assert(0);
+        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
+        if (!mis->postcopy_tmp_zero_page) {
+            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+                                               PROT_READ | PROT_WRITE,
+                                               MAP_PRIVATE | MAP_ANONYMOUS,
+                                               -1, 0);
+            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+                int e = errno;
+                mis->postcopy_tmp_zero_page = NULL;
+                error_report("%s: %s mapping large zero page",
+                             __func__, strerror(e));
+                return -e;
+            }
+            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+        }
+        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
+                                   pagesize);
     }
 
     return 0;