diff mbox

[RFC,09/13] migration/postcopy-ram: fix some helper functions to support userfaultfd write-protect

Message ID 1452169208-840-10-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang Jan. 7, 2016, 12:20 p.m. UTC
We will re-use some of the postcopy helper functions for the snapshot
process, so fix these helper functions to support userfaultfd write-protect
(UFFDIO_WRITEPROTECT_MODE_WP).

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
---
 include/migration/migration.h     |  2 +
 include/migration/postcopy-ram.h  |  2 +-
 linux-headers/linux/userfaultfd.h | 21 +++++++++--
 migration/postcopy-ram.c          | 78 ++++++++++++++++++++++++++++++---------
 migration/savevm.c                |  5 ++-
 5 files changed, 83 insertions(+), 25 deletions(-)
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1316d22..2312c73 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -87,6 +87,8 @@  struct UserfaultState {
     int       userfault_fd;
     /* To tell the fault_thread to quit */
     int       userfault_quit_fd;
+    /* UFFDIO_REGISTER_MODE_MISSING or UFFDIO_REGISTER_MODE_WP */
+    int       mode;
 };
 
 /* State for the incoming migration */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index e30978f..568cbdd 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -20,7 +20,7 @@  bool postcopy_ram_supported_by_host(void);
  * Make all of RAM sensitive to accesses to areas that haven't yet been written
  * and wire up anything necessary to deal with it.
  */
-int postcopy_ram_enable_notify(UserfaultState *us);
+int postcopy_ram_enable_notify(UserfaultState *us, int mode);
 
 /*
  * Initialise postcopy-ram, setting the RAM to a state where we can go into
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index 9057d7a..1cc3f44 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -17,7 +17,7 @@ 
  * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
  *			      UFFD_FEATURE_EVENT_FORK)
  */
-#define UFFD_API_FEATURES (0)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -25,7 +25,8 @@ 
 #define UFFD_API_RANGE_IOCTLS			\
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
-	 (__u64)1 << _UFFDIO_ZEROPAGE)
+     (__u64)1 << _UFFDIO_ZEROPAGE | \
+     (__u64)1 << _UFFDIO_WRITEPROTECT)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to
@@ -40,6 +41,7 @@ 
 #define _UFFDIO_WAKE			(0x02)
 #define _UFFDIO_COPY			(0x03)
 #define _UFFDIO_ZEROPAGE		(0x04)
+#define _UFFDIO_WRITEPROTECT    (0x05)
 #define _UFFDIO_API			(0x3F)
 
 /* userfaultfd ioctl ids */
@@ -57,6 +59,9 @@ 
 #define UFFDIO_ZEROPAGE		_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
 				      struct uffdio_zeropage)
 
+#define UFFDIO_WRITEPROTECT    _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+                     struct uffdio_writeprotect)
+
 /* read() structure */
 struct uffd_msg {
 	__u8	event;
@@ -78,7 +83,7 @@  struct uffd_msg {
 			__u64	reserved3;
 		} reserved;
 	} arg;
-} __packed;
+} __attribute__((packed));
 
 /*
  * Start at 0x12 and not at 0 to be more strict against bugs.
@@ -105,8 +110,9 @@  struct uffdio_api {
 	 * are to be considered implicitly always enabled in all kernels as
 	 * long as the uffdio_api.api requested matches UFFD_API.
 	 */
-#if 0 /* not available yet */
+
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
+#if 0
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
 #endif
 	__u64 features;
@@ -164,4 +170,11 @@  struct uffdio_zeropage {
 	__s64 zeropage;
 };
 
+struct uffdio_writeprotect {
+   struct uffdio_range range;
+   /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
+#define UFFDIO_WRITEPROTECT_MODE_WP        ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE  ((__u64)1<<1)
+   __u64 mode;
+};
 #endif /* _LINUX_USERFAULTFD_H */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 38245d4..370197e 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -85,6 +85,11 @@  static bool ufd_version_check(int ufd)
         return false;
     }
 
+    if (!(api_struct.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+        error_report("Does not support write protect feature");
+        return false;
+    }
+
     return true;
 }
 
@@ -374,6 +379,31 @@  int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
     return 0;
 }
 
+static int ram_set_pages_wp(uint64_t page_addr,
+                            uint64_t size,
+                            bool remove,
+                            int uffd)
+{
+    struct uffdio_writeprotect wp_struct;
+
+    memset(&wp_struct, 0, sizeof(wp_struct));
+    wp_struct.range.start = (uint64_t)(uintptr_t)page_addr;
+    wp_struct.range.len = size;
+    if (remove) {
+        wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
+    } else {
+        wp_struct.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+    }
+    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_struct)) {
+        int e = errno;
+        error_report("%s: %s  page_addr: 0x%lx",
+                     __func__, strerror(e), page_addr);
+
+        return -e;
+    }
+    return 0;
+}
+
 /*
  * Mark the given area of RAM as requiring notification to unwritten areas
  * Used as a  callback on qemu_ram_foreach_block.
@@ -389,18 +419,26 @@  static int ram_block_enable_notify(const char *block_name, void *host_addr,
 {
     UserfaultState *us = opaque;
     struct uffdio_register reg_struct;
+    int ret = 0;
 
     reg_struct.range.start = (uintptr_t)host_addr;
     reg_struct.range.len = length;
-    reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+    reg_struct.mode = us->mode;
 
     /* Now tell our userfault_fd that it's responsible for this area */
     if (ioctl(us->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
         error_report("%s userfault register: %s", __func__, strerror(errno));
         return -1;
     }
+    /* We need to remove the write permission from the pages so that the
+     * kernel will notify us.
+     */
+    if (us->mode == UFFDIO_REGISTER_MODE_WP) {
+        ret = ram_set_pages_wp((uintptr_t)host_addr, length, false,
+                                us->userfault_fd);
+    }
 
-    return 0;
+    return ret;
 }
 
 /*
@@ -414,8 +452,6 @@  static void *postcopy_ram_fault_thread(void *opaque)
     size_t hostpagesize = getpagesize();
     RAMBlock *rb = NULL;
     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
-    MigrationIncomingState *mis = container_of(us, MigrationIncomingState,
-                                               userfault_state);
 
     trace_postcopy_ram_fault_thread_entry();
     qemu_sem_post(&us->fault_thread_sem);
@@ -487,25 +523,31 @@  static void *postcopy_ram_fault_thread(void *opaque)
                                                 qemu_ram_get_idstr(rb),
                                                 rb_offset);
 
-        /*
-         * Send the request to the source - we want to request one
-         * of our host page sizes (which is >= TPS)
-         */
-        if (rb != last_rb) {
-            last_rb = rb;
-            migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
-                                     rb_offset, hostpagesize);
-        } else {
-            /* Save some space */
-            migrate_send_rp_req_pages(mis, NULL,
-                                     rb_offset, hostpagesize);
+        if (us->mode == UFFDIO_REGISTER_MODE_MISSING) {
+            MigrationIncomingState *mis = container_of(us,
+                                                       MigrationIncomingState,
+                                                       userfault_state);
+
+            /*
+             * Send the request to the source - we want to request one
+             * of our host page sizes (which is >= TPS)
+             */
+            if (rb != last_rb) {
+                last_rb = rb;
+                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+                                          rb_offset, hostpagesize);
+            } else {
+                /* Save some space */
+                migrate_send_rp_req_pages(mis, NULL,
+                                          rb_offset, hostpagesize);
+            }
         }
     }
     trace_postcopy_ram_fault_thread_exit();
     return NULL;
 }
 
-int postcopy_ram_enable_notify(UserfaultState *us)
+int postcopy_ram_enable_notify(UserfaultState *us, int mode)
 {
     /* Open the fd for the kernel to give us userfaults */
     us->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -514,7 +556,7 @@  int postcopy_ram_enable_notify(UserfaultState *us)
                      strerror(errno));
         return -1;
     }
-
+    us->mode = mode;
     /*
      * Although the host check already tested the API, we need to
      * do the check again as an ABI handshake on the new fd.
diff --git a/migration/savevm.c b/migration/savevm.c
index a59f216..8fe5328f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -50,7 +50,7 @@ 
 #include "qemu/iov.h"
 #include "block/snapshot.h"
 #include "block/qapi.h"
-
+#include <linux/userfaultfd.h>
 
 #ifndef ETH_P_RARP
 #define ETH_P_RARP 0x8035
@@ -1488,7 +1488,8 @@  static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
      * However, at this point the CPU shouldn't be running, and the IO
      * shouldn't be doing anything yet so don't actually expect requests
      */
-    if (postcopy_ram_enable_notify(&mis->userfault_state)) {
+    if (postcopy_ram_enable_notify(&mis->userfault_state,
+                                   UFFDIO_REGISTER_MODE_MISSING)) {
         return -1;
     }