@@ -87,6 +87,8 @@ struct UserfaultState {
int userfault_fd;
/* To tell the fault_thread to quit */
int userfault_quit_fd;
+ /* UFFDIO_REGISTER_MODE_MISSING or UFFDIO_REGISTER_MODE_WP */
+ int mode;
};
/* State for the incoming migration */
@@ -20,7 +20,7 @@ bool postcopy_ram_supported_by_host(void);
* Make all of RAM sensitive to accesses to areas that haven't yet been written
* and wire up anything necessary to deal with it.
*/
-int postcopy_ram_enable_notify(UserfaultState *us);
+int postcopy_ram_enable_notify(UserfaultState *us, int mode);
/*
* Initialise postcopy-ram, setting the RAM to a state where we can go into
@@ -17,7 +17,7 @@
* #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
* UFFD_FEATURE_EVENT_FORK)
*/
-#define UFFD_API_FEATURES (0)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -25,7 +25,8 @@
#define UFFD_API_RANGE_IOCTLS \
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
- (__u64)1 << _UFFDIO_ZEROPAGE)
+ (__u64)1 << _UFFDIO_ZEROPAGE | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT)
/*
* Valid ioctl command number range with this API is from 0x00 to
@@ -40,6 +41,7 @@
#define _UFFDIO_WAKE (0x02)
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_WRITEPROTECT (0x05)
#define _UFFDIO_API (0x3F)
/* userfaultfd ioctl ids */
@@ -57,6 +59,9 @@
#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
struct uffdio_zeropage)
+#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+ struct uffdio_writeprotect)
+
/* read() structure */
struct uffd_msg {
__u8 event;
@@ -78,7 +83,7 @@ struct uffd_msg {
__u64 reserved3;
} reserved;
} arg;
-} __packed;
+} __attribute__((packed));
/*
* Start at 0x12 and not at 0 to be more strict against bugs.
@@ -105,8 +110,9 @@ struct uffdio_api {
* are to be considered implicitly always enabled in all kernels as
* long as the uffdio_api.api requested matches UFFD_API.
*/
-#if 0 /* not available yet */
+
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#if 0
#define UFFD_FEATURE_EVENT_FORK (1<<1)
#endif
__u64 features;
@@ -164,4 +170,11 @@ struct uffdio_zeropage {
__s64 zeropage;
};
+struct uffdio_writeprotect { /* argument passed to the UFFDIO_WRITEPROTECT ioctl */
+ struct uffdio_range range; /* start/len of the region the request applies to */
+ /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
+#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode; /* bitmask built from the UFFDIO_WRITEPROTECT_MODE_* flags above */
+};
#endif /* _LINUX_USERFAULTFD_H */
@@ -85,6 +85,11 @@ static bool ufd_version_check(int ufd)
return false;
}
+ if (!(api_struct.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+ error_report("Does not support write protect feature");
+ return false;
+ }
+
return true;
}
@@ -374,6 +379,31 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
return 0;
}
+/*
+ * Set (remove == false) or clear (remove == true) userfault write
+ * protection on [page_addr, page_addr + size). Returns 0 or -errno.
+ */
+static int ram_set_pages_wp(uint64_t page_addr, uint64_t size, bool remove,
+                            int uffd)
+{
+    struct uffdio_writeprotect wp_struct;
+
+    memset(&wp_struct, 0, sizeof(wp_struct));
+    wp_struct.range.start = page_addr;
+    wp_struct.range.len = size;
+    /* Leaving MODE_WP clear undoes the protection; DONTWAKE needs !WP */
+    wp_struct.mode = remove ? UFFDIO_WRITEPROTECT_MODE_DONTWAKE
+                            : UFFDIO_WRITEPROTECT_MODE_WP;
+    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_struct)) {
+        int e = errno; /* capture before error_report() can overwrite it */
+
+        error_report("%s: %s page_addr: 0x%llx", __func__, strerror(e),
+                     (unsigned long long)page_addr); /* portable u64 print */
+        return -e;
+    }
+    return 0;
+}
+
/*
* Mark the given area of RAM as requiring notification to unwritten areas
* Used as a callback on qemu_ram_foreach_block.
@@ -389,18 +419,26 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
{
UserfaultState *us = opaque;
struct uffdio_register reg_struct;
+ int ret = 0;
reg_struct.range.start = (uintptr_t)host_addr;
reg_struct.range.len = length;
- reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+ reg_struct.mode = us->mode;
/* Now tell our userfault_fd that it's responsible for this area */
if (ioctl(us->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
+ /* In write-protect mode, remove write permission from the pages so that
+ * the kernel notifies us when they are written to.
+ */
+ if (us->mode == UFFDIO_REGISTER_MODE_WP) {
+ ret = ram_set_pages_wp((uintptr_t)host_addr, length, false,
+ us->userfault_fd);
+ }
- return 0;
+ return ret;
}
/*
@@ -414,8 +452,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
size_t hostpagesize = getpagesize();
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
- MigrationIncomingState *mis = container_of(us, MigrationIncomingState,
- userfault_state);
trace_postcopy_ram_fault_thread_entry();
qemu_sem_post(&us->fault_thread_sem);
@@ -487,25 +523,31 @@ static void *postcopy_ram_fault_thread(void *opaque)
qemu_ram_get_idstr(rb),
rb_offset);
- /*
- * Send the request to the source - we want to request one
- * of our host page sizes (which is >= TPS)
- */
- if (rb != last_rb) {
- last_rb = rb;
- migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, hostpagesize);
- } else {
- /* Save some space */
- migrate_send_rp_req_pages(mis, NULL,
- rb_offset, hostpagesize);
+ if (us->mode == UFFDIO_REGISTER_MODE_MISSING) {
+ MigrationIncomingState *mis = container_of(us,
+ MigrationIncomingState,
+ userfault_state);
+
+ /*
+ * Send the request to the source - we want to request one
+ * of our host page sizes (which is >= TPS)
+ */
+ if (rb != last_rb) {
+ last_rb = rb;
+ migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
+ rb_offset, hostpagesize);
+ } else {
+ /* Save some space */
+ migrate_send_rp_req_pages(mis, NULL,
+ rb_offset, hostpagesize);
+ }
}
}
trace_postcopy_ram_fault_thread_exit();
return NULL;
}
-int postcopy_ram_enable_notify(UserfaultState *us)
+int postcopy_ram_enable_notify(UserfaultState *us, int mode)
{
/* Open the fd for the kernel to give us userfaults */
us->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@@ -514,7 +556,7 @@ int postcopy_ram_enable_notify(UserfaultState *us)
strerror(errno));
return -1;
}
-
+ us->mode = mode;
/*
* Although the host check already tested the API, we need to
* do the check again as an ABI handshake on the new fd.
@@ -50,7 +50,7 @@
#include "qemu/iov.h"
#include "block/snapshot.h"
#include "block/qapi.h"
-
+#include <linux/userfaultfd.h>
#ifndef ETH_P_RARP
#define ETH_P_RARP 0x8035
@@ -1488,7 +1488,8 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* However, at this point the CPU shouldn't be running, and the IO
* shouldn't be doing anything yet so don't actually expect requests
*/
- if (postcopy_ram_enable_notify(&mis->userfault_state)) {
+ if (postcopy_ram_enable_notify(&mis->userfault_state,
+ UFFDIO_REGISTER_MODE_MISSING)) {
return -1;
}
We will re-use some helper functions for the snapshot process, so extend these helper functions to support UFFDIO_WRITEPROTECT_MODE_WP. Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> --- include/migration/migration.h | 2 + include/migration/postcopy-ram.h | 2 +- linux-headers/linux/userfaultfd.h | 21 +++++++++-- migration/postcopy-ram.c | 78 ++++++++++++++++++++++++++++++--------- migration/savevm.c | 5 ++- 5 files changed, 83 insertions(+), 25 deletions(-)