Patchwork [v3,21/35] postcopy: introduce helper functions for postcopy

login
register
mail settings
Submitter Isaku Yamahata
Date Oct. 30, 2012, 8:32 a.m.
Message ID <9f6ff0fdcd838dd885f594adf1a5727a55f012fb.1351582535.git.yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/195360/
State New
Headers show

Comments

Isaku Yamahata - Oct. 30, 2012, 8:32 a.m.
This patch introduces helper function for postcopy to access
umem char device and to communicate between incoming-qemu and umemd.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
changes v2 -> v3:
- error check, don't abort
- typedef
- #ifdef CONFIG_LINUX
- code simplification

changes v1 -> v2:
- code simplification
- make fault trigger more robust
- introduce struct umem_pages
---
 umem.c |  291 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 umem.h |   88 ++++++++++++++++++++
 2 files changed, 379 insertions(+)
 create mode 100644 umem.c
 create mode 100644 umem.h

Patch

diff --git a/umem.c b/umem.c
new file mode 100644
index 0000000..b05377b
--- /dev/null
+++ b/umem.c
@@ -0,0 +1,291 @@ 
+/*
+ * umem.c: user process backed memory module for postcopy livemigration
+ *
+ * Copyright (c) 2011
+ * National Institute of Advanced Industrial Science and Technology
+ *
+ * https://sites.google.com/site/grivonhome/quick-kvm-migration
+ * Author: Isaku Yamahata <yamahata at valinux co jp>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "config-host.h"
+#ifdef CONFIG_LINUX
+#include <linux/uvmem.h>
+#endif
+
+#include "bitops.h"
+#include "sysemu.h"
+#include "hw/hw.h"
+#include "umem.h"
+
+//#define DEBUG_UMEM
+#ifdef DEBUG_UMEM
+#define DPRINTF(format, ...)                                            \
+    do {                                                                \
+        printf("%s:%d "format, __func__, __LINE__, ## __VA_ARGS__);     \
+    } while (0)
+#else
+#define DPRINTF(format, ...)    do { } while (0)
+#endif
+
+#define DEV_UMEM        "/dev/uvmem"
+
+int umem_new(void *hostp, size_t size, UMem** umemp)
+{
+#ifdef CONFIG_LINUX
+    struct uvmem_init uinit = {
+        .size = size,
+        .shmem_fd = -1,
+    };
+    UMem *umem;
+    int error;
+
+    assert((size % getpagesize()) == 0);
+    umem = g_new(UMem, 1);
+    umem->fd = open(DEV_UMEM, O_RDWR);
+    if (umem->fd < 0) {
+        error = -errno;
+        perror("can't open "DEV_UMEM);
+        goto error;
+    }
+
+    if (ioctl(umem->fd, UVMEM_INIT, &uinit) < 0) {
+        error = -errno;
+        perror("UMEM_INIT failed");
+        goto error;
+    }
+    if (ftruncate(uinit.shmem_fd, uinit.size) < 0) {
+        error = -errno;
+        perror("truncate(\"shmem_fd\") failed");
+        goto error;
+    }
+
+    umem->nbits = 0;
+    umem->nsets = 0;
+    umem->faulted = NULL;
+    umem->page_shift = ffs(getpagesize()) - 1;
+    umem->shmem_fd = uinit.shmem_fd;
+    umem->size = uinit.size;
+    umem->umem = mmap(hostp, size, PROT_EXEC | PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_FIXED, umem->fd, 0);
+    if (umem->umem == MAP_FAILED) {
+        error = -errno;
+        perror("mmap(UMem) failed");
+        goto error;
+    }
+    *umemp = umem;
+    return 0;
+
+error:
+    if (umem->fd >= 0) {
+        close(umem->fd);
+    }
+    if (uinit.shmem_fd >= 0) {
+        close(uinit.shmem_fd);
+    }
+    g_free(umem);
+    return error;
+#else
+    perror("postcopy migration is not supported");
+    return -ENOSYS;
+#endif
+}
+
+void umem_destroy(UMem *umem)
+{
+    if (umem->fd != -1) {
+        close(umem->fd);
+    }
+    if (umem->shmem_fd != -1) {
+        close(umem->shmem_fd);
+    }
+    g_free(umem->faulted);
+    g_free(umem);
+}
+
+size_t umem_pages_size(uint64_t nr)
+{
+    return sizeof(UMemPages) + nr * sizeof(uint64_t);
+}
+
+int umem_get_page_request(UMem *umem, UMemPages *page_request)
+{
+    ssize_t ret = read(umem->fd, page_request->pgoffs,
+                       page_request->nr * sizeof(page_request->pgoffs[0]));
+    if (ret < 0) {
+        if (errno != EINTR) {
+            perror("daemon: umem read failed");
+            return -errno;
+        }
+        ret = 0;
+    }
+    page_request->nr = ret / sizeof(page_request->pgoffs[0]);
+    return 0;
+}
+
+int umem_mark_page_cached(UMem *umem, UMemPages *page_cached)
+{
+    const void *buf = page_cached->pgoffs;
+    size_t size = page_cached->nr * sizeof(page_cached->pgoffs[0]);
+    ssize_t ret;
+
+    ret = qemu_write_full(umem->fd, buf, size);
+    if (ret != size) {
+        perror("daemon: umem write");
+        return -errno;
+    }
+    return 0;
+}
+
+void umem_unmap(UMem *umem)
+{
+    munmap(umem->umem, umem->size);
+    umem->umem = NULL;
+}
+
+void umem_close(UMem *umem)
+{
+    close(umem->fd);
+    umem->fd = -1;
+}
+
+int umem_map_shmem(UMem *umem)
+{
+    umem->nbits = umem->size >> umem->page_shift;
+    umem->nsets = 0;
+    umem->faulted = g_new0(unsigned long, BITS_TO_LONGS(umem->nbits));
+
+    umem->shmem = mmap(NULL, umem->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                       umem->shmem_fd, 0);
+    if (umem->shmem == MAP_FAILED) {
+        perror("daemon: mmap(\"shmem\")");
+        return -errno;
+    }
+    return 0;
+}
+
+void umem_unmap_shmem(UMem *umem)
+{
+    if (umem->shmem) {
+        munmap(umem->shmem, umem->size);
+        umem->shmem = NULL;
+    }
+}
+
+void umem_remove_shmem(UMem *umem, size_t offset, size_t size)
+{
+    size_t s = offset >> umem->page_shift;
+    size_t e = (offset + size) >> umem->page_shift;
+    size_t i;
+
+    for (i = s; i < e; i++) {
+        if (!test_and_set_bit(i, umem->faulted)) {
+            umem->nsets++;
+            qemu_madvise(umem->shmem + offset, size, QEMU_MADV_REMOVE);
+        }
+    }
+}
+
+bool umem_shmem_finished(const UMem *umem)
+{
+    return umem->nsets == umem->nbits;
+}
+
+void umem_close_shmem(UMem *umem)
+{
+    close(umem->shmem_fd);
+    umem->shmem_fd = -1;
+}
+
+/***************************************************************************/
+/* qemu main loop <-> umem thread communication */
+
+static int umem_write_cmd(int fd, uint8_t cmd)
+{
+    ssize_t size;
+
+    DPRINTF("write cmd %c\n", cmd);
+    size = qemu_write_full(fd, &cmd, sizeof(cmd));
+    if (size == 0) {
+        if (errno == EPIPE) {
+            perror("pipe is closed");
+            DPRINTF("write cmd %c %d: pipe is closed\n", cmd, errno);
+            return 0;
+        }
+        perror("fail to write to pipe");
+        DPRINTF("write cmd %c %d\n", cmd, errno);
+        return -1;
+    }
+    return 0;
+}
+
+static int umem_read_cmd(int fd, uint8_t expect)
+{
+    ssize_t size;
+    uint8_t cmd;
+
+    size = qemu_read_full(fd, &cmd, sizeof(cmd));
+    if (size == 0) {
+        DPRINTF("read cmd %c: pipe is closed\n", cmd);
+        return -1;
+    }
+
+    DPRINTF("read cmd %c\n", cmd);
+    if (cmd != expect) {
+        DPRINTF("cmd %c expect %d\n", cmd, expect);
+        return -1;
+    }
+    return 0;
+}
+
+/* umem thread -> qemu main loop */
+int umem_daemon_ready(int to_qemu_fd)
+{
+    return umem_write_cmd(to_qemu_fd, UMEM_DAEMON_READY);
+}
+
+int umem_daemon_wait_for_qemu(int from_qemu_fd)
+{
+    return umem_read_cmd(from_qemu_fd, UMEM_QEMU_READY);
+}
+
+void umem_daemon_quit(QEMUFile *to_qemu)
+{
+    qemu_put_byte(to_qemu, UMEM_DAEMON_QUIT);
+}
+
+void umem_daemon_error(QEMUFile *to_qemu)
+{
+    qemu_put_byte(to_qemu, UMEM_DAEMON_ERROR);
+}
+
+/* qemu main loop -> umem thread */
+int umem_qemu_wait_for_daemon(int from_umemd_fd)
+{
+    return umem_read_cmd(from_umemd_fd, UMEM_DAEMON_READY);
+}
+
+int umem_qemu_ready(int to_umemd_fd)
+{
+    return umem_write_cmd(to_umemd_fd, UMEM_QEMU_READY);
+}
+
+void umem_qemu_quit(QEMUFile *to_umemd)
+{
+    qemu_put_byte(to_umemd, UMEM_QEMU_QUIT);
+}
diff --git a/umem.h b/umem.h
new file mode 100644
index 0000000..dbc965c
--- /dev/null
+++ b/umem.h
@@ -0,0 +1,88 @@ 
+/*
+ * umem.h: user process backed memory module for postcopy livemigration
+ *
+ * Copyright (c) 2011
+ * National Institute of Advanced Industrial Science and Technology
+ *
+ * https://sites.google.com/site/grivonhome/quick-kvm-migration
+ * Author: Isaku Yamahata <yamahata at valinux co jp>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_UMEM_H
+#define QEMU_UMEM_H
+
+#include "qemu-common.h"
+
+typedef struct UMemDev UMemDev;
+
+struct UMem {
+    void *umem;
+    int fd;
+    void *shmem;
+    int shmem_fd;
+    uint64_t size;
+
+    /* indexed by host page size */
+    int page_shift;
+    int nbits;
+    int nsets;
+    unsigned long *faulted;
+};
+typedef struct UMem UMem;
+
+struct UMemPages {
+    uint64_t nr;
+    uint64_t pgoffs[0];
+};
+typedef struct UMemPages UMemPages;
+
+int umem_new(void *hostp, size_t size, UMem** umemp);
+void umem_destroy(UMem *umem);
+
+/* umem device operations */
+size_t umem_pages_size(uint64_t nr);
+int umem_get_page_request(UMem *umem, UMemPages *page_request);
+int umem_mark_page_cached(UMem *umem, UMemPages *page_cached);
+void umem_unmap(UMem *umem);
+void umem_close(UMem *umem);
+
+/* umem shmem operations */
+int umem_map_shmem(UMem *umem);
+void umem_unmap_shmem(UMem *umem);
+void umem_remove_shmem(UMem *umem, size_t offset, size_t size);
+bool umem_shmem_finished(const UMem *umem);
+void umem_close_shmem(UMem *umem);
+
+/* umem thread -> qemu main loop */
+#define UMEM_DAEMON_READY               'R'
+#define UMEM_DAEMON_QUIT                'Q'
+#define UMEM_DAEMON_ERROR               'E'
+
+/* qemu main loop -> umem thread */
+#define UMEM_QEMU_READY                 'r'
+#define UMEM_QEMU_QUIT                  'q'
+
+/* for umem thread */
+int umem_daemon_ready(int to_qemu_fd);
+int umem_daemon_wait_for_qemu(int from_qemu_fd);
+void umem_daemon_quit(QEMUFile *to_qemu);
+void umem_daemon_error(QEMUFile *to_qemu);
+
+/* for qemu main loop */
+int umem_qemu_wait_for_daemon(int from_umemd_fd);
+int umem_qemu_ready(int to_umemd_fd);
+void umem_qemu_quit(QEMUFile *to_umemd);
+
+#endif /* QEMU_UMEM_H */