@@ -3168,6 +3168,8 @@ F: tests/unit/test-strlist.c
F: include/migration/cpr-state.h
F: migration/cpr-state.c
F: stubs/cpr-state.c
+F: include/migration/cpr.h
+F: migration/cpr.c
Record/replay
M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
new file mode 100644
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIGRATION_CPR_H
+#define MIGRATION_CPR_H
+
+/*
+ * Defined in migration/cpr.c.
+ * NOTE(review): no reader or writer is visible in this patch; the intended
+ * semantics should be documented where the flag is first used.
+ */
+extern bool only_cpr_capable;
+
+/*
+ * Load saved CPR state (any error is fatal) and register the migration
+ * notifier that undoes fd preservation when a cpr-exec migration fails.
+ */
+void cpr_init(void);
+
+/*
+ * If the current migration uses cpr-exec mode and has not failed, save CPR
+ * state and request an exec of the command in the cpr-exec-args parameter.
+ * Any error on that path is handed to cpr_exec_failed().
+ */
+void cpr_exec(void);
+
+/*
+ * Move the current migration to the failed state, record @err on it and
+ * report it via error_report_err(), call the migration notifiers, and
+ * discard the saved CPR state.
+ */
+void cpr_exec_failed(Error *err);
+
+/*
+ * Clear close-on-exec on every fd recorded in CPR state.
+ */
+void cpr_preserve_fds(void);
+
+#endif
new file mode 100644
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
+#include "migration/cpr.h"
+#include "migration/cpr-state.h"
+#include "sysemu/runstate.h"
+
+bool only_cpr_capable;
+static Notifier cpr_fd_notifier;
+
+/*
+ * cpr_walk_fd() callback used by cpr_preserve_fds(): clear close-on-exec
+ * on @fd. @name, @id and @opaque are not used. Always returns 0.
+ */
+static int preserve_fd(const char *name, int id, int fd, void *opaque)
+{
+ qemu_clear_cloexec(fd);
+ return 0;
+}
+
+/*
+ * cpr_walk_fd() callback used by cpr_fd_notifier_func(): set close-on-exec
+ * on @fd again. @name, @id and @opaque are not used. Always returns 0.
+ */
+static int unpreserve_fd(const char *name, int id, int fd, void *opaque)
+{
+ qemu_set_cloexec(fd);
+ return 0;
+}
+
+/*
+ * Migration notifier callback. @data is the MigrationState being reported.
+ * When a cpr-exec migration has failed, set close-on-exec again on every
+ * fd recorded in CPR state. @notifier is not used.
+ */
+static void cpr_fd_notifier_func(Notifier *notifier, void *data)
+{
+    MigrationState *ms = data;
+
+    /* Other migration modes never touch the fd flags. */
+    if (migrate_mode_of(ms) != MIG_MODE_CPR_EXEC) {
+        return;
+    }
+
+    /* Only a failed migration needs the flags restored. */
+    if (!migration_has_failed(ms)) {
+        return;
+    }
+
+    cpr_walk_fd(unpreserve_fd, 0);
+}
+
+/*
+ * Clear close-on-exec on every fd recorded in CPR state by walking them
+ * with preserve_fd(). The value returned by cpr_walk_fd() is not checked.
+ */
+void cpr_preserve_fds(void)
+{
+ cpr_walk_fd(preserve_fd, 0);
+}
+
+/*
+ * One-time CPR setup, called from migration_object_init().
+ */
+void cpr_init(void)
+{
+ /* Passing &error_fatal means a load error terminates QEMU. */
+ cpr_state_load(&error_fatal);
+ /* Lets cpr_fd_notifier_func() observe migration state changes. */
+ migration_add_notifier(&cpr_fd_notifier, cpr_fd_notifier_func);
+}
+
+/*
+ * Called at the end of migration cleanup. Does nothing unless the current
+ * migration uses cpr-exec mode and has not failed. Otherwise the migration
+ * must have finished; CPR state is then saved and an exec of the command
+ * held in the cpr-exec-args parameter is requested. If the migration has
+ * not finished, or saving CPR state fails, the error is handed to
+ * cpr_exec_failed().
+ */
+void cpr_exec(void)
+{
+    MigrationState *ms = migrate_get_current();
+    Error *local_err = NULL;
+
+    /* Not a cpr-exec migration, or failure was already handled elsewhere. */
+    if (migrate_mode_of(ms) != MIG_MODE_CPR_EXEC || migration_has_failed(ms)) {
+        return;
+    }
+
+    if (!migration_has_finished(ms)) {
+        error_setg(&local_err, "cannot exec: migration status is '%s', "
+                               "but must be 'completed'",
+                   MigrationStatus_str(ms->state));
+    } else if (cpr_state_save(&local_err) == 0) {
+        /* State is saved; the exec request is acted on by the main loop. */
+        qemu_system_exec_request(ms->parameters.cpr_exec_args);
+        return;
+    }
+
+    /* Either the status check or the state save set local_err. */
+    cpr_exec_failed(local_err);
+}
+
+/*
+ * Handle a cpr-exec failure described by @err: mark the current migration
+ * failed, record and report the error, call the migration notifiers, and
+ * drop the saved CPR state.
+ *
+ * @err is passed to error_report_err() and must not be used by the caller
+ * afterwards.
+ */
+void cpr_exec_failed(Error *err)
+{
+    MigrationState *ms = migrate_get_current();
+
+    /* Move from whatever the current state is to 'failed'. */
+    migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+
+    /* Attach the error to the migration state before reporting it. */
+    migrate_set_error(ms, err);
+    error_report_err(err);
+
+    /*
+     * Notifiers run once the failed state is visible, which is what lets
+     * cpr_fd_notifier_func() set close-on-exec again on the preserved fds.
+     */
+    migration_call_notifiers(ms);
+
+    cpr_state_unsave();
+}
@@ -15,6 +15,7 @@ softmmu_ss.add(files(
'channel-block.c',
'colo-failover.c',
'colo.c',
+ 'cpr.c',
'cpr-state.c',
'exec.c',
'fd.c',
@@ -32,6 +32,7 @@
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
+#include "migration/cpr.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
@@ -231,6 +232,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ cpr_init();
}
void migration_cancel(const Error *error)
@@ -1964,6 +1966,7 @@ static void migrate_fd_cleanup(MigrationState *s)
}
migration_call_notifiers(s);
block_cleanup_parameters(s);
+ cpr_exec();
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
@@ -2489,6 +2492,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
+ if (migrate_mode_of(s) == MIG_MODE_CPR_EXEC &&
+ !s->parameters.has_cpr_exec_args) {
+ error_setg(errp, "cpr-exec mode requires setting cpr-exec-args");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}
@@ -165,6 +165,7 @@ out:
+/*
+ * Return true when any of the following holds:
+ *  - @block is not migratable;
+ *  - the migration mode is cpr-exec (this then applies to every block);
+ *  - migrate_ignore_shared() is true and @block is a shared, named-file
+ *    block.
+ */
bool ramblock_is_ignored(RAMBlock *block)
{
return !qemu_ram_is_migratable(block) ||
+ migrate_mode() == MIG_MODE_CPR_EXEC ||
(migrate_ignore_shared() && qemu_ram_is_shared(block) &&
ramblock_is_named_file(block));
}
@@ -3058,7 +3059,8 @@ static void ram_init_bitmaps(RAMState *rs)
WITH_RCU_READ_LOCK_GUARD() {
ram_list_init_bitmaps();
/* We don't use dirty log with background snapshots */
- if (!migrate_background_snapshot()) {
+ if (!migrate_background_snapshot() &&
+ migrate_mode() == MIG_MODE_NORMAL) {
memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
migration_bitmap_sync_precopy(rs);
}
@@ -586,10 +586,21 @@
# arguments must match those used to initially start qemu, plus
# the -incoming option.
#
+# @cpr-exec: The migrate command saves state to a file, directly exec's a
+# new version of qemu on the same host, replacing the original
+# process while retaining its PID, and loads the file via the
+# migrate-incoming command. The caller must specify a migration URI
+# that writes to and reads from a file. Guest RAM must be backed by
+# a memory backend with share=on, and cannot be memory-backend-ram.
+# Guest RAM is not copied, and storage blocks are not migrated, so
+# all capabilities related to page and block copy must be disabled,
+# and all related parameters are ignored. Arguments for the new
+# qemu process are taken from the @cpr-exec-args parameter.
+#
# Since: 7.1
##
{ 'enum': 'MigMode',
- 'data': [ 'normal', 'cpr-reboot' ] }
+ 'data': [ 'normal', 'cpr-reboot', 'cpr-exec' ] }
##
# @BitmapMigrationBitmapAliasTransform:
@@ -712,7 +723,11 @@
# at tail stage.
# The default value is false. (Since 5.1)
#
-# @cpr-exec-args: defined in a subsequent patch.
+# @cpr-exec-args: arguments passed to new qemu for cpr-exec mode. The first
+# argument should be the path of a new qemu binary, or a prefix
+# command that exec's the new qemu binary. The arguments must
+# match those used to initially start qemu, plus the -incoming
+# option. (Since 7.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials for
# establishing a TLS connection over the migration data channel.
@@ -885,7 +900,8 @@
# at tail stage.
# The default value is false. (Since 5.1)
#
-# @cpr-exec-args: defined in a subsequent patch.
+# @cpr-exec-args: Arguments passed to new qemu for cpr-exec mode.
+# See description in @MigrationParameter. (Since 7.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
@@ -1090,7 +1106,8 @@
# at tail stage.
# The default value is false. (Since 5.1)
#
-# @cpr-exec-args: defined in a subsequent patch.
+# @cpr-exec-args: Arguments passed to new qemu for cpr-exec mode.
+# See description in @MigrationParameter. (Since 7.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
@@ -44,6 +44,7 @@
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
+#include "qemu/memfd.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
@@ -66,6 +67,8 @@
#include "qemu/pmem.h"
+#include "migration/cpr-state.h"
+#include "migration/misc.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -1971,6 +1974,40 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
}
}
+/*
+ * Return true if the QOM parent of @mr is an instance of
+ * TYPE_MEMORY_BACKEND (or of a subtype), false otherwise.
+ */
+static bool memory_region_is_backend(MemoryRegion *mr)
+{
+    Object *parent = OBJECT(mr)->parent;
+
+    return object_dynamic_cast(parent, TYPE_MEMORY_BACKEND) != NULL;
+}
+
+/*
+ * Back @rb with a memfd mapping instead of anonymous memory, so that the
+ * memory can be found again through CPR state.
+ *
+ * If cpr_find_memfd() returns an fd for the memory region's name, the
+ * lengths and alignment it reports overwrite those of @rb and its memory
+ * region. Otherwise a new memfd is created and recorded with
+ * cpr_save_memfd(). Either way the block is marked RAM_SHARED and the
+ * memfd is mapped with file_ram_alloc().
+ *
+ * @rb: RAM block to populate; used_length, max_length, flags and
+ *      mr->align may be modified.
+ * @maxlen: requested maximum length; replaced by the saved value when an
+ *          existing memfd is found, else by rb->max_length.
+ * @errp: set on failure.
+ *
+ * Returns the mapped host address, or NULL on failure.
+ */
+static void *qemu_anon_memfd_alloc(RAMBlock *rb, size_t maxlen, Error **errp)
+{
+    size_t len, align;
+    void *addr;
+    MemoryRegion *mr = rb->mr;
+    const char *name = memory_region_name(mr);
+    int mfd = cpr_find_memfd(name, &len, &maxlen, &align);
+    bool created = false;
+
+    if (mfd >= 0) {
+        /* Reuse the recorded memfd and adopt its recorded geometry. */
+        rb->used_length = len;
+        rb->max_length = maxlen;
+        mr->align = align;
+    } else {
+        len = rb->used_length;
+        maxlen = rb->max_length;
+        mr->align = QEMU_VMALLOC_ALIGN;
+        mfd = qemu_memfd_create(name, maxlen + mr->align, 0, 0, 0, errp);
+        if (mfd < 0) {
+            return NULL;
+        }
+        cpr_save_memfd(name, mfd, len, maxlen, mr->align);
+        created = true;
+    }
+    rb->flags |= RAM_SHARED;
+    /* Keep the fd close-on-exec until cpr_preserve_fds() clears the flag. */
+    qemu_set_cloexec(mfd);
+    addr = file_ram_alloc(rb, maxlen, mfd, false, false, 0, errp);
+    trace_anon_memfd_alloc(name, maxlen, addr, mfd);
+
+    if (!addr && created) {
+        /*
+         * Mapping failed for a memfd created above: forget it in CPR state
+         * and close it, so the fd is not leaked and a later allocation
+         * with the same name does not pick up a stale entry.
+         * NOTE(review): relies on file_ram_alloc() leaving the fd open on
+         * failure and on cpr_delete_memfd() not closing it - confirm.
+         */
+        cpr_delete_memfd(name);
+        close(mfd);
+    }
+    return addr;
+}
+
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
const bool noreserve = qemu_ram_is_noreserve(new_block);
@@ -1994,6 +2031,14 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
qemu_mutex_unlock_ramlist();
return;
}
+ } else if (migrate_mode_enabled(MIG_MODE_CPR_EXEC) &&
+ !memory_region_is_backend(new_block->mr)) {
+ new_block->host = qemu_anon_memfd_alloc(new_block,
+ new_block->max_length,
+ errp);
+ if (!new_block->host) {
+ /* Drop the ramlist lock, as the other error paths here do. */
+ qemu_mutex_unlock_ramlist();
+ return;
+ }
} else {
new_block->host = qemu_anon_ram_alloc(new_block->max_length,
&new_block->mr->align,
@@ -2005,8 +2050,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
qemu_mutex_unlock_ramlist();
return;
}
- memory_try_enable_merging(new_block->host, new_block->max_length);
}
+ memory_try_enable_merging(new_block->host, new_block->max_length);
}
new_ram_size = MAX(old_ram_size,
@@ -2239,6 +2284,7 @@ void qemu_ram_free(RAMBlock *block)
}
qemu_mutex_lock_ramlist();
+ cpr_delete_memfd(memory_region_name(block->mr));
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
@@ -32,6 +32,7 @@
#include "exec/cpu-common.h"
#include "exec/gdbstub.h"
#include "hw/boards.h"
+#include "migration/cpr.h"
#include "migration/misc.h"
#include "migration/postcopy-ram.h"
#include "monitor/monitor.h"
@@ -692,9 +693,10 @@ static bool main_loop_should_exit(void)
if (qemu_exec_requested()) {
Error *err = NULL;
+ cpr_preserve_fds();
execvp(exec_argv[0], exec_argv);
error_setg_errno(&err, errno, "execvp %s failed", exec_argv[0]);
- error_report_err(err);
+ cpr_exec_failed(err);
g_strfreev(exec_argv);
exec_argv = NULL;
return false;
@@ -45,6 +45,7 @@ ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_
# accel/tcg/cputlb.c
memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
+anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd) "%s size %zu ptr %p fd %d"
# gdbstub.c
gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
Add the cpr-exec migration mode.  Usage:

  qemu-system-$arch -migrate-mode-enable cpr-exec ...
  migrate_set_parameter mode cpr-exec
  migrate_set_parameter cpr-exec-args <arg1> <arg2> ... -incoming defer
  migrate -d file:<filename>
  ... poll for runstate inmigrate ...
  migrate_incoming file:<filename>

In this mode, the migrate command saves state to a file, directly exec's a
new version of qemu on the same host, replacing the original process while
retaining its PID, and loads the file via the migrate-incoming command.
The caller must specify a migration URI that writes to and reads from a
file.

Arguments for the new qemu process are taken from the @cpr-exec-args
parameter.  The first argument should be the path of a new qemu binary, or
a prefix command that exec's the new qemu binary.  The arguments must match
those used to initially start qemu, plus the -incoming option.

Guest RAM must be backed by a memory backend with share=on, but cannot be
memory-backend-ram, and the '-migrate-mode-enable cpr-exec' option is
required.  That option causes secondary guest RAM blocks (those not
specified on the command line) to be allocated by mmap'ing a memfd.  The
memfds are kept open across exec, their values are saved in special cpr
state which is retrieved after exec, and they are re-mmap'd.  Hence guest
RAM is preserved in place, albeit with new virtual addresses in the qemu
process.

Since guest RAM is not copied, and storage blocks are not migrated, the
caller must disable all capabilities related to page and block copy, and
the implementation ignores all related parameters.

Cpr-exec mode supports memory-backend-memfd, memory-backend-epc, and vfio
devices in subsequent patches.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- MAINTAINERS | 2 ++ include/migration/cpr.h | 18 +++++++++++ migration/cpr.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ migration/meson.build | 1 + migration/migration.c | 9 ++++++ migration/ram.c | 4 ++- qapi/migration.json | 25 ++++++++++++--- softmmu/physmem.c | 48 +++++++++++++++++++++++++++- softmmu/runstate.c | 4 ++- trace-events | 1 + 10 files changed, 190 insertions(+), 7 deletions(-) create mode 100644 include/migration/cpr.h create mode 100644 migration/cpr.c