diff mbox series

[RFC,server,10/11] vfio-user: register handlers to facilitate migration

Message ID 2b9b94e7b5e1bcf77c0b900bc0a5488db4910833.1626722742.git.jag.raman@oracle.com
State New
Headers show
Series vfio-user server in QEMU | expand

Commit Message

Jag Raman July 19, 2021, 8 p.m. UTC
Store and load the device's state using handlers for live migration

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 migration/savevm.h        |   2 +
 hw/remote/vfio-user-obj.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++
 migration/savevm.c        |  63 ++++++++++
 3 files changed, 352 insertions(+)

Comments

Thanos Makatos July 20, 2021, 2:05 p.m. UTC | #1
> -----Original Message-----
> From: Jagannathan Raman <jag.raman@oracle.com>
> Sent: 19 July 2021 21:00
> To: qemu-devel@nongnu.org
> Cc: stefanha@redhat.com; alex.williamson@redhat.com;
> elena.ufimtseva@oracle.com; John Levon <john.levon@nutanix.com>;
> john.g.johnson@oracle.com; Thanos Makatos
> <thanos.makatos@nutanix.com>; Swapnil Ingle
> <swapnil.ingle@nutanix.com>; jag.raman@oracle.com
> Subject: [PATCH RFC server 10/11] vfio-user: register handlers to facilitate
> migration
> 
> Store and load the device's state using handlers for live migration
> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  migration/savevm.h        |   2 +
>  hw/remote/vfio-user-obj.c | 287
> ++++++++++++++++++++++++++++++++++++++++++++++
>  migration/savevm.c        |  63 ++++++++++
>  3 files changed, 352 insertions(+)
> 
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 6461342..71d1733 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -67,5 +67,7 @@ int qemu_loadvm_state_main(QEMUFile *f,
> MigrationIncomingState *mis);
>  int qemu_load_device_state(QEMUFile *f);
>  int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
>          bool in_postcopy, bool inactivate_disks);
> +int qemu_remote_savevm(QEMUFile *f);
> +int qemu_remote_loadvm(QEMUFile *f);
> 
>  #endif
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index d2a2e51..5948576 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -44,6 +44,10 @@
>  #include "hw/boards.h"
>  #include "hw/remote/iohub.h"
>  #include "hw/remote/machine.h"
> +#include "migration/qemu-file.h"
> +#include "migration/savevm.h"
> +#include "migration/global_state.h"
> +#include "block/block.h"
> 
>  #include "libvfio-user/include/libvfio-user.h"
> 
> @@ -73,6 +77,31 @@ struct VfuObject {
>      PCIDevice *pci_dev;
> 
>      QemuThread vfu_ctx_thread;
> +
> +    /*
> +     * vfu_mig_buf holds the migration data. In the remote process, this
> +     * buffer replaces the role of an IO channel which links the source
> +     * and the destination.
> +     *
> +     * Whenever the client QEMU process initiates migration, the libvfio-user
> +     * library notifies that to this server. The remote/server QEMU sets up a
> +     * QEMUFile object using this buffer as backend. The remote passes this

Can we use remote/server more consistently? E.g. "remote process" or "server" instead of just "remote"? (makes me think of git remotes :D)

> +     * object to its migration subsystem, and it slirps the VMSDs of all its

By "slirps" do you mean transfer from the client to the server over the SLiRP network?

> +     * devices and stores them in this buffer.

Isn't this a per-device object? If so, then why do we store the VMSDs of *all* the devices in a single device's buffer? I think I'm missing something here.

> +     *
> +     * libvfio-user library subsequetly asks the remote for any data that needs
> +     * to be moved over to the destination using its vfu_migration_callbacks_t

It's not obvious to me, is this the libvfio-user library running at the server?

> +     * APIs. The remote hands over this buffer as data at this time.

Hands over the buffer to whom?

> +     *
> +     * A reverse of this process happens at the destination.
> +     */
> +    uint8_t *vfu_mig_buf;

Does the above description refer to a typical use case of the VFIO migration protocol where data is copied in an iterative manner (implemented in libvfio-user by the migration callbacks)? Is this what you're documenting here?

> +
> +    uint64_t vfu_mig_buf_size;
> +
> +    uint64_t vfu_mig_buf_pending;
> +
> +    QEMUFile *vfu_mig_file;
>  };
> 
>  static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
> @@ -97,6 +126,226 @@ static void vfu_object_set_devid(Object *obj, const
> char *str, Error **errp)
>      trace_vfu_prop("devid", str);
>  }
> 
> +/**
> + * Migration helper functions
> + *
> + * vfu_mig_buf_read & vfu_mig_buf_write are used by QEMU's migration
> + * subsystem - qemu_remote_savevm & qemu_remote_loadvm.

vfu_mig_buf_read is used by qemu_remote_loadvm and vfu_mig_buf_write is used by qemu_remote_savevm, right? The order they're written suggests the opposite.

> savevm/loadvm
> + * call these functions via QEMUFileOps to save/load the VMSD of all
> + * the devices into vfu_mig_buf
> + *
> + */
> +static ssize_t vfu_mig_buf_read(void *opaque, uint8_t *buf, int64_t pos,
> +                                size_t size, Error **errp)
> +{
> +    VfuObject *o = opaque;
> +
> +    if (pos > o->vfu_mig_buf_size) {
> +        size = 0;
> +    } else if ((pos + size) > o->vfu_mig_buf_size) {
> +        size = o->vfu_mig_buf_size;
> +    }
> +
> +    memcpy(buf, (o->vfu_mig_buf + pos), size);
> +
> +    o->vfu_mig_buf_size -= size;
> +
> +    return size;
> +}
> +
> +static ssize_t vfu_mig_buf_write(void *opaque, struct iovec *iov, int iovcnt,
> +                                 int64_t pos, Error **errp)
> +{
> +    VfuObject *o = opaque;
> +    uint64_t end = pos + iov_size(iov, iovcnt);
> +    int i;
> +
> +    if (end > o->vfu_mig_buf_size) {
> +        o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end);
> +    }
> +
> +    for (i = 0; i < iovcnt; i++) {
> +        memcpy((o->vfu_mig_buf + o->vfu_mig_buf_size), iov[i].iov_base,
> +               iov[i].iov_len);
> +        o->vfu_mig_buf_size += iov[i].iov_len;
> +        o->vfu_mig_buf_pending += iov[i].iov_len;
> +    }
> +
> +    return iov_size(iov, iovcnt);
> +}
> +
> +static int vfu_mig_buf_shutdown(void *opaque, bool rd, bool wr, Error
> **errp)
> +{
> +    VfuObject *o = opaque;
> +
> +    o->vfu_mig_buf_size = 0;
> +
> +    g_free(o->vfu_mig_buf);
> +
> +    return 0;
> +}
> +
> +static const QEMUFileOps vfu_mig_fops_save = {
> +    .writev_buffer  = vfu_mig_buf_write,
> +    .shut_down      = vfu_mig_buf_shutdown,
> +};
> +
> +static const QEMUFileOps vfu_mig_fops_load = {
> +    .get_buffer     = vfu_mig_buf_read,
> +    .shut_down      = vfu_mig_buf_shutdown,
> +};
> +
> +/**
> + * handlers for vfu_migration_callbacks_t
> + *
> + * The libvfio-user library accesses these handlers to drive the migration
> + * at the remote end, and also to transport the data stored in vfu_mig_buf
> + *
> + */
> +static void vfu_mig_state_precopy(vfu_ctx_t *vfu_ctx)
> +{
> +    VfuObject *o = vfu_get_private(vfu_ctx);
> +    int ret;
> +
> +    if (!o->vfu_mig_file) {
> +        o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_save);
> +    }
> +
> +    global_state_store();
> +
> +    ret = qemu_remote_savevm(o->vfu_mig_file);
> +    if (ret) {
> +        qemu_file_shutdown(o->vfu_mig_file);
> +        return;
> +    }
> +
> +    qemu_fflush(o->vfu_mig_file);
> +
> +    bdrv_inactivate_all();
> +}
> +
> +static void vfu_mig_state_running(vfu_ctx_t *vfu_ctx)
> +{
> +    VfuObject *o = vfu_get_private(vfu_ctx);
> +    Error *local_err = NULL;
> +    int ret;
> +
> +    ret = qemu_remote_loadvm(o->vfu_mig_file);
> +    if (ret) {
> +        error_setg(&error_abort, "vfu: failed to restore device state");
> +        return;
> +    }
> +
> +    bdrv_invalidate_cache_all(&local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return;
> +    }
> +
> +    vm_start();
> +}
> +
> +static int vfu_mig_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
> +{
> +    switch (state) {
> +    case VFU_MIGR_STATE_RESUME:
> +    case VFU_MIGR_STATE_STOP_AND_COPY:
> +    case VFU_MIGR_STATE_STOP:
> +        break;

Can you explain why we don't have to do anything in the above cases?

> +    case VFU_MIGR_STATE_PRE_COPY:
> +        vfu_mig_state_precopy(vfu_ctx);
> +        break;
> +    case VFU_MIGR_STATE_RUNNING:
> +        if (!runstate_is_running()) {
> +            vfu_mig_state_running(vfu_ctx);
> +        }
> +        break;
> +    default:
> +        warn_report("vfu: Unknown migration state %d", state);
> +    }
> +
> +    return 0;
> +}
> +
> +static uint64_t vfu_mig_get_pending_bytes(vfu_ctx_t *vfu_ctx)
> +{
> +    VfuObject *o = vfu_get_private(vfu_ctx);
> +
> +    return o->vfu_mig_buf_pending;
> +}
> +
> +static int vfu_mig_prepare_data(vfu_ctx_t *vfu_ctx, uint64_t *offset,
> +                                uint64_t *size)
> +{
> +    VfuObject *o = vfu_get_private(vfu_ctx);
> +
> +    if (offset) {
> +        *offset = 0;
> +    }
> +
> +    if (size) {
> +        *size = o->vfu_mig_buf_size;
> +    }
> +
> +    return 0;
> +}
> +
> +static ssize_t vfu_mig_read_data(vfu_ctx_t *vfu_ctx, void *buf,
> +                                 uint64_t size, uint64_t offset)
> +{
> +    VfuObject *o = vfu_get_private(vfu_ctx);
> +
> +    if (offset > o->vfu_mig_buf_size) {
> +        return -1;
> +    }
> +
> +    if ((offset + size) > o->vfu_mig_buf_size) {
> +        warn_report("vfu: buffer overflow - check pending_bytes");
> +        size = o->vfu_mig_buf_size - offset;
> +    }
> +
> +    memcpy(buf, (o->vfu_mig_buf + offset), size);
> +
> +    o->vfu_mig_buf_pending -= size;
> +
> +    return size;
> +}
> +
> +static ssize_t vfu_mig_write_data(vfu_ctx_t *vfu_ctx, void *data,
> +                                  uint64_t size, uint64_t offset)
> +{
> +    VfuObject *o = vfu_get_private(vfu_ctx);
> +    uint64_t end = offset + size;
> +
> +    if (end > o->vfu_mig_buf_size) {
> +        o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end);
> +        o->vfu_mig_buf_size = end;
> +    }
> +
> +    memcpy((o->vfu_mig_buf + offset), data, size);
> +
> +    if (!o->vfu_mig_file) {
> +        o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_load);
> +    }
> +
> +    return size;
> +}
> +
> +static int vfu_mig_data_written(vfu_ctx_t *vfu_ctx, uint64_t count)
> +{
> +    return 0;
> +}
> +
> +static const vfu_migration_callbacks_t vfu_mig_cbs = {
> +    .version = VFU_MIGR_CALLBACKS_VERS,
> +    .transition = &vfu_mig_transition,
> +    .get_pending_bytes = &vfu_mig_get_pending_bytes,
> +    .prepare_data = &vfu_mig_prepare_data,
> +    .read_data = &vfu_mig_read_data,
> +    .data_written = &vfu_mig_data_written,
> +    .write_data = &vfu_mig_write_data,
> +};
> +
>  static void *vfu_object_ctx_run(void *opaque)
>  {
>      VfuObject *o = opaque;
> @@ -332,6 +581,7 @@ static void vfu_object_machine_done(Notifier
> *notifier, void *data)
>  {
>      VfuObject *o = container_of(notifier, VfuObject, machine_done);
>      DeviceState *dev = NULL;
> +    size_t migr_area_size;
>      int ret;
> 
>      o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket, 0,
> @@ -391,6 +641,35 @@ static void vfu_object_machine_done(Notifier
> *notifier, void *data)
>          return;
>      }
> 
> +    /*
> +     * TODO: The 0x20000 number used below is a temporary. We are
> working on
> +     *     a cleaner fix for this.
> +     *
> +     *     The libvfio-user library assumes that the remote knows the size of
> +     *     the data to be migrated at boot time, but that is not the case with
> +     *     VMSDs, as it can contain a variable-size buffer. 0x20000 is used
> +     *     as a sufficiently large buffer to demonstrate migration, but that
> +     *     cannot be used as a solution.
> +     *
> +     */

The size of the migration region dictates the amount of migration data that can be produced/consumed in one-go, it's not necessarily the total size of the migration data produced/consumed throughout the migration operation.

> +    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX,
> +                           0x20000, NULL,
> +                           VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
> +    if (ret < 0) {
> +        error_setg(&error_abort, "vfu: Failed to register migration BAR %s- %s",
> +                   o->devid, strerror(errno));
> +        return;
> +    }
> +
> +    migr_area_size = vfu_get_migr_register_area_size();
> +    ret = vfu_setup_device_migration_callbacks(o->vfu_ctx, &vfu_mig_cbs,
> +                                               migr_area_size);
> +    if (ret < 0) {
> +        error_setg(&error_abort, "vfu: Failed to setup migration %s- %s",
> +                   o->devid, strerror(errno));
> +        return;
> +    }
> +
>      qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner",
> vfu_object_ctx_run,
>                         o, QEMU_THREAD_JOINABLE);
>  }
> @@ -412,6 +691,14 @@ static void vfu_object_init(Object *obj)
> 
>      o->machine_done.notify = vfu_object_machine_done;
>      qemu_add_machine_init_done_notifier(&o->machine_done);
> +
> +    o->vfu_mig_file = NULL;
> +
> +    o->vfu_mig_buf = NULL;
> +
> +    o->vfu_mig_buf_size = 0;
> +
> +    o->vfu_mig_buf_pending = 0;
>  }
> 
>  static void vfu_object_finalize(Object *obj)
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 72848b9..c2279af 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1603,6 +1603,33 @@ static int qemu_savevm_state(QEMUFile *f, Error
> **errp)
>      return ret;
>  }
> 
> +int qemu_remote_savevm(QEMUFile *f)
> +{
> +    SaveStateEntry *se;
> +    int ret;
> +
> +    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
> +        if (!se->vmsd || !vmstate_save_needed(se->vmsd, se->opaque)) {
> +            continue;
> +        }
> +
> +        save_section_header(f, se, QEMU_VM_SECTION_FULL);
> +
> +        ret = vmstate_save(f, se, NULL);
> +        if (ret) {
> +            qemu_file_set_error(f, ret);
> +            return ret;
> +        }
> +
> +        save_section_footer(f, se);
> +    }
> +
> +    qemu_put_byte(f, QEMU_VM_EOF);
> +    qemu_fflush(f);
> +
> +    return 0;
> +}
> +
>  void qemu_savevm_live_state(QEMUFile *f)
>  {
>      /* save QEMU_VM_SECTION_END section */
> @@ -2443,6 +2470,42 @@ qemu_loadvm_section_start_full(QEMUFile *f,
> MigrationIncomingState *mis)
>      return 0;
>  }
> 
> +int qemu_remote_loadvm(QEMUFile *f)
> +{
> +    uint8_t section_type;
> +    int ret = 0;
> +
> +    qemu_mutex_lock_iothread();
> +
> +    while (true) {
> +        section_type = qemu_get_byte(f);
> +
> +        if (qemu_file_get_error(f)) {
> +            ret = qemu_file_get_error(f);
> +            break;
> +        }
> +
> +        switch (section_type) {
> +        case QEMU_VM_SECTION_FULL:
> +            ret = qemu_loadvm_section_start_full(f, NULL);
> +            if (ret < 0) {
> +                break;
> +            }
> +            break;
> +        case QEMU_VM_EOF:
> +            goto out;
> +        default:
> +            ret = -EINVAL;
> +            goto out;
> +        }
> +    }
> +
> +out:
> +    qemu_mutex_unlock_iothread();
> +
> +    return ret;
> +}
> +
>  static int
>  qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState
> *mis)
>  {
> --
> 1.8.3.1

My background in implementing device migration with libvfio-user is for a specific device, it seems to me that you're using this functionality differently? Maybe that's why I'm getting confused. If this is the case, could you explain in more detail how you're using libvfio-user here?
diff mbox series

Patch

diff --git a/migration/savevm.h b/migration/savevm.h
index 6461342..71d1733 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -67,5 +67,7 @@  int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 int qemu_load_device_state(QEMUFile *f);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         bool in_postcopy, bool inactivate_disks);
+int qemu_remote_savevm(QEMUFile *f);
+int qemu_remote_loadvm(QEMUFile *f);
 
 #endif
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index d2a2e51..5948576 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -44,6 +44,10 @@ 
 #include "hw/boards.h"
 #include "hw/remote/iohub.h"
 #include "hw/remote/machine.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/global_state.h"
+#include "block/block.h"
 
 #include "libvfio-user/include/libvfio-user.h"
 
@@ -73,6 +77,31 @@  struct VfuObject {
     PCIDevice *pci_dev;
 
     QemuThread vfu_ctx_thread;
+
+    /*
+     * vfu_mig_buf holds the migration data. In the remote process, this
+     * buffer replaces the role of an IO channel which links the source
+     * and the destination.
+     *
+     * Whenever the client QEMU process initiates migration, the libvfio-user
+     * library notifies that to this server. The remote/server QEMU sets up a
+     * QEMUFile object using this buffer as backend. The remote passes this
+     * object to its migration subsystem, and it slirps the VMSDs of all its
+     * devices and stores them in this buffer.
+     *
+     * libvfio-user library subsequetly asks the remote for any data that needs
+     * to be moved over to the destination using its vfu_migration_callbacks_t
+     * APIs. The remote hands over this buffer as data at this time.
+     *
+     * A reverse of this process happens at the destination.
+     */
+    uint8_t *vfu_mig_buf;
+
+    uint64_t vfu_mig_buf_size;
+
+    uint64_t vfu_mig_buf_pending;
+
+    QEMUFile *vfu_mig_file;
 };
 
 static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
@@ -97,6 +126,226 @@  static void vfu_object_set_devid(Object *obj, const char *str, Error **errp)
     trace_vfu_prop("devid", str);
 }
 
+/**
+ * Migration helper functions
+ *
+ * vfu_mig_buf_read & vfu_mig_buf_write are used by QEMU's migration
+ * subsystem - qemu_remote_savevm & qemu_remote_loadvm. savevm/loadvm
+ * call these functions via QEMUFileOps to save/load the VMSD of all
+ * the devices into vfu_mig_buf
+ *
+ */
+static ssize_t vfu_mig_buf_read(void *opaque, uint8_t *buf, int64_t pos,
+                                size_t size, Error **errp)
+{
+    VfuObject *o = opaque;
+
+    if (pos > o->vfu_mig_buf_size) {
+        size = 0;
+    } else if ((pos + size) > o->vfu_mig_buf_size) {
+        size = o->vfu_mig_buf_size;
+    }
+
+    memcpy(buf, (o->vfu_mig_buf + pos), size);
+
+    o->vfu_mig_buf_size -= size;
+
+    return size;
+}
+
+static ssize_t vfu_mig_buf_write(void *opaque, struct iovec *iov, int iovcnt,
+                                 int64_t pos, Error **errp)
+{
+    VfuObject *o = opaque;
+    uint64_t end = pos + iov_size(iov, iovcnt);
+    int i;
+
+    if (end > o->vfu_mig_buf_size) {
+        o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end);
+    }
+
+    for (i = 0; i < iovcnt; i++) {
+        memcpy((o->vfu_mig_buf + o->vfu_mig_buf_size), iov[i].iov_base,
+               iov[i].iov_len);
+        o->vfu_mig_buf_size += iov[i].iov_len;
+        o->vfu_mig_buf_pending += iov[i].iov_len;
+    }
+
+    return iov_size(iov, iovcnt);
+}
+
+static int vfu_mig_buf_shutdown(void *opaque, bool rd, bool wr, Error **errp)
+{
+    VfuObject *o = opaque;
+
+    o->vfu_mig_buf_size = 0;
+
+    g_free(o->vfu_mig_buf);
+
+    return 0;
+}
+
+static const QEMUFileOps vfu_mig_fops_save = {
+    .writev_buffer  = vfu_mig_buf_write,
+    .shut_down      = vfu_mig_buf_shutdown,
+};
+
+static const QEMUFileOps vfu_mig_fops_load = {
+    .get_buffer     = vfu_mig_buf_read,
+    .shut_down      = vfu_mig_buf_shutdown,
+};
+
+/**
+ * handlers for vfu_migration_callbacks_t
+ *
+ * The libvfio-user library accesses these handlers to drive the migration
+ * at the remote end, and also to transport the data stored in vfu_mig_buf
+ *
+ */
+static void vfu_mig_state_precopy(vfu_ctx_t *vfu_ctx)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    int ret;
+
+    if (!o->vfu_mig_file) {
+        o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_save);
+    }
+
+    global_state_store();
+
+    ret = qemu_remote_savevm(o->vfu_mig_file);
+    if (ret) {
+        qemu_file_shutdown(o->vfu_mig_file);
+        return;
+    }
+
+    qemu_fflush(o->vfu_mig_file);
+
+    bdrv_inactivate_all();
+}
+
+static void vfu_mig_state_running(vfu_ctx_t *vfu_ctx)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    Error *local_err = NULL;
+    int ret;
+
+    ret = qemu_remote_loadvm(o->vfu_mig_file);
+    if (ret) {
+        error_setg(&error_abort, "vfu: failed to restore device state");
+        return;
+    }
+
+    bdrv_invalidate_cache_all(&local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    vm_start();
+}
+
+static int vfu_mig_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
+{
+    switch (state) {
+    case VFU_MIGR_STATE_RESUME:
+    case VFU_MIGR_STATE_STOP_AND_COPY:
+    case VFU_MIGR_STATE_STOP:
+        break;
+    case VFU_MIGR_STATE_PRE_COPY:
+        vfu_mig_state_precopy(vfu_ctx);
+        break;
+    case VFU_MIGR_STATE_RUNNING:
+        if (!runstate_is_running()) {
+            vfu_mig_state_running(vfu_ctx);
+        }
+        break;
+    default:
+        warn_report("vfu: Unknown migration state %d", state);
+    }
+
+    return 0;
+}
+
+static uint64_t vfu_mig_get_pending_bytes(vfu_ctx_t *vfu_ctx)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+
+    return o->vfu_mig_buf_pending;
+}
+
+static int vfu_mig_prepare_data(vfu_ctx_t *vfu_ctx, uint64_t *offset,
+                                uint64_t *size)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+
+    if (offset) {
+        *offset = 0;
+    }
+
+    if (size) {
+        *size = o->vfu_mig_buf_size;
+    }
+
+    return 0;
+}
+
+static ssize_t vfu_mig_read_data(vfu_ctx_t *vfu_ctx, void *buf,
+                                 uint64_t size, uint64_t offset)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+
+    if (offset > o->vfu_mig_buf_size) {
+        return -1;
+    }
+
+    if ((offset + size) > o->vfu_mig_buf_size) {
+        warn_report("vfu: buffer overflow - check pending_bytes");
+        size = o->vfu_mig_buf_size - offset;
+    }
+
+    memcpy(buf, (o->vfu_mig_buf + offset), size);
+
+    o->vfu_mig_buf_pending -= size;
+
+    return size;
+}
+
+static ssize_t vfu_mig_write_data(vfu_ctx_t *vfu_ctx, void *data,
+                                  uint64_t size, uint64_t offset)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    uint64_t end = offset + size;
+
+    if (end > o->vfu_mig_buf_size) {
+        o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end);
+        o->vfu_mig_buf_size = end;
+    }
+
+    memcpy((o->vfu_mig_buf + offset), data, size);
+
+    if (!o->vfu_mig_file) {
+        o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_load);
+    }
+
+    return size;
+}
+
+static int vfu_mig_data_written(vfu_ctx_t *vfu_ctx, uint64_t count)
+{
+    return 0;
+}
+
+static const vfu_migration_callbacks_t vfu_mig_cbs = {
+    .version = VFU_MIGR_CALLBACKS_VERS,
+    .transition = &vfu_mig_transition,
+    .get_pending_bytes = &vfu_mig_get_pending_bytes,
+    .prepare_data = &vfu_mig_prepare_data,
+    .read_data = &vfu_mig_read_data,
+    .data_written = &vfu_mig_data_written,
+    .write_data = &vfu_mig_write_data,
+};
+
 static void *vfu_object_ctx_run(void *opaque)
 {
     VfuObject *o = opaque;
@@ -332,6 +581,7 @@  static void vfu_object_machine_done(Notifier *notifier, void *data)
 {
     VfuObject *o = container_of(notifier, VfuObject, machine_done);
     DeviceState *dev = NULL;
+    size_t migr_area_size;
     int ret;
 
     o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket, 0,
@@ -391,6 +641,35 @@  static void vfu_object_machine_done(Notifier *notifier, void *data)
         return;
     }
 
+    /*
+     * TODO: The 0x20000 number used below is a temporary. We are working on
+     *     a cleaner fix for this.
+     *
+     *     The libvfio-user library assumes that the remote knows the size of
+     *     the data to be migrated at boot time, but that is not the case with
+     *     VMSDs, as it can contain a variable-size buffer. 0x20000 is used
+     *     as a sufficiently large buffer to demonstrate migration, but that
+     *     cannot be used as a solution.
+     *
+     */
+    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX,
+                           0x20000, NULL,
+                           VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
+    if (ret < 0) {
+        error_setg(&error_abort, "vfu: Failed to register migration BAR %s- %s",
+                   o->devid, strerror(errno));
+        return;
+    }
+
+    migr_area_size = vfu_get_migr_register_area_size();
+    ret = vfu_setup_device_migration_callbacks(o->vfu_ctx, &vfu_mig_cbs,
+                                               migr_area_size);
+    if (ret < 0) {
+        error_setg(&error_abort, "vfu: Failed to setup migration %s- %s",
+                   o->devid, strerror(errno));
+        return;
+    }
+
     qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner", vfu_object_ctx_run,
                        o, QEMU_THREAD_JOINABLE);
 }
@@ -412,6 +691,14 @@  static void vfu_object_init(Object *obj)
 
     o->machine_done.notify = vfu_object_machine_done;
     qemu_add_machine_init_done_notifier(&o->machine_done);
+
+    o->vfu_mig_file = NULL;
+
+    o->vfu_mig_buf = NULL;
+
+    o->vfu_mig_buf_size = 0;
+
+    o->vfu_mig_buf_pending = 0;
 }
 
 static void vfu_object_finalize(Object *obj)
diff --git a/migration/savevm.c b/migration/savevm.c
index 72848b9..c2279af 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1603,6 +1603,33 @@  static int qemu_savevm_state(QEMUFile *f, Error **errp)
     return ret;
 }
 
+int qemu_remote_savevm(QEMUFile *f)
+{
+    SaveStateEntry *se;
+    int ret;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->vmsd || !vmstate_save_needed(se->vmsd, se->opaque)) {
+            continue;
+        }
+
+        save_section_header(f, se, QEMU_VM_SECTION_FULL);
+
+        ret = vmstate_save(f, se, NULL);
+        if (ret) {
+            qemu_file_set_error(f, ret);
+            return ret;
+        }
+
+        save_section_footer(f, se);
+    }
+
+    qemu_put_byte(f, QEMU_VM_EOF);
+    qemu_fflush(f);
+
+    return 0;
+}
+
 void qemu_savevm_live_state(QEMUFile *f)
 {
     /* save QEMU_VM_SECTION_END section */
@@ -2443,6 +2470,42 @@  qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
     return 0;
 }
 
+int qemu_remote_loadvm(QEMUFile *f)
+{
+    uint8_t section_type;
+    int ret = 0;
+
+    qemu_mutex_lock_iothread();
+
+    while (true) {
+        section_type = qemu_get_byte(f);
+
+        if (qemu_file_get_error(f)) {
+            ret = qemu_file_get_error(f);
+            break;
+        }
+
+        switch (section_type) {
+        case QEMU_VM_SECTION_FULL:
+            ret = qemu_loadvm_section_start_full(f, NULL);
+            if (ret < 0) {
+                break;
+            }
+            break;
+        case QEMU_VM_EOF:
+            goto out;
+        default:
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
+out:
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
 static int
 qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
 {