diff mbox series

[V5,11/25] cpr: restart mode

Message ID 1625678434-240960-12-git-send-email-steven.sistare@oracle.com
State New
Headers show
Series Live Update | expand

Commit Message

Steve Sistare July 7, 2021, 5:20 p.m. UTC
Provide the cprsave restart mode, which preserves the guest VM across a
restart of the qemu process.  After cprsave, the caller passes qemu
command-line arguments to cprexec, which directly exec's the new qemu
binary.  The arguments must include -S so new qemu starts in a paused state.
The caller resumes the guest by calling cprload.

To use the restart mode, qemu must be started with the memfd-alloc machine
option.  The memfd's are saved to the environment and kept open across exec,
after which they are found from the environment and re-mmap'd.  Hence guest
ram is preserved in place, albeit with new virtual addresses in the qemu
process.

The restart mode supports vfio devices in a subsequent patch.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 migration/cpr.c   | 21 +++++++++++++++++++++
 softmmu/physmem.c |  6 +++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

Comments

Marc-André Lureau July 8, 2021, 3:43 p.m. UTC | #1
Hi

On Wed, Jul 7, 2021 at 9:31 PM Steve Sistare <steven.sistare@oracle.com>
wrote:

> Provide the cprsave restart mode, which preserves the guest VM across a
> restart of the qemu process.  After cprsave, the caller passes qemu
> command-line arguments to cprexec, which directly exec's the new qemu
> binary.  The arguments must include -S so new qemu starts in a paused
> state.
> The caller resumes the guest by calling cprload.
>
> To use the restart mode, qemu must be started with the memfd-alloc machine
> option.  The memfd's are saved to the environment and kept open across
> exec,
> after which they are found from the environment and re-mmap'd.  Hence guest
> ram is preserved in place, albeit with new virtual addresses in the qemu
> process.
>
> The restart mode supports vfio devices in a subsequent patch.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>

What's the plan to make it work with -object memory-backend-memfd -machine
memory-backend? (or memory-backend-file, I guess that should work?)

There should be some extra checks before accepting cprexec() on a
misconfigured VM.

---
>  migration/cpr.c   | 21 +++++++++++++++++++++
>  softmmu/physmem.c |  6 +++++-
>  2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/migration/cpr.c b/migration/cpr.c
> index c5bad8a..fb57dec 100644
> --- a/migration/cpr.c
> +++ b/migration/cpr.c
> @@ -29,6 +29,7 @@
>  #include "sysemu/xen.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "hw/virtio/vhost.h"
> +#include "qemu/env.h"
>
>  QEMUFile *qf_file_open(const char *path, int flags, int mode,
>                                const char *name, Error **errp)
> @@ -108,6 +109,26 @@ done:
>      return;
>  }
>
> +static int preserve_fd(const char *name, const char *val, void *handle)
> +{
> +    qemu_clr_cloexec(atoi(val));
> +    return 0;
> +}
> +
> +void cprexec(strList *args, Error **errp)
> +{
> +    if (xen_enabled()) {
> +        error_setg(errp, "xen does not support cprexec");
> +        return;
> +    }
> +    if (!runstate_check(RUN_STATE_SAVE_VM)) {
> +        error_setg(errp, "runstate is not save-vm");
> +        return;
> +    }
> +    walkenv(FD_PREFIX, preserve_fd, 0);


I am  not convinced that relying on environment variables here is the best
thing to do.

+    qemu_system_exec_request(args);
> +}
> +
>  void cprload(const char *file, Error **errp)
>  {
>      QEMUFile *f;
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index b149250..8a65ef7 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -65,6 +65,7 @@
>  #include "qemu/pmem.h"
>
>  #include "qemu/memfd.h"
> +#include "qemu/env.h"
>  #include "migration/vmstate.h"
>
>  #include "qemu/range.h"
> @@ -1986,7 +1987,7 @@ static void ram_block_add(RAMBlock *new_block, Error
> **errp)
>          } else {
>              name = memory_region_name(new_block->mr);
>              if (ms->memfd_alloc) {
>


-                int mfd = -1;          /* placeholder until next patch */
> +                int mfd = getenv_fd(name);
>                  mr->align = QEMU_VMALLOC_ALIGN;
>                  if (mfd < 0) {
>                      mfd = qemu_memfd_create(name, maxlen + mr->align,
> @@ -1994,7 +1995,9 @@ static void ram_block_add(RAMBlock *new_block, Error
> **errp)
>                      if (mfd < 0) {
>                          return;
>                      }
> +                    setenv_fd(name, mfd);
>                  }
> +                qemu_clr_cloexec(mfd);
>

Why clear it now, and on exec again?

                 new_block->flags |= RAM_SHARED;
>                  addr = file_ram_alloc(new_block, maxlen, mfd,
>                                        false, false, 0, errp);
> @@ -2246,6 +2249,7 @@ void qemu_ram_free(RAMBlock *block)
>      }
>
>      qemu_mutex_lock_ramlist();
> +    unsetenv_fd(memory_region_name(block->mr));
>      QLIST_REMOVE_RCU(block, next);
>      ram_list.mru_block = NULL;
>      /* Write list before version */
> --
> 1.8.3.1
>
>
>
Marc-André Lureau July 8, 2021, 3:54 p.m. UTC | #2
On Thu, Jul 8, 2021 at 7:43 PM Marc-André Lureau <marcandre.lureau@gmail.com>
wrote:

> Hi
>
> On Wed, Jul 7, 2021 at 9:31 PM Steve Sistare <steven.sistare@oracle.com>
> wrote:
>
>> Provide the cprsave restart mode, which preserves the guest VM across a
>> restart of the qemu process.  After cprsave, the caller passes qemu
>> command-line arguments to cprexec, which directly exec's the new qemu
>> binary.  The arguments must include -S so new qemu starts in a paused
>> state.
>> The caller resumes the guest by calling cprload.
>>
>> To use the restart mode, qemu must be started with the memfd-alloc machine
>> option.  The memfd's are saved to the environment and kept open across
>> exec,
>> after which they are found from the environment and re-mmap'd.  Hence
>> guest
>> ram is preserved in place, albeit with new virtual addresses in the qemu
>> process.
>>
>> The restart mode supports vfio devices in a subsequent patch.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>>
>
> What's the plan to make it work with -object memory-backend-memfd -machine
> memory-backend? (or memory-backend-file, I guess that should work?)
>
>
It seems to be addressed in some way in a later "hostmem-memfd: cpr
support" patch. Imho it's worth mentioning in the commit message,
reorganize patches closer. And the checks be added anyway for unsupported
configurations.


There should be some extra checks before accepting cprexec() on a
> misconfigured VM.
>
> ---
>>  migration/cpr.c   | 21 +++++++++++++++++++++
>>  softmmu/physmem.c |  6 +++++-
>>  2 files changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/migration/cpr.c b/migration/cpr.c
>> index c5bad8a..fb57dec 100644
>> --- a/migration/cpr.c
>> +++ b/migration/cpr.c
>> @@ -29,6 +29,7 @@
>>  #include "sysemu/xen.h"
>>  #include "hw/vfio/vfio-common.h"
>>  #include "hw/virtio/vhost.h"
>> +#include "qemu/env.h"
>>
>>  QEMUFile *qf_file_open(const char *path, int flags, int mode,
>>                                const char *name, Error **errp)
>> @@ -108,6 +109,26 @@ done:
>>      return;
>>  }
>>
>> +static int preserve_fd(const char *name, const char *val, void *handle)
>> +{
>> +    qemu_clr_cloexec(atoi(val));
>> +    return 0;
>> +}
>> +
>> +void cprexec(strList *args, Error **errp)
>> +{
>> +    if (xen_enabled()) {
>> +        error_setg(errp, "xen does not support cprexec");
>> +        return;
>> +    }
>> +    if (!runstate_check(RUN_STATE_SAVE_VM)) {
>> +        error_setg(errp, "runstate is not save-vm");
>> +        return;
>> +    }
>> +    walkenv(FD_PREFIX, preserve_fd, 0);
>
>
> I am  not convinced that relying on environment variables here is the best
> thing to do.
>
> +    qemu_system_exec_request(args);
>> +}
>> +
>>  void cprload(const char *file, Error **errp)
>>  {
>>      QEMUFile *f;
>> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
>> index b149250..8a65ef7 100644
>> --- a/softmmu/physmem.c
>> +++ b/softmmu/physmem.c
>> @@ -65,6 +65,7 @@
>>  #include "qemu/pmem.h"
>>
>>  #include "qemu/memfd.h"
>> +#include "qemu/env.h"
>>  #include "migration/vmstate.h"
>>
>>  #include "qemu/range.h"
>> @@ -1986,7 +1987,7 @@ static void ram_block_add(RAMBlock *new_block,
>> Error **errp)
>>          } else {
>>              name = memory_region_name(new_block->mr);
>>              if (ms->memfd_alloc) {
>>
>
>
> -                int mfd = -1;          /* placeholder until next patch */
>> +                int mfd = getenv_fd(name);
>>                  mr->align = QEMU_VMALLOC_ALIGN;
>>                  if (mfd < 0) {
>>                      mfd = qemu_memfd_create(name, maxlen + mr->align,
>> @@ -1994,7 +1995,9 @@ static void ram_block_add(RAMBlock *new_block,
>> Error **errp)
>>                      if (mfd < 0) {
>>                          return;
>>                      }
>> +                    setenv_fd(name, mfd);
>>                  }
>> +                qemu_clr_cloexec(mfd);
>>
>
> Why clear it now, and on exec again?
>
>                  new_block->flags |= RAM_SHARED;
>>                  addr = file_ram_alloc(new_block, maxlen, mfd,
>>                                        false, false, 0, errp);
>> @@ -2246,6 +2249,7 @@ void qemu_ram_free(RAMBlock *block)
>>      }
>>
>>      qemu_mutex_lock_ramlist();
>> +    unsetenv_fd(memory_region_name(block->mr));
>>      QLIST_REMOVE_RCU(block, next);
>>      ram_list.mru_block = NULL;
>>      /* Write list before version */
>> --
>> 1.8.3.1
>>
>>
>>
>
> --
> Marc-André Lureau
>
Steve Sistare July 12, 2021, 7:19 p.m. UTC | #3
On 7/8/2021 11:54 AM, Marc-André Lureau wrote:
> On Thu, Jul 8, 2021 at 7:43 PM Marc-André Lureau <marcandre.lureau@gmail.com <mailto:marcandre.lureau@gmail.com>> wrote:
> 
>     Hi
> 
>     On Wed, Jul 7, 2021 at 9:31 PM Steve Sistare <steven.sistare@oracle.com <mailto:steven.sistare@oracle.com>> wrote:
> 
>         Provide the cprsave restart mode, which preserves the guest VM across a
>         restart of the qemu process.  After cprsave, the caller passes qemu
>         command-line arguments to cprexec, which directly exec's the new qemu
>         binary.  The arguments must include -S so new qemu starts in a paused state.
>         The caller resumes the guest by calling cprload.
> 
>         To use the restart mode, qemu must be started with the memfd-alloc machine
>         option.  The memfd's are saved to the environment and kept open across exec,
>         after which they are found from the environment and re-mmap'd.  Hence guest
>         ram is preserved in place, albeit with new virtual addresses in the qemu
>         process.
> 
>         The restart mode supports vfio devices in a subsequent patch.
> 
>         Signed-off-by: Steve Sistare <steven.sistare@oracle.com <mailto:steven.sistare@oracle.com>>
> 
> 
>     What's the plan to make it work with -object memory-backend-memfd -machine memory-backend? > (or memory-backend-file, I guess that should work?)
> 
> 
> It seems to be addressed in some way in a later "hostmem-memfd: cpr support" patch. 

Correct, but in both cases you also need the memfd-alloc machine option so that misc small 
segments are preserved.  For some discussion see:
  https://lore.kernel.org/qemu-devel/YKPEWicpOeh3yo5%2F@stefanha-x1.localdomain/

> Imho it's worth mentioning in the commit message, reorganize patches closer. 

OK.

> And the checks be added anyway for unsupported configurations.

The only-cpr-capable option in the next to last patch performs those checks.

>     There should be some extra checks before accepting cprexec() on a misconfigured VM.
> 
>         ---
>          migration/cpr.c   | 21 +++++++++++++++++++++
>          softmmu/physmem.c |  6 +++++-
>          2 files changed, 26 insertions(+), 1 deletion(-)
> 
>         diff --git a/migration/cpr.c b/migration/cpr.c
>         index c5bad8a..fb57dec 100644
>         --- a/migration/cpr.c
>         +++ b/migration/cpr.c
>         @@ -29,6 +29,7 @@
>          #include "sysemu/xen.h"
>          #include "hw/vfio/vfio-common.h"
>          #include "hw/virtio/vhost.h"
>         +#include "qemu/env.h"
> 
>          QEMUFile *qf_file_open(const char *path, int flags, int mode,
>                                        const char *name, Error **errp)
>         @@ -108,6 +109,26 @@ done:
>              return;
>          }
> 
>         +static int preserve_fd(const char *name, const char *val, void *handle)
>         +{
>         +    qemu_clr_cloexec(atoi(val));
>         +    return 0;
>         +}
>         +
>         +void cprexec(strList *args, Error **errp)
>         +{
>         +    if (xen_enabled()) {
>         +        error_setg(errp, "xen does not support cprexec");
>         +        return;
>         +    }
>         +    if (!runstate_check(RUN_STATE_SAVE_VM)) {
>         +        error_setg(errp, "runstate is not save-vm");
>         +        return;
>         +    }
>         +    walkenv(FD_PREFIX, preserve_fd, 0);
> 
> 
>     I am  not convinced that relying on environment variables here is the best thing to do.
> 
>         +    qemu_system_exec_request(args);
>         +}
>         +
>          void cprload(const char *file, Error **errp)
>          {
>              QEMUFile *f;
>         diff --git a/softmmu/physmem.c b/softmmu/physmem.c
>         index b149250..8a65ef7 100644
>         --- a/softmmu/physmem.c
>         +++ b/softmmu/physmem.c
>         @@ -65,6 +65,7 @@
>          #include "qemu/pmem.h"
> 
>          #include "qemu/memfd.h"
>         +#include "qemu/env.h"
>          #include "migration/vmstate.h"
> 
>          #include "qemu/range.h"
>         @@ -1986,7 +1987,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>                  } else {
>                      name = memory_region_name(new_block->mr);
>                      if (ms->memfd_alloc) {
> 
> 
> 
>         -                int mfd = -1;          /* placeholder until next patch */
>         +                int mfd = getenv_fd(name);
>                          mr->align = QEMU_VMALLOC_ALIGN;
>                          if (mfd < 0) {
>                              mfd = qemu_memfd_create(name, maxlen + mr->align,
>         @@ -1994,7 +1995,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>                              if (mfd < 0) {
>                                  return;
>                              }
>         +                    setenv_fd(name, mfd);
>                          }
>         +                qemu_clr_cloexec(mfd);
> 
> 
>     Why clear it now, and on exec again?

That's a bug, thanks.  This should be qemu_set_cloexec(), so the mfd is closed for
any misc fork/exec calls prior to cprexec.

- Steve


>                          new_block->flags |= RAM_SHARED;
>                          addr = file_ram_alloc(new_block, maxlen, mfd,
>                                                false, false, 0, errp);
>         @@ -2246,6 +2249,7 @@ void qemu_ram_free(RAMBlock *block)
>              }
> 
>              qemu_mutex_lock_ramlist();
>         +    unsetenv_fd(memory_region_name(block->mr));
>              QLIST_REMOVE_RCU(block, next);
>              ram_list.mru_block = NULL;
>              /* Write list before version */
>         -- 
>         1.8.3.1
> 
> 
> 
> 
>     -- 
>     Marc-André Lureau
> 
> 
> 
> -- 
> Marc-André Lureau
diff mbox series

Patch

diff --git a/migration/cpr.c b/migration/cpr.c
index c5bad8a..fb57dec 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -29,6 +29,7 @@ 
 #include "sysemu/xen.h"
 #include "hw/vfio/vfio-common.h"
 #include "hw/virtio/vhost.h"
+#include "qemu/env.h"
 
 QEMUFile *qf_file_open(const char *path, int flags, int mode,
                               const char *name, Error **errp)
@@ -108,6 +109,26 @@  done:
     return;
 }
 
+static int preserve_fd(const char *name, const char *val, void *handle)
+{
+    qemu_clr_cloexec(atoi(val));
+    return 0;
+}
+
+void cprexec(strList *args, Error **errp)
+{
+    if (xen_enabled()) {
+        error_setg(errp, "xen does not support cprexec");
+        return;
+    }
+    if (!runstate_check(RUN_STATE_SAVE_VM)) {
+        error_setg(errp, "runstate is not save-vm");
+        return;
+    }
+    walkenv(FD_PREFIX, preserve_fd, 0);
+    qemu_system_exec_request(args);
+}
+
 void cprload(const char *file, Error **errp)
 {
     QEMUFile *f;
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index b149250..8a65ef7 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -65,6 +65,7 @@ 
 #include "qemu/pmem.h"
 
 #include "qemu/memfd.h"
+#include "qemu/env.h"
 #include "migration/vmstate.h"
 
 #include "qemu/range.h"
@@ -1986,7 +1987,7 @@  static void ram_block_add(RAMBlock *new_block, Error **errp)
         } else {
             name = memory_region_name(new_block->mr);
             if (ms->memfd_alloc) {
-                int mfd = -1;          /* placeholder until next patch */
+                int mfd = getenv_fd(name);
                 mr->align = QEMU_VMALLOC_ALIGN;
                 if (mfd < 0) {
                     mfd = qemu_memfd_create(name, maxlen + mr->align,
@@ -1994,7 +1995,9 @@  static void ram_block_add(RAMBlock *new_block, Error **errp)
                     if (mfd < 0) {
                         return;
                     }
+                    setenv_fd(name, mfd);
                 }
+                qemu_clr_cloexec(mfd);
                 new_block->flags |= RAM_SHARED;
                 addr = file_ram_alloc(new_block, maxlen, mfd,
                                       false, false, 0, errp);
@@ -2246,6 +2249,7 @@  void qemu_ram_free(RAMBlock *block)
     }
 
     qemu_mutex_lock_ramlist();
+    unsetenv_fd(memory_region_name(block->mr));
     QLIST_REMOVE_RCU(block, next);
     ram_list.mru_block = NULL;
     /* Write list before version */