diff mbox series

[RFC,v4,45/49] multi-process/mig: Synchronize runstate of remote process

Message ID 5ef7500148d9c15b236f551f6af7b8a4a8d6e81d.1571905346.git.jag.raman@oracle.com
State New
Headers show
Series Initial support of multi-process qemu | expand

Commit Message

Jag Raman Oct. 24, 2019, 9:09 a.m. UTC
From: Elena Ufimtseva <elena.ufimtseva@oracle.com>

Synchronize the runstate of the remote process with that of QEMU

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 New patch in v4

 hw/proxy/qemu-proxy.c         | 18 ++++++++++++++++++
 include/hw/proxy/qemu-proxy.h |  2 ++
 include/io/mpqemu-link.h      |  8 ++++++++
 include/sysemu/runstate.h     |  1 +
 qdev-monitor.c                | 13 ++++++++++++-
 remote/remote-main.c          |  4 ++++
 remote/remote-opts.c          |  5 +++++
 runstate.c                    |  5 +++++
 8 files changed, 55 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi Nov. 11, 2019, 4:17 p.m. UTC | #1
On Thu, Oct 24, 2019 at 05:09:26AM -0400, Jagannathan Raman wrote:
> @@ -656,6 +657,19 @@ static void init_proxy(PCIDevice *dev, char *command, bool need_spawn, Error **e
>      }
>  }
>  
> +static void proxy_vm_state_change(void *opaque, int running, RunState state)
> +{
> +    PCIProxyDev *dev = opaque;
> +    MPQemuMsg msg = { 0 };
> +
> +    msg.cmd = RUNSTATE_SET;
> +    msg.bytestream = 0;
> +    msg.size = sizeof(msg.data1);
> +    msg.data1.runstate.state = state;
> +
> +    mpqemu_msg_send(dev->mpqemu_link, &msg, dev->mpqemu_link->com);
> +}

Changing vm state is a barrier operation - devices must not dirty memory
afterwards.  This function doesn't have barrier semantics, it sends off
the message without waiting for the remote process to finish processing
it.  This means there is a race condition where QEMU has changes the vm
state but devices could still dirty memory.  Please wait for a reply to
prevent this.

Stefan
Jag Raman Nov. 13, 2019, 4:33 p.m. UTC | #2
On 11/11/2019 11:17 AM, Stefan Hajnoczi wrote:
> On Thu, Oct 24, 2019 at 05:09:26AM -0400, Jagannathan Raman wrote:
>> @@ -656,6 +657,19 @@ static void init_proxy(PCIDevice *dev, char *command, bool need_spawn, Error **e
>>       }
>>   }
>>   
>> +static void proxy_vm_state_change(void *opaque, int running, RunState state)
>> +{
>> +    PCIProxyDev *dev = opaque;
>> +    MPQemuMsg msg = { 0 };
>> +
>> +    msg.cmd = RUNSTATE_SET;
>> +    msg.bytestream = 0;
>> +    msg.size = sizeof(msg.data1);
>> +    msg.data1.runstate.state = state;
>> +
>> +    mpqemu_msg_send(dev->mpqemu_link, &msg, dev->mpqemu_link->com);
>> +}
> 
> Changing vm state is a barrier operation - devices must not dirty memory
> afterwards.  This function doesn't have barrier semantics, it sends off
> the message without waiting for the remote process to finish processing
> it.  This means there is a race condition where QEMU has changes the vm
> state but devices could still dirty memory.  Please wait for a reply to
> prevent this.

Got it, thanks! Will do.

--
Jag

> 
> Stefan
>
diff mbox series

Patch

diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
index c85ffb3..eff299b 100644
--- a/hw/proxy/qemu-proxy.c
+++ b/hw/proxy/qemu-proxy.c
@@ -44,6 +44,7 @@ 
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qstring.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
 #include "hw/proxy/qemu-proxy.h"
 #include "hw/proxy/memory-sync.h"
 #include "qom/object.h"
@@ -656,6 +657,19 @@  static void init_proxy(PCIDevice *dev, char *command, bool need_spawn, Error **e
     }
 }
 
+static void proxy_vm_state_change(void *opaque, int running, RunState state)
+{
+    PCIProxyDev *dev = opaque;
+    MPQemuMsg msg = { 0 };
+
+    msg.cmd = RUNSTATE_SET;
+    msg.bytestream = 0;
+    msg.size = sizeof(msg.data1);
+    msg.data1.runstate.state = state;
+
+    mpqemu_msg_send(dev->mpqemu_link, &msg, dev->mpqemu_link->com);
+}
+
 static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
 {
     PCIProxyDev *dev = PCI_PROXY_DEV(device);
@@ -681,6 +695,8 @@  static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
                          &dev->region[r].mr);
     }
 
+    dev->vmcse = qemu_add_vm_change_state_handler(proxy_vm_state_change, dev);
+
     dev->set_proxy_sock = set_proxy_sock;
     dev->get_proxy_sock = get_proxy_sock;
     dev->init_proxy = init_proxy;
@@ -706,6 +722,8 @@  static void pci_dev_exit(PCIDevice *pdev)
     if (!QLIST_EMPTY(&proxy_dev_list.devices)) {
         start_heartbeat_timer();
     }
+
+    qemu_del_vm_change_state_handler(dev->vmcse);
 }
 
 static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
index b122e6d..7fe987d 100644
--- a/include/hw/proxy/qemu-proxy.h
+++ b/include/hw/proxy/qemu-proxy.h
@@ -90,6 +90,8 @@  struct PCIProxyDev {
 
     ProxyMemoryRegion region[PCI_NUM_REGIONS];
 
+    VMChangeStateEntry *vmcse;
+
     uint64_t migsize;
 };
 
diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index 05dc55e..f5a0bbb 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -39,6 +39,8 @@ 
 #include "exec/cpu-common.h"
 #include "exec/hwaddr.h"
 
+#include "qapi/qapi-types-run-state.h"
+
 #define TYPE_MPQEMU_LINK "mpqemu-link"
 #define MPQEMU_LINK(obj) \
     OBJECT_CHECK(MPQemuLinkState, (obj), TYPE_MPQEMU_LINK)
@@ -77,6 +79,7 @@  typedef enum {
     DEVICE_RESET,
     START_MIG_OUT,
     START_MIG_IN,
+    RUNSTATE_SET,
     MAX,
 } mpqemu_cmd_t;
 
@@ -115,6 +118,10 @@  typedef struct {
 } mmio_ret_msg_t;
 
 typedef struct {
+    RunState state;
+} runstate_msg_t;
+
+typedef struct {
     mpqemu_cmd_t cmd;
     int bytestream;
     size_t size;
@@ -125,6 +132,7 @@  typedef struct {
         bar_access_msg_t bar_access;
         set_irqfd_msg_t set_irqfd;
         mmio_ret_msg_t mmio_ret;
+        runstate_msg_t runstate;
     } data1;
 
     int fds[REMOTE_MAX_FDS];
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index e89ebf8..c7ad916 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -8,6 +8,7 @@  extern RunState current_run_state;
 
 bool runstate_check(RunState state);
 void runstate_set(RunState new_state);
+void remote_runstate_set(RunState state);
 int runstate_is_running(void);
 bool runstate_needs_reset(void);
 bool runstate_store(char *str, size_t size);
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 2a2c10b..c6aa35c 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -34,6 +34,7 @@ 
 #include "qemu/qemu-print.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
 #include "migration/misc.h"
 #include "hw/boards.h"
 #include "hw/proxy/qemu-proxy.h"
@@ -641,7 +642,7 @@  void qdev_proxy_fire(void)
 }
 
 DeviceState *qdev_proxy_add(const char *rid, const char *id, char *bus,
-                            char *command, int rsocket, bool managed,
+                            char *cmd, int rsocket, bool managed,
                             Error **errp)
 {
     DeviceState *ds;
@@ -653,6 +654,7 @@  DeviceState *qdev_proxy_add(const char *rid, const char *id, char *bus,
     const char *str;
     bool need_spawn = false;
     bool remote_exists = false;
+    char *command;
 
     if (strlen(rid) > MAX_RID_LENGTH) {
         error_setg(errp, "rid %s is too long.", rid);
@@ -725,6 +727,12 @@  DeviceState *qdev_proxy_add(const char *rid, const char *id, char *bus,
         need_spawn = true;
     }
 
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        command = g_strdup_printf("%s %s", cmd, "-incoming defer");
+    } else {
+        command = g_strdup(cmd);
+    }
+
     pdev->init_proxy(PCI_DEVICE(ds), command, need_spawn, errp);
 
     qemu_mutex_lock(&proxy_list_lock);
@@ -732,6 +740,9 @@  DeviceState *qdev_proxy_add(const char *rid, const char *id, char *bus,
     qemu_mutex_unlock(&proxy_list_lock);
 
     qemu_opts_del(proxy_opts);
+
+    g_free(command);
+
     return ds;
 }
 
diff --git a/remote/remote-main.c b/remote/remote-main.c
index 0284039..2de5ddf 100644
--- a/remote/remote-main.c
+++ b/remote/remote-main.c
@@ -45,6 +45,7 @@ 
 #include "qemu/main-loop.h"
 #include "qemu/config-file.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
 #include "block/block.h"
 #include "exec/memattrs.h"
 #include "exec/address-spaces.h"
@@ -497,6 +498,9 @@  static void process_msg(GIOCondition cond, MPQemuChannel *chan)
     case START_MIG_OUT:
         process_start_mig_out(msg);
         break;
+    case RUNSTATE_SET:
+        remote_runstate_set(msg->data1.runstate.state);
+        break;
     default:
         error_setg(&err, "Unknown command");
         goto finalize_loop;
diff --git a/remote/remote-opts.c b/remote/remote-opts.c
index 1b1824e..d3ae221 100644
--- a/remote/remote-opts.c
+++ b/remote/remote-opts.c
@@ -56,8 +56,10 @@ 
 #include "remote/remote-opts.h"
 #include "include/qemu-common.h"
 #include "monitor/monitor.h"
+#include "sysemu/runstate.h"
 
 #include "vl.h"
+
 /*
  * In remote process, we parse only subset of options. The code
  * taken from vl.c to re-use in remote command line parser.
@@ -101,6 +103,9 @@  void parse_cmdline(int argc, char **argv, char **envp)
             case QEMU_OPTION_qmp:
                 monitor_parse(optarg, "control", false);
                 break;
+            case QEMU_OPTION_incoming:
+                remote_runstate_set(RUN_STATE_INMIGRATE);
+                break;
             case QEMU_OPTION_monitor:
                 if (!strncmp(optarg, "stdio", 5)) {
                     warn_report("STDIO not supported in remote process");
diff --git a/runstate.c b/runstate.c
index 273345a..9c5c627 100644
--- a/runstate.c
+++ b/runstate.c
@@ -34,3 +34,8 @@  bool runstate_check(RunState state)
 {
     return current_run_state == state;
 }
+
+void remote_runstate_set(RunState state)
+{
+    current_run_state = state;
+}