@@ -19,4 +19,6 @@ void colo_proxy_destroy(int side);
void colo_add_nic_devices(NetClientState *nc);
void colo_remove_nic_devices(NetClientState *nc);
+int colo_proxy_compare(void); /* 1: do checkpoint, 0: do not, -1: error */
+
#endif
@@ -25,6 +25,13 @@
} \
} while (0)
+/*
+* Checkpoints must not run back to back without any interval between them,
+* because that would keep the VM in the 'stop' state almost continuously.
+* CHECKPOINT_MIN_PERIOD is the minimum time between two checkpoints.
+*/
+#define CHECKPOINT_MIN_PERIOD 100 /* unit: ms */
+
enum {
COLO_READY = 0x46,
@@ -290,6 +297,7 @@ static void *colo_thread(void *opaque)
{
MigrationState *s = opaque;
QEMUFile *colo_control = NULL;
+ int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
int ret;
if (colo_proxy_init(COLO_PRIMARY_MODE) != 0) {
@@ -326,10 +334,45 @@ static void *colo_thread(void *opaque)
DPRINTF("vm resume to run\n");
while (s->state == MIGRATION_STATUS_COLO) {
+        int proxy_checkpoint_req;
+
+        /* Ask the proxy whether a checkpoint is needed right now */
+        proxy_checkpoint_req = colo_proxy_compare();
+        if (proxy_checkpoint_req < 0) {
+            goto out;
+        } else if (!proxy_checkpoint_req) {
+            /*
+             * No checkpoint is needed: sleep for 1ms, then ask the
+             * proxy again.
+             */
+            g_usleep(1000);
+            continue;
+        } else {
+            int64_t interval;
+
+            current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+            interval = current_time - checkpoint_time;
+            if (interval < 0) {
+                /*
+                 * QEMU_CLOCK_HOST follows host system time changes, so
+                 * it may have stepped backwards since the last
+                 * checkpoint. Treat that as "no time elapsed" so the
+                 * sleep below never exceeds CHECKPOINT_MIN_PERIOD.
+                 */
+                interval = 0;
+            }
+            if (interval < CHECKPOINT_MIN_PERIOD) {
+                /* Enforce the minimum time between two checkpoints */
+                g_usleep(1000 * (CHECKPOINT_MIN_PERIOD - interval));
+            }
+            DPRINTF("Net packets is not consistent!!!\n");
+        }
+
/* start a colo checkpoint */
if (colo_do_checkpoint_transaction(s, colo_control)) {
goto out;
}
+        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
}
out:
@@ -37,6 +37,9 @@ typedef struct nic_device {
bool is_up;
} nic_device;
+typedef struct colo_msg { /* netlink payload parsed by colo_proxy_compare() */
+ bool is_checkpoint; /* true => colo_proxy_compare() returns 1 */
+} colo_msg;
typedef struct colo_proxy {
int sockfd;
@@ -376,3 +379,55 @@ void colo_proxy_destroy(int side)
cp_info.index = -1;
colo_nic_side = -1;
}
+/*
+ * Check, without blocking (MSG_DONTWAIT), whether a checkpoint request
+ * message is pending.
+ *
+ * Returns:
+ *    1 - the received message asks for a checkpoint
+ *    0 - no checkpoint is needed; this includes the case where
+ *        colo_proxy_recv() returned <= 0 (nothing received)
+ *   -1 - an NLMSG_ERROR message or a malformed message was received
+ */
+int colo_proxy_compare(void)
+{
+    uint8_t *buff;
+    int64_t size;
+    struct nlmsghdr *h;
+    struct colo_msg *m;
+    int ret = -1;
+
+    size = colo_proxy_recv(&buff, MSG_DONTWAIT);
+
+    /* Nothing was received: report that no checkpoint was requested. */
+    if (size <= 0) {
+        return 0;
+    }
+
+    h = (struct nlmsghdr *) buff;
+
+    /*
+     * Before trusting any header field, make sure the received bytes
+     * hold a complete netlink header and that nlmsg_len does not claim
+     * more data than was actually received.
+     */
+    if (!NLMSG_OK(h, size)) {
+        goto out;
+    }
+
+    if (h->nlmsg_type == NLMSG_ERROR) {
+        goto out;
+    }
+
+    /* The message must be long enough to carry a struct colo_msg. */
+    if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*m))) {
+        goto out;
+    }
+
+    m = NLMSG_DATA(h);
+
+    ret = m->is_checkpoint ? 1 : 0;
+
+out:
+    /* buff is released here on every path that received a message */
+    g_free(buff);
+    return ret;
+}