Patchwork [RFC] New thread for the VM migration

login
register
mail settings
Submitter Umesh Deshpande
Date July 13, 2011, 11:34 p.m.
Message ID <989268144.1313209.1310600082672.JavaMail.root@zmail01.collab.prod.int.phx2.redhat.com>
Download mbox | patch
Permalink /patch/104602/
State New
Headers show

Comments

Umesh Deshpande - July 13, 2011, 11:34 p.m.
Following patch is implemented to deal with the VCPU and iothread starvation during the migration of a guest. Currently iothread is responsible for performing the migration. It holds the qemu_mutex during the migration and doesn't allow VCPU to enter the qemu mode and delays its return to the guest. The guest migration, executed as an iohandler also delays the execution of other iohandlers. In the following patch, the migration has been moved to a separate thread to reduce the qemu_mutex contention and iohandler starvation.

Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
---
 arch_init.c      |   19 +++++++++-----
 buffered_file.c  |   10 ++++----
 cpu-all.h        |   37 ++++++++++++++++++++++++++++
 exec.c           |   70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 migration-exec.c |   17 ++++++++++---
 migration-fd.c   |   15 ++++++++++-
 migration-tcp.c  |   34 ++++++++++++++-----------
 migration-unix.c |   23 ++++++++++-------
 migration.c      |   67 +++++++++++++++++++++++++++++++--------------------
 migration.h      |    6 +++-
 qemu-timer.c     |   28 ++++++++++++++++++++-
 qemu-timer.h     |    3 ++
 12 files changed, 256 insertions(+), 73 deletions(-)

Patch

diff --git a/arch_init.c b/arch_init.c
index 484b39d..f18dda2 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -123,13 +123,13 @@  static int ram_save_block(QEMUFile *f)
     current_addr = block->offset + offset;
 
     do {
-        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+        if (migration_bitmap_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
             uint8_t *p;
             int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
 
-            cpu_physical_memory_reset_dirty(current_addr,
-                                            current_addr + TARGET_PAGE_SIZE,
-                                            MIGRATION_DIRTY_FLAG);
+            migration_bitmap_reset_dirty(current_addr,
+                                         current_addr + TARGET_PAGE_SIZE,
+                                         MIGRATION_DIRTY_FLAG);
 
             p = block->host + offset;
 
@@ -185,7 +185,7 @@  static ram_addr_t ram_save_remaining(void)
         ram_addr_t addr;
         for (addr = block->offset; addr < block->offset + block->length;
              addr += TARGET_PAGE_SIZE) {
-            if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) {
+            if (migration_bitmap_get_dirty(addr, MIGRATION_DIRTY_FLAG)) {
                 count++;
             }
         }
@@ -260,10 +260,15 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         return 0;
     }
 
+    if (stage != 3)
+        qemu_mutex_lock_iothread();
     if (cpu_physical_sync_dirty_bitmap(0, TARGET_PHYS_ADDR_MAX) != 0) {
         qemu_file_set_error(f);
         return 0;
     }
+    sync_migration_bitmap(0, TARGET_PHYS_ADDR_MAX);
+    if (stage != 3)
+        qemu_mutex_unlock_iothread();
 
     if (stage == 1) {
         RAMBlock *block;
@@ -276,9 +281,9 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
                  addr += TARGET_PAGE_SIZE) {
-                if (!cpu_physical_memory_get_dirty(addr,
+                if (!migration_bitmap_get_dirty(addr,
                                                    MIGRATION_DIRTY_FLAG)) {
-                    cpu_physical_memory_set_dirty(addr);
+                    migration_bitmap_set_dirty(addr);
                 }
             }
         }
diff --git a/buffered_file.c b/buffered_file.c
index 41b42c3..e05efe8 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -237,7 +237,7 @@  static void buffered_rate_tick(void *opaque)
         return;
     }
 
-    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
+    qemu_mod_timer(s->timer, qemu_get_clock_ms(migration_clock) + 100);
 
     if (s->freeze_output)
         return;
@@ -246,8 +246,8 @@  static void buffered_rate_tick(void *opaque)
 
     buffered_flush(s);
 
-    /* Add some checks around this */
     s->put_ready(s->opaque);
+    usleep(qemu_timer_difference(s->timer, migration_clock) * 1000);
 }
 
 QEMUFile *qemu_fopen_ops_buffered(void *opaque,
@@ -271,11 +271,11 @@  QEMUFile *qemu_fopen_ops_buffered(void *opaque,
     s->file = qemu_fopen_ops(s, buffered_put_buffer, NULL,
                              buffered_close, buffered_rate_limit,
                              buffered_set_rate_limit,
-			     buffered_get_rate_limit);
+                             buffered_get_rate_limit);
 
-    s->timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s);
+    s->timer = qemu_new_timer_ms(migration_clock, buffered_rate_tick, s);
 
-    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
+    qemu_mod_timer(s->timer, qemu_get_clock_ms(migration_clock) + 100);
 
     return s->file;
 }
diff --git a/cpu-all.h b/cpu-all.h
index e839100..4693d86 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -932,6 +932,7 @@  typedef struct RAMBlock {
 
 typedef struct RAMList {
     uint8_t *phys_dirty;
+    uint8_t *migration_bitmap;
     QLIST_HEAD(ram, RAMBlock) blocks;
 } RAMList;
 extern RAMList ram_list;
@@ -1004,8 +1005,44 @@  static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
     }
 }
 
+
+
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                      int dirty_flags);
+
+static inline int migration_bitmap_get_dirty(ram_addr_t addr,
+                                             int dirty_flags)
+{
+    return ram_list.migration_bitmap[addr >> TARGET_PAGE_BITS] & dirty_flags;
+}
+
+static inline void migration_bitmap_set_dirty(ram_addr_t addr)
+{
+    ram_list.migration_bitmap[addr >> TARGET_PAGE_BITS] = 0xff;
+}
+
+static inline void migration_bitmap_mask_dirty_range(ram_addr_t start,
+                                                     int length,
+                                                       int dirty_flags)
+{
+    int i, mask, len;
+    uint8_t *p;
+
+    len = length >> TARGET_PAGE_BITS;
+    mask = ~dirty_flags;
+    p = ram_list.migration_bitmap + (start >> TARGET_PAGE_BITS);
+    for (i = 0; i < len; i++) {
+        p[i] &= mask;
+    }
+}
+
+
+void migration_bitmap_reset_dirty(ram_addr_t start,
+                                  ram_addr_t end,
+                                  int dirty_flags);
+
+void sync_migration_bitmap(ram_addr_t start, ram_addr_t end);
+
 void cpu_tlb_update_dirty(CPUState *env);
 
 int cpu_physical_memory_set_dirty_tracking(int enable);
diff --git a/exec.c b/exec.c
index 0e2ce57..bd37e05 100644
--- a/exec.c
+++ b/exec.c
@@ -2106,6 +2106,9 @@  void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
         abort();
     }
 
+    if (kvm_enabled())
+       return;
+
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         int mmu_idx;
         for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -2114,8 +2117,69 @@  void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                       start1, length);
         }
     }
+
+}
+
+void migration_bitmap_reset_dirty(ram_addr_t start, ram_addr_t end,
+                                  int dirty_flags)
+{
+    CPUState *env;
+    unsigned long length, start1;
+    int i;
+
+    start &= TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+
+    length = end - start;
+    if (length == 0)
+        return;
+    migration_bitmap_mask_dirty_range(start, length, dirty_flags);
+
+    /* we modify the TLB cache so that the dirty bit will be set again
+       when accessing the range */
+    start1 = (unsigned long)qemu_safe_ram_ptr(start);
+    /* Check that we don't span multiple blocks - this breaks the
+       address comparisons below.  */
+    if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
+            != (end - 1) - start) {
+        abort();
+    }
+
+    if (kvm_enabled())
+       return;
+
+    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+        int mmu_idx;
+        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+            for(i = 0; i < CPU_TLB_SIZE; i++)
+                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
+                                      start1,
+                                      length);
+        }
+    }
+
 }
 
+void sync_migration_bitmap(ram_addr_t start, ram_addr_t end)
+{
+    unsigned long length, len, i;
+    start &= TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+
+    length = end - start;
+    if (length == 0)
+        return;
+
+    len = length >> TARGET_PAGE_BITS;
+    for (i = 0; i < len; i++)
+        if (ram_list.phys_dirty[i] & MIGRATION_DIRTY_FLAG)
+            ram_list.migration_bitmap[i]
+                = 0xff;
+
+}
+
+
+
 int cpu_physical_memory_set_dirty_tracking(int enable)
 {
     int ret = 0;
@@ -2979,6 +3043,12 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
            0xff, size >> TARGET_PAGE_BITS);
 
+    ram_list.migration_bitmap = qemu_realloc(ram_list.phys_dirty,
+                                       last_ram_offset() >> TARGET_PAGE_BITS);
+    memset(ram_list.migration_bitmap + (new_block->offset >> TARGET_PAGE_BITS),
+           0xff, size >> TARGET_PAGE_BITS);
+
+
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
 
diff --git a/migration-exec.c b/migration-exec.c
index 4b7aad8..5085703 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -13,6 +13,7 @@ 
  *
  */
 
+#include "qemu-thread.h"
 #include "qemu-common.h"
 #include "qemu_socket.h"
 #include "migration.h"
@@ -117,13 +118,21 @@  err_after_alloc:
     return NULL;
 }
 
-static void exec_accept_incoming_migration(void *opaque)
+static void *exec_incoming_migration_thread(void *opaque)
 {
     QEMUFile *f = opaque;
-
     process_incoming_migration(f);
-    qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL);
     qemu_fclose(f);
+    return NULL;
+}
+
+static void exec_accept_incoming_migration(void *opaque)
+{
+    QEMUFile *f = opaque;
+    struct QemuThread migrate_incoming_thread;
+    qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL);
+    qemu_thread_create(&migrate_incoming_thread, exec_incoming_migration_thread,
+                       f);
 }
 
 int exec_start_incoming_migration(const char *command)
@@ -138,7 +147,7 @@  int exec_start_incoming_migration(const char *command)
     }
 
     qemu_set_fd_handler2(qemu_stdio_fd(f), NULL,
-			 exec_accept_incoming_migration, NULL, f);
+                         exec_accept_incoming_migration, NULL, f);
 
     return 0;
 }
diff --git a/migration-fd.c b/migration-fd.c
index 66d51c1..4220566 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -11,6 +11,7 @@ 
  *
  */
 
+#include "qemu-thread.h"
 #include "qemu-common.h"
 #include "qemu_socket.h"
 #include "migration.h"
@@ -100,13 +101,23 @@  err_after_alloc:
     return NULL;
 }
 
+
+static void *fd_incoming_migration_thread(void *opaque)
+{
+    QEMUFile *f = opaque;
+    process_incoming_migration(f);
+    qemu_fclose(f);
+    return NULL;
+}
+
 static void fd_accept_incoming_migration(void *opaque)
 {
     QEMUFile *f = opaque;
+    struct QemuThread migrate_incoming_thread;
 
-    process_incoming_migration(f);
     qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL);
-    qemu_fclose(f);
+    qemu_thread_create(&migrate_incoming_thread, fd_incoming_migration_thread,
+                       f);
 }
 
 int fd_start_incoming_migration(const char *infd)
diff --git a/migration-tcp.c b/migration-tcp.c
index d3d80c9..4ef58c7 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -11,6 +11,7 @@ 
  *
  */
 
+#include "qemu-thread.h"
 #include "qemu-common.h"
 #include "qemu_socket.h"
 #include "migration.h"
@@ -65,11 +66,9 @@  static void tcp_wait_for_connect(void *opaque)
         return;
     }
 
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
-    if (val == 0)
+    if (val == 0) {
         migrate_fd_connect(s);
-    else {
+    } else {
         DPRINTF("error connecting %d\n", val);
         migrate_fd_error(s);
     }
@@ -79,8 +78,8 @@  MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                              const char *host_port,
                                              int64_t bandwidth_limit,
                                              int detach,
-					     int blk,
-					     int inc)
+                                             int blk,
+                                             int inc)
 {
     struct sockaddr_in addr;
     FdMigrationState *s;
@@ -111,7 +110,7 @@  MigrationState *tcp_start_outgoing_migration(Monitor *mon,
     }
 
     socket_set_nonblock(s->fd);
-
+    
     if (!detach) {
         migrate_fd_monitor_suspend(s, mon);
     }
@@ -121,20 +120,22 @@  MigrationState *tcp_start_outgoing_migration(Monitor *mon,
         if (ret == -1)
             ret = -(s->get_error(s));
 
-        if (ret == -EINPROGRESS || ret == -EWOULDBLOCK)
-            qemu_set_fd_handler2(s->fd, NULL, NULL, tcp_wait_for_connect, s);
     } while (ret == -EINTR);
 
     if (ret < 0 && ret != -EINPROGRESS && ret != -EWOULDBLOCK) {
         DPRINTF("connect failed\n");
         migrate_fd_error(s);
-    } else if (ret >= 0)
+    } else if (ret >= 0) {
         migrate_fd_connect(s);
+    } else { 
+        migrate_fd_wait_for_unfreeze(s);
+        tcp_wait_for_connect(s);
+    }
 
     return &s->mig_state;
 }
 
-static void tcp_accept_incoming_migration(void *opaque)
+static void *tcp_accept_incoming_migration(void *opaque)
 {
     struct sockaddr_in addr;
     socklen_t addrlen = sizeof(addr);
@@ -142,6 +143,8 @@  static void tcp_accept_incoming_migration(void *opaque)
     QEMUFile *f;
     int c;
 
+    migrate_fd_wait(s);
+
     do {
         c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen);
     } while (c == -1 && socket_error() == EINTR);
@@ -164,15 +167,16 @@  static void tcp_accept_incoming_migration(void *opaque)
 out:
     close(c);
 out2:
-    qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL);
     close(s);
+    return NULL;
 }
 
 int tcp_start_incoming_migration(const char *host_port)
 {
     struct sockaddr_in addr;
     int val;
-    int s;
+    struct QemuThread migrate_incoming_thread;
+    static int s;
 
     if (parse_host_port(&addr, host_port) < 0) {
         fprintf(stderr, "invalid host/port combination: %s\n", host_port);
@@ -192,8 +196,8 @@  int tcp_start_incoming_migration(const char *host_port)
     if (listen(s, 1) == -1)
         goto err;
 
-    qemu_set_fd_handler2(s, NULL, tcp_accept_incoming_migration, NULL,
-                         (void *)(intptr_t)s);
+    qemu_thread_create(&migrate_incoming_thread, tcp_accept_incoming_migration,
+                       (void *)(unsigned long)s);
 
     return 0;
 
diff --git a/migration-unix.c b/migration-unix.c
index c8625c7..9ba2944 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -11,6 +11,7 @@ 
  *
  */
 
+#include "qemu-thread.h"
 #include "qemu-common.h"
 #include "qemu_socket.h"
 #include "migration.h"
@@ -64,8 +65,6 @@  static void unix_wait_for_connect(void *opaque)
         return;
     }
 
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
     if (val == 0)
         migrate_fd_connect(s);
     else {
@@ -116,13 +115,14 @@  MigrationState *unix_start_outgoing_migration(Monitor *mon,
         if (ret == -1)
 	    ret = -(s->get_error(s));
 
-        if (ret == -EINPROGRESS || ret == -EWOULDBLOCK)
-	    qemu_set_fd_handler2(s->fd, NULL, NULL, unix_wait_for_connect, s);
     } while (ret == -EINTR);
 
     if (ret < 0 && ret != -EINPROGRESS && ret != -EWOULDBLOCK) {
         DPRINTF("connect failed\n");
         goto err_after_open;
+    } else if (ret == -EINPROGRESS || ret == -EWOULDBLOCK) {
+        migrate_fd_wait_for_unfreeze(s);
+        unix_wait_for_connect(s);
     }
 
     if (!detach) {
@@ -142,7 +142,7 @@  err_after_alloc:
     return NULL;
 }
 
-static void unix_accept_incoming_migration(void *opaque)
+static void *unix_accept_incoming_migration(void *opaque)
 {
     struct sockaddr_un addr;
     socklen_t addrlen = sizeof(addr);
@@ -150,6 +150,8 @@  static void unix_accept_incoming_migration(void *opaque)
     QEMUFile *f;
     int c;
 
+    migrate_fd_wait(s);
+
     do {
         c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen);
     } while (c == -1 && socket_error() == EINTR);
@@ -158,7 +160,7 @@  static void unix_accept_incoming_migration(void *opaque)
 
     if (c == -1) {
         fprintf(stderr, "could not accept migration connection\n");
-        return;
+        return NULL;
     }
 
     f = qemu_fopen_socket(c);
@@ -170,15 +172,16 @@  static void unix_accept_incoming_migration(void *opaque)
     process_incoming_migration(f);
     qemu_fclose(f);
 out:
-    qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL);
     close(s);
     close(c);
+    return NULL;
 }
 
 int unix_start_incoming_migration(const char *path)
 {
     struct sockaddr_un un;
-    int sock;
+    struct QemuThread migrate_incoming_thread;
+    static int sock;
 
     DPRINTF("Attempting to start an incoming migration\n");
 
@@ -202,8 +205,8 @@  int unix_start_incoming_migration(const char *path)
         goto err;
     }
 
-    qemu_set_fd_handler2(sock, NULL, unix_accept_incoming_migration, NULL,
-			 (void *)(intptr_t)sock);
+    qemu_thread_create(&migrate_incoming_thread, unix_accept_incoming_migration,
+                       (void *)(unsigned long)sock);
 
     return 0;
 
diff --git a/migration.c b/migration.c
index af3a1f2..5ada647 100644
--- a/migration.c
+++ b/migration.c
@@ -12,6 +12,8 @@ 
  */
 
 #include "qemu-common.h"
+#include "qemu-thread.h"
+#include "qemu-timer.h"
 #include "migration.h"
 #include "monitor.h"
 #include "buffered_file.h"
@@ -35,6 +37,7 @@ 
 static int64_t max_throttle = (32 << 20);
 
 static MigrationState *current_migration;
+char host_port[50];
 
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -284,8 +287,6 @@  int migrate_fd_cleanup(FdMigrationState *s)
 {
     int ret = 0;
 
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
     if (s->file) {
         DPRINTF("closing file\n");
         if (qemu_fclose(s->file) != 0) {
@@ -307,14 +308,6 @@  int migrate_fd_cleanup(FdMigrationState *s)
     return ret;
 }
 
-void migrate_fd_put_notify(void *opaque)
-{
-    FdMigrationState *s = opaque;
-
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-    qemu_file_put_notify(s->file);
-}
-
 ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size)
 {
     FdMigrationState *s = opaque;
@@ -327,9 +320,7 @@  ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size)
     if (ret == -1)
         ret = -(s->get_error(s));
 
-    if (ret == -EAGAIN) {
-        qemu_set_fd_handler2(s->fd, NULL, NULL, migrate_fd_put_notify, s);
-    } else if (ret < 0) {
+    if (ret < 0 && ret != -EAGAIN) {
         if (s->mon) {
             monitor_resume(s->mon);
         }
@@ -340,10 +331,27 @@  ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size)
     return ret;
 }
 
-void migrate_fd_connect(FdMigrationState *s)
+void *migrate_run_timers(void *arg)
 {
+    FdMigrationState *s = arg;
     int ret;
+    ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
+            s->mig_state.shared);
+    if (ret < 0) {
+        DPRINTF("failed, %d\n", ret);
+        migrate_fd_error(s);
+        return NULL;
+    }
+
+    migrate_fd_put_ready(s);
+    while (s->state == MIG_STATE_ACTIVE)
+        qemu_run_timers(migration_clock);
+    return NULL;
+}
 
+void migrate_fd_connect(FdMigrationState *s)
+{
+    struct QemuThread migrate_thread;
     s->file = qemu_fopen_ops_buffered(s,
                                       s->bandwidth_limit,
                                       migrate_fd_put_buffer,
@@ -352,15 +360,7 @@  void migrate_fd_connect(FdMigrationState *s)
                                       migrate_fd_close);
 
     DPRINTF("beginning savevm\n");
-    ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
-                                  s->mig_state.shared);
-    if (ret < 0) {
-        DPRINTF("failed, %d\n", ret);
-        migrate_fd_error(s);
-        return;
-    }
-    
-    migrate_fd_put_ready(s);
+    qemu_thread_create(&migrate_thread, migrate_run_timers, s);
 }
 
 void migrate_fd_put_ready(void *opaque)
@@ -376,8 +376,7 @@  void migrate_fd_put_ready(void *opaque)
     if (qemu_savevm_state_iterate(s->mon, s->file) == 1) {
         int state;
         int old_vm_running = vm_running;
-
-        DPRINTF("done iterating\n");
+        qemu_mutex_lock_iothread();
         vm_stop(VMSTOP_MIGRATE);
 
         if ((qemu_savevm_state_complete(s->mon, s->file)) < 0) {
@@ -396,6 +395,10 @@  void migrate_fd_put_ready(void *opaque)
         }
         s->state = state;
         notifier_list_notify(&migration_state_notifiers);
+        qemu_mutex_unlock_iothread();
+    } else {
+        migrate_fd_wait_for_unfreeze(s);
+        qemu_file_put_notify(s->file);
     }
 }
 
@@ -454,11 +457,23 @@  void migrate_fd_wait_for_unfreeze(void *opaque)
     } while (ret == -1 && (s->get_error(s)) == EINTR);
 }
 
+void migrate_fd_wait(int fd)
+{
+    int ret;
+    do {
+        fd_set rfds;
+
+        FD_ZERO(&rfds);
+        FD_SET(fd, &rfds);
+
+        ret = select(fd + 1, &rfds, NULL, NULL, NULL);
+    } while (ret == -1);
+}
+
 int migrate_fd_close(void *opaque)
 {
     FdMigrationState *s = opaque;
 
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
     return s->close(s);
 }
 
diff --git a/migration.h b/migration.h
index 050c56c..2f46d9e 100644
--- a/migration.h
+++ b/migration.h
@@ -72,6 +72,8 @@  void do_info_migrate(Monitor *mon, QObject **ret_data);
 
 int exec_start_incoming_migration(const char *host_port);
 
+void *migrate_run_timers(void *);
+
 MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                               const char *host_port,
 					      int64_t bandwidth_limit,
@@ -112,8 +114,6 @@  void migrate_fd_error(FdMigrationState *s);
 
 int migrate_fd_cleanup(FdMigrationState *s);
 
-void migrate_fd_put_notify(void *opaque);
-
 ssize_t migrate_fd_put_buffer(void *opaque, const void *data, size_t size);
 
 void migrate_fd_connect(FdMigrationState *s);
@@ -128,6 +128,8 @@  void migrate_fd_release(MigrationState *mig_state);
 
 void migrate_fd_wait_for_unfreeze(void *opaque);
 
+void migrate_fd_wait(int fd);
+
 int migrate_fd_close(void *opaque);
 
 static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
diff --git a/qemu-timer.c b/qemu-timer.c
index 72066c7..3a0b114 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -144,6 +144,7 @@  void cpu_disable_ticks(void)
 #define QEMU_CLOCK_REALTIME 0
 #define QEMU_CLOCK_VIRTUAL  1
 #define QEMU_CLOCK_HOST     2
+#define QEMU_CLOCK_MIGRATE  3
 
 struct QEMUClock {
     int type;
@@ -364,9 +365,10 @@  next:
     }
 }
 
-#define QEMU_NUM_CLOCKS 3
+#define QEMU_NUM_CLOCKS 4
 
 QEMUClock *rt_clock;
+QEMUClock *migration_clock;
 QEMUClock *vm_clock;
 QEMUClock *host_clock;
 
@@ -561,12 +563,30 @@  int qemu_timer_pending(QEMUTimer *ts)
     return 0;
 }
 
+int64_t qemu_timer_difference(QEMUTimer *ts, QEMUClock *clock)
+{
+    int64_t expire_time, current_time;
+    QEMUTimer *t;
+
+    current_time = qemu_get_clock_ms(clock);
+    for(t = active_timers[clock->type]; t != NULL; t = t->next) {
+        if (t == ts) {
+            expire_time = ts->expire_time / SCALE_MS;
+            if (current_time >= expire_time)
+                return 0;
+            else
+                return expire_time - current_time;
+        }
+    }
+    return 0;
+}
+
 int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time)
 {
     return qemu_timer_expired_ns(timer_head, current_time * timer_head->scale);
 }
 
-static void qemu_run_timers(QEMUClock *clock)
+void qemu_run_timers(QEMUClock *clock)
 {
     QEMUTimer **ptimer_head, *ts;
     int64_t current_time;
@@ -595,6 +615,9 @@  int64_t qemu_get_clock_ns(QEMUClock *clock)
     switch(clock->type) {
     case QEMU_CLOCK_REALTIME:
         return get_clock();
+
+    case QEMU_CLOCK_MIGRATE:
+        return get_clock();
     default:
     case QEMU_CLOCK_VIRTUAL:
         if (use_icount) {
@@ -610,6 +633,7 @@  int64_t qemu_get_clock_ns(QEMUClock *clock)
 void init_clocks(void)
 {
     rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
+    migration_clock = qemu_new_clock(QEMU_CLOCK_MIGRATE);
     vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
     host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
 
diff --git a/qemu-timer.h b/qemu-timer.h
index 06cbe20..014b70b 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -23,6 +23,7 @@  typedef void QEMUTimerCB(void *opaque);
    machine is stopped. The real time clock has a frequency of 1000
    Hz. */
 extern QEMUClock *rt_clock;
+extern QEMUClock *migration_clock;
 
 /* The virtual clock is only run during the emulation. It is stopped
    when the virtual machine is stopped. Virtual timers use a high
@@ -45,7 +46,9 @@  QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
 void qemu_free_timer(QEMUTimer *ts);
 void qemu_del_timer(QEMUTimer *ts);
 void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
+void qemu_run_timers(QEMUClock *clock);
 int qemu_timer_pending(QEMUTimer *ts);
+int64_t qemu_timer_difference(QEMUTimer *ts, QEMUClock *);
 int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time);
 
 void qemu_run_all_timers(void);