diff mbox

[2/7] migration: only flush when there are no errors

Message ID 2fb9e253266c4926a168168c854fdf5c68ccfca3.1316524908.git.quintela@redhat.com
State New
Headers show

Commit Message

Juan Quintela Sept. 20, 2011, 1:24 p.m. UTC
If we have one error while migrating, and then we issuse a
"migrate_cancel" command, guest hang.  Fix it for flushing only when
migration is in MIG_STATE_ACTIVE.  In case of error of cancellation,
don't flush.

We had an infinite loop at buffered_close()

        while (!s->has_error && s->buffer_size) {
            buffered_flush(s);
            if (s->freeze_output)
                s->wait_for_unfreeze(s);
        }

There was no errors, there were things to send, and connection was
broken.  send() returns -EAGAIN, so we freezed output, but we
unfreeze_output and try again.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 buffered_file.c  |   17 ++++++++++-------
 buffered_file.h  |    2 +-
 hw/hw.h          |    4 ++--
 migration-exec.c |    6 +++---
 migration-fd.c   |    4 ++--
 migration-tcp.c  |    4 ++--
 migration-unix.c |    4 ++--
 migration.c      |    6 +++---
 migration.h      |    4 ++--
 savevm.c         |   20 +++++++++++---------
 10 files changed, 38 insertions(+), 33 deletions(-)

Comments

Daniel P. Berrangé Sept. 20, 2011, 2:25 p.m. UTC | #1
On Tue, Sep 20, 2011 at 03:24:41PM +0200, Juan Quintela wrote:
> If we have one error while migrating, and then we issuse a
> "migrate_cancel" command, guest hang.  Fix it for flushing only when
> migration is in MIG_STATE_ACTIVE.  In case of error of cancellation,
> don't flush.
> 
> We had an infinite loop at buffered_close()
> 
>         while (!s->has_error && s->buffer_size) {
>             buffered_flush(s);
>             if (s->freeze_output)
>                 s->wait_for_unfreeze(s);
>         }
> 
> There was no errors, there were things to send, and connection was
> broken.  send() returns -EAGAIN, so we freezed output, but we
> unfreeze_output and try again.

I posted a couple of alternative approaches to fixing this
hang problem

http://lists.nongnu.org/archive/html/qemu-devel/2011-08/msg03248.html

My second approach of checking the migration state in migrate_fd_put_buffer()
seems like it would be worthwhile, even with your patch as an additional
safety net against bad code.

Daniel
Juan Quintela Sept. 20, 2011, 2:47 p.m. UTC | #2
"Daniel P. Berrange" <berrange@redhat.com> wrote:
> On Tue, Sep 20, 2011 at 03:24:41PM +0200, Juan Quintela wrote:
>> If we have one error while migrating, and then we issuse a
>> "migrate_cancel" command, guest hang.  Fix it for flushing only when
>> migration is in MIG_STATE_ACTIVE.  In case of error of cancellation,
>> don't flush.
>> 
>> We had an infinite loop at buffered_close()
>> 
>>         while (!s->has_error && s->buffer_size) {
>>             buffered_flush(s);
>>             if (s->freeze_output)
>>                 s->wait_for_unfreeze(s);
>>         }
>> 
>> There was no errors, there were things to send, and connection was
>> broken.  send() returns -EAGAIN, so we freezed output, but we
>> unfreeze_output and try again.
>
> I posted a couple of alternative approaches to fixing this
> hang problem
>
> http://lists.nongnu.org/archive/html/qemu-devel/2011-08/msg03248.html
>
> My second approach of checking the migration state in migrate_fd_put_buffer()
> seems like it would be worthwhile, even with your patch as an additional
> safety net against bad code.

We can add that there, but in my tests, the s->write() was returning
correctly an error (or -EAGAIN).  The problem was that we were not
exiting when we didn't needed to.

I agree that we can have *both* tests.  I will add your patch to my
series.

Thanks for the fast review.

Later, Juan.
diff mbox

Patch

diff --git a/buffered_file.c b/buffered_file.c
index 486af57..5ba3d19 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -166,20 +166,23 @@  static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
     return offset;
 }

-static int buffered_close(void *opaque)
+static int buffered_close(void *opaque, bool flush)
 {
     QEMUFileBuffered *s = opaque;
     int ret;

     DPRINTF("closing\n");

-    while (!s->has_error && s->buffer_size) {
-        buffered_flush(s);
-        if (s->freeze_output)
-            s->wait_for_unfreeze(s);
+    if (flush) {
+        while (!s->has_error && s->buffer_size) {
+            buffered_flush(s);
+            if (s->freeze_output) {
+                s->wait_for_unfreeze(s);
+            }
+        }
     }

-    ret = s->close(s->opaque);
+    ret = s->close(s->opaque, flush);

     qemu_del_timer(s->timer);
     qemu_free_timer(s->timer);
@@ -233,7 +236,7 @@  static void buffered_rate_tick(void *opaque)
     QEMUFileBuffered *s = opaque;

     if (s->has_error) {
-        buffered_close(s);
+        buffered_close(s, false);
         return;
     }

diff --git a/buffered_file.h b/buffered_file.h
index 98d358b..16162ec 100644
--- a/buffered_file.h
+++ b/buffered_file.h
@@ -19,7 +19,7 @@ 
 typedef ssize_t (BufferedPutFunc)(void *opaque, const void *data, size_t size);
 typedef void (BufferedPutReadyFunc)(void *opaque);
 typedef void (BufferedWaitForUnfreezeFunc)(void *opaque);
-typedef int (BufferedCloseFunc)(void *opaque);
+typedef int (BufferedCloseFunc)(void *opaque, bool flush);

 QEMUFile *qemu_fopen_ops_buffered(void *opaque, size_t xfer_limit,
                                   BufferedPutFunc *put_buffer,
diff --git a/hw/hw.h b/hw/hw.h
index a124da9..129de0e 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -28,7 +28,7 @@  typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
                                     int64_t pos, int size);

 /* Close a file and return an error code */
-typedef int (QEMUFileCloseFunc)(void *opaque);
+typedef int (QEMUFileCloseFunc)(void *opaque, bool flush);

 /* Called to determine if the file has exceeded it's bandwidth allocation.  The
  * bandwidth capping is a soft limit, not a hard limit.
@@ -55,7 +55,7 @@  QEMUFile *qemu_popen(FILE *popen_file, const char *mode);
 QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
 int qemu_stdio_fd(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
-int qemu_fclose(QEMUFile *f);
+int qemu_fclose(QEMUFile *f, bool flush);
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
 void qemu_put_byte(QEMUFile *f, int v);

diff --git a/migration-exec.c b/migration-exec.c
index 2cfb6f2..5fe09e3 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -42,12 +42,12 @@  static int file_write(FdMigrationState *s, const void * buf, size_t size)
     return write(s->fd, buf, size);
 }

-static int exec_close(FdMigrationState *s)
+static int exec_close(FdMigrationState *s, bool flush)
 {
     int ret = 0;
     DPRINTF("exec_close\n");
     if (s->opaque) {
-        ret = qemu_fclose(s->opaque);
+        ret = qemu_fclose(s->opaque, flush);
         s->opaque = NULL;
         s->fd = -1;
         if (ret != -1 &&
@@ -123,7 +123,7 @@  static void exec_accept_incoming_migration(void *opaque)

     process_incoming_migration(f);
     qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL);
-    qemu_fclose(f);
+    qemu_fclose(f, false);
 }

 int exec_start_incoming_migration(const char *command)
diff --git a/migration-fd.c b/migration-fd.c
index aee690a..3908850 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -40,7 +40,7 @@  static int fd_write(FdMigrationState *s, const void * buf, size_t size)
     return write(s->fd, buf, size);
 }

-static int fd_close(FdMigrationState *s)
+static int fd_close(FdMigrationState *s, bool flush)
 {
     DPRINTF("fd_close\n");
     if (s->fd != -1) {
@@ -106,7 +106,7 @@  static void fd_accept_incoming_migration(void *opaque)

     process_incoming_migration(f);
     qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL);
-    qemu_fclose(f);
+    qemu_fclose(f, false);
 }

 int fd_start_incoming_migration(const char *infd)
diff --git a/migration-tcp.c b/migration-tcp.c
index c431e03..cb061f6 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -38,7 +38,7 @@  static int socket_write(FdMigrationState *s, const void * buf, size_t size)
     return send(s->fd, buf, size, 0);
 }

-static int tcp_close(FdMigrationState *s)
+static int tcp_close(FdMigrationState *s, bool flush)
 {
     DPRINTF("tcp_close\n");
     if (s->fd != -1) {
@@ -160,7 +160,7 @@  static void tcp_accept_incoming_migration(void *opaque)
     }

     process_incoming_migration(f);
-    qemu_fclose(f);
+    qemu_fclose(f, false);
 out:
     close(c);
 out2:
diff --git a/migration-unix.c b/migration-unix.c
index 6dc985d..bafb855 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -38,7 +38,7 @@  static int unix_write(FdMigrationState *s, const void * buf, size_t size)
     return write(s->fd, buf, size);
 }

-static int unix_close(FdMigrationState *s)
+static int unix_close(FdMigrationState *s, bool flush)
 {
     DPRINTF("unix_close\n");
     if (s->fd != -1) {
@@ -168,7 +168,7 @@  static void unix_accept_incoming_migration(void *opaque)
     }

     process_incoming_migration(f);
-    qemu_fclose(f);
+    qemu_fclose(f, false);
 out:
     qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL);
     close(s);
diff --git a/migration.c b/migration.c
index 9a93e3b..15d001e 100644
--- a/migration.c
+++ b/migration.c
@@ -288,7 +288,7 @@  int migrate_fd_cleanup(FdMigrationState *s)

     if (s->file) {
         DPRINTF("closing file\n");
-        if (qemu_fclose(s->file) != 0) {
+        if (qemu_fclose(s->file, s->state == MIG_STATE_ACTIVE) != 0) {
             ret = -1;
         }
         s->file = NULL;
@@ -449,7 +449,7 @@  void migrate_fd_wait_for_unfreeze(void *opaque)
     } while (ret == -1 && (s->get_error(s)) == EINTR);
 }

-int migrate_fd_close(void *opaque)
+int migrate_fd_close(void *opaque, bool flush)
 {
     FdMigrationState *s = opaque;

@@ -457,7 +457,7 @@  int migrate_fd_close(void *opaque)
         monitor_resume(s->mon);
     }
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-    return s->close(s);
+    return s->close(s, flush);
 }

 void add_migration_state_change_notifier(Notifier *notify)
diff --git a/migration.h b/migration.h
index 050c56c..27a1f25 100644
--- a/migration.h
+++ b/migration.h
@@ -46,7 +46,7 @@  struct FdMigrationState
     Monitor *mon;
     int state;
     int (*get_error)(struct FdMigrationState*);
-    int (*close)(struct FdMigrationState*);
+    int (*close)(struct FdMigrationState*, bool flush);
     int (*write)(struct FdMigrationState*, const void *, size_t);
     void *opaque;
 };
@@ -128,7 +128,7 @@  void migrate_fd_release(MigrationState *mig_state);

 void migrate_fd_wait_for_unfreeze(void *opaque);

-int migrate_fd_close(void *opaque);
+int migrate_fd_close(void *opaque, bool flush);

 static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
 {
diff --git a/savevm.c b/savevm.c
index 1feaa70..a793137 100644
--- a/savevm.c
+++ b/savevm.c
@@ -203,7 +203,7 @@  static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
     return len;
 }

-static int socket_close(void *opaque)
+static int socket_close(void *opaque, bool flush)
 {
     QEMUFileSocket *s = opaque;
     g_free(s);
@@ -229,7 +229,7 @@  static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
     return bytes;
 }

-static int stdio_pclose(void *opaque)
+static int stdio_pclose(void *opaque, bool flush)
 {
     QEMUFileStdio *s = opaque;
     int ret;
@@ -238,7 +238,7 @@  static int stdio_pclose(void *opaque)
     return ret;
 }

-static int stdio_fclose(void *opaque)
+static int stdio_fclose(void *opaque, bool flush)
 {
     QEMUFileStdio *s = opaque;
     fclose(s->stdio_file);
@@ -389,7 +389,7 @@  static int block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
     return bdrv_load_vmstate(opaque, buf, pos, size);
 }

-static int bdrv_fclose(void *opaque)
+static int bdrv_fclose(void *opaque, bool flush)
 {
     return 0;
 }
@@ -471,12 +471,14 @@  static void qemu_fill_buffer(QEMUFile *f)
         f->has_error = 1;
 }

-int qemu_fclose(QEMUFile *f)
+int qemu_fclose(QEMUFile *f, bool flush)
 {
     int ret = 0;
-    qemu_fflush(f);
+    if (flush) {
+        qemu_fflush(f);
+    }
     if (f->close)
-        ret = f->close(f->opaque);
+        ret = f->close(f->opaque, flush);
     g_free(f);
     return ret;
 }
@@ -1980,7 +1982,7 @@  void do_savevm(Monitor *mon, const QDict *qdict)
     }
     ret = qemu_savevm_state(mon, f);
     vm_state_size = qemu_ftell(f);
-    qemu_fclose(f);
+    qemu_fclose(f, true);
     if (ret < 0) {
         monitor_printf(mon, "Error %d while writing VM\n", ret);
         goto the_end;
@@ -2077,7 +2079,7 @@  int load_vmstate(const char *name)
     qemu_system_reset(VMRESET_SILENT);
     ret = qemu_loadvm_state(f);

-    qemu_fclose(f);
+    qemu_fclose(f, false);
     if (ret < 0) {
         error_report("Error %d while loading VM state", ret);
         return ret;