Patchwork [11/41] migration: simplify error handling

login
register
mail settings
Submitter Paolo Bonzini
Date Feb. 15, 2013, 5:46 p.m.
Message ID <1360950433-17106-12-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/220816/
State New
Headers show

Comments

Paolo Bonzini - Feb. 15, 2013, 5:46 p.m.
Always use qemu_file_get_error to detect errors, since that is how
QEMUFile itself drops I/O after an error occurs.  There is no need
to propagate and check return values all the time.

Also remove the "complete" member, since we know that it is set (via
migrate_fd_cleanup) only when the state changes.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/migration/migration.h |    1 -
 migration.c                   |   46 ++++++++++++----------------------------
 2 files changed, 14 insertions(+), 33 deletions(-)
Orit Wasserman - Feb. 18, 2013, 10:22 a.m.
On 02/15/2013 07:46 PM, Paolo Bonzini wrote:
> Always use qemu_file_get_error to detect errors, since that is how
> QEMUFile itself drops I/O after an error occurs.  There is no need
> to propagate and check return values all the time.
> 
> Also remove the "complete" member, since we know that it is set (via
> migrate_fd_cleanup) only when the state changes.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/migration/migration.h |    1 -
>  migration.c                   |   46 ++++++++++++----------------------------
>  2 files changed, 14 insertions(+), 33 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index a8c9639..4928642 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -53,7 +53,6 @@ struct MigrationState
>      int64_t dirty_pages_rate;
>      bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
>      int64_t xbzrle_cache_size;
> -    bool complete;
>  };
>  
>  void process_incoming_migration(QEMUFile *f);
> diff --git a/migration.c b/migration.c
> index 6834d61..75dd38a 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -525,6 +525,10 @@ static void buffered_flush(MigrationState *s)
>  
>      DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);
>  
> +    if (qemu_file_get_error(s->file)) {
> +        s->buffer_size = 0;
> +        return;
> +    }
>      qemu_fflush(s->file);
>  
>      while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
> @@ -592,7 +596,6 @@ static int buffered_close(void *opaque)
>      while (!qemu_file_get_error(s->file) && s->buffer_size) {
>          buffered_flush(s);
>      }
> -    s->complete = true;
>      return migrate_fd_close(s);
>  }
>  
> @@ -655,37 +658,21 @@ static void *buffered_file_thread(void *opaque)
>      int64_t initial_time = qemu_get_clock_ms(rt_clock);
>      int64_t max_size = 0;
>      bool last_round = false;
> -    int ret;
>  
>      qemu_mutex_lock_iothread();
>      DPRINTF("beginning savevm\n");
> -    ret = qemu_savevm_state_begin(s->file, &s->params);
> -    qemu_mutex_unlock_iothread();
> +    qemu_savevm_state_begin(s->file, &s->params);
>  
> -    while (ret >= 0) {
> +    while (s->state == MIG_STATE_ACTIVE) {
>          int64_t current_time = qemu_get_clock_ms(rt_clock);
>          uint64_t pending_size;
>  
> -        qemu_mutex_lock_iothread();
> -        if (s->state != MIG_STATE_ACTIVE) {
> -            DPRINTF("put_ready returning because of non-active state\n");
> -            qemu_mutex_unlock_iothread();
> -            break;
> -        }
> -        if (s->complete) {
> -            qemu_mutex_unlock_iothread();
> -            break;
> -        }
>          if (s->bytes_xfer < s->xfer_limit) {
>              DPRINTF("iterate\n");
>              pending_size = qemu_savevm_state_pending(s->file, max_size);
>              DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
>              if (pending_size && pending_size >= max_size) {
> -                ret = qemu_savevm_state_iterate(s->file);
> -                if (ret < 0) {
> -                    qemu_mutex_unlock_iothread();
> -                    break;
> -                }
> +                qemu_savevm_state_iterate(s->file);
>              } else {
>                  int old_vm_running = runstate_is_running();
>                  int64_t start_time, end_time;
> @@ -694,13 +681,8 @@ static void *buffered_file_thread(void *opaque)
>                  start_time = qemu_get_clock_ms(rt_clock);
>                  qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
>                  vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
> -                ret = qemu_savevm_state_complete(s->file);
> -                if (ret < 0) {
> -                    qemu_mutex_unlock_iothread();
> -                    break;
> -                } else {
> -                    migrate_fd_completed(s);
> -                }
> +                qemu_savevm_state_complete(s->file);
> +                migrate_fd_completed(s);
>                  end_time = qemu_get_clock_ms(rt_clock);
>                  s->total_time = end_time - s->total_time;
>                  s->downtime = end_time - start_time;
> @@ -731,12 +713,13 @@ static void *buffered_file_thread(void *opaque)
>              g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
>          }
>          buffered_flush(s);
> -        ret = qemu_file_get_error(s->file);
> +        qemu_mutex_lock_iothread();
> +        if (qemu_file_get_error(s->file)) {
> +            migrate_fd_error(s);
> +        }
>      }
>  
> -    if (ret < 0) {
> -        migrate_fd_error(s);
> -    }
> +    qemu_mutex_unlock_iothread();
>      g_free(s->buffer);
>      return NULL;
>  }
> @@ -759,7 +742,6 @@ void migrate_fd_connect(MigrationState *s)
>      s->buffer_capacity = 0;
>  
>      s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO;
> -    s->complete = false;
>  
>      s->file = qemu_fopen_ops(s, &buffered_file_ops);
>  
> 
Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Juan Quintela - Feb. 21, 2013, 5:34 p.m.
Paolo Bonzini <pbonzini@redhat.com> wrote:
> Always use qemu_file_get_error to detect errors, since that is how
> QEMUFile itself drops I/O after an error occurs.  There is no need
> to propagate and check return values all the time.
>
> Also remove the "complete" member, since we know that it is set (via
> migrate_fd_cleanup) only when the state changes.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

This move buffered_flush() to inside the iothread lock.  At least the
commit message needs to be changed.

Looking at the rest of the series before thinking if that is the right
approach.

Later, Juan.
Paolo Bonzini - Feb. 21, 2013, 5:44 p.m.
Il 21/02/2013 18:34, Juan Quintela ha scritto:
> This move buffered_flush() to inside the iothread lock.  At least the
> commit message needs to be changed.

No, it doesn't... Here is the full body of the migration thread:

    qemu_mutex_lock_iothread();
    qemu_savevm_state_begin(s->file, &s->params);

    while (s->state == MIG_STATE_ACTIVE) {
        int64_t current_time = qemu_get_clock_ms(rt_clock);
        uint64_t pending_size;

        if (s->bytes_xfer < s->xfer_limit) {
            /* call qemu_savevm_state_* */
        }

        qemu_mutex_unlock_iothread();
        if (current_time >= initial_time + BUFFER_DELAY) {
            /* yadda yadda */
            s->bytes_xfer = 0;
            initial_time = current_time;
        }
        if (!last_round && (s->bytes_xfer >= s->xfer_limit)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
        }
        buffered_flush(s);
        qemu_mutex_lock_iothread();

        if (qemu_file_get_error(s->file)) {
            migrate_fd_error(s);
        }
    }

    qemu_mutex_unlock_iothread();

> Looking at the rest of the series before thinking if that is the right
> approach.

The series is fully bisectable.  There should be no thread-unsafe patch,
nor any state where blocking calls are done with iothread lock taken
(except at the end of migration where qemu_savevm_state_complete runs
with iothread lock taken; but this happens much later than this patch).

Paolo
Juan Quintela - Feb. 22, 2013, 10:42 a.m.
Paolo Bonzini <pbonzini@redhat.com> wrote:
> Always use qemu_file_get_error to detect errors, since that is how
> QEMUFile itself drops I/O after an error occurs.  There is no need
> to propagate and check return values all the time.
>
> Also remove the "complete" member, since we know that it is set (via
> migrate_fd_cleanup) only when the state changes.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index a8c9639..4928642 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -53,7 +53,6 @@  struct MigrationState
     int64_t dirty_pages_rate;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
     int64_t xbzrle_cache_size;
-    bool complete;
 };
 
 void process_incoming_migration(QEMUFile *f);
diff --git a/migration.c b/migration.c
index 6834d61..75dd38a 100644
--- a/migration.c
+++ b/migration.c
@@ -525,6 +525,10 @@  static void buffered_flush(MigrationState *s)
 
     DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);
 
+    if (qemu_file_get_error(s->file)) {
+        s->buffer_size = 0;
+        return;
+    }
     qemu_fflush(s->file);
 
     while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
@@ -592,7 +596,6 @@  static int buffered_close(void *opaque)
     while (!qemu_file_get_error(s->file) && s->buffer_size) {
         buffered_flush(s);
     }
-    s->complete = true;
     return migrate_fd_close(s);
 }
 
@@ -655,37 +658,21 @@  static void *buffered_file_thread(void *opaque)
     int64_t initial_time = qemu_get_clock_ms(rt_clock);
     int64_t max_size = 0;
     bool last_round = false;
-    int ret;
 
     qemu_mutex_lock_iothread();
     DPRINTF("beginning savevm\n");
-    ret = qemu_savevm_state_begin(s->file, &s->params);
-    qemu_mutex_unlock_iothread();
+    qemu_savevm_state_begin(s->file, &s->params);
 
-    while (ret >= 0) {
+    while (s->state == MIG_STATE_ACTIVE) {
         int64_t current_time = qemu_get_clock_ms(rt_clock);
         uint64_t pending_size;
 
-        qemu_mutex_lock_iothread();
-        if (s->state != MIG_STATE_ACTIVE) {
-            DPRINTF("put_ready returning because of non-active state\n");
-            qemu_mutex_unlock_iothread();
-            break;
-        }
-        if (s->complete) {
-            qemu_mutex_unlock_iothread();
-            break;
-        }
         if (s->bytes_xfer < s->xfer_limit) {
             DPRINTF("iterate\n");
             pending_size = qemu_savevm_state_pending(s->file, max_size);
             DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
             if (pending_size && pending_size >= max_size) {
-                ret = qemu_savevm_state_iterate(s->file);
-                if (ret < 0) {
-                    qemu_mutex_unlock_iothread();
-                    break;
-                }
+                qemu_savevm_state_iterate(s->file);
             } else {
                 int old_vm_running = runstate_is_running();
                 int64_t start_time, end_time;
@@ -694,13 +681,8 @@  static void *buffered_file_thread(void *opaque)
                 start_time = qemu_get_clock_ms(rt_clock);
                 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
                 vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-                ret = qemu_savevm_state_complete(s->file);
-                if (ret < 0) {
-                    qemu_mutex_unlock_iothread();
-                    break;
-                } else {
-                    migrate_fd_completed(s);
-                }
+                qemu_savevm_state_complete(s->file);
+                migrate_fd_completed(s);
                 end_time = qemu_get_clock_ms(rt_clock);
                 s->total_time = end_time - s->total_time;
                 s->downtime = end_time - start_time;
@@ -731,12 +713,13 @@  static void *buffered_file_thread(void *opaque)
             g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
         }
         buffered_flush(s);
-        ret = qemu_file_get_error(s->file);
+        qemu_mutex_lock_iothread();
+        if (qemu_file_get_error(s->file)) {
+            migrate_fd_error(s);
+        }
     }
 
-    if (ret < 0) {
-        migrate_fd_error(s);
-    }
+    qemu_mutex_unlock_iothread();
     g_free(s->buffer);
     return NULL;
 }
@@ -759,7 +742,6 @@  void migrate_fd_connect(MigrationState *s)
     s->buffer_capacity = 0;
 
     s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO;
-    s->complete = false;
 
     s->file = qemu_fopen_ops(s, &buffered_file_ops);