diff mbox

[[RFC,WIP] v1] Keeping the Destination side alive in case of network failure (Migration recover from network failure)

Message ID 1464721993-15682-1-git-send-email-haris.phnx@gmail.com
State New
Headers show

Commit Message

Md Haris Iqbal May 31, 2016, 7:13 p.m. UTC
---
 include/migration/migration.h |  3 +++
 migration/migration.c         | 12 ++++++++++++
 migration/savevm.c            | 25 +++++++++++++++++--------
 3 files changed, 32 insertions(+), 8 deletions(-)

Comments

Dr. David Alan Gilbert June 1, 2016, 5:07 p.m. UTC | #1
* Md Haris Iqbal (haris.phnx@gmail.com) wrote:
> ---
>  include/migration/migration.h |  3 +++
>  migration/migration.c         | 12 ++++++++++++
>  migration/savevm.c            | 25 +++++++++++++++++--------
>  3 files changed, 32 insertions(+), 8 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ac2c12c..73832ac 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -326,6 +326,9 @@ void flush_page_queue(MigrationState *ms);
>  int ram_save_queue_pages(MigrationState *ms, const char *rbname,
>                           ram_addr_t start, ram_addr_t len);
>  
> +// New added function for postcopy migration recovery in case of failure

Note that the comment style is always /* ... */, not //.

> +void qemu_postcopy_migration_recovery(QEMUFile **f,MigrationIncomingState* mis);
> +
>  PostcopyState postcopy_state_get(void);
>  /* Set the state and return the old state */
>  PostcopyState postcopy_state_set(PostcopyState new_state);
> diff --git a/migration/migration.c b/migration/migration.c
> index 991313a..4301972 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1797,6 +1797,18 @@ void migrate_fd_connect(MigrationState *s)
>      s->migration_thread_running = true;
>  }
>  
> +void qemu_postcopy_migration_recovery(QEMUFile **f,
> +                                      MigrationIncomingState* mis)
> +{
> +    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> +                                   MIGRATION_STATUS_POSTCOPY_RECOVERING);
> +    /* Code for network recovery to be added here */
> +    while(1) {
> +        fprintf(stderr, "Not letting it fail\n");
> +        sleep(2);
> +    }
> +}
> +
>  PostcopyState  postcopy_state_get(void)
>  {
>      return atomic_mb_read(&incoming_postcopy_state);
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 16ba443..03467da 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1832,7 +1832,8 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
>  static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
>  {
>      uint8_t section_type;
> -    int ret;
> +    PostcopyState ps;
> +    int32_t ret;

Again match up the 'ret' type.

>      while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
>  
> @@ -1841,21 +1842,16 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
>          case QEMU_VM_SECTION_START:
>          case QEMU_VM_SECTION_FULL:
>              ret = qemu_loadvm_section_start_full(f, mis);
> -            if (ret < 0) {
> -                return ret;
> -            }
>              break;
>          case QEMU_VM_SECTION_PART:
>          case QEMU_VM_SECTION_END:
>              ret = qemu_loadvm_section_part_end(f, mis);
> -            if (ret < 0) {
> -                return ret;
> -            }
>              break;
>          case QEMU_VM_COMMAND:
>              ret = loadvm_process_command(f);
>              trace_qemu_loadvm_state_section_command(ret);
> -            if ((ret < 0) || (ret & LOADVM_QUIT)) {
> +            if (ret & LOADVM_QUIT) {
> +                fprintf(stderr, "LOADVM_QUIT\n");
>                  return ret;
>              }
>              break;
> @@ -1863,6 +1859,19 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
>              error_report("Unknown savevm section type %d", section_type);
>              return -EINVAL;
>          }
> +
> +        if (ret < 0) {
> +            fprintf(stderr,  "Failure\n");
> +
> +            ps = postcopy_state_get();
> +            ret = qemu_file_get_error(f);
> +            fprintf(stderr, "Error %s %d\n", strerror(-ret), -ret);
> +
> +            if(ret != -EIO && ps == POSTCOPY_INCOMING_RUNNING)
> +                qemu_postcopy_migration_recovery(&f, mis);

QEMU's coding style always requires braces, even for a one-line if.

ps == POSTCOPY_INCOMING_RUNNING is a reasonable test; I wonder if we should
also combine it with the run-state: if the destination was started with -S
and got to RUNNING but libvirt hadn't actually told it to run yet, then I think
it's still safe to fail the migration right up to the point the destination
is started running.

Dave

> +            else
> +                return ret;
> +        }
>      }
>  
>      return 0;
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index ac2c12c..73832ac 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -326,6 +326,9 @@  void flush_page_queue(MigrationState *ms);
 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                          ram_addr_t start, ram_addr_t len);
 
+// New added function for postcopy migration recovery in case of failure
+void qemu_postcopy_migration_recovery(QEMUFile **f,MigrationIncomingState* mis);
+
 PostcopyState postcopy_state_get(void);
 /* Set the state and return the old state */
 PostcopyState postcopy_state_set(PostcopyState new_state);
diff --git a/migration/migration.c b/migration/migration.c
index 991313a..4301972 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1797,6 +1797,18 @@  void migrate_fd_connect(MigrationState *s)
     s->migration_thread_running = true;
 }
 
+void qemu_postcopy_migration_recovery(QEMUFile **f,
+                                      MigrationIncomingState* mis)
+{
+    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+                                   MIGRATION_STATUS_POSTCOPY_RECOVERING);
+    /* Code for network recovery to be added here */
+    while(1) {
+        fprintf(stderr, "Not letting it fail\n");
+        sleep(2);
+    }
+}
+
 PostcopyState  postcopy_state_get(void)
 {
     return atomic_mb_read(&incoming_postcopy_state);
diff --git a/migration/savevm.c b/migration/savevm.c
index 16ba443..03467da 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1832,7 +1832,8 @@  qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
 {
     uint8_t section_type;
-    int ret;
+    PostcopyState ps;
+    int32_t ret;
 
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
 
@@ -1841,21 +1842,16 @@  static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
         case QEMU_VM_SECTION_START:
         case QEMU_VM_SECTION_FULL:
             ret = qemu_loadvm_section_start_full(f, mis);
-            if (ret < 0) {
-                return ret;
-            }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
             ret = qemu_loadvm_section_part_end(f, mis);
-            if (ret < 0) {
-                return ret;
-            }
             break;
         case QEMU_VM_COMMAND:
             ret = loadvm_process_command(f);
             trace_qemu_loadvm_state_section_command(ret);
-            if ((ret < 0) || (ret & LOADVM_QUIT)) {
+            if (ret & LOADVM_QUIT) {
+                fprintf(stderr, "LOADVM_QUIT\n");
                 return ret;
             }
             break;
@@ -1863,6 +1859,19 @@  static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
             error_report("Unknown savevm section type %d", section_type);
             return -EINVAL;
         }
+
+        if (ret < 0) {
+            fprintf(stderr,  "Failure\n");
+
+            ps = postcopy_state_get();
+            ret = qemu_file_get_error(f);
+            fprintf(stderr, "Error %s %d\n", strerror(-ret), -ret);
+
+            if(ret != -EIO && ps == POSTCOPY_INCOMING_RUNNING)
+                qemu_postcopy_migration_recovery(&f, mis);
+            else
+                return ret;
+        }
     }
 
     return 0;