diff mbox

[03/11] migration: create new section to store global state

Message ID 1434505833-11234-4-git-send-email-quintela@redhat.com
State New
Headers show

Commit Message

Juan Quintela June 17, 2015, 1:50 a.m. UTC
This includes a new section that for now just stores the current qemu state.

Right now, there are only one way to control what is the state of the
target after migration.

- If you run the target qemu with -S, it would start stopped.
- If you run the target qemu without -S, it would run just after migration finishes.

The problem here is what happens if we start the target without -S and
there happens one error during migration that puts current state as
-EIO.  Migration would ends (notice that the error happend doing block
IO, network IO, i.e. nothing related with migration), and when
migration finish, we would just "continue" running on destination,
probably hanging the guest/corruption data, whatever.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 include/migration/migration.h |  1 +
 migration/migration.c         | 93 +++++++++++++++++++++++++++++++++++++++++--
 vl.c                          |  1 +
 3 files changed, 92 insertions(+), 3 deletions(-)

Comments

Dr. David Alan Gilbert June 17, 2015, 7 p.m. UTC | #1
* Juan Quintela (quintela@redhat.com) wrote:
> This includes a new section that for now just stores the current qemu state.
> 
> Right now, there are only one way to control what is the state of the
> target after migration.
> 
> - If you run the target qemu with -S, it would start stopped.
> - If you run the target qemu without -S, it would run just after migration finishes.
> 
> The problem here is what happens if we start the target without -S and
> there happens one error during migration that puts current state as
> -EIO.  Migration would ends (notice that the error happend doing block
> IO, network IO, i.e. nothing related with migration), and when
> migration finish, we would just "continue" running on destination,
> probably hanging the guest/corruption data, whatever.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  include/migration/migration.h |  1 +
>  migration/migration.c         | 93 +++++++++++++++++++++++++++++++++++++++++--
>  vl.c                          |  1 +
>  3 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 9387c8c..1280193 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -197,4 +197,5 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> 
>  void ram_mig_init(void);
>  void savevm_skip_section_footers(void);
> +void register_global_state(void);
>  #endif
> diff --git a/migration/migration.c b/migration/migration.c
> index b04b457..01bb90d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -25,6 +25,7 @@
>  #include "qemu/thread.h"
>  #include "qmp-commands.h"
>  #include "trace.h"
> +#include "qapi/util.h"
> 
>  #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
> 
> @@ -96,6 +97,81 @@ void migration_incoming_state_destroy(void)
>      mis_current = NULL;
>  }
> 
> +
> +typedef struct {
> +    int32_t size;

Still think that size should be unsigned.

> +    uint8_t runstate[100];
> +} GlobalState;
> +
> +static GlobalState global_state;
> +
> +static void global_state_store(void)
> +{
> +    if (!runstate_store((char *)global_state.runstate,
> +                        sizeof(global_state.runstate))) {
> +        printf("Runstate is too big\n");
> +        exit(-1);

Hmmmm:
  1) Shouldn't that be an error report? or an assert?
  2) Exit should use EXIT_FAILURE (which is +ve as well)
  3) But you shouldn't kill the guest on an outwards migration
     - just fail the migration.
  4) (And anyway this all seems overkill for sending a 
      status string).

> +    }
> +}
> +
> +static char *global_state_get_runstate(void)
> +{
> +    return (char *)global_state.runstate;
> +}
> +
> +static int global_state_post_load(void *opaque, int version_id)
> +{
> +    GlobalState *s = opaque;
> +    int ret = 0;
> +    char *runstate = (char *)s->runstate;
> +
> +    printf("loaded state: %s\n", runstate);

trace_.....

> +
> +    if (strcmp(runstate, "running") != 0) {
> +        Error *local_err = NULL;
> +        int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
> +                                -1, &local_err);

Is there a reason not to do the qapi_enum_parse first, and then
compare it's output to RUN_STATE_RUNNING?

> +
> +        if (r == -1) {
> +            if (local_err) {
> +                error_report_err(local_err);
> +            }
> +            return -1;

I'm not sure, but shouldn't that be -EINVAL ?
(Not that vmstate is consistent about it)

> +        }
> +        ret = vm_stop_force_state(r);

Kind of going back to adding the state transitions;
don't you need to allow INMIGRATE to * - because it could
be pretty much anything? (Shutdown? Suspended?)

> +    }
> +
> +   return ret;
> +}
> +
> +static void global_state_pre_save(void *opaque)
> +{
> +    GlobalState *s = opaque;
> +
> +    s->size = strlen((char *)s->runstate) + 1;
> +    printf("saved state: %s\n", s->runstate);

trace_

> +}
> +
> +static const VMStateDescription vmstate_globalstate = {
> +    .name = "globalstate",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .post_load = global_state_post_load,
> +    .pre_save = global_state_pre_save,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_INT32(size, GlobalState),
> +        VMSTATE_BUFFER(runstate, GlobalState),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +void register_global_state(void)
> +{
> +    /* We would use it independently that we receive it */
> +    strcpy((char *)&global_state.runstate, "");
> +    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
> +}
> +
>  /*
>   * Called on -incoming with a defer: uri.
>   * The migration can be started later after any parameters have been
> @@ -163,10 +239,20 @@ static void process_incoming_migration_co(void *opaque)
>          exit(EXIT_FAILURE);
>      }
> 
> -    if (autostart) {
> +    /* runstate == "" means that we haven't received it through the
> +     * wire, so we obey autostart.  runstate == runing means that we
> +     * need to run it, we need to make sure that we do it after
> +     * everything else has finished.  Every other state change is done
> +     * at the post_load function */
> +
> +    if (strcmp(global_state_get_runstate(), "running") == 0) {
>          vm_start();
> -    } else {
> -        runstate_set(RUN_STATE_PAUSED);
> +    } else if (strcmp(global_state_get_runstate(), "") == 0) {
> +        if (autostart) {
> +            vm_start();
> +        } else {
> +            runstate_set(RUN_STATE_PAUSED);
> +        }
>      }
>      migrate_decompress_threads_join();
>  }
> @@ -791,6 +877,7 @@ static void *migration_thread(void *opaque)
>                  qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
>                  old_vm_running = runstate_is_running();
> 
> +                global_state_store();
>                  ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
>                  if (ret >= 0) {
>                      qemu_file_set_rate_limit(s->file, INT64_MAX);
> diff --git a/vl.c b/vl.c
> index 555fd88..95acdb1 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -4473,6 +4473,7 @@ int main(int argc, char **argv, char **envp)
>          return 0;
>      }
> 
> +    register_global_state();
>      if (incoming) {
>          Error *local_err = NULL;
>          qemu_start_incoming_migration(incoming, &local_err);
> -- 
> 2.4.3
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Juan Quintela July 1, 2015, 7:53 a.m. UTC | #2
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Juan Quintela (quintela@redhat.com) wrote:
>> This includes a new section that for now just stores the current qemu state.
>> 
>> Right now, there are only one way to control what is the state of the
>> target after migration.
>> 
>> - If you run the target qemu with -S, it would start stopped.
>> - If you run the target qemu without -S, it would run just after migration finishes.
>> 
>> The problem here is what happens if we start the target without -S and
>> there happens one error during migration that puts current state as
>> -EIO.  Migration would ends (notice that the error happend doing block
>> IO, network IO, i.e. nothing related with migration), and when
>> migration finish, we would just "continue" running on destination,
>> probably hanging the guest/corruption data, whatever.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> ---
>>  include/migration/migration.h |  1 +
>>  migration/migration.c         | 93 +++++++++++++++++++++++++++++++++++++++++--
>>  vl.c                          |  1 +
>>  3 files changed, 92 insertions(+), 3 deletions(-)
>> 
>> diff --git a/include/migration/migration.h b/include/migration/migration.h
>> index 9387c8c..1280193 100644
>> --- a/include/migration/migration.h
>> +++ b/include/migration/migration.h
>> @@ -197,4 +197,5 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
>> 
>>  void ram_mig_init(void);
>>  void savevm_skip_section_footers(void);
>> +void register_global_state(void);
>>  #endif
>> diff --git a/migration/migration.c b/migration/migration.c
>> index b04b457..01bb90d 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -25,6 +25,7 @@
>>  #include "qemu/thread.h"
>>  #include "qmp-commands.h"
>>  #include "trace.h"
>> +#include "qapi/util.h"
>> 
>>  #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
>> 
>> @@ -96,6 +97,81 @@ void migration_incoming_state_destroy(void)
>>      mis_current = NULL;
>>  }
>> 
>> +
>> +typedef struct {
>> +    int32_t size;
>
> Still think that size should be unsigned.

changed


>
>> +    uint8_t runstate[100];
>> +} GlobalState;
>> +
>> +static GlobalState global_state;
>> +
>> +static void global_state_store(void)
>> +{
>> +    if (!runstate_store((char *)global_state.runstate,
>> +                        sizeof(global_state.runstate))) {
>> +        printf("Runstate is too big\n");
>> +        exit(-1);
>
> Hmmmm:
>   1) Shouldn't that be an error report? or an assert?
>   2) Exit should use EXIT_FAILURE (which is +ve as well)
>   3) But you shouldn't kill the guest on an outwards migration
>      - just fail the migration.
>   4) (And anyway this all seems overkill for sending a 
>       status string).

moved to a trace & and return -EINVAL.  Adjusted only caller to handle
the error.

>
>> +    }
>> +}
>> +
>> +static char *global_state_get_runstate(void)
>> +{
>> +    return (char *)global_state.runstate;
>> +}
>> +
>> +static int global_state_post_load(void *opaque, int version_id)
>> +{
>> +    GlobalState *s = opaque;
>> +    int ret = 0;
>> +    char *runstate = (char *)s->runstate;
>> +
>> +    printf("loaded state: %s\n", runstate);
>
> trace_.....

Done

>
>> +
>> +    if (strcmp(runstate, "running") != 0) {
>> +        Error *local_err = NULL;
>> +        int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
>> +                                -1, &local_err);
>
> Is there a reason not to do the qapi_enum_parse first, and then
> compare it's output to RUN_STATE_RUNNING?

Avoid running the enum_parse at all if we are not at running state, I
don't really care one way or another.

>
>> +
>> +        if (r == -1) {
>> +            if (local_err) {
>> +                error_report_err(local_err);
>> +            }
>> +            return -1;
>
> I'm not sure, but shouldn't that be -EINVAL ?
> (Not that vmstate is consistent about it)

Changed.


>> +        }
>> +        ret = vm_stop_force_state(r);
>
> Kind of going back to adding the state transitions;
> don't you need to allow INMIGRATE to * - because it could
> be pretty much anything? (Shutdown? Suspended?)

good question. Doing the change in the other patch.


>> +    }
>> +
>> +   return ret;
>> +}
>> +
>> +static void global_state_pre_save(void *opaque)
>> +{
>> +    GlobalState *s = opaque;
>> +
>> +    s->size = strlen((char *)s->runstate) + 1;
>> +    printf("saved state: %s\n", s->runstate);
>
> trace_

Done.

>> +}
>> +
>> +static const VMStateDescription vmstate_globalstate = {
>> +    .name = "globalstate",
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .post_load = global_state_post_load,
>> +    .pre_save = global_state_pre_save,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_INT32(size, GlobalState),
>> +        VMSTATE_BUFFER(runstate, GlobalState),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +void register_global_state(void)
>> +{
>> +    /* We would use it independently that we receive it */
>> +    strcpy((char *)&global_state.runstate, "");
>> +    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
>> +}
>> +
>>  /*
>>   * Called on -incoming with a defer: uri.
>>   * The migration can be started later after any parameters have been
>> @@ -163,10 +239,20 @@ static void process_incoming_migration_co(void *opaque)
>>          exit(EXIT_FAILURE);
>>      }
>> 
>> -    if (autostart) {
>> +    /* runstate == "" means that we haven't received it through the
>> +     * wire, so we obey autostart.  runstate == runing means that we
>> +     * need to run it, we need to make sure that we do it after
>> +     * everything else has finished.  Every other state change is done
>> +     * at the post_load function */
>> +
>> +    if (strcmp(global_state_get_runstate(), "running") == 0) {
>>          vm_start();
>> -    } else {
>> -        runstate_set(RUN_STATE_PAUSED);
>> +    } else if (strcmp(global_state_get_runstate(), "") == 0) {
>> +        if (autostart) {
>> +            vm_start();
>> +        } else {
>> +            runstate_set(RUN_STATE_PAUSED);
>> +        }
>>      }
>>      migrate_decompress_threads_join();
>>  }
>> @@ -791,6 +877,7 @@ static void *migration_thread(void *opaque)
>>                  qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
>>                  old_vm_running = runstate_is_running();
>> 
>> +                global_state_store();
>>                  ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
>>                  if (ret >= 0) {
>>                      qemu_file_set_rate_limit(s->file, INT64_MAX);
>> diff --git a/vl.c b/vl.c
>> index 555fd88..95acdb1 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -4473,6 +4473,7 @@ int main(int argc, char **argv, char **envp)
>>          return 0;
>>      }
>> 
>> +    register_global_state();
>>      if (incoming) {
>>          Error *local_err = NULL;
>>          qemu_start_incoming_migration(incoming, &local_err);
>> -- 
>> 2.4.3
>> 
>> 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9387c8c..1280193 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -197,4 +197,5 @@  size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,

 void ram_mig_init(void);
 void savevm_skip_section_footers(void);
+void register_global_state(void);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index b04b457..01bb90d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -25,6 +25,7 @@ 
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
+#include "qapi/util.h"

 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */

@@ -96,6 +97,81 @@  void migration_incoming_state_destroy(void)
     mis_current = NULL;
 }

+
+typedef struct {
+    int32_t size;
+    uint8_t runstate[100];
+} GlobalState;
+
+static GlobalState global_state;
+
+static void global_state_store(void)
+{
+    if (!runstate_store((char *)global_state.runstate,
+                        sizeof(global_state.runstate))) {
+        printf("Runstate is too big\n");
+        exit(-1);
+    }
+}
+
+static char *global_state_get_runstate(void)
+{
+    return (char *)global_state.runstate;
+}
+
+static int global_state_post_load(void *opaque, int version_id)
+{
+    GlobalState *s = opaque;
+    int ret = 0;
+    char *runstate = (char *)s->runstate;
+
+    printf("loaded state: %s\n", runstate);
+
+    if (strcmp(runstate, "running") != 0) {
+        Error *local_err = NULL;
+        int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
+                                -1, &local_err);
+
+        if (r == -1) {
+            if (local_err) {
+                error_report_err(local_err);
+            }
+            return -1;
+        }
+        ret = vm_stop_force_state(r);
+    }
+
+   return ret;
+}
+
+static void global_state_pre_save(void *opaque)
+{
+    GlobalState *s = opaque;
+
+    s->size = strlen((char *)s->runstate) + 1;
+    printf("saved state: %s\n", s->runstate);
+}
+
+static const VMStateDescription vmstate_globalstate = {
+    .name = "globalstate",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = global_state_post_load,
+    .pre_save = global_state_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(size, GlobalState),
+        VMSTATE_BUFFER(runstate, GlobalState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+void register_global_state(void)
+{
+    /* We would use it independently that we receive it */
+    strcpy((char *)&global_state.runstate, "");
+    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
+}
+
 /*
  * Called on -incoming with a defer: uri.
  * The migration can be started later after any parameters have been
@@ -163,10 +239,20 @@  static void process_incoming_migration_co(void *opaque)
         exit(EXIT_FAILURE);
     }

-    if (autostart) {
+    /* runstate == "" means that we haven't received it through the
+     * wire, so we obey autostart.  runstate == runing means that we
+     * need to run it, we need to make sure that we do it after
+     * everything else has finished.  Every other state change is done
+     * at the post_load function */
+
+    if (strcmp(global_state_get_runstate(), "running") == 0) {
         vm_start();
-    } else {
-        runstate_set(RUN_STATE_PAUSED);
+    } else if (strcmp(global_state_get_runstate(), "") == 0) {
+        if (autostart) {
+            vm_start();
+        } else {
+            runstate_set(RUN_STATE_PAUSED);
+        }
     }
     migrate_decompress_threads_join();
 }
@@ -791,6 +877,7 @@  static void *migration_thread(void *opaque)
                 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
                 old_vm_running = runstate_is_running();

+                global_state_store();
                 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
                 if (ret >= 0) {
                     qemu_file_set_rate_limit(s->file, INT64_MAX);
diff --git a/vl.c b/vl.c
index 555fd88..95acdb1 100644
--- a/vl.c
+++ b/vl.c
@@ -4473,6 +4473,7 @@  int main(int argc, char **argv, char **envp)
         return 0;
     }

+    register_global_state();
     if (incoming) {
         Error *local_err = NULL;
         qemu_start_incoming_migration(incoming, &local_err);