diff mbox

[v8,29/54] MIGRATION_STATUS_POSTCOPY_ACTIVE: Add new migration state

Message ID 1443515898-3594-30-git-send-email-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert Sept. 29, 2015, 8:37 a.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

'MIGRATION_STATUS_POSTCOPY_ACTIVE' is entered after migrate_start_postcopy

'migration_in_postcopy' is provided for other sections to know if
they're in postcopy.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
---
 include/migration/migration.h |  2 ++
 migration/migration.c         | 56 ++++++++++++++++++++++++++++++++++++-------
 qapi-schema.json              |  4 +++-
 trace-events                  |  1 +
 4 files changed, 54 insertions(+), 9 deletions(-)

Comments

Juan Quintela Oct. 20, 2015, 1:35 p.m. UTC | #1
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>
> 'MIGRATION_STATUS_POSTCOPY_ACTIVE' is entered after migrate_start_postcopy
>
> 'migration_in_postcopy' is provided for other sections to know if
> they're in postcopy.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Reviewed-by: Eric Blake <eblake@redhat.com>
> Reviewed-by: Juan Quintela <quintela@redhat.com>
> Reviewed-by: Amit Shah <amit.shah@redhat.com>

Just wondering


> diff --git a/migration/migration.c b/migration/migration.c
> index 5ee2c11..2ae5909 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -439,6 +439,7 @@ static bool migration_is_active(MigrationState *ms)
>  {
>      switch (ms->state) {
>      case MIGRATION_STATUS_ACTIVE:
> +    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
>      case MIGRATION_STATUS_SETUP:
>          return true;
>  
> @@ -509,6 +510,39 @@ MigrationInfo *qmp_query_migrate(Error **errp)
>  
>          get_xbzrle_cache_stats(info);
>          break;
> +    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> +        /* Mostly the same as active; TODO add some postcopy stats */
> +        info->has_status = true;
> +        info->has_total_time = true;
> +        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
> +            - s->total_time;
> +        info->has_expected_downtime = true;
> +        info->expected_downtime = s->expected_downtime;
> +        info->has_setup_time = true;
> +        info->setup_time = s->setup_time;
> +
> +        info->has_ram = true;
> +        info->ram = g_malloc0(sizeof(*info->ram));
> +        info->ram->transferred = ram_bytes_transferred();
> +        info->ram->remaining = ram_bytes_remaining();
> +        info->ram->total = ram_bytes_total();
> +        info->ram->duplicate = dup_mig_pages_transferred();
> +        info->ram->skipped = skipped_mig_pages_transferred();
> +        info->ram->normal = norm_mig_pages_transferred();
> +        info->ram->normal_bytes = norm_mig_bytes_transferred();
> +        info->ram->dirty_pages_rate = s->dirty_pages_rate;
> +        info->ram->mbps = s->mbps;
> +
> +        if (blk_mig_active()) {
> +            info->has_disk = true;
> +            info->disk = g_malloc0(sizeof(*info->disk));
> +            info->disk->transferred = blk_mig_bytes_transferred();
> +            info->disk->remaining = blk_mig_bytes_remaining();
> +            info->disk->total = blk_mig_bytes_total();
> +        }

Are we sure that disk migration works with postcopy?  I would expect no ...
Dr. David Alan Gilbert Oct. 30, 2015, 6:19 p.m. UTC | #2
* Juan Quintela (quintela@redhat.com) wrote:
> "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> >
> > 'MIGRATION_STATUS_POSTCOPY_ACTIVE' is entered after migrate_start_postcopy
> >
> > 'migration_in_postcopy' is provided for other sections to know if
> > they're in postcopy.
> >
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > Reviewed-by: Eric Blake <eblake@redhat.com>
> > Reviewed-by: Juan Quintela <quintela@redhat.com>
> > Reviewed-by: Amit Shah <amit.shah@redhat.com>
> 
> Just wondering
> 
> 
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 5ee2c11..2ae5909 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -439,6 +439,7 @@ static bool migration_is_active(MigrationState *ms)
> >  {
> >      switch (ms->state) {
> >      case MIGRATION_STATUS_ACTIVE:
> > +    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> >      case MIGRATION_STATUS_SETUP:
> >          return true;
> >  
> > @@ -509,6 +510,39 @@ MigrationInfo *qmp_query_migrate(Error **errp)
> >  
> >          get_xbzrle_cache_stats(info);
> >          break;
> > +    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
> > +        /* Mostly the same as active; TODO add some postcopy stats */
> > +        info->has_status = true;
> > +        info->has_total_time = true;
> > +        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
> > +            - s->total_time;
> > +        info->has_expected_downtime = true;
> > +        info->expected_downtime = s->expected_downtime;
> > +        info->has_setup_time = true;
> > +        info->setup_time = s->setup_time;
> > +
> > +        info->has_ram = true;
> > +        info->ram = g_malloc0(sizeof(*info->ram));
> > +        info->ram->transferred = ram_bytes_transferred();
> > +        info->ram->remaining = ram_bytes_remaining();
> > +        info->ram->total = ram_bytes_total();
> > +        info->ram->duplicate = dup_mig_pages_transferred();
> > +        info->ram->skipped = skipped_mig_pages_transferred();
> > +        info->ram->normal = norm_mig_pages_transferred();
> > +        info->ram->normal_bytes = norm_mig_bytes_transferred();
> > +        info->ram->dirty_pages_rate = s->dirty_pages_rate;
> > +        info->ram->mbps = s->mbps;
> > +
> > +        if (blk_mig_active()) {
> > +            info->has_disk = true;
> > +            info->disk = g_malloc0(sizeof(*info->disk));
> > +            info->disk->transferred = blk_mig_bytes_transferred();
> > +            info->disk->remaining = blk_mig_bytes_remaining();
> > +            info->disk->total = blk_mig_bytes_total();
> > +        }
> 
> Are we sure that disk migration works with postcopy?  I would expect no ...

Well, the theory goes that it should; although probably not a good
idea.
What should happen is that since the block migration will reply
with a non-postcopiable pending size > 0, it will stay in precopy
mode until the block migration is done; once that happens
(and the block code returns a pending_size < bandwidth*downtime)
it should flip into postcopy.

It's a bad idea because:
  a) While still in precopy mode the RAM migration will still be
trying to transfer in precopy mode as well, which is wasting bandwidth
fighting with the block IO (we could probably quiesce that once
postcopy is turned on).
  b) I'm a bit worried about how much the block migration might try
and use in RAM when it does it's save_complete into the postcopy
tempoary buffer.

It should be possible to implement a postcopy for block as well
that would run concurrently with the psotcopying of RAM; you'd have
to make sure you scheduled requests that came back to the source so
that slow disc reads didn't block RAM requests.

(Having said that, my test I just did of it didn't work; I'll
try and have a look at that).

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2176666..219032d 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -170,6 +170,8 @@  MigrationState *migrate_init(const MigrationParams *params);
 bool migration_in_setup(MigrationState *);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
+/* True if outgoing migration has entered postcopy phase */
+bool migration_in_postcopy(MigrationState *);
 MigrationState *migrate_get_current(void);
 
 void migrate_compress_threads_create(void);
diff --git a/migration/migration.c b/migration/migration.c
index 5ee2c11..2ae5909 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -439,6 +439,7 @@  static bool migration_is_active(MigrationState *ms)
 {
     switch (ms->state) {
     case MIGRATION_STATUS_ACTIVE:
+    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_SETUP:
         return true;
 
@@ -509,6 +510,39 @@  MigrationInfo *qmp_query_migrate(Error **errp)
 
         get_xbzrle_cache_stats(info);
         break;
+    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+        /* Mostly the same as active; TODO add some postcopy stats */
+        info->has_status = true;
+        info->has_total_time = true;
+        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+            - s->total_time;
+        info->has_expected_downtime = true;
+        info->expected_downtime = s->expected_downtime;
+        info->has_setup_time = true;
+        info->setup_time = s->setup_time;
+
+        info->has_ram = true;
+        info->ram = g_malloc0(sizeof(*info->ram));
+        info->ram->transferred = ram_bytes_transferred();
+        info->ram->remaining = ram_bytes_remaining();
+        info->ram->total = ram_bytes_total();
+        info->ram->duplicate = dup_mig_pages_transferred();
+        info->ram->skipped = skipped_mig_pages_transferred();
+        info->ram->normal = norm_mig_pages_transferred();
+        info->ram->normal_bytes = norm_mig_bytes_transferred();
+        info->ram->dirty_pages_rate = s->dirty_pages_rate;
+        info->ram->mbps = s->mbps;
+
+        if (blk_mig_active()) {
+            info->has_disk = true;
+            info->disk = g_malloc0(sizeof(*info->disk));
+            info->disk->transferred = blk_mig_bytes_transferred();
+            info->disk->remaining = blk_mig_bytes_remaining();
+            info->disk->total = blk_mig_bytes_total();
+        }
+
+        get_xbzrle_cache_stats(info);
+        break;
     case MIGRATION_STATUS_COMPLETED:
         get_xbzrle_cache_stats(info);
 
@@ -550,8 +584,7 @@  void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
     MigrationState *s = migrate_get_current();
     MigrationCapabilityStatusList *cap;
 
-    if (s->state == MIGRATION_STATUS_ACTIVE ||
-        s->state == MIGRATION_STATUS_SETUP) {
+    if (migration_is_active(s)) {
         error_setg(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -666,7 +699,8 @@  static void migrate_fd_cleanup(void *opaque)
         s->file = NULL;
     }
 
-    assert(s->state != MIGRATION_STATUS_ACTIVE);
+    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
+           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
 
     if (s->state != MIGRATION_STATUS_COMPLETED) {
         qemu_savevm_state_cancel();
@@ -700,8 +734,7 @@  static void migrate_fd_cancel(MigrationState *s)
 
     do {
         old_state = s->state;
-        if (old_state != MIGRATION_STATUS_SETUP &&
-            old_state != MIGRATION_STATUS_ACTIVE) {
+        if (!migration_is_active(s)) {
             break;
         }
         migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
@@ -745,6 +778,11 @@  bool migration_has_failed(MigrationState *s)
             s->state == MIGRATION_STATUS_FAILED);
 }
 
+bool migration_in_postcopy(MigrationState *s)
+{
+    return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+}
+
 MigrationState *migrate_init(const MigrationParams *params)
 {
     MigrationState *s = migrate_get_current();
@@ -825,8 +863,7 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.blk = has_blk && blk;
     params.shared = has_inc && inc;
 
-    if (s->state == MIGRATION_STATUS_ACTIVE ||
-        s->state == MIGRATION_STATUS_SETUP ||
+    if (migration_is_active(s) ||
         s->state == MIGRATION_STATUS_CANCELLING) {
         error_setg(errp, QERR_MIGRATION_ACTIVE);
         return;
@@ -1203,7 +1240,10 @@  static void *migration_thread(void *opaque)
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
     migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);
 
-    while (s->state == MIGRATION_STATUS_ACTIVE) {
+    trace_migration_thread_setup_complete();
+
+    while (s->state == MIGRATION_STATUS_ACTIVE ||
+           s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
         int64_t current_time;
         uint64_t pending_size;
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 48644f5..f814174 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -430,6 +430,8 @@ 
 #
 # @active: in the process of doing migration.
 #
+# @postcopy-active: like active, but now in postcopy mode. (since 2.5)
+#
 # @completed: migration is finished.
 #
 # @failed: some error occurred during migration process.
@@ -439,7 +441,7 @@ 
 ##
 { 'enum': 'MigrationStatus',
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
-            'active', 'completed', 'failed' ] }
+            'active', 'postcopy-active', 'completed', 'failed' ] }
 
 ##
 # @MigrationInfo
diff --git a/trace-events b/trace-events
index 4fff040..e68e69d 100644
--- a/trace-events
+++ b/trace-events
@@ -1440,6 +1440,7 @@  migrate_fd_error(void) ""
 migrate_fd_cancel(void) ""
 migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
 migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
+migration_thread_setup_complete(void) ""
 open_return_path_on_source(void) ""
 open_return_path_on_source_continue(void) ""
 source_return_path_thread_bad_end(void) ""