Message ID | 1443515898-3594-30-git-send-email-dgilbert@redhat.com |
---|---|
State | New |
Headers | show |
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > 'MIGRATION_STATUS_POSTCOPY_ACTIVE' is entered after migrate_start_postcopy > > 'migration_in_postcopy' is provided for other sections to know if > they're in postcopy. > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > Reviewed-by: Eric Blake <eblake@redhat.com> > Reviewed-by: Juan Quintela <quintela@redhat.com> > Reviewed-by: Amit Shah <amit.shah@redhat.com> Just wondering > diff --git a/migration/migration.c b/migration/migration.c > index 5ee2c11..2ae5909 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -439,6 +439,7 @@ static bool migration_is_active(MigrationState *ms) > { > switch (ms->state) { > case MIGRATION_STATUS_ACTIVE: > + case MIGRATION_STATUS_POSTCOPY_ACTIVE: > case MIGRATION_STATUS_SETUP: > return true; > > @@ -509,6 +510,39 @@ MigrationInfo *qmp_query_migrate(Error **errp) > > get_xbzrle_cache_stats(info); > break; > + case MIGRATION_STATUS_POSTCOPY_ACTIVE: > + /* Mostly the same as active; TODO add some postcopy stats */ > + info->has_status = true; > + info->has_total_time = true; > + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > + - s->total_time; > + info->has_expected_downtime = true; > + info->expected_downtime = s->expected_downtime; > + info->has_setup_time = true; > + info->setup_time = s->setup_time; > + > + info->has_ram = true; > + info->ram = g_malloc0(sizeof(*info->ram)); > + info->ram->transferred = ram_bytes_transferred(); > + info->ram->remaining = ram_bytes_remaining(); > + info->ram->total = ram_bytes_total(); > + info->ram->duplicate = dup_mig_pages_transferred(); > + info->ram->skipped = skipped_mig_pages_transferred(); > + info->ram->normal = norm_mig_pages_transferred(); > + info->ram->normal_bytes = norm_mig_bytes_transferred(); > + info->ram->dirty_pages_rate = s->dirty_pages_rate; > + info->ram->mbps = s->mbps; > + > + if (blk_mig_active()) { > + info->has_disk = true; > + info->disk = g_malloc0(sizeof(*info->disk)); > + info->disk->transferred = blk_mig_bytes_transferred(); > + info->disk->remaining = blk_mig_bytes_remaining(); > + info->disk->total = blk_mig_bytes_total(); > + } Are we sure that disk migration works with postcopy? I would expect no ...
* Juan Quintela (quintela@redhat.com) wrote: > "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > > > 'MIGRATION_STATUS_POSTCOPY_ACTIVE' is entered after migrate_start_postcopy > > > > 'migration_in_postcopy' is provided for other sections to know if > > they're in postcopy. > > > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > Reviewed-by: Eric Blake <eblake@redhat.com> > > Reviewed-by: Juan Quintela <quintela@redhat.com> > > Reviewed-by: Amit Shah <amit.shah@redhat.com> > > Just wondering > > > > diff --git a/migration/migration.c b/migration/migration.c > > index 5ee2c11..2ae5909 100644 > > --- a/migration/migration.c > > +++ b/migration/migration.c > > @@ -439,6 +439,7 @@ static bool migration_is_active(MigrationState *ms) > > { > > switch (ms->state) { > > case MIGRATION_STATUS_ACTIVE: > > + case MIGRATION_STATUS_POSTCOPY_ACTIVE: > > case MIGRATION_STATUS_SETUP: > > return true; > > > > @@ -509,6 +510,39 @@ MigrationInfo *qmp_query_migrate(Error **errp) > > > > get_xbzrle_cache_stats(info); > > break; > > + case MIGRATION_STATUS_POSTCOPY_ACTIVE: > > + /* Mostly the same as active; TODO add some postcopy stats */ > > + info->has_status = true; > > + info->has_total_time = true; > > + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > > + - s->total_time; > > + info->has_expected_downtime = true; > > + info->expected_downtime = s->expected_downtime; > > + info->has_setup_time = true; > > + info->setup_time = s->setup_time; > > + > > + info->has_ram = true; > > + info->ram = g_malloc0(sizeof(*info->ram)); > > + info->ram->transferred = ram_bytes_transferred(); > > + info->ram->remaining = ram_bytes_remaining(); > > + info->ram->total = ram_bytes_total(); > > + info->ram->duplicate = dup_mig_pages_transferred(); > > + info->ram->skipped = skipped_mig_pages_transferred(); > > + info->ram->normal = norm_mig_pages_transferred(); > > + info->ram->normal_bytes = norm_mig_bytes_transferred(); > > + info->ram->dirty_pages_rate = s->dirty_pages_rate; > > + info->ram->mbps = s->mbps; > > + > > + if (blk_mig_active()) { > > + info->has_disk = true; > > + info->disk = g_malloc0(sizeof(*info->disk)); > > + info->disk->transferred = blk_mig_bytes_transferred(); > > + info->disk->remaining = blk_mig_bytes_remaining(); > > + info->disk->total = blk_mig_bytes_total(); > > + } > > Are we sure that disk migration works with postcopy? I would expect no ... Well, the theory goes that it should; although probably not a good idea. What should happen is that since the block migration will reply with a non-postcopiable pending size > 0, it will stay in precopy mode until the block migration is done; once that happens (and the block code returns a pending_size < bandwidth*downtime) it should flip into postcopy. It's a bad idea because: a) While still in precopy mode the RAM migration will still be trying to transfer in precopy mode as well, which is wasting bandwidth fighting with the block IO (we could probably quiesce that once postcopy is turned on). b) I'm a bit worried about how much the block migration might try and use in RAM when it does it's save_complete into the postcopy tempoary buffer. It should be possible to implement a postcopy for block as well that would run concurrently with the psotcopying of RAM; you'd have to make sure you scheduled requests that came back to the source so that slow disc reads didn't block RAM requests. (Having said that, my test I just did of it didn't work; I'll try and have a look at that). Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/include/migration/migration.h b/include/migration/migration.h index 2176666..219032d 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -170,6 +170,8 @@ MigrationState *migrate_init(const MigrationParams *params); bool migration_in_setup(MigrationState *); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); +/* True if outgoing migration has entered postcopy phase */ +bool migration_in_postcopy(MigrationState *); MigrationState *migrate_get_current(void); void migrate_compress_threads_create(void); diff --git a/migration/migration.c b/migration/migration.c index 5ee2c11..2ae5909 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -439,6 +439,7 @@ static bool migration_is_active(MigrationState *ms) { switch (ms->state) { case MIGRATION_STATUS_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_SETUP: return true; @@ -509,6 +510,39 @@ MigrationInfo *qmp_query_migrate(Error **errp) get_xbzrle_cache_stats(info); break; + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + /* Mostly the same as active; TODO add some postcopy stats */ + info->has_status = true; + info->has_total_time = true; + info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + - s->total_time; + info->has_expected_downtime = true; + info->expected_downtime = s->expected_downtime; + info->has_setup_time = true; + info->setup_time = s->setup_time; + + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_bytes_transferred(); + info->ram->remaining = ram_bytes_remaining(); + info->ram->total = ram_bytes_total(); + info->ram->duplicate = dup_mig_pages_transferred(); + info->ram->skipped = skipped_mig_pages_transferred(); + info->ram->normal = norm_mig_pages_transferred(); + info->ram->normal_bytes = norm_mig_bytes_transferred(); + info->ram->dirty_pages_rate = s->dirty_pages_rate; + info->ram->mbps = s->mbps; + + if (blk_mig_active()) { + info->has_disk = true; + info->disk = g_malloc0(sizeof(*info->disk)); + info->disk->transferred = blk_mig_bytes_transferred(); + info->disk->remaining = blk_mig_bytes_remaining(); + info->disk->total = blk_mig_bytes_total(); + } + + get_xbzrle_cache_stats(info); + break; case MIGRATION_STATUS_COMPLETED: get_xbzrle_cache_stats(info); @@ -550,8 +584,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, MigrationState *s = migrate_get_current(); MigrationCapabilityStatusList *cap; - if (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_SETUP) { + if (migration_is_active(s)) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } @@ -666,7 +699,8 @@ static void migrate_fd_cleanup(void *opaque) s->file = NULL; } - assert(s->state != MIGRATION_STATUS_ACTIVE); + assert((s->state != MIGRATION_STATUS_ACTIVE) && + (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); if (s->state != MIGRATION_STATUS_COMPLETED) { qemu_savevm_state_cancel(); @@ -700,8 +734,7 @@ static void migrate_fd_cancel(MigrationState *s) do { old_state = s->state; - if (old_state != MIGRATION_STATUS_SETUP && - old_state != MIGRATION_STATUS_ACTIVE) { + if (!migration_is_active(s)) { break; } migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING); @@ -745,6 +778,11 @@ bool migration_has_failed(MigrationState *s) s->state == MIGRATION_STATUS_FAILED); } +bool migration_in_postcopy(MigrationState *s) +{ + return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); +} + MigrationState *migrate_init(const MigrationParams *params) { MigrationState *s = migrate_get_current(); @@ -825,8 +863,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, params.blk = has_blk && blk; params.shared = has_inc && inc; - if (s->state == MIGRATION_STATUS_ACTIVE || - s->state == MIGRATION_STATUS_SETUP || + if (migration_is_active(s) || s->state == MIGRATION_STATUS_CANCELLING) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; @@ -1203,7 +1240,10 @@ static void *migration_thread(void *opaque) s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); - while (s->state == MIGRATION_STATUS_ACTIVE) { + trace_migration_thread_setup_complete(); + + while (s->state == MIGRATION_STATUS_ACTIVE || + s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { int64_t current_time; uint64_t pending_size; diff --git a/qapi-schema.json b/qapi-schema.json index 48644f5..f814174 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -430,6 +430,8 @@ # # @active: in the process of doing migration. # +# @postcopy-active: like active, but now in postcopy mode. (since 2.5) +# # @completed: migration is finished. # # @failed: some error occurred during migration process. @@ -439,7 +441,7 @@ ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', - 'active', 'completed', 'failed' ] } + 'active', 'postcopy-active', 'completed', 'failed' ] } ## # @MigrationInfo diff --git a/trace-events b/trace-events index 4fff040..e68e69d 100644 --- a/trace-events +++ b/trace-events @@ -1440,6 +1440,7 @@ migrate_fd_error(void) "" migrate_fd_cancel(void) "" migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")" migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d" +migration_thread_setup_complete(void) "" open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" source_return_path_thread_bad_end(void) ""