Message ID | 5727cd0876e1a8d3db451feb46afb3281875cca3.1669047366.git.huangy81@chinatelecom.cn |
---|---|
State | New |
Headers | show |
Series | migration: introduce dirtylimit capability | expand |
On Mon, Nov 21, 2022 at 11:26:39AM -0500, huangy81@chinatelecom.cn wrote: > diff --git a/migration/migration.c b/migration/migration.c > index 86950a1..096b61a 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -240,6 +240,7 @@ void migration_cancel(const Error *error) > if (error) { > migrate_set_error(current_migration, error); > } > + qmp_cancel_vcpu_dirty_limit(false, -1, NULL); Disable it only if migrate_dirty_limit() is true? It seems okay if the admin wants to use dirtylimit separately from migration. > migrate_fd_cancel(current_migration); > } [...] > @@ -1148,22 +1175,31 @@ static void migration_trigger_throttle(RAMState *rs) > uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; > uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; > > - /* During block migration the auto-converge logic incorrectly detects > - * that ram migration makes no progress. Avoid this by disabling the > - * throttling logic during the bulk phase of block migration. */ > - if (migrate_auto_converge() && !blk_mig_bulk_active()) { > - /* The following detection logic can be refined later. For now: > - Check to see if the ratio between dirtied bytes and the approx. > - amount of bytes that just got transferred since the last time > - we were in this routine reaches the threshold. If that happens > - twice, start or increase throttling. */ > - > - if ((bytes_dirty_period > bytes_dirty_threshold) && > - (++rs->dirty_rate_high_cnt >= 2)) { > + /* > + * The following detection logic can be refined later. For now: > + * Check to see if the ratio between dirtied bytes and the approx. > + * amount of bytes that just got transferred since the last time > + * we were in this routine reaches the threshold. If that happens > + * twice, start or increase throttling. 
> + */ > + > + if ((bytes_dirty_period > bytes_dirty_threshold) && > + (++rs->dirty_rate_high_cnt >= 2)) { > + rs->dirty_rate_high_cnt = 0; > + /* > + * During block migration the auto-converge logic incorrectly detects > + * that ram migration makes no progress. Avoid this by disabling the > + * throttling logic during the bulk phase of block migration > + */ > + > + if (migrate_auto_converge() && !blk_mig_bulk_active()) { Does the dirtylimit cap need to check blk_mig_bulk_active() too? I assume that check was used to ignore the bulk block migration phase, where major bandwidth will be consumed by block migrations, so the measured bandwidth is not accurate. IIUC it applies to dirtylimit too. > trace_migration_throttle(); > - rs->dirty_rate_high_cnt = 0; > mig_throttle_guest_down(bytes_dirty_period, > bytes_dirty_threshold); > + } else if (migrate_dirty_limit() && > + kvm_dirty_ring_enabled() && > + migration_is_active(s)) { Are the "kvm_dirty_ring_enabled()" and "migration_is_active(s)" checks helpful? Can we rely on migrate_dirty_limit() alone?
在 2022/11/30 7:17, Peter Xu 写道: > On Mon, Nov 21, 2022 at 11:26:39AM -0500, huangy81@chinatelecom.cn wrote: >> diff --git a/migration/migration.c b/migration/migration.c >> index 86950a1..096b61a 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -240,6 +240,7 @@ void migration_cancel(const Error *error) >> if (error) { >> migrate_set_error(current_migration, error); >> } >> + qmp_cancel_vcpu_dirty_limit(false, -1, NULL); > > Disable it only if migrate_dirty_limit() is true? It seems okay if the > admin wants to use dirtylimit separately from migration. Ok. > >> migrate_fd_cancel(current_migration); >> } > > [...] > >> @@ -1148,22 +1175,31 @@ static void migration_trigger_throttle(RAMState *rs) >> uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; >> uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; >> >> - /* During block migration the auto-converge logic incorrectly detects >> - * that ram migration makes no progress. Avoid this by disabling the >> - * throttling logic during the bulk phase of block migration. */ >> - if (migrate_auto_converge() && !blk_mig_bulk_active()) { >> - /* The following detection logic can be refined later. For now: >> - Check to see if the ratio between dirtied bytes and the approx. >> - amount of bytes that just got transferred since the last time >> - we were in this routine reaches the threshold. If that happens >> - twice, start or increase throttling. */ >> - >> - if ((bytes_dirty_period > bytes_dirty_threshold) && >> - (++rs->dirty_rate_high_cnt >= 2)) { >> + /* >> + * The following detection logic can be refined later. For now: >> + * Check to see if the ratio between dirtied bytes and the approx. >> + * amount of bytes that just got transferred since the last time >> + * we were in this routine reaches the threshold. If that happens >> + * twice, start or increase throttling. 
>> + */ >> + >> + if ((bytes_dirty_period > bytes_dirty_threshold) && >> + (++rs->dirty_rate_high_cnt >= 2)) { >> + rs->dirty_rate_high_cnt = 0; >> + /* >> + * During block migration the auto-converge logic incorrectly detects >> + * that ram migration makes no progress. Avoid this by disabling the >> + * throttling logic during the bulk phase of block migration >> + */ >> + >> + if (migrate_auto_converge() && !blk_mig_bulk_active()) { > > Does the dirtylimit cap need to check blk_mig_bulk_active() too? I assume > that check was used to ignore the bulk block migration phase where major > bandwidth will be consumed by block migrations so the measured bandwidth is > not accurate. IIUC it applies to dirtylimit too. Indeed, I'll add this in the next version. > >> trace_migration_throttle(); >> - rs->dirty_rate_high_cnt = 0; >> mig_throttle_guest_down(bytes_dirty_period, >> bytes_dirty_threshold); >> + } else if (migrate_dirty_limit() && >> + kvm_dirty_ring_enabled() && >> + migration_is_active(s)) { > > Are the "kvm_dirty_ring_enabled()" and "migration_is_active(s)" checks helpful? > Can we rely on migrate_dirty_limit() alone? In qmp_set_vcpu_dirty_limit, it checks whether KVM is enabled and the dirty ring size is set. When the "dirty-limit" capability is set, we also check this in migrate_caps_check, so kvm_dirty_ring_enabled can indeed be dropped. As for migration_is_active, dirty-limit can be set at any time, and migration is already active in this path. It can also be dropped. I'll fix this in the next version. >
diff --git a/migration/migration.c b/migration/migration.c index 86950a1..096b61a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -240,6 +240,7 @@ void migration_cancel(const Error *error) if (error) { migrate_set_error(current_migration, error); } + qmp_cancel_vcpu_dirty_limit(false, -1, NULL); migrate_fd_cancel(current_migration); } diff --git a/migration/ram.c b/migration/ram.c index dc1de9d..94516b7 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -45,6 +45,7 @@ #include "qapi/error.h" #include "qapi/qapi-types-migration.h" #include "qapi/qapi-events-migration.h" +#include "qapi/qapi-commands-migration.h" #include "qapi/qmp/qerror.h" #include "trace.h" #include "exec/ram_addr.h" @@ -57,6 +58,8 @@ #include "qemu/iov.h" #include "multifd.h" #include "sysemu/runstate.h" +#include "sysemu/dirtylimit.h" +#include "sysemu/kvm.h" #include "hw/boards.h" /* for machine_dump_guest_core() */ @@ -1139,6 +1142,30 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) } } +/* + * Enable dirty-limit to throttle down the guest + */ +static void migration_dirty_limit_guest(void) +{ + static int64_t quota_dirtyrate; + MigrationState *s = migrate_get_current(); + + /* + * If dirty limit already enabled and migration parameter + * vcpu-dirty-limit untouched. 
+ */ + if (dirtylimit_in_service() && + quota_dirtyrate == s->parameters.vcpu_dirty_limit) { + return; + } + + quota_dirtyrate = s->parameters.vcpu_dirty_limit; + + /* Set or update quota dirty limit */ + qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL); + trace_migration_dirty_limit_guest(quota_dirtyrate); +} + static void migration_trigger_throttle(RAMState *rs) { MigrationState *s = migrate_get_current(); @@ -1148,22 +1175,31 @@ static void migration_trigger_throttle(RAMState *rs) uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; - /* During block migration the auto-converge logic incorrectly detects - * that ram migration makes no progress. Avoid this by disabling the - * throttling logic during the bulk phase of block migration. */ - if (migrate_auto_converge() && !blk_mig_bulk_active()) { - /* The following detection logic can be refined later. For now: - Check to see if the ratio between dirtied bytes and the approx. - amount of bytes that just got transferred since the last time - we were in this routine reaches the threshold. If that happens - twice, start or increase throttling. */ - - if ((bytes_dirty_period > bytes_dirty_threshold) && - (++rs->dirty_rate_high_cnt >= 2)) { + /* + * The following detection logic can be refined later. For now: + * Check to see if the ratio between dirtied bytes and the approx. + * amount of bytes that just got transferred since the last time + * we were in this routine reaches the threshold. If that happens + * twice, start or increase throttling. + */ + + if ((bytes_dirty_period > bytes_dirty_threshold) && + (++rs->dirty_rate_high_cnt >= 2)) { + rs->dirty_rate_high_cnt = 0; + /* + * During block migration the auto-converge logic incorrectly detects + * that ram migration makes no progress. 
Avoid this by disabling the + * throttling logic during the bulk phase of block migration + */ + + if (migrate_auto_converge() && !blk_mig_bulk_active()) { trace_migration_throttle(); - rs->dirty_rate_high_cnt = 0; mig_throttle_guest_down(bytes_dirty_period, bytes_dirty_threshold); + } else if (migrate_dirty_limit() && + kvm_dirty_ring_enabled() && + migration_is_active(s)) { + migration_dirty_limit_guest(); } } } diff --git a/migration/trace-events b/migration/trace-events index 57003ed..33a2666 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -91,6 +91,7 @@ migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" migration_throttle(void) "" +migration_dirty_limit_guest(int64_t dirtyrate) "guest dirty page rate limit %" PRIi64 " MB/s" ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p" ram_load_postcopy_loop(int channel, uint64_t addr, int flags) "chan=%d addr=0x%" PRIx64 " flags=0x%x" diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c index 4537c51..3f3c405 100644 --- a/softmmu/dirtylimit.c +++ b/softmmu/dirtylimit.c @@ -439,6 +439,8 @@ void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, int64_t cpu_index, Error **errp) { + MigrationState *ms = migrate_get_current(); + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { return; } @@ -452,6 +454,15 @@ void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, return; } + if (migration_is_running(ms->state) && + (!qemu_thread_is_self(&ms->thread)) && + migrate_dirty_limit() && + dirtylimit_in_service()) { + error_setg(errp, "dirty-limit live migration is running, not" + " allowing dirty page limit being canceled manually"); + return; + } + 
dirtylimit_state_lock(); if (has_cpu_index) { @@ -487,6 +498,8 @@ void qmp_set_vcpu_dirty_limit(bool has_cpu_index, uint64_t dirty_rate, Error **errp) { + MigrationState *ms = migrate_get_current(); + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { error_setg(errp, "dirty page limit feature requires KVM with" " accelerator property 'dirty-ring-size' set'"); @@ -503,6 +516,15 @@ void qmp_set_vcpu_dirty_limit(bool has_cpu_index, return; } + if (migration_is_running(ms->state) && + (!qemu_thread_is_self(&ms->thread)) && + migrate_dirty_limit() && + dirtylimit_in_service()) { + error_setg(errp, "dirty-limit live migration is running, not" + " allowing dirty page limit being configured manually"); + return; + } + dirtylimit_state_lock(); if (!dirtylimit_in_service()) {