Message ID | 20210722175841.938739-6-peterx@redhat.com |
---|---|
State | New |
Headers | show |
Series | migrations: Fix potential rare race of migration-test after yank | expand |
* Peter Xu (peterx@redhat.com) wrote: > It's efficient, but hackish to call yank unregister calls in channel_close(), > especially it'll be hard to debug when qemu crashed with some yank function > leaked. > > Remove that hack, but instead explicitly unregister yank functions at the > places where needed, they are: > > (on src) > - migrate_fd_cleanup > - postcopy_pause > > (on dst) > - migration_incoming_state_destroy > - postcopy_pause_incoming > > Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > migration/migration.c | 14 +++++++++++++- > migration/qemu-file-channel.c | 3 --- > migration/savevm.c | 7 +++++++ > migration/yank_functions.c | 14 ++++++++++++++ > migration/yank_functions.h | 1 + > 5 files changed, 35 insertions(+), 4 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index b2c48b7e17..a50330016c 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -59,6 +59,7 @@ > #include "multifd.h" > #include "qemu/yank.h" > #include "sysemu/cpus.h" > +#include "yank_functions.h" > > #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ > > @@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void) > } > > if (mis->from_src_file) { > + migration_ioc_unregister_yank_from_file(mis->from_src_file); > qemu_fclose(mis->from_src_file); > mis->from_src_file = NULL; > } > @@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s) > * Close the file handle without the lock to make sure the > * critical section won't block for long. > */ > + migration_ioc_unregister_yank_from_file(tmp); > qemu_fclose(tmp); > } > > @@ -3351,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s) > while (true) { > QEMUFile *file; > > - /* Current channel is possibly broken. Release it. */ > + /* > + * Current channel is possibly broken. Release it. Note that this is > + * guaranteed even without lock because to_dst_file should only be > + * modified by the migration thread. That also guarantees that the > + * unregister of yank is safe too without the lock. It should be safe > + * even to be within the qemu_file_lock, but we didn't do that to avoid > + * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make > + * the qemu_file_lock critical section as small as possible. > + */ > assert(s->to_dst_file); > + migration_ioc_unregister_yank_from_file(s->to_dst_file); > qemu_mutex_lock(&s->qemu_file_lock); > file = s->to_dst_file; > s->to_dst_file = NULL; > diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c > index 2f8b1fcd46..bb5a5752df 100644 > --- a/migration/qemu-file-channel.c > +++ b/migration/qemu-file-channel.c > @@ -107,9 +107,6 @@ static int channel_close(void *opaque, Error **errp) > int ret; > QIOChannel *ioc = QIO_CHANNEL(opaque); > ret = qio_channel_close(ioc, errp); > - if (OBJECT(ioc)->ref == 1) { > - migration_ioc_unregister_yank(ioc); > - } > object_unref(OBJECT(ioc)); > return ret; > } > diff --git a/migration/savevm.c b/migration/savevm.c > index 96b5e5d639..7b7b64bd13 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -65,6 +65,7 @@ > #include "qemu/bitmap.h" > #include "net/announce.h" > #include "qemu/yank.h" > +#include "yank_functions.h" > > const unsigned int postcopy_ram_discard_version; > > @@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) > /* Clear the triggered bit to allow one recovery */ > mis->postcopy_recover_triggered = false; > > + /* > + * Unregister yank with either from/to src would work, since ioc behind it > + * is the same > + */ > + migration_ioc_unregister_yank_from_file(mis->from_src_file); > + > assert(mis->from_src_file); > qemu_file_shutdown(mis->from_src_file); > qemu_fclose(mis->from_src_file); > diff --git a/migration/yank_functions.c b/migration/yank_functions.c > index 23697173ae..8c08aef14a 100644 > --- a/migration/yank_functions.c > +++ b/migration/yank_functions.c > @@ -14,6 +14,7 @@ > #include "qemu/yank.h" > #include "io/channel-socket.h" > #include "io/channel-tls.h" > +#include "qemu-file.h" > > void migration_yank_iochannel(void *opaque) > { > @@ -46,3 +47,16 @@ void migration_ioc_unregister_yank(QIOChannel *ioc) > QIO_CHANNEL(ioc)); > } > } > + > +void migration_ioc_unregister_yank_from_file(QEMUFile *file) > +{ > + QIOChannel *ioc = qemu_file_get_ioc(file); > + > + if (ioc) { > + /* > + * For migration qemufiles, we'll always reach here. Though we'll skip > + * calls from e.g. savevm/loadvm as they don't use yank. > + */ > + migration_ioc_unregister_yank(ioc); > + } > +} > diff --git a/migration/yank_functions.h b/migration/yank_functions.h > index 74c7f18c91..a7577955ed 100644 > --- a/migration/yank_functions.h > +++ b/migration/yank_functions.h > @@ -17,3 +17,4 @@ > void migration_yank_iochannel(void *opaque); > void migration_ioc_register_yank(QIOChannel *ioc); > void migration_ioc_unregister_yank(QIOChannel *ioc); > +void migration_ioc_unregister_yank_from_file(QEMUFile *file); > -- > 2.31.1 >
diff --git a/migration/migration.c b/migration/migration.c index b2c48b7e17..a50330016c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -59,6 +59,7 @@ #include "multifd.h" #include "qemu/yank.h" #include "sysemu/cpus.h" +#include "yank_functions.h" #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ @@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void) } if (mis->from_src_file) { + migration_ioc_unregister_yank_from_file(mis->from_src_file); qemu_fclose(mis->from_src_file); mis->from_src_file = NULL; } @@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s) * Close the file handle without the lock to make sure the * critical section won't block for long. */ + migration_ioc_unregister_yank_from_file(tmp); qemu_fclose(tmp); } @@ -3351,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; - /* Current channel is possibly broken. Release it. */ + /* + * Current channel is possibly broken. Release it. Note that this is + * guaranteed even without lock because to_dst_file should only be + * modified by the migration thread. That also guarantees that the + * unregister of yank is safe too without the lock. It should be safe + * even to be within the qemu_file_lock, but we didn't do that to avoid + * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make + * the qemu_file_lock critical section as small as possible. + */ assert(s->to_dst_file); + migration_ioc_unregister_yank_from_file(s->to_dst_file); qemu_mutex_lock(&s->qemu_file_lock); file = s->to_dst_file; s->to_dst_file = NULL; diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index 2f8b1fcd46..bb5a5752df 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -107,9 +107,6 @@ static int channel_close(void *opaque, Error **errp) int ret; QIOChannel *ioc = QIO_CHANNEL(opaque); ret = qio_channel_close(ioc, errp); - if (OBJECT(ioc)->ref == 1) { - migration_ioc_unregister_yank(ioc); - } object_unref(OBJECT(ioc)); return ret; } diff --git a/migration/savevm.c b/migration/savevm.c index 96b5e5d639..7b7b64bd13 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -65,6 +65,7 @@ #include "qemu/bitmap.h" #include "net/announce.h" #include "qemu/yank.h" +#include "yank_functions.h" const unsigned int postcopy_ram_discard_version; @@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) /* Clear the triggered bit to allow one recovery */ mis->postcopy_recover_triggered = false; + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same + */ + migration_ioc_unregister_yank_from_file(mis->from_src_file); + assert(mis->from_src_file); qemu_file_shutdown(mis->from_src_file); qemu_fclose(mis->from_src_file); diff --git a/migration/yank_functions.c b/migration/yank_functions.c index 23697173ae..8c08aef14a 100644 --- a/migration/yank_functions.c +++ b/migration/yank_functions.c @@ -14,6 +14,7 @@ #include "qemu/yank.h" #include "io/channel-socket.h" #include "io/channel-tls.h" +#include "qemu-file.h" void migration_yank_iochannel(void *opaque) { @@ -46,3 +47,16 @@ void migration_ioc_unregister_yank(QIOChannel *ioc) QIO_CHANNEL(ioc)); } } + +void migration_ioc_unregister_yank_from_file(QEMUFile *file) +{ + QIOChannel *ioc = qemu_file_get_ioc(file); + + if (ioc) { + /* + * For migration qemufiles, we'll always reach here. Though we'll skip + * calls from e.g. savevm/loadvm as they don't use yank. + */ + migration_ioc_unregister_yank(ioc); + } +} diff --git a/migration/yank_functions.h b/migration/yank_functions.h index 74c7f18c91..a7577955ed 100644 --- a/migration/yank_functions.h +++ b/migration/yank_functions.h @@ -17,3 +17,4 @@ void migration_yank_iochannel(void *opaque); void migration_ioc_register_yank(QIOChannel *ioc); void migration_ioc_unregister_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank_from_file(QEMUFile *file);
It's efficient, but hackish to call yank unregister calls in channel_close(), especially it'll be hard to debug when qemu crashed with some yank function leaked. Remove that hack, but instead explicitly unregister yank functions at the places where needed, they are: (on src) - migrate_fd_cleanup - postcopy_pause (on dst) - migration_incoming_state_destroy - postcopy_pause_incoming Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/migration.c | 14 +++++++++++++- migration/qemu-file-channel.c | 3 --- migration/savevm.c | 7 +++++++ migration/yank_functions.c | 14 ++++++++++++++ migration/yank_functions.h | 1 + 5 files changed, 35 insertions(+), 4 deletions(-)