diff mbox series

[v3,5/5] migration: Move the yank unregister of channel_close out

Message ID 20210722175841.938739-6-peterx@redhat.com
State New
Headers show
Series migrations: Fix potential rare race of migration-test after yank | expand

Commit Message

Peter Xu July 22, 2021, 5:58 p.m. UTC
It's efficient, but hackish to call yank unregister calls in channel_close(),
especially it'll be hard to debug when qemu crashed with some yank function
leaked.

Remove that hack, but instead explicitly unregister yank functions at the
places where needed, they are:

  (on src)
  - migrate_fd_cleanup
  - postcopy_pause

  (on dst)
  - migration_incoming_state_destroy
  - postcopy_pause_incoming

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/migration.c         | 14 +++++++++++++-
 migration/qemu-file-channel.c |  3 ---
 migration/savevm.c            |  7 +++++++
 migration/yank_functions.c    | 14 ++++++++++++++
 migration/yank_functions.h    |  1 +
 5 files changed, 35 insertions(+), 4 deletions(-)

Comments

Dr. David Alan Gilbert July 22, 2021, 6:41 p.m. UTC | #1
* Peter Xu (peterx@redhat.com) wrote:
> It's efficient, but hackish to call yank unregister calls in channel_close(),
> especially it'll be hard to debug when qemu crashed with some yank function
> leaked.
> 
> Remove that hack, but instead explicitly unregister yank functions at the
> places where needed, they are:
> 
>   (on src)
>   - migrate_fd_cleanup
>   - postcopy_pause
> 
>   (on dst)
>   - migration_incoming_state_destroy
>   - postcopy_pause_incoming
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
>  migration/migration.c         | 14 +++++++++++++-
>  migration/qemu-file-channel.c |  3 ---
>  migration/savevm.c            |  7 +++++++
>  migration/yank_functions.c    | 14 ++++++++++++++
>  migration/yank_functions.h    |  1 +
>  5 files changed, 35 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index b2c48b7e17..a50330016c 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -59,6 +59,7 @@
>  #include "multifd.h"
>  #include "qemu/yank.h"
>  #include "sysemu/cpus.h"
> +#include "yank_functions.h"
>  
>  #define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */
>  
> @@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void)
>      }
>  
>      if (mis->from_src_file) {
> +        migration_ioc_unregister_yank_from_file(mis->from_src_file);
>          qemu_fclose(mis->from_src_file);
>          mis->from_src_file = NULL;
>      }
> @@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s)
>           * Close the file handle without the lock to make sure the
>           * critical section won't block for long.
>           */
> +        migration_ioc_unregister_yank_from_file(tmp);
>          qemu_fclose(tmp);
>      }
>  
> @@ -3351,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s)
>      while (true) {
>          QEMUFile *file;
>  
> -        /* Current channel is possibly broken. Release it. */
> +        /*
> +         * Current channel is possibly broken. Release it.  Note that this is
> +         * guaranteed even without lock because to_dst_file should only be
> +         * modified by the migration thread.  That also guarantees that the
> +         * unregister of yank is safe too without the lock.  It should be safe
> +         * even to be within the qemu_file_lock, but we didn't do that to avoid
> +         * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
> +         * the qemu_file_lock critical section as small as possible.
> +         */
>          assert(s->to_dst_file);
> +        migration_ioc_unregister_yank_from_file(s->to_dst_file);
>          qemu_mutex_lock(&s->qemu_file_lock);
>          file = s->to_dst_file;
>          s->to_dst_file = NULL;
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 2f8b1fcd46..bb5a5752df 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -107,9 +107,6 @@ static int channel_close(void *opaque, Error **errp)
>      int ret;
>      QIOChannel *ioc = QIO_CHANNEL(opaque);
>      ret = qio_channel_close(ioc, errp);
> -    if (OBJECT(ioc)->ref == 1) {
> -        migration_ioc_unregister_yank(ioc);
> -    }
>      object_unref(OBJECT(ioc));
>      return ret;
>  }
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 96b5e5d639..7b7b64bd13 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -65,6 +65,7 @@
>  #include "qemu/bitmap.h"
>  #include "net/announce.h"
>  #include "qemu/yank.h"
> +#include "yank_functions.h"
>  
>  const unsigned int postcopy_ram_discard_version;
>  
> @@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
>      /* Clear the triggered bit to allow one recovery */
>      mis->postcopy_recover_triggered = false;
>  
> +    /*
> +     * Unregister yank with either from/to src would work, since ioc behind it
> +     * is the same
> +     */
> +    migration_ioc_unregister_yank_from_file(mis->from_src_file);
> +
>      assert(mis->from_src_file);
>      qemu_file_shutdown(mis->from_src_file);
>      qemu_fclose(mis->from_src_file);
> diff --git a/migration/yank_functions.c b/migration/yank_functions.c
> index 23697173ae..8c08aef14a 100644
> --- a/migration/yank_functions.c
> +++ b/migration/yank_functions.c
> @@ -14,6 +14,7 @@
>  #include "qemu/yank.h"
>  #include "io/channel-socket.h"
>  #include "io/channel-tls.h"
> +#include "qemu-file.h"
>  
>  void migration_yank_iochannel(void *opaque)
>  {
> @@ -46,3 +47,16 @@ void migration_ioc_unregister_yank(QIOChannel *ioc)
>                                   QIO_CHANNEL(ioc));
>      }
>  }
> +
> +void migration_ioc_unregister_yank_from_file(QEMUFile *file)
> +{
> +    QIOChannel *ioc = qemu_file_get_ioc(file);
> +
> +    if (ioc) {
> +        /*
> +         * For migration qemufiles, we'll always reach here.  Though we'll skip
> +         * calls from e.g. savevm/loadvm as they don't use yank.
> +         */
> +        migration_ioc_unregister_yank(ioc);
> +    }
> +}
> diff --git a/migration/yank_functions.h b/migration/yank_functions.h
> index 74c7f18c91..a7577955ed 100644
> --- a/migration/yank_functions.h
> +++ b/migration/yank_functions.h
> @@ -17,3 +17,4 @@
>  void migration_yank_iochannel(void *opaque);
>  void migration_ioc_register_yank(QIOChannel *ioc);
>  void migration_ioc_unregister_yank(QIOChannel *ioc);
> +void migration_ioc_unregister_yank_from_file(QEMUFile *file);
> -- 
> 2.31.1
>
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index b2c48b7e17..a50330016c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -59,6 +59,7 @@ 
 #include "multifd.h"
 #include "qemu/yank.h"
 #include "sysemu/cpus.h"
+#include "yank_functions.h"
 
 #define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */
 
@@ -273,6 +274,7 @@  void migration_incoming_state_destroy(void)
     }
 
     if (mis->from_src_file) {
+        migration_ioc_unregister_yank_from_file(mis->from_src_file);
         qemu_fclose(mis->from_src_file);
         mis->from_src_file = NULL;
     }
@@ -1811,6 +1813,7 @@  static void migrate_fd_cleanup(MigrationState *s)
          * Close the file handle without the lock to make sure the
          * critical section won't block for long.
          */
+        migration_ioc_unregister_yank_from_file(tmp);
         qemu_fclose(tmp);
     }
 
@@ -3351,8 +3354,17 @@  static MigThrError postcopy_pause(MigrationState *s)
     while (true) {
         QEMUFile *file;
 
-        /* Current channel is possibly broken. Release it. */
+        /*
+         * Current channel is possibly broken. Release it.  Note that this is
+         * guaranteed even without lock because to_dst_file should only be
+         * modified by the migration thread.  That also guarantees that the
+         * unregister of yank is safe too without the lock.  It should be safe
+         * even to be within the qemu_file_lock, but we didn't do that to avoid
+         * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
+         * the qemu_file_lock critical section as small as possible.
+         */
         assert(s->to_dst_file);
+        migration_ioc_unregister_yank_from_file(s->to_dst_file);
         qemu_mutex_lock(&s->qemu_file_lock);
         file = s->to_dst_file;
         s->to_dst_file = NULL;
diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
index 2f8b1fcd46..bb5a5752df 100644
--- a/migration/qemu-file-channel.c
+++ b/migration/qemu-file-channel.c
@@ -107,9 +107,6 @@  static int channel_close(void *opaque, Error **errp)
     int ret;
     QIOChannel *ioc = QIO_CHANNEL(opaque);
     ret = qio_channel_close(ioc, errp);
-    if (OBJECT(ioc)->ref == 1) {
-        migration_ioc_unregister_yank(ioc);
-    }
     object_unref(OBJECT(ioc));
     return ret;
 }
diff --git a/migration/savevm.c b/migration/savevm.c
index 96b5e5d639..7b7b64bd13 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -65,6 +65,7 @@ 
 #include "qemu/bitmap.h"
 #include "net/announce.h"
 #include "qemu/yank.h"
+#include "yank_functions.h"
 
 const unsigned int postcopy_ram_discard_version;
 
@@ -2568,6 +2569,12 @@  static bool postcopy_pause_incoming(MigrationIncomingState *mis)
     /* Clear the triggered bit to allow one recovery */
     mis->postcopy_recover_triggered = false;
 
+    /*
+     * Unregister yank with either from/to src would work, since ioc behind it
+     * is the same
+     */
+    migration_ioc_unregister_yank_from_file(mis->from_src_file);
+
     assert(mis->from_src_file);
     qemu_file_shutdown(mis->from_src_file);
     qemu_fclose(mis->from_src_file);
diff --git a/migration/yank_functions.c b/migration/yank_functions.c
index 23697173ae..8c08aef14a 100644
--- a/migration/yank_functions.c
+++ b/migration/yank_functions.c
@@ -14,6 +14,7 @@ 
 #include "qemu/yank.h"
 #include "io/channel-socket.h"
 #include "io/channel-tls.h"
+#include "qemu-file.h"
 
 void migration_yank_iochannel(void *opaque)
 {
@@ -46,3 +47,16 @@  void migration_ioc_unregister_yank(QIOChannel *ioc)
                                  QIO_CHANNEL(ioc));
     }
 }
+
+void migration_ioc_unregister_yank_from_file(QEMUFile *file)
+{
+    QIOChannel *ioc = qemu_file_get_ioc(file);
+
+    if (ioc) {
+        /*
+         * For migration qemufiles, we'll always reach here.  Though we'll skip
+         * calls from e.g. savevm/loadvm as they don't use yank.
+         */
+        migration_ioc_unregister_yank(ioc);
+    }
+}
diff --git a/migration/yank_functions.h b/migration/yank_functions.h
index 74c7f18c91..a7577955ed 100644
--- a/migration/yank_functions.h
+++ b/migration/yank_functions.h
@@ -17,3 +17,4 @@ 
 void migration_yank_iochannel(void *opaque);
 void migration_ioc_register_yank(QIOChannel *ioc);
 void migration_ioc_unregister_yank(QIOChannel *ioc);
+void migration_ioc_unregister_yank_from_file(QEMUFile *file);