diff mbox series

[2/3] migration/multifd: Protect accesses to migration_threads

Message ID 20230606144551.24367-3-farosas@suse.de
State New
Headers show
Series migration: Fix multifd cancel test | expand

Commit Message

Fabiano Rosas June 6, 2023, 2:45 p.m. UTC
This doubly linked list is common for all the multifd and migration
threads so we need to avoid concurrent access.

Add a mutex to protect the data from concurrent access. This fixes a
crash when removing two MigrationThread objects from the list at the
same time during cleanup of multifd threads.

To avoid destroying the mutex before the last element has been
removed, move calls to qmp_migration_thread_remove so they run before
multifd_save_cleanup joins the threads.

Fixes: 671326201d ("migration: Introduce interface query-migrationthreads")
Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 migration/migration.c  |  5 ++++-
 migration/multifd.c    |  3 ++-
 migration/threadinfo.c | 19 ++++++++++++++++++-
 migration/threadinfo.h |  5 +++--
 4 files changed, 27 insertions(+), 5 deletions(-)

Comments

Peter Xu June 6, 2023, 6:43 p.m. UTC | #1
On Tue, Jun 06, 2023 at 11:45:50AM -0300, Fabiano Rosas wrote:
> This doubly linked list is common for all the multifd and migration
> threads so we need to avoid concurrent access.
> 
> Add a mutex to protect the data from concurrent access. This fixes a
> crash when removing two MigrationThread objects from the list at the
> same time during cleanup of multifd threads.
> 
> To avoid destroying the mutex before the last element has been
> removed, move calls to qmp_migration_thread_remove so they run before
> multifd_save_cleanup joins the threads.
> 
> Fixes: 671326201d ("migration: Introduce interface query-migrationthreads")
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
>  migration/migration.c  |  5 ++++-
>  migration/multifd.c    |  3 ++-
>  migration/threadinfo.c | 19 ++++++++++++++++++-
>  migration/threadinfo.h |  5 +++--
>  4 files changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index e731fc98a1..b3b8345eb2 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s)
>          qemu_mutex_lock_iothread();
>  
>          multifd_save_cleanup();
> +        qmp_migration_threads_cleanup();
>          qemu_mutex_lock(&s->qemu_file_lock);
>          tmp = s->to_dst_file;
>          s->to_dst_file = NULL;
> @@ -1405,6 +1406,8 @@ void migrate_init(MigrationState *s)
>      s->vm_old_state = -1;
>      s->iteration_initial_bytes = 0;
>      s->threshold_size = 0;
> +
> +    qmp_migration_threads_init();
>  }
>  
>  int migrate_add_blocker_internal(Error *reason, Error **errp)
> @@ -2997,10 +3000,10 @@ static void *migration_thread(void *opaque)
>      }
>  
>      trace_migration_thread_after_loop();
> +    qmp_migration_threads_remove(thread);
>      migration_iteration_finish(s);
>      object_unref(OBJECT(s));
>      rcu_unregister_thread();
> -    qmp_migration_threads_remove(thread);
>      return NULL;
>  }
>  
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 5ec1ac5c64..ee7944560a 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -762,12 +762,13 @@ out:
>          qemu_sem_post(&multifd_send_state->channels_ready);
>      }
>  
> +    qmp_migration_threads_remove(thread);
> +
>      qemu_mutex_lock(&p->mutex);
>      p->running = false;
>      qemu_mutex_unlock(&p->mutex);
>  
>      rcu_unregister_thread();
> -    qmp_migration_threads_remove(thread);
>      trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
>  
>      return NULL;
> diff --git a/migration/threadinfo.c b/migration/threadinfo.c
> index c3e85c33e8..1fe64a02dd 100644
> --- a/migration/threadinfo.c
> +++ b/migration/threadinfo.c
> @@ -10,23 +10,40 @@
>   *  See the COPYING file in the top-level directory.
>   */
>  
> +#include "qemu/osdep.h"
> +#include "qemu/queue.h"
> +#include "qemu/lockable.h"
>  #include "threadinfo.h"
>  
> +QemuMutex migration_threads_lock;
>  static QLIST_HEAD(, MigrationThread) migration_threads;
>  
> +void qmp_migration_threads_init(void)
> +{
> +    qemu_mutex_init(&migration_threads_lock);
> +}
> +
> +void qmp_migration_threads_cleanup(void)
> +{
> +    qemu_mutex_destroy(&migration_threads_lock);
> +}
> +
>  MigrationThread *qmp_migration_threads_add(const char *name, int thread_id)
>  {
>      MigrationThread *thread =  g_new0(MigrationThread, 1);
>      thread->name = name;
>      thread->thread_id = thread_id;
>  
> -    QLIST_INSERT_HEAD(&migration_threads, thread, node);
> +    WITH_QEMU_LOCK_GUARD(&migration_threads_lock) {
> +        QLIST_INSERT_HEAD(&migration_threads, thread, node);
> +    }
>  
>      return thread;
>  }
>  
>  void qmp_migration_threads_remove(MigrationThread *thread)
>  {
> +    QEMU_LOCK_GUARD(&migration_threads_lock);
>      if (thread) {
>          QLIST_REMOVE(thread, node);
>          g_free(thread);

qmp_query_migrationthreads() better also have the lock?

Other than that looks good, thanks!

> diff --git a/migration/threadinfo.h b/migration/threadinfo.h
> index 61b990f5e3..eb7f8e5bb2 100644
> --- a/migration/threadinfo.h
> +++ b/migration/threadinfo.h
> @@ -10,8 +10,6 @@
>   *  See the COPYING file in the top-level directory.
>   */
>  
> -#include "qemu/queue.h"
> -#include "qemu/osdep.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-commands-migration.h"
>  
> @@ -23,5 +21,8 @@ struct MigrationThread {
>      QLIST_ENTRY(MigrationThread) node;
>  };
>  
> +void qmp_migration_threads_init(void);
> +void qmp_migration_threads_cleanup(void);
> +
>  MigrationThread *qmp_migration_threads_add(const char *name, int thread_id);
>  void qmp_migration_threads_remove(MigrationThread *info);
> -- 
> 2.35.3
>
Juan Quintela June 7, 2023, 8:26 a.m. UTC | #2
Fabiano Rosas <farosas@suse.de> wrote:
> This doubly linked list is common for all the multifd and migration
> threads so we need to avoid concurrent access.
>
> Add a mutex to protect the data from concurrent access. This fixes a
> crash when removing two MigrationThread objects from the list at the
> same time during cleanup of multifd threads.
>
> To avoid destroying the mutex before the last element has been
> removed, move calls to qmp_migration_thread_remove so they run before
> multifd_save_cleanup joins the threads.
>
> Fixes: 671326201d ("migration: Introduce interface query-migrationthreads")
> Signed-off-by: Fabiano Rosas <farosas@suse.de>

I agree with Peter here.  Why don't you have to protect the walking?

> ---
>  migration/migration.c  |  5 ++++-
>  migration/multifd.c    |  3 ++-
>  migration/threadinfo.c | 19 ++++++++++++++++++-
>  migration/threadinfo.h |  5 +++--
>  4 files changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index e731fc98a1..b3b8345eb2 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s)
>          qemu_mutex_lock_iothread();
>  
>          multifd_save_cleanup();
> +        qmp_migration_threads_cleanup();

I think I will spare this one as the mutex is static, so we are not
winning any memory back.

>      }
>  
>      trace_migration_thread_after_loop();
> +    qmp_migration_threads_remove(thread);
>      migration_iteration_finish(s);

I can understand moving it here, but why before migration_iteration_finish?

>      object_unref(OBJECT(s));
>      rcu_unregister_thread();
> -    qmp_migration_threads_remove(thread);
>      return NULL;
>  }
> +    qmp_migration_threads_remove(thread);
> +
>      qemu_mutex_lock(&p->mutex);
>      p->running = false;
>      qemu_mutex_unlock(&p->mutex);
>  
>      rcu_unregister_thread();
> -    qmp_migration_threads_remove(thread);
>      trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
>  
>      return NULL;

Here it looks like the right place.


> +#include "qemu/osdep.h"
> +#include "qemu/queue.h"
> +#include "qemu/lockable.h"
>  #include "threadinfo.h"

Ouch, it missed Markus cleanup.  Thanks.

For the rest it looks good.

Later, Juan.
Fabiano Rosas June 7, 2023, noon UTC | #3
Juan Quintela <quintela@redhat.com> writes:

> Fabiano Rosas <farosas@suse.de> wrote:
>> This doubly linked list is common for all the multifd and migration
>> threads so we need to avoid concurrent access.
>>
>> Add a mutex to protect the data from concurrent access. This fixes a
>> crash when removing two MigrationThread objects from the list at the
>> same time during cleanup of multifd threads.
>>
>> To avoid destroying the mutex before the last element has been
>> removed, move calls to qmp_migration_thread_remove so they run before
>> multifd_save_cleanup joins the threads.
>>
>> Fixes: 671326201d ("migration: Introduce interface query-migrationthreads")
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>
> I agree with Peter here.  Why don't you have to protect the walking?
>

Oversight on my part.

>> ---
>>  migration/migration.c  |  5 ++++-
>>  migration/multifd.c    |  3 ++-
>>  migration/threadinfo.c | 19 ++++++++++++++++++-
>>  migration/threadinfo.h |  5 +++--
>>  4 files changed, 27 insertions(+), 5 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index e731fc98a1..b3b8345eb2 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s)
>>          qemu_mutex_lock_iothread();
>>  
>>          multifd_save_cleanup();
>> +        qmp_migration_threads_cleanup();
>
> I think I will spare this one as the mutex is static, so we are not
> winning any memory back.
>

Ok

>>      }
>>  
>>      trace_migration_thread_after_loop();
>> +    qmp_migration_threads_remove(thread);
>>      migration_iteration_finish(s);
>
> I can understand moving it here, but why before migration_iteration_finish?
>

Because migration_iteration_finish schedules migrate_fd_cleanup and it
calls qmp_migration_threads_cleanup. So I wanted to be sure that the
removal happens before destroying the mutex.

>>      object_unref(OBJECT(s));
>>      rcu_unregister_thread();
>> -    qmp_migration_threads_remove(thread);
>>      return NULL;
>>  }
>> +    qmp_migration_threads_remove(thread);
>> +
>>      qemu_mutex_lock(&p->mutex);
>>      p->running = false;
>>      qemu_mutex_unlock(&p->mutex);
>>  
>>      rcu_unregister_thread();
>> -    qmp_migration_threads_remove(thread);
>>      trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
>>  
>>      return NULL;
>
> Here it looks like the right place.
>

Yep, we shouldn't really put any new code after that p->running =
false. Because multifd_save_cleanup will happily start cleaning up
everything while this thread is still running if it sees p->running ==
false.

>
>> +#include "qemu/osdep.h"
>> +#include "qemu/queue.h"
>> +#include "qemu/lockable.h"
>>  #include "threadinfo.h"
>
> Ouch, it missed Markus cleanup.  Thanks.
>
> For the rest it looks good.
>
> Later, Juan.
Peter Xu June 7, 2023, 1:25 p.m. UTC | #4
On Wed, Jun 07, 2023 at 09:00:14AM -0300, Fabiano Rosas wrote:
> >> diff --git a/migration/migration.c b/migration/migration.c
> >> index e731fc98a1..b3b8345eb2 100644
> >> --- a/migration/migration.c
> >> +++ b/migration/migration.c
> >> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState *s)
> >>          qemu_mutex_lock_iothread();
> >>  
> >>          multifd_save_cleanup();
> >> +        qmp_migration_threads_cleanup();
> >
> > I think I will spare this one as the mutex is static, so we are not
> > winning any memory back.
> >
> 
> Ok

We could consider __attribute__((__constructor__)) in this case.
Juan Quintela June 7, 2023, 4:58 p.m. UTC | #5
Sounds good.

On Wed, Jun 7, 2023, 18:28 Peter Xu <peterx@redhat.com> wrote:

> On Wed, Jun 07, 2023 at 09:00:14AM -0300, Fabiano Rosas wrote:
> > >> diff --git a/migration/migration.c b/migration/migration.c
> > >> index e731fc98a1..b3b8345eb2 100644
> > >> --- a/migration/migration.c
> > >> +++ b/migration/migration.c
> > >> @@ -1146,6 +1146,7 @@ static void migrate_fd_cleanup(MigrationState
> *s)
> > >>          qemu_mutex_lock_iothread();
> > >>
> > >>          multifd_save_cleanup();
> > >> +        qmp_migration_threads_cleanup();
> > >
> > > I think I will spare this one as the mutex is static, so we are not
> > > winning any memory back.
> > >
> >
> > Ok
>
> We could consider __attribute__((__constructor__)) in this case.
>
> --
> Peter Xu
>
>
>
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index e731fc98a1..b3b8345eb2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1146,6 +1146,7 @@  static void migrate_fd_cleanup(MigrationState *s)
         qemu_mutex_lock_iothread();
 
         multifd_save_cleanup();
+        qmp_migration_threads_cleanup();
         qemu_mutex_lock(&s->qemu_file_lock);
         tmp = s->to_dst_file;
         s->to_dst_file = NULL;
@@ -1405,6 +1406,8 @@  void migrate_init(MigrationState *s)
     s->vm_old_state = -1;
     s->iteration_initial_bytes = 0;
     s->threshold_size = 0;
+
+    qmp_migration_threads_init();
 }
 
 int migrate_add_blocker_internal(Error *reason, Error **errp)
@@ -2997,10 +3000,10 @@  static void *migration_thread(void *opaque)
     }
 
     trace_migration_thread_after_loop();
+    qmp_migration_threads_remove(thread);
     migration_iteration_finish(s);
     object_unref(OBJECT(s));
     rcu_unregister_thread();
-    qmp_migration_threads_remove(thread);
     return NULL;
 }
 
diff --git a/migration/multifd.c b/migration/multifd.c
index 5ec1ac5c64..ee7944560a 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -762,12 +762,13 @@  out:
         qemu_sem_post(&multifd_send_state->channels_ready);
     }
 
+    qmp_migration_threads_remove(thread);
+
     qemu_mutex_lock(&p->mutex);
     p->running = false;
     qemu_mutex_unlock(&p->mutex);
 
     rcu_unregister_thread();
-    qmp_migration_threads_remove(thread);
     trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
 
     return NULL;
diff --git a/migration/threadinfo.c b/migration/threadinfo.c
index c3e85c33e8..1fe64a02dd 100644
--- a/migration/threadinfo.c
+++ b/migration/threadinfo.c
@@ -10,23 +10,40 @@ 
  *  See the COPYING file in the top-level directory.
  */
 
+#include "qemu/osdep.h"
+#include "qemu/queue.h"
+#include "qemu/lockable.h"
 #include "threadinfo.h"
 
+QemuMutex migration_threads_lock;
 static QLIST_HEAD(, MigrationThread) migration_threads;
 
+void qmp_migration_threads_init(void)
+{
+    qemu_mutex_init(&migration_threads_lock);
+}
+
+void qmp_migration_threads_cleanup(void)
+{
+    qemu_mutex_destroy(&migration_threads_lock);
+}
+
 MigrationThread *qmp_migration_threads_add(const char *name, int thread_id)
 {
     MigrationThread *thread =  g_new0(MigrationThread, 1);
     thread->name = name;
     thread->thread_id = thread_id;
 
-    QLIST_INSERT_HEAD(&migration_threads, thread, node);
+    WITH_QEMU_LOCK_GUARD(&migration_threads_lock) {
+        QLIST_INSERT_HEAD(&migration_threads, thread, node);
+    }
 
     return thread;
 }
 
 void qmp_migration_threads_remove(MigrationThread *thread)
 {
+    QEMU_LOCK_GUARD(&migration_threads_lock);
     if (thread) {
         QLIST_REMOVE(thread, node);
         g_free(thread);
diff --git a/migration/threadinfo.h b/migration/threadinfo.h
index 61b990f5e3..eb7f8e5bb2 100644
--- a/migration/threadinfo.h
+++ b/migration/threadinfo.h
@@ -10,8 +10,6 @@ 
  *  See the COPYING file in the top-level directory.
  */
 
-#include "qemu/queue.h"
-#include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-migration.h"
 
@@ -23,5 +21,8 @@  struct MigrationThread {
     QLIST_ENTRY(MigrationThread) node;
 };
 
+void qmp_migration_threads_init(void);
+void qmp_migration_threads_cleanup(void);
+
 MigrationThread *qmp_migration_threads_add(const char *name, int thread_id);
 void qmp_migration_threads_remove(MigrationThread *info);