diff mbox series

[2/4] migration: fix migrate_cancel leads live_migration thread hung forever

Message ID 20190724095523.1527-3-quintela@redhat.com
State New
Headers show
Series migration: fix migrate_cancel problems of multifd | expand

Commit Message

Juan Quintela July 24, 2019, 9:55 a.m. UTC
From: Ivan Ren <renyime@gmail.com>

When we 'migrate_cancel' a multifd migration, live_migration thread may
hung forever at some points, because of multifd_send_thread has already
exit for socket error:
1. multifd_send_pages may hung at qemu_sem_wait(&multifd_send_state->
   channels_ready)
2. multifd_send_sync_main my hung at qemu_sem_wait(&multifd_send_state->
   sem_sync)

Signed-off-by: Ivan Ren <ivanren@tencent.com>
Message-Id: <1561468699-9819-3-git-send-email-ivanren@tencent.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>

---

Remove spurious not needed bits
---
 migration/ram.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Dr. David Alan Gilbert July 24, 2019, 10:58 a.m. UTC | #1
* Juan Quintela (quintela@redhat.com) wrote:
> From: Ivan Ren <renyime@gmail.com>
> 
> When we 'migrate_cancel' a multifd migration, live_migration thread may
> hung forever at some points, because of multifd_send_thread has already
> exit for socket error:
> 1. multifd_send_pages may hung at qemu_sem_wait(&multifd_send_state->
>    channels_ready)
> 2. multifd_send_sync_main my hung at qemu_sem_wait(&multifd_send_state->
>    sem_sync)
> 
> Signed-off-by: Ivan Ren <ivanren@tencent.com>
> Message-Id: <1561468699-9819-3-git-send-email-ivanren@tencent.com>
> Reviewed-by: Juan Quintela <quintela@redhat.com>
> Signed-off-by: Juan Quintela <quintela@redhat.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> 
> ---
> 
> Remove spurious not needed bits
> ---
>  migration/ram.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 52a2d498e4..87bb7da8e2 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1097,7 +1097,8 @@ static void *multifd_send_thread(void *opaque)
>  {
>      MultiFDSendParams *p = opaque;
>      Error *local_err = NULL;
> -    int ret;
> +    int ret = 0;
> +    uint32_t flags = 0;
>  
>      trace_multifd_send_thread_start(p->id);
>      rcu_register_thread();
> @@ -1115,7 +1116,7 @@ static void *multifd_send_thread(void *opaque)
>          if (p->pending_job) {
>              uint32_t used = p->pages->used;
>              uint64_t packet_num = p->packet_num;
> -            uint32_t flags = p->flags;
> +            flags = p->flags;
>  
>              p->next_packet_size = used * qemu_target_page_size();
>              multifd_send_fill_packet(p);
> @@ -1164,6 +1165,17 @@ out:
>          multifd_send_terminate_threads(local_err);
>      }
>  
> +    /*
> +     * Error happen, I will exit, but I can't just leave, tell
> +     * who pay attention to me.
> +     */
> +    if (ret != 0) {
> +        if (flags & MULTIFD_FLAG_SYNC) {
> +            qemu_sem_post(&multifd_send_state->sem_sync);
> +        }
> +        qemu_sem_post(&multifd_send_state->channels_ready);
> +    }
> +
>      qemu_mutex_lock(&p->mutex);
>      p->running = false;
>      qemu_mutex_unlock(&p->mutex);
> -- 
> 2.21.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index 52a2d498e4..87bb7da8e2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1097,7 +1097,8 @@  static void *multifd_send_thread(void *opaque)
 {
     MultiFDSendParams *p = opaque;
     Error *local_err = NULL;
-    int ret;
+    int ret = 0;
+    uint32_t flags = 0;
 
     trace_multifd_send_thread_start(p->id);
     rcu_register_thread();
@@ -1115,7 +1116,7 @@  static void *multifd_send_thread(void *opaque)
         if (p->pending_job) {
             uint32_t used = p->pages->used;
             uint64_t packet_num = p->packet_num;
-            uint32_t flags = p->flags;
+            flags = p->flags;
 
             p->next_packet_size = used * qemu_target_page_size();
             multifd_send_fill_packet(p);
@@ -1164,6 +1165,17 @@  out:
         multifd_send_terminate_threads(local_err);
     }
 
+    /*
+     * Error happen, I will exit, but I can't just leave, tell
+     * who pay attention to me.
+     */
+    if (ret != 0) {
+        if (flags & MULTIFD_FLAG_SYNC) {
+            qemu_sem_post(&multifd_send_state->sem_sync);
+        }
+        qemu_sem_post(&multifd_send_state->channels_ready);
+    }
+
     qemu_mutex_lock(&p->mutex);
     p->running = false;
     qemu_mutex_unlock(&p->mutex);