diff mbox series

[3/6] migration: Make sure that all multifd channels have been created

Message ID 20190814020218.1868-4-quintela@redhat.com
State New
Headers show
Series Fix multifd with big number of channels | expand

Commit Message

Juan Quintela Aug. 14, 2019, 2:02 a.m. UTC
If we start the migration before all have been created, we have to
handle the case that one channel still don't exist.  This way it is
easier.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/ram.c        | 14 ++++++++++++++
 migration/trace-events |  1 +
 2 files changed, 15 insertions(+)

Comments

Dr. David Alan Gilbert Aug. 14, 2019, 2:58 p.m. UTC | #1
* Juan Quintela (quintela@redhat.com) wrote:
> If we start the migration before all have been created, we have to
> handle the case that one channel still don't exist.  This way it is
> easier.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  migration/ram.c        | 14 ++++++++++++++
>  migration/trace-events |  1 +
>  2 files changed, 15 insertions(+)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 4bdd201a4e..4a6ae677a9 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -663,6 +663,8 @@ typedef struct {
>      uint64_t num_pages;
>      /* syncs main thread and channels */
>      QemuSemaphore sem_sync;
> +    /* thread has started and setup is done */
> +    QemuSemaphore started;
>  }  MultiFDSendParams;
>  
>  typedef struct {
> @@ -1039,6 +1041,7 @@ void multifd_save_cleanup(void)
>          qemu_mutex_destroy(&p->mutex);
>          qemu_sem_destroy(&p->sem);
>          qemu_sem_destroy(&p->sem_sync);
> +        qemu_sem_destroy(&p->started);
>          g_free(p->name);
>          p->name = NULL;
>          multifd_pages_clear(p->pages);
> @@ -1113,6 +1116,8 @@ static void *multifd_send_thread(void *opaque)
>      /* initial packet */
>      p->num_packets = 1;
>  
> +    qemu_sem_post(&p->started);
> +
>      while (true) {
>          qemu_sem_wait(&p->sem);
>          qemu_mutex_lock(&p->mutex);
> @@ -1229,6 +1234,7 @@ int multifd_save_setup(void)
>          qemu_mutex_init(&p->mutex);
>          qemu_sem_init(&p->sem, 0);
>          qemu_sem_init(&p->sem_sync, 0);
> +        qemu_sem_init(&p->started, 0);
>          p->quit = false;
>          p->pending_job = 0;
>          p->id = i;
> @@ -3486,6 +3492,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>      ram_control_after_iterate(f, RAM_CONTROL_SETUP);
>  
> +    /* We want to wait for all threads to have started before doing
> +     * anything else */
> +    for (int i = 0; i < migrate_multifd_channels(); i++) {
> +        MultiFDSendParams *p = &multifd_send_state->params[i];
> +
> +        qemu_sem_wait(&p->started);
> +        trace_multifd_send_thread_started(p->id);
> +    }

What happens if there's an error during startup and either we cancel or
the migration fails and we try and cleanup - how do we get out of this
loop?

Dave

>      multifd_send_sync_main();
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>      qemu_fflush(f);
> diff --git a/migration/trace-events b/migration/trace-events
> index 886ce70ca0..dd13a5c4b1 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -95,6 +95,7 @@ multifd_send_sync_main_wait(uint8_t id) "channel %d"
>  multifd_send_terminate_threads(bool error) "error %d"
>  multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %"  PRIu64
>  multifd_send_thread_start(uint8_t id) "%d"
> +multifd_send_thread_started(uint8_t id) "channel %d"
>  ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
>  ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p"
>  ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
> -- 
> 2.21.0
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Juan Quintela Aug. 19, 2019, 8:29 a.m. UTC | #2
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Juan Quintela (quintela@redhat.com) wrote:
>> If we start the migration before all have been created, we have to
>> handle the case that one channel still don't exist.  This way it is
>> easier.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>> @@ -3486,6 +3492,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>      ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>>      ram_control_after_iterate(f, RAM_CONTROL_SETUP);
>>  
>> +    /* We want to wait for all threads to have started before doing
>> +     * anything else */
>> +    for (int i = 0; i < migrate_multifd_channels(); i++) {
>> +        MultiFDSendParams *p = &multifd_send_state->params[i];
>> +
>> +        qemu_sem_wait(&p->started);
>> +        trace_multifd_send_thread_started(p->id);
>> +    }
>
> What happens if there's an error during startup and either we cancel or
> the migration fails and we try and cleanup - how do we get out of this
> loop?

Good catch.

Will think a way to do it.

Thanks.


> Dave
>
>>      multifd_send_sync_main();
>>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>      qemu_fflush(f);
>> diff --git a/migration/trace-events b/migration/trace-events
>> index 886ce70ca0..dd13a5c4b1 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -95,6 +95,7 @@ multifd_send_sync_main_wait(uint8_t id) "channel %d"
>>  multifd_send_terminate_threads(bool error) "error %d"
>>  multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %"  PRIu64
>>  multifd_send_thread_start(uint8_t id) "%d"
>> +multifd_send_thread_started(uint8_t id) "channel %d"
>>  ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
>>  ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p"
>>  ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
>> -- 
>> 2.21.0
>> 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index 4bdd201a4e..4a6ae677a9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -663,6 +663,8 @@  typedef struct {
     uint64_t num_pages;
     /* syncs main thread and channels */
     QemuSemaphore sem_sync;
+    /* thread has started and setup is done */
+    QemuSemaphore started;
 }  MultiFDSendParams;
 
 typedef struct {
@@ -1039,6 +1041,7 @@  void multifd_save_cleanup(void)
         qemu_mutex_destroy(&p->mutex);
         qemu_sem_destroy(&p->sem);
         qemu_sem_destroy(&p->sem_sync);
+        qemu_sem_destroy(&p->started);
         g_free(p->name);
         p->name = NULL;
         multifd_pages_clear(p->pages);
@@ -1113,6 +1116,8 @@  static void *multifd_send_thread(void *opaque)
     /* initial packet */
     p->num_packets = 1;
 
+    qemu_sem_post(&p->started);
+
     while (true) {
         qemu_sem_wait(&p->sem);
         qemu_mutex_lock(&p->mutex);
@@ -1229,6 +1234,7 @@  int multifd_save_setup(void)
         qemu_mutex_init(&p->mutex);
         qemu_sem_init(&p->sem, 0);
         qemu_sem_init(&p->sem_sync, 0);
+        qemu_sem_init(&p->started, 0);
         p->quit = false;
         p->pending_job = 0;
         p->id = i;
@@ -3486,6 +3492,14 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 
+    /* We want to wait for all threads to have started before doing
+     * anything else */
+    for (int i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDSendParams *p = &multifd_send_state->params[i];
+
+        qemu_sem_wait(&p->started);
+        trace_multifd_send_thread_started(p->id);
+    }
     multifd_send_sync_main();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
     qemu_fflush(f);
diff --git a/migration/trace-events b/migration/trace-events
index 886ce70ca0..dd13a5c4b1 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -95,6 +95,7 @@  multifd_send_sync_main_wait(uint8_t id) "channel %d"
 multifd_send_terminate_threads(bool error) "error %d"
 multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %"  PRIu64
 multifd_send_thread_start(uint8_t id) "%d"
+multifd_send_thread_started(uint8_t id) "channel %d"
 ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
 ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p"
 ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"