diff mbox series

[v11,for-4.0,09/11] qemu_thread: supplement error handling for migration

Message ID 20190201051806.53183-10-lifei1214@126.com
State New
Headers show
Series qemu_thread_create: propagate the error to callers to handle | expand

Commit Message

fei Feb. 1, 2019, 5:18 a.m. UTC
From: Fei Li <fli@suse.com>

Update qemu_thread_create()'s callers by
- setting an error on qemu_thread_create() failure for callers that
  set an error on failure;
- reporting the error and returning failure for callers that return
  an error code on failure;
- reporting the error and setting some state for callers that just
  report errors and choose not to continue on.

Besides, make compress_threads_save_cleanup() cope with partially
initialized comp_param[i] to adapt to the new qemu_thread_create()
failure case.

Cc: Markus Armbruster <armbru@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Fei Li <fli@suse.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/migration.c    | 35 +++++++++++++-------
 migration/postcopy-ram.c | 16 ++++++---
 migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
 migration/savevm.c       | 12 ++++---
 4 files changed, 89 insertions(+), 44 deletions(-)

Comments

Fei Li Feb. 1, 2019, 5:35 a.m. UTC | #1
在 2019/2/1 下午1:18, Fei Li 写道:
> From: Fei Li <fli@suse.com>
>
> Update qemu_thread_create()'s callers by
> - setting an error on qemu_thread_create() failure for callers that
>    set an error on failure;
> - reporting the error and returning failure for callers that return
>    an error code on failure;
> - reporting the error and setting some state for callers that just
>    report errors and choose not to continue on.
>
> Besides, make compress_threads_save_cleanup() cope with partially
> initialized comp_param[i] to adapt to the new qemu_thread_create()
> failure case.

Hi David,

I got your Reviewed-by in last version at an early stage, but later one 
issue is raised
by Markus due to the improper “[patch v9 16/16] qemu_thread_join: fix 
segmentation fault”.
Thus in this v11, I do an update for the following two functions:

compress_threads_save_cleanup() & compress_threads_save_setup()

Please help to review again, thanks :)

Have a nice day
Fei
>
> Cc: Markus Armbruster <armbru@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Fei Li <fli@suse.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>   migration/migration.c    | 35 +++++++++++++-------
>   migration/postcopy-ram.c | 16 ++++++---
>   migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
>   migration/savevm.c       | 12 ++++---
>   4 files changed, 89 insertions(+), 44 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 1da71211c8..0034ca1334 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -447,10 +447,13 @@ static void process_incoming_migration_co(void *opaque)
>               goto fail;
>           }
>   
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> -                           colo_process_incoming_thread, mis,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> +                               colo_process_incoming_thread, mis,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            error_reportf_err(local_err, "failed to create "
> +                              "colo_process_incoming_thread: ");
> +            goto fail;
> +        }
>           mis->have_colo_incoming_thread = true;
>           qemu_coroutine_yield();
>   
> @@ -2349,6 +2352,7 @@ out:
>   static int open_return_path_on_source(MigrationState *ms,
>                                         bool create_thread)
>   {
> +    Error *local_err = NULL;
>   
>       ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>       if (!ms->rp_state.from_dst_file) {
> @@ -2362,10 +2366,15 @@ static int open_return_path_on_source(MigrationState *ms,
>           return 0;
>       }
>   
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> -                       source_return_path_thread, ms,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> +                           source_return_path_thread, ms,
> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +        error_reportf_err(local_err,
> +                          "failed to create source_return_path_thread: ");
> +        qemu_fclose(ms->rp_state.from_dst_file);
> +        ms->rp_state.from_dst_file = NULL;
> +        return -1;
> +     }
>   
>       trace_open_return_path_on_source_continue();
>   
> @@ -3201,9 +3210,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>           migrate_fd_cleanup(s);
>           return;
>       }
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> +                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
> +        error_reportf_err(error_in, "failed to create migration_thread: ");
> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> +        migrate_fd_cleanup(s);
> +        return;
> +    }
>       s->migration_thread_running = true;
>   }
>   
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 221ea24919..0934a1403a 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1083,6 +1083,8 @@ retry:
>   
>   int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>   {
> +    Error *local_err = NULL;
> +
>       /* Open the fd for the kernel to give us userfaults */
>       mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>       if (mis->userfault_fd == -1) {
> @@ -1109,10 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>       }
>   
>       qemu_sem_init(&mis->fault_thread_sem, 0);
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> -                       postcopy_ram_fault_thread, mis,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> +                           postcopy_ram_fault_thread, mis,
> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +        error_reportf_err(local_err,
> +                          "failed to create postcopy_ram_fault_thread: ");
> +        close(mis->userfault_event_fd);
> +        close(mis->userfault_fd);
> +        qemu_sem_destroy(&mis->fault_thread_sem);
> +        return -1;
> +    }
>       qemu_sem_wait(&mis->fault_thread_sem);
>       qemu_sem_destroy(&mis->fault_thread_sem);
>       mis->have_fault_thread = true;
> diff --git a/migration/ram.c b/migration/ram.c
> index 700ea229e0..66b8b764f1 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -441,6 +441,14 @@ static void compress_threads_save_cleanup(void)
>   
>       thread_count = migrate_compress_threads();
>       for (i = 0; i < thread_count; i++) {
> +        qemu_mutex_lock(&comp_param[i].mutex);
> +        comp_param[i].quit = true;
> +        qemu_cond_signal(&comp_param[i].cond);
> +        qemu_mutex_unlock(&comp_param[i].mutex);
> +
> +        qemu_mutex_destroy(&comp_param[i].mutex);
> +        qemu_cond_destroy(&comp_param[i].cond);
> +
>           /*
>            * we use it as a indicator which shows if the thread is
>            * properly init'd or not
> @@ -448,15 +456,7 @@ static void compress_threads_save_cleanup(void)
>           if (!comp_param[i].file) {
>               break;
>           }
> -
> -        qemu_mutex_lock(&comp_param[i].mutex);
> -        comp_param[i].quit = true;
> -        qemu_cond_signal(&comp_param[i].cond);
> -        qemu_mutex_unlock(&comp_param[i].mutex);
> -
>           qemu_thread_join(compress_threads + i);
> -        qemu_mutex_destroy(&comp_param[i].mutex);
> -        qemu_cond_destroy(&comp_param[i].cond);
>           deflateEnd(&comp_param[i].stream);
>           g_free(comp_param[i].originbuf);
>           qemu_fclose(comp_param[i].file);
> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>   static int compress_threads_save_setup(void)
>   {
>       int i, thread_count;
> +    Error *local_err = NULL;
>   
>       if (!migrate_use_compression()) {
>           return 0;
> @@ -483,6 +484,9 @@ static int compress_threads_save_setup(void)
>       qemu_cond_init(&comp_done_cond);
>       qemu_mutex_init(&comp_done_lock);
>       for (i = 0; i < thread_count; i++) {
> +        qemu_mutex_init(&comp_param[i].mutex);
> +        qemu_cond_init(&comp_param[i].cond);
> +        comp_param[i].quit = false;
>           comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
>           if (!comp_param[i].originbuf) {
>               goto exit;
> @@ -499,13 +503,16 @@ static int compress_threads_save_setup(void)
>            */
>           comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
>           comp_param[i].done = true;
> -        comp_param[i].quit = false;
> -        qemu_mutex_init(&comp_param[i].mutex);
> -        qemu_cond_init(&comp_param[i].cond);
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(compress_threads + i, "compress",
> -                           do_data_compress, comp_param + i,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(compress_threads + i, "compress",
> +                               do_data_compress, comp_param + i,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            error_reportf_err(local_err, "failed to create do_data_compress: ");
> +            deflateEnd(&comp_param[i].stream);
> +            g_free(comp_param[i].originbuf);
> +            qemu_fclose(comp_param[i].file);
> +            comp_param[i].file = NULL;
> +            goto exit;
> +        }
>       }
>       return 0;
>   
> @@ -1076,9 +1083,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>           p->c = QIO_CHANNEL(sioc);
>           qio_channel_set_delay(p->c, false);
>           p->running = true;
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            migrate_set_error(migrate_get_current(), local_err);
> +            error_reportf_err(local_err,
> +                              "failed to create multifd_send_thread: ");
> +            multifd_save_cleanup();
> +            return;
> +        }
>   
>           atomic_inc(&multifd_send_state->count);
>       }
> @@ -1357,9 +1369,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>       p->num_packets = 1;
>   
>       p->running = true;
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +        error_propagate_prepend(errp, local_err,
> +                                "failed to create multifd_recv_thread: ");
> +        multifd_recv_terminate_threads(local_err);
> +        return false;
> +    }
>       atomic_inc(&multifd_recv_state->count);
>       return atomic_read(&multifd_recv_state->count) ==
>              migrate_multifd_channels();
> @@ -3631,6 +3647,7 @@ static void compress_threads_load_cleanup(void)
>   static int compress_threads_load_setup(QEMUFile *f)
>   {
>       int i, thread_count;
> +    Error *local_err = NULL;
>   
>       if (!migrate_use_compression()) {
>           return 0;
> @@ -3652,10 +3669,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>           qemu_cond_init(&decomp_param[i].cond);
>           decomp_param[i].done = true;
>           decomp_param[i].quit = false;
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(decompress_threads + i, "decompress",
> -                           do_data_decompress, decomp_param + i,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(decompress_threads + i, "decompress",
> +                               do_data_decompress, decomp_param + i,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            error_reportf_err(local_err,
> +                              "failed to create do_data_decompress: ");
> +            goto exit;
> +        }
>       }
>       return 0;
>   exit:
> diff --git a/migration/savevm.c b/migration/savevm.c
> index d5b45843b6..310cecbf8f 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1747,10 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>       mis->have_listen_thread = true;
>       /* Start up the listening thread and wait for it to signal ready */
>       qemu_sem_init(&mis->listen_thread_sem, 0);
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> -                       postcopy_ram_listen_thread, NULL,
> -                       QEMU_THREAD_DETACHED, &error_abort);
> +    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> +                           postcopy_ram_listen_thread, NULL,
> +                           QEMU_THREAD_DETACHED, &local_err) < 0) {
> +        error_reportf_err(local_err,
> +                          "failed to create postcopy_ram_listen_thread: ");
> +        qemu_sem_destroy(&mis->listen_thread_sem);
> +        return -1;
> +    }
>       qemu_sem_wait(&mis->listen_thread_sem);
>       qemu_sem_destroy(&mis->listen_thread_sem);
>
Markus Armbruster Feb. 1, 2019, 3:34 p.m. UTC | #2
Dave, I tried to review the error paths, in particular resource cleanup,
but there's a lot going on, and I'm not feeling confident.  Please have
a close look.

Fei Li <lifei1214@126.com> writes:

> From: Fei Li <fli@suse.com>
>
> Update qemu_thread_create()'s callers by
> - setting an error on qemu_thread_create() failure for callers that
>   set an error on failure;
> - reporting the error and returning failure for callers that return
>   an error code on failure;
> - reporting the error and setting some state for callers that just
>   report errors and choose not to continue on.
>
> Besides, make compress_threads_save_cleanup() cope with partially
> initialized comp_param[i] to adapt to the new qemu_thread_create()
> failure case.
>
> Cc: Markus Armbruster <armbru@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Fei Li <fli@suse.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  migration/migration.c    | 35 +++++++++++++-------
>  migration/postcopy-ram.c | 16 ++++++---
>  migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
>  migration/savevm.c       | 12 ++++---
>  4 files changed, 89 insertions(+), 44 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 1da71211c8..0034ca1334 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -447,10 +447,13 @@ static void process_incoming_migration_co(void *opaque)
>              goto fail;
>          }
>  
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> -                           colo_process_incoming_thread, mis,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> +                               colo_process_incoming_thread, mis,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            error_reportf_err(local_err, "failed to create "
> +                              "colo_process_incoming_thread: ");
> +            goto fail;
> +        }
>          mis->have_colo_incoming_thread = true;
>          qemu_coroutine_yield();
>  
> @@ -2349,6 +2352,7 @@ out:
>  static int open_return_path_on_source(MigrationState *ms,
>                                        bool create_thread)
>  {
> +    Error *local_err = NULL;
>  
>      ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>      if (!ms->rp_state.from_dst_file) {
> @@ -2362,10 +2366,15 @@ static int open_return_path_on_source(MigrationState *ms,
>          return 0;
>      }
>  
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> -                       source_return_path_thread, ms,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> +                           source_return_path_thread, ms,
> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +        error_reportf_err(local_err,
> +                          "failed to create source_return_path_thread: ");
> +        qemu_fclose(ms->rp_state.from_dst_file);
> +        ms->rp_state.from_dst_file = NULL;
> +        return -1;
> +     }
>  
>      trace_open_return_path_on_source_continue();
>  
> @@ -3201,9 +3210,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
       if (multifd_save_setup() != 0) {
           migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                             MIGRATION_STATUS_FAILED);
>          migrate_fd_cleanup(s);
>          return;
>      }
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> +                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
> +        error_reportf_err(error_in, "failed to create migration_thread: ");
> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> +        migrate_fd_cleanup(s);

Is there anything to clean up for multifd_save_setup()?  Dave?

> +        return;
> +    }
>      s->migration_thread_running = true;
>  }
>  
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 221ea24919..0934a1403a 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1083,6 +1083,8 @@ retry:
>  
>  int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>  {
> +    Error *local_err = NULL;
> +
>      /* Open the fd for the kernel to give us userfaults */
>      mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>      if (mis->userfault_fd == -1) {
> @@ -1109,10 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>      }
>  
>      qemu_sem_init(&mis->fault_thread_sem, 0);
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> -                       postcopy_ram_fault_thread, mis,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> +                           postcopy_ram_fault_thread, mis,
> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +        error_reportf_err(local_err,
> +                          "failed to create postcopy_ram_fault_thread: ");
> +        close(mis->userfault_event_fd);
> +        close(mis->userfault_fd);
> +        qemu_sem_destroy(&mis->fault_thread_sem);
> +        return -1;
> +    }
>      qemu_sem_wait(&mis->fault_thread_sem);
>      qemu_sem_destroy(&mis->fault_thread_sem);
>      mis->have_fault_thread = true;

       /* Mark so that we get notified of accesses to unwritten areas */
       if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
           error_report("ram_block_enable_notify failed");
           return -1;

Not this patch's problem, but here goes anyway: where are
mis->userfault_event_fd and mis->userfault_fd closed?

       }

> diff --git a/migration/ram.c b/migration/ram.c
> index 700ea229e0..66b8b764f1 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -441,6 +441,14 @@ static void compress_threads_save_cleanup(void)
>  
>      thread_count = migrate_compress_threads();
>      for (i = 0; i < thread_count; i++) {
> +        qemu_mutex_lock(&comp_param[i].mutex);
> +        comp_param[i].quit = true;
> +        qemu_cond_signal(&comp_param[i].cond);
> +        qemu_mutex_unlock(&comp_param[i].mutex);
> +
> +        qemu_mutex_destroy(&comp_param[i].mutex);
> +        qemu_cond_destroy(&comp_param[i].cond);
> +
>          /*
>           * we use it as a indicator which shows if the thread is
>           * properly init'd or not
> @@ -448,15 +456,7 @@ static void compress_threads_save_cleanup(void)
>          if (!comp_param[i].file) {
>              break;
>          }
> -
> -        qemu_mutex_lock(&comp_param[i].mutex);
> -        comp_param[i].quit = true;
> -        qemu_cond_signal(&comp_param[i].cond);
> -        qemu_mutex_unlock(&comp_param[i].mutex);
> -
>          qemu_thread_join(compress_threads + i);
> -        qemu_mutex_destroy(&comp_param[i].mutex);
> -        qemu_cond_destroy(&comp_param[i].cond);
>          deflateEnd(&comp_param[i].stream);
>          g_free(comp_param[i].originbuf);
>          qemu_fclose(comp_param[i].file);
> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>  static int compress_threads_save_setup(void)
>  {
>      int i, thread_count;
> +    Error *local_err = NULL;
>  
>      if (!migrate_use_compression()) {
>          return 0;
> @@ -483,6 +484,9 @@ static int compress_threads_save_setup(void)
>      qemu_cond_init(&comp_done_cond);
>      qemu_mutex_init(&comp_done_lock);
>      for (i = 0; i < thread_count; i++) {
> +        qemu_mutex_init(&comp_param[i].mutex);
> +        qemu_cond_init(&comp_param[i].cond);
> +        comp_param[i].quit = false;
>          comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
>          if (!comp_param[i].originbuf) {
>              goto exit;
> @@ -499,13 +503,16 @@ static int compress_threads_save_setup(void)
>           */
>          comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
>          comp_param[i].done = true;
> -        comp_param[i].quit = false;
> -        qemu_mutex_init(&comp_param[i].mutex);
> -        qemu_cond_init(&comp_param[i].cond);
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(compress_threads + i, "compress",
> -                           do_data_compress, comp_param + i,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(compress_threads + i, "compress",
> +                               do_data_compress, comp_param + i,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            error_reportf_err(local_err, "failed to create do_data_compress: ");
> +            deflateEnd(&comp_param[i].stream);
> +            g_free(comp_param[i].originbuf);
> +            qemu_fclose(comp_param[i].file);
> +            comp_param[i].file = NULL;
> +            goto exit;
> +        }
>      }
>      return 0;
>  
> @@ -1076,9 +1083,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>          p->c = QIO_CHANNEL(sioc);
>          qio_channel_set_delay(p->c, false);
>          p->running = true;
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            migrate_set_error(migrate_get_current(), local_err);
> +            error_reportf_err(local_err,
> +                              "failed to create multifd_send_thread: ");
> +            multifd_save_cleanup();
> +            return;
> +        }
>  
>          atomic_inc(&multifd_send_state->count);
>      }
> @@ -1357,9 +1369,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>      p->num_packets = 1;
>  
>      p->running = true;
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> -                       QEMU_THREAD_JOINABLE, &error_abort);
> +    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +        error_propagate_prepend(errp, local_err,
> +                                "failed to create multifd_recv_thread: ");

This prepends only if @errp isn't null.  Fine from the caller's point of
view.  But it also affects multifd_recv_thread():

> +        multifd_recv_terminate_threads(local_err);

That's at least unclean.

I think you should use error_prepend() and error_propagate() here.

> +        return false;
> +    }
>      atomic_inc(&multifd_recv_state->count);
>      return atomic_read(&multifd_recv_state->count) ==
>             migrate_multifd_channels();
> @@ -3631,6 +3647,7 @@ static void compress_threads_load_cleanup(void)
>  static int compress_threads_load_setup(QEMUFile *f)
>  {
>      int i, thread_count;
> +    Error *local_err = NULL;
>  
>      if (!migrate_use_compression()) {
>          return 0;
> @@ -3652,10 +3669,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>          qemu_cond_init(&decomp_param[i].cond);
>          decomp_param[i].done = true;
>          decomp_param[i].quit = false;
> -        /* TODO: let the further caller handle the error instead of abort() */
> -        qemu_thread_create(decompress_threads + i, "decompress",
> -                           do_data_decompress, decomp_param + i,
> -                           QEMU_THREAD_JOINABLE, &error_abort);
> +        if (qemu_thread_create(decompress_threads + i, "decompress",
> +                               do_data_decompress, decomp_param + i,
> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> +            error_reportf_err(local_err,
> +                              "failed to create do_data_decompress: ");
> +            goto exit;
> +        }
>      }
>      return 0;
>  exit:
> diff --git a/migration/savevm.c b/migration/savevm.c
> index d5b45843b6..310cecbf8f 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1747,10 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>      mis->have_listen_thread = true;
>      /* Start up the listening thread and wait for it to signal ready */
>      qemu_sem_init(&mis->listen_thread_sem, 0);
> -    /* TODO: let the further caller handle the error instead of abort() here */
> -    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> -                       postcopy_ram_listen_thread, NULL,
> -                       QEMU_THREAD_DETACHED, &error_abort);
> +    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> +                           postcopy_ram_listen_thread, NULL,
> +                           QEMU_THREAD_DETACHED, &local_err) < 0) {
> +        error_reportf_err(local_err,
> +                          "failed to create postcopy_ram_listen_thread: ");
> +        qemu_sem_destroy(&mis->listen_thread_sem);
> +        return -1;
> +    }
>      qemu_sem_wait(&mis->listen_thread_sem);
>      qemu_sem_destroy(&mis->listen_thread_sem);
Fei Li Feb. 3, 2019, 7:55 a.m. UTC | #3
在 2019/2/1 下午11:34, Markus Armbruster 写道:
> Dave, I tried to review the error paths, in particular resource cleanup,
> but there's a lot going on, and I'm not feeling confident.  Please have
> a close look.
>
> Fei Li <lifei1214@126.com> writes:
>
>> From: Fei Li <fli@suse.com>
>>
>> Update qemu_thread_create()'s callers by
>> - setting an error on qemu_thread_create() failure for callers that
>>    set an error on failure;
>> - reporting the error and returning failure for callers that return
>>    an error code on failure;
>> - reporting the error and setting some state for callers that just
>>    report errors and choose not to continue on.
>>
>> Besides, make compress_threads_save_cleanup() cope with partially
>> initialized comp_param[i] to adapt to the new qemu_thread_create()
>> failure case.
>>
>> Cc: Markus Armbruster <armbru@redhat.com>
>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Signed-off-by: Fei Li <fli@suse.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>   migration/migration.c    | 35 +++++++++++++-------
>>   migration/postcopy-ram.c | 16 ++++++---
>>   migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
>>   migration/savevm.c       | 12 ++++---
>>   4 files changed, 89 insertions(+), 44 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 1da71211c8..0034ca1334 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -447,10 +447,13 @@ static void process_incoming_migration_co(void *opaque)
>>               goto fail;
>>           }
>>   
>> -        /* TODO: let the further caller handle the error instead of abort() */
>> -        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>> -                           colo_process_incoming_thread, mis,
>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>> +        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>> +                               colo_process_incoming_thread, mis,
>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +            error_reportf_err(local_err, "failed to create "
>> +                              "colo_process_incoming_thread: ");
>> +            goto fail;
>> +        }
>>           mis->have_colo_incoming_thread = true;
>>           qemu_coroutine_yield();
>>   
>> @@ -2349,6 +2352,7 @@ out:
>>   static int open_return_path_on_source(MigrationState *ms,
>>                                         bool create_thread)
>>   {
>> +    Error *local_err = NULL;
>>   
>>       ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>>       if (!ms->rp_state.from_dst_file) {
>> @@ -2362,10 +2366,15 @@ static int open_return_path_on_source(MigrationState *ms,
>>           return 0;
>>       }
>>   
>> -    /* TODO: let the further caller handle the error instead of abort() here */
>> -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>> -                       source_return_path_thread, ms,
>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>> +    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>> +                           source_return_path_thread, ms,
>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +        error_reportf_err(local_err,
>> +                          "failed to create source_return_path_thread: ");
>> +        qemu_fclose(ms->rp_state.from_dst_file);
>> +        ms->rp_state.from_dst_file = NULL;
>> +        return -1;
>> +     }
>>   
>>       trace_open_return_path_on_source_continue();
>>   
>> @@ -3201,9 +3210,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>         if (multifd_save_setup() != 0) {
>             migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>                               MIGRATION_STATUS_FAILED);
>>           migrate_fd_cleanup(s);
>>           return;
>>       }
>> -    /* TODO: let the further caller handle the error instead of abort() here */
>> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>> +    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>> +                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
>> +        error_reportf_err(error_in, "failed to create migration_thread: ");
>> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>> +        migrate_fd_cleanup(s);
> Is there anything to clean up for multifd_save_setup()?  Dave?
>
>> +        return;
>> +    }
>>       s->migration_thread_running = true;
>>   }
>>   
>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>> index 221ea24919..0934a1403a 100644
>> --- a/migration/postcopy-ram.c
>> +++ b/migration/postcopy-ram.c
>> @@ -1083,6 +1083,8 @@ retry:
>>   
>>   int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>   {
>> +    Error *local_err = NULL;
>> +
>>       /* Open the fd for the kernel to give us userfaults */
>>       mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>>       if (mis->userfault_fd == -1) {
>> @@ -1109,10 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>       }
>>   
>>       qemu_sem_init(&mis->fault_thread_sem, 0);
>> -    /* TODO: let the further caller handle the error instead of abort() here */
>> -    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>> -                       postcopy_ram_fault_thread, mis,
>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>> +    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>> +                           postcopy_ram_fault_thread, mis,
>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +        error_reportf_err(local_err,
>> +                          "failed to create postcopy_ram_fault_thread: ");
>> +        close(mis->userfault_event_fd);
>> +        close(mis->userfault_fd);
>> +        qemu_sem_destroy(&mis->fault_thread_sem);
>> +        return -1;
>> +    }
>>       qemu_sem_wait(&mis->fault_thread_sem);
>>       qemu_sem_destroy(&mis->fault_thread_sem);
>>       mis->have_fault_thread = true;
>         /* Mark so that we get notified of accesses to unwritten areas */
>         if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
>             error_report("ram_block_enable_notify failed");
>             return -1;
>
> Not this patch's problem, but here goes anyway: where are
> mis->userfault_event_fd and mis->userfault_fd closed?

They are actually cleaned in the caller when 
postcopy_ram_enable_notify() fails:

call postcopy_ram_incoming_cleanup() to do the cleanup when 
mis->have_fault_thread=true

The merged patch: 91b02dc is to fix this.

>
>         }
>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 700ea229e0..66b8b764f1 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -441,6 +441,14 @@ static void compress_threads_save_cleanup(void)
>>   
>>       thread_count = migrate_compress_threads();
>>       for (i = 0; i < thread_count; i++) {
>> +        qemu_mutex_lock(&comp_param[i].mutex);
>> +        comp_param[i].quit = true;
>> +        qemu_cond_signal(&comp_param[i].cond);
>> +        qemu_mutex_unlock(&comp_param[i].mutex);
>> +
>> +        qemu_mutex_destroy(&comp_param[i].mutex);
>> +        qemu_cond_destroy(&comp_param[i].cond);
>> +
>>           /*
>>            * we use it as a indicator which shows if the thread is
>>            * properly init'd or not
>> @@ -448,15 +456,7 @@ static void compress_threads_save_cleanup(void)
>>           if (!comp_param[i].file) {
>>               break;
>>           }
>> -
>> -        qemu_mutex_lock(&comp_param[i].mutex);
>> -        comp_param[i].quit = true;
>> -        qemu_cond_signal(&comp_param[i].cond);
>> -        qemu_mutex_unlock(&comp_param[i].mutex);
>> -
>>           qemu_thread_join(compress_threads + i);
>> -        qemu_mutex_destroy(&comp_param[i].mutex);
>> -        qemu_cond_destroy(&comp_param[i].cond);
>>           deflateEnd(&comp_param[i].stream);
>>           g_free(comp_param[i].originbuf);
>>           qemu_fclose(comp_param[i].file);
>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>   static int compress_threads_save_setup(void)
>>   {
>>       int i, thread_count;
>> +    Error *local_err = NULL;
>>   
>>       if (!migrate_use_compression()) {
>>           return 0;
>> @@ -483,6 +484,9 @@ static int compress_threads_save_setup(void)
>>       qemu_cond_init(&comp_done_cond);
>>       qemu_mutex_init(&comp_done_lock);
>>       for (i = 0; i < thread_count; i++) {
>> +        qemu_mutex_init(&comp_param[i].mutex);
>> +        qemu_cond_init(&comp_param[i].cond);
>> +        comp_param[i].quit = false;
>>           comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
>>           if (!comp_param[i].originbuf) {
>>               goto exit;
>> @@ -499,13 +503,16 @@ static int compress_threads_save_setup(void)
>>            */
>>           comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
>>           comp_param[i].done = true;
>> -        comp_param[i].quit = false;
>> -        qemu_mutex_init(&comp_param[i].mutex);
>> -        qemu_cond_init(&comp_param[i].cond);
>> -        /* TODO: let the further caller handle the error instead of abort() */
>> -        qemu_thread_create(compress_threads + i, "compress",
>> -                           do_data_compress, comp_param + i,
>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>> +        if (qemu_thread_create(compress_threads + i, "compress",
>> +                               do_data_compress, comp_param + i,
>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +            error_reportf_err(local_err, "failed to create do_data_compress: ");
>> +            deflateEnd(&comp_param[i].stream);
>> +            g_free(comp_param[i].originbuf);
>> +            qemu_fclose(comp_param[i].file);
>> +            comp_param[i].file = NULL;
>> +            goto exit;
>> +        }
>>       }
>>       return 0;
>>   
>> @@ -1076,9 +1083,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>>           p->c = QIO_CHANNEL(sioc);
>>           qio_channel_set_delay(p->c, false);
>>           p->running = true;
>> -        /* TODO: let the further caller handle the error instead of abort() */
>> -        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>> +        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +            migrate_set_error(migrate_get_current(), local_err);
>> +            error_reportf_err(local_err,
>> +                              "failed to create multifd_send_thread: ");
>> +            multifd_save_cleanup();
>> +            return;
>> +        }
>>   
>>           atomic_inc(&multifd_send_state->count);
>>       }
>> @@ -1357,9 +1369,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>>       p->num_packets = 1;
>>   
>>       p->running = true;
>> -    /* TODO: let the further caller handle the error instead of abort() here */
>> -    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>> +    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +        error_propagate_prepend(errp, local_err,
>> +                                "failed to create multifd_recv_thread: ");
> This prepends only if @errp isn't null.  Fine from the caller's point of
> view.  But it also affects multifd_recv_thread():
>
>> +        multifd_recv_terminate_threads(local_err);
> That's at least unclean.
>
> I think you should use error_prepend() and error_propagate() here.

Right, I will change these two sentences' order. Thanks for pointing 
this out. :)

Have a nice day
Fei
>
>> +        return false;
>> +    }
>>       atomic_inc(&multifd_recv_state->count);
>>       return atomic_read(&multifd_recv_state->count) ==
>>              migrate_multifd_channels();
>> @@ -3631,6 +3647,7 @@ static void compress_threads_load_cleanup(void)
>>   static int compress_threads_load_setup(QEMUFile *f)
>>   {
>>       int i, thread_count;
>> +    Error *local_err = NULL;
>>   
>>       if (!migrate_use_compression()) {
>>           return 0;
>> @@ -3652,10 +3669,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>>           qemu_cond_init(&decomp_param[i].cond);
>>           decomp_param[i].done = true;
>>           decomp_param[i].quit = false;
>> -        /* TODO: let the further caller handle the error instead of abort() */
>> -        qemu_thread_create(decompress_threads + i, "decompress",
>> -                           do_data_decompress, decomp_param + i,
>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>> +        if (qemu_thread_create(decompress_threads + i, "decompress",
>> +                               do_data_decompress, decomp_param + i,
>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>> +            error_reportf_err(local_err,
>> +                              "failed to create do_data_decompress: ");
>> +            goto exit;
>> +        }
>>       }
>>       return 0;
>>   exit:
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index d5b45843b6..310cecbf8f 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1747,10 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>>       mis->have_listen_thread = true;
>>       /* Start up the listening thread and wait for it to signal ready */
>>       qemu_sem_init(&mis->listen_thread_sem, 0);
>> -    /* TODO: let the further caller handle the error instead of abort() here */
>> -    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>> -                       postcopy_ram_listen_thread, NULL,
>> -                       QEMU_THREAD_DETACHED, &error_abort);
>> +    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>> +                           postcopy_ram_listen_thread, NULL,
>> +                           QEMU_THREAD_DETACHED, &local_err) < 0) {
>> +        error_reportf_err(local_err,
>> +                          "failed to create postcopy_ram_listen_thread: ");
>> +        qemu_sem_destroy(&mis->listen_thread_sem);
>> +        return -1;
>> +    }
>>       qemu_sem_wait(&mis->listen_thread_sem);
>>       qemu_sem_destroy(&mis->listen_thread_sem);
Dr. David Alan Gilbert Feb. 4, 2019, 4:34 p.m. UTC | #4
* Markus Armbruster (armbru@redhat.com) wrote:
> Dave, I tried to review the error paths, in particular resource cleanup,
> but there's a lot going on, and I'm not feeling confident.  Please have
> a close look.
> 
> Fei Li <lifei1214@126.com> writes:
> 
> > From: Fei Li <fli@suse.com>
> >
> > Update qemu_thread_create()'s callers by
> > - setting an error on qemu_thread_create() failure for callers that
> >   set an error on failure;
> > - reporting the error and returning failure for callers that return
> >   an error code on failure;
> > - reporting the error and setting some state for callers that just
> >   report errors and choose not to continue on.
> >
> > Besides, make compress_threads_save_cleanup() cope with partially
> > initialized comp_param[i] to adapt to the new qemu_thread_create()
> > failure case.
> >
> > Cc: Markus Armbruster <armbru@redhat.com>
> > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Signed-off-by: Fei Li <fli@suse.com>
> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> >  migration/migration.c    | 35 +++++++++++++-------
> >  migration/postcopy-ram.c | 16 ++++++---
> >  migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
> >  migration/savevm.c       | 12 ++++---
> >  4 files changed, 89 insertions(+), 44 deletions(-)
> >
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 1da71211c8..0034ca1334 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -447,10 +447,13 @@ static void process_incoming_migration_co(void *opaque)
> >              goto fail;
> >          }
> >  
> > -        /* TODO: let the further caller handle the error instead of abort() */
> > -        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> > -                           colo_process_incoming_thread, mis,
> > -                           QEMU_THREAD_JOINABLE, &error_abort);
> > +        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> > +                               colo_process_incoming_thread, mis,
> > +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +            error_reportf_err(local_err, "failed to create "
> > +                              "colo_process_incoming_thread: ");
> > +            goto fail;
> > +        }
> >          mis->have_colo_incoming_thread = true;
> >          qemu_coroutine_yield();
> >  
> > @@ -2349,6 +2352,7 @@ out:
> >  static int open_return_path_on_source(MigrationState *ms,
> >                                        bool create_thread)
> >  {
> > +    Error *local_err = NULL;
> >  
> >      ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
> >      if (!ms->rp_state.from_dst_file) {
> > @@ -2362,10 +2366,15 @@ static int open_return_path_on_source(MigrationState *ms,
> >          return 0;
> >      }
> >  
> > -    /* TODO: let the further caller handle the error instead of abort() here */
> > -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> > -                       source_return_path_thread, ms,
> > -                       QEMU_THREAD_JOINABLE, &error_abort);
> > +    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> > +                           source_return_path_thread, ms,
> > +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +        error_reportf_err(local_err,
> > +                          "failed to create source_return_path_thread: ");
> > +        qemu_fclose(ms->rp_state.from_dst_file);
> > +        ms->rp_state.from_dst_file = NULL;
> > +        return -1;
> > +     }
> >  
> >      trace_open_return_path_on_source_continue();
> >  
> > @@ -3201,9 +3210,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>        if (multifd_save_setup() != 0) {
>            migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>                              MIGRATION_STATUS_FAILED);
> >          migrate_fd_cleanup(s);
> >          return;
> >      }
> > -    /* TODO: let the further caller handle the error instead of abort() here */
> > -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> > -                       QEMU_THREAD_JOINABLE, &error_abort);
> > +    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> > +                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
> > +        error_reportf_err(error_in, "failed to create migration_thread: ");
> > +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> > +        migrate_fd_cleanup(s);
> 
> Is there anything to clean up for multifd_save_setup()?  Dave?

I need to bounce that one to Juan; he knows the multifd stuff; cc'd

> > +        return;
> > +    }
> >      s->migration_thread_running = true;
> >  }
> >  
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > index 221ea24919..0934a1403a 100644
> > --- a/migration/postcopy-ram.c
> > +++ b/migration/postcopy-ram.c
> > @@ -1083,6 +1083,8 @@ retry:
> >  
> >  int postcopy_ram_enable_notify(MigrationIncomingState *mis)
> >  {
> > +    Error *local_err = NULL;
> > +
> >      /* Open the fd for the kernel to give us userfaults */
> >      mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
> >      if (mis->userfault_fd == -1) {
> > @@ -1109,10 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
> >      }
> >  
> >      qemu_sem_init(&mis->fault_thread_sem, 0);
> > -    /* TODO: let the further caller handle the error instead of abort() here */
> > -    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> > -                       postcopy_ram_fault_thread, mis,
> > -                       QEMU_THREAD_JOINABLE, &error_abort);
> > +    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> > +                           postcopy_ram_fault_thread, mis,
> > +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +        error_reportf_err(local_err,
> > +                          "failed to create postcopy_ram_fault_thread: ");
> > +        close(mis->userfault_event_fd);
> > +        close(mis->userfault_fd);
> > +        qemu_sem_destroy(&mis->fault_thread_sem);
> > +        return -1;
> > +    }
> >      qemu_sem_wait(&mis->fault_thread_sem);
> >      qemu_sem_destroy(&mis->fault_thread_sem);
> >      mis->have_fault_thread = true;
> 
>        /* Mark so that we get notified of accesses to unwritten areas */
>        if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
>            error_report("ram_block_enable_notify failed");
>            return -1;
> 
> Not this patch's problem, but here goes anyway: where are
> mis->userfault_event_fd and mis->userfault_fd closed?

As Fei replied, see postcopy_ram_incoming_cleanup; once the fault thread
is running it does a bigger cleanup.

Dave

>        }
> 
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 700ea229e0..66b8b764f1 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -441,6 +441,14 @@ static void compress_threads_save_cleanup(void)
> >  
> >      thread_count = migrate_compress_threads();
> >      for (i = 0; i < thread_count; i++) {
> > +        qemu_mutex_lock(&comp_param[i].mutex);
> > +        comp_param[i].quit = true;
> > +        qemu_cond_signal(&comp_param[i].cond);
> > +        qemu_mutex_unlock(&comp_param[i].mutex);
> > +
> > +        qemu_mutex_destroy(&comp_param[i].mutex);
> > +        qemu_cond_destroy(&comp_param[i].cond);
> > +
> >          /*
> >           * we use it as a indicator which shows if the thread is
> >           * properly init'd or not
> > @@ -448,15 +456,7 @@ static void compress_threads_save_cleanup(void)
> >          if (!comp_param[i].file) {
> >              break;
> >          }
> > -
> > -        qemu_mutex_lock(&comp_param[i].mutex);
> > -        comp_param[i].quit = true;
> > -        qemu_cond_signal(&comp_param[i].cond);
> > -        qemu_mutex_unlock(&comp_param[i].mutex);
> > -
> >          qemu_thread_join(compress_threads + i);
> > -        qemu_mutex_destroy(&comp_param[i].mutex);
> > -        qemu_cond_destroy(&comp_param[i].cond);
> >          deflateEnd(&comp_param[i].stream);
> >          g_free(comp_param[i].originbuf);
> >          qemu_fclose(comp_param[i].file);
> > @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
> >  static int compress_threads_save_setup(void)
> >  {
> >      int i, thread_count;
> > +    Error *local_err = NULL;
> >  
> >      if (!migrate_use_compression()) {
> >          return 0;
> > @@ -483,6 +484,9 @@ static int compress_threads_save_setup(void)
> >      qemu_cond_init(&comp_done_cond);
> >      qemu_mutex_init(&comp_done_lock);
> >      for (i = 0; i < thread_count; i++) {
> > +        qemu_mutex_init(&comp_param[i].mutex);
> > +        qemu_cond_init(&comp_param[i].cond);
> > +        comp_param[i].quit = false;
> >          comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
> >          if (!comp_param[i].originbuf) {
> >              goto exit;
> > @@ -499,13 +503,16 @@ static int compress_threads_save_setup(void)
> >           */
> >          comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
> >          comp_param[i].done = true;
> > -        comp_param[i].quit = false;
> > -        qemu_mutex_init(&comp_param[i].mutex);
> > -        qemu_cond_init(&comp_param[i].cond);
> > -        /* TODO: let the further caller handle the error instead of abort() */
> > -        qemu_thread_create(compress_threads + i, "compress",
> > -                           do_data_compress, comp_param + i,
> > -                           QEMU_THREAD_JOINABLE, &error_abort);
> > +        if (qemu_thread_create(compress_threads + i, "compress",
> > +                               do_data_compress, comp_param + i,
> > +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +            error_reportf_err(local_err, "failed to create do_data_compress: ");
> > +            deflateEnd(&comp_param[i].stream);
> > +            g_free(comp_param[i].originbuf);
> > +            qemu_fclose(comp_param[i].file);
> > +            comp_param[i].file = NULL;
> > +            goto exit;
> > +        }
> >      }
> >      return 0;
> >  
> > @@ -1076,9 +1083,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
> >          p->c = QIO_CHANNEL(sioc);
> >          qio_channel_set_delay(p->c, false);
> >          p->running = true;
> > -        /* TODO: let the further caller handle the error instead of abort() */
> > -        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> > -                           QEMU_THREAD_JOINABLE, &error_abort);
> > +        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> > +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +            migrate_set_error(migrate_get_current(), local_err);
> > +            error_reportf_err(local_err,
> > +                              "failed to create multifd_send_thread: ");
> > +            multifd_save_cleanup();
> > +            return;
> > +        }
> >  
> >          atomic_inc(&multifd_send_state->count);
> >      }
> > @@ -1357,9 +1369,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
> >      p->num_packets = 1;
> >  
> >      p->running = true;
> > -    /* TODO: let the further caller handle the error instead of abort() here */
> > -    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> > -                       QEMU_THREAD_JOINABLE, &error_abort);
> > +    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> > +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +        error_propagate_prepend(errp, local_err,
> > +                                "failed to create multifd_recv_thread: ");
> 
> This prepends only if @errp isn't null.  Fine from the caller's point of
> view.  But it also affects multifd_recv_thread():
> 
> > +        multifd_recv_terminate_threads(local_err);
> 
> That's at least unclean.
> 
> I think you should use error_prepend() and error_propagate() here.
> 
> > +        return false;
> > +    }
> >      atomic_inc(&multifd_recv_state->count);
> >      return atomic_read(&multifd_recv_state->count) ==
> >             migrate_multifd_channels();
> > @@ -3631,6 +3647,7 @@ static void compress_threads_load_cleanup(void)
> >  static int compress_threads_load_setup(QEMUFile *f)
> >  {
> >      int i, thread_count;
> > +    Error *local_err = NULL;
> >  
> >      if (!migrate_use_compression()) {
> >          return 0;
> > @@ -3652,10 +3669,13 @@ static int compress_threads_load_setup(QEMUFile *f)
> >          qemu_cond_init(&decomp_param[i].cond);
> >          decomp_param[i].done = true;
> >          decomp_param[i].quit = false;
> > -        /* TODO: let the further caller handle the error instead of abort() */
> > -        qemu_thread_create(decompress_threads + i, "decompress",
> > -                           do_data_decompress, decomp_param + i,
> > -                           QEMU_THREAD_JOINABLE, &error_abort);
> > +        if (qemu_thread_create(decompress_threads + i, "decompress",
> > +                               do_data_decompress, decomp_param + i,
> > +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
> > +            error_reportf_err(local_err,
> > +                              "failed to create do_data_decompress: ");
> > +            goto exit;
> > +        }
> >      }
> >      return 0;
> >  exit:
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index d5b45843b6..310cecbf8f 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -1747,10 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
> >      mis->have_listen_thread = true;
> >      /* Start up the listening thread and wait for it to signal ready */
> >      qemu_sem_init(&mis->listen_thread_sem, 0);
> > -    /* TODO: let the further caller handle the error instead of abort() here */
> > -    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> > -                       postcopy_ram_listen_thread, NULL,
> > -                       QEMU_THREAD_DETACHED, &error_abort);
> > +    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> > +                           postcopy_ram_listen_thread, NULL,
> > +                           QEMU_THREAD_DETACHED, &local_err) < 0) {
> > +        error_reportf_err(local_err,
> > +                          "failed to create postcopy_ram_listen_thread: ");
> > +        qemu_sem_destroy(&mis->listen_thread_sem);
> > +        return -1;
> > +    }
> >      qemu_sem_wait(&mis->listen_thread_sem);
> >      qemu_sem_destroy(&mis->listen_thread_sem);
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
fei Feb. 15, 2019, 1:23 p.m. UTC | #5
在 2019/2/5 上午12:34, Dr. David Alan Gilbert 写道:
> * Markus Armbruster (armbru@redhat.com) wrote:
>> Dave, I tried to review the error paths, in particular resource cleanup,
>> but there's a lot going on, and I'm not feeling confident.  Please have
>> a close look.
>>
>> Fei Li <lifei1214@126.com> writes:
>>
>>> From: Fei Li <fli@suse.com>
>>>
>>> Update qemu_thread_create()'s callers by
>>> - setting an error on qemu_thread_create() failure for callers that
>>>    set an error on failure;
>>> - reporting the error and returning failure for callers that return
>>>    an error code on failure;
>>> - reporting the error and setting some state for callers that just
>>>    report errors and choose not to continue on.
>>>
>>> Besides, make compress_threads_save_cleanup() cope with partially
>>> initialized comp_param[i] to adapt to the new qemu_thread_create()
>>> failure case.
>>>
>>> Cc: Markus Armbruster <armbru@redhat.com>
>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> Signed-off-by: Fei Li <fli@suse.com>
>>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> ---
>>>   migration/migration.c    | 35 +++++++++++++-------
>>>   migration/postcopy-ram.c | 16 ++++++---
>>>   migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
>>>   migration/savevm.c       | 12 ++++---
>>>   4 files changed, 89 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index 1da71211c8..0034ca1334 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -447,10 +447,13 @@ static void process_incoming_migration_co(void *opaque)
>>>               goto fail;
>>>           }
>>>   
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>>> -                           colo_process_incoming_thread, mis,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>>> +                               colo_process_incoming_thread, mis,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            error_reportf_err(local_err, "failed to create "
>>> +                              "colo_process_incoming_thread: ");
>>> +            goto fail;
>>> +        }
>>>           mis->have_colo_incoming_thread = true;
>>>           qemu_coroutine_yield();
>>>   
>>> @@ -2349,6 +2352,7 @@ out:
>>>   static int open_return_path_on_source(MigrationState *ms,
>>>                                         bool create_thread)
>>>   {
>>> +    Error *local_err = NULL;
>>>   
>>>       ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>>>       if (!ms->rp_state.from_dst_file) {
>>> @@ -2362,10 +2366,15 @@ static int open_return_path_on_source(MigrationState *ms,
>>>           return 0;
>>>       }
>>>   
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>> -                       source_return_path_thread, ms,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>> +                           source_return_path_thread, ms,
>>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +        error_reportf_err(local_err,
>>> +                          "failed to create source_return_path_thread: ");
>>> +        qemu_fclose(ms->rp_state.from_dst_file);
>>> +        ms->rp_state.from_dst_file = NULL;
>>> +        return -1;
>>> +     }
>>>   
>>>       trace_open_return_path_on_source_continue();
>>>   
>>> @@ -3201,9 +3210,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>>         if (multifd_save_setup() != 0) {
>>             migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>                               MIGRATION_STATUS_FAILED);
>>>           migrate_fd_cleanup(s);
>>>           return;
>>>       }
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>>> +                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
>>> +        error_reportf_err(error_in, "failed to create migration_thread: ");
>>> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>>> +        migrate_fd_cleanup(s);
>> Is there anything to clean up for multifd_save_setup()?  Dave?
> I need to bounce that one to Juan; he knows the multifd stuff; cc'd

Hi Juan, Peter,

I think this may involve the sequential order between migration_thread() and
multifd_save_setup() we talked earlier :

https://www.mail-archive.com/qemu-devel@nongnu.org/msg571178.html

To be specific, the current code can not tell whether all channels have 
been sent via multifd_new_send_channel_async() before/during/after the 
migration_thread() runs.

What do you think?

Have a nice day, thanks :)
Fei
>
>>> +        return;
>>> +    }
>>>       s->migration_thread_running = true;
>>>   }
>>>   
>>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>>> index 221ea24919..0934a1403a 100644
>>> --- a/migration/postcopy-ram.c
>>> +++ b/migration/postcopy-ram.c
>>> @@ -1083,6 +1083,8 @@ retry:
>>>   
>>>   int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>>   {
>>> +    Error *local_err = NULL;
>>> +
>>>       /* Open the fd for the kernel to give us userfaults */
>>>       mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>>>       if (mis->userfault_fd == -1) {
>>> @@ -1109,10 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>>       }
>>>   
>>>       qemu_sem_init(&mis->fault_thread_sem, 0);
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>> -                       postcopy_ram_fault_thread, mis,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>> +                           postcopy_ram_fault_thread, mis,
>>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +        error_reportf_err(local_err,
>>> +                          "failed to create postcopy_ram_fault_thread: ");
>>> +        close(mis->userfault_event_fd);
>>> +        close(mis->userfault_fd);
>>> +        qemu_sem_destroy(&mis->fault_thread_sem);
>>> +        return -1;
>>> +    }
>>>       qemu_sem_wait(&mis->fault_thread_sem);
>>>       qemu_sem_destroy(&mis->fault_thread_sem);
>>>       mis->have_fault_thread = true;
>>         /* Mark so that we get notified of accesses to unwritten areas */
>>         if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
>>             error_report("ram_block_enable_notify failed");
>>             return -1;
>>
>> Not this patch's problem, but here goes anyway: where are
>> mis->userfault_event_fd and mis->userfault_fd closed?
> As Fei replied, see postcopy_ram_incoming_cleanup; once the fault thread
> is running it does a bigger cleanup.
>
> Dave
>
>>         }
>>
>>> diff --git a/migration/ram.c b/migration/ram.c
>>> index 700ea229e0..66b8b764f1 100644
>>> --- a/migration/ram.c
>>> +++ b/migration/ram.c
>>> @@ -441,6 +441,14 @@ static void compress_threads_save_cleanup(void)
>>>   
>>>       thread_count = migrate_compress_threads();
>>>       for (i = 0; i < thread_count; i++) {
>>> +        qemu_mutex_lock(&comp_param[i].mutex);
>>> +        comp_param[i].quit = true;
>>> +        qemu_cond_signal(&comp_param[i].cond);
>>> +        qemu_mutex_unlock(&comp_param[i].mutex);
>>> +
>>> +        qemu_mutex_destroy(&comp_param[i].mutex);
>>> +        qemu_cond_destroy(&comp_param[i].cond);
>>> +
>>>           /*
>>>            * we use it as a indicator which shows if the thread is
>>>            * properly init'd or not
>>> @@ -448,15 +456,7 @@ static void compress_threads_save_cleanup(void)
>>>           if (!comp_param[i].file) {
>>>               break;
>>>           }
>>> -
>>> -        qemu_mutex_lock(&comp_param[i].mutex);
>>> -        comp_param[i].quit = true;
>>> -        qemu_cond_signal(&comp_param[i].cond);
>>> -        qemu_mutex_unlock(&comp_param[i].mutex);
>>> -
>>>           qemu_thread_join(compress_threads + i);
>>> -        qemu_mutex_destroy(&comp_param[i].mutex);
>>> -        qemu_cond_destroy(&comp_param[i].cond);
>>>           deflateEnd(&comp_param[i].stream);
>>>           g_free(comp_param[i].originbuf);
>>>           qemu_fclose(comp_param[i].file);
>>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>>   static int compress_threads_save_setup(void)
>>>   {
>>>       int i, thread_count;
>>> +    Error *local_err = NULL;
>>>   
>>>       if (!migrate_use_compression()) {
>>>           return 0;
>>> @@ -483,6 +484,9 @@ static int compress_threads_save_setup(void)
>>>       qemu_cond_init(&comp_done_cond);
>>>       qemu_mutex_init(&comp_done_lock);
>>>       for (i = 0; i < thread_count; i++) {
>>> +        qemu_mutex_init(&comp_param[i].mutex);
>>> +        qemu_cond_init(&comp_param[i].cond);
>>> +        comp_param[i].quit = false;
>>>           comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
>>>           if (!comp_param[i].originbuf) {
>>>               goto exit;
>>> @@ -499,13 +503,16 @@ static int compress_threads_save_setup(void)
>>>            */
>>>           comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
>>>           comp_param[i].done = true;
>>> -        comp_param[i].quit = false;
>>> -        qemu_mutex_init(&comp_param[i].mutex);
>>> -        qemu_cond_init(&comp_param[i].cond);
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(compress_threads + i, "compress",
>>> -                           do_data_compress, comp_param + i,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(compress_threads + i, "compress",
>>> +                               do_data_compress, comp_param + i,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            error_reportf_err(local_err, "failed to create do_data_compress: ");
>>> +            deflateEnd(&comp_param[i].stream);
>>> +            g_free(comp_param[i].originbuf);
>>> +            qemu_fclose(comp_param[i].file);
>>> +            comp_param[i].file = NULL;
>>> +            goto exit;
>>> +        }
>>>       }
>>>       return 0;
>>>   
>>> @@ -1076,9 +1083,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>>>           p->c = QIO_CHANNEL(sioc);
>>>           qio_channel_set_delay(p->c, false);
>>>           p->running = true;
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            migrate_set_error(migrate_get_current(), local_err);
>>> +            error_reportf_err(local_err,
>>> +                              "failed to create multifd_send_thread: ");
>>> +            multifd_save_cleanup();
>>> +            return;
>>> +        }
>>>   
>>>           atomic_inc(&multifd_send_state->count);
>>>       }
>>> @@ -1357,9 +1369,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>>>       p->num_packets = 1;
>>>   
>>>       p->running = true;
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +        error_propagate_prepend(errp, local_err,
>>> +                                "failed to create multifd_recv_thread: ");
>> This prepends only if @errp isn't null.  Fine from the caller's point of
>> view.  But it also affects multifd_recv_thread():
>>
>>> +        multifd_recv_terminate_threads(local_err);
>> That's at least unclean.
>>
>> I think you should use error_prepend() and error_propagate() here.
>>
>>> +        return false;
>>> +    }
>>>       atomic_inc(&multifd_recv_state->count);
>>>       return atomic_read(&multifd_recv_state->count) ==
>>>              migrate_multifd_channels();
>>> @@ -3631,6 +3647,7 @@ static void compress_threads_load_cleanup(void)
>>>   static int compress_threads_load_setup(QEMUFile *f)
>>>   {
>>>       int i, thread_count;
>>> +    Error *local_err = NULL;
>>>   
>>>       if (!migrate_use_compression()) {
>>>           return 0;
>>> @@ -3652,10 +3669,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>>>           qemu_cond_init(&decomp_param[i].cond);
>>>           decomp_param[i].done = true;
>>>           decomp_param[i].quit = false;
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(decompress_threads + i, "decompress",
>>> -                           do_data_decompress, decomp_param + i,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(decompress_threads + i, "decompress",
>>> +                               do_data_decompress, decomp_param + i,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            error_reportf_err(local_err,
>>> +                              "failed to create do_data_decompress: ");
>>> +            goto exit;
>>> +        }
>>>       }
>>>       return 0;
>>>   exit:
>>> diff --git a/migration/savevm.c b/migration/savevm.c
>>> index d5b45843b6..310cecbf8f 100644
>>> --- a/migration/savevm.c
>>> +++ b/migration/savevm.c
>>> @@ -1747,10 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>>>       mis->have_listen_thread = true;
>>>       /* Start up the listening thread and wait for it to signal ready */
>>>       qemu_sem_init(&mis->listen_thread_sem, 0);
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>> -                       postcopy_ram_listen_thread, NULL,
>>> -                       QEMU_THREAD_DETACHED, &error_abort);
>>> +    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>> +                           postcopy_ram_listen_thread, NULL,
>>> +                           QEMU_THREAD_DETACHED, &local_err) < 0) {
>>> +        error_reportf_err(local_err,
>>> +                          "failed to create postcopy_ram_listen_thread: ");
>>> +        qemu_sem_destroy(&mis->listen_thread_sem);
>>> +        return -1;
>>> +    }
>>>       qemu_sem_wait(&mis->listen_thread_sem);
>>>       qemu_sem_destroy(&mis->listen_thread_sem);
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Fei Li March 24, 2019, 4:18 p.m. UTC | #6
在 2019/2/5 上午12:34, Dr. David Alan Gilbert 写道:
> * Markus Armbruster (armbru@redhat.com) wrote:
>> Dave, I tried to review the error paths, in particular resource cleanup,
>> but there's a lot going on, and I'm not feeling confident.  Please have
>> a close look.
>>
>> Fei Li <lifei1214@126.com> writes:
>>
>>> From: Fei Li <fli@suse.com>
>>>
>>> Update qemu_thread_create()'s callers by
>>> - setting an error on qemu_thread_create() failure for callers that
>>>    set an error on failure;
>>> - reporting the error and returning failure for callers that return
>>>    an error code on failure;
>>> - reporting the error and setting some state for callers that just
>>>    report errors and choose not to continue on.
>>>
>>> Besides, make compress_threads_save_cleanup() cope with partially
>>> initialized comp_param[i] to adapt to the new qemu_thread_create()
>>> failure case.
>>>
>>> Cc: Markus Armbruster <armbru@redhat.com>
>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> Signed-off-by: Fei Li <fli@suse.com>
>>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> ---
>>>   migration/migration.c    | 35 +++++++++++++-------
>>>   migration/postcopy-ram.c | 16 ++++++---
>>>   migration/ram.c          | 70 ++++++++++++++++++++++++++--------------
>>>   migration/savevm.c       | 12 ++++---
>>>   4 files changed, 89 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index 1da71211c8..0034ca1334 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -447,10 +447,13 @@ static void process_incoming_migration_co(void *opaque)
>>>               goto fail;
>>>           }
>>>   
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>>> -                           colo_process_incoming_thread, mis,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>>> +                               colo_process_incoming_thread, mis,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            error_reportf_err(local_err, "failed to create "
>>> +                              "colo_process_incoming_thread: ");
>>> +            goto fail;
>>> +        }
>>>           mis->have_colo_incoming_thread = true;
>>>           qemu_coroutine_yield();
>>>   
>>> @@ -2349,6 +2352,7 @@ out:
>>>   static int open_return_path_on_source(MigrationState *ms,
>>>                                         bool create_thread)
>>>   {
>>> +    Error *local_err = NULL;
>>>   
>>>       ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>>>       if (!ms->rp_state.from_dst_file) {
>>> @@ -2362,10 +2366,15 @@ static int open_return_path_on_source(MigrationState *ms,
>>>           return 0;
>>>       }
>>>   
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>> -                       source_return_path_thread, ms,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>> +                           source_return_path_thread, ms,
>>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +        error_reportf_err(local_err,
>>> +                          "failed to create source_return_path_thread: ");
>>> +        qemu_fclose(ms->rp_state.from_dst_file);
>>> +        ms->rp_state.from_dst_file = NULL;
>>> +        return -1;
>>> +     }
>>>   
>>>       trace_open_return_path_on_source_continue();
>>>   
>>> @@ -3201,9 +3210,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>>         if (multifd_save_setup() != 0) {
>>             migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
>>                               MIGRATION_STATUS_FAILED);
>>>           migrate_fd_cleanup(s);
>>>           return;
>>>       }
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>>> +                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
>>> +        error_reportf_err(error_in, "failed to create migration_thread: ");
>>> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>>> +        migrate_fd_cleanup(s);
>> Is there anything to clean up for multifd_save_setup()?  Dave?
> I need to bounce that one to Juan; he knows the multifd stuff; cc'd

Hi all,

I check the code again, the migrate_fd_cleanup() will call 
multifd_save_setup() to clean
the multifd-stuff. Thus the current code is just fine. :)

Sorry for the late reply...

BTW, I will send the updated v12 later.

Have a nice day, thanks
Fei
>
>>> +        return;
>>> +    }
>>>       s->migration_thread_running = true;
>>>   }
>>>   
>>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>>> index 221ea24919..0934a1403a 100644
>>> --- a/migration/postcopy-ram.c
>>> +++ b/migration/postcopy-ram.c
>>> @@ -1083,6 +1083,8 @@ retry:
>>>   
>>>   int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>>   {
>>> +    Error *local_err = NULL;
>>> +
>>>       /* Open the fd for the kernel to give us userfaults */
>>>       mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>>>       if (mis->userfault_fd == -1) {
>>> @@ -1109,10 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>>       }
>>>   
>>>       qemu_sem_init(&mis->fault_thread_sem, 0);
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>> -                       postcopy_ram_fault_thread, mis,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>> +                           postcopy_ram_fault_thread, mis,
>>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +        error_reportf_err(local_err,
>>> +                          "failed to create postcopy_ram_fault_thread: ");
>>> +        close(mis->userfault_event_fd);
>>> +        close(mis->userfault_fd);
>>> +        qemu_sem_destroy(&mis->fault_thread_sem);
>>> +        return -1;
>>> +    }
>>>       qemu_sem_wait(&mis->fault_thread_sem);
>>>       qemu_sem_destroy(&mis->fault_thread_sem);
>>>       mis->have_fault_thread = true;
>>         /* Mark so that we get notified of accesses to unwritten areas */
>>         if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
>>             error_report("ram_block_enable_notify failed");
>>             return -1;
>>
>> Not this patch's problem, but here goes anyway: where are
>> mis->userfault_event_fd and mis->userfault_fd closed?
> As Fei replied, see postcopy_ram_incoming_cleanup; once the fault thread
> is running it does a bigger cleanup.
>
> Dave
>
>>         }
>>
>>> diff --git a/migration/ram.c b/migration/ram.c
>>> index 700ea229e0..66b8b764f1 100644
>>> --- a/migration/ram.c
>>> +++ b/migration/ram.c
>>> @@ -441,6 +441,14 @@ static void compress_threads_save_cleanup(void)
>>>   
>>>       thread_count = migrate_compress_threads();
>>>       for (i = 0; i < thread_count; i++) {
>>> +        qemu_mutex_lock(&comp_param[i].mutex);
>>> +        comp_param[i].quit = true;
>>> +        qemu_cond_signal(&comp_param[i].cond);
>>> +        qemu_mutex_unlock(&comp_param[i].mutex);
>>> +
>>> +        qemu_mutex_destroy(&comp_param[i].mutex);
>>> +        qemu_cond_destroy(&comp_param[i].cond);
>>> +
>>>           /*
>>>            * we use it as a indicator which shows if the thread is
>>>            * properly init'd or not
>>> @@ -448,15 +456,7 @@ static void compress_threads_save_cleanup(void)
>>>           if (!comp_param[i].file) {
>>>               break;
>>>           }
>>> -
>>> -        qemu_mutex_lock(&comp_param[i].mutex);
>>> -        comp_param[i].quit = true;
>>> -        qemu_cond_signal(&comp_param[i].cond);
>>> -        qemu_mutex_unlock(&comp_param[i].mutex);
>>> -
>>>           qemu_thread_join(compress_threads + i);
>>> -        qemu_mutex_destroy(&comp_param[i].mutex);
>>> -        qemu_cond_destroy(&comp_param[i].cond);
>>>           deflateEnd(&comp_param[i].stream);
>>>           g_free(comp_param[i].originbuf);
>>>           qemu_fclose(comp_param[i].file);
>>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>>   static int compress_threads_save_setup(void)
>>>   {
>>>       int i, thread_count;
>>> +    Error *local_err = NULL;
>>>   
>>>       if (!migrate_use_compression()) {
>>>           return 0;
>>> @@ -483,6 +484,9 @@ static int compress_threads_save_setup(void)
>>>       qemu_cond_init(&comp_done_cond);
>>>       qemu_mutex_init(&comp_done_lock);
>>>       for (i = 0; i < thread_count; i++) {
>>> +        qemu_mutex_init(&comp_param[i].mutex);
>>> +        qemu_cond_init(&comp_param[i].cond);
>>> +        comp_param[i].quit = false;
>>>           comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
>>>           if (!comp_param[i].originbuf) {
>>>               goto exit;
>>> @@ -499,13 +503,16 @@ static int compress_threads_save_setup(void)
>>>            */
>>>           comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
>>>           comp_param[i].done = true;
>>> -        comp_param[i].quit = false;
>>> -        qemu_mutex_init(&comp_param[i].mutex);
>>> -        qemu_cond_init(&comp_param[i].cond);
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(compress_threads + i, "compress",
>>> -                           do_data_compress, comp_param + i,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(compress_threads + i, "compress",
>>> +                               do_data_compress, comp_param + i,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            error_reportf_err(local_err, "failed to create do_data_compress: ");
>>> +            deflateEnd(&comp_param[i].stream);
>>> +            g_free(comp_param[i].originbuf);
>>> +            qemu_fclose(comp_param[i].file);
>>> +            comp_param[i].file = NULL;
>>> +            goto exit;
>>> +        }
>>>       }
>>>       return 0;
>>>   
>>> @@ -1076,9 +1083,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>>>           p->c = QIO_CHANNEL(sioc);
>>>           qio_channel_set_delay(p->c, false);
>>>           p->running = true;
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            migrate_set_error(migrate_get_current(), local_err);
>>> +            error_reportf_err(local_err,
>>> +                              "failed to create multifd_send_thread: ");
>>> +            multifd_save_cleanup();
>>> +            return;
>>> +        }
>>>   
>>>           atomic_inc(&multifd_send_state->count);
>>>       }
>>> @@ -1357,9 +1369,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>>>       p->num_packets = 1;
>>>   
>>>       p->running = true;
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>>> -                       QEMU_THREAD_JOINABLE, &error_abort);
>>> +    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>>> +                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +        error_propagate_prepend(errp, local_err,
>>> +                                "failed to create multifd_recv_thread: ");
>> This prepends only if @errp isn't null.  Fine from the caller's point of
>> view.  But it also affects multifd_recv_thread():
>>
>>> +        multifd_recv_terminate_threads(local_err);
>> That's at least unclean.
>>
>> I think you should use error_prepend() and error_propagate() here.
>>
>>> +        return false;
>>> +    }
>>>       atomic_inc(&multifd_recv_state->count);
>>>       return atomic_read(&multifd_recv_state->count) ==
>>>              migrate_multifd_channels();
>>> @@ -3631,6 +3647,7 @@ static void compress_threads_load_cleanup(void)
>>>   static int compress_threads_load_setup(QEMUFile *f)
>>>   {
>>>       int i, thread_count;
>>> +    Error *local_err = NULL;
>>>   
>>>       if (!migrate_use_compression()) {
>>>           return 0;
>>> @@ -3652,10 +3669,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>>>           qemu_cond_init(&decomp_param[i].cond);
>>>           decomp_param[i].done = true;
>>>           decomp_param[i].quit = false;
>>> -        /* TODO: let the further caller handle the error instead of abort() */
>>> -        qemu_thread_create(decompress_threads + i, "decompress",
>>> -                           do_data_decompress, decomp_param + i,
>>> -                           QEMU_THREAD_JOINABLE, &error_abort);
>>> +        if (qemu_thread_create(decompress_threads + i, "decompress",
>>> +                               do_data_decompress, decomp_param + i,
>>> +                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
>>> +            error_reportf_err(local_err,
>>> +                              "failed to create do_data_decompress: ");
>>> +            goto exit;
>>> +        }
>>>       }
>>>       return 0;
>>>   exit:
>>> diff --git a/migration/savevm.c b/migration/savevm.c
>>> index d5b45843b6..310cecbf8f 100644
>>> --- a/migration/savevm.c
>>> +++ b/migration/savevm.c
>>> @@ -1747,10 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>>>       mis->have_listen_thread = true;
>>>       /* Start up the listening thread and wait for it to signal ready */
>>>       qemu_sem_init(&mis->listen_thread_sem, 0);
>>> -    /* TODO: let the further caller handle the error instead of abort() here */
>>> -    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>> -                       postcopy_ram_listen_thread, NULL,
>>> -                       QEMU_THREAD_DETACHED, &error_abort);
>>> +    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>> +                           postcopy_ram_listen_thread, NULL,
>>> +                           QEMU_THREAD_DETACHED, &local_err) < 0) {
>>> +        error_reportf_err(local_err,
>>> +                          "failed to create postcopy_ram_listen_thread: ");
>>> +        qemu_sem_destroy(&mis->listen_thread_sem);
>>> +        return -1;
>>> +    }
>>>       qemu_sem_wait(&mis->listen_thread_sem);
>>>       qemu_sem_destroy(&mis->listen_thread_sem);
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index 1da71211c8..0034ca1334 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -447,10 +447,13 @@  static void process_incoming_migration_co(void *opaque)
             goto fail;
         }
 
-        /* TODO: let the further caller handle the error instead of abort() */
-        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
-                           colo_process_incoming_thread, mis,
-                           QEMU_THREAD_JOINABLE, &error_abort);
+        if (qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
+                               colo_process_incoming_thread, mis,
+                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
+            error_reportf_err(local_err, "failed to create "
+                              "colo_process_incoming_thread: ");
+            goto fail;
+        }
         mis->have_colo_incoming_thread = true;
         qemu_coroutine_yield();
 
@@ -2349,6 +2352,7 @@  out:
 static int open_return_path_on_source(MigrationState *ms,
                                       bool create_thread)
 {
+    Error *local_err = NULL;
 
     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
     if (!ms->rp_state.from_dst_file) {
@@ -2362,10 +2366,15 @@  static int open_return_path_on_source(MigrationState *ms,
         return 0;
     }
 
-    /* TODO: let the further caller handle the error instead of abort() here */
-    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
-                       source_return_path_thread, ms,
-                       QEMU_THREAD_JOINABLE, &error_abort);
+    if (qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+                           source_return_path_thread, ms,
+                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
+        error_reportf_err(local_err,
+                          "failed to create source_return_path_thread: ");
+        qemu_fclose(ms->rp_state.from_dst_file);
+        ms->rp_state.from_dst_file = NULL;
+        return -1;
+     }
 
     trace_open_return_path_on_source_continue();
 
@@ -3201,9 +3210,13 @@  void migrate_fd_connect(MigrationState *s, Error *error_in)
         migrate_fd_cleanup(s);
         return;
     }
-    /* TODO: let the further caller handle the error instead of abort() here */
-    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
-                       QEMU_THREAD_JOINABLE, &error_abort);
+    if (qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
+                           QEMU_THREAD_JOINABLE, &error_in) < 0) {
+        error_reportf_err(error_in, "failed to create migration_thread: ");
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+        migrate_fd_cleanup(s);
+        return;
+    }
     s->migration_thread_running = true;
 }
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 221ea24919..0934a1403a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1083,6 +1083,8 @@  retry:
 
 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 {
+    Error *local_err = NULL;
+
     /* Open the fd for the kernel to give us userfaults */
     mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
     if (mis->userfault_fd == -1) {
@@ -1109,10 +1111,16 @@  int postcopy_ram_enable_notify(MigrationIncomingState *mis)
     }
 
     qemu_sem_init(&mis->fault_thread_sem, 0);
-    /* TODO: let the further caller handle the error instead of abort() here */
-    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
-                       postcopy_ram_fault_thread, mis,
-                       QEMU_THREAD_JOINABLE, &error_abort);
+    if (qemu_thread_create(&mis->fault_thread, "postcopy/fault",
+                           postcopy_ram_fault_thread, mis,
+                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
+        error_reportf_err(local_err,
+                          "failed to create postcopy_ram_fault_thread: ");
+        close(mis->userfault_event_fd);
+        close(mis->userfault_fd);
+        qemu_sem_destroy(&mis->fault_thread_sem);
+        return -1;
+    }
     qemu_sem_wait(&mis->fault_thread_sem);
     qemu_sem_destroy(&mis->fault_thread_sem);
     mis->have_fault_thread = true;
diff --git a/migration/ram.c b/migration/ram.c
index 700ea229e0..66b8b764f1 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -441,6 +441,14 @@  static void compress_threads_save_cleanup(void)
 
     thread_count = migrate_compress_threads();
     for (i = 0; i < thread_count; i++) {
+        qemu_mutex_lock(&comp_param[i].mutex);
+        comp_param[i].quit = true;
+        qemu_cond_signal(&comp_param[i].cond);
+        qemu_mutex_unlock(&comp_param[i].mutex);
+
+        qemu_mutex_destroy(&comp_param[i].mutex);
+        qemu_cond_destroy(&comp_param[i].cond);
+
         /*
          * we use it as a indicator which shows if the thread is
          * properly init'd or not
@@ -448,15 +456,7 @@  static void compress_threads_save_cleanup(void)
         if (!comp_param[i].file) {
             break;
         }
-
-        qemu_mutex_lock(&comp_param[i].mutex);
-        comp_param[i].quit = true;
-        qemu_cond_signal(&comp_param[i].cond);
-        qemu_mutex_unlock(&comp_param[i].mutex);
-
         qemu_thread_join(compress_threads + i);
-        qemu_mutex_destroy(&comp_param[i].mutex);
-        qemu_cond_destroy(&comp_param[i].cond);
         deflateEnd(&comp_param[i].stream);
         g_free(comp_param[i].originbuf);
         qemu_fclose(comp_param[i].file);
@@ -473,6 +473,7 @@  static void compress_threads_save_cleanup(void)
 static int compress_threads_save_setup(void)
 {
     int i, thread_count;
+    Error *local_err = NULL;
 
     if (!migrate_use_compression()) {
         return 0;
@@ -483,6 +484,9 @@  static int compress_threads_save_setup(void)
     qemu_cond_init(&comp_done_cond);
     qemu_mutex_init(&comp_done_lock);
     for (i = 0; i < thread_count; i++) {
+        qemu_mutex_init(&comp_param[i].mutex);
+        qemu_cond_init(&comp_param[i].cond);
+        comp_param[i].quit = false;
         comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
         if (!comp_param[i].originbuf) {
             goto exit;
@@ -499,13 +503,16 @@  static int compress_threads_save_setup(void)
          */
         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
         comp_param[i].done = true;
-        comp_param[i].quit = false;
-        qemu_mutex_init(&comp_param[i].mutex);
-        qemu_cond_init(&comp_param[i].cond);
-        /* TODO: let the further caller handle the error instead of abort() */
-        qemu_thread_create(compress_threads + i, "compress",
-                           do_data_compress, comp_param + i,
-                           QEMU_THREAD_JOINABLE, &error_abort);
+        if (qemu_thread_create(compress_threads + i, "compress",
+                               do_data_compress, comp_param + i,
+                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
+            error_reportf_err(local_err, "failed to create do_data_compress: ");
+            deflateEnd(&comp_param[i].stream);
+            g_free(comp_param[i].originbuf);
+            qemu_fclose(comp_param[i].file);
+            comp_param[i].file = NULL;
+            goto exit;
+        }
     }
     return 0;
 
@@ -1076,9 +1083,14 @@  static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
         p->c = QIO_CHANNEL(sioc);
         qio_channel_set_delay(p->c, false);
         p->running = true;
-        /* TODO: let the further caller handle the error instead of abort() */
-        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
-                           QEMU_THREAD_JOINABLE, &error_abort);
+        if (qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
+                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
+            migrate_set_error(migrate_get_current(), local_err);
+            error_reportf_err(local_err,
+                              "failed to create multifd_send_thread: ");
+            multifd_save_cleanup();
+            return;
+        }
 
         atomic_inc(&multifd_send_state->count);
     }
@@ -1357,9 +1369,13 @@  bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
     p->num_packets = 1;
 
     p->running = true;
-    /* TODO: let the further caller handle the error instead of abort() here */
-    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
-                       QEMU_THREAD_JOINABLE, &error_abort);
+    if (qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
+                           QEMU_THREAD_JOINABLE, &local_err) < 0) {
+        error_propagate_prepend(errp, local_err,
+                                "failed to create multifd_recv_thread: ");
+        multifd_recv_terminate_threads(local_err);
+        return false;
+    }
     atomic_inc(&multifd_recv_state->count);
     return atomic_read(&multifd_recv_state->count) ==
            migrate_multifd_channels();
@@ -3631,6 +3647,7 @@  static void compress_threads_load_cleanup(void)
 static int compress_threads_load_setup(QEMUFile *f)
 {
     int i, thread_count;
+    Error *local_err = NULL;
 
     if (!migrate_use_compression()) {
         return 0;
@@ -3652,10 +3669,13 @@  static int compress_threads_load_setup(QEMUFile *f)
         qemu_cond_init(&decomp_param[i].cond);
         decomp_param[i].done = true;
         decomp_param[i].quit = false;
-        /* TODO: let the further caller handle the error instead of abort() */
-        qemu_thread_create(decompress_threads + i, "decompress",
-                           do_data_decompress, decomp_param + i,
-                           QEMU_THREAD_JOINABLE, &error_abort);
+        if (qemu_thread_create(decompress_threads + i, "decompress",
+                               do_data_decompress, decomp_param + i,
+                               QEMU_THREAD_JOINABLE, &local_err) < 0) {
+            error_reportf_err(local_err,
+                              "failed to create do_data_decompress: ");
+            goto exit;
+        }
     }
     return 0;
 exit:
diff --git a/migration/savevm.c b/migration/savevm.c
index d5b45843b6..310cecbf8f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1747,10 +1747,14 @@  static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
     mis->have_listen_thread = true;
     /* Start up the listening thread and wait for it to signal ready */
     qemu_sem_init(&mis->listen_thread_sem, 0);
-    /* TODO: let the further caller handle the error instead of abort() here */
-    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
-                       postcopy_ram_listen_thread, NULL,
-                       QEMU_THREAD_DETACHED, &error_abort);
+    if (qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+                           postcopy_ram_listen_thread, NULL,
+                           QEMU_THREAD_DETACHED, &local_err) < 0) {
+        error_reportf_err(local_err,
+                          "failed to create postcopy_ram_listen_thread: ");
+        qemu_sem_destroy(&mis->listen_thread_sem);
+        return -1;
+    }
     qemu_sem_wait(&mis->listen_thread_sem);
     qemu_sem_destroy(&mis->listen_thread_sem);