diff mbox series

migration: do not exit on incoming failure

Message ID 20240417221329.248803-1-vsementsov@yandex-team.ru
State New
Headers show
Series migration: do not exit on incoming failure | expand

Commit Message

Vladimir Sementsov-Ogievskiy April 17, 2024, 10:13 p.m. UTC
We do set MIGRATION_FAILED state, but don't give a chance to
orchestrator to query migration state and get the error.

Let's report an error through QAPI like we do on outgoing migration.

migration-test is updated correspondingly.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---

Doubt: is exiting on failure a contract? Will this commit break
something in Libvirt? Finally, could we just change the logic, or I need
and additional migration-parameter for new behavior?

 migration/migration.c           | 22 +++++++---------------
 tests/qtest/migration-helpers.c | 13 ++++++++++---
 tests/qtest/migration-helpers.h |  3 ++-
 tests/qtest/migration-test.c    | 14 +++++++-------
 4 files changed, 26 insertions(+), 26 deletions(-)

Comments

Fabiano Rosas April 18, 2024, 2:27 p.m. UTC | #1
Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:

> We do set MIGRATION_FAILED state, but don't give a chance to
> orchestrator to query migration state and get the error.
>
> Let's report an error through QAPI like we do on outgoing migration.
>
> migration-test is updated correspondingly.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>
> Doubt: is exiting on failure a contract? Will this commit break
> something in Libvirt? Finally, could we just change the logic, or I need
> and additional migration-parameter for new behavior?

It seems we depend on the non-zero value:

  4aead69241 ("migration: reflect incoming failure to shell")
  Author: Eric Blake <eblake@redhat.com>
  Date:   Tue Apr 16 15:50:41 2013 -0600
  
      migration: reflect incoming failure to shell
      
      Management apps like libvirt don't know to pay attention to
      stderr unless there is a non-zero exit status.
      
      * migration.c (process_incoming_migration_co): Exit with non-zero
      status on failure.
      
      Signed-off-by: Eric Blake <eblake@redhat.com>
      Message-id: 1366149041-626-1-git-send-email-eblake@redhat.com
      Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

One idea would be to plumb the s->error somehow through
migration_shutdown() and allow qemu_cleanup() to change the status
value.

>  migration/migration.c           | 22 +++++++---------------
>  tests/qtest/migration-helpers.c | 13 ++++++++++---
>  tests/qtest/migration-helpers.h |  3 ++-
>  tests/qtest/migration-test.c    | 14 +++++++-------
>  4 files changed, 26 insertions(+), 26 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 86bf76e925..3c203e767d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyState ps;
>      int ret;
> +    Error *local_err = NULL;
>  
>      assert(mis->from_src_file);
>  
>      if (compress_threads_load_setup(mis->from_src_file)) {
> -        error_report("Failed to setup decompress threads");
> +        error_setg(&local_err, "Failed to setup decompress threads");
>          goto fail;
>      }
>  
> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
>      }
>  
>      if (ret < 0) {
> -        MigrationState *s = migrate_get_current();
> -
> -        if (migrate_has_error(s)) {
> -            WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
> -                error_report_err(s->error);
> -            }
> -        }
> -        error_report("load of migration failed: %s", strerror(-ret));
> +        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
>          goto fail;
>      }
>  
>      if (colo_incoming_co() < 0) {
> +        error_setg(&local_err, "colo incoming failed");
>          goto fail;
>      }
>  
>      migration_bh_schedule(process_incoming_migration_bh, mis);
>      return;
>  fail:
> +    migrate_set_error(migrate_get_current(), local_err);
> +    error_report_err(local_err);

This will report an different error from the QMP one if s->error happens
to be already set. Either use s->error here or prepend the "load of
migration..." error to the s->error above.

>      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>                        MIGRATION_STATUS_FAILED);
> -    qemu_fclose(mis->from_src_file);
> -
> -    multifd_recv_cleanup();
> -    compress_threads_load_cleanup();
> -
> -    exit(EXIT_FAILURE);
> +    migration_incoming_state_destroy();
>  }
>  
>  /**
> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
> index e451dbdbed..91c13bd566 100644
> --- a/tests/qtest/migration-helpers.c
> +++ b/tests/qtest/migration-helpers.c
> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
>      wait_for_migration_status(who, "completed", NULL);
>  }
>  
> -void wait_for_migration_fail(QTestState *from, bool allow_active)
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> +                             bool is_incoming)
>  {
>      g_test_timer_start();
>      QDict *rsp_return;
> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
>      /* Is the machine currently running? */
>      rsp_return = qtest_qmp_assert_success_ref(from,
>                                                "{ 'execute': 'query-status' }");
> -    g_assert(qdict_haskey(rsp_return, "running"));
> -    g_assert(qdict_get_bool(rsp_return, "running"));
> +    if (is_incoming) {
> +        if (qdict_haskey(rsp_return, "running")) {
> +            g_assert(!qdict_get_bool(rsp_return, "running"));
> +        }
> +    } else {
> +        g_assert(qdict_haskey(rsp_return, "running"));
> +        g_assert(qdict_get_bool(rsp_return, "running"));
> +    }
>      qobject_unref(rsp_return);
>  }
>  
> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
> index 3bf7ded1b9..7bd07059ae 100644
> --- a/tests/qtest/migration-helpers.h
> +++ b/tests/qtest/migration-helpers.h
> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>  
>  void wait_for_migration_complete(QTestState *who);
>  
> -void wait_for_migration_fail(QTestState *from, bool allow_active);
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> +                             bool is_incoming);
>  
>  char *find_common_machine_version(const char *mtype, const char *var1,
>                                    const char *var2);
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 1d2cee87ea..e00b755f05 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
>          return;
>      }
>      migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
> -    wait_for_migration_fail(from, false);
> +    wait_for_migration_fail(from, false, false);
>      test_migrate_end(from, to, false);
>  }
>  
> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>  
>      if (args->result != MIG_TEST_SUCCEED) {
>          bool allow_active = args->result == MIG_TEST_FAIL;
> -        wait_for_migration_fail(from, allow_active);
> +        wait_for_migration_fail(from, allow_active, false);
>  
>          if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
> -            qtest_set_expected_status(to, EXIT_FAILURE);
> +            wait_for_migration_fail(to, true, true);
>          }
>      } else {
>          if (args->live) {
> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
>      migrate_qmp(from, uri, "{}");
>  
>      if (should_fail) {
> -        qtest_set_expected_status(to, EXIT_FAILURE);
> -        wait_for_migration_fail(from, true);
> +        wait_for_migration_fail(to, true, true);
> +        wait_for_migration_fail(from, true, false);
>      } else {
>          wait_for_migration_complete(from);
>      }
> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
>      migrate_cancel(from);
>  
>      /* Make sure QEMU process "to" exited */
> -    qtest_set_expected_status(to, EXIT_FAILURE);
> -    qtest_wait_qemu(to);
> +    wait_for_migration_fail(to, true, true);
> +    qtest_quit(to);
>  
>      args = (MigrateStart){
>          .only_target = true,
Daniel P. Berrangé April 18, 2024, 2:37 p.m. UTC | #2
On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> We do set MIGRATION_FAILED state, but don't give a chance to
> orchestrator to query migration state and get the error.
> 
> Let's report an error through QAPI like we do on outgoing migration.
> 
> migration-test is updated correspondingly.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
> 
> Doubt: is exiting on failure a contract? Will this commit break
> something in Libvirt? Finally, could we just change the logic, or I need
> and additional migration-parameter for new behavior?

There's a decent risk that this could break apps, whether
libvirt or something else, especially if the app is just
launching QEMU with '-incoming URI', rather than using
'-incoming defer' and then explicitly using QMP to start the
incoming migration.

I'd say that with '-incoming defer' we should *not* exit on
migration error, because that arg implies the app explicitly
wants to be using QMP to control migration.

With the legacy '-incoming URI' it is probably best to keep
exit on error, as that's comparatively more likely to be used
in adhoc scenarios where the app/user is ignoring QMP on the
dst side.

None the less, I think we need to check how libvirt behaves
with this patch to be sure of no surprises.

> 
>  migration/migration.c           | 22 +++++++---------------
>  tests/qtest/migration-helpers.c | 13 ++++++++++---
>  tests/qtest/migration-helpers.h |  3 ++-
>  tests/qtest/migration-test.c    | 14 +++++++-------
>  4 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 86bf76e925..3c203e767d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyState ps;
>      int ret;
> +    Error *local_err = NULL;
>  
>      assert(mis->from_src_file);
>  
>      if (compress_threads_load_setup(mis->from_src_file)) {
> -        error_report("Failed to setup decompress threads");
> +        error_setg(&local_err, "Failed to setup decompress threads");
>          goto fail;
>      }
>  
> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
>      }
>  
>      if (ret < 0) {
> -        MigrationState *s = migrate_get_current();
> -
> -        if (migrate_has_error(s)) {
> -            WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
> -                error_report_err(s->error);
> -            }
> -        }
> -        error_report("load of migration failed: %s", strerror(-ret));
> +        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
>          goto fail;
>      }
>  
>      if (colo_incoming_co() < 0) {
> +        error_setg(&local_err, "colo incoming failed");
>          goto fail;
>      }
>  
>      migration_bh_schedule(process_incoming_migration_bh, mis);
>      return;
>  fail:
> +    migrate_set_error(migrate_get_current(), local_err);
> +    error_report_err(local_err);
>      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>                        MIGRATION_STATUS_FAILED);
> -    qemu_fclose(mis->from_src_file);
> -
> -    multifd_recv_cleanup();
> -    compress_threads_load_cleanup();
> -
> -    exit(EXIT_FAILURE);
> +    migration_incoming_state_destroy();
>  }
>  
>  /**
> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
> index e451dbdbed..91c13bd566 100644
> --- a/tests/qtest/migration-helpers.c
> +++ b/tests/qtest/migration-helpers.c
> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
>      wait_for_migration_status(who, "completed", NULL);
>  }
>  
> -void wait_for_migration_fail(QTestState *from, bool allow_active)
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> +                             bool is_incoming)
>  {
>      g_test_timer_start();
>      QDict *rsp_return;
> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
>      /* Is the machine currently running? */
>      rsp_return = qtest_qmp_assert_success_ref(from,
>                                                "{ 'execute': 'query-status' }");
> -    g_assert(qdict_haskey(rsp_return, "running"));
> -    g_assert(qdict_get_bool(rsp_return, "running"));
> +    if (is_incoming) {
> +        if (qdict_haskey(rsp_return, "running")) {
> +            g_assert(!qdict_get_bool(rsp_return, "running"));
> +        }
> +    } else {
> +        g_assert(qdict_haskey(rsp_return, "running"));
> +        g_assert(qdict_get_bool(rsp_return, "running"));
> +    }
>      qobject_unref(rsp_return);
>  }
>  
> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
> index 3bf7ded1b9..7bd07059ae 100644
> --- a/tests/qtest/migration-helpers.h
> +++ b/tests/qtest/migration-helpers.h
> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>  
>  void wait_for_migration_complete(QTestState *who);
>  
> -void wait_for_migration_fail(QTestState *from, bool allow_active);
> +void wait_for_migration_fail(QTestState *from, bool allow_active,
> +                             bool is_incoming);
>  
>  char *find_common_machine_version(const char *mtype, const char *var1,
>                                    const char *var2);
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 1d2cee87ea..e00b755f05 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
>          return;
>      }
>      migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
> -    wait_for_migration_fail(from, false);
> +    wait_for_migration_fail(from, false, false);
>      test_migrate_end(from, to, false);
>  }
>  
> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>  
>      if (args->result != MIG_TEST_SUCCEED) {
>          bool allow_active = args->result == MIG_TEST_FAIL;
> -        wait_for_migration_fail(from, allow_active);
> +        wait_for_migration_fail(from, allow_active, false);
>  
>          if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
> -            qtest_set_expected_status(to, EXIT_FAILURE);
> +            wait_for_migration_fail(to, true, true);
>          }
>      } else {
>          if (args->live) {
> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
>      migrate_qmp(from, uri, "{}");
>  
>      if (should_fail) {
> -        qtest_set_expected_status(to, EXIT_FAILURE);
> -        wait_for_migration_fail(from, true);
> +        wait_for_migration_fail(to, true, true);
> +        wait_for_migration_fail(from, true, false);
>      } else {
>          wait_for_migration_complete(from);
>      }
> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
>      migrate_cancel(from);
>  
>      /* Make sure QEMU process "to" exited */
> -    qtest_set_expected_status(to, EXIT_FAILURE);
> -    qtest_wait_qemu(to);
> +    wait_for_migration_fail(to, true, true);
> +    qtest_quit(to);
>  
>      args = (MigrateStart){
>          .only_target = true,
> -- 
> 2.34.1
> 
> 

With regards,
Daniel
Vladimir Sementsov-Ogievskiy April 18, 2024, 3:38 p.m. UTC | #3
On 18.04.24 17:27, Fabiano Rosas wrote:
> Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> writes:
> 
>> We do set MIGRATION_FAILED state, but don't give a chance to
>> orchestrator to query migration state and get the error.
>>
>> Let's report an error through QAPI like we do on outgoing migration.
>>
>> migration-test is updated correspondingly.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>
>> Doubt: is exiting on failure a contract? Will this commit break
>> something in Libvirt? Finally, could we just change the logic, or I need
>> and additional migration-parameter for new behavior?
> 
> It seems we depend on the non-zero value:
> 
>    4aead69241 ("migration: reflect incoming failure to shell")
>    Author: Eric Blake <eblake@redhat.com>
>    Date:   Tue Apr 16 15:50:41 2013 -0600
>    
>        migration: reflect incoming failure to shell
>        
>        Management apps like libvirt don't know to pay attention to
>        stderr unless there is a non-zero exit status.
>        
>        * migration.c (process_incoming_migration_co): Exit with non-zero
>        status on failure.
>        
>        Signed-off-by: Eric Blake <eblake@redhat.com>
>        Message-id: 1366149041-626-1-git-send-email-eblake@redhat.com
>        Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
> 
> One idea would be to plumb the s->error somehow through
> migration_shutdown() and allow qemu_cleanup() to change the status
> value.

The idea is not to exit at all, and wait for 'quit' QMP command to exit. But I agree with Daniel that new behavior is good only for -incoming defer.

> 
>>   migration/migration.c           | 22 +++++++---------------
>>   tests/qtest/migration-helpers.c | 13 ++++++++++---
>>   tests/qtest/migration-helpers.h |  3 ++-
>>   tests/qtest/migration-test.c    | 14 +++++++-------
>>   4 files changed, 26 insertions(+), 26 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 86bf76e925..3c203e767d 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>       PostcopyState ps;
>>       int ret;
>> +    Error *local_err = NULL;
>>   
>>       assert(mis->from_src_file);
>>   
>>       if (compress_threads_load_setup(mis->from_src_file)) {
>> -        error_report("Failed to setup decompress threads");
>> +        error_setg(&local_err, "Failed to setup decompress threads");
>>           goto fail;
>>       }
>>   
>> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
>>       }
>>   
>>       if (ret < 0) {
>> -        MigrationState *s = migrate_get_current();
>> -
>> -        if (migrate_has_error(s)) {
>> -            WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>> -                error_report_err(s->error);
>> -            }
>> -        }
>> -        error_report("load of migration failed: %s", strerror(-ret));
>> +        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
>>           goto fail;
>>       }
>>   
>>       if (colo_incoming_co() < 0) {
>> +        error_setg(&local_err, "colo incoming failed");
>>           goto fail;
>>       }
>>   
>>       migration_bh_schedule(process_incoming_migration_bh, mis);
>>       return;
>>   fail:
>> +    migrate_set_error(migrate_get_current(), local_err);
>> +    error_report_err(local_err);
> 
> This will report an different error from the QMP one if s->error happens
> to be already set. Either use s->error here or prepend the "load of
> migration..." error to the s->error above.

I had another idea: first, modify migrate_set_error so that it always prints the error. This way we should not care to print it here.

> 
>>       migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>                         MIGRATION_STATUS_FAILED);
>> -    qemu_fclose(mis->from_src_file);
>> -
>> -    multifd_recv_cleanup();
>> -    compress_threads_load_cleanup();
>> -
>> -    exit(EXIT_FAILURE);
>> +    migration_incoming_state_destroy();
>>   }
>>   
>>   /**
>> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
>> index e451dbdbed..91c13bd566 100644
>> --- a/tests/qtest/migration-helpers.c
>> +++ b/tests/qtest/migration-helpers.c
>> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
>>       wait_for_migration_status(who, "completed", NULL);
>>   }
>>   
>> -void wait_for_migration_fail(QTestState *from, bool allow_active)
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> +                             bool is_incoming)
>>   {
>>       g_test_timer_start();
>>       QDict *rsp_return;
>> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
>>       /* Is the machine currently running? */
>>       rsp_return = qtest_qmp_assert_success_ref(from,
>>                                                 "{ 'execute': 'query-status' }");
>> -    g_assert(qdict_haskey(rsp_return, "running"));
>> -    g_assert(qdict_get_bool(rsp_return, "running"));
>> +    if (is_incoming) {
>> +        if (qdict_haskey(rsp_return, "running")) {
>> +            g_assert(!qdict_get_bool(rsp_return, "running"));
>> +        }
>> +    } else {
>> +        g_assert(qdict_haskey(rsp_return, "running"));
>> +        g_assert(qdict_get_bool(rsp_return, "running"));
>> +    }
>>       qobject_unref(rsp_return);
>>   }
>>   
>> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
>> index 3bf7ded1b9..7bd07059ae 100644
>> --- a/tests/qtest/migration-helpers.h
>> +++ b/tests/qtest/migration-helpers.h
>> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>>   
>>   void wait_for_migration_complete(QTestState *who);
>>   
>> -void wait_for_migration_fail(QTestState *from, bool allow_active);
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> +                             bool is_incoming);
>>   
>>   char *find_common_machine_version(const char *mtype, const char *var1,
>>                                     const char *var2);
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 1d2cee87ea..e00b755f05 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
>>           return;
>>       }
>>       migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
>> -    wait_for_migration_fail(from, false);
>> +    wait_for_migration_fail(from, false, false);
>>       test_migrate_end(from, to, false);
>>   }
>>   
>> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>>   
>>       if (args->result != MIG_TEST_SUCCEED) {
>>           bool allow_active = args->result == MIG_TEST_FAIL;
>> -        wait_for_migration_fail(from, allow_active);
>> +        wait_for_migration_fail(from, allow_active, false);
>>   
>>           if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
>> -            qtest_set_expected_status(to, EXIT_FAILURE);
>> +            wait_for_migration_fail(to, true, true);
>>           }
>>       } else {
>>           if (args->live) {
>> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
>>       migrate_qmp(from, uri, "{}");
>>   
>>       if (should_fail) {
>> -        qtest_set_expected_status(to, EXIT_FAILURE);
>> -        wait_for_migration_fail(from, true);
>> +        wait_for_migration_fail(to, true, true);
>> +        wait_for_migration_fail(from, true, false);
>>       } else {
>>           wait_for_migration_complete(from);
>>       }
>> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
>>       migrate_cancel(from);
>>   
>>       /* Make sure QEMU process "to" exited */
>> -    qtest_set_expected_status(to, EXIT_FAILURE);
>> -    qtest_wait_qemu(to);
>> +    wait_for_migration_fail(to, true, true);
>> +    qtest_quit(to);
>>   
>>       args = (MigrateStart){
>>           .only_target = true,
Vladimir Sementsov-Ogievskiy April 18, 2024, 3:40 p.m. UTC | #4
On 18.04.24 17:37, Daniel P. Berrangé wrote:
> On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> We do set MIGRATION_FAILED state, but don't give a chance to
>> orchestrator to query migration state and get the error.
>>
>> Let's report an error through QAPI like we do on outgoing migration.
>>
>> migration-test is updated correspondingly.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>
>> Doubt: is exiting on failure a contract? Will this commit break
>> something in Libvirt? Finally, could we just change the logic, or I need
>> and additional migration-parameter for new behavior?
> 
> There's a decent risk that this could break apps, whether
> libvirt or something else, especially if the app is just
> launching QEMU with '-incoming URI', rather than using
> '-incoming defer' and then explicitly using QMP to start the
> incoming migration.
> 
> I'd say that with '-incoming defer' we should *not* exit on
> migration error, because that arg implies the app explicitly
> wants to be using QMP to control migration.
> 
> With the legacy '-incoming URI' it is probably best to keep
> exit on error, as that's comparatively more likely to be used
> in adhoc scenarios where the app/user is ignoring QMP on the
> dst side.
> 
> None the less, I think we need to check how libvirt behaves
> with this patch to be sure of no surprises.
> 

Sounds reasonable, thanks! I'll rework it to behave the new way only with "-incoming defer", and check how libvirt behave with it.

>>
>>   migration/migration.c           | 22 +++++++---------------
>>   tests/qtest/migration-helpers.c | 13 ++++++++++---
>>   tests/qtest/migration-helpers.h |  3 ++-
>>   tests/qtest/migration-test.c    | 14 +++++++-------
>>   4 files changed, 26 insertions(+), 26 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 86bf76e925..3c203e767d 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -738,11 +738,12 @@ process_incoming_migration_co(void *opaque)
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>       PostcopyState ps;
>>       int ret;
>> +    Error *local_err = NULL;
>>   
>>       assert(mis->from_src_file);
>>   
>>       if (compress_threads_load_setup(mis->from_src_file)) {
>> -        error_report("Failed to setup decompress threads");
>> +        error_setg(&local_err, "Failed to setup decompress threads");
>>           goto fail;
>>       }
>>   
>> @@ -779,32 +780,23 @@ process_incoming_migration_co(void *opaque)
>>       }
>>   
>>       if (ret < 0) {
>> -        MigrationState *s = migrate_get_current();
>> -
>> -        if (migrate_has_error(s)) {
>> -            WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
>> -                error_report_err(s->error);
>> -            }
>> -        }
>> -        error_report("load of migration failed: %s", strerror(-ret));
>> +        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
>>           goto fail;
>>       }
>>   
>>       if (colo_incoming_co() < 0) {
>> +        error_setg(&local_err, "colo incoming failed");
>>           goto fail;
>>       }
>>   
>>       migration_bh_schedule(process_incoming_migration_bh, mis);
>>       return;
>>   fail:
>> +    migrate_set_error(migrate_get_current(), local_err);
>> +    error_report_err(local_err);
>>       migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>                         MIGRATION_STATUS_FAILED);
>> -    qemu_fclose(mis->from_src_file);
>> -
>> -    multifd_recv_cleanup();
>> -    compress_threads_load_cleanup();
>> -
>> -    exit(EXIT_FAILURE);
>> +    migration_incoming_state_destroy();
>>   }
>>   
>>   /**
>> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
>> index e451dbdbed..91c13bd566 100644
>> --- a/tests/qtest/migration-helpers.c
>> +++ b/tests/qtest/migration-helpers.c
>> @@ -211,7 +211,8 @@ void wait_for_migration_complete(QTestState *who)
>>       wait_for_migration_status(who, "completed", NULL);
>>   }
>>   
>> -void wait_for_migration_fail(QTestState *from, bool allow_active)
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> +                             bool is_incoming)
>>   {
>>       g_test_timer_start();
>>       QDict *rsp_return;
>> @@ -236,8 +237,14 @@ void wait_for_migration_fail(QTestState *from, bool allow_active)
>>       /* Is the machine currently running? */
>>       rsp_return = qtest_qmp_assert_success_ref(from,
>>                                                 "{ 'execute': 'query-status' }");
>> -    g_assert(qdict_haskey(rsp_return, "running"));
>> -    g_assert(qdict_get_bool(rsp_return, "running"));
>> +    if (is_incoming) {
>> +        if (qdict_haskey(rsp_return, "running")) {
>> +            g_assert(!qdict_get_bool(rsp_return, "running"));
>> +        }
>> +    } else {
>> +        g_assert(qdict_haskey(rsp_return, "running"));
>> +        g_assert(qdict_get_bool(rsp_return, "running"));
>> +    }
>>       qobject_unref(rsp_return);
>>   }
>>   
>> diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
>> index 3bf7ded1b9..7bd07059ae 100644
>> --- a/tests/qtest/migration-helpers.h
>> +++ b/tests/qtest/migration-helpers.h
>> @@ -46,7 +46,8 @@ void wait_for_migration_status(QTestState *who,
>>   
>>   void wait_for_migration_complete(QTestState *who);
>>   
>> -void wait_for_migration_fail(QTestState *from, bool allow_active);
>> +void wait_for_migration_fail(QTestState *from, bool allow_active,
>> +                             bool is_incoming);
>>   
>>   char *find_common_machine_version(const char *mtype, const char *var1,
>>                                     const char *var2);
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 1d2cee87ea..e00b755f05 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -1670,7 +1670,7 @@ static void test_baddest(void)
>>           return;
>>       }
>>       migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
>> -    wait_for_migration_fail(from, false);
>> +    wait_for_migration_fail(from, false, false);
>>       test_migrate_end(from, to, false);
>>   }
>>   
>> @@ -1781,10 +1781,10 @@ static void test_precopy_common(MigrateCommon *args)
>>   
>>       if (args->result != MIG_TEST_SUCCEED) {
>>           bool allow_active = args->result == MIG_TEST_FAIL;
>> -        wait_for_migration_fail(from, allow_active);
>> +        wait_for_migration_fail(from, allow_active, false);
>>   
>>           if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
>> -            qtest_set_expected_status(to, EXIT_FAILURE);
>> +            wait_for_migration_fail(to, true, true);
>>           }
>>       } else {
>>           if (args->live) {
>> @@ -2571,8 +2571,8 @@ static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
>>       migrate_qmp(from, uri, "{}");
>>   
>>       if (should_fail) {
>> -        qtest_set_expected_status(to, EXIT_FAILURE);
>> -        wait_for_migration_fail(from, true);
>> +        wait_for_migration_fail(to, true, true);
>> +        wait_for_migration_fail(from, true, false);
>>       } else {
>>           wait_for_migration_complete(from);
>>       }
>> @@ -3047,8 +3047,8 @@ static void test_multifd_tcp_cancel(void)
>>       migrate_cancel(from);
>>   
>>       /* Make sure QEMU process "to" exited */
>> -    qtest_set_expected_status(to, EXIT_FAILURE);
>> -    qtest_wait_qemu(to);
>> +    wait_for_migration_fail(to, true, true);
>> +    qtest_quit(to);
>>   
>>       args = (MigrateStart){
>>           .only_target = true,
>> -- 
>> 2.34.1
>>
>>
> 
> With regards,
> Daniel
Daniel P. Berrangé April 18, 2024, 3:43 p.m. UTC | #5
On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 18.04.24 17:37, Daniel P. Berrangé wrote:
> > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > We do set MIGRATION_FAILED state, but don't give a chance to
> > > orchestrator to query migration state and get the error.
> > > 
> > > Let's report an error through QAPI like we do on outgoing migration.
> > > 
> > > migration-test is updated correspondingly.
> > > 
> > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > > ---
> > > 
> > > Doubt: is exiting on failure a contract? Will this commit break
> > > something in Libvirt? Finally, could we just change the logic, or I need
> > > and additional migration-parameter for new behavior?
> > 
> > There's a decent risk that this could break apps, whether
> > libvirt or something else, especially if the app is just
> > launching QEMU with '-incoming URI', rather than using
> > '-incoming defer' and then explicitly using QMP to start the
> > incoming migration.
> > 
> > I'd say that with '-incoming defer' we should *not* exit on
> > migration error, because that arg implies the app explicitly
> > wants to be using QMP to control migration.
> > 
> > With the legacy '-incoming URI' it is probably best to keep
> > exit on error, as that's comparatively more likely to be used
> > in adhoc scenarios where the app/user is ignoring QMP on the
> > dst side.
> > 
> > None the less, I think we need to check how libvirt behaves
> > with this patch to be sure of no surprises.
> > 
> 
> Sounds reasonable, thanks! I'll rework it to behave the new
> way only with "-incoming defer", and check how libvirt behave with it.

If there are problems and/or we want to be super safe wrt
backcompat, we could add a new  '-incoming managed' as
being equivalent to '-incoming defer' but without the
implicit exit.

With regards,
Daniel
Vladimir Sementsov-Ogievskiy April 18, 2024, 3:47 p.m. UTC | #6
On 18.04.24 18:43, Daniel P. Berrangé wrote:
> On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> On 18.04.24 17:37, Daniel P. Berrangé wrote:
>>> On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
>>>> We do set MIGRATION_FAILED state, but don't give a chance to
>>>> orchestrator to query migration state and get the error.
>>>>
>>>> Let's report an error through QAPI like we do on outgoing migration.
>>>>
>>>> migration-test is updated correspondingly.
>>>>
>>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>>>> ---
>>>>
>>>> Doubt: is exiting on failure a contract? Will this commit break
>>>> something in Libvirt? Finally, could we just change the logic, or I need
>>>> and additional migration-parameter for new behavior?
>>>
>>> There's a decent risk that this could break apps, whether
>>> libvirt or something else, especially if the app is just
>>> launching QEMU with '-incoming URI', rather than using
>>> '-incoming defer' and then explicitly using QMP to start the
>>> incoming migration.
>>>
>>> I'd say that with '-incoming defer' we should *not* exit on
>>> migration error, because that arg implies the app explicitly
>>> wants to be using QMP to control migration.
>>>
>>> With the legacy '-incoming URI' it is probably best to keep
>>> exit on error, as that's comparatively more likely to be used
>>> in adhoc scenarios where the app/user is ignoring QMP on the
>>> dst side.
>>>
>>> None the less, I think we need to check how libvirt behaves
>>> with this patch to be sure of no surprises.
>>>
>>
>> Sounds reasonable, thanks! I'll rework it to behave the new
>> way only with "-incoming defer", and check how libvirt behave with it.
> 
> If there are problems and/or we want to be super safe wrt
> backcompat, we could add a new  '-incoming managed' as
> being equivalent to '-incoming defer' but without the
> implicit exit.
> 

Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally.
And we can in the same time deprecate "-incoming defer" in favor of new behavior.
Peter Xu April 18, 2024, 4:43 p.m. UTC | #7
On Thu, Apr 18, 2024 at 06:47:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 18.04.24 18:43, Daniel P. Berrangé wrote:
> > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > On 18.04.24 17:37, Daniel P. Berrangé wrote:
> > > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > > We do set MIGRATION_FAILED state, but don't give a chance to
> > > > > orchestrator to query migration state and get the error.
> > > > > 
> > > > > Let's report an error through QAPI like we do on outgoing migration.
> > > > > 
> > > > > migration-test is updated correspondingly.
> > > > > 
> > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > > > > ---
> > > > > 
> > > > > Doubt: is exiting on failure a contract? Will this commit break
> > > > > something in Libvirt? Finally, could we just change the logic, or I need
> > > > > and additional migration-parameter for new behavior?
> > > > 
> > > > There's a decent risk that this could break apps, whether
> > > > libvirt or something else, especially if the app is just
> > > > launching QEMU with '-incoming URI', rather than using
> > > > '-incoming defer' and then explicitly using QMP to start the
> > > > incoming migration.
> > > > 
> > > > I'd say that with '-incoming defer' we should *not* exit on
> > > > migration error, because that arg implies the app explicitly
> > > > wants to be using QMP to control migration.
> > > > 
> > > > With the legacy '-incoming URI' it is probably best to keep
> > > > exit on error, as that's comparatively more likely to be used
> > > > in adhoc scenarios where the app/user is ignoring QMP on the
> > > > dst side.
> > > > 
> > > > None the less, I think we need to check how libvirt behaves
> > > > with this patch to be sure of no surprises.
> > > > 
> > > 
> > > Sounds reasonable, thanks! I'll rework it to behave the new
> > > way only with "-incoming defer", and check how libvirt behave with it.
> > 
> > If there are problems and/or we want to be super safe wrt
> > backcompat, we could add a new  '-incoming managed' as
> > being equivalent to '-incoming defer' but without the
> > implicit exit.
> > 
> 
> Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally.
> And we can in the same time deprecate "-incoming defer" in favor of new behavior.

Or just make it a new migration parameter?  Then we keep all existing
interfaces untouched, no risk of breaking anyone, and then it'll also apply
to anyone who uses things like -incoming tcp but still wants to keep the
qemu instance alive?

Thanks,
Daniel P. Berrangé April 18, 2024, 4:57 p.m. UTC | #8
On Thu, Apr 18, 2024 at 12:43:42PM -0400, Peter Xu wrote:
> On Thu, Apr 18, 2024 at 06:47:31PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > On 18.04.24 18:43, Daniel P. Berrangé wrote:
> > > On Thu, Apr 18, 2024 at 06:40:38PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > On 18.04.24 17:37, Daniel P. Berrangé wrote:
> > > > > On Thu, Apr 18, 2024 at 01:13:29AM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > > > > We do set MIGRATION_FAILED state, but don't give a chance to
> > > > > > orchestrator to query migration state and get the error.
> > > > > > 
> > > > > > Let's report an error through QAPI like we do on outgoing migration.
> > > > > > 
> > > > > > migration-test is updated correspondingly.
> > > > > > 
> > > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> > > > > > ---
> > > > > > 
> > > > > > Doubt: is exiting on failure a contract? Will this commit break
> > > > > > something in Libvirt? Finally, could we just change the logic, or I need
> > > > > > and additional migration-parameter for new behavior?
> > > > > 
> > > > > There's a decent risk that this could break apps, whether
> > > > > libvirt or something else, especially if the app is just
> > > > > launching QEMU with '-incoming URI', rather than using
> > > > > '-incoming defer' and then explicitly using QMP to start the
> > > > > incoming migration.
> > > > > 
> > > > > I'd say that with '-incoming defer' we should *not* exit on
> > > > > migration error, because that arg implies the app explicitly
> > > > > wants to be using QMP to control migration.
> > > > > 
> > > > > With the legacy '-incoming URI' it is probably best to keep
> > > > > exit on error, as that's comparatively more likely to be used
> > > > > in adhoc scenarios where the app/user is ignoring QMP on the
> > > > > dst side.
> > > > > 
> > > > > None the less, I think we need to check how libvirt behaves
> > > > > with this patch to be sure of no surprises.
> > > > > 
> > > > 
> > > > Sounds reasonable, thanks! I'll rework it to behave the new
> > > > way only with "-incoming defer", and check how libvirt behave with it.
> > > 
> > > If there are problems and/or we want to be super safe wrt
> > > backcompat, we could add a new  '-incoming managed' as
> > > being equivalent to '-incoming defer' but without the
> > > implicit exit.
> > > 
> > 
> > Probably, that's the best variant. As I can check libvirt in some case, but not at all cases. And libvirt is not the only vm manager finally.
> > And we can in the same time deprecate "-incoming defer" in favor of new behavior.
> 
> Or just make it a new migration parameter?  Then we keep all existing
> interfaces untouched, no risk of breaking anyone, and then it'll also apply
> to anyone who uses things like -incoming tcp but still wants to keep the
> qemu instance alive?

True, or even more simply, an argument to the 'migrate-incoming' command

diff --git a/qapi/migration.json b/qapi/migration.json
index 8c65b90328..6882aef328 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1889,7 +1889,8 @@
 ##
 { 'command': 'migrate-incoming',
              'data': {'*uri': 'str',
-                      '*channels': [ 'MigrationChannel' ] } }
+                      '*channels': [ 'MigrationChannel' ],
+                      '*exit-on-error': 'bool' } }
 
 ##
 # @xen-save-devices-state:


With regards,
Daniel
diff mbox series

Patch

diff --git a/migration/migration.c b/migration/migration.c
index 86bf76e925..3c203e767d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -738,11 +738,12 @@  process_incoming_migration_co(void *opaque)
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyState ps;
     int ret;
+    Error *local_err = NULL;
 
     assert(mis->from_src_file);
 
     if (compress_threads_load_setup(mis->from_src_file)) {
-        error_report("Failed to setup decompress threads");
+        error_setg(&local_err, "Failed to setup decompress threads");
         goto fail;
     }
 
@@ -779,32 +780,23 @@  process_incoming_migration_co(void *opaque)
     }
 
     if (ret < 0) {
-        MigrationState *s = migrate_get_current();
-
-        if (migrate_has_error(s)) {
-            WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
-                error_report_err(s->error);
-            }
-        }
-        error_report("load of migration failed: %s", strerror(-ret));
+        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
         goto fail;
     }
 
     if (colo_incoming_co() < 0) {
+        error_setg(&local_err, "colo incoming failed");
         goto fail;
     }
 
     migration_bh_schedule(process_incoming_migration_bh, mis);
     return;
 fail:
+    migrate_set_error(migrate_get_current(), local_err);
+    error_report_err(local_err);
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
-    qemu_fclose(mis->from_src_file);
-
-    multifd_recv_cleanup();
-    compress_threads_load_cleanup();
-
-    exit(EXIT_FAILURE);
+    migration_incoming_state_destroy();
 }
 
 /**
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index e451dbdbed..91c13bd566 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -211,7 +211,8 @@  void wait_for_migration_complete(QTestState *who)
     wait_for_migration_status(who, "completed", NULL);
 }
 
-void wait_for_migration_fail(QTestState *from, bool allow_active)
+void wait_for_migration_fail(QTestState *from, bool allow_active,
+                             bool is_incoming)
 {
     g_test_timer_start();
     QDict *rsp_return;
@@ -236,8 +237,14 @@  void wait_for_migration_fail(QTestState *from, bool allow_active)
     /* Is the machine currently running? */
     rsp_return = qtest_qmp_assert_success_ref(from,
                                               "{ 'execute': 'query-status' }");
-    g_assert(qdict_haskey(rsp_return, "running"));
-    g_assert(qdict_get_bool(rsp_return, "running"));
+    if (is_incoming) {
+        if (qdict_haskey(rsp_return, "running")) {
+            g_assert(!qdict_get_bool(rsp_return, "running"));
+        }
+    } else {
+        g_assert(qdict_haskey(rsp_return, "running"));
+        g_assert(qdict_get_bool(rsp_return, "running"));
+    }
     qobject_unref(rsp_return);
 }
 
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 3bf7ded1b9..7bd07059ae 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -46,7 +46,8 @@  void wait_for_migration_status(QTestState *who,
 
 void wait_for_migration_complete(QTestState *who);
 
-void wait_for_migration_fail(QTestState *from, bool allow_active);
+void wait_for_migration_fail(QTestState *from, bool allow_active,
+                             bool is_incoming);
 
 char *find_common_machine_version(const char *mtype, const char *var1,
                                   const char *var2);
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 1d2cee87ea..e00b755f05 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1670,7 +1670,7 @@  static void test_baddest(void)
         return;
     }
     migrate_qmp(from, "tcp:127.0.0.1:0", "{}");
-    wait_for_migration_fail(from, false);
+    wait_for_migration_fail(from, false, false);
     test_migrate_end(from, to, false);
 }
 
@@ -1781,10 +1781,10 @@  static void test_precopy_common(MigrateCommon *args)
 
     if (args->result != MIG_TEST_SUCCEED) {
         bool allow_active = args->result == MIG_TEST_FAIL;
-        wait_for_migration_fail(from, allow_active);
+        wait_for_migration_fail(from, allow_active, false);
 
         if (args->result == MIG_TEST_FAIL_DEST_QUIT_ERR) {
-            qtest_set_expected_status(to, EXIT_FAILURE);
+            wait_for_migration_fail(to, true, true);
         }
     } else {
         if (args->live) {
@@ -2571,8 +2571,8 @@  static void do_test_validate_uuid(MigrateStart *args, bool should_fail)
     migrate_qmp(from, uri, "{}");
 
     if (should_fail) {
-        qtest_set_expected_status(to, EXIT_FAILURE);
-        wait_for_migration_fail(from, true);
+        wait_for_migration_fail(to, true, true);
+        wait_for_migration_fail(from, true, false);
     } else {
         wait_for_migration_complete(from);
     }
@@ -3047,8 +3047,8 @@  static void test_multifd_tcp_cancel(void)
     migrate_cancel(from);
 
     /* Make sure QEMU process "to" exited */
-    qtest_set_expected_status(to, EXIT_FAILURE);
-    qtest_wait_qemu(to);
+    wait_for_migration_fail(to, true, true);
+    qtest_quit(to);
 
     args = (MigrateStart){
         .only_target = true,